diff options
Diffstat (limited to 'arch')
378 files changed, 13792 insertions, 4680 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 0c79b9d95f74..f7c96635d3b4 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -280,6 +280,10 @@ config ISA (MCA) or VESA. ISA is an older system, now being displaced by PCI; newer boards don't support it. If you have ISA, say Y, otherwise N. +config ISA_DMA_API + bool + default y + config PCI bool depends on !ALPHA_JENSEN diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index b5d0fd2bb10a..64e450dddb49 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -457,22 +457,6 @@ osf_getdomainname(char __user *name, int namelen) return 0; } -asmlinkage long -osf_shmat(int shmid, void __user *shmaddr, int shmflg) -{ - unsigned long raddr; - long err; - - err = do_shmat(shmid, shmaddr, shmflg, &raddr); - - /* - * This works because all user-level addresses are - * non-negative longs! - */ - return err ? err : (long)raddr; -} - - /* * The following stuff should move into a header file should it ever * be labeled "officially supported." Right now, there is just enough diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index d00583161574..bbd37536d14e 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -14,6 +14,7 @@ #include <linux/user.h> #include <linux/slab.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -335,7 +336,7 @@ do_sys_ptrace(long request, long pid, long addr, long data, /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -365,7 +366,7 @@ do_sys_ptrace(long request, long pid, long addr, long data, case PTRACE_SINGLESTEP: /* execute single instruction. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; /* Mark single stepping. */ child->thread_info->bpt_nsaved = -1; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 3864b33562ee..052120882876 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -227,7 +227,7 @@ sys_call_table: .quad sys_semop .quad osf_utsname .quad sys_lchown - .quad osf_shmat + .quad sys_shmat .quad sys_shmctl /* 210 */ .quad sys_shmdt .quad sys_shmget diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4055115ae0e2..bf397a9f8ac2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -85,6 +85,7 @@ choice config ARCH_CLPS7500 bool "Cirrus-CL-PS7500FE" select TIMER_ACORN + select ISA config ARCH_CLPS711X bool "CLPS711x/EP721x-based" @@ -96,6 +97,7 @@ config ARCH_CO285 config ARCH_EBSA110 bool "EBSA-110" + select ISA help This is an evaluation board for the StrongARM processor available from Digital. It has limited hardware on-board, including an onboard @@ -120,13 +122,16 @@ config ARCH_INTEGRATOR config ARCH_IOP3XX bool "IOP3xx-based" + select PCI config ARCH_IXP4XX bool "IXP4xx-based" select DMABOUNCE + select PCI config ARCH_IXP2000 bool "IXP2400/2800-based" + select PCI config ARCH_L7200 bool "LinkUp-L7200" @@ -155,6 +160,8 @@ config ARCH_RPC config ARCH_SA1100 bool "SA1100-based" + select ISA + select DISCONTIGMEM config ARCH_S3C2410 bool "Samsung S3C2410" @@ -165,6 +172,9 @@ config ARCH_S3C2410 config ARCH_SHARK bool "Shark" + select ISA + select ISA_DMA + select PCI config ARCH_LH7A40X bool "Sharp LH7A40X" @@ -252,8 +262,6 @@ config ARM_AMBA config ISA bool - depends on FOOTBRIDGE_HOST || ARCH_SHARK || ARCH_CLPS7500 || ARCH_EBSA110 || ARCH_CDB89712 || ARCH_EDB7211 || ARCH_SA1100 || ARCH_MX1ADS - default y help Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff @@ -263,12 +271,13 @@ config ISA config ISA_DMA bool - depends on FOOTBRIDGE_HOST || ARCH_SHARK + +config ISA_DMA_API + bool default y config PCI bool "PCI support" if ARCH_INTEGRATOR_AP - default y if ARCH_SHARK || FOOTBRIDGE_HOST || ARCH_IOP3XX || ARCH_IXP4XX || ARCH_IXP2000 help Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside @@ -296,7 +305,7 @@ menu "Kernel Features" config SMP bool "Symmetric Multi-Processing (EXPERIMENTAL)" - depends on EXPERIMENTAL && n + depends on EXPERIMENTAL #&& n help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -336,8 +345,7 @@ config PREEMPT config DISCONTIGMEM bool - depends on ARCH_EDB7211 || ARCH_SA1100 || (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) - default y + default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) help Say Y to support efficient handling of discontiguous physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index c0e7aff3dec2..7c7f475e213e 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -18,48 +18,30 @@ * Please select one of the following when turning on debugging. */ #ifdef DEBUG -#if defined(CONFIG_DEBUG_DC21285_PORT) - .macro loadsp, rb - mov \rb, #0x42000000 - .endm - .macro writeb, rb - str \rb, [r3, #0x160] - .endm -#elif defined(CONFIG_DEBUG_ICEDCC) + +#include <asm/arch/debug-macro.S> + +#if defined(CONFIG_DEBUG_ICEDCC) .macro loadsp, rb .endm - .macro writeb, rb - mcr p14, 0, \rb, c0, c1, 0 - .endm -#elif defined(CONFIG_FOOTBRIDGE) - .macro loadsp, rb - mov \rb, #0x7c000000 + .macro writeb, ch, rb + mcr p14, 0, \ch, c0, c1, 0 .endm - .macro writeb, rb - strb \rb, [r3, #0x3f8] +#else + .macro writeb, ch, rb + senduart \ch, \rb .endm -#elif defined(CONFIG_ARCH_RPC) + +#if defined(CONFIG_FOOTBRIDGE) || \ + defined(CONFIG_ARCH_RPC) || \ + defined(CONFIG_ARCH_INTEGRATOR) || \ + defined(CONFIG_ARCH_PXA) || \ + defined(CONFIG_ARCH_IXP4XX) || \ + defined(CONFIG_ARCH_IXP2000) || \ + defined(CONFIG_ARCH_LH7A40X) || \ + defined(CONFIG_ARCH_OMAP) .macro loadsp, rb - mov \rb, #0x03000000 - orr \rb, \rb, #0x00010000 - .endm - .macro writeb, rb - strb \rb, [r3, #0x3f8 << 2] - .endm -#elif defined(CONFIG_ARCH_INTEGRATOR) - .macro loadsp, rb - mov \rb, #0x16000000 - .endm - .macro writeb, rb - strb \rb, [r3, #0] - .endm -#elif defined(CONFIG_ARCH_PXA) /* Xscale-type */ - .macro loadsp, rb - mov \rb, #0x40000000 - orr \rb, \rb, #0x00100000 - .endm - .macro writeb, rb - strb \rb, [r3, #0] + addruart \rb .endm #elif defined(CONFIG_ARCH_SA1100) .macro loadsp, rb @@ -70,65 +52,22 @@ add \rb, \rb, #0x00010000 @ Ser1 # endif .endm - .macro writeb, rb - str \rb, [r3, #0x14] @ UTDR - .endm -#elif defined(CONFIG_ARCH_IXP4XX) - .macro loadsp, rb - mov \rb, #0xc8000000 - .endm - .macro writeb, rb - str \rb, [r3, #0] -#elif defined(CONFIG_ARCH_IXP2000) - .macro loadsp, rb - mov \rb, #0xc0000000 - orr \rb, \rb, #0x00030000 - .endm - .macro writeb, rb - str \rb, [r3, #0] - .endm -#elif defined(CONFIG_ARCH_LH7A40X) - .macro loadsp, rb - ldr \rb, =0x80000700 @ UART2 UARTBASE - .endm - .macro writeb, rb - strb \rb, [r3, #0] - .endm -#elif defined(CONFIG_ARCH_OMAP) - .macro loadsp, rb - mov \rb, #0xff000000 @ physical base address - add \rb, \rb, #0x00fb0000 -#if defined(CONFIG_OMAP_LL_DEBUG_UART2) || defined(CONFIG_OMAP_LL_DEBUG_UART3) - add \rb, \rb, #0x00000800 -#endif -#ifdef CONFIG_OMAP_LL_DEBUG_UART3 - add \rb, \rb, #0x00009000 -#endif - .endm - .macro writeb, rb - strb \rb, [r3] - .endm #elif defined(CONFIG_ARCH_IOP331) .macro loadsp, rb mov \rb, #0xff000000 orr \rb, \rb, #0x00ff0000 orr \rb, \rb, #0x0000f700 @ location of the UART .endm - .macro writeb, rb - str \rb, [r3, #0] - .endm #elif defined(CONFIG_ARCH_S3C2410) - .macro loadsp, rb + .macro loadsp, rb mov \rb, #0x50000000 add \rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT .endm - .macro writeb, rb - strb \rb, [r3, #0x20] - .endm #else #error no serial architecture defined #endif #endif +#endif .macro kputc,val mov r0, \val @@ -734,7 +673,7 @@ puts: loadsp r3 1: ldrb r2, [r0], #1 teq r2, #0 moveq pc, lr -2: writeb r2 +2: writeb r2, r3 mov r1, #0x00020000 3: subs r1, r1, #1 bne 3b diff --git a/arch/arm/common/rtctime.c b/arch/arm/common/rtctime.c index c397e71f938d..72b03f201eb9 100644 --- a/arch/arm/common/rtctime.c +++ b/arch/arm/common/rtctime.c @@ -141,10 +141,10 @@ void rtc_next_alarm_time(struct rtc_time *next, struct rtc_time *now, struct rtc next->tm_sec = alrm->tm_sec; } -static inline void rtc_read_time(struct rtc_ops *ops, struct rtc_time *tm) +static inline int rtc_read_time(struct rtc_ops *ops, struct rtc_time *tm) { memset(tm, 0, sizeof(struct rtc_time)); - ops->read_time(tm); + return ops->read_time(tm); } static inline int rtc_set_time(struct rtc_ops *ops, struct rtc_time *tm) @@ -163,8 +163,7 @@ static inline int rtc_read_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm) int ret = -EINVAL; if (ops->read_alarm) { memset(alrm, 0, sizeof(struct rtc_wkalrm)); - ops->read_alarm(alrm); - ret = 0; + ret = ops->read_alarm(alrm); } return ret; } @@ -283,7 +282,9 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, break; case RTC_RD_TIME: - rtc_read_time(ops, &tm); + ret = rtc_read_time(ops, &tm); + if (ret) + break; ret = copy_to_user(uarg, &tm, sizeof(tm)); if (ret) ret = -EFAULT; @@ -424,15 +425,15 @@ static int rtc_read_proc(char *page, char **start, off_t off, int count, int *eo struct rtc_time tm; char *p = page; - rtc_read_time(ops, &tm); - - p += sprintf(p, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04lu\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - rtc_epoch); + if (rtc_read_time(ops, &tm) == 0) { + p += sprintf(p, + "rtc_time\t: %02d:%02d:%02d\n" + "rtc_date\t: %04d-%02d-%02d\n" + "rtc_epoch\t: %04lu\n", + tm.tm_hour, tm.tm_min, tm.tm_sec, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + rtc_epoch); + } if (rtc_read_alarm(ops, &alrm) == 0) { p += sprintf(p, "alrm_time\t: "); diff --git a/arch/arm/configs/ixdp2800_defconfig b/arch/arm/configs/ixdp2800_defconfig index d36f99192962..7be3521f91fc 100644 --- a/arch/arm/configs/ixdp2800_defconfig +++ b/arch/arm/configs/ixdp2800_defconfig @@ -133,7 +133,7 @@ CONFIG_ALIGNMENT_TRAP=y # CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="console=ttyS0,9600 root=/dev/nfs ip=bootp mem=64M@0x0 pci=firmware" +CONFIG_CMDLINE="console=ttyS0,9600 root=/dev/nfs ip=bootp mem=64M@0x0" # CONFIG_XIP_KERNEL is not set # diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 07a56ff61494..4a2af55e134b 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -31,8 +31,3 @@ head-y := head.o obj-$(CONFIG_DEBUG_LL) += debug.o extra-y := $(head-y) init_task.o vmlinux.lds - -# Spell out some dependencies that aren't automatically figured out -$(obj)/entry-armv.o: $(obj)/entry-header.S include/asm-arm/constants.h -$(obj)/entry-common.o: $(obj)/entry-header.S include/asm-arm/constants.h \ - $(obj)/calls.S diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 99d43259ff89..c1ff4d1f1bfd 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -64,6 +64,26 @@ int main(void) DEFINE(TI_VFPSTATE, offsetof(struct thread_info, vfpstate)); DEFINE(TI_IWMMXT_STATE, (offsetof(struct thread_info, fpstate)+4)&~7); BLANK(); + DEFINE(S_R0, offsetof(struct pt_regs, ARM_r0)); + DEFINE(S_R1, offsetof(struct pt_regs, ARM_r1)); + DEFINE(S_R2, offsetof(struct pt_regs, ARM_r2)); + DEFINE(S_R3, offsetof(struct pt_regs, ARM_r3)); + DEFINE(S_R4, offsetof(struct pt_regs, ARM_r4)); + DEFINE(S_R5, offsetof(struct pt_regs, ARM_r5)); + DEFINE(S_R6, offsetof(struct pt_regs, ARM_r6)); + DEFINE(S_R7, offsetof(struct pt_regs, ARM_r7)); + DEFINE(S_R8, offsetof(struct pt_regs, ARM_r8)); + DEFINE(S_R9, offsetof(struct pt_regs, ARM_r9)); + DEFINE(S_R10, offsetof(struct pt_regs, ARM_r10)); + DEFINE(S_FP, offsetof(struct pt_regs, ARM_fp)); + DEFINE(S_IP, offsetof(struct pt_regs, ARM_ip)); + DEFINE(S_SP, offsetof(struct pt_regs, ARM_sp)); + DEFINE(S_LR, offsetof(struct pt_regs, ARM_lr)); + DEFINE(S_PC, offsetof(struct pt_regs, ARM_pc)); + DEFINE(S_PSR, offsetof(struct pt_regs, ARM_cpsr)); + DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); + DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + BLANK(); #if __LINUX_ARM_ARCH__ >= 6 DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); BLANK(); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bb27c317d94b..4eb36155dc93 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -14,12 +14,12 @@ * it to save wrong values... Be aware! */ #include <linux/config.h> -#include <linux/init.h> -#include <asm/thread_info.h> #include <asm/glue.h> -#include <asm/ptrace.h> #include <asm/vfpmacros.h> +#include <asm/hardware.h> /* should be moved into entry-macro.S */ +#include <asm/arch/irqs.h> /* should be moved into entry-macro.S */ +#include <asm/arch/entry-macro.S> #include "entry-header.S" @@ -118,7 +118,7 @@ __dabt_svc: @ @ IRQs off again before pulling preserved data off the stack @ - disable_irq r0 + disable_irq @ @ restore SPSR and restart the instruction @@ -198,7 +198,7 @@ __und_svc: @ @ IRQs off again before pulling preserved data off the stack @ -1: disable_irq r0 +1: disable_irq @ @ restore SPSR and restart the instruction @@ -232,7 +232,7 @@ __pabt_svc: @ @ IRQs off again before pulling preserved data off the stack @ - disable_irq r0 + disable_irq @ @ restore SPSR and restart the instruction @@ -269,6 +269,12 @@ __pabt_svc: add r5, sp, #S_PC ldmia r7, {r2 - r4} @ Get USR pc, cpsr +#if __LINUX_ARM_ARCH__ < 6 + @ make sure our user space atomic helper is aborted + cmp r2, #VIRT_OFFSET + bichs r3, r3, #PSR_Z_BIT +#endif + @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -316,7 +322,7 @@ __dabt_usr: @ @ IRQs on, then call the main handler @ - enable_irq r2 + enable_irq mov r2, sp adr lr, ret_from_exception b do_DataAbort @@ -418,7 +424,7 @@ call_fpe: movcss r7, r5, lsr #(TIF_USING_IWMMXT + 1) bcs iwmmxt_task_enable #endif - enable_irq r7 + enable_irq add pc, pc, r8, lsr #6 mov r0, r0 @@ -472,7 +478,7 @@ fpundefinstr: __pabt_usr: usr_entry abt - enable_irq r0 @ Enable interrupts + enable_irq @ Enable interrupts mov r0, r2 @ address (pc) mov r1, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -499,8 +505,12 @@ ENTRY(__switch_to) mra r4, r5, acc0 stmia ip, {r4, r5} #endif +#if defined(CONFIG_HAS_TLS_REG) + mcr p15, 0, r3, c13, c0, 3 @ set TLS register +#elif !defined(CONFIG_TLS_REG_EMUL) mov r4, #0xffff0fff - str r3, [r4, #-3] @ Set TLS ptr + str r3, [r4, #-15] @ TLS val at 0xffff0ff0 +#endif mcr p15, 0, r6, c3, c0, 0 @ Set domain register #ifdef CONFIG_VFP @ Always disable VFP so we can lazily save/restore the old @@ -519,11 +529,209 @@ ENTRY(__switch_to) ldmib r2, {r4 - sl, fp, sp, pc} @ Load all regs saved previously __INIT + +/* + * User helpers. + * + * These are segment of kernel provided user code reachable from user space + * at a fixed address in kernel memory. This is used to provide user space + * with some operations which require kernel help because of unimplemented + * native feature and/or instructions in many ARM CPUs. The idea is for + * this code to be executed directly in user mode for best efficiency but + * which is too intimate with the kernel counter part to be left to user + * libraries. In fact this code might even differ from one CPU to another + * depending on the available instruction set and restrictions like on + * SMP systems. In other words, the kernel reserves the right to change + * this code as needed without warning. Only the entry points and their + * results are guaranteed to be stable. + * + * Each segment is 32-byte aligned and will be moved to the top of the high + * vector page. New segments (if ever needed) must be added in front of + * existing ones. This mechanism should be used only for things that are + * really small and justified, and not be abused freely. + * + * User space is expected to implement those things inline when optimizing + * for a processor that has the necessary native support, but only if such + * resulting binaries are already to be incompatible with earlier ARM + * processors due to the use of unsupported instructions other than what + * is provided here. In other words don't make binaries unable to run on + * earlier processors just for the sake of not using these kernel helpers + * if your compiled code is not going to use the new instructions for other + * purpose. + */ + + .align 5 + .globl __kuser_helper_start +__kuser_helper_start: + +/* + * Reference prototype: + * + * int __kernel_cmpxchg(int oldval, int newval, int *ptr) + * + * Input: + * + * r0 = oldval + * r1 = newval + * r2 = ptr + * lr = return address + * + * Output: + * + * r0 = returned value (zero or non-zero) + * C flag = set if r0 == 0, clear if r0 != 0 + * + * Clobbered: + * + * r3, ip, flags + * + * Definition and user space usage example: + * + * typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr); + * #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0) + * + * Atomically store newval in *ptr if *ptr is equal to oldval for user space. + * Return zero if *ptr was changed or non-zero if no exchange happened. + * The C flag is also set if *ptr was changed to allow for assembly + * optimization in the calling code. + * + * For example, a user space atomic_add implementation could look like this: + * + * #define atomic_add(ptr, val) \ + * ({ register unsigned int *__ptr asm("r2") = (ptr); \ + * register unsigned int __result asm("r1"); \ + * asm volatile ( \ + * "1: @ atomic_add\n\t" \ + * "ldr r0, [r2]\n\t" \ + * "mov r3, #0xffff0fff\n\t" \ + * "add lr, pc, #4\n\t" \ + * "add r1, r0, %2\n\t" \ + * "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \ + * "bcc 1b" \ + * : "=&r" (__result) \ + * : "r" (__ptr), "rIL" (val) \ + * : "r0","r3","ip","lr","cc","memory" ); \ + * __result; }) + */ + +__kuser_cmpxchg: @ 0xffff0fc0 + +#if __LINUX_ARM_ARCH__ < 6 + +#ifdef CONFIG_SMP /* sanity check */ +#error "CONFIG_SMP on a machine supporting pre-ARMv6 processors?" +#endif + + /* + * Theory of operation: + * + * We set the Z flag before loading oldval. If ever an exception + * occurs we can not be sure the loaded value will still be the same + * when the exception returns, therefore the user exception handler + * will clear the Z flag whenever the interrupted user code was + * actually from the kernel address space (see the usr_entry macro). + * + * The post-increment on the str is used to prevent a race with an + * exception happening just after the str instruction which would + * clear the Z flag although the exchange was done. + */ + teq ip, ip @ set Z flag + ldr ip, [r2] @ load current val + add r3, r2, #1 @ prepare store ptr + teqeq ip, r0 @ compare with oldval if still allowed + streq r1, [r3, #-1]! @ store newval if still allowed + subs r0, r2, r3 @ if r2 == r3 the str occured + mov pc, lr + +#else + + ldrex r3, [r2] + subs r3, r3, r0 + strexeq r3, r1, [r2] + rsbs r0, r3, #0 + mov pc, lr + +#endif + + .align 5 + +/* + * Reference prototype: + * + * int __kernel_get_tls(void) + * + * Input: + * + * lr = return address + * + * Output: + * + * r0 = TLS value + * + * Clobbered: + * + * the Z flag might be lost + * + * Definition and user space usage example: + * + * typedef int (__kernel_get_tls_t)(void); + * #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0) + * + * Get the TLS value as previously set via the __ARM_NR_set_tls syscall. + * + * This could be used as follows: + * + * #define __kernel_get_tls() \ + * ({ register unsigned int __val asm("r0"); \ + * asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \ + * : "=r" (__val) : : "lr","cc" ); \ + * __val; }) + */ + +__kuser_get_tls: @ 0xffff0fe0 + +#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL) + + ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0 + mov pc, lr + +#else + + mrc p15, 0, r0, c13, c0, 3 @ read TLS register + mov pc, lr + +#endif + + .rep 5 + .word 0 @ pad up to __kuser_helper_version + .endr + +/* + * Reference declaration: + * + * extern unsigned int __kernel_helper_version; + * + * Definition and user space usage example: + * + * #define __kernel_helper_version (*(unsigned int *)0xffff0ffc) + * + * User space may read this to determine the curent number of helpers + * available. + */ + +__kuser_helper_version: @ 0xffff0ffc + .word ((__kuser_helper_end - __kuser_helper_start) >> 5) + + .globl __kuser_helper_end +__kuser_helper_end: + + /* * Vector stubs. * - * This code is copied to 0x200 or 0xffff0200 so we can use branches in the - * vectors, rather than ldr's. + * This code is copied to 0xffff0200 so we can use branches in the + * vectors, rather than ldr's. Note that this code must not + * exceed 0x300 bytes. * * Common stub entry macro: * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC @@ -544,7 +752,7 @@ vector_\name: @ mrs r13, cpsr bic r13, r13, #MODE_MASK - orr r13, r13, #MODE_SVC + orr r13, r13, #SVC_MODE msr spsr_cxsf, r13 @ switch to SVC_32 mode and lr, lr, #15 @@ -552,6 +760,7 @@ vector_\name: movs pc, lr @ Changes mode and branches .endm + .globl __stubs_start __stubs_start: /* * Interrupt dispatcher @@ -686,37 +895,24 @@ vector_addrexcptn: .LCsabt: .word __temp_abt + .globl __stubs_end __stubs_end: - .equ __real_stubs_start, .LCvectors + 0x200 + .equ stubs_offset, __vectors_start + 0x200 - __stubs_start -.LCvectors: + .globl __vectors_start +__vectors_start: swi SYS_ERROR0 - b __real_stubs_start + (vector_und - __stubs_start) - ldr pc, __real_stubs_start + (.LCvswi - __stubs_start) - b __real_stubs_start + (vector_pabt - __stubs_start) - b __real_stubs_start + (vector_dabt - __stubs_start) - b __real_stubs_start + (vector_addrexcptn - __stubs_start) - b __real_stubs_start + (vector_irq - __stubs_start) - b __real_stubs_start + (vector_fiq - __stubs_start) - -ENTRY(__trap_init) - stmfd sp!, {r4 - r6, lr} - - mov r0, #0xff000000 - orr r0, r0, #0x00ff0000 @ high vectors position - adr r1, .LCvectors @ set up the vectors - ldmia r1, {r1, r2, r3, r4, r5, r6, ip, lr} - stmia r0, {r1, r2, r3, r4, r5, r6, ip, lr} - - add r2, r0, #0x200 - adr r0, __stubs_start @ copy stubs to 0x200 - adr r1, __stubs_end -1: ldr r3, [r0], #4 - str r3, [r2], #4 - cmp r0, r1 - blt 1b - LOADREGS(fd, sp!, {r4 - r6, pc}) + b vector_und + stubs_offset + ldr pc, .LCvswi + stubs_offset + b vector_pabt + stubs_offset + b vector_dabt + stubs_offset + b vector_addrexcptn + stubs_offset + b vector_irq + stubs_offset + b vector_fiq + stubs_offset + + .globl __vectors_end +__vectors_end: .data diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 53a7e0dea44d..3f8d0e3aefab 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -9,19 +9,10 @@ */ #include <linux/config.h> -#include <asm/thread_info.h> -#include <asm/ptrace.h> #include <asm/unistd.h> #include "entry-header.S" -/* - * We rely on the fact that R0 is at the bottom of the stack (due to - * slow/fast restore user regs). - */ -#if S_R0 != 0 -#error "Please fix" -#endif .align 5 /* @@ -30,11 +21,19 @@ * stack. */ ret_fast_syscall: - disable_irq r1 @ disable interrupts + disable_irq @ disable interrupts ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne fast_work_pending - fast_restore_user_regs + + @ fast_restore_user_regs + ldr r1, [sp, #S_OFF + S_PSR] @ get calling cpsr + ldr lr, [sp, #S_OFF + S_PC]! @ get pc + msr spsr_cxsf, r1 @ save in spsr_svc + ldmdb sp, {r1 - lr}^ @ get calling r1 - lr + mov r0, r0 + add sp, sp, #S_FRAME_SIZE - S_PC + movs pc, lr @ return & move spsr_svc into cpsr /* * Ok, we need to do extra processing, enter the slow path. @@ -49,7 +48,7 @@ work_pending: mov r0, sp @ 'regs' mov r2, why @ 'syscall' bl do_notify_resume - disable_irq r1 @ disable interrupts + disable_irq @ disable interrupts b no_work_pending work_resched: @@ -59,12 +58,19 @@ work_resched: */ ENTRY(ret_to_user) ret_slow_syscall: - disable_irq r1 @ disable interrupts + disable_irq @ disable interrupts ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne work_pending no_work_pending: - slow_restore_user_regs + @ slow_restore_user_regs + ldr r1, [sp, #S_PSR] @ get calling cpsr + ldr lr, [sp, #S_PC]! @ get pc + msr spsr_cxsf, r1 @ save in spsr_svc + ldmdb sp, {r0 - lr}^ @ get calling r1 - lr + mov r0, r0 + add sp, sp, #S_FRAME_SIZE - S_PC + movs pc, lr @ return & move spsr_svc into cpsr /* * This is how we return from a fork. @@ -116,9 +122,26 @@ ENTRY(ret_from_fork) .align 5 ENTRY(vector_swi) - save_user_regs + sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - r12} @ Calling r0 - r12 + add r8, sp, #S_PC + stmdb r8, {sp, lr}^ @ Calling sp, lr + mrs r8, spsr @ called from non-FIQ mode, so ok. + str lr, [sp, #S_PC] @ Save calling PC + str r8, [sp, #S_PSR] @ Save CPSR + str r0, [sp, #S_OLD_R0] @ Save OLD_R0 zero_fp - get_scno + + /* + * Get the system call number. + */ +#ifdef CONFIG_ARM_THUMB + tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs + addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in + ldreq scno, [lr, #-4] +#else + ldr scno, [lr, #-4] @ get SWI instruction +#endif arm710_bug_check scno, ip #ifdef CONFIG_ALIGNMENT_TRAP @@ -126,14 +149,14 @@ ENTRY(vector_swi) ldr ip, [ip] mcr p15, 0, ip, c1, c0 @ update control register #endif - enable_irq ip + enable_irq str r4, [sp, #-S_OFF]! @ push fifth arg get_thread_info tsk ldr ip, [tsk, #TI_FLAGS] @ check for syscall tracing bic scno, scno, #0xff000000 @ mask off SWI op-code - eor scno, scno, #OS_NUMBER << 20 @ check OS number + eor scno, scno, #__NR_SYSCALL_BASE @ check OS number adr tbl, sys_call_table @ load syscall table pointer tst ip, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? bne __sys_trace @@ -144,8 +167,8 @@ ENTRY(vector_swi) add r1, sp, #S_OFF 2: mov why, #0 @ no longer a real syscall - cmp scno, #ARMSWI_OFFSET - eor r0, scno, #OS_NUMBER << 20 @ put OS number back + cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) + eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall b sys_ni_syscall @ not private func @@ -190,7 +213,7 @@ ENTRY(sys_call_table) @ r5 = syscall table .type sys_syscall, #function sys_syscall: - eor scno, r0, #OS_NUMBER << 20 + eor scno, r0, #__NR_SYSCALL_BASE cmp scno, #__NR_syscall - __NR_SYSCALL_BASE cmpne scno, #NR_syscalls @ check range stmloia sp, {r5, r6} @ shuffle args diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 4039d8c120b5..a3d40a0e2b04 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -1,24 +1,11 @@ -#include <linux/config.h> /* for CONFIG_ARCH_xxxx */ +#include <linux/config.h> +#include <linux/init.h> #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/constants.h> #include <asm/errno.h> -#include <asm/hardware.h> -#include <asm/arch/irqs.h> -#include <asm/arch/entry-macro.S> - -#ifndef MODE_SVC -#define MODE_SVC 0x13 -#endif - - .macro zero_fp -#ifdef CONFIG_FRAME_POINTER - mov fp, #0 -#endif - .endm - - .text +#include <asm/thread_info.h> @ Bad Abort numbers @ ----------------- @@ -29,113 +16,44 @@ #define BAD_IRQ 3 #define BAD_UNDEFINSTR 4 -#define PT_TRACESYS 0x00000002 - -@ OS version number used in SWIs -@ RISC OS is 0 -@ RISC iX is 8 @ -#define OS_NUMBER 9 -#define ARMSWI_OFFSET 0x000f0000 - +@ Most of the stack format comes from struct pt_regs, but with +@ the addition of 8 bytes for storing syscall args 5 and 6. @ -@ Stack format (ensured by USER_* and SVC_*) -@ -#define S_FRAME_SIZE 72 -#define S_OLD_R0 68 -#define S_PSR 64 - -#define S_PC 60 -#define S_LR 56 -#define S_SP 52 -#define S_IP 48 -#define S_FP 44 -#define S_R10 40 -#define S_R9 36 -#define S_R8 32 -#define S_R7 28 -#define S_R6 24 -#define S_R5 20 -#define S_R4 16 -#define S_R3 12 -#define S_R2 8 -#define S_R1 4 -#define S_R0 0 #define S_OFF 8 - .macro set_cpsr_c, reg, mode - msr cpsr_c, \mode +/* + * The SWI code relies on the fact that R0 is at the bottom of the stack + * (due to slow/fast restore user regs). + */ +#if S_R0 != 0 +#error "Please fix" +#endif + + .macro zero_fp +#ifdef CONFIG_FRAME_POINTER + mov fp, #0 +#endif .endm #if __LINUX_ARM_ARCH__ >= 6 - .macro disable_irq, temp + .macro disable_irq cpsid i .endm - .macro enable_irq, temp + .macro enable_irq cpsie i .endm #else - .macro disable_irq, temp - set_cpsr_c \temp, #PSR_I_BIT | MODE_SVC + .macro disable_irq + msr cpsr_c, #PSR_I_BIT | SVC_MODE .endm - .macro enable_irq, temp - set_cpsr_c \temp, #MODE_SVC + .macro enable_irq + msr cpsr_c, #SVC_MODE .endm #endif - .macro save_user_regs - sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - r12} @ Calling r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ @ Calling sp, lr - mrs r8, spsr @ called from non-FIQ mode, so ok. - str lr, [sp, #S_PC] @ Save calling PC - str r8, [sp, #S_PSR] @ Save CPSR - str r0, [sp, #S_OLD_R0] @ Save OLD_R0 - .endm - - .macro restore_user_regs - ldr r1, [sp, #S_PSR] @ Get calling cpsr - disable_irq ip @ disable IRQs - ldr lr, [sp, #S_PC]! @ Get PC - msr spsr_cxsf, r1 @ save in spsr_svc - ldmdb sp, {r0 - lr}^ @ Get calling r0 - lr - mov r0, r0 - add sp, sp, #S_FRAME_SIZE - S_PC - movs pc, lr @ return & move spsr_svc into cpsr - .endm - -/* - * Must be called with IRQs already disabled. - */ - .macro fast_restore_user_regs - ldr r1, [sp, #S_OFF + S_PSR] @ get calling cpsr - ldr lr, [sp, #S_OFF + S_PC]! @ get pc - msr spsr_cxsf, r1 @ save in spsr_svc - ldmdb sp, {r1 - lr}^ @ get calling r1 - lr - mov r0, r0 - add sp, sp, #S_FRAME_SIZE - S_PC - movs pc, lr @ return & move spsr_svc into cpsr - .endm - -/* - * Must be called with IRQs already disabled. - */ - .macro slow_restore_user_regs - ldr r1, [sp, #S_PSR] @ get calling cpsr - ldr lr, [sp, #S_PC]! @ get pc - msr spsr_cxsf, r1 @ save in spsr_svc - ldmdb sp, {r0 - lr}^ @ get calling r1 - lr - mov r0, r0 - add sp, sp, #S_FRAME_SIZE - S_PC - movs pc, lr @ return & move spsr_svc into cpsr - .endm - - .macro mask_pc, rd, rm - .endm - .macro get_thread_info, rd mov \rd, sp, lsr #13 mov \rd, \rd, lsl #13 @@ -165,18 +83,3 @@ scno .req r7 @ syscall number tbl .req r8 @ syscall table pointer why .req r8 @ Linux syscall (!= 0) tsk .req r9 @ current thread_info - -/* - * Get the system call number. - */ - .macro get_scno -#ifdef CONFIG_ARM_THUMB - tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs - addne scno, r7, #OS_NUMBER << 20 @ put OS number in - ldreq scno, [lr, #-4] - -#else - mask_pc lr, lr - ldr scno, [lr, #-4] @ get SWI instruction -#endif - .endm diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 171b3e811c71..4733877296d4 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -19,6 +19,7 @@ #include <asm/procinfo.h> #include <asm/ptrace.h> #include <asm/constants.h> +#include <asm/thread_info.h> #include <asm/system.h> #define PROCINFO_MMUFLAGS 8 @@ -131,7 +132,7 @@ __switch_data: .long processor_id @ r4 .long __machine_arch_type @ r5 .long cr_alignment @ r6 - .long init_thread_union+8192 @ sp + .long init_thread_union + THREAD_START_SP @ sp /* * The following fragment of code is executed with the MMU on, and uses diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 26eacd3e5def..8f146a4b4752 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -256,8 +256,6 @@ static unsigned long *thread_info_head; static unsigned int nr_thread_info; #define EXTRA_TASK_STRUCT 4 -#define ll_alloc_task_struct() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define ll_free_task_struct(p) free_pages((unsigned long)(p),1) struct thread_info *alloc_thread_info(struct task_struct *task) { @@ -274,17 +272,16 @@ struct thread_info *alloc_thread_info(struct task_struct *task) } if (!thread) - thread = ll_alloc_task_struct(); + thread = (struct thread_info *) + __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#ifdef CONFIG_MAGIC_SYSRQ +#ifdef CONFIG_DEBUG_STACK_USAGE /* * The stack must be cleared if you want SYSRQ-T to * give sensible stack usage information */ - if (thread) { - char *p = (char *)thread; - memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE); - } + if (thread) + memzero(thread, THREAD_SIZE); #endif return thread; } @@ -297,7 +294,7 @@ void free_thread_info(struct thread_info *thread) thread_info_head = p; nr_thread_info += 1; } else - ll_free_task_struct(thread); + free_pages((unsigned long)thread, THREAD_SIZE_ORDER); } /* @@ -350,7 +347,7 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, struct thread_info *thread = p->thread_info; struct pt_regs *childregs; - childregs = ((struct pt_regs *)((unsigned long)thread + THREAD_SIZE - 8)) - 1; + childregs = ((struct pt_regs *)((unsigned long)thread + THREAD_START_SP)) - 1; *childregs = *regs; childregs->ARM_r0 = 0; childregs->ARM_sp = stack_start; @@ -447,15 +444,17 @@ EXPORT_SYMBOL(kernel_thread); unsigned long get_wchan(struct task_struct *p) { unsigned long fp, lr; - unsigned long stack_page; + unsigned long stack_start, stack_end; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack_page = 4096 + (unsigned long)p->thread_info; + stack_start = (unsigned long)(p->thread_info + 1); + stack_end = ((unsigned long)p->thread_info) + THREAD_SIZE; + fp = thread_saved_fp(p); do { - if (fp < stack_page || fp > 4092+stack_page) + if (fp < stack_start || fp > stack_end) return 0; lr = pc_pointer (((unsigned long *)fp)[-1]); if (!in_sched_functions(lr)) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index efd7a341614b..cd99b83f14c2 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -19,6 +19,7 @@ #include <linux/user.h> #include <linux/security.h> #include <linux/init.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -693,7 +694,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat case PTRACE_SYSCALL: case PTRACE_CONT: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -728,7 +729,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat */ case PTRACE_SINGLESTEP: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; child->ptrace |= PT_SINGLESTEP; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 7ba6342cf93d..f897ce2ccf0d 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -227,18 +227,6 @@ asmlinkage int sys_ipc(uint call, int first, int second, int third, } } -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg, - unsigned long __user *addr) -{ - unsigned long ret; - long err; - - err = do_shmat(shmid, shmaddr, shmflg, &ret); - if (err == 0) - err = put_user(ret, addr); - return err; -} - /* Fork a new task - this creates a new program thread. * This is called indirectly via a small wrapper */ @@ -314,7 +302,7 @@ long execve(const char *filename, char **argv, char **envp) "b ret_to_user" : : "r" (current_thread_info()), - "Ir" (THREAD_SIZE - 8 - sizeof(regs)), + "Ir" (THREAD_START_SP - sizeof(regs)), "r" (®s), "Ir" (sizeof(regs)) : "r0", "r1", "r2", "r3", "ip", "memory"); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 6e31718f6008..14df16b983f4 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -218,7 +218,8 @@ NORET_TYPE void die(const char *str, struct pt_regs *regs, int err) tsk->comm, tsk->pid, tsk->thread_info + 1); if (!user_mode(regs) || in_interrupt()) { - dump_mem("Stack: ", regs->ARM_sp, 8192+(unsigned long)tsk->thread_info); + dump_mem("Stack: ", regs->ARM_sp, + THREAD_SIZE + (unsigned long)tsk->thread_info); dump_backtrace(regs, tsk); dump_instr(regs); } @@ -450,13 +451,17 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) case NR(set_tls): thread->tp_value = regs->ARM_r0; +#if defined(CONFIG_HAS_TLS_REG) + asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) ); +#elif !defined(CONFIG_TLS_REG_EMUL) /* - * Our user accessible TLS ptr is located at 0xffff0ffc. - * On SMP read access to this address must raise a fault - * and be emulated from the data abort handler. - * m + * User space must never try to access this directly. + * Expect your app to break eventually if you do so. + * The user helper at 0xffff0fe0 must be used instead. + * (see entry-armv.S for details) */ - *((unsigned long *)0xffff0ffc) = thread->tp_value; + *((unsigned int *)0xffff0ff0) = regs->ARM_r0; +#endif return 0; default: @@ -493,6 +498,44 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return 0; } +#ifdef CONFIG_TLS_REG_EMUL + +/* + * We might be running on an ARMv6+ processor which should have the TLS + * register but for some reason we can't use it, or maybe an SMP system + * using a pre-ARMv6 processor (there are apparently a few prototypes like + * that in existence) and therefore access to that register must be + * emulated. + */ + +static int get_tp_trap(struct pt_regs *regs, unsigned int instr) +{ + int reg = (instr >> 12) & 15; + if (reg == 15) + return 1; + regs->uregs[reg] = current_thread_info()->tp_value; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook arm_mrc_hook = { + .instr_mask = 0x0fff0fff, + .instr_val = 0x0e1d0f70, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = 0, + .fn = get_tp_trap, +}; + +static int __init arm_mrc_hook_init(void) +{ + register_undef_hook(&arm_mrc_hook); + return 0; +} + +late_initcall(arm_mrc_hook_init); + +#endif + void __bad_xchg(volatile void *ptr, int size) { printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", @@ -578,9 +621,19 @@ EXPORT_SYMBOL(abort); void __init trap_init(void) { - extern void __trap_init(void); + extern char __stubs_start[], __stubs_end[]; + extern char __vectors_start[], __vectors_end[]; + extern char __kuser_helper_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; - __trap_init(); + /* + * Copy the vectors, stubs and kuser helpers (in entry-armv.S) + * into the vector page, mapped at 0xffff0000, and ensure these + * are visible to the instruction stream. + */ + memcpy((void *)0xffff0000, __vectors_start, __vectors_end - __vectors_start); + memcpy((void *)0xffff0200, __stubs_start, __stubs_end - __stubs_start); + memcpy((void *)0xffff1000 - kuser_sz, __kuser_helper_start, kuser_sz); flush_icache_range(0xffff0000, 0xffff0000 + PAGE_SIZE); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index a39c6a42d68a..ad2d66c93a5c 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ #include <asm-generic/vmlinux.lds.h> #include <linux/config.h> +#include <asm/thread_info.h> OUTPUT_ARCH(arm) ENTRY(stext) @@ -103,7 +104,7 @@ SECTIONS __data_loc = ALIGN(4); /* location in binary */ . = DATAADDR; #else - . = ALIGN(8192); + . = ALIGN(THREAD_SIZE); __data_loc = .; #endif diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig index f6e676322ca9..45c930ccd064 100644 --- a/arch/arm/mach-clps711x/Kconfig +++ b/arch/arm/mach-clps711x/Kconfig @@ -10,6 +10,7 @@ config ARCH_AUTCPU12 config ARCH_CDB89712 bool "CDB89712" + select ISA help This is an evaluation board from Cirrus for the CS89712 processor. The board includes 2 serial ports, Ethernet, IRDA, and expansion @@ -26,6 +27,8 @@ config ARCH_CLEP7312 config ARCH_EDB7211 bool "EDB7211" + select ISA + select DISCONTIGMEM help Say Y here if you intend to run this kernel on a Cirrus Logic EDB-7211 evaluation board. diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index 1090c680b6dd..324d9edeec38 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -5,6 +5,9 @@ menu "Footbridge Implementations" config ARCH_CATS bool "CATS" select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI help Say Y here if you intend to run this kernel on the CATS. @@ -13,6 +16,9 @@ config ARCH_CATS config ARCH_PERSONAL_SERVER bool "Compaq Personal Server" select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI ---help--- Say Y here if you intend to run this kernel on the Compaq Personal Server. @@ -42,6 +48,9 @@ config ARCH_EBSA285_HOST bool "EBSA285 (host mode)" select ARCH_EBSA285 select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI help Say Y here if you intend to run this kernel on the EBSA285 card in host ("central function") mode. @@ -51,6 +60,9 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" select FOOTBRIDGE_HOST + select ISA + select ISA_DMA + select PCI help Say Y here if you intend to run this kernel on the Rebel.COM NetWinder. Information about this machine can be found at: diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index ec85813ee5dc..cddd194ac6eb 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -4,6 +4,7 @@ menu "IMX Implementations" config ARCH_MX1ADS bool "mx1ads" depends on ARCH_IMX + select ISA help Say Y here if you are using the Motorola MX1ADS board diff --git a/arch/arm/mach-imx/generic.c b/arch/arm/mach-imx/generic.c index 54377d0f578c..41e5849ae8da 100644 --- a/arch/arm/mach-imx/generic.c +++ b/arch/arm/mach-imx/generic.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> +#include <asm/arch/imxfb.h> #include <asm/hardware.h> #include <asm/mach/map.h> @@ -228,6 +229,14 @@ static struct platform_device imx_uart2_device = { .resource = imx_uart2_resources, }; +static struct imxfb_mach_info imx_fb_info; + +void __init set_imx_fb_info(struct imxfb_mach_info *hard_imx_fb_info) +{ + memcpy(&imx_fb_info,hard_imx_fb_info,sizeof(struct imxfb_mach_info)); +} +EXPORT_SYMBOL(set_imx_fb_info); + static struct resource imxfb_resources[] = { [0] = { .start = 0x00205000, @@ -241,9 +250,16 @@ static struct resource imxfb_resources[] = { }, }; +static u64 fb_dma_mask = ~(u64)0; + static struct platform_device imxfb_device = { .name = "imx-fb", .id = 0, + .dev = { + .platform_data = &imx_fb_info, + .dma_mask = &fb_dma_mask, + .coherent_dma_mask = 0xffffffff, + }, .num_resources = ARRAY_SIZE(imxfb_resources), .resource = imxfb_resources, }; diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 86c50c3889b7..bd17b5154311 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -216,7 +216,9 @@ integrator_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_seqlock(&xtime_lock); - // ...clear the interrupt + /* + * clear the interrupt + */ timer1->TimerClear = 1; timer_tick(regs); @@ -264,7 +266,7 @@ void __init integrator_time_init(unsigned long reload, unsigned int ctrl) timer1->TimerValue = timer_reload; timer1->TimerControl = timer_ctrl; - /* + /* * Make irqs happen for the system timer */ setup_irq(IRQ_TIMERINT1, &integrator_timer_irq); diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 68e15c36e336..3b948e8c2751 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -420,7 +420,22 @@ static struct clcd_panel vga = { */ static void cp_clcd_enable(struct clcd_fb *fb) { - cm_control(CM_CTRL_LCDMUXSEL_MASK, CM_CTRL_LCDMUXSEL_VGA); + u32 val; + + if (fb->fb.var.bits_per_pixel <= 8) + val = CM_CTRL_LCDMUXSEL_VGA_8421BPP; + else if (fb->fb.var.bits_per_pixel <= 16) + val = CM_CTRL_LCDMUXSEL_VGA_16BPP; + else + val = 0; /* no idea for this, don't trust the docs */ + + cm_control(CM_CTRL_LCDMUXSEL_MASK| + CM_CTRL_LCDEN0| + CM_CTRL_LCDEN1| + CM_CTRL_STATIC1| + CM_CTRL_STATIC2| + CM_CTRL_STATIC| + CM_CTRL_n24BITEN, val); } static unsigned long framesize = SZ_1M; diff --git a/arch/arm/mach-integrator/leds.c b/arch/arm/mach-integrator/leds.c index 9d182b77b312..d2c0ab21150c 100644 --- a/arch/arm/mach-integrator/leds.c +++ b/arch/arm/mach-integrator/leds.c @@ -37,7 +37,7 @@ static void integrator_leds_event(led_event_t ledevt) unsigned long flags; const unsigned int dbg_base = IO_ADDRESS(INTEGRATOR_DBG_BASE); unsigned int update_alpha_leds; - + // yup, change the LEDs local_irq_save(flags); update_alpha_leds = 0; diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c index 20729de2af28..1a844ca139e0 100644 --- a/arch/arm/mach-integrator/time.c +++ b/arch/arm/mach-integrator/time.c @@ -40,25 +40,32 @@ static int integrator_set_rtc(void) return 1; } -static void rtc_read_alarm(struct rtc_wkalrm *alrm) +static int rtc_read_alarm(struct rtc_wkalrm *alrm) { rtc_time_to_tm(readl(rtc_base + RTC_MR), &alrm->time); + return 0; } -static int rtc_set_alarm(struct rtc_wkalrm *alrm) +static inline int rtc_set_alarm(struct rtc_wkalrm *alrm) { unsigned long time; int ret; - ret = rtc_tm_to_time(&alrm->time, &time); + /* + * At the moment, we can only deal with non-wildcarded alarm times. + */ + ret = rtc_valid_tm(&alrm->time); + if (ret == 0) + ret = rtc_tm_to_time(&alrm->time, &time); if (ret == 0) writel(time, rtc_base + RTC_MR); return ret; } -static void rtc_read_time(struct rtc_time *tm) +static int rtc_read_time(struct rtc_time *tm) { rtc_time_to_tm(readl(rtc_base + RTC_DR), tm); + return 0; } /* @@ -69,7 +76,7 @@ static void rtc_read_time(struct rtc_time *tm) * edge of the 1Hz clock, we must write the time one second * in advance. */ -static int rtc_set_time(struct rtc_time *tm) +static inline int rtc_set_time(struct rtc_time *tm) { unsigned long time; int ret; diff --git a/arch/arm/mach-ixp2000/ixdp2800.c b/arch/arm/mach-ixp2000/ixdp2800.c index c4683aaff84a..aec13c7108a9 100644 --- a/arch/arm/mach-ixp2000/ixdp2800.c +++ b/arch/arm/mach-ixp2000/ixdp2800.c @@ -65,19 +65,102 @@ static struct sys_timer ixdp2800_timer = { /************************************************************************* * IXDP2800 PCI *************************************************************************/ +static void __init ixdp2800_slave_disable_pci_master(void) +{ + *IXP2000_PCI_CMDSTAT &= ~(PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY); +} + +static void __init ixdp2800_master_wait_for_slave(void) +{ + volatile u32 *addr; + + printk(KERN_INFO "IXDP2800: waiting for slave NPU to configure " + "its BAR sizes\n"); + + addr = ixp2000_pci_config_addr(0, IXDP2X00_SLAVE_NPU_DEVFN, + PCI_BASE_ADDRESS_1); + do { + *addr = 0xffffffff; + cpu_relax(); + } while (*addr != 0xfe000008); + + addr = ixp2000_pci_config_addr(0, IXDP2X00_SLAVE_NPU_DEVFN, + PCI_BASE_ADDRESS_2); + do { + *addr = 0xffffffff; + cpu_relax(); + } while (*addr != 0xc0000008); + + /* + * Configure the slave's SDRAM BAR by hand. + */ + *addr = 0x40000008; +} + +static void __init ixdp2800_slave_wait_for_master_enable(void) +{ + printk(KERN_INFO "IXDP2800: waiting for master NPU to enable us\n"); + + while ((*IXP2000_PCI_CMDSTAT & PCI_COMMAND_MASTER) == 0) + cpu_relax(); +} + void __init ixdp2800_pci_preinit(void) { printk("ixdp2x00_pci_preinit called\n"); - *IXP2000_PCI_ADDR_EXT = 0x0000e000; + *IXP2000_PCI_ADDR_EXT = 0x0001e000; + + if (!ixdp2x00_master_npu()) + ixdp2800_slave_disable_pci_master(); - *IXP2000_PCI_DRAM_BASE_ADDR_MASK = (0x40000000 - 1) & ~0xfffff; *IXP2000_PCI_SRAM_BASE_ADDR_MASK = (0x2000000 - 1) & ~0x3ffff; + *IXP2000_PCI_DRAM_BASE_ADDR_MASK = (0x40000000 - 1) & ~0xfffff; ixp2000_pci_preinit(); + + if (ixdp2x00_master_npu()) { + /* + * Wait until the slave set its SRAM/SDRAM BAR sizes + * correctly before we proceed to scan and enumerate + * the bus. + */ + ixdp2800_master_wait_for_slave(); + + /* + * We configure the SDRAM BARs by hand because they + * are 1G and fall outside of the regular allocated + * PCI address space. + */ + *IXP2000_PCI_SDRAM_BAR = 0x00000008; + } else { + /* + * Wait for the master to complete scanning the bus + * and assigning resources before we proceed to scan + * the bus ourselves. Set pci=firmware to honor the + * master's resource assignment. + */ + ixdp2800_slave_wait_for_master_enable(); + pcibios_setup("firmware"); + } } -int ixdp2800_pci_setup(int nr, struct pci_sys_data *sys) +/* + * We assign the SDRAM BARs for the two IXP2800 CPUs by hand, outside + * of the regular PCI window, because there's only 512M of outbound PCI + * memory window on each IXP, while we need 1G for each of the BARs. + */ +static void __devinit ixp2800_pci_fixup(struct pci_dev *dev) +{ + if (machine_is_ixdp2800()) { + dev->resource[2].start = 0; + dev->resource[2].end = 0; + dev->resource[2].flags = 0; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IXP2800, ixp2800_pci_fixup); + +static int __init ixdp2800_pci_setup(int nr, struct pci_sys_data *sys) { sys->mem_offset = 0x00000000; @@ -129,22 +212,47 @@ static int __init ixdp2800_pci_map_irq(struct pci_dev *dev, u8 slot, u8 pin) } else return IRQ_IXP2000_PCIB; /* Slave NIC interrupt */ } -static void ixdp2800_pci_postinit(void) +static void __init ixdp2800_master_enable_slave(void) { - struct pci_dev *dev; + volatile u32 *addr; - if (ixdp2x00_master_npu()) { - dev = pci_find_slot(1, IXDP2800_SLAVE_ENET_DEVFN); - pci_remove_bus_device(dev); - } else { - dev = pci_find_slot(1, IXDP2800_MASTER_ENET_DEVFN); - pci_remove_bus_device(dev); + printk(KERN_INFO "IXDP2800: enabling slave NPU\n"); + + addr = (volatile u32 *)ixp2000_pci_config_addr(0, + IXDP2X00_SLAVE_NPU_DEVFN, + PCI_COMMAND); + + *addr |= PCI_COMMAND_MASTER; +} +static void __init ixdp2800_master_wait_for_slave_bus_scan(void) +{ + volatile u32 *addr; + + printk(KERN_INFO "IXDP2800: waiting for slave to finish bus scan\n"); + + addr = (volatile u32 *)ixp2000_pci_config_addr(0, + IXDP2X00_SLAVE_NPU_DEVFN, + PCI_COMMAND); + while ((*addr & PCI_COMMAND_MEMORY) == 0) + cpu_relax(); +} + +static void __init ixdp2800_slave_signal_bus_scan_completion(void) +{ + printk(KERN_INFO "IXDP2800: bus scan done, signaling master\n"); + *IXP2000_PCI_CMDSTAT |= PCI_COMMAND_MEMORY; +} + +static void __init ixdp2800_pci_postinit(void) +{ + if (!ixdp2x00_master_npu()) { ixdp2x00_slave_pci_postinit(); + ixdp2800_slave_signal_bus_scan_completion(); } } -struct hw_pci ixdp2800_pci __initdata = { +struct __initdata hw_pci ixdp2800_pci __initdata = { .nr_controllers = 1, .setup = ixdp2800_pci_setup, .preinit = ixdp2800_pci_preinit, @@ -155,8 +263,21 @@ struct hw_pci ixdp2800_pci __initdata = { int __init ixdp2800_pci_init(void) { - if (machine_is_ixdp2800()) + if (machine_is_ixdp2800()) { + struct pci_dev *dev; + pci_common_init(&ixdp2800_pci); + if (ixdp2x00_master_npu()) { + dev = pci_find_slot(1, IXDP2800_SLAVE_ENET_DEVFN); + pci_remove_bus_device(dev); + + ixdp2800_master_enable_slave(); + ixdp2800_master_wait_for_slave_bus_scan(); + } else { + dev = pci_find_slot(1, IXDP2800_MASTER_ENET_DEVFN); + pci_remove_bus_device(dev); + } + } return 0; } diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c index 831f8ffb6b61..5ff2f2718c58 100644 --- a/arch/arm/mach-ixp2000/pci.c +++ b/arch/arm/mach-ixp2000/pci.c @@ -37,7 +37,7 @@ static int pci_master_aborts = 0; static int clear_master_aborts(void); -static u32 * +u32 * ixp2000_pci_config_addr(unsigned int bus_nr, unsigned int devfn, int where) { u32 *paddress; @@ -208,15 +208,15 @@ ixp2000_pci_preinit(void) * use our own resource space. */ static struct resource ixp2000_pci_mem_space = { - .start = 0x00000000, + .start = 0xe0000000, .end = 0xffffffff, .flags = IORESOURCE_MEM, .name = "PCI Mem Space" }; static struct resource ixp2000_pci_io_space = { - .start = 0x00000000, - .end = 0xffffffff, + .start = 0x00010000, + .end = 0x0001ffff, .flags = IORESOURCE_IO, .name = "PCI I/O Space" }; diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 94bcdb933e41..aa92e3708838 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -502,15 +502,6 @@ pci_set_dma_mask(struct pci_dev *dev, u64 mask) } int -pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - if (mask >= SZ_64M - 1 ) - return 0; - - return -EIO; -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) { if (mask >= SZ_64M - 1 ) @@ -520,7 +511,6 @@ pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) } EXPORT_SYMBOL(pci_set_dma_mask); -EXPORT_SYMBOL(pci_dac_set_dma_mask); EXPORT_SYMBOL(pci_set_consistent_dma_mask); EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index b1575b8dc1cd..a45aaa115a76 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -220,6 +220,30 @@ static struct platform_device stuart_device = { .id = 2, }; +static struct resource i2c_resources[] = { + { + .start = 0x40301680, + .end = 0x403016a3, + .flags = IORESOURCE_MEM, + }, { + .start = IRQ_I2C, + .end = IRQ_I2C, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device i2c_device = { + .name = "pxa2xx-i2c", + .id = 0, + .resource = i2c_resources, + .num_resources = ARRAY_SIZE(i2c_resources), +}; + +void __init pxa_set_i2c_info(struct i2c_pxa_platform_data *info) +{ + i2c_device.dev.platform_data = info; +} + static struct platform_device *devices[] __initdata = { &pxamci_device, &udc_device, @@ -227,6 +251,7 @@ static struct platform_device *devices[] __initdata = { &ffuart_device, &btuart_device, &stuart_device, + &i2c_device, }; static int __init pxa_init(void) diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S index 16cad2c2497c..5786ccad938c 100644 --- a/arch/arm/mach-pxa/sleep.S +++ b/arch/arm/mach-pxa/sleep.S @@ -18,6 +18,11 @@ #include <asm/arch/pxa-regs.h> +#ifdef CONFIG_PXA27x // workaround for Errata 50 +#define MDREFR_KDIV 0x200a4000 // all banks +#define CCCR_SLEEP 0x00000107 // L=7 2N=2 A=0 PPDIS=0 CPDIS=0 +#endif + .text /* @@ -28,7 +33,9 @@ ENTRY(pxa_cpu_suspend) +#ifndef CONFIG_IWMMXT mra r2, r3, acc0 +#endif stmfd sp!, {r2 - r12, lr} @ save registers on stack @ get coprocessor registers @@ -61,14 +68,23 @@ ENTRY(pxa_cpu_suspend) @ prepare value for sleep mode mov r1, #3 @ sleep mode - @ prepare to put SDRAM into self-refresh manually + @ prepare pointer to physical address 0 (virtual mapping in generic.c) + mov r2, #UNCACHED_PHYS_0 + + @ prepare SDRAM refresh settings ldr r4, =MDREFR ldr r5, [r4] + + @ enable SDRAM self-refresh mode orr r5, r5, #MDREFR_SLFRSH - @ prepare pointer to physical address 0 (virtual mapping in generic.c) - mov r2, #UNCACHED_PHYS_0 +#ifdef CONFIG_PXA27x + @ set SDCLKx divide-by-2 bits (this is part of a workaround for Errata 50) + ldr r6, =MDREFR_KDIV + orr r5, r5, r6 +#endif +#ifdef CONFIG_PXA25x @ Intel PXA255 Specification Update notes problems @ about suspending with PXBus operating above 133MHz @ (see Errata 31, GPIO output signals, ... unpredictable in sleep @@ -100,6 +116,18 @@ ENTRY(pxa_cpu_suspend) mov r0, #0 mcr p14, 0, r0, c6, c0, 0 orr r0, r0, #2 @ initiate change bit +#endif +#ifdef CONFIG_PXA27x + @ Intel PXA270 Specification Update notes problems sleeping + @ with core operating above 91 MHz + @ (see Errata 50, ...processor does not exit from sleep...) + + ldr r6, =CCCR + ldr r8, [r6] @ keep original value for resume + + ldr r7, =CCCR_SLEEP @ prepare CCCR sleep value + mov r0, #0x2 @ prepare value for CLKCFG +#endif @ align execution to a cache line b 1f @@ -111,6 +139,7 @@ ENTRY(pxa_cpu_suspend) @ All needed values are now in registers. @ These last instructions should be in cache +#if defined(CONFIG_PXA25x) || defined(CONFIG_PXA27x) @ initiate the frequency change... str r7, [r6] mcr p14, 0, r0, c6, c0, 0 @@ -118,14 +147,27 @@ ENTRY(pxa_cpu_suspend) @ restore the original cpu speed value for resume str r8, [r6] - @ put SDRAM into self-refresh - str r5, [r4] + @ need 6 13-MHz cycles before changing PWRMODE + @ just set frequency to 91-MHz... 6*91/13 = 42 + + mov r0, #42 +10: subs r0, r0, #1 + bne 10b +#endif + + @ Do not reorder... + @ Intel PXA270 Specification Update notes problems performing + @ external accesses after SDRAM is put in self-refresh mode + @ (see Errata 39 ...hangs when entering self-refresh mode) @ force address lines low by reading at physical address 0 ldr r3, [r2] + @ put SDRAM into self-refresh + str r5, [r4] + @ enter sleep mode - mcr p14, 0, r1, c7, c0, 0 + mcr p14, 0, r1, c7, c0, 0 @ PWRMODE 20: b 20b @ loop waiting for sleep @@ -188,7 +230,9 @@ resume_after_mmu: bl cpu_xscale_proc_init #endif ldmfd sp!, {r2, r3} +#ifndef CONFIG_IWMMXT mar acc0, r2, r3 +#endif ldmfd sp!, {r4 - r12, pc} @ return to caller diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 5b670c9ac5ef..c4fc6be629de 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -409,3 +409,24 @@ config CPU_BPREDICT_DISABLE depends on CPU_ARM1020 help Say Y here to disable branch prediction. If unsure, say N. + +config TLS_REG_EMUL + bool + default y if (SMP || CPU_32v6) && (CPU_32v5 || CPU_32v4 || CPU_32v3) + help + We might be running on an ARMv6+ processor which should have the TLS + register but for some reason we can't use it, or maybe an SMP system + using a pre-ARMv6 processor (there are apparently a few prototypes + like that in existence) and therefore access to that register must + be emulated. + +config HAS_TLS_REG + bool + depends on CPU_32v6 + default y if !TLS_REG_EMUL + help + This selects support for the CP15 thread register. + It is defined to be available on ARMv6 or later. If a particular + ARMv6 or later CPU doesn't support it then it must omc;ide "select + TLS_REG_EMUL" along with its other caracteristics. + diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 38b2cbb89beb..8f76f3df7b4c 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -1,5 +1,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include "abort-macro.S" /* * Function: v6_early_abort * @@ -13,11 +14,26 @@ * : sp = pointer to registers * * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. */ .align 5 ENTRY(v6_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR +/* + * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. + * The test below covers all the write situations, including Java bytecodes + */ + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #PSR_J_BIT @ Java? + movne pc, lr + do_thumb_abort + ldreq r3, [r2] @ read aborted ARM instruction + do_ldrd_abort + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. mov pc, lr diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index f5a87db8b498..585dfb8e20b9 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -411,9 +411,10 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4; mem_types[MT_ROM].prot_sect &= ~PMD_BIT4; /* - * Mark cache clean areas read only from SVC mode - * and no access from userspace. + * Mark cache clean areas and XIP ROM read only + * from SVC mode and no access from userspace. */ + mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; } diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index 3955de5af4c0..6caed90661fc 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -89,6 +89,10 @@ config PAGESIZE_16 machine with 4MB of memory. endmenu +config ISA_DMA_API + bool + default y + menu "General setup" # Compressed boot loader in ROM. Yes, we really want to ask about diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c index 2a137146a77c..8a52124de0e1 100644 --- a/arch/arm26/kernel/ptrace.c +++ b/arch/arm26/kernel/ptrace.c @@ -18,6 +18,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -591,7 +592,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat case PTRACE_SYSCALL: case PTRACE_CONT: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -626,7 +627,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat */ case PTRACE_SINGLESTEP: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; child->ptrace |= PT_SINGLESTEP; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); diff --git a/arch/arm26/mm/small_page.c b/arch/arm26/mm/small_page.c index 77be86cca789..30447106c25f 100644 --- a/arch/arm26/mm/small_page.c +++ b/arch/arm26/mm/small_page.c @@ -92,8 +92,7 @@ static unsigned long __get_small_page(int priority, struct order *order) page = list_entry(order->queue.next, struct page, lru); again: #ifdef PEDANTIC - if (USED_MAP(page) & ~order->all_used) - PAGE_BUG(page); + BUG_ON(USED_MAP(page) & ~order->all_used); #endif offset = ffz(USED_MAP(page)); SET_USED(page, offset); @@ -141,8 +140,7 @@ static void __free_small_page(unsigned long spage, struct order *order) goto non_small; #ifdef PEDANTIC - if (USED_MAP(page) & ~order->all_used) - PAGE_BUG(page); + BUG_ON(USED_MAP(page) & ~order->all_used); #endif spage = spage >> order->shift; diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index da15db8ae482..581ecabaae53 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -184,7 +185,7 @@ sys_ptrace(long request, long pid, long addr, long data) case PTRACE_CONT: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) { @@ -219,7 +220,7 @@ sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index 2a0efb739adc..cbe03cba9f02 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -20,6 +20,7 @@ #include <linux/user.h> #include <linux/config.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -239,7 +240,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -267,7 +268,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); ptrace_enable(child); diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 5f19d774a288..05c15e869777 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -24,6 +24,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/config.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -171,7 +172,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data >= _NSIG) + if (!valid_signal(data)) break ; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -202,7 +203,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 17a0cbce6f30..e382f32d435e 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -183,7 +183,7 @@ config M386 - "Winchip-C6" for original IDT Winchip. - "Winchip-2" for IDT Winchip 2. - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - - "MediaGX/Geode" for Cyrix MediaGX aka Geode. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). @@ -311,12 +311,10 @@ config MWINCHIP3D stores for this CPU, which can increase performance of some operations. -config MGEODE - bool "MediaGX/Geode" +config MGEODEGX1 + bool "GeodeGX1" help - Select this for a Cyrix MediaGX aka Geode chip. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment reqirements. + Select this for a Geode GX1 (Cyrix MediaGX) chip. config MCYRIXIII bool "CyrixIII/VIA-C3" @@ -368,7 +366,7 @@ config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || X86_GENERIC default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 default "6" if MK7 || MK8 || MPENTIUMM config RWSEM_GENERIC_SPINLOCK @@ -387,7 +385,7 @@ config GENERIC_CALIBRATE_DELAY config X86_PPRO_FENCE bool - depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODE + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 default y config X86_F00F_BUG @@ -417,7 +415,7 @@ config X86_POPAD_OK config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODE + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 default y config X86_GOOD_APIC @@ -442,7 +440,7 @@ config X86_USE_3DNOW config X86_OOSTORE bool - depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MGEODE) && MTRR + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR default y config HPET_TIMER @@ -578,7 +576,7 @@ config X86_VISWS_APIC config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODE) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ default y config X86_MCE @@ -653,6 +651,24 @@ config I8K Say Y if you intend to run this kernel on a Dell Inspiron 8000. Say N otherwise. +config X86_REBOOTFIXUPS + bool "Enable X86 board specific fixups for reboot" + depends on X86 + default n + ---help--- + This enables chipset and/or board specific fixups to be done + in order to get reboot to work correctly. This is only needed on + some combinations of hardware and BIOS. The symptom, for which + this config is intended, is when reboot ends with a stalled/hung + system. + + Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1. + combination. + + Say Y if you want to enable the fixup. Currently, it's safe to + enable this option even if you don't need it. + Say N otherwise. + config MICROCODE tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" ---help--- @@ -1155,6 +1171,10 @@ source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" +config ISA_DMA_API + bool + default y + config ISA bool "ISA support" depends on !(X86_VOYAGER || X86_VISWS) diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 314c7146e9bf..1c36ca332a96 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -14,7 +14,7 @@ # 19990713 Artur Skawina <skawina@geocities.com> # Added '-march' and '-mpreferred-stack-boundary' support # -# Kianusch Sayah Karadji <kianusch@sk-tech.net> +# 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net> # Added support for GEODE CPU LDFLAGS := -m elf_i386 @@ -54,8 +54,8 @@ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) # AMD Elan support cflags-$(CONFIG_X86_ELAN) += -march=i486 -# MediaGX aka Geode support -cflags-$(CONFIG_MGEODE) += $(call cc-option,-march=pentium-mmx,-march=i586) +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) # -mregparm=3 works ok on gcc-3.0 and later # @@ -123,7 +123,7 @@ AFLAGS += $(mflags-y) boot := arch/i386/boot .PHONY: zImage bzImage compressed zlilo bzlilo \ - zdisk bzdisk fdimage fdimage144 fdimage288 install + zdisk bzdisk fdimage fdimage144 fdimage288 install kernel_install all: bzImage @@ -145,8 +145,9 @@ zdisk bzdisk: vmlinux fdimage fdimage144 fdimage288: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ -install: - $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ +install: vmlinux +install kernel_install: + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install prepare: include/asm-$(ARCH)/asm_offsets.h CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h diff --git a/arch/i386/boot/bootsect.S b/arch/i386/boot/bootsect.S index ba9fe14db6a9..011b7a4993d4 100644 --- a/arch/i386/boot/bootsect.S +++ b/arch/i386/boot/bootsect.S @@ -83,7 +83,7 @@ bugger_off_msg: .ascii "\n" .ascii "Remove disk and press any key to reboot . . .\r\n" .byte 0 - + # Kernel attributes; used by setup diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index fa67045234a3..cedc55cc47de 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -12,7 +12,6 @@ #include <linux/linkage.h> #include <linux/vmalloc.h> #include <linux/tty.h> -#include <video/edid.h> #include <asm/io.h> /* diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index a934ab32bf8e..caa1fde6904e 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -164,7 +164,7 @@ ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff trampoline: call start_of_setup .align 16 # The offset at this point is 0x240 - .space (0x7ff-0x240+1) # E820 & EDD space (ending at 0x7ff) + .space (0xeff-0x240+1) # E820 & EDD space (ending at 0xeff) # End of setup header ##################################################### start_of_setup: @@ -333,9 +333,9 @@ jmpe820: # sizeof(e820rec). # good820: - movb (E820NR), %al # up to 32 entries + movb (E820NR), %al # up to 128 entries cmpb $E820MAX, %al - jnl bail820 + jae bail820 incb (E820NR) movw %di, %ax diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 925d3f5a3824..0587477c99f2 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -1924,36 +1924,36 @@ skip10: movb %ah, %al ret store_edid: - pushw %es # just save all registers - pushw %ax + pushw %es # just save all registers + pushw %ax pushw %bx pushw %cx pushw %dx pushw %di - pushw %fs + pushw %fs popw %es movl $0x13131313, %eax # memset block with 0x13 movw $32, %cx movw $0x140, %di cld - rep - stosl + rep + stosl - movw $0x4f15, %ax # do VBE/DDC + movw $0x4f15, %ax # do VBE/DDC movw $0x01, %bx movw $0x00, %cx movw $0x01, %dx movw $0x140, %di - int $0x10 + int $0x10 - popw %di # restore all registers + popw %di # restore all registers popw %dx popw %cx popw %bx popw %ax - popw %es + popw %es ret # VIDEO_SELECT-only variables diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index aacdae6f372d..0fbcfe00dd8d 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o +obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 53eb5cfd5b63..848bb97af7ca 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -650,7 +650,7 @@ acpi_find_rsdp (void) */ rsdp_phys = acpi_scan_rsdp (0, 0x400); if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); + rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000); return rsdp_phys; } diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index e3879f7625c2..d509836b70c3 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1265,8 +1265,6 @@ int __init APIC_init_uniprocessor (void) setup_local_APIC(); - if (nmi_watchdog == NMI_LOCAL_APIC) - check_nmi_watchdog(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) if (!skip_ioapic_setup && nr_ioapics) diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 8d182e875cd7..16dbc4151be4 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -24,7 +24,7 @@ __asm__(".align 4\nvide: ret"); static void __init init_amd(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_SMP int cpu = c == &boot_cpu_data ? 0 : c - cpu_data; #endif u32 l, h; @@ -198,7 +198,7 @@ static void __init init_amd(struct cpuinfo_x86 *c) c->x86_num_cores = 1; } -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_SMP /* * On a AMD dual core setup the lower bits of the APIC id * distingush the cores. Assumes number of cores is a power diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c index 933b0dd62f48..9027a987006b 100644 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -218,12 +218,12 @@ typedef struct { mtrr_type type; } arr_state_t; -static arr_state_t arr_state[8] __initdata = { +static arr_state_t arr_state[8] __devinitdata = { {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} }; -static unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 }; +static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 }; static void cyrix_set_all(void) { diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 9f7a7ea6388d..f468a979e9aa 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -124,8 +124,8 @@ int generic_get_free_region(unsigned long base, unsigned long size) return -ENOSPC; } -void generic_get_mtrr(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type) +static void generic_get_mtrr(unsigned int reg, unsigned long *base, + unsigned int *size, mtrr_type * type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 54999e4c55fd..e1c2042b9b7e 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -72,17 +72,21 @@ void set_mtrr_ops(struct mtrr_ops * ops) static int have_wrcomb(void) { struct pci_dev *dev; + u8 rev; if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { - /* ServerWorks LE chipsets have problems with write-combining + /* ServerWorks LE chipsets < rev 6 have problems with write-combining Don't allow it and leave room for other chipsets to be tagged */ if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { - printk(KERN_INFO "mtrr: Serverworks LE detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev <= 5) { + printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } } - /* Intel 450NX errata # 23. Non ascending cachline evictions to + /* Intel 450NX errata # 23. Non ascending cacheline evictions to write combining memory may resulting in data corruption */ if (dev->vendor == PCI_VENDOR_ID_INTEL && dev->device == PCI_DEVICE_ID_INTEL_82451NX) { diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 4f28eba7fb8a..7323c19f354e 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -25,7 +25,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", /* AMD-defined */ - "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3c73dc865ead..a991d4e5edd2 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -260,11 +260,9 @@ restore_nocheck: .section .fixup,"ax" iret_exc: sti - movl $__USER_DS, %edx - movl %edx, %ds - movl %edx, %es - movl $11,%eax - call do_exit + pushl $0 # no error code + pushl $do_iret_error + jmp error_code .previous .section __ex_table,"a" .align 4 @@ -516,8 +514,6 @@ debug_stack_correct: xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug - testl %eax,%eax - jnz restore_all jmp ret_from_exception /* @@ -598,8 +594,6 @@ ENTRY(int3) xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 - testl %eax,%eax - jnz restore_all jmp ret_from_exception ENTRY(overflow) @@ -658,296 +652,6 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code -.data -ENTRY(sys_call_table) - .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ - .long sys_exit - .long sys_fork - .long sys_read - .long sys_write - .long sys_open /* 5 */ - .long sys_close - .long sys_waitpid - .long sys_creat - .long sys_link - .long sys_unlink /* 10 */ - .long sys_execve - .long sys_chdir - .long sys_time - .long sys_mknod - .long sys_chmod /* 15 */ - .long sys_lchown16 - .long sys_ni_syscall /* old break syscall holder */ - .long sys_stat - .long sys_lseek - .long sys_getpid /* 20 */ - .long sys_mount - .long sys_oldumount - .long sys_setuid16 - .long sys_getuid16 - .long sys_stime /* 25 */ - .long sys_ptrace - .long sys_alarm - .long sys_fstat - .long sys_pause - .long sys_utime /* 30 */ - .long sys_ni_syscall /* old stty syscall holder */ - .long sys_ni_syscall /* old gtty syscall holder */ - .long sys_access - .long sys_nice - .long sys_ni_syscall /* 35 - old ftime syscall holder */ - .long sys_sync - .long sys_kill - .long sys_rename - .long sys_mkdir - .long sys_rmdir /* 40 */ - .long sys_dup - .long sys_pipe - .long sys_times - .long sys_ni_syscall /* old prof syscall holder */ - .long sys_brk /* 45 */ - .long sys_setgid16 - .long sys_getgid16 - .long sys_signal - .long sys_geteuid16 - .long sys_getegid16 /* 50 */ - .long sys_acct - .long sys_umount /* recycled never used phys() */ - .long sys_ni_syscall /* old lock syscall holder */ - .long sys_ioctl - .long sys_fcntl /* 55 */ - .long sys_ni_syscall /* old mpx syscall holder */ - .long sys_setpgid - .long sys_ni_syscall /* old ulimit syscall holder */ - .long sys_olduname - .long sys_umask /* 60 */ - .long sys_chroot - .long sys_ustat - .long sys_dup2 - .long sys_getppid - .long sys_getpgrp /* 65 */ - .long sys_setsid - .long sys_sigaction - .long sys_sgetmask - .long sys_ssetmask - .long sys_setreuid16 /* 70 */ - .long sys_setregid16 - .long sys_sigsuspend - .long sys_sigpending - .long sys_sethostname - .long sys_setrlimit /* 75 */ - .long sys_old_getrlimit - .long sys_getrusage - .long sys_gettimeofday - .long sys_settimeofday - .long sys_getgroups16 /* 80 */ - .long sys_setgroups16 - .long old_select - .long sys_symlink - .long sys_lstat - .long sys_readlink /* 85 */ - .long sys_uselib - .long sys_swapon - .long sys_reboot - .long old_readdir - .long old_mmap /* 90 */ - .long sys_munmap - .long sys_truncate - .long sys_ftruncate - .long sys_fchmod - .long sys_fchown16 /* 95 */ - .long sys_getpriority - .long sys_setpriority - .long sys_ni_syscall /* old profil syscall holder */ - .long sys_statfs - .long sys_fstatfs /* 100 */ - .long sys_ioperm - .long sys_socketcall - .long sys_syslog - .long sys_setitimer - .long sys_getitimer /* 105 */ - .long sys_newstat - .long sys_newlstat - .long sys_newfstat - .long sys_uname - .long sys_iopl /* 110 */ - .long sys_vhangup - .long sys_ni_syscall /* old "idle" system call */ - .long sys_vm86old - .long sys_wait4 - .long sys_swapoff /* 115 */ - .long sys_sysinfo - .long sys_ipc - .long sys_fsync - .long sys_sigreturn - .long sys_clone /* 120 */ - .long sys_setdomainname - .long sys_newuname - .long sys_modify_ldt - .long sys_adjtimex - .long sys_mprotect /* 125 */ - .long sys_sigprocmask - .long sys_ni_syscall /* old "create_module" */ - .long sys_init_module - .long sys_delete_module - .long sys_ni_syscall /* 130: old "get_kernel_syms" */ - .long sys_quotactl - .long sys_getpgid - .long sys_fchdir - .long sys_bdflush - .long sys_sysfs /* 135 */ - .long sys_personality - .long sys_ni_syscall /* reserved for afs_syscall */ - .long sys_setfsuid16 - .long sys_setfsgid16 - .long sys_llseek /* 140 */ - .long sys_getdents - .long sys_select - .long sys_flock - .long sys_msync - .long sys_readv /* 145 */ - .long sys_writev - .long sys_getsid - .long sys_fdatasync - .long sys_sysctl - .long sys_mlock /* 150 */ - .long sys_munlock - .long sys_mlockall - .long sys_munlockall - .long sys_sched_setparam - .long sys_sched_getparam /* 155 */ - .long sys_sched_setscheduler - .long sys_sched_getscheduler - .long sys_sched_yield - .long sys_sched_get_priority_max - .long sys_sched_get_priority_min /* 160 */ - .long sys_sched_rr_get_interval - .long sys_nanosleep - .long sys_mremap - .long sys_setresuid16 - .long sys_getresuid16 /* 165 */ - .long sys_vm86 - .long sys_ni_syscall /* Old sys_query_module */ - .long sys_poll - .long sys_nfsservctl - .long sys_setresgid16 /* 170 */ - .long sys_getresgid16 - .long sys_prctl - .long sys_rt_sigreturn - .long sys_rt_sigaction - .long sys_rt_sigprocmask /* 175 */ - .long sys_rt_sigpending - .long sys_rt_sigtimedwait - .long sys_rt_sigqueueinfo - .long sys_rt_sigsuspend - .long sys_pread64 /* 180 */ - .long sys_pwrite64 - .long sys_chown16 - .long sys_getcwd - .long sys_capget - .long sys_capset /* 185 */ - .long sys_sigaltstack - .long sys_sendfile - .long sys_ni_syscall /* reserved for streams1 */ - .long sys_ni_syscall /* reserved for streams2 */ - .long sys_vfork /* 190 */ - .long sys_getrlimit - .long sys_mmap2 - .long sys_truncate64 - .long sys_ftruncate64 - .long sys_stat64 /* 195 */ - .long sys_lstat64 - .long sys_fstat64 - .long sys_lchown - .long sys_getuid - .long sys_getgid /* 200 */ - .long sys_geteuid - .long sys_getegid - .long sys_setreuid - .long sys_setregid - .long sys_getgroups /* 205 */ - .long sys_setgroups - .long sys_fchown - .long sys_setresuid - .long sys_getresuid - .long sys_setresgid /* 210 */ - .long sys_getresgid - .long sys_chown - .long sys_setuid - .long sys_setgid - .long sys_setfsuid /* 215 */ - .long sys_setfsgid - .long sys_pivot_root - .long sys_mincore - .long sys_madvise - .long sys_getdents64 /* 220 */ - .long sys_fcntl64 - .long sys_ni_syscall /* reserved for TUX */ - .long sys_ni_syscall - .long sys_gettid - .long sys_readahead /* 225 */ - .long sys_setxattr - .long sys_lsetxattr - .long sys_fsetxattr - .long sys_getxattr - .long sys_lgetxattr /* 230 */ - .long sys_fgetxattr - .long sys_listxattr - .long sys_llistxattr - .long sys_flistxattr - .long sys_removexattr /* 235 */ - .long sys_lremovexattr - .long sys_fremovexattr - .long sys_tkill - .long sys_sendfile64 - .long sys_futex /* 240 */ - .long sys_sched_setaffinity - .long sys_sched_getaffinity - .long sys_set_thread_area - .long sys_get_thread_area - .long sys_io_setup /* 245 */ - .long sys_io_destroy - .long sys_io_getevents - .long sys_io_submit - .long sys_io_cancel - .long sys_fadvise64 /* 250 */ - .long sys_ni_syscall - .long sys_exit_group - .long sys_lookup_dcookie - .long sys_epoll_create - .long sys_epoll_ctl /* 255 */ - .long sys_epoll_wait - .long sys_remap_file_pages - .long sys_set_tid_address - .long sys_timer_create - .long sys_timer_settime /* 260 */ - .long sys_timer_gettime - .long sys_timer_getoverrun - .long sys_timer_delete - .long sys_clock_settime - .long sys_clock_gettime /* 265 */ - .long sys_clock_getres - .long sys_clock_nanosleep - .long sys_statfs64 - .long sys_fstatfs64 - .long sys_tgkill /* 270 */ - .long sys_utimes - .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ - .long sys_mbind - .long sys_get_mempolicy - .long sys_set_mempolicy - .long sys_mq_open - .long sys_mq_unlink - .long sys_mq_timedsend - .long sys_mq_timedreceive /* 280 */ - .long sys_mq_notify - .long sys_mq_getsetattr - .long sys_ni_syscall /* reserved for kexec */ - .long sys_waitid - .long sys_ni_syscall /* 285 */ /* available */ - .long sys_add_key - .long sys_request_key - .long sys_keyctl +#include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index d273fd746192..e966fc8c44c4 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -380,6 +380,7 @@ rp_sidt: ALIGN ignore_int: cld +#ifdef CONFIG_PRINTK pushl %eax pushl %ecx pushl %edx @@ -400,6 +401,7 @@ ignore_int: popl %edx popl %ecx popl %eax +#endif iret /* diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 14ec354bec92..903190a4b3ff 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -169,10 +169,6 @@ EXPORT_SYMBOL(rtc_lock); EXPORT_SYMBOL_GPL(set_nmi_callback); EXPORT_SYMBOL_GPL(unset_nmi_callback); -#undef memcmp -extern int memcmp(const void *,const void *,__kernel_size_t); -EXPORT_SYMBOL(memcmp); - EXPORT_SYMBOL(register_die_notifier); #ifdef CONFIG_HAVE_DEC_LOCK EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 5e0d55be5435..7a324e8b86f9 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2175,7 +2175,6 @@ static inline void check_timer(void) disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); - check_nmi_watchdog(); } return; } @@ -2198,7 +2197,6 @@ static inline void check_timer(void) add_pin_to_irq(0, 0, pin2); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); - check_nmi_watchdog(); } return; } diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 671681659243..59ff9b455069 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -217,6 +217,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) *tos &= ~(TF_MASK | IF_MASK); *tos |= kprobe_old_eflags; break; + case 0xc3: /* ret/lret */ + case 0xcb: + case 0xc2: + case 0xca: + regs->eflags &= ~TF_MASK; + /* eip is already adjusted, no more changes required*/ + return; case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); break; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 2f89d000f954..2c0ee9c2d020 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -102,20 +102,21 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) -int __init check_nmi_watchdog (void) +static int __init check_nmi_watchdog(void) { unsigned int prev_nmi_count[NR_CPUS]; int cpu; - printk(KERN_INFO "testing NMI watchdog ... "); + if (nmi_watchdog == NMI_NONE) + return 0; + + printk(KERN_INFO "Testing NMI watchdog ... "); for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - /* FIXME: Only boot CPU is online at this stage. Check CPUs - as they come up. */ for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set @@ -139,6 +140,8 @@ int __init check_nmi_watchdog (void) return 0; } +/* This needs to happen later in boot so counters are working */ +late_initcall(check_nmi_watchdog); static int __init setup_nmi_watchdog(char *str) { diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index b2203e21acb3..96e3ea6b17c7 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -400,11 +400,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, int err; childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; - *childregs = *regs; - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" @@ -415,7 +410,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, * "struct pt_regs" is possible, but they may contain the * completely wrong values. */ - p->thread.esp0 = (unsigned long) (childregs+1) - 8; + childregs = (struct pt_regs *) ((unsigned long) childregs - 8); + *childregs = *regs; + childregs->eax = 0; + childregs->esp = esp; + + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); p->thread.eip = (unsigned long) ret_from_fork; @@ -611,8 +612,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * Save away %fs and %gs. No need to save %es and %ds, as * those are always kernel segments while inside the kernel. */ - asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); - asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); + asm volatile("mov %%fs,%0":"=m" (prev->fs)); + asm volatile("mov %%gs,%0":"=m" (prev->gs)); /* * Restore %fs and %gs if needed. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index b2f17640ceff..e34f651fa13c 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -16,6 +16,7 @@ #include <linux/security.h> #include <linux/audit.h> #include <linux/seccomp.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -511,7 +512,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -543,7 +544,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_singlestep(child); @@ -682,24 +683,18 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) /* do the secure computing check first */ secure_computing(regs->orig_eax); - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(current, regs->orig_eax, - regs->ebx, regs->ecx, - regs->edx, regs->esi); - else - audit_syscall_exit(current, regs->eax); - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); if (!(current->ptrace & PT_PTRACED)) - return; + goto out; /* Fake a debug trap */ if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(current, regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + goto out; /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ @@ -714,4 +709,9 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, + regs->ebx, regs->ecx, regs->edx, regs->esi); + } diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 3d7e994563df..6dc27eb70ee7 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -13,6 +13,7 @@ #include <asm/uaccess.h> #include <asm/apic.h> #include "mach_reboot.h" +#include <linux/reboot_fixups.h> /* * Power off function, if any @@ -348,6 +349,7 @@ void machine_restart(char * __unused) /* rebooting needs to touch the page at absolute addr 0 */ *((unsigned short *)__va(0x472)) = reboot_mode; for (;;) { + mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); /* That didn't work - force a triple fault.. */ __asm__ __volatile__("lidt %0": :"m" (no_idt)); diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c new file mode 100644 index 000000000000..1b183b378c2c --- /dev/null +++ b/arch/i386/kernel/reboot_fixups.c @@ -0,0 +1,56 @@ +/* + * linux/arch/i386/kernel/reboot_fixups.c + * + * This is a good place to put board specific reboot fixups. + * + * List of supported fixups: + * geode-gx1/cs5530a - Jaya Kumar <jayalk@intworks.biz> + * + */ + +#include <asm/delay.h> +#include <linux/pci.h> + +static void cs5530a_warm_reset(struct pci_dev *dev) +{ + /* writing 1 to the reset control register, 0x44 causes the + cs5530a to perform a system warm reset */ + pci_write_config_byte(dev, 0x44, 0x1); + udelay(50); /* shouldn't get here but be safe and spin-a-while */ + return; +} + +struct device_fixup { + unsigned int vendor; + unsigned int device; + void (*reboot_fixup)(struct pci_dev *); +}; + +static struct device_fixup fixups_table[] = { +{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, +}; + +/* + * we see if any fixup is available for our current hardware. if there + * is a fixup, we call it and we expect to never return from it. if we + * do return, we keep looking and then eventually fall back to the + * standard mach_reboot on return. + */ +void mach_reboot_fixups(void) +{ + struct device_fixup *cur; + struct pci_dev *dev; + int i; + + for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) { + cur = &(fixups_table[i]); + dev = pci_get_device(cur->vendor, cur->device, 0); + if (!dev) + continue; + + cur->reboot_fixup(dev); + } + + printk(KERN_WARNING "No reboot fixup found for your hardware\n"); +} + diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index fd36d2f65f88..cbea7ac582e5 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1089,9 +1089,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) } } - if (nmi_watchdog == NMI_LOCAL_APIC) - check_nmi_watchdog(); - smpboot_setup_io_apic(); setup_boot_APIC_clock(); diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S new file mode 100644 index 000000000000..6cd1ed311f02 --- /dev/null +++ b/arch/i386/kernel/syscall_table.S @@ -0,0 +1,291 @@ +.data +ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ + .long sys_exit + .long sys_fork + .long sys_read + .long sys_write + .long sys_open /* 5 */ + .long sys_close + .long sys_waitpid + .long sys_creat + .long sys_link + .long sys_unlink /* 10 */ + .long sys_execve + .long sys_chdir + .long sys_time + .long sys_mknod + .long sys_chmod /* 15 */ + .long sys_lchown16 + .long sys_ni_syscall /* old break syscall holder */ + .long sys_stat + .long sys_lseek + .long sys_getpid /* 20 */ + .long sys_mount + .long sys_oldumount + .long sys_setuid16 + .long sys_getuid16 + .long sys_stime /* 25 */ + .long sys_ptrace + .long sys_alarm + .long sys_fstat + .long sys_pause + .long sys_utime /* 30 */ + .long sys_ni_syscall /* old stty syscall holder */ + .long sys_ni_syscall /* old gtty syscall holder */ + .long sys_access + .long sys_nice + .long sys_ni_syscall /* 35 - old ftime syscall holder */ + .long sys_sync + .long sys_kill + .long sys_rename + .long sys_mkdir + .long sys_rmdir /* 40 */ + .long sys_dup + .long sys_pipe + .long sys_times + .long sys_ni_syscall /* old prof syscall holder */ + .long sys_brk /* 45 */ + .long sys_setgid16 + .long sys_getgid16 + .long sys_signal + .long sys_geteuid16 + .long sys_getegid16 /* 50 */ + .long sys_acct + .long sys_umount /* recycled never used phys() */ + .long sys_ni_syscall /* old lock syscall holder */ + .long sys_ioctl + .long sys_fcntl /* 55 */ + .long sys_ni_syscall /* old mpx syscall holder */ + .long sys_setpgid + .long sys_ni_syscall /* old ulimit syscall holder */ + .long sys_olduname + .long sys_umask /* 60 */ + .long sys_chroot + .long sys_ustat + .long sys_dup2 + .long sys_getppid + .long sys_getpgrp /* 65 */ + .long sys_setsid + .long sys_sigaction + .long sys_sgetmask + .long sys_ssetmask + .long sys_setreuid16 /* 70 */ + .long sys_setregid16 + .long sys_sigsuspend + .long sys_sigpending + .long sys_sethostname + .long sys_setrlimit /* 75 */ + .long sys_old_getrlimit + .long sys_getrusage + .long sys_gettimeofday + .long sys_settimeofday + .long sys_getgroups16 /* 80 */ + .long sys_setgroups16 + .long old_select + .long sys_symlink + .long sys_lstat + .long sys_readlink /* 85 */ + .long sys_uselib + .long sys_swapon + .long sys_reboot + .long old_readdir + .long old_mmap /* 90 */ + .long sys_munmap + .long sys_truncate + .long sys_ftruncate + .long sys_fchmod + .long sys_fchown16 /* 95 */ + .long sys_getpriority + .long sys_setpriority + .long sys_ni_syscall /* old profil syscall holder */ + .long sys_statfs + .long sys_fstatfs /* 100 */ + .long sys_ioperm + .long sys_socketcall + .long sys_syslog + .long sys_setitimer + .long sys_getitimer /* 105 */ + .long sys_newstat + .long sys_newlstat + .long sys_newfstat + .long sys_uname + .long sys_iopl /* 110 */ + .long sys_vhangup + .long sys_ni_syscall /* old "idle" system call */ + .long sys_vm86old + .long sys_wait4 + .long sys_swapoff /* 115 */ + .long sys_sysinfo + .long sys_ipc + .long sys_fsync + .long sys_sigreturn + .long sys_clone /* 120 */ + .long sys_setdomainname + .long sys_newuname + .long sys_modify_ldt + .long sys_adjtimex + .long sys_mprotect /* 125 */ + .long sys_sigprocmask + .long sys_ni_syscall /* old "create_module" */ + .long sys_init_module + .long sys_delete_module + .long sys_ni_syscall /* 130: old "get_kernel_syms" */ + .long sys_quotactl + .long sys_getpgid + .long sys_fchdir + .long sys_bdflush + .long sys_sysfs /* 135 */ + .long sys_personality + .long sys_ni_syscall /* reserved for afs_syscall */ + .long sys_setfsuid16 + .long sys_setfsgid16 + .long sys_llseek /* 140 */ + .long sys_getdents + .long sys_select + .long sys_flock + .long sys_msync + .long sys_readv /* 145 */ + .long sys_writev + .long sys_getsid + .long sys_fdatasync + .long sys_sysctl + .long sys_mlock /* 150 */ + .long sys_munlock + .long sys_mlockall + .long sys_munlockall + .long sys_sched_setparam + .long sys_sched_getparam /* 155 */ + .long sys_sched_setscheduler + .long sys_sched_getscheduler + .long sys_sched_yield + .long sys_sched_get_priority_max + .long sys_sched_get_priority_min /* 160 */ + .long sys_sched_rr_get_interval + .long sys_nanosleep + .long sys_mremap + .long sys_setresuid16 + .long sys_getresuid16 /* 165 */ + .long sys_vm86 + .long sys_ni_syscall /* Old sys_query_module */ + .long sys_poll + .long sys_nfsservctl + .long sys_setresgid16 /* 170 */ + .long sys_getresgid16 + .long sys_prctl + .long sys_rt_sigreturn + .long sys_rt_sigaction + .long sys_rt_sigprocmask /* 175 */ + .long sys_rt_sigpending + .long sys_rt_sigtimedwait + .long sys_rt_sigqueueinfo + .long sys_rt_sigsuspend + .long sys_pread64 /* 180 */ + .long sys_pwrite64 + .long sys_chown16 + .long sys_getcwd + .long sys_capget + .long sys_capset /* 185 */ + .long sys_sigaltstack + .long sys_sendfile + .long sys_ni_syscall /* reserved for streams1 */ + .long sys_ni_syscall /* reserved for streams2 */ + .long sys_vfork /* 190 */ + .long sys_getrlimit + .long sys_mmap2 + .long sys_truncate64 + .long sys_ftruncate64 + .long sys_stat64 /* 195 */ + .long sys_lstat64 + .long sys_fstat64 + .long sys_lchown + .long sys_getuid + .long sys_getgid /* 200 */ + .long sys_geteuid + .long sys_getegid + .long sys_setreuid + .long sys_setregid + .long sys_getgroups /* 205 */ + .long sys_setgroups + .long sys_fchown + .long sys_setresuid + .long sys_getresuid + .long sys_setresgid /* 210 */ + .long sys_getresgid + .long sys_chown + .long sys_setuid + .long sys_setgid + .long sys_setfsuid /* 215 */ + .long sys_setfsgid + .long sys_pivot_root + .long sys_mincore + .long sys_madvise + .long sys_getdents64 /* 220 */ + .long sys_fcntl64 + .long sys_ni_syscall /* reserved for TUX */ + .long sys_ni_syscall + .long sys_gettid + .long sys_readahead /* 225 */ + .long sys_setxattr + .long sys_lsetxattr + .long sys_fsetxattr + .long sys_getxattr + .long sys_lgetxattr /* 230 */ + .long sys_fgetxattr + .long sys_listxattr + .long sys_llistxattr + .long sys_flistxattr + .long sys_removexattr /* 235 */ + .long sys_lremovexattr + .long sys_fremovexattr + .long sys_tkill + .long sys_sendfile64 + .long sys_futex /* 240 */ + .long sys_sched_setaffinity + .long sys_sched_getaffinity + .long sys_set_thread_area + .long sys_get_thread_area + .long sys_io_setup /* 245 */ + .long sys_io_destroy + .long sys_io_getevents + .long sys_io_submit + .long sys_io_cancel + .long sys_fadvise64 /* 250 */ + .long sys_ni_syscall + .long sys_exit_group + .long sys_lookup_dcookie + .long sys_epoll_create + .long sys_epoll_ctl /* 255 */ + .long sys_epoll_wait + .long sys_remap_file_pages + .long sys_set_tid_address + .long sys_timer_create + .long sys_timer_settime /* 260 */ + .long sys_timer_gettime + .long sys_timer_getoverrun + .long sys_timer_delete + .long sys_clock_settime + .long sys_clock_gettime /* 265 */ + .long sys_clock_getres + .long sys_clock_nanosleep + .long sys_statfs64 + .long sys_fstatfs64 + .long sys_tgkill /* 270 */ + .long sys_utimes + .long sys_fadvise64_64 + .long sys_ni_syscall /* sys_vserver */ + .long sys_mbind + .long sys_get_mempolicy + .long sys_set_mempolicy + .long sys_mq_open + .long sys_mq_unlink + .long sys_mq_timedsend + .long sys_mq_timedreceive /* 280 */ + .long sys_mq_notify + .long sys_mq_getsetattr + .long sys_ni_syscall /* reserved for kexec */ + .long sys_waitid + .long sys_ni_syscall /* 285 */ /* available */ + .long sys_add_key + .long sys_request_key + .long sys_keyctl diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 4d75b373f90e..a0dcb7c87c30 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -441,7 +441,7 @@ static void __init hpet_time_init(void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - if (hpet_enable() >= 0) { + if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 244a31b04be7..10a0cbb88e75 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -26,6 +26,7 @@ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* hpet clks count per tick */ unsigned long hpet_address; /* hpet memory map physical address */ +int hpet_use_timer; static int use_hpet; /* can be used for runtime check of hpet */ static int boot_hpet_disable; /* boottime override for HPET timer */ @@ -73,27 +74,30 @@ static int hpet_timer_stop_set_go(unsigned long tick) hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); - /* - * Set up timer 0, as periodic with first interrupt to happen at - * hpet_tick, and period also hpet_tick. - */ - cfg = hpet_readl(HPET_T0_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | - HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); - - /* - * The first write after writing TN_SETVAL to the config register sets - * the counter value, the second write sets the threshold. - */ - hpet_writel(tick, HPET_T0_CMP); - hpet_writel(tick, HPET_T0_CMP); + if (hpet_use_timer) { + /* + * Set up timer 0, as periodic with first interrupt to happen at + * hpet_tick, and period also hpet_tick. + */ + cfg = hpet_readl(HPET_T0_CFG); + cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | + HPET_TN_SETVAL | HPET_TN_32BIT; + hpet_writel(cfg, HPET_T0_CFG); + /* + * The first write after writing TN_SETVAL to the config register sets + * the counter value, the second write sets the threshold. + */ + hpet_writel(tick, HPET_T0_CMP); + hpet_writel(tick, HPET_T0_CMP); + } /* * Go! */ cfg = hpet_readl(HPET_CFG); - cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; + if (hpet_use_timer) + cfg |= HPET_CFG_LEGACY; + cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); return 0; @@ -128,12 +132,11 @@ int __init hpet_enable(void) * However, we can do with one timer otherwise using the * the single HPET timer for system time. */ - if ( #ifdef CONFIG_HPET_EMULATE_RTC - !(id & HPET_ID_NUMBER) || -#endif - !(id & HPET_ID_LEGSUP)) + if (!(id & HPET_ID_NUMBER)) return -1; +#endif + hpet_period = hpet_readl(HPET_PERIOD); if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) @@ -152,6 +155,8 @@ int __init hpet_enable(void) if (hpet_tick_rem > (hpet_period >> 1)) hpet_tick++; /* rounding the result */ + hpet_use_timer = id & HPET_ID_LEGSUP; + if (hpet_timer_stop_set_go(hpet_tick)) return -1; @@ -202,7 +207,8 @@ int __init hpet_enable(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - wait_timer_tick = wait_hpet_tick; + if (hpet_use_timer) + wait_timer_tick = wait_hpet_tick; #endif return 0; } diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index 713134e71844..f778f471a09a 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -79,7 +79,7 @@ static unsigned long get_offset_hpet(void) eax = hpet_readl(HPET_COUNTER); eax -= hpet_last; /* hpet delta */ - + eax = min(hpet_tick, eax); /* * Time offset = (hpet delta) * ( usecs per HPET clock ) * = (hpet delta) * ( usecs per tick / HPET clocks per tick) @@ -105,9 +105,12 @@ static void mark_offset_hpet(void) last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; rdtsc(last_tsc_low, last_tsc_high); - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { - int lost_ticks = (offset - hpet_last) / hpet_tick; + if (hpet_use_timer) + offset = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + offset = hpet_readl(HPET_COUNTER); + if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { + int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; jiffies_64 += lost_ticks; } hpet_last = offset; diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index a685994e5c8e..7926d967be00 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -477,7 +477,7 @@ static int __init init_tsc(char* override) if (cpu_has_tsc) { unsigned long tsc_quotient; #ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()){ + if (is_hpet_enabled() && hpet_use_timer) { unsigned long result, remain; printk("Using TSC for gettimeofday\n"); tsc_quotient = calibrate_tsc_hpet(NULL); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 6c0e383915b6..00c63419c06f 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -451,6 +451,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) fastcall void do_general_protection(struct pt_regs * regs, long error_code) { @@ -642,16 +643,15 @@ void unset_nmi_callback(void) } #ifdef CONFIG_KPROBES -fastcall int do_int3(struct pt_regs *regs, long error_code) +fastcall void do_int3(struct pt_regs *regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) - return 1; + return; /* This is an interrupt gate, because kprobes wants interrupts disabled. Normal trap handlers don't. */ restore_interrupts(regs); do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); - return 0; } #endif diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 2f3d52dacff7..ec0f68ce6886 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -222,7 +222,7 @@ asmlinkage int sys_vm86(struct pt_regs regs) goto out; case VM86_PLUS_INSTALL_CHECK: /* NOTE: on old vm86 stuff this will return the error - from verify_area(), because the subfunction is + from access_ok(), because the subfunction is interpreted as (invalid) address to vm86_struct. So the installation check works. */ @@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk */ info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; - asm volatile("movl %%fs,%0":"=m" (tsk->thread.saved_fs)); - asm volatile("movl %%gs,%0":"=m" (tsk->thread.saved_gs)); + asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs)); + asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs)); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; @@ -717,12 +717,12 @@ static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs) irqbits |= irq_bit; if (vm86_irqs[intno].sig) send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); - spin_unlock_irqrestore(&irqbits_lock, flags); /* * IRQ will be re-enabled when user asks for the irq (whether * polling or as a result of the signal) */ - disable_irq(intno); + disable_irq_nosync(intno); + spin_unlock_irqrestore(&irqbits_lock, flags); return IRQ_HANDLED; out: @@ -754,17 +754,20 @@ static inline int get_and_reset_irq(int irqnumber) { int bit; unsigned long flags; + int ret = 0; if (invalid_vm86_irq(irqnumber)) return 0; if (vm86_irqs[irqnumber].tsk != current) return 0; spin_lock_irqsave(&irqbits_lock, flags); bit = irqbits & (1 << irqnumber); irqbits &= ~bit; + if (bit) { + enable_irq(irqnumber); + ret = 1; + } + spin_unlock_irqrestore(&irqbits_lock, flags); - if (!bit) - return 0; - enable_irq(irqnumber); - return 1; + return ret; } diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index b2e462abf337..c58d0c14f274 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -36,7 +36,7 @@ static void timer_stop(void) { enable_timer_nmi_watchdog(); unset_nmi_callback(); - synchronize_kernel(); + synchronize_sched(); /* Allow already-started NMIs to complete. */ } diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index e07589d04f64..d6598da4b67b 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -495,6 +495,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH6_1: case PCI_DEVICE_ID_INTEL_ICH7_0: case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: case PCI_DEVICE_ID_INTEL_ESB2_0: r->name = "PIIX/ICH"; r->get = pirq_piix_get; diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 33fcb205fcb7..3ad2c4af099c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -46,6 +46,10 @@ config GENERIC_IOMAP bool default y +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + choice prompt "System type" default IA64_GENERIC @@ -217,6 +221,16 @@ config IA64_SGI_SN_SIM If you are compiling a kernel that will run under SGI's IA-64 simulator (Medusa) then say Y, otherwise say N. +config IA64_SGI_SN_XP + tristate "Support communication between SGI SSIs" + depends on MSPEC + help + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + config FORCE_MAX_ZONEORDER int default "18" @@ -261,6 +275,15 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + default off + help + Improves the CPU scheduler's decision making when dealing with + Intel IA64 chips with MultiThreading at a cost of slightly increased + overhead in some places. If unsure say N here. + config PREEMPT bool "Preemptible Kernel" help @@ -329,7 +352,7 @@ menu "Power management and ACPI" config PM bool "Power Management support" - depends on IA64_GENERIC || IA64_DIG || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB + depends on !IA64_HP_SIM default y help "Power Management" means that parts of your computer are shut diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index bfeb952fe8e2..6ff7107fee4d 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -574,6 +574,8 @@ CONFIG_SERIAL_NONSTANDARD=y # CONFIG_N_HDLC is not set # CONFIG_STALDRV is not set CONFIG_SGI_SNSC=y +CONFIG_SGI_TIOCX=y +CONFIG_SGI_MBCS=m # # Serial drivers diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 99830e8fc9ba..9086b789f6ac 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11-rc2 -# Sat Jan 22 11:17:02 2005 +# Linux kernel version: 2.6.12-rc3 +# Tue May 3 15:55:04 2005 # # @@ -10,6 +10,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -21,24 +22,27 @@ CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=20 CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -85,6 +89,7 @@ CONFIG_FORCE_MAX_ZONEORDER=18 CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y +# CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set CONFIG_HAVE_DEC_LOCK=y CONFIG_IA32_SUPPORT=y @@ -135,6 +140,7 @@ CONFIG_PCI_DOMAINS=y # CONFIG_PCI_MSI is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCI Hotplug Support @@ -152,10 +158,6 @@ CONFIG_HOTPLUG_PCI_ACPI=m # CONFIG_PCCARD is not set # -# PC-card bridges -# - -# # Device Drivers # @@ -195,9 +197,10 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_UB is not set -CONFIG_BLK_DEV_RAM=m +CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" # CONFIG_CDROM_PKTCDVD is not set @@ -313,7 +316,6 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_IPS is not set @@ -325,7 +327,6 @@ CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_ISP is not set CONFIG_SCSI_QLOGIC_FC=y # CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set CONFIG_SCSI_QLOGIC_1280=y @@ -336,6 +337,7 @@ CONFIG_SCSI_QLA22XX=m CONFIG_SCSI_QLA2300=m CONFIG_SCSI_QLA2322=m # CONFIG_SCSI_QLA6312 is not set +# CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set @@ -358,6 +360,7 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support @@ -386,7 +389,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK_DEV=y CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -446,7 +448,6 @@ CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set # # ARCnet devices @@ -484,7 +485,6 @@ CONFIG_NET_PCI=y # CONFIG_DGRS is not set CONFIG_EEPRO100=m CONFIG_E100=m -# CONFIG_E100_NAPI is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set @@ -566,25 +566,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_EVBUG is not set # -# Input I/O drivers -# -CONFIG_GAMEPORT=m -CONFIG_SOUND_GAMEPORT=m -# CONFIG_GAMEPORT_NS558 is not set -# CONFIG_GAMEPORT_L4 is not set -# CONFIG_GAMEPORT_EMU10K1 is not set -# CONFIG_GAMEPORT_VORTEX is not set -# CONFIG_GAMEPORT_FM801 is not set -# CONFIG_GAMEPORT_CS461X is not set -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set - -# # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y @@ -602,6 +583,24 @@ CONFIG_MOUSE_PS2=y # CONFIG_INPUT_MISC is not set # +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +CONFIG_GAMEPORT=m +# CONFIG_GAMEPORT_NS558 is not set +# CONFIG_GAMEPORT_L4 is not set +# CONFIG_GAMEPORT_EMU10K1 is not set +# CONFIG_GAMEPORT_VORTEX is not set +# CONFIG_GAMEPORT_FM801 is not set +# CONFIG_GAMEPORT_CS461X is not set +CONFIG_SOUND_GAMEPORT=m + +# # Character devices # CONFIG_VT=y @@ -615,6 +614,8 @@ CONFIG_SERIAL_NONSTANDARD=y # CONFIG_SYNCLINK is not set # CONFIG_SYNCLINKMP is not set # CONFIG_N_HDLC is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set # CONFIG_STALDRV is not set # @@ -635,6 +636,7 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -670,6 +672,12 @@ CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set CONFIG_HPET_MMAP=y CONFIG_MAX_RAW_DEVS=256 +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set # # I2C support @@ -705,7 +713,6 @@ CONFIG_MAX_RAW_DEVS=256 # CONFIG_VGA_CONSOLE=y CONFIG_DUMMY_CONSOLE=y -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -715,6 +722,8 @@ CONFIG_DUMMY_CONSOLE=y # # USB support # +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -726,8 +735,6 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set # CONFIG_USB_OTG is not set -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y # # USB Host Controller Drivers @@ -736,6 +743,8 @@ CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set CONFIG_USB_OHCI_HCD=m +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set @@ -751,12 +760,11 @@ CONFIG_USB_UHCI_HCD=y # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_RW_DETECT is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set # CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_HP8200e is not set +# CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set @@ -800,6 +808,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_MON is not set # # USB port drivers @@ -824,6 +833,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_TEST is not set # @@ -867,7 +877,12 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y + +# +# XFS support +# CONFIG_XFS_FS=y +CONFIG_XFS_EXPORT=y # CONFIG_XFS_RT is not set # CONFIG_XFS_QUOTA is not set # CONFIG_XFS_SECURITY is not set @@ -945,7 +960,7 @@ CONFIG_NFSD_V4=y CONFIG_NFSD_TCP=y CONFIG_LOCKD=m CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=m +CONFIG_EXPORTFS=y CONFIG_SUNRPC=m CONFIG_SUNRPC_GSS=m CONFIG_RPCSEC_GSS_KRB5=m @@ -1042,8 +1057,10 @@ CONFIG_GENERIC_IRQ_PROBE=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=20 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1077,6 +1094,7 @@ CONFIG_CRYPTO_MD5=m # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_TWOFISH is not set diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 017c9ab5fc1b..b8db6e3e5e81 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1,9 +1,9 @@ /* ** IA64 System Bus Adapter (SBA) I/O MMU manager ** -** (c) Copyright 2002-2004 Alex Williamson +** (c) Copyright 2002-2005 Alex Williamson ** (c) Copyright 2002-2003 Grant Grundler -** (c) Copyright 2002-2004 Hewlett-Packard Company +** (c) Copyright 2002-2005 Hewlett-Packard Company ** ** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code) ** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) @@ -459,21 +459,32 @@ get_iovp_order (unsigned long size) * sba_search_bitmap - find free space in IO PDIR resource bitmap * @ioc: IO MMU structure which owns the pdir we are interested in. * @bits_wanted: number of entries we need. + * @use_hint: use res_hint to indicate where to start looking * * Find consecutive free bits in resource bitmap. * Each bit represents one entry in the IO Pdir. * Cool perf optimization: search for log2(size) bits at a time. */ static SBA_INLINE unsigned long -sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) +sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) { - unsigned long *res_ptr = ioc->res_hint; + unsigned long *res_ptr; unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); - unsigned long pide = ~0UL; + unsigned long flags, pide = ~0UL; ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); + spin_lock_irqsave(&ioc->res_lock, flags); + + /* Allow caller to force a search through the entire resource space */ + if (likely(use_hint)) { + res_ptr = ioc->res_hint; + } else { + res_ptr = (ulong *)ioc->res_map; + ioc->res_bitshift = 0; + } + /* * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts * if a TLB entry is purged while in use. sba_mark_invalid() @@ -570,10 +581,12 @@ not_found: prefetch(ioc->res_map); ioc->res_hint = (unsigned long *) ioc->res_map; ioc->res_bitshift = 0; + spin_unlock_irqrestore(&ioc->res_lock, flags); return (pide); found_it: ioc->res_hint = res_ptr; + spin_unlock_irqrestore(&ioc->res_lock, flags); return (pide); } @@ -594,36 +607,36 @@ sba_alloc_range(struct ioc *ioc, size_t size) unsigned long itc_start; #endif unsigned long pide; - unsigned long flags; ASSERT(pages_needed); ASSERT(0 == (size & ~iovp_mask)); - spin_lock_irqsave(&ioc->res_lock, flags); - #ifdef PDIR_SEARCH_TIMING itc_start = ia64_get_itc(); #endif /* ** "seek and ye shall find"...praying never hurts either... */ - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, pages_needed, 1); if (unlikely(pide >= (ioc->res_size << 3))) { - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, pages_needed, 0); if (unlikely(pide >= (ioc->res_size << 3))) { #if DELAYED_RESOURCE_CNT > 0 + unsigned long flags; + /* ** With delayed resource freeing, we can give this one more shot. We're ** getting close to being in trouble here, so do what we can to make this ** one count. */ - spin_lock(&ioc->saved_lock); + spin_lock_irqsave(&ioc->saved_lock, flags); if (ioc->saved_cnt > 0) { struct sba_dma_pair *d; int cnt = ioc->saved_cnt; - d = &(ioc->saved[ioc->saved_cnt]); + d = &(ioc->saved[ioc->saved_cnt - 1]); + spin_lock(&ioc->res_lock); while (cnt--) { sba_mark_invalid(ioc, d->iova, d->size); sba_free_range(ioc, d->iova, d->size); @@ -631,10 +644,11 @@ sba_alloc_range(struct ioc *ioc, size_t size) } ioc->saved_cnt = 0; READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock(&ioc->res_lock); } - spin_unlock(&ioc->saved_lock); + spin_unlock_irqrestore(&ioc->saved_lock, flags); - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, pages_needed, 0); if (unlikely(pide >= (ioc->res_size << 3))) panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", ioc->ioc_hpa); @@ -664,8 +678,6 @@ sba_alloc_range(struct ioc *ioc, size_t size) (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), ioc->res_bitshift ); - spin_unlock_irqrestore(&ioc->res_lock, flags); - return (pide); } @@ -950,6 +962,30 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir) return SBA_IOVA(ioc, iovp, offset); } +#ifdef ENABLE_MARK_CLEAN +static SBA_INLINE void +sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + int off = PDIR_INDEX(iovp); + void *addr; + + if (size <= iovp_size) { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, size); + } else { + do { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, min(size, iovp_size)); + off++; + size -= iovp_size; + } while (size > 0); + } +} +#endif + /** * sba_unmap_single - unmap one IOVA and free resources * @dev: instance of PCI owned by the driver that's asking. @@ -995,6 +1031,10 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) size += offset; size = ROUNDUP(size, iovp_size); +#ifdef ENABLE_MARK_CLEAN + if (dir == DMA_FROM_DEVICE) + sba_mark_clean(ioc, iova, size); +#endif #if DELAYED_RESOURCE_CNT > 0 spin_lock_irqsave(&ioc->saved_lock, flags); @@ -1021,30 +1061,6 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ spin_unlock_irqrestore(&ioc->res_lock, flags); #endif /* DELAYED_RESOURCE_CNT == 0 */ -#ifdef ENABLE_MARK_CLEAN - if (dir == DMA_FROM_DEVICE) { - u32 iovp = (u32) SBA_IOVP(ioc,iova); - int off = PDIR_INDEX(iovp); - void *addr; - - if (size <= iovp_size) { - addr = phys_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, size); - } else { - size_t byte_cnt = size; - - do { - addr = phys_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, min(byte_cnt, iovp_size)); - off++; - byte_cnt -= iovp_size; - - } while (byte_cnt > 0); - } - } -#endif } @@ -1928,43 +1944,17 @@ sba_connect_bus(struct pci_bus *bus) static void __init sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) { - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *obj; - acpi_handle phandle; unsigned int node; + int pxm; ioc->node = MAX_NUMNODES; - /* - * Check for a _PXM on this node first. We don't typically see - * one here, so we'll end up getting it from the parent. - */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) { - if (ACPI_FAILURE(acpi_get_parent(handle, &phandle))) - return; - - /* Reset the acpi buffer */ - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; + pxm = acpi_get_pxm(handle); - if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL, - &buffer))) - return; - } - - if (!buffer.length || !buffer.pointer) + if (pxm < 0) return; - obj = buffer.pointer; - - if (obj->type != ACPI_TYPE_INTEGER || - obj->integer.value >= MAX_PXM_DOMAINS) { - acpi_os_free(buffer.pointer); - return; - } - - node = pxm_to_nid_map[obj->integer.value]; - acpi_os_free(buffer.pointer); + node = pxm_to_nid_map[pxm]; if (node >= MAX_NUMNODES || !node_online(node)) return; diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index 19b02adce68c..ebb89be2aa2d 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -460,10 +460,9 @@ __ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sig sigset_t oldset, set; scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */ - memset(&set, 0, sizeof(&set)); + memset(&set, 0, sizeof(set)); - if (memcpy(&set.sig, &sset->sig, sigsetsize)) - return -EFAULT; + memcpy(&set.sig, &sset->sig, sigsetsize); sigdelsetmask(&set, ~_BLOCKABLE); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index a8e99c56a768..72dfd9e7de0f 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -779,7 +779,7 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) union acpi_object *obj; struct acpi_table_iosapic *iosapic; unsigned int gsi_base; - int node; + int pxm, node; /* Only care about objects w/ a method that returns the MADT */ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) @@ -805,29 +805,16 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) gsi_base = iosapic->global_irq_base; acpi_os_free(buffer.pointer); - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; /* - * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell + * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell * us which node to associate this with. */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) - return AE_OK; - - if (!buffer.length || !buffer.pointer) - return AE_OK; - - obj = buffer.pointer; - - if (obj->type != ACPI_TYPE_INTEGER || - obj->integer.value >= MAX_PXM_DOMAINS) { - acpi_os_free(buffer.pointer); + pxm = acpi_get_pxm(handle); + if (pxm < 0) return AE_OK; - } - node = pxm_to_nid_map[obj->integer.value]; - acpi_os_free(buffer.pointer); + node = pxm_to_nid_map[pxm]; if (node >= MAX_NUMNODES || !node_online(node) || cpus_empty(node_to_cpumask(node))) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0272c010a3ba..81c45d447394 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -728,12 +728,8 @@ ENTRY(ia64_leave_syscall) mov f8=f0 // clear f8 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs - mov.m ar.ssd=r0 // M2 clear ar.ssd - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; ld8 r25=[r3],16 // M0|1 load ar.unat - mov.m ar.csd=r0 // M2 clear ar.csd - mov r22=r0 // clear r22 + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled @@ -756,11 +752,15 @@ ENTRY(ia64_leave_syscall) mov f7=f0 // clear f7 ;; ld8.fill r12=[r2] // restore r12 (sp) + mov.m ar.ssd=r0 // M2 clear ar.ssd + mov r22=r0 // clear r22 + ld8.fill r15=[r3] // restore r15 +(pUStk) st1 [r14]=r17 addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 ;; -(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8 -(pUStk) st1 [r14]=r17 +(pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 + mov.m ar.csd=r0 // M2 clear ar.csd mov b6=r18 // I0 restore b6 ;; mov r14=r0 // clear r14 @@ -782,7 +782,7 @@ GLOBAL_ENTRY(ia64_ret_from_ia32_execve) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit .mem.offset 8,0 st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit -END(ia64_ret_from_ia32_execve_syscall) +END(ia64_ret_from_ia32_execve) // fall through #endif /* CONFIG_IA32_SUPPORT */ GLOBAL_ENTRY(ia64_leave_kernel) @@ -1417,7 +1417,7 @@ sys_call_table: data8 sys_msgrcv data8 sys_msgctl data8 sys_shmget - data8 ia64_shmat + data8 sys_shmat data8 sys_shmdt // 1115 data8 sys_shmctl data8 sys_syslog diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 0d8650f7fce7..4f3cdef75797 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -611,8 +611,10 @@ GLOBAL_ENTRY(fsys_bubble_down) movl r2=ia64_ret_from_syscall ;; mov rp=r2 // set the real return addr - tbit.z p8,p0=r3,TIF_SYSCALL_TRACE + and r3=_TIF_SYSCALL_TRACEAUDIT,r3 ;; + cmp.eq p8,p0=r3,r0 + (p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 (p8) br.call.sptk.many b6=b6 // ignore this return addr br.cond.sptk ia64_trace_syscall diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 105c7fec8c6d..8d3a9291b47f 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -15,6 +15,8 @@ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com> * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. + * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com> + * Support for CPU Hotplug */ #include <linux/config.h> @@ -29,6 +31,139 @@ #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/system.h> +#include <asm/mca_asm.h> + +#ifdef CONFIG_HOTPLUG_CPU +#define SAL_PSR_BITS_TO_SET \ + (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) + +#define SAVE_FROM_REG(src, ptr, dest) \ + mov dest=src;; \ + st8 [ptr]=dest,0x08 + +#define RESTORE_REG(reg, ptr, _tmp) \ + ld8 _tmp=[ptr],0x08;; \ + mov reg=_tmp + +#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ + mov ar.lc=IA64_NUM_DBG_REGS-1;; \ + mov _idx=0;; \ +1: \ + SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ + add _idx=1,_idx;; \ + br.cloop.sptk.many 1b + +#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ + mov ar.lc=IA64_NUM_DBG_REGS-1;; \ + mov _idx=0;; \ +_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ + add _idx=1, _idx;; \ + br.cloop.sptk.many _lbl + +#define SAVE_ONE_RR(num, _reg, _tmp) \ + movl _tmp=(num<<61);; \ + mov _reg=rr[_tmp] + +#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ + SAVE_ONE_RR(0,_r0, _tmp);; \ + SAVE_ONE_RR(1,_r1, _tmp);; \ + SAVE_ONE_RR(2,_r2, _tmp);; \ + SAVE_ONE_RR(3,_r3, _tmp);; \ + SAVE_ONE_RR(4,_r4, _tmp);; \ + SAVE_ONE_RR(5,_r5, _tmp);; \ + SAVE_ONE_RR(6,_r6, _tmp);; \ + SAVE_ONE_RR(7,_r7, _tmp);; + +#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ + st8 [ptr]=_r0, 8;; \ + st8 [ptr]=_r1, 8;; \ + st8 [ptr]=_r2, 8;; \ + st8 [ptr]=_r3, 8;; \ + st8 [ptr]=_r4, 8;; \ + st8 [ptr]=_r5, 8;; \ + st8 [ptr]=_r6, 8;; \ + st8 [ptr]=_r7, 8;; + +#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ + mov ar.lc=0x08-1;; \ + movl _idx1=0x00;; \ +RestRR: \ + dep.z _idx2=_idx1,61,3;; \ + ld8 _tmp=[ptr],8;; \ + mov rr[_idx2]=_tmp;; \ + srlz.d;; \ + add _idx1=1,_idx1;; \ + br.cloop.sptk.few RestRR + +#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ + movl reg1=sal_state_for_booting_cpu;; \ + ld8 reg2=[reg1];; + +/* + * Adjust region registers saved before starting to save + * break regs and rest of the states that need to be preserved. + */ +#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ + SAVE_FROM_REG(b0,_reg1,_reg2);; \ + SAVE_FROM_REG(b1,_reg1,_reg2);; \ + SAVE_FROM_REG(b2,_reg1,_reg2);; \ + SAVE_FROM_REG(b3,_reg1,_reg2);; \ + SAVE_FROM_REG(b4,_reg1,_reg2);; \ + SAVE_FROM_REG(b5,_reg1,_reg2);; \ + st8 [_reg1]=r1,0x08;; \ + st8 [_reg1]=r12,0x08;; \ + st8 [_reg1]=r13,0x08;; \ + SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ + st8 [_reg1]=r4,0x08;; \ + st8 [_reg1]=r5,0x08;; \ + st8 [_reg1]=r6,0x08;; \ + st8 [_reg1]=r7,0x08;; \ + st8 [_reg1]=_pred,0x08;; \ + SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ + stf.spill.nta [_reg1]=f2,16;; \ + stf.spill.nta [_reg1]=f3,16;; \ + stf.spill.nta [_reg1]=f4,16;; \ + stf.spill.nta [_reg1]=f5,16;; \ + stf.spill.nta [_reg1]=f16,16;; \ + stf.spill.nta [_reg1]=f17,16;; \ + stf.spill.nta [_reg1]=f18,16;; \ + stf.spill.nta [_reg1]=f19,16;; \ + stf.spill.nta [_reg1]=f20,16;; \ + stf.spill.nta [_reg1]=f21,16;; \ + stf.spill.nta [_reg1]=f22,16;; \ + stf.spill.nta [_reg1]=f23,16;; \ + stf.spill.nta [_reg1]=f24,16;; \ + stf.spill.nta [_reg1]=f25,16;; \ + stf.spill.nta [_reg1]=f26,16;; \ + stf.spill.nta [_reg1]=f27,16;; \ + stf.spill.nta [_reg1]=f28,16;; \ + stf.spill.nta [_reg1]=f29,16;; \ + stf.spill.nta [_reg1]=f30,16;; \ + stf.spill.nta [_reg1]=f31,16;; + +#else +#define SET_AREA_FOR_BOOTING_CPU(a1, a2) +#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) +#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) +#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) +#endif + +#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ + movl _tmp1=(num << 61);; \ + mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ + mov rr[_tmp1]=_tmp2 .section __special_page_section,"ax" @@ -64,6 +199,12 @@ start_ap: srlz.i ;; /* + * Save the region registers, predicate before they get clobbered + */ + SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); + mov r25=pr;; + + /* * Initialize kernel region registers: * rr[0]: VHPT enabled, page size = PAGE_SHIFT * rr[1]: VHPT enabled, page size = PAGE_SHIFT @@ -76,32 +217,14 @@ start_ap: * We initialize all of them to prevent inadvertently assuming * something about the state of address translation early in boot. */ - mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1) - movl r7=(0<<61) - mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1) - movl r9=(1<<61) - mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1) - movl r11=(2<<61) - mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1) - movl r13=(3<<61) - mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1) - movl r15=(4<<61) - mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1) - movl r17=(5<<61) - mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) - movl r19=(6<<61) - mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) - movl r21=(7<<61) - ;; - mov rr[r7]=r6 - mov rr[r9]=r8 - mov rr[r11]=r10 - mov rr[r13]=r12 - mov rr[r15]=r14 - mov rr[r17]=r16 - mov rr[r19]=r18 - mov rr[r21]=r20 - ;; + SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; + SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; /* * Now pin mappings into the TLB for kernel text and data */ @@ -142,6 +265,12 @@ start_ap: ;; 1: // now we are in virtual mode + SET_AREA_FOR_BOOTING_CPU(r2, r16); + + STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); + SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) + ;; + // set IVT entry point---can't access I/O ports without it movl r3=ia64_ivt ;; @@ -211,12 +340,13 @@ start_ap: mov IA64_KR(CURRENT_STACK)=r16 mov r13=r2 /* - * Reserve space at the top of the stack for "struct pt_regs". Kernel threads - * don't store interesting values in that structure, but the space still needs - * to be there because time-critical stuff such as the context switching can - * be implemented more efficiently (for example, __switch_to() + * Reserve space at the top of the stack for "struct pt_regs". Kernel + * threads don't store interesting values in that structure, but the space + * still needs to be there because time-critical stuff such as the context + * switching can be implemented more efficiently (for example, __switch_to() * always sets the psr.dfh bit of the task it is switching to). */ + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE mov ar.rsc=0 // place RSE in enforced lazy mode @@ -993,4 +1123,98 @@ END(ia64_spinlock_contention) #endif +#ifdef CONFIG_HOTPLUG_CPU +GLOBAL_ENTRY(ia64_jump_to_sal) + alloc r16=ar.pfs,1,0,0,0;; + rsm psr.i | psr.ic +{ + flushrs + srlz.i +} + tpa r25=in0 + movl r18=tlb_purge_done;; + DATA_VA_TO_PA(r18);; + mov b1=r18 // Return location + movl r18=ia64_do_tlb_purge;; + DATA_VA_TO_PA(r18);; + mov b2=r18 // doing tlb_flush work + mov ar.rsc=0 // Put RSE in enforced lazy, LE mode + movl r17=1f;; + DATA_VA_TO_PA(r17);; + mov cr.iip=r17 + movl r16=SAL_PSR_BITS_TO_SET;; + mov cr.ipsr=r16 + mov cr.ifs=r0;; + rfi;; +1: + /* + * Invalidate all TLB data/inst + */ + br.sptk.many b2;; // jump to tlb purge code + +tlb_purge_done: + RESTORE_REGION_REGS(r25, r17,r18,r19);; + RESTORE_REG(b0, r25, r17);; + RESTORE_REG(b1, r25, r17);; + RESTORE_REG(b2, r25, r17);; + RESTORE_REG(b3, r25, r17);; + RESTORE_REG(b4, r25, r17);; + RESTORE_REG(b5, r25, r17);; + ld8 r1=[r25],0x08;; + ld8 r12=[r25],0x08;; + ld8 r13=[r25],0x08;; + RESTORE_REG(ar.fpsr, r25, r17);; + RESTORE_REG(ar.pfs, r25, r17);; + RESTORE_REG(ar.rnat, r25, r17);; + RESTORE_REG(ar.unat, r25, r17);; + RESTORE_REG(ar.bspstore, r25, r17);; + RESTORE_REG(cr.dcr, r25, r17);; + RESTORE_REG(cr.iva, r25, r17);; + RESTORE_REG(cr.pta, r25, r17);; + RESTORE_REG(cr.itv, r25, r17);; + RESTORE_REG(cr.pmv, r25, r17);; + RESTORE_REG(cr.cmcv, r25, r17);; + RESTORE_REG(cr.lrr0, r25, r17);; + RESTORE_REG(cr.lrr1, r25, r17);; + ld8 r4=[r25],0x08;; + ld8 r5=[r25],0x08;; + ld8 r6=[r25],0x08;; + ld8 r7=[r25],0x08;; + ld8 r17=[r25],0x08;; + mov pr=r17,-1;; + RESTORE_REG(ar.lc, r25, r17);; + /* + * Now Restore floating point regs + */ + ldf.fill.nta f2=[r25],16;; + ldf.fill.nta f3=[r25],16;; + ldf.fill.nta f4=[r25],16;; + ldf.fill.nta f5=[r25],16;; + ldf.fill.nta f16=[r25],16;; + ldf.fill.nta f17=[r25],16;; + ldf.fill.nta f18=[r25],16;; + ldf.fill.nta f19=[r25],16;; + ldf.fill.nta f20=[r25],16;; + ldf.fill.nta f21=[r25],16;; + ldf.fill.nta f22=[r25],16;; + ldf.fill.nta f23=[r25],16;; + ldf.fill.nta f24=[r25],16;; + ldf.fill.nta f25=[r25],16;; + ldf.fill.nta f26=[r25],16;; + ldf.fill.nta f27=[r25],16;; + ldf.fill.nta f28=[r25],16;; + ldf.fill.nta f29=[r25],16;; + ldf.fill.nta f30=[r25],16;; + ldf.fill.nta f31=[r25],16;; + + /* + * Now that we have done all the register restores + * we are now ready for the big DIVE to SAL Land + */ + ssm psr.ic;; + srlz.d;; + br.ret.sptk.many b0;; +END(ia64_jump_to_sal) +#endif /* CONFIG_HOTPLUG_CPU */ + #endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index c15be5c38f56..88b014381df5 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -79,6 +79,7 @@ #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/string.h> +#include <linux/bootmem.h> #include <asm/delay.h> #include <asm/hw_irq.h> @@ -98,19 +99,30 @@ #define DBG(fmt...) #endif +#define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info)) +#define RTE_PREALLOCATED (1) + static DEFINE_SPINLOCK(iosapic_lock); /* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */ -static struct iosapic_intr_info { +struct iosapic_rte_info { + struct list_head rte_list; /* node in list of RTEs sharing the same vector */ char __iomem *addr; /* base address of IOSAPIC */ - u32 low32; /* current value of low word of Redirection table entry */ unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ - char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */ + char rte_index; /* IOSAPIC RTE index */ + int refcnt; /* reference counter */ + unsigned int flags; /* flags */ +} ____cacheline_aligned; + +static struct iosapic_intr_info { + struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */ + int count; /* # of RTEs that shares this vector */ + u32 low32; /* current value of low word of Redirection table entry */ + unsigned int dest; /* destination CPU physical ID */ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ - int refcnt; /* reference counter */ } iosapic_intr_info[IA64_NUM_VECTORS]; static struct iosapic { @@ -126,6 +138,8 @@ static int num_iosapic; static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */ +static int iosapic_kmalloc_ok; +static LIST_HEAD(free_rte_list); /* * Find an IOSAPIC associated with a GSI @@ -147,10 +161,12 @@ static inline int _gsi_to_vector (unsigned int gsi) { struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) - if (info->gsi_base + info->rte_index == gsi) - return info - iosapic_intr_info; + list_for_each_entry(rte, &info->rtes, rte_list) + if (rte->gsi_base + rte->rte_index == gsi) + return info - iosapic_intr_info; return -1; } @@ -167,33 +183,52 @@ gsi_to_vector (unsigned int gsi) int gsi_to_irq (unsigned int gsi) { + unsigned long flags; + int irq; /* * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq * numbers... */ - return _gsi_to_vector(gsi); + spin_lock_irqsave(&iosapic_lock, flags); + { + irq = _gsi_to_vector(gsi); + } + spin_unlock_irqrestore(&iosapic_lock, flags); + + return irq; +} + +static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec) +{ + struct iosapic_rte_info *rte; + + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) + if (rte->gsi_base + rte->rte_index == gsi) + return rte; + return NULL; } static void -set_rte (unsigned int vector, unsigned int dest, int mask) +set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) { unsigned long pol, trigger, dmode; u32 low32, high32; char __iomem *addr; int rte_index; char redir; + struct iosapic_rte_info *rte; DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); - rte_index = iosapic_intr_info[vector].rte_index; - if (rte_index < 0) + rte = gsi_vector_to_rte(gsi, vector); + if (!rte) return; /* not an IOSAPIC interrupt */ - addr = iosapic_intr_info[vector].addr; + rte_index = rte->rte_index; + addr = rte->addr; pol = iosapic_intr_info[vector].polarity; trigger = iosapic_intr_info[vector].trigger; dmode = iosapic_intr_info[vector].dmode; - vector &= (~IA64_IRQ_REDIRECTED); redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; @@ -221,6 +256,7 @@ set_rte (unsigned int vector, unsigned int dest, int mask) iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); iosapic_intr_info[vector].low32 = low32; + iosapic_intr_info[vector].dest = dest; } static void @@ -237,18 +273,20 @@ mask_irq (unsigned int irq) u32 low32; int rte_index; ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; - addr = iosapic_intr_info[vec].addr; - rte_index = iosapic_intr_info[vec].rte_index; - - if (rte_index < 0) + if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt! */ spin_lock_irqsave(&iosapic_lock, flags); { /* set only the mask bit */ low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + addr = rte->addr; + rte_index = rte->rte_index; + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } } spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -261,16 +299,19 @@ unmask_irq (unsigned int irq) u32 low32; int rte_index; ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; - addr = iosapic_intr_info[vec].addr; - rte_index = iosapic_intr_info[vec].rte_index; - if (rte_index < 0) + if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt! */ spin_lock_irqsave(&iosapic_lock, flags); { low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + addr = rte->addr; + rte_index = rte->rte_index; + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } } spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -286,6 +327,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) char __iomem *addr; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; ia64_vector vec; + struct iosapic_rte_info *rte; irq &= (~IA64_IRQ_REDIRECTED); vec = irq_to_vector(irq); @@ -295,10 +337,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) dest = cpu_physical_id(first_cpu(mask)); - rte_index = iosapic_intr_info[vec].rte_index; - addr = iosapic_intr_info[vec].addr; - - if (rte_index < 0) + if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -318,8 +357,13 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); iosapic_intr_info[vec].low32 = low32; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[vec].dest = dest; + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + addr = rte->addr; + rte_index = rte->rte_index; + iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } } spin_unlock_irqrestore(&iosapic_lock, flags); #endif @@ -340,9 +384,11 @@ static void iosapic_end_level_irq (unsigned int irq) { ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; move_irq(irq); - iosapic_eoi(iosapic_intr_info[vec].addr, vec); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) + iosapic_eoi(rte->addr, vec); } #define iosapic_shutdown_level_irq mask_irq @@ -422,6 +468,34 @@ iosapic_version (char __iomem *addr) return iosapic_read(addr, IOSAPIC_VERSION); } +static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol) +{ + int i, vector = -1, min_count = -1; + struct iosapic_intr_info *info; + + /* + * shared vectors for edge-triggered interrupts are not + * supported yet + */ + if (trigger == IOSAPIC_EDGE) + return -1; + + for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { + info = &iosapic_intr_info[i]; + if (info->trigger == trigger && info->polarity == pol && + (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) { + if (min_count == -1 || info->count < min_count) { + vector = i; + min_count = info->count; + } + } + } + if (vector < 0) + panic("%s: out of interrupt vectors!\n", __FUNCTION__); + + return vector; +} + /* * if the given vector is already owned by other, * assign a new vector for the other and make the vector available @@ -431,19 +505,63 @@ iosapic_reassign_vector (int vector) { int new_vector; - if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr - || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode - || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger) - { + if (!list_empty(&iosapic_intr_info[vector].rtes)) { new_vector = assign_irq_vector(AUTO_ASSIGN); printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], sizeof(struct iosapic_intr_info)); + INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); + list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes); memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].rte_index = -1; + iosapic_intr_info[vector].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); } } +static struct iosapic_rte_info *iosapic_alloc_rte (void) +{ + int i; + struct iosapic_rte_info *rte; + int preallocated = 0; + + if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { + rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES); + if (!rte) + return NULL; + for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) + list_add(&rte->rte_list, &free_rte_list); + } + + if (!list_empty(&free_rte_list)) { + rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list); + list_del(&rte->rte_list); + preallocated++; + } else { + rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC); + if (!rte) + return NULL; + } + + memset(rte, 0, sizeof(struct iosapic_rte_info)); + if (preallocated) + rte->flags |= RTE_PREALLOCATED; + + return rte; +} + +static void iosapic_free_rte (struct iosapic_rte_info *rte) +{ + if (rte->flags & RTE_PREALLOCATED) + list_add_tail(&rte->rte_list, &free_rte_list); + else + kfree(rte); +} + +static inline int vector_is_shared (int vector) +{ + return (iosapic_intr_info[vector].count > 1); +} + static void register_intr (unsigned int gsi, int vector, unsigned char delivery, unsigned long polarity, unsigned long trigger) @@ -454,6 +572,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, int index; unsigned long gsi_base; void __iomem *iosapic_address; + struct iosapic_rte_info *rte; index = find_iosapic(gsi); if (index < 0) { @@ -464,14 +583,33 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, iosapic_address = iosapic_lists[index].addr; gsi_base = iosapic_lists[index].gsi_base; - rte_index = gsi - gsi_base; - iosapic_intr_info[vector].rte_index = rte_index; + rte = gsi_vector_to_rte(gsi, vector); + if (!rte) { + rte = iosapic_alloc_rte(); + if (!rte) { + printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__); + return; + } + + rte_index = gsi - gsi_base; + rte->rte_index = rte_index; + rte->addr = iosapic_address; + rte->gsi_base = gsi_base; + rte->refcnt++; + list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes); + iosapic_intr_info[vector].count++; + } + else if (vector_is_shared(vector)) { + struct iosapic_intr_info *info = &iosapic_intr_info[vector]; + if (info->trigger != trigger || info->polarity != polarity) { + printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__); + return; + } + } + iosapic_intr_info[vector].polarity = polarity; iosapic_intr_info[vector].dmode = delivery; - iosapic_intr_info[vector].addr = iosapic_address; - iosapic_intr_info[vector].gsi_base = gsi_base; iosapic_intr_info[vector].trigger = trigger; - iosapic_intr_info[vector].refcnt++; if (trigger == IOSAPIC_EDGE) irq_type = &irq_type_iosapic_edge; @@ -494,6 +632,13 @@ get_target_cpu (unsigned int gsi, int vector) static int cpu = -1; /* + * In case of vector shared by multiple RTEs, all RTEs that + * share the vector need to use the same destination CPU. + */ + if (!list_empty(&iosapic_intr_info[vector].rtes)) + return iosapic_intr_info[vector].dest; + + /* * If the platform supports redirection via XTP, let it * distribute interrupts. */ @@ -565,10 +710,12 @@ int iosapic_register_intr (unsigned int gsi, unsigned long polarity, unsigned long trigger) { - int vector; + int vector, mask = 1; unsigned int dest; unsigned long flags; - + struct iosapic_rte_info *rte; + u32 low32; +again: /* * If this GSI has already been registered (i.e., it's a * shared interrupt, or we lost a race to register it), @@ -578,19 +725,45 @@ iosapic_register_intr (unsigned int gsi, { vector = gsi_to_vector(gsi); if (vector > 0) { - iosapic_intr_info[vector].refcnt++; + rte = gsi_vector_to_rte(gsi, vector); + rte->refcnt++; spin_unlock_irqrestore(&iosapic_lock, flags); return vector; } + } + spin_unlock_irqrestore(&iosapic_lock, flags); + + /* If vector is running out, we try to find a sharable vector */ + vector = assign_irq_vector_nopanic(AUTO_ASSIGN); + if (vector < 0) + vector = iosapic_find_sharable_vector(trigger, polarity); + + spin_lock_irqsave(&irq_descp(vector)->lock, flags); + spin_lock(&iosapic_lock); + { + if (gsi_to_vector(gsi) > 0) { + if (list_empty(&iosapic_intr_info[vector].rtes)) + free_irq_vector(vector); + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&irq_descp(vector)->lock, flags); + goto again; + } - vector = assign_irq_vector(AUTO_ASSIGN); dest = get_target_cpu(gsi, vector); register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, - polarity, trigger); + polarity, trigger); - set_rte(vector, dest, 1); + /* + * If the vector is shared and already unmasked for + * other interrupt sources, don't mask it. + */ + low32 = iosapic_intr_info[vector].low32; + if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK)) + mask = 0; + set_rte(gsi, vector, dest, mask); } - spin_unlock_irqrestore(&iosapic_lock, flags); + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&irq_descp(vector)->lock, flags); printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), @@ -607,8 +780,10 @@ iosapic_unregister_intr (unsigned int gsi) unsigned long flags; int irq, vector; irq_desc_t *idesc; - int rte_index; + u32 low32; unsigned long trigger, polarity; + unsigned int dest; + struct iosapic_rte_info *rte; /* * If the irq associated with the gsi is not found, @@ -627,54 +802,56 @@ iosapic_unregister_intr (unsigned int gsi) spin_lock_irqsave(&idesc->lock, flags); spin_lock(&iosapic_lock); { - rte_index = iosapic_intr_info[vector].rte_index; - if (rte_index < 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); + if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); WARN_ON(1); - return; + goto out; } - if (--iosapic_intr_info[vector].refcnt > 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); - return; - } + if (--rte->refcnt > 0) + goto out; - /* - * If interrupt handlers still exist on the irq - * associated with the gsi, don't unregister the - * interrupt. - */ - if (idesc->action) { - iosapic_intr_info[vector].refcnt++; - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); - printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq); - return; - } + /* Mask the interrupt */ + low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; + iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32); - /* Clear the interrupt controller descriptor. */ - idesc->handler = &no_irq_type; + /* Remove the rte entry from the list */ + list_del(&rte->rte_list); + iosapic_intr_info[vector].count--; + iosapic_free_rte(rte); - trigger = iosapic_intr_info[vector].trigger; + trigger = iosapic_intr_info[vector].trigger; polarity = iosapic_intr_info[vector].polarity; + dest = iosapic_intr_info[vector].dest; + printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, vector); + + if (list_empty(&iosapic_intr_info[vector].rtes)) { + /* Sanity check */ + BUG_ON(iosapic_intr_info[vector].count); + + /* Clear the interrupt controller descriptor */ + idesc->handler = &no_irq_type; + + /* Clear the interrupt information */ + memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + + if (idesc->action) { + printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq); + WARN_ON(1); + } - /* Clear the interrupt information. */ - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ + /* Free the interrupt vector */ + free_irq_vector(vector); + } } + out: spin_unlock(&iosapic_lock); spin_unlock_irqrestore(&idesc->lock, flags); - - /* Free the interrupt vector */ - free_irq_vector(vector); - - printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - vector); } #endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ @@ -724,7 +901,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, vector); - set_rte(vector, dest, mask); + set_rte(gsi, vector, dest, mask); return vector; } @@ -750,7 +927,7 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, polarity == IOSAPIC_POL_HIGH ? "high" : "low", cpu_logical_id(dest), dest, vector); - set_rte(vector, dest, 1); + set_rte(gsi, vector, dest, 1); } void __init @@ -758,8 +935,10 @@ iosapic_system_init (int system_pcat_compat) { int vector; - for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) - iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { + iosapic_intr_info[vector].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */ + } pcat_compat = system_pcat_compat; if (pcat_compat) { @@ -825,3 +1004,10 @@ map_iosapic_to_node(unsigned int gsi_base, int node) return; } #endif + +static int __init iosapic_enable_kmalloc (void) +{ + iosapic_kmalloc_ok = 1; + return 0; +} +core_initcall (iosapic_enable_kmalloc); diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 5ba06ebe355b..4fe60c7a2e90 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -63,20 +63,30 @@ EXPORT_SYMBOL(isa_irq_to_vector_map); static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)]; int -assign_irq_vector (int irq) +assign_irq_vector_nopanic (int irq) { int pos, vector; again: pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); vector = IA64_FIRST_DEVICE_VECTOR + pos; if (vector > IA64_LAST_DEVICE_VECTOR) - /* XXX could look for sharable vectors instead of panic'ing... */ - panic("assign_irq_vector: out of interrupt vectors!"); + return -1; if (test_and_set_bit(pos, ia64_vector_mask)) goto again; return vector; } +int +assign_irq_vector (int irq) +{ + int vector = assign_irq_vector_nopanic(irq); + + if (vector < 0) + panic("assign_irq_vector: out of interrupt vectors!"); + + return vector; +} + void free_irq_vector (int vector) { diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index cf3f8014f9ad..ef3fd7265b67 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -110,46 +110,19 @@ .global ia64_os_mca_dispatch_end .global ia64_sal_to_os_handoff_state .global ia64_os_to_sal_handoff_state + .global ia64_do_tlb_purge .text .align 16 -ia64_os_mca_dispatch: - - // Serialize all MCA processing - mov r3=1;; - LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);; -ia64_os_mca_spin: - xchg8 r4=[r2],r3;; - cmp.ne p6,p0=r4,r0 -(p6) br ia64_os_mca_spin - - // Save the SAL to OS MCA handoff state as defined - // by SAL SPEC 3.0 - // NOTE : The order in which the state gets saved - // is dependent on the way the C-structure - // for ia64_mca_sal_to_os_state_t has been - // defined in include/asm/mca.h - SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) - ;; - - // LOG PROCESSOR STATE INFO FROM HERE ON.. -begin_os_mca_dump: - br ia64_os_mca_proc_state_dump;; - -ia64_os_mca_done_dump: - - LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56) - ;; - ld8 r18=[r16] // Get processor state parameter on existing PALE_CHECK. - ;; - tbit.nz p6,p7=r18,60 -(p7) br.spnt done_tlb_purge_and_reload - - // The following code purges TC and TR entries. Then reload all TC entries. - // Purge percpu data TC entries. -begin_tlb_purge_and_reload: +/* + * Just the TLB purge part is moved to a separate function + * so we can re-use the code for cpu hotplug code as well + * Caller should now setup b1, so we can branch once the + * tlb flush is complete. + */ +ia64_do_tlb_purge: #define O(member) IA64_CPUINFO_##member##_OFFSET GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 @@ -230,6 +203,51 @@ begin_tlb_purge_and_reload: ;; srlz.i ;; + // Now branch away to caller. + br.sptk.many b1 + ;; + +ia64_os_mca_dispatch: + + // Serialize all MCA processing + mov r3=1;; + LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);; +ia64_os_mca_spin: + xchg8 r4=[r2],r3;; + cmp.ne p6,p0=r4,r0 +(p6) br ia64_os_mca_spin + + // Save the SAL to OS MCA handoff state as defined + // by SAL SPEC 3.0 + // NOTE : The order in which the state gets saved + // is dependent on the way the C-structure + // for ia64_mca_sal_to_os_state_t has been + // defined in include/asm/mca.h + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + ;; + + // LOG PROCESSOR STATE INFO FROM HERE ON.. +begin_os_mca_dump: + br ia64_os_mca_proc_state_dump;; + +ia64_os_mca_done_dump: + + LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56) + ;; + ld8 r18=[r16] // Get processor state parameter on existing PALE_CHECK. + ;; + tbit.nz p6,p7=r18,60 +(p7) br.spnt done_tlb_purge_and_reload + + // The following code purges TC and TR entries. Then reload all TC entries. + // Purge percpu data TC entries. +begin_tlb_purge_and_reload: + movl r18=ia64_reload_tr;; + LOAD_PHYSICAL(p0,r18,ia64_reload_tr);; + mov b1=r18;; + br.sptk.many ia64_do_tlb_purge;; + +ia64_reload_tr: // Finally reload the TR registers. // 1. Reload DTR/ITR registers for kernel. mov r18=KERNEL_TR_PAGE_SHIFT<<2 diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index ab478172c349..abc0113a821d 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -132,8 +132,7 @@ mca_handler_bh(unsigned long paddr) spin_unlock(&mca_bh_lock); /* This process is about to be killed itself */ - force_sig(SIGKILL, current); - schedule(); + do_exit(SIGKILL); } /** @@ -439,6 +438,7 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; psr2->cpl = 0; psr2->ri = 0; + psr2->i = 0; return 1; } diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S index bcfa05acc561..2d7e0217638d 100644 --- a/arch/ia64/kernel/mca_drv_asm.S +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -10,6 +10,7 @@ #include <asm/asmmacro.h> #include <asm/processor.h> +#include <asm/ptrace.h> GLOBAL_ENTRY(mca_handler_bhhook) invala // clear RSE ? @@ -20,12 +21,21 @@ GLOBAL_ENTRY(mca_handler_bhhook) ;; alloc r16=ar.pfs,0,2,1,0 // make a new frame ;; + mov ar.rsc=0 + ;; mov r13=IA64_KR(CURRENT) // current task pointer ;; - adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13 + mov r2=r13 + ;; + addl r22=IA64_RBS_OFFSET,r2 + ;; + mov ar.bspstore=r22 ;; - ld8 r12=[r12] // stack pointer + addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 ;; + adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; + st1 [r2]=r0 // clear current->thread.on_ustack flag mov loc0=r16 movl loc1=mca_handler_bh // recovery C function ;; @@ -34,7 +44,9 @@ GLOBAL_ENTRY(mca_handler_bhhook) ;; mov loc1=rp ;; - br.call.sptk.many rp=b6 // not return ... + ssm psr.i + ;; + br.call.sptk.many rp=b6 // does not return ... ;; mov ar.pfs=loc0 mov rp=loc1 diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 71147be3279c..71c101601e3e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -480,14 +480,6 @@ typedef struct { #define PFM_CMD_ARG_MANY -1 /* cannot be zero */ typedef struct { - int debug; /* turn on/off debugging via syslog */ - int debug_ovfl; /* turn on/off debug printk in overflow handler */ - int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ - int expert_mode; /* turn on/off value checking */ - int debug_pfm_read; -} pfm_sysctl_t; - -typedef struct { unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ @@ -514,8 +506,8 @@ static LIST_HEAD(pfm_buffer_fmt_list); static pmu_config_t *pmu_conf; /* sysctl() controls */ -static pfm_sysctl_t pfm_sysctl; -int pfm_debug_var; +pfm_sysctl_t pfm_sysctl; +EXPORT_SYMBOL(pfm_sysctl); static ctl_table pfm_ctl_table[]={ {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, @@ -1273,6 +1265,8 @@ out: } EXPORT_SYMBOL(pfm_unregister_buffer_fmt); +extern void update_pal_halt_status(int); + static int pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) { @@ -1319,6 +1313,11 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) is_syswide, cpu)); + /* + * disable default_idle() to go to PAL_HALT + */ + update_pal_halt_status(0); + UNLOCK_PFS(flags); return 0; @@ -1374,6 +1373,12 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) is_syswide, cpu)); + /* + * if possible, enable default_idle() to go into PAL_HALT + */ + if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) + update_pal_halt_status(1); + UNLOCK_PFS(flags); return 0; @@ -1576,7 +1581,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) goto abort_locked; } - DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); + DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); ret = -EFAULT; if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); @@ -3695,8 +3700,6 @@ pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) pfm_sysctl.debug = m == 0 ? 0 : 1; - pfm_debug_var = pfm_sysctl.debug; - printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); if (m == 0) { @@ -4212,7 +4215,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", req->load_pid, ctx->ctx_state)); - return -EINVAL; + return -EBUSY; } DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); @@ -4714,16 +4717,26 @@ recheck: if (task == current || ctx->ctx_fl_system) return 0; /* - * if context is UNLOADED we are safe to go + * we are monitoring another thread */ - if (state == PFM_CTX_UNLOADED) return 0; - - /* - * no command can operate on a zombie context - */ - if (state == PFM_CTX_ZOMBIE) { - DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); - return -EINVAL; + switch(state) { + case PFM_CTX_UNLOADED: + /* + * if context is UNLOADED we are safe to go + */ + return 0; + case PFM_CTX_ZOMBIE: + /* + * no command can operate on a zombie context + */ + DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); + return -EINVAL; + case PFM_CTX_MASKED: + /* + * PMU state has been saved to software even though + * the thread may still be running. + */ + if (cmd != PFM_UNLOAD_CONTEXT) return 0; } /* @@ -4996,13 +5009,21 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) } static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); - + /* + * pfm_handle_work() can be called with interrupts enabled + * (TIF_NEED_RESCHED) or disabled. The down_interruptible + * call may sleep, therefore we must re-enable interrupts + * to avoid deadlocks. It is safe to do so because this function + * is called ONLY when returning to user level (PUStk=1), in which case + * there is no risk of kernel stack overflow due to deep + * interrupt nesting. + */ void pfm_handle_work(void) { pfm_context_t *ctx; struct pt_regs *regs; - unsigned long flags; + unsigned long flags, dummy_flags; unsigned long ovfl_regs; unsigned int reason; int ret; @@ -5039,18 +5060,15 @@ pfm_handle_work(void) //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; + /* + * restore interrupt mask to what it was on entry. + * Could be enabled/diasbled. + */ UNPROTECT_CTX(ctx, flags); - /* - * pfm_handle_work() is currently called with interrupts disabled. - * The down_interruptible call may sleep, therefore we - * must re-enable interrupts to avoid deadlocks. It is - * safe to do so because this function is called ONLY - * when returning to user level (PUStk=1), in which case - * there is no risk of kernel stack overflow due to deep - * interrupt nesting. - */ - BUG_ON(flags & IA64_PSR_I); + /* + * force interrupt enable because of down_interruptible() + */ local_irq_enable(); DPRINT(("before block sleeping\n")); @@ -5064,12 +5082,12 @@ pfm_handle_work(void) DPRINT(("after block sleeping ret=%d\n", ret)); /* - * disable interrupts to restore state we had upon entering - * this function + * lock context and mask interrupts again + * We save flags into a dummy because we may have + * altered interrupts mask compared to entry in this + * function. */ - local_irq_disable(); - - PROTECT_CTX(ctx, flags); + PROTECT_CTX(ctx, dummy_flags); /* * we need to read the ovfl_regs only after wake-up @@ -5095,7 +5113,9 @@ skip_blocking: ctx->ctx_ovfl_regs[0] = 0UL; nothing_to_do: - + /* + * restore flags as they were upon entry + */ UNPROTECT_CTX(ctx, flags); } diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c index 965d29004555..344941db0a9e 100644 --- a/arch/ia64/kernel/perfmon_default_smpl.c +++ b/arch/ia64/kernel/perfmon_default_smpl.c @@ -20,24 +20,17 @@ MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); MODULE_DESCRIPTION("perfmon default sampling format"); MODULE_LICENSE("GPL"); -MODULE_PARM(debug, "i"); -MODULE_PARM_DESC(debug, "debug"); - -MODULE_PARM(debug_ovfl, "i"); -MODULE_PARM_DESC(debug_ovfl, "debug ovfl"); - - #define DEFAULT_DEBUG 1 #ifdef DEFAULT_DEBUG #define DPRINT(a) \ do { \ - if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ + if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ } while (0) #define DPRINT_ovfl(a) \ do { \ - if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ + if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ } while (0) #else @@ -45,8 +38,6 @@ MODULE_PARM_DESC(debug_ovfl, "debug ovfl"); #define DPRINT_ovfl(a) #endif -static int debug, debug_ovfl; - static int default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 91293388dd29..ebb71f3d6d19 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -3,6 +3,7 @@ * * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> + * 04/11/17 Ashok Raj <ashok.raj@intel.com> Added CPU Hotplug Support */ #define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ #include <linux/config.h> @@ -49,7 +50,7 @@ #include "sigframe.h" void (*ia64_mark_idle)(int); -static cpumask_t cpu_idle_map; +static DEFINE_PER_CPU(unsigned int, cpu_idle_state); unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); @@ -172,7 +173,9 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall ia64_do_signal(oldset, scr, in_syscall); } -static int pal_halt = 1; +static int pal_halt = 1; +static int can_do_pal_halt = 1; + static int __init nohalt_setup(char * str) { pal_halt = 0; @@ -180,16 +183,20 @@ static int __init nohalt_setup(char * str) } __setup("nohalt", nohalt_setup); +void +update_pal_halt_status(int status) +{ + can_do_pal_halt = pal_halt && status; +} + /* * We use this if we don't have any better idle routine.. */ void default_idle (void) { - unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP); - while (!need_resched()) - if (pal_halt && !pmu_active) + if (can_do_pal_halt) safe_halt(); else cpu_relax(); @@ -200,27 +207,20 @@ default_idle (void) static inline void play_dead(void) { extern void ia64_cpu_local_tick (void); + unsigned int this_cpu = smp_processor_id(); + /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; - /* We shouldn't have to disable interrupts while dead, but - * some interrupts just don't seem to go away, and this makes - * it "work" for testing purposes. */ max_xtp(); local_irq_disable(); - /* Death loop */ - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) - cpu_relax(); - + idle_task_exit(); + ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]); /* - * Enable timer interrupts from now on - * Not required if we put processor in SAL_BOOT_RENDEZ mode. + * The above is a point of no-return, the processor is + * expected to be in SAL loop now. */ - local_flush_tlb_all(); - cpu_set(smp_processor_id(), cpu_online_map); - wmb(); - ia64_cpu_local_tick (); - local_irq_enable(); + BUG(); } #else static inline void play_dead(void) @@ -229,20 +229,31 @@ static inline void play_dead(void) } #endif /* CONFIG_HOTPLUG_CPU */ - void cpu_idle_wait(void) { - int cpu; - cpumask_t map; + unsigned int cpu, this_cpu = get_cpu(); + cpumask_t map; + + set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); + put_cpu(); - for_each_online_cpu(cpu) - cpu_set(cpu, cpu_idle_map); + cpus_clear(map); + for_each_online_cpu(cpu) { + per_cpu(cpu_idle_state, cpu) = 1; + cpu_set(cpu, map); + } - wmb(); - do { - ssleep(1); - cpus_and(map, cpu_idle_map, cpu_online_map); - } while (!cpus_empty(map)); + __get_cpu_var(cpu_idle_state) = 0; + + wmb(); + do { + ssleep(1); + for_each_online_cpu(cpu) { + if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) + cpu_clear(cpu, map); + } + cpus_and(map, map, cpu_online_map); + } while (!cpus_empty(map)); } EXPORT_SYMBOL_GPL(cpu_idle_wait); @@ -250,7 +261,6 @@ void __attribute__((noreturn)) cpu_idle (void) { void (*mark_idle)(int) = ia64_mark_idle; - int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { @@ -261,12 +271,13 @@ cpu_idle (void) while (!need_resched()) { void (*idle)(void); + if (__get_cpu_var(cpu_idle_state)) + __get_cpu_var(cpu_idle_state) = 0; + + rmb(); if (mark_idle) (*mark_idle)(1); - if (cpu_isset(cpu, cpu_idle_map)) - cpu_clear(cpu, cpu_idle_map); - rmb(); idle = pm_idle; if (!idle) idle = default_idle; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 55789fcd7210..907464ee7273 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -17,6 +17,7 @@ #include <linux/user.h> #include <linux/security.h> #include <linux/audit.h> +#include <linux/signal.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -1481,7 +1482,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) case PTRACE_CONT: /* restart after signal. */ ret = -EIO; - if (data > _NSIG) + if (!valid_signal(data)) goto out_tsk; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -1520,7 +1521,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) /* let child execute for one instruction */ case PTRACE_SINGLEBLOCK: ret = -EIO; - if (data > _NSIG) + if (!valid_signal(data)) goto out_tsk; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -1595,20 +1596,25 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { - long syscall; + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + syscall_trace(); if (unlikely(current->audit_context)) { - if (IS_IA32_PROCESS(®s)) + long syscall; + int arch; + + if (IS_IA32_PROCESS(®s)) { syscall = regs.r1; - else + arch = AUDIT_ARCH_I386; + } else { syscall = regs.r15; + arch = AUDIT_ARCH_IA64; + } - audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3); + audit_syscall_entry(current, arch, syscall, arg0, arg1, arg2, arg3); } - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(); } /* "asmlinkage" so the input arguments are preserved... */ @@ -1619,7 +1625,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, struct pt_regs regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs.r8); + audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8); if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index f05650c801d2..b7e6b4cb374b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -4,10 +4,15 @@ * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> * Stephane Eranian <eranian@hpl.hp.com> - * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 2000, 2004 Intel Corp + * Rohit Seth <rohit.seth@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Gordon Jin <gordon.jin@intel.com> * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> * + * 12/26/04 S.Siddha, G.Jin, R.Seth + * Add multi-threading and multi-core detection * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map * 03/31/00 R.Seth cpu_initialized and current->processor fixes @@ -296,6 +301,34 @@ mark_bsp_online (void) #endif } +#ifdef CONFIG_SMP +static void +check_for_logical_procs (void) +{ + pal_logical_to_physical_t info; + s64 status; + + status = ia64_pal_logical_to_phys(0, &info); + if (status == -1) { + printk(KERN_INFO "No logical to physical processor mapping " + "available\n"); + return; + } + if (status) { + printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", + status); + return; + } + /* + * Total number of siblings that BSP has. Though not all of them + * may have booted successfully. The correct number of siblings + * booted is in info.overview_num_log. + */ + smp_num_siblings = info.overview_tpc; + smp_num_cpucores = info.overview_cpp; +} +#endif + void __init setup_arch (char **cmdline_p) { @@ -356,6 +389,19 @@ setup_arch (char **cmdline_p) #ifdef CONFIG_SMP cpu_physical_id(0) = hard_smp_processor_id(); + + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); + + check_for_logical_procs(); + if (smp_num_cpucores > 1) + printk(KERN_INFO + "cpu package is Multi-Core capable: number of cores=%d\n", + smp_num_cpucores); + if (smp_num_siblings > 1) + printk(KERN_INFO + "cpu package is Multi-Threading capable: number of siblings=%d\n", + smp_num_siblings); #endif cpu_init(); /* initialize the bootstrap CPU */ @@ -459,12 +505,23 @@ show_cpuinfo (struct seq_file *m, void *v) "cpu regs : %u\n" "cpu MHz : %lu.%06lu\n" "itc MHz : %lu.%06lu\n" - "BogoMIPS : %lu.%02lu\n\n", + "BogoMIPS : %lu.%02lu\n", cpunum, c->vendor, family, c->model, c->revision, c->archrev, features, c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, c->itc_freq / 1000000, c->itc_freq % 1000000, lpj*HZ/500000, (lpj*HZ/5000) % 100); +#ifdef CONFIG_SMP + seq_printf(m, "siblings : %u\n", c->num_log); + if (c->threads_per_core > 1 || c->cores_per_socket > 1) + seq_printf(m, + "physical id: %u\n" + "core id : %u\n" + "thread id : %u\n", + c->socket_id, c->core_id, c->thread_id); +#endif + seq_printf(m,"\n"); + return 0; } @@ -533,6 +590,14 @@ identify_cpu (struct cpuinfo_ia64 *c) memcpy(c->vendor, cpuid.field.vendor, 16); #ifdef CONFIG_SMP c->cpu = smp_processor_id(); + + /* below default values will be overwritten by identify_siblings() + * for Multi-Threading/Multi-Core capable cpu's + */ + c->threads_per_core = c->cores_per_socket = c->num_log = 1; + c->socket_id = -1; + + identify_siblings(c); #endif c->ppn = cpuid.field.ppn; c->number = cpuid.field.number; diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 6891d86937d9..499b7e5317cf 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -224,7 +224,8 @@ ia64_rt_sigreturn (struct sigscratch *scr) * could be corrupted. */ retval = (long) &ia64_leave_kernel; - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SYSCALL_AUDIT)) /* * strace expects to be notified after sigreturn returns even though the * context to which we return may not be in the middle of a syscall. diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 5318f0cbfc26..0d5ee57c9865 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -1,14 +1,25 @@ /* * SMP boot-related support * - * Copyright (C) 1998-2003 Hewlett-Packard Co + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2001, 2004-2005 Intel Corp + * Rohit Seth <rohit.seth@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Gordon Jin <gordon.jin@intel.com> + * Ashok Raj <ashok.raj@intel.com> * * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here. * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code. * 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence. * smp_boot_cpus()/smp_commence() is replaced by * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). + * 04/06/21 Ashok Raj <ashok.raj@intel.com> Added CPU Hotplug Support + * 04/12/26 Jin Gordon <gordon.jin@intel.com> + * 04/12/26 Rohit Seth <rohit.seth@intel.com> + * Add multi-threading and multi-core detection + * 05/01/30 Suresh Siddha <suresh.b.siddha@intel.com> + * Setup cpu_sibling_map and cpu_core_map */ #include <linux/config.h> @@ -58,6 +69,37 @@ #define Dprintk(x...) #endif +#ifdef CONFIG_HOTPLUG_CPU +/* + * Store all idle threads, this can be reused instead of creating + * a new thread. Also avoids complicated thread destroy functionality + * for idle threads. + */ +struct task_struct *idle_thread_array[NR_CPUS]; + +/* + * Global array allocated for NR_CPUS at boot time + */ +struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; + +/* + * start_ap in head.S uses this to store current booting cpu + * info. + */ +struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; + +#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]); + +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) + +#else + +#define get_idle_for_cpu(x) (NULL) +#define set_idle_for_cpu(x,p) +#define set_brendez_area(x) +#endif + /* * ITC synchronization related stuff: @@ -90,6 +132,11 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; +int smp_num_siblings = 1; +int smp_num_cpucores = 1; + /* which logical CPU number maps to which CPU (physical APIC ID) */ volatile int ia64_cpu_to_sapicid[NR_CPUS]; EXPORT_SYMBOL(ia64_cpu_to_sapicid); @@ -124,7 +171,8 @@ sync_master (void *arg) local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { - while (!go[MASTER]); + while (!go[MASTER]) + cpu_relax(); go[MASTER] = 0; go[SLAVE] = ia64_get_itc(); } @@ -147,7 +195,8 @@ get_delta (long *rt, long *master) for (i = 0; i < NUM_ITERS; ++i) { t0 = ia64_get_itc(); go[MASTER] = 1; - while (!(tm = go[SLAVE])); + while (!(tm = go[SLAVE])) + cpu_relax(); go[SLAVE] = 0; t1 = ia64_get_itc(); @@ -226,7 +275,8 @@ ia64_sync_itc (unsigned int master) return; } - while (go[MASTER]); /* wait for master to be ready */ + while (go[MASTER]) + cpu_relax(); /* wait for master to be ready */ spin_lock_irqsave(&itc_sync_lock, flags); { @@ -345,7 +395,6 @@ start_secondary (void *unused) { /* Early console may use I/O ports */ ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); - Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); efi_map_pal_code(); cpu_init(); @@ -384,6 +433,13 @@ do_boot_cpu (int sapicid, int cpu) .done = COMPLETION_INITIALIZER(c_idle.done), }; DECLARE_WORK(work, do_fork_idle, &c_idle); + + c_idle.idle = get_idle_for_cpu(cpu); + if (c_idle.idle) { + init_idle(c_idle.idle, cpu); + goto do_rest; + } + /* * We can't use kernel_thread since we must avoid to reschedule the child. */ @@ -396,10 +452,15 @@ do_boot_cpu (int sapicid, int cpu) if (IS_ERR(c_idle.idle)) panic("failed fork for CPU %d", cpu); + + set_idle_for_cpu(cpu, c_idle.idle); + +do_rest: task_for_booting_cpu = c_idle.idle; Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); + set_brendez_area(cpu); platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); /* @@ -552,19 +613,70 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_callin_map); } +/* + * mt_info[] is a temporary store for all info returned by + * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the + * specific cpu comes. + */ +static struct { + __u32 socket_id; + __u16 core_id; + __u16 thread_id; + __u16 proc_fixed_addr; + __u8 valid; +}mt_info[NR_CPUS] __devinit; + #ifdef CONFIG_HOTPLUG_CPU -extern void fixup_irqs(void); -/* must be called with cpucontrol mutex held */ -static int __devinit cpu_enable(unsigned int cpu) +static inline void +remove_from_mtinfo(int cpu) { - per_cpu(cpu_state,cpu) = CPU_UP_PREPARE; - wmb(); + int i; - while (!cpu_online(cpu)) - cpu_relax(); - return 0; + for_each_cpu(i) + if (mt_info[i].valid && mt_info[i].socket_id == + cpu_data(cpu)->socket_id) + mt_info[i].valid = 0; +} + +static inline void +clear_cpu_sibling_map(int cpu) +{ + int i; + + for_each_cpu_mask(i, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[i]); + for_each_cpu_mask(i, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[i]); + + cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE; +} + +static void +remove_siblinginfo(int cpu) +{ + int last = 0; + + if (cpu_data(cpu)->threads_per_core == 1 && + cpu_data(cpu)->cores_per_socket == 1) { + cpu_clear(cpu, cpu_core_map[cpu]); + cpu_clear(cpu, cpu_sibling_map[cpu]); + return; + } + + last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0); + + /* remove it from all sibling map's */ + clear_cpu_sibling_map(cpu); + + /* if this cpu is the last in the core group, remove all its info + * from mt_info structure + */ + if (last) + remove_from_mtinfo(cpu); } +extern void fixup_irqs(void); +/* must be called with cpucontrol mutex held */ int __cpu_disable(void) { int cpu = smp_processor_id(); @@ -575,9 +687,10 @@ int __cpu_disable(void) if (cpu == 0) return -EBUSY; + remove_siblinginfo(cpu); fixup_irqs(); local_flush_tlb_all(); - printk ("Disabled cpu %u\n", smp_processor_id()); + cpu_clear(cpu, cpu_callin_map); return 0; } @@ -589,12 +702,7 @@ void __cpu_die(unsigned int cpu) /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - /* - * TBD: Enable this when physical removal - * or when we put the processor is put in - * SAL_BOOT_RENDEZ mode - * cpu_clear(cpu, cpu_callin_map); - */ + printk ("CPU %d is now offline\n", cpu); return; } msleep(100); @@ -602,11 +710,6 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } #else /* !CONFIG_HOTPLUG_CPU */ -static int __devinit cpu_enable(unsigned int cpu) -{ - return 0; -} - int __cpu_disable(void) { return -ENOSYS; @@ -637,6 +740,23 @@ smp_cpus_done (unsigned int dummy) (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); } +static inline void __devinit +set_cpu_sibling_map(int cpu) +{ + int i; + + for_each_online_cpu(i) { + if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) { + cpu_set(i, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_sibling_map[i]); + } + } + } +} + int __devinit __cpu_up (unsigned int cpu) { @@ -648,21 +768,26 @@ __cpu_up (unsigned int cpu) return -EINVAL; /* - * Already booted.. just enable and get outa idle lool + * Already booted cpu? not valid anymore since we dont + * do idle loop tightspin anymore. */ if (cpu_isset(cpu, cpu_callin_map)) - { - cpu_enable(cpu); - local_irq_enable(); - while (!cpu_isset(cpu, cpu_online_map)) - mb(); - return 0; - } + return -EINVAL; + /* Processor goes to start_secondary(), sets online flag */ ret = do_boot_cpu(sapicid, cpu); if (ret < 0) return ret; + if (cpu_data(cpu)->threads_per_core == 1 && + cpu_data(cpu)->cores_per_socket == 1) { + cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_core_map[cpu]); + return 0; + } + + set_cpu_sibling_map(cpu); + return 0; } @@ -690,3 +815,106 @@ init_smp_config(void) ia64_sal_strerror(sal_ret)); } +static inline int __devinit +check_for_mtinfo_index(void) +{ + int i; + + for_each_cpu(i) + if (!mt_info[i].valid) + return i; + + return -1; +} + +/* + * Search the mt_info to find out if this socket's cid/tid information is + * cached or not. If the socket exists, fill in the core_id and thread_id + * in cpuinfo + */ +static int __devinit +check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c) +{ + int i; + __u32 sid = c->socket_id; + + for_each_cpu(i) { + if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address + && mt_info[i].socket_id == sid) { + c->core_id = mt_info[i].core_id; + c->thread_id = mt_info[i].thread_id; + return 1; /* not a new socket */ + } + } + return 0; +} + +/* + * identify_siblings(cpu) gets called from identify_cpu. This populates the + * information related to logical execution units in per_cpu_data structure. + */ +void __devinit +identify_siblings(struct cpuinfo_ia64 *c) +{ + s64 status; + u16 pltid; + u64 proc_fixed_addr; + int count, i; + pal_logical_to_physical_t info; + + if (smp_num_cpucores == 1 && smp_num_siblings == 1) + return; + + if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) { + printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", + status); + return; + } + if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) { + printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status); + return; + } + if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) { + printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status); + return; + } + + c->socket_id = (pltid << 8) | info.overview_ppid; + c->cores_per_socket = info.overview_cpp; + c->threads_per_core = info.overview_tpc; + count = c->num_log = info.overview_num_log; + + /* If the thread and core id information is already cached, then + * we will simply update cpu_info and return. Otherwise, we will + * do the PAL calls and cache core and thread id's of all the siblings. + */ + if (check_for_new_socket(proc_fixed_addr, c)) + return; + + for (i = 0; i < count; i++) { + int index; + + if (i && (status = ia64_pal_logical_to_phys(i, &info)) + != PAL_STATUS_SUCCESS) { + printk(KERN_ERR "ia64_pal_logical_to_phys failed" + " with %ld\n", status); + return; + } + if (info.log2_la == proc_fixed_addr) { + c->core_id = info.log1_cid; + c->thread_id = info.log1_tid; + } + + index = check_for_mtinfo_index(); + /* We will not do the mt_info caching optimization in this case. + */ + if (index < 0) + continue; + + mt_info[index].valid = 1; + mt_info[index].socket_id = c->socket_id; + mt_info[index].core_id = info.log1_cid; + mt_info[index].thread_id = info.log1_tid; + mt_info[index].proc_fixed_addr = info.log2_la; + } +} diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 3ac216e1c8bb..a8cf6d8a509c 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -93,20 +93,6 @@ sys_getpagesize (void) } asmlinkage unsigned long -ia64_shmat (int shmid, void __user *shmaddr, int shmflg) -{ - unsigned long raddr; - int retval; - - retval = do_shmat(shmid, shmaddr, shmflg, &raddr); - if (retval < 0) - return retval; - - force_successful_syscall_return(); - return raddr; -} - -asmlinkage unsigned long ia64_brk (unsigned long brk) { unsigned long rlim, retval, newbrk, oldbrk; diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index d494ff647cac..2776a074c6f1 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -1943,23 +1943,30 @@ EXPORT_SYMBOL(unw_unwind); int unw_unwind_to_user (struct unw_frame_info *info) { - unsigned long ip, sp; + unsigned long ip, sp, pr = 0; while (unw_unwind(info) >= 0) { - if (unw_get_rp(info, &ip) < 0) { - unw_get_ip(info, &ip); - UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n", - __FUNCTION__, ip); - return -1; - } unw_get_sp(info, &sp); - if (sp >= (unsigned long)info->task + IA64_STK_OFFSET) + if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n", + __FUNCTION__); break; - if (ip < FIXADDR_USER_END) + } + if (unw_is_intr_frame(info) && + (pr & (1UL << PRED_USER_STACK))) return 0; + if (unw_get_pr (info, &pr) < 0) { + unw_get_rp(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __FUNCTION__, ip); + return -1; + } } unw_get_ip(info, &ip); - UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip); + UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", + __FUNCTION__, ip); return -1; } EXPORT_SYMBOL(unw_unwind_to_user); diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S index 29c802b19669..a1af9146cfdb 100644 --- a/arch/ia64/lib/flush.S +++ b/arch/ia64/lib/flush.S @@ -1,8 +1,8 @@ /* * Cache flushing routines. * - * Copyright (C) 1999-2001 Hewlett-Packard Co - * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> */ #include <asm/asmmacro.h> #include <asm/page.h> @@ -26,7 +26,7 @@ GLOBAL_ENTRY(flush_icache_range) mov ar.lc=r8 ;; -.Loop: fc in0 // issuable on M0 only +.Loop: fc.i in0 // issuable on M2 only add in0=32,in0 br.cloop.sptk.few .Loop ;; diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S index 6f26ef7cc236..6f308e62c137 100644 --- a/arch/ia64/lib/memcpy_mck.S +++ b/arch/ia64/lib/memcpy_mck.S @@ -75,6 +75,7 @@ GLOBAL_ENTRY(memcpy) mov f6=f0 br.cond.sptk .common_code ;; +END(memcpy) GLOBAL_ENTRY(__copy_user) .prologue // check dest alignment @@ -300,7 +301,7 @@ EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) add src_pre_mem=0,src0 // prefetch src pointer add dst_pre_mem=0,dst0 // prefetch dest pointer and src0=-8,src0 // 1st src pointer -(p7) mov ar.lc = r21 +(p7) mov ar.lc = cnt (p8) mov ar.lc = r0 ;; TEXT_ALIGN(32) @@ -524,7 +525,6 @@ EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ #undef B #undef C #undef D -END(memcpy) /* * Due to lack of local tag support in gcc 2.x assembler, it is not clear which diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S index bd8cf907fe22..f26c16aefb1c 100644 --- a/arch/ia64/lib/memset.S +++ b/arch/ia64/lib/memset.S @@ -57,10 +57,10 @@ GLOBAL_ENTRY(memset) { .mmi .prologue alloc tmp = ar.pfs, 3, 0, 0, 0 - .body lfetch.nt1 [dest] // .save ar.lc, save_lc mov.i save_lc = ar.lc + .body } { .mmi mov ret0 = dest // return value cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 6daf15ac8940..91a055f5731f 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -61,7 +61,8 @@ show_mem (void) printk("%d reserved pages\n", reserved); printk("%d pages shared\n", shared); printk("%d pages swap cached\n", cached); - printk("%ld pages in page table cache\n", pgtable_cache_size); + printk("%ld pages in page table cache\n", + pgtable_quicklist_total_size()); } /* physical address where the bootmem map is located */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 3456a9b6971e..c00710929390 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -582,7 +582,8 @@ void show_mem(void) printk("%d reserved pages\n", total_reserved); printk("%d pages shared\n", total_shared); printk("%d pages swap cached\n", total_cached); - printk("Total of %ld pages in page table cache\n", pgtable_cache_size); + printk("Total of %ld pages in page table cache\n", + pgtable_quicklist_total_size()); printk("%d free buffer pages\n", nr_free_buffer_pages()); } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index da859125aaef..4174ec999dde 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -209,10 +209,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re } no_context: - if (isr & IA64_ISR_SP) { + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { /* - * This fault was due to a speculative load set the "ed" bit in the psr to - * ensure forward progress (target register will get a NaT). + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) */ ia64_psr(regs)->ed = 1; return; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 65cf839573ea..547785e3cba2 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -39,6 +39,9 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist); +DEFINE_PER_CPU(long, __pgtable_quicklist_size); + extern void ia64_tlb_init (void); unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; @@ -50,27 +53,53 @@ struct page *vmem_map; EXPORT_SYMBOL(vmem_map); #endif -static int pgt_cache_water[2] = { 25, 50 }; - -struct page *zero_page_memmap_ptr; /* map entry for zero page */ +struct page *zero_page_memmap_ptr; /* map entry for zero page */ EXPORT_SYMBOL(zero_page_memmap_ptr); +#define MIN_PGT_PAGES 25UL +#define MAX_PGT_FREES_PER_PASS 16L +#define PGT_FRACTION_OF_NODE_MEM 16 + +static inline long +max_pgt_pages(void) +{ + u64 node_free_pages, max_pgt_pages; + +#ifndef CONFIG_NUMA + node_free_pages = nr_free_pages(); +#else + node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id())); +#endif + max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM; + max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES); + return max_pgt_pages; +} + +static inline long +min_pages_to_free(void) +{ + long pages_to_free; + + pages_to_free = pgtable_quicklist_size - max_pgt_pages(); + pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS); + return pages_to_free; +} + void -check_pgt_cache (void) +check_pgt_cache(void) { - int low, high; + long pages_to_free; - low = pgt_cache_water[0]; - high = pgt_cache_water[1]; + if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES)) + return; preempt_disable(); - if (pgtable_cache_size > (u64) high) { - do { - if (pgd_quicklist) - free_page((unsigned long)pgd_alloc_one_fast(NULL)); - if (pmd_quicklist) - free_page((unsigned long)pmd_alloc_one_fast(NULL, 0)); - } while (pgtable_cache_size > (u64) low); + while (unlikely((pages_to_free = min_pages_to_free()) > 0)) { + while (pages_to_free--) { + free_page((unsigned long)pgtable_quicklist_alloc()); + } + preempt_enable(); + preempt_disable(); } preempt_enable(); } @@ -523,11 +552,14 @@ void mem_init (void) { long reserved_pages, codesize, datasize, initsize; - unsigned long num_pgt_pages; pg_data_t *pgdat; int i; static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel; + BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE); + BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE); + BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE); + #ifdef CONFIG_PCI /* * This needs to be called _after_ the command line has been parsed but _before_ @@ -564,18 +596,6 @@ mem_init (void) num_physpages << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10); - /* - * Allow for enough (cached) page table pages so that we can map the entire memory - * at least once. Each task also needs a couple of page tables pages, so add in a - * fudge factor for that (don't use "threads-max" here; that would be wrong!). - * Don't allow the cache to be more than 10% of total memory, though. - */ -# define NUM_TASKS 500 /* typical number of tasks */ - num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS; - if (num_pgt_pages > nr_free_pages() / 10) - num_pgt_pages = nr_free_pages() / 10; - if (num_pgt_pages > (u64) pgt_cache_water[1]) - pgt_cache_water[1] = num_pgt_pages; /* * For fsyscall entrpoints with no light-weight handler, use the ordinary diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h index b1f05ffec70b..1cd291d8badd 100644 --- a/arch/ia64/sn/include/pci/pcibr_provider.h +++ b/arch/ia64/sn/include/pci/pcibr_provider.h @@ -123,9 +123,11 @@ pcibr_lock(struct pcibus_info *pcibus_info) } #define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag) +extern int pcibr_init_provider(void); extern void *pcibr_bus_fixup(struct pcibus_bussoft *); -extern uint64_t pcibr_dma_map(struct pcidev_info *, unsigned long, size_t, unsigned int); -extern void pcibr_dma_unmap(struct pcidev_info *, dma_addr_t, int); +extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t); +extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t); +extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int); /* * prototypes for the bridge asic register access routines in pcibr_reg.c diff --git a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/arch/ia64/sn/include/pci/pcibus_provider_defs.h deleted file mode 100644 index 07065615bbea..000000000000 --- a/arch/ia64/sn/include/pci/pcibus_provider_defs.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. - */ -#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H -#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H - -/* - * SN pci asic types. Do not ever renumber these or reuse values. The - * values must agree with what prom thinks they are. - */ - -#define PCIIO_ASIC_TYPE_UNKNOWN 0 -#define PCIIO_ASIC_TYPE_PPB 1 -#define PCIIO_ASIC_TYPE_PIC 2 -#define PCIIO_ASIC_TYPE_TIOCP 3 - -/* - * Common pciio bus provider data. There should be one of these as the - * first field in any pciio based provider soft structure (e.g. pcibr_soft - * tioca_soft, etc). - */ - -struct pcibus_bussoft { - uint32_t bs_asic_type; /* chipset type */ - uint32_t bs_xid; /* xwidget id */ - uint64_t bs_persist_busnum; /* Persistent Bus Number */ - uint64_t bs_legacy_io; /* legacy io pio addr */ - uint64_t bs_legacy_mem; /* legacy mem pio addr */ - uint64_t bs_base; /* widget base */ - struct xwidget_info *bs_xwidget_info; -}; - -/* - * DMA mapping flags - */ - -#define SN_PCIDMA_CONSISTENT 0x0001 - -#endif /* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */ diff --git a/arch/ia64/sn/include/pci/pcidev.h b/arch/ia64/sn/include/pci/pcidev.h deleted file mode 100644 index 81eb95d3bf47..000000000000 --- a/arch/ia64/sn/include/pci/pcidev.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. - */ -#ifndef _ASM_IA64_SN_PCI_PCIDEV_H -#define _ASM_IA64_SN_PCI_PCIDEV_H - -#include <linux/pci.h> - -extern struct sn_irq_info **sn_irq; - -#define SN_PCIDEV_INFO(pci_dev) \ - ((struct pcidev_info *)(pci_dev)->sysdata) - -/* - * Given a pci_bus, return the sn pcibus_bussoft struct. Note that - * this only works for root busses, not for busses represented by PPB's. - */ - -#define SN_PCIBUS_BUSSOFT(pci_bus) \ - ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data)) - -/* - * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note - * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due - * due to possible PPB's in the path. - */ - -#define SN_PCIDEV_BUSSOFT(pci_dev) \ - (SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info) - -#define PCIIO_BUS_NONE 255 /* bus 255 reserved */ -#define PCIIO_SLOT_NONE 255 -#define PCIIO_FUNC_NONE 255 -#define PCIIO_VENDOR_ID_NONE (-1) - -struct pcidev_info { - uint64_t pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */ - uint64_t pdi_slot_host_handle; /* Bus and devfn Host pci_dev */ - - struct pcibus_bussoft *pdi_pcibus_info; /* Kernel common bus soft */ - struct pcidev_info *pdi_host_pcidev_info; /* Kernel Host pci_dev */ - struct pci_dev *pdi_linux_pcidev; /* Kernel pci_dev */ - - struct sn_irq_info *pdi_sn_irq_info; -}; - -extern void sn_irq_fixup(struct pci_dev *pci_dev, - struct sn_irq_info *sn_irq_info); - -#endif /* _ASM_IA64_SN_PCI_PCIDEV_H */ diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 6c7f4d9e8ea0..4351c4ff9845 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -4,9 +4,15 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved. # obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ huberror.o io_init.o iomv.o klconflib.o sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o +obj-$(CONFIG_SGI_TIOCX) += tiocx.o +obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o +xp-y := xp_main.o xp_nofault.o +obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o +obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index ce0bc4085eae..647deae9bfcd 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. */ #include <linux/config.h> @@ -170,10 +170,6 @@ retry_bteop: /* Initialize the notification to a known value. */ *bte->most_rcnt_na = BTE_WORD_BUSY; - /* Set the status reg busy bit and transfer length */ - BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size)); - BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size); - /* Set the source and destination registers */ BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src)))); BTE_SRC_STORE(bte, TO_PHYS(src)); @@ -188,7 +184,7 @@ retry_bteop: /* Initiate the transfer */ BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); - BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode)); + BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode)); itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); @@ -429,10 +425,16 @@ void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda; for (i = 0; i < BTES_PER_NODE; i++) { + u64 *base_addr; + /* Which link status register should we use? */ - unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1); - mynodepda->bte_if[i].bte_base_addr = (u64 *) - REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status); + base_addr = (u64 *) + REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i)); + mynodepda->bte_if[i].bte_base_addr = base_addr; + mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr); + mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr); + mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr); + mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr); /* * Initialize the notification and spinlock diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c index fd104312c6bd..fcbc748ae433 100644 --- a/arch/ia64/sn/kernel/bte_error.c +++ b/arch/ia64/sn/kernel/bte_error.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. */ #include <linux/types.h> @@ -33,48 +33,28 @@ void bte_error_handler(unsigned long); * Wait until all BTE related CRBs are completed * and then reset the interfaces. */ -void bte_error_handler(unsigned long _nodepda) +void shub1_bte_error_handler(unsigned long _nodepda) { struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; - spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; nasid_t nasid; int i; int valid_crbs; - unsigned long irq_flags; - volatile u64 *notify; - bte_result_t bh_error; ii_imem_u_t imem; /* II IMEM Register */ ii_icrb0_d_u_t icrbd; /* II CRB Register D */ ii_ibcr_u_t ibcr; ii_icmr_u_t icmr; ii_ieclr_u_t ieclr; - BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda, + BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda, smp_processor_id())); - spin_lock_irqsave(recovery_lock, irq_flags); - if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) && (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, smp_processor_id())); - spin_unlock_irqrestore(recovery_lock, irq_flags); return; } - /* - * Lock all interfaces on this node to prevent new transfers - * from being queued. - */ - for (i = 0; i < BTES_PER_NODE; i++) { - if (err_nodepda->bte_if[i].cleanup_active) { - continue; - } - spin_lock(&err_nodepda->bte_if[i].spinlock); - BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda, - smp_processor_id(), i)); - err_nodepda->bte_if[i].cleanup_active = 1; - } /* Determine information about our hub */ nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); @@ -101,7 +81,6 @@ void bte_error_handler(unsigned long _nodepda) mod_timer(recovery_timer, HZ * 5); BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, smp_processor_id())); - spin_unlock_irqrestore(recovery_lock, irq_flags); return; } if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { @@ -120,8 +99,6 @@ void bte_error_handler(unsigned long _nodepda) BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", err_nodepda, smp_processor_id(), i)); - spin_unlock_irqrestore(recovery_lock, - irq_flags); return; } } @@ -146,6 +123,51 @@ void bte_error_handler(unsigned long _nodepda) ibcr.ii_ibcr_fld_s.i_soft_reset = 1; REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); + del_timer(recovery_timer); +} + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +void bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; + int i; + nasid_t nasid; + unsigned long irq_flags; + volatile u64 *notify; + bte_result_t bh_error; + + BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + spin_lock_irqsave(recovery_lock, irq_flags); + + /* + * Lock all interfaces on this node to prevent new transfers + * from being queued. + */ + for (i = 0; i < BTES_PER_NODE; i++) { + if (err_nodepda->bte_if[i].cleanup_active) { + continue; + } + spin_lock(&err_nodepda->bte_if[i].spinlock); + BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda, + smp_processor_id(), i)); + err_nodepda->bte_if[i].cleanup_active = 1; + } + + if (is_shub1()) { + shub1_bte_error_handler(_nodepda); + } else { + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + if (ia64_sn_bte_recovery(nasid)) + panic("bte_error_handler(): Fatal BTE Error"); + } + for (i = 0; i < BTES_PER_NODE; i++) { bh_error = err_nodepda->bte_if[i].bh_error; if (bh_error != BTE_SUCCESS) { @@ -165,8 +187,6 @@ void bte_error_handler(unsigned long _nodepda) spin_unlock(&err_nodepda->bte_if[i].spinlock); } - del_timer(recovery_timer); - spin_unlock_irqrestore(recovery_lock, irq_flags); } diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 2bdf684c5066..5c39b43ba3c0 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000,2002-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000,2002-2005 Silicon Graphics, Inc. All rights reserved. */ #include <linux/types.h> @@ -38,8 +38,11 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep) if ((int)ret_stuff.v0) panic("hubii_eint_handler(): Fatal TIO Error"); - if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ - (void)hubiio_crb_error_handler(hubdev_info); + if (is_shub1()) { + if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ + (void)hubiio_crb_error_handler(hubdev_info); + } else + bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid))); return IRQ_HANDLED; } diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 001880812b7c..9e07f5463f21 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -11,14 +11,15 @@ #include <asm/sn/types.h> #include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include "pci/pcibr_provider.h" #include "xtalk/xwidgetdev.h" #include <asm/sn/geo.h> #include "xtalk/hubdev.h" #include <asm/sn/io.h> #include <asm/sn/simulator.h> +#include <asm/sn/tioca_provider.h> char master_baseio_wid; nasid_t master_nasid = INVALID_NASID; /* Partition Master */ @@ -34,6 +35,37 @@ struct brick { int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ +struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ + +/* + * Hooks and struct for unsupported pci providers + */ + +static dma_addr_t +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size) +{ + return 0; +} + +static void +sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) +{ + return; +} + +static void * +sn_default_pci_bus_fixup(struct pcibus_bussoft *soft) +{ + return NULL; +} + +static struct sn_pcibus_provider sn_pci_default_provider = { + .dma_map = sn_default_pci_map, + .dma_map_consistent = sn_default_pci_map, + .dma_unmap = sn_default_pci_unmap, + .bus_fixup = sn_default_pci_bus_fixup, +}; + /* * Retrieve the DMA Flush List given nasid. This list is needed * to implement the WAR - Flush DMA data on PIO Reads. @@ -142,6 +174,12 @@ static void sn_fixup_ionodes(void) if (status) continue; + /* Attach the error interrupt handlers */ + if (nasid & 1) + ice_error_init(hubdev); + else + hub_error_init(hubdev); + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; @@ -179,10 +217,6 @@ static void sn_fixup_ionodes(void) sn_flush_device_list; } - if (!(i & 1)) - hub_error_init(hubdev); - else - ice_error_init(hubdev); } } @@ -201,6 +235,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) struct sn_irq_info *sn_irq_info; struct pci_dev *host_pci_dev; int status = 0; + struct pcibus_bussoft *bs; dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL); if (SN_PCIDEV_INFO(dev) <= 0) @@ -241,6 +276,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) } /* set up host bus linkages */ + bs = SN_PCIBUS_BUSSOFT(dev->bus); host_pci_dev = pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32, SN_PCIDEV_INFO(dev)-> @@ -248,10 +284,16 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev; - SN_PCIDEV_INFO(dev)->pdi_pcibus_info = SN_PCIBUS_BUSSOFT(dev->bus); + SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs; + + if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { + SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type]; + } else { + SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; + } /* Only set up IRQ stuff if this device has a host bus context */ - if (SN_PCIDEV_BUSSOFT(dev) && sn_irq_info->irq_irq) { + if (bs && sn_irq_info->irq_irq) { SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info; dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq; sn_irq_fixup(dev, sn_irq_info); @@ -271,6 +313,7 @@ static void sn_pci_controller_fixup(int segment, int busnum) struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; void *provider_soft; + struct sn_pcibus_provider *provider; status = sal_get_pcibus_info((u64) segment, (u64) busnum, @@ -291,16 +334,22 @@ static void sn_pci_controller_fixup(int segment, int busnum) /* * Per-provider fixup. Copies the contents from prom to local * area and links SN_PCIBUS_BUSSOFT(). - * - * Note: Provider is responsible for ensuring that prom_bussoft_ptr - * represents an asic-type that it can handle. */ - if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) { - return; /* no further fixup necessary */ + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + return; /* unsupported asic type */ + } + + provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; + if (provider == NULL) { + return; /* no provider registerd for this asic */ + } + + provider_soft = NULL; + if (provider->bus_fixup) { + provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr); } - provider_soft = pcibr_bus_fixup(prom_bussoft_ptr); if (provider_soft == NULL) { return; /* fixup failed or not applicable */ } @@ -339,6 +388,17 @@ static int __init sn_pci_init(void) return 0; /* + * prime sn_pci_provider[]. Individial provider init routines will + * override their respective default entries. + */ + + for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) + sn_pci_provider[i] = &sn_pci_default_provider; + + pcibr_init_provider(); + tioca_init_provider(); + + /* * This is needed to avoid bounce limit checks in the blk layer */ ia64_max_iommu_merge_mask = ~PAGE_MASK; diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 3be44724f6c8..0f4e8138658f 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -13,8 +13,8 @@ #include <asm/sn/addrs.h> #include <asm/sn/arch.h> #include "xtalk/xwidgetdev.h" -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include "pci/pcibr_provider.h" #include <asm/sn/shub_mmr.h> #include <asm/sn/sn_sal.h> @@ -82,20 +82,9 @@ static void sn_ack_irq(unsigned int irq) nasid = get_nasid(); event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED)); - if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) { - mask |= (1 << SH_EVENT_OCCURRED_UART_INT_SHFT); - } - if (event_occurred & SH_EVENT_OCCURRED_IPI_INT_MASK) { - mask |= (1 << SH_EVENT_OCCURRED_IPI_INT_SHFT); - } - if (event_occurred & SH_EVENT_OCCURRED_II_INT0_MASK) { - mask |= (1 << SH_EVENT_OCCURRED_II_INT0_SHFT); - } - if (event_occurred & SH_EVENT_OCCURRED_II_INT1_MASK) { - mask |= (1 << SH_EVENT_OCCURRED_II_INT1_SHFT); - } + mask = event_occurred & SH_ALL_INT_MASK; HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS), - mask); + mask); __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); move_irq(irq); diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c index 857774bb2c9a..6546db6abdba 100644 --- a/arch/ia64/sn/kernel/mca.c +++ b/arch/ia64/sn/kernel/mca.c @@ -37,6 +37,11 @@ static u64 *sn_oemdata_size, sn_oemdata_bufsize; * This function is the callback routine that SAL calls to log error * info for platform errors. buf is appended to sn_oemdata, resizing as * required. + * Note: this is a SAL to OS callback, running under the same rules as the SAL + * code. SAL calls are run with preempt disabled so this routine must not + * sleep. vmalloc can sleep so print_hook cannot resize the output buffer + * itself, instead it must set the required size and return to let the caller + * resize the buffer then redrive the SAL call. */ static int print_hook(const char *fmt, ...) { @@ -47,18 +52,8 @@ static int print_hook(const char *fmt, ...) vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); len = strlen(buf); - while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) { - u8 *newbuf = vmalloc(sn_oemdata_bufsize += 1000); - if (!newbuf) { - printk(KERN_ERR "%s: unable to extend sn_oemdata\n", - __FUNCTION__); - return 0; - } - memcpy(newbuf, *sn_oemdata, *sn_oemdata_size); - vfree(*sn_oemdata); - *sn_oemdata = newbuf; - } - memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1); + if (*sn_oemdata_size + len <= sn_oemdata_bufsize) + memcpy(*sn_oemdata + *sn_oemdata_size, buf, len); *sn_oemdata_size += len; return 0; } @@ -98,7 +93,20 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata, sn_oemdata = oemdata; sn_oemdata_size = oemdata_size; sn_oemdata_bufsize = 0; - ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + *sn_oemdata_size = PAGE_SIZE; /* first guess at how much data will be generated */ + while (*sn_oemdata_size > sn_oemdata_bufsize) { + u8 *newbuf = vmalloc(*sn_oemdata_size); + if (!newbuf) { + printk(KERN_ERR "%s: unable to extend sn_oemdata\n", + __FUNCTION__); + return 1; + } + vfree(*sn_oemdata); + *sn_oemdata = newbuf; + sn_oemdata_bufsize = *sn_oemdata_size; + *sn_oemdata_size = 0; + ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header); + } up(&sn_oemdata_mutex); return 0; } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index f0306b516afb..4fb44984afe6 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. */ #include <linux/config.h> @@ -29,6 +29,7 @@ #include <linux/sched.h> #include <linux/root_dev.h> #include <linux/nodemask.h> +#include <linux/pm.h> #include <asm/io.h> #include <asm/sal.h> @@ -72,6 +73,12 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); + +DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); +EXPORT_PER_CPU_SYMBOL(__sn_nodepda); + partid_t sn_partid = -1; EXPORT_SYMBOL(sn_partid); char sn_system_serial_number_string[128]; @@ -353,6 +360,14 @@ void __init sn_setup(char **cmdline_p) screen_info = sn_screen_info; sn_timer_init(); + + /* + * set pm_power_off to a SAL call to allow + * sn machines to power off. The SAL call can be replaced + * by an ACPI interface call when ACPI is fully implemented + * for sn. + */ + pm_power_off = ia64_sn_power_down; } /** @@ -364,11 +379,11 @@ static void __init sn_init_pdas(char **cmdline_p) { cnodeid_t cnode; - memset(pda->cnodeid_to_nasid_table, -1, - sizeof(pda->cnodeid_to_nasid_table)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); for_each_online_node(cnode) - pda->cnodeid_to_nasid_table[cnode] = - pxm_to_nasid(nid_to_pxm_map[cnode]); + sn_cnodeid_to_nasid[cnode] = + pxm_to_nasid(nid_to_pxm_map[cnode]); numionodes = num_online_nodes(); scan_for_ionodes(); @@ -468,7 +483,8 @@ void __init sn_cpu_init(void) cnode = nasid_to_cnodeid(nasid); - pda->p_nodepda = nodepdaindr[cnode]; + sn_nodepda = nodepdaindr[cnode]; + pda->led_address = (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); pda->led_state = LED_ALWAYS_SET; @@ -477,15 +493,18 @@ void __init sn_cpu_init(void) pda->idle_flag = 0; if (cpuid != 0) { - memcpy(pda->cnodeid_to_nasid_table, - pdacpu(0)->cnodeid_to_nasid_table, - sizeof(pda->cnodeid_to_nasid_table)); + /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */ + memcpy(sn_cnodeid_to_nasid, + (&per_cpu(__sn_cnodeid_to_nasid, 0)), + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); } /* * Check for WARs. * Only needs to be done once, on BSP. - * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i]. + * Has to be done after loop above, because it uses this cpu's + * sn_cnodeid_to_nasid table which was just initialized if this + * isn't cpu 0. * Has to be done before assignment below. */ if (!wars_have_been_checked) { @@ -571,8 +590,7 @@ static void __init scan_for_ionodes(void) brd = find_lboard_any(brd, KLTYPE_SNIA); while (brd) { - pda->cnodeid_to_nasid_table[numionodes] = - brd->brd_nasid; + sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; physical_node_map[brd->brd_nasid] = numionodes; root_lboard[numionodes] = brd; numionodes++; @@ -593,8 +611,7 @@ static void __init scan_for_ionodes(void) root_lboard[nasid_to_cnodeid(nasid)], KLTYPE_TIO); while (brd) { - pda->cnodeid_to_nasid_table[numionodes] = - brd->brd_nasid; + sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; physical_node_map[brd->brd_nasid] = numionodes; root_lboard[numionodes] = brd; numionodes++; @@ -605,7 +622,6 @@ static void __init scan_for_ionodes(void) brd = find_lboard_any(brd, KLTYPE_TIO); } } - } int @@ -614,7 +630,8 @@ nasid_slice_to_cpuid(int nasid, int slice) long cpu; for (cpu=0; cpu < NR_CPUS; cpu++) - if (nodepda->phys_cpuid[cpu].nasid == nasid && nodepda->phys_cpuid[cpu].slice == slice) + if (cpuid_to_nasid(cpu) == nasid && + cpuid_to_slice(cpu) == slice) return cpu; return -1; diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 197356460ee1..833e700fdac9 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -28,6 +28,7 @@ #include <linux/vmalloc.h> #include <linux/seq_file.h> #include <linux/miscdevice.h> +#include <linux/utsname.h> #include <linux/cpumask.h> #include <linux/smp_lock.h> #include <linux/nodemask.h> @@ -43,6 +44,7 @@ #include <asm/sn/module.h> #include <asm/sn/geo.h> #include <asm/sn/sn2/sn_hwperf.h> +#include <asm/sn/addrs.h> static void *sn_hwperf_salheap = NULL; static int sn_hwperf_obj_cnt = 0; @@ -81,26 +83,45 @@ out: return e; } +static int sn_hwperf_location_to_bpos(char *location, + int *rack, int *bay, int *slot, int *slab) +{ + char type; + + /* first scan for an old style geoid string */ + if (sscanf(location, "%03d%c%02d#%d", + rack, &type, bay, slab) == 4) + *slot = 0; + else /* scan for a new bladed geoid string */ + if (sscanf(location, "%03d%c%02d^%02d#%d", + rack, &type, bay, slot, slab) != 5) + return -1; + /* success */ + return 0; +} + static int sn_hwperf_geoid_to_cnode(char *location) { int cnode; geoid_t geoid; moduleid_t module_id; - char type; - int rack, slot, slab; - int this_rack, this_slot, this_slab; + int rack, bay, slot, slab; + int this_rack, this_bay, this_slot, this_slab; - if (sscanf(location, "%03d%c%02d#%d", &rack, &type, &slot, &slab) != 4) + if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) return -1; for (cnode = 0; cnode < numionodes; cnode++) { geoid = cnodeid_get_geoid(cnode); module_id = geo_module(geoid); this_rack = MODULE_GET_RACK(module_id); - this_slot = MODULE_GET_BPOS(module_id); + this_bay = MODULE_GET_BPOS(module_id); + this_slot = geo_slot(geoid); this_slab = geo_slab(geoid); - if (rack == this_rack && slot == this_slot && slab == this_slab) + if (rack == this_rack && bay == this_bay && + slot == this_slot && slab == this_slab) { break; + } } return cnode < numionodes ? cnode : -1; @@ -153,11 +174,36 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, return slabname; } +static void print_pci_topology(struct seq_file *s, + struct sn_hwperf_object_info *obj, int *ordinal, + u64 rack, u64 bay, u64 slot, u64 slab) +{ + char *p1; + char *p2; + char *pg; + + if (!(pg = (char *)get_zeroed_page(GFP_KERNEL))) + return; /* ignore */ + if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab, + __pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) { + for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) { + if (!(p2 = strchr(p1, '\n'))) + break; + *p2 = '\0'; + seq_printf(s, "pcibus %d %s-%s\n", + *ordinal, obj->location, p1); + (*ordinal)++; + p1 = p2 + 1; + } + } + free_page((unsigned long)pg); +} + static int sn_topology_show(struct seq_file *s, void *d) { int sz; int pt; - int e; + int e = 0; int i; int j; const char *slabname; @@ -169,11 +215,44 @@ static int sn_topology_show(struct seq_file *s, void *d) struct sn_hwperf_object_info *p; struct sn_hwperf_object_info *obj = d; /* this object */ struct sn_hwperf_object_info *objs = s->private; /* all objects */ + int rack, bay, slot, slab; + u8 shubtype; + u8 system_size; + u8 sharing_size; + u8 partid; + u8 coher; + u8 nasid_shift; + u8 region_size; + u16 nasid_mask; + int nasid_msb; + int pci_bus_ordinal = 0; if (obj == objs) { - seq_printf(s, "# sn_topology version 1\n"); + seq_printf(s, "# sn_topology version 2\n"); seq_printf(s, "# objtype ordinal location partition" " [attribute value [, ...]]\n"); + + if (ia64_sn_get_sn_info(0, + &shubtype, &nasid_mask, &nasid_shift, &system_size, + &sharing_size, &partid, &coher, ®ion_size)) + BUG(); + for (nasid_msb=63; nasid_msb > 0; nasid_msb--) { + if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb)) + break; + } + seq_printf(s, "partition %u %s local " + "shubtype %s, " + "nasid_mask 0x%016lx, " + "nasid_bits %d:%d, " + "system_size %d, " + "sharing_size %d, " + "coherency_domain %d, " + "region_size %d\n", + + partid, system_utsname.nodename, + shubtype ? "shub2" : "shub1", + (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, + system_size, sharing_size, coher, region_size); } if (SN_HWPERF_FOREIGN(obj)) { @@ -181,7 +260,7 @@ static int sn_topology_show(struct seq_file *s, void *d) return 0; } - for (i = 0; obj->name[i]; i++) { + for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) { if (obj->name[i] == ' ') obj->name[i] = '_'; } @@ -221,6 +300,17 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_putc(s, '\n'); } } + + /* + * PCI busses attached to this node, if any + */ + if (sn_hwperf_location_to_bpos(obj->location, + &rack, &bay, &slot, &slab)) { + /* export pci bus info */ + print_pci_topology(s, obj, &pci_bus_ordinal, + rack, bay, slot, slab); + + } } if (obj->ports) { @@ -397,6 +487,9 @@ static int sn_hwperf_map_err(int hwperf_err) break; case SN_HWPERF_OP_BUSY: + e = -EBUSY; + break; + case SN_HWPERF_OP_RECONFIGURE: e = -EAGAIN; break; @@ -549,6 +642,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) r = sn_hwperf_op_cpu(&op_info); if (r) { r = sn_hwperf_map_err(r); + a.v0 = v0; goto error; } break; diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c new file mode 100644 index 000000000000..ab9b5f35c2a7 --- /dev/null +++ b/arch/ia64/sn/kernel/tiocx.c @@ -0,0 +1,552 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/proc_fs.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <asm/uaccess.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/io.h> +#include <asm/sn/types.h> +#include <asm/sn/shubio.h> +#include <asm/sn/tiocx.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include "tio.h" +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +#define CX_DEV_NONE 0 +#define DEVICE_NAME "tiocx" +#define WIDGET_ID 0 +#define TIOCX_DEBUG 0 + +#if TIOCX_DEBUG +#define DBG(fmt...) printk(KERN_ALERT fmt) +#else +#define DBG(fmt...) +#endif + +struct device_attribute dev_attr_cxdev_control; + +/** + * tiocx_match - Try to match driver id list with device. + * @dev: device pointer + * @drv: driver pointer + * + * Returns 1 if match, 0 otherwise. + */ +static int tiocx_match(struct device *dev, struct device_driver *drv) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = to_cx_driver(drv); + const struct cx_device_id *ids = cx_drv->id_table; + + if (!ids) + return 0; + + while (ids->part_num) { + if (ids->part_num == cx_dev->cx_id.part_num) + return 1; + ids++; + } + return 0; + +} + +static int tiocx_hotplug(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) +{ + return -ENODEV; +} + +static void tiocx_bus_release(struct device *dev) +{ + kfree(to_cx_dev(dev)); +} + +struct bus_type tiocx_bus_type = { + .name = "tiocx", + .match = tiocx_match, + .hotplug = tiocx_hotplug, +}; + +/** + * cx_device_match - Find cx_device in the id table. + * @ids: id table from driver + * @cx_device: part/mfg id for the device + * + */ +static const struct cx_device_id *cx_device_match(const struct cx_device_id + *ids, + struct cx_dev *cx_device) +{ + /* + * NOTES: We may want to check for CX_ANY_ID too. + * Do we want to match against nasid too? + * CX_DEV_NONE == 0, if the driver tries to register for + * part/mfg == 0 we should return no-match (NULL) here. + */ + while (ids->part_num && ids->mfg_num) { + if (ids->part_num == cx_device->cx_id.part_num && + ids->mfg_num == cx_device->cx_id.mfg_num) + return ids; + ids++; + } + + return NULL; +} + +/** + * cx_device_probe - Look for matching device. + * Call driver probe routine if found. + * @cx_driver: driver table (cx_drv struct) from driver + * @cx_device: part/mfg id for the device + */ +static int cx_device_probe(struct device *dev) +{ + const struct cx_device_id *id; + struct cx_drv *cx_drv = to_cx_driver(dev->driver); + struct cx_dev *cx_dev = to_cx_dev(dev); + int error = 0; + + if (!cx_dev->driver && cx_drv->probe) { + id = cx_device_match(cx_drv->id_table, cx_dev); + if (id) { + if ((error = cx_drv->probe(cx_dev, id)) < 0) + return error; + else + cx_dev->driver = cx_drv; + } + } + + return error; +} + +/** + * cx_driver_remove - Remove driver from device struct. + * @dev: device + */ +static int cx_driver_remove(struct device *dev) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = cx_dev->driver; + if (cx_drv->remove) + cx_drv->remove(cx_dev); + cx_dev->driver = NULL; + return 0; +} + +/** + * cx_driver_register - Register the driver. + * @cx_driver: driver table (cx_drv struct) from driver + * + * Called from the driver init routine to register a driver. + * The cx_drv struct contains the driver name, a pointer to + * a table of part/mfg numbers and a pointer to the driver's + * probe/attach routine. + */ +int cx_driver_register(struct cx_drv *cx_driver) +{ + cx_driver->driver.name = cx_driver->name; + cx_driver->driver.bus = &tiocx_bus_type; + cx_driver->driver.probe = cx_device_probe; + cx_driver->driver.remove = cx_driver_remove; + + return driver_register(&cx_driver->driver); +} + +/** + * cx_driver_unregister - Unregister the driver. + * @cx_driver: driver table (cx_drv struct) from driver + */ +int cx_driver_unregister(struct cx_drv *cx_driver) +{ + driver_unregister(&cx_driver->driver); + return 0; +} + +/** + * cx_device_register - Register a device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * @hubdev: hub info associated with this device + * + */ +int +cx_device_register(nasid_t nasid, int part_num, int mfg_num, + struct hubdev_info *hubdev) +{ + struct cx_dev *cx_dev; + + cx_dev = kcalloc(1, sizeof(struct cx_dev), GFP_KERNEL); + DBG("cx_dev= 0x%p\n", cx_dev); + if (cx_dev == NULL) + return -ENOMEM; + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + cx_dev->cx_id.nasid = nasid; + cx_dev->hubdev = hubdev; + + cx_dev->dev.parent = NULL; + cx_dev->dev.bus = &tiocx_bus_type; + cx_dev->dev.release = tiocx_bus_release; + snprintf(cx_dev->dev.bus_id, BUS_ID_SIZE, "%d.0x%x", + cx_dev->cx_id.nasid, cx_dev->cx_id.part_num); + device_register(&cx_dev->dev); + get_device(&cx_dev->dev); + + device_create_file(&cx_dev->dev, &dev_attr_cxdev_control); + + return 0; +} + +/** + * cx_device_unregister - Unregister a device. + * @cx_dev: part/mfg id for the device + */ +int cx_device_unregister(struct cx_dev *cx_dev) +{ + put_device(&cx_dev->dev); + device_unregister(&cx_dev->dev); + return 0; +} + +/** + * cx_device_reload - Reload the device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * + * Remove the device associated with 'nasid' from device list and then + * call device-register with the given part/mfg numbers. + */ +static int cx_device_reload(struct cx_dev *cx_dev) +{ + device_remove_file(&cx_dev->dev, &dev_attr_cxdev_control); + cx_device_unregister(cx_dev); + return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, + cx_dev->cx_id.mfg_num, cx_dev->hubdev); +} + +static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget, + u64 sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_ALLOC, nasid, + widget, sn_irq_info, req_irq, + req_nasid, req_slice); + return rv.status; +} + +static inline void tiocx_intr_free(nasid_t nasid, int widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_FREE, nasid, + widget, sn_irq_info->irq_irq, + sn_irq_info->irq_cookie, 0, 0); +} + +struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, + nasid_t req_nasid, int slice) +{ + struct sn_irq_info *sn_irq_info; + int status; + int sn_irq_size = sizeof(struct sn_irq_info); + + if ((nasid & 1) == 0) + return NULL; + + sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL); + if (sn_irq_info == NULL) + return NULL; + + memset(sn_irq_info, 0x0, sn_irq_size); + + status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, + req_nasid, slice); + if (status) { + kfree(sn_irq_info); + return NULL; + } else { + return sn_irq_info; + } +} + +void tiocx_irq_free(struct sn_irq_info *sn_irq_info) +{ + uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; + nasid_t nasid = NASID_GET(bridge); + int widget; + + if (nasid & 1) { + widget = TIO_SWIN_WIDGETNUM(bridge); + tiocx_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + } +} + +uint64_t tiocx_dma_addr(uint64_t addr) +{ + return PHYS_TO_TIODMA(addr); +} + +uint64_t tiocx_swin_base(int nasid) +{ + return TIO_SWIN_BASE(nasid, TIOCX_CORELET); +} + +EXPORT_SYMBOL(cx_driver_register); +EXPORT_SYMBOL(cx_driver_unregister); +EXPORT_SYMBOL(cx_device_register); +EXPORT_SYMBOL(cx_device_unregister); +EXPORT_SYMBOL(tiocx_irq_alloc); +EXPORT_SYMBOL(tiocx_irq_free); +EXPORT_SYMBOL(tiocx_bus_type); +EXPORT_SYMBOL(tiocx_dma_addr); +EXPORT_SYMBOL(tiocx_swin_base); + +static void tio_conveyor_set(nasid_t nasid, int enable_flag) +{ + uint64_t ice_frz; + uint64_t disable_cb = (1ull << 61); + + if (!(nasid & 1)) + return; + + ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG); + if (enable_flag) { + if (!(ice_frz & disable_cb)) /* already enabled */ + return; + ice_frz &= ~disable_cb; + } else { + if (ice_frz & disable_cb) /* already disabled */ + return; + ice_frz |= disable_cb; + } + DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz); + REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz); +} + +#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1) +#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0) + +static void tio_corelet_reset(nasid_t nasid, int corelet) +{ + if (!(nasid & 1)) + return; + + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet); + udelay(2000); + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0); + udelay(2000); +} + +static int tiocx_btchar_get(int nasid) +{ + moduleid_t module_id; + geoid_t geoid; + int cnodeid; + + cnodeid = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnodeid); + module_id = geo_module(geoid); + return MODULE_GET_BTCHAR(module_id); +} + +static int is_fpga_brick(int nasid) +{ + switch (tiocx_btchar_get(nasid)) { + case L1_BRICKTYPE_SA: + case L1_BRICKTYPE_ATHENA: + return 1; + } + return 0; +} + +static int bitstream_loaded(nasid_t nasid) +{ + uint64_t cx_credits; + + cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3); + cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK; + DBG("cx_credits= 0x%lx\n", cx_credits); + + return (cx_credits == 0xf) ? 1 : 0; +} + +static int tiocx_reload(struct cx_dev *cx_dev) +{ + int part_num = CX_DEV_NONE; + int mfg_num = CX_DEV_NONE; + nasid_t nasid = cx_dev->cx_id.nasid; + + if (bitstream_loaded(nasid)) { + uint64_t cx_id; + + cx_id = + *(volatile int32_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) + + WIDGET_ID); + part_num = XWIDGET_PART_NUM(cx_id); + mfg_num = XWIDGET_MFG_NUM(cx_id); + DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); + /* just ignore it if it's a CE */ + if (part_num == TIO_CE_ASIC_PARTNUM) + return 0; + } + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + + /* + * Delete old device and register the new one. It's ok if + * part_num/mfg_num == CX_DEV_NONE. We want to register + * devices in the table even if a bitstream isn't loaded. + * That allows use to see that a bitstream isn't loaded via + * TIOCX_IOCTL_DEV_LIST. + */ + return cx_device_reload(cx_dev); +} + +static ssize_t show_cxdev_control(struct device *dev, char *buf) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + + return sprintf(buf, "0x%x 0x%x 0x%x %d\n", + cx_dev->cx_id.nasid, + cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, + tiocx_btchar_get(cx_dev->cx_id.nasid)); +} + +static ssize_t store_cxdev_control(struct device *dev, const char *buf, + size_t count) +{ + int n; + struct cx_dev *cx_dev = to_cx_dev(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 0) + return 0; + + n = simple_strtoul(buf, NULL, 0); + + switch (n) { + case 1: + tiocx_reload(cx_dev); + break; + case 3: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + break; + default: + break; + } + + return count; +} + +DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control); + +static int __init tiocx_init(void) +{ + cnodeid_t cnodeid; + int found_tiocx_device = 0; + + bus_register(&tiocx_bus_type); + + for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) { + nasid_t nasid; + + if ((nasid = cnodeid_to_nasid(cnodeid)) < 0) + break; /* No more nasids .. bail out of loop */ + + if ((nasid & 0x1) && is_fpga_brick(nasid)) { + struct hubdev_info *hubdev; + struct xwidget_info *widgetp; + + DBG("Found TIO at nasid 0x%x\n", nasid); + + hubdev = + (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo); + + widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET]; + + /* The CE hangs off of the CX port but is not an FPGA */ + if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM) + continue; + + tio_corelet_reset(nasid, TIOCX_CORELET); + tio_conveyor_enable(nasid); + + if (cx_device_register + (nasid, widgetp->xwi_hwid.part_num, + widgetp->xwi_hwid.mfg_num, hubdev) < 0) + return -ENXIO; + else + found_tiocx_device++; + } + } + + /* It's ok if we find zero devices. */ + DBG("found_tiocx_device= %d\n", found_tiocx_device); + + return 0; +} + +static void __exit tiocx_exit(void) +{ + struct device *dev; + struct device *tdev; + + DBG("tiocx_exit\n"); + + /* + * Unregister devices. + */ + list_for_each_entry_safe(dev, tdev, &tiocx_bus_type.devices.list, + bus_list) { + if (dev) { + struct cx_dev *cx_dev = to_cx_dev(dev); + device_remove_file(dev, &dev_attr_cxdev_control); + cx_device_unregister(cx_dev); + } + } + + bus_unregister(&tiocx_bus_type); +} + +module_init(tiocx_init); +module_exit(tiocx_exit); + +/************************************************************************ + * Module licensing and description + ************************************************************************/ +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bruce Losure <blosure@sgi.com>"); +MODULE_DESCRIPTION("TIOCX module"); +MODULE_SUPPORTED_DEVICE(DEVICE_NAME); diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c new file mode 100644 index 000000000000..3be52a34c80f --- /dev/null +++ b/arch/ia64/sn/kernel/xp_main.c @@ -0,0 +1,289 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition (XP) base. + * + * XP provides a base from which its users can interact + * with XPC, yet not be dependent on XPC. + * + */ + + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/xp.h> + + +/* + * Target of nofault PIO read. + */ +u64 xp_nofault_PIOR_target; + + +/* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. + */ +struct xpc_registration xpc_registrations[XPC_NCHANNELS]; + + +/* + * Initialize the XPC interface to indicate that XPC isn't loaded. + */ +static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; } + +struct xpc_interface xpc_interface = { + (void (*)(int)) xpc_notloaded, + (void (*)(int)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *)) + xpc_notloaded, + (void (*)(partid_t, int, void *)) xpc_notloaded, + (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded +}; + + +/* + * XPC calls this when it (the XPC module) has been loaded. + */ +void +xpc_set_interface(void (*connect)(int), + void (*disconnect)(int), + enum xpc_retval (*allocate)(partid_t, int, u32, void **), + enum xpc_retval (*send)(partid_t, int, void *), + enum xpc_retval (*send_notify)(partid_t, int, void *, + xpc_notify_func, void *), + void (*received)(partid_t, int, void *), + enum xpc_retval (*partid_to_nasids)(partid_t, void *)) +{ + xpc_interface.connect = connect; + xpc_interface.disconnect = disconnect; + xpc_interface.allocate = allocate; + xpc_interface.send = send; + xpc_interface.send_notify = send_notify; + xpc_interface.received = received; + xpc_interface.partid_to_nasids = partid_to_nasids; +} + + +/* + * XPC calls this when it (the XPC module) is being unloaded. + */ +void +xpc_clear_interface(void) +{ + xpc_interface.connect = (void (*)(int)) xpc_notloaded; + xpc_interface.disconnect = (void (*)(int)) xpc_notloaded; + xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32, + void **)) xpc_notloaded; + xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *)) + xpc_notloaded; + xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *, + xpc_notify_func, void *)) xpc_notloaded; + xpc_interface.received = (void (*)(partid_t, int, void *)) + xpc_notloaded; + xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *)) + xpc_notloaded; +} + + +/* + * Register for automatic establishment of a channel connection whenever + * a partition comes up. + * + * Arguments: + * + * ch_number - channel # to register for connection. + * func - function to call for asynchronous notification of channel + * state changes (i.e., connection, disconnection, error) and + * the arrival of incoming messages. + * key - pointer to optional user-defined value that gets passed back + * to the user on any callouts made to func. + * payload_size - size in bytes of the XPC message's payload area which + * contains a user-defined message. The user should make + * this large enough to hold their largest message. + * nentries - max #of XPC message entries a message queue can contain. + * The actual number, which is determined when a connection + * is established and may be less then requested, will be + * passed to the user via the xpcConnected callout. + * assigned_limit - max number of kthreads allowed to be processing + * messages (per connection) at any given instant. + * idle_limit - max number of kthreads allowed to be idle at any given + * instant. + */ +enum xpc_retval +xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, + u16 nentries, u32 assigned_limit, u32 idle_limit) +{ + struct xpc_registration *registration; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + DBUG_ON(payload_size == 0 || nentries == 0); + DBUG_ON(func == NULL); + DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); + + registration = &xpc_registrations[ch_number]; + + if (down_interruptible(®istration->sema) != 0) { + return xpcInterrupted; + } + + /* if XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func != NULL) { + up(®istration->sema); + return xpcAlreadyRegistered; + } + + /* register the channel for connection */ + registration->msg_size = XPC_MSG_SIZE(payload_size); + registration->nentries = nentries; + registration->assigned_limit = assigned_limit; + registration->idle_limit = idle_limit; + registration->key = key; + registration->func = func; + + up(®istration->sema); + + xpc_interface.connect(ch_number); + + return xpcSuccess; +} + + +/* + * Remove the registration for automatic connection of the specified channel + * when a partition comes up. + * + * Before returning this xpc_disconnect() will wait for all connections on the + * specified channel have been closed/torndown. So the caller can be assured + * that they will not be receiving any more callouts from XPC to their + * function registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_disconnect(int ch_number) +{ + struct xpc_registration *registration; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + + registration = &xpc_registrations[ch_number]; + + /* + * We've decided not to make this a down_interruptible(), since we + * figured XPC's users will just turn around and call xpc_disconnect() + * again anyways, so we might as well wait, if need be. + */ + down(®istration->sema); + + /* if !XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func == NULL) { + up(®istration->sema); + return; + } + + /* remove the connection registration for the specified channel */ + registration->func = NULL; + registration->key = NULL; + registration->nentries = 0; + registration->msg_size = 0; + registration->assigned_limit = 0; + registration->idle_limit = 0; + + xpc_interface.disconnect(ch_number); + + up(®istration->sema); + + return; +} + + +int __init +xp_init(void) +{ + int ret, ch_number; + u64 func_addr = *(u64 *) xp_nofault_PIOR; + u64 err_func_addr = *(u64 *) xp_error_PIOR; + + + if (!ia64_platform_is("sn2")) { + return -ENODEV; + } + + /* + * Register a nofault code region which performs a cross-partition + * PIO read. If the PIO read times out, the MCA handler will consume + * the error and return to a kernel-provided instruction to indicate + * an error. This PIO read exists because it is guaranteed to timeout + * if the destination is down (AMO operations do not timeout on at + * least some CPUs on Shubs <= v1.2, which unfortunately we have to + * work around). + */ + if ((ret = sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 1)) != 0) { + printk(KERN_ERR "XP: can't register nofault code, error=%d\n", + ret); + } + /* + * Setup the nofault PIO read target. (There is no special reason why + * SH_IPI_ACCESS was selected.) + */ + if (is_shub2()) { + xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + } else { + xp_nofault_PIOR_target = SH1_IPI_ACCESS; + } + + /* initialize the connection registration semaphores */ + for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) { + sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */ + } + + return 0; +} +module_init(xp_init); + + +void __exit +xp_exit(void) +{ + u64 func_addr = *(u64 *) xp_nofault_PIOR; + u64 err_func_addr = *(u64 *) xp_error_PIOR; + + + /* unregister the PIO read nofault code region */ + (void) sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 0); +} +module_exit(xp_exit); + + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition (XP) base"); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(xp_nofault_PIOR); +EXPORT_SYMBOL(xp_nofault_PIOR_target); +EXPORT_SYMBOL(xpc_registrations); +EXPORT_SYMBOL(xpc_interface); +EXPORT_SYMBOL(xpc_clear_interface); +EXPORT_SYMBOL(xpc_set_interface); +EXPORT_SYMBOL(xpc_connect); +EXPORT_SYMBOL(xpc_disconnect); + diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S new file mode 100644 index 000000000000..b772543053c9 --- /dev/null +++ b/arch/ia64/sn/kernel/xp_nofault.S @@ -0,0 +1,31 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * The xp_nofault_PIOR function takes a pointer to a remote PIO register + * and attempts to load and consume a value from it. This function + * will be registered as a nofault code block. In the event that the + * PIO read fails, the MCA handler will force the error to look + * corrected and vector to the xp_error_PIOR which will return an error. + * + * extern int xp_nofault_PIOR(void *remote_register); + */ + + .global xp_nofault_PIOR +xp_nofault_PIOR: + mov r8=r0 // Stage a success return value + ld8.acq r9=[r32];; // PIO Read the specified register + adds r9=1,r9 // Add to force a consume + br.ret.sptk.many b0;; // Return success + + .global xp_error_PIOR +xp_error_PIOR: + mov r8=1 // Return value of 1 + br.ret.sptk.many b0;; // Return failure + diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h new file mode 100644 index 000000000000..1a0aed8490d1 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc.h @@ -0,0 +1,991 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) structures and macros. + */ + +#ifndef _IA64_SN_KERNEL_XPC_H +#define _IA64_SN_KERNEL_XPC_H + + +#include <linux/config.h> +#include <linux/interrupt.h> +#include <linux/sysctl.h> +#include <linux/device.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sn/bte.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/addrs.h> +#include <asm/sn/mspec.h> +#include <asm/sn/shub_mmr.h> +#include <asm/sn/xp.h> + + +/* + * XPC Version numbers consist of a major and minor number. XPC can always + * talk to versions with same major #, and never talk to versions with a + * different major #. + */ +#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) +#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) + + +/* + * The next macros define word or bit representations for given + * C-brick nasid in either the SAL provided bit array representing + * nasids in the partition/machine or the AMO_t array used for + * inter-partition initiation communications. + * + * For SN2 machines, C-Bricks are alway even numbered NASIDs. As + * such, some space will be saved by insisting that nasid information + * passed from SAL always be packed for C-Bricks and the + * cross-partition interrupts use the same packing scheme. + */ +#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2) +#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1)) +#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \ + (1UL << XPC_NASID_B_INDEX(_n))) +#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) + +#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */ + +/* define the process name of HB checker and the CPU it is pinned to */ +#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" +#define XPC_HB_CHECK_CPU 0 + +/* define the process name of the discovery thread */ +#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" + + +#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p))) +#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p)) +#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p))) + + +/* + * Reserved Page provided by SAL. + * + * SAL provides one page per partition of reserved memory. When SAL + * initialization is complete, SAL_signature, SAL_version, partid, + * part_nasids, and mach_nasids are set. + * + * Note: Until vars_pa is set, the partition XPC code has not been initialized. + */ +struct xpc_rsvd_page { + u64 SAL_signature; /* SAL unique signature */ + u64 SAL_version; /* SAL specified version */ + u8 partid; /* partition ID from SAL */ + u8 version; + u8 pad[6]; /* pad to u64 align */ + u64 vars_pa; + u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; + u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; +}; +#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */ + +#define XPC_RSVD_PAGE_ALIGNED_SIZE \ + (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))) + + +/* + * Define the structures by which XPC variables can be exported to other + * partitions. (There are two: struct xpc_vars and struct xpc_vars_part) + */ + +/* + * The following structure describes the partition generic variables + * needed by other partitions in order to properly initialize. + * + * struct xpc_vars version number also applies to struct xpc_vars_part. + * Changes to either structure and/or related functionality should be + * reflected by incrementing either the major or minor version numbers + * of struct xpc_vars. + */ +struct xpc_vars { + u8 version; + u64 heartbeat; + u64 heartbeating_to_mask; + u64 kdb_status; /* 0 = machine running */ + int act_nasid; + int act_phys_cpuid; + u64 vars_part_pa; + u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ + AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ + AMO_t *act_amos; /* pointer to the first activation AMO */ +}; +#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */ + +#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars))) + +/* + * The following structure describes the per partition specific variables. + * + * An array of these structures, one per partition, will be defined. As a + * partition becomes active XPC will copy the array entry corresponding to + * itself from that partition. It is desirable that the size of this + * structure evenly divide into a cacheline, such that none of the entries + * in this array crosses a cacheline boundary. As it is now, each entry + * occupies half a cacheline. + */ +struct xpc_vars_part { + u64 magic; + + u64 openclose_args_pa; /* physical address of open and close args */ + u64 GPs_pa; /* physical address of Get/Put values */ + + u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */ + int IPI_nasid; /* nasid of where to send IPIs */ + int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */ + + u8 nchannels; /* #of defined channels supported */ + + u8 reserved[23]; /* pad to a full 64 bytes */ +}; + +/* + * The vars_part MAGIC numbers play a part in the first contact protocol. + * + * MAGIC1 indicates that the per partition specific variables for a remote + * partition have been initialized by this partition. + * + * MAGIC2 indicates that this partition has pulled the remote partititions + * per partition variables that pertain to this partition. + */ +#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ +#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ + + + +/* + * Functions registered by add_timer() or called by kernel_thread() only + * allow for a single 64-bit argument. The following macros can be used to + * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from + * the passed argument. + */ +#define XPC_PACK_ARGS(_arg1, _arg2) \ + ((((u64) _arg1) & 0xffffffff) | \ + ((((u64) _arg2) & 0xffffffff) << 32)) + +#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff) + + + +/* + * Define a Get/Put value pair (pointers) used with a message queue. + */ +struct xpc_gp { + s64 get; /* Get value */ + s64 put; /* Put value */ +}; + +#define XPC_GP_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS) + + + +/* + * Define a structure that contains arguments associated with opening and + * closing a channel. + */ +struct xpc_openclose_args { + u16 reason; /* reason why channel is closing */ + u16 msg_size; /* sizeof each message entry */ + u16 remote_nentries; /* #of message entries in remote msg queue */ + u16 local_nentries; /* #of message entries in local msg queue */ + u64 local_msgqueue_pa; /* physical address of local message queue */ +}; + +#define XPC_OPENCLOSE_ARGS_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS) + + + +/* struct xpc_msg flags */ + +#define XPC_M_DONE 0x01 /* msg has been received/consumed */ +#define XPC_M_READY 0x02 /* msg is ready to be sent */ +#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */ + + +#define XPC_MSG_ADDRESS(_payload) \ + ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET)) + + + +/* + * Defines notify entry. + * + * This is used to notify a message's sender that their message was received + * and consumed by the intended recipient. + */ +struct xpc_notify { + struct semaphore sema; /* notify semaphore */ + u8 type; /* type of notification */ + + /* the following two fields are only used if type == XPC_N_CALL */ + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* struct xpc_notify type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ + + + +/* + * Define the structure that manages all the stuff required by a channel. In + * particular, they are used to manage the messages sent across the channel. + * + * This structure is private to a partition, and is NOT shared across the + * partition boundary. + * + * There is an array of these structures for each remote partition. It is + * allocated at the time a partition becomes active. The array contains one + * of these structures for each potential channel connection to that partition. + * + * Each of these structures manages two message queues (circular buffers). + * They are allocated at the time a channel connection is made. One of + * these message queues (local_msgqueue) holds the locally created messages + * that are destined for the remote partition. The other of these message + * queues (remote_msgqueue) is a locally cached copy of the remote partition's + * own local_msgqueue. + * + * The following is a description of the Get/Put pointers used to manage these + * two message queues. Consider the local_msgqueue to be on one partition + * and the remote_msgqueue to be its cached copy on another partition. A + * description of what each of the lettered areas contains is included. + * + * + * local_msgqueue remote_msgqueue + * + * |/////////| |/////////| + * w_remote_GP.get --> +---------+ |/////////| + * | F | |/////////| + * remote_GP.get --> +---------+ +---------+ <-- local_GP->get + * | | | | + * | | | E | + * | | | | + * | | +---------+ <-- w_local_GP.get + * | B | |/////////| + * | | |////D////| + * | | |/////////| + * | | +---------+ <-- w_remote_GP.put + * | | |////C////| + * local_GP->put --> +---------+ +---------+ <-- remote_GP.put + * | | |/////////| + * | A | |/////////| + * | | |/////////| + * w_local_GP.put --> +---------+ |/////////| + * |/////////| |/////////| + * + * + * ( remote_GP.[get|put] are cached copies of the remote + * partition's local_GP->[get|put], and thus their values can + * lag behind their counterparts on the remote partition. ) + * + * + * A - Messages that have been allocated, but have not yet been sent to the + * remote partition. + * + * B - Messages that have been sent, but have not yet been acknowledged by the + * remote partition as having been received. + * + * C - Area that needs to be prepared for the copying of sent messages, by + * the clearing of the message flags of any previously received messages. + * + * D - Area into which sent messages are to be copied from the remote + * partition's local_msgqueue and then delivered to their intended + * recipients. [ To allow for a multi-message copy, another pointer + * (next_msg_to_pull) has been added to keep track of the next message + * number needing to be copied (pulled). It chases after w_remote_GP.put. + * Any messages lying between w_local_GP.get and next_msg_to_pull have + * been copied and are ready to be delivered. ] + * + * E - Messages that have been copied and delivered, but have not yet been + * acknowledged by the recipient as having been received. + * + * F - Messages that have been acknowledged, but XPC has not yet notified the + * sender that the message was received by its intended recipient. + * This is also an area that needs to be prepared for the allocating of + * new messages, by the clearing of the message flags of the acknowledged + * messages. + */ +struct xpc_channel { + partid_t partid; /* ID of remote partition connected */ + spinlock_t lock; /* lock for updating this structure */ + u32 flags; /* general flags */ + + enum xpc_retval reason; /* reason why channel is disconnect'g */ + int reason_line; /* line# disconnect initiated from */ + + u16 number; /* channel # */ + + u16 msg_size; /* sizeof each msg entry */ + u16 local_nentries; /* #of msg entries in local msg queue */ + u16 remote_nentries; /* #of msg entries in remote msg queue*/ + + void *local_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg *local_msgqueue; /* local message queue */ + void *remote_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */ + /* local message queue */ + u64 remote_msgqueue_pa; /* phys addr of remote partition's */ + /* local message queue */ + + atomic_t references; /* #of external references to queues */ + + atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ + wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + + /* queue of msg senders who want to be notified when msg received */ + + atomic_t n_to_notify; /* #of msg senders to notify */ + struct xpc_notify *notify_queue;/* notify queue for messages sent */ + + xpc_channel_func func; /* user's channel function */ + void *key; /* pointer to user's key */ + + struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ + struct semaphore teardown_sema; /* wait for teardown completion */ + + struct xpc_openclose_args *local_openclose_args; /* args passed on */ + /* opening or closing of channel */ + + /* various flavors of local and remote Get/Put values */ + + struct xpc_gp *local_GP; /* local Get/Put values */ + struct xpc_gp remote_GP; /* remote Get/Put values */ + struct xpc_gp w_local_GP; /* working local Get/Put values */ + struct xpc_gp w_remote_GP; /* working remote Get/Put values */ + s64 next_msg_to_pull; /* Put value of next msg to pull */ + + /* kthread management related fields */ + +// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps +// >>> allow the assigned limit be unbounded and let the idle limit be dynamic +// >>> dependent on activity over the last interval of time + atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ + u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ + atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ + u32 kthreads_idle_limit; /* limit on #of kthreads idle */ + atomic_t kthreads_active; /* #of kthreads actively working */ + // >>> following field is temporary + u32 kthreads_created; /* total #of kthreads created */ + + wait_queue_head_t idle_wq; /* idle kthread wait queue */ + +} ____cacheline_aligned; + + +/* struct xpc_channel flags */ + +#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ + +#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ + +#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ +#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ + + + +/* + * Manages channels on a partition basis. There is one of these structures + * for each partition (a partition will never utilize the structure that + * represents itself). + */ +struct xpc_partition { + + /* XPC HB infrastructure */ + + u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ + u64 remote_vars_pa; /* phys addr of partition's vars */ + u64 remote_vars_part_pa; /* phys addr of partition's vars part */ + u64 last_heartbeat; /* HB at last read */ + u64 remote_amos_page_pa; /* phys addr of partition's amos page */ + int remote_act_nasid; /* active part's act/deact nasid */ + int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */ + u32 act_IRQ_rcvd; /* IRQs since activation */ + spinlock_t act_lock; /* protect updating of act_state */ + u8 act_state; /* from XPC HB viewpoint */ + enum xpc_retval reason; /* reason partition is deactivating */ + int reason_line; /* line# deactivation initiated from */ + int reactivate_nasid; /* nasid in partition to reactivate */ + + + /* XPC infrastructure referencing and teardown control */ + + u8 setup_state; /* infrastructure setup state */ + wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ + atomic_t references; /* #of references to infrastructure */ + + + /* + * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN + * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION + * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE + * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.) + */ + + + u8 nchannels; /* #of defined channels supported */ + atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ + struct xpc_channel *channels;/* array of channel structures */ + + void *local_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp *local_GPs; /* local Get/Put values */ + void *remote_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */ + /* values */ + u64 remote_GPs_pa; /* phys address of remote partition's local */ + /* Get/Put values */ + + + /* fields used to pass args when opening or closing a channel */ + + void *local_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *local_openclose_args; /* local's args */ + void *remote_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ + /* args */ + u64 remote_openclose_args_pa; /* phys addr of remote's args */ + + + /* IPI sending, receiving and handling related fields */ + + int remote_IPI_nasid; /* nasid of where to send IPIs */ + int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */ + AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */ + + AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */ + u64 local_IPI_amo; /* IPI amo flags yet to be handled */ + char IPI_owner[8]; /* IPI owner's name */ + struct timer_list dropped_IPI_timer; /* dropped IPI timer */ + + spinlock_t IPI_lock; /* IPI handler lock */ + + + /* channel manager related fields */ + + atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ + wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ + +} ____cacheline_aligned; + + +/* struct xpc_partition act_state values (for XPC HB) */ + +#define XPC_P_INACTIVE 0x00 /* partition is not active */ +#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */ +#define XPC_P_ACTIVATING 0x02 /* activation thread started */ +#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */ +#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */ + + +#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ + xpc_deactivate_partition(__LINE__, (_p), (_reason)) + + +/* struct xpc_partition setup_state values */ + +#define XPC_P_UNSET 0x00 /* infrastructure was never setup */ +#define XPC_P_SETUP 0x01 /* infrastructure is setup */ +#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ +#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ + + +/* + * struct xpc_partition IPI_timer #of seconds to wait before checking for + * dropped IPIs. These occur whenever an IPI amo write doesn't complete until + * after the IPI was received. + */ +#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) + + +#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) + + + +/* found in xp_main.c */ +extern struct xpc_registration xpc_registrations[]; + + +/* >>> found in xpc_main.c only */ +extern struct device *xpc_part; +extern struct device *xpc_chan; +extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); +extern void xpc_dropped_IPI_check(struct xpc_partition *); +extern void xpc_activate_kthreads(struct xpc_channel *, int); +extern void xpc_create_kthreads(struct xpc_channel *, int); +extern void xpc_disconnect_wait(int); + + +/* found in xpc_main.c and efi-xpc.c */ +extern void xpc_activate_partition(struct xpc_partition *); + + +/* found in xpc_partition.c */ +extern int xpc_exiting; +extern int xpc_hb_interval; +extern int xpc_hb_check_interval; +extern struct xpc_vars *xpc_vars; +extern struct xpc_rsvd_page *xpc_rsvd_page; +extern struct xpc_vars_part *xpc_vars_part; +extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; +extern char xpc_remote_copy_buffer[]; +extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); +extern void xpc_allow_IPI_ops(void); +extern void xpc_restrict_IPI_ops(void); +extern int xpc_identify_act_IRQ_sender(void); +extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); +extern void xpc_mark_partition_inactive(struct xpc_partition *); +extern void xpc_discovery(void); +extern void xpc_check_remote_hb(void); +extern void xpc_deactivate_partition(const int, struct xpc_partition *, + enum xpc_retval); +extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *); + + +/* found in xpc_channel.c */ +extern void xpc_initiate_connect(int); +extern void xpc_initiate_disconnect(int); +extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **); +extern enum xpc_retval xpc_initiate_send(partid_t, int, void *); +extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *, + xpc_notify_func, void *); +extern void xpc_initiate_received(partid_t, int, void *); +extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *); +extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *); +extern void xpc_process_channel_activity(struct xpc_partition *); +extern void xpc_connected_callout(struct xpc_channel *); +extern void xpc_deliver_msg(struct xpc_channel *); +extern void xpc_disconnect_channel(const int, struct xpc_channel *, + enum xpc_retval, unsigned long *); +extern void xpc_disconnected_callout(struct xpc_channel *); +extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval); +extern void xpc_teardown_infrastructure(struct xpc_partition *); + + + +static inline void +xpc_wakeup_channel_mgr(struct xpc_partition *part) +{ + if (atomic_inc_return(&part->channel_mgr_requests) == 1) { + wake_up(&part->channel_mgr_wq); + } +} + + + +/* + * These next two inlines are used to keep us from tearing down a channel's + * msg queues while a thread may be referencing them. + */ +static inline void +xpc_msgqueue_ref(struct xpc_channel *ch) +{ + atomic_inc(&ch->references); +} + +static inline void +xpc_msgqueue_deref(struct xpc_channel *ch) +{ + s32 refs = atomic_dec_return(&ch->references); + + DBUG_ON(refs < 0); + if (refs == 0) { + xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); + } +} + + + +#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ + xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) + + +/* + * These two inlines are used to keep us from tearing down a partition's + * setup infrastructure while a thread may be referencing it. + */ +static inline void +xpc_part_deref(struct xpc_partition *part) +{ + s32 refs = atomic_dec_return(&part->references); + + + DBUG_ON(refs < 0); + if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) { + wake_up(&part->teardown_wq); + } +} + +static inline int +xpc_part_ref(struct xpc_partition *part) +{ + int setup; + + + atomic_inc(&part->references); + setup = (part->setup_state == XPC_P_SETUP); + if (!setup) { + xpc_part_deref(part); + } + return setup; +} + + + +/* + * The following macro is to be used for the setting of the reason and + * reason_line fields in both the struct xpc_channel and struct xpc_partition + * structures. + */ +#define XPC_SET_REASON(_p, _reason, _line) \ + { \ + (_p)->reason = _reason; \ + (_p)->reason_line = _line; \ + } + + + +/* + * The following set of macros and inlines are used for the sending and + * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, + * one that is associated with partition activity (SGI_XPC_ACTIVATE) and + * the other that is associated with channel activity (SGI_XPC_NOTIFY). + */ + +static inline u64 +xpc_IPI_receive(AMO_t *amo) +{ + return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR); +} + + +static inline enum xpc_retval +xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector) +{ + int ret = 0; + unsigned long irq_flags; + + + local_irq_save(irq_flags); + + FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag); + sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IPIs and AMOs to it until the heartbeat times out. + */ + ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + return ((ret == 0) ? xpcSuccess : xpcPioReadError); +} + + +/* + * IPIs associated with SGI_XPC_ACTIVATE IRQ. + */ + +/* + * Flag the appropriate AMO variable and send an IPI to the specified node. + */ +static inline void +xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid, + int to_phys_cpuid) +{ + int w_index = XPC_NASID_W_INDEX(from_nasid); + int b_index = XPC_NASID_B_INDEX(from_nasid); + AMO_t *amos = (AMO_t *) __va(amos_page + + (XP_MAX_PARTITIONS * sizeof(AMO_t))); + + + (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, + to_phys_cpuid, SGI_XPC_ACTIVATE); +} + +static inline void +xpc_IPI_send_activate(struct xpc_vars *vars) +{ + xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0), + vars->act_nasid, vars->act_phys_cpuid); +} + +static inline void +xpc_IPI_send_activated(struct xpc_partition *part) +{ + xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0), + part->remote_act_nasid, part->remote_act_phys_cpuid); +} + +static inline void +xpc_IPI_send_reactivate(struct xpc_partition *part) +{ + xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid, + xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); +} + + +/* + * IPIs associated with SGI_XPC_NOTIFY IRQ. + */ + +/* + * Send an IPI to the remote partition that is associated with the + * specified channel. + */ +#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \ + xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f) + +static inline void +xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string, + unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + enum xpc_retval ret; + + + if (likely(part->act_state != XPC_P_DEACTIVATING)) { + ret = xpc_IPI_send(part->remote_IPI_amo_va, + (u64) ipi_flag << (ch->number * 8), + part->remote_IPI_nasid, + part->remote_IPI_phys_cpuid, + SGI_XPC_NOTIFY); + dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", + ipi_flag_string, ch->partid, ch->number, ret); + if (unlikely(ret != xpcSuccess)) { + if (irq_flags != NULL) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + } + XPC_DEACTIVATE_PARTITION(part, ret); + if (irq_flags != NULL) { + spin_lock_irqsave(&ch->lock, *irq_flags); + } + } + } +} + + +/* + * Make it look like the remote partition, which is associated with the + * specified channel, sent us an IPI. This faked IPI will be handled + * by xpc_dropped_IPI_check(). + */ +#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \ + xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f) + +static inline void +xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag, + char *ipi_flag_string) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + + + FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable), + FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8))); + dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", + ipi_flag_string, ch->partid, ch->number); +} + + +/* + * The sending and receiving of IPIs includes the setting of an AMO variable + * to indicate the reason the IPI was sent. The 64-bit variable is divided + * up into eight bytes, ordered from right to left. Byte zero pertains to + * channel 0, byte one to channel 1, and so on. Each byte is described by + * the following IPI flags. + */ + +#define XPC_IPI_CLOSEREQUEST 0x01 +#define XPC_IPI_CLOSEREPLY 0x02 +#define XPC_IPI_OPENREQUEST 0x04 +#define XPC_IPI_OPENREPLY 0x08 +#define XPC_IPI_MSGREQUEST 0x10 + + +/* given an AMO variable and a channel#, get its associated IPI flags */ +#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) + +#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f) +#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010) + + +static inline void +xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->local_openclose_args; + + + args->reason = ch->reason; + + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags); +} + +static inline void +xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags) +{ + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags); +} + +static inline void +xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->local_openclose_args; + + + args->msg_size = ch->msg_size; + args->local_nentries = ch->local_nentries; + + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags); +} + +static inline void +xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->local_openclose_args; + + + args->remote_nentries = ch->remote_nentries; + args->local_nentries = ch->local_nentries; + args->local_msgqueue_pa = __pa(ch->local_msgqueue); + + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags); +} + +static inline void +xpc_IPI_send_msgrequest(struct xpc_channel *ch) +{ + XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL); +} + +static inline void +xpc_IPI_send_local_msgrequest(struct xpc_channel *ch) +{ + XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST); +} + + +/* + * Memory for XPC's AMO variables is allocated by the MSPEC driver. These + * pages are located in the lowest granule. The lowest granule uses 4k pages + * for cached references and an alternate TLB handler to never provide a + * cacheable mapping for the entire region. This will prevent speculative + * reading of cached copies of our lines from being issued which will cause + * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 + * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c) + * and an additional 16 AMO variables for partition activation (xpc_hb.c). + */ +static inline AMO_t * +xpc_IPI_init(partid_t partid) +{ + AMO_t *part_amo = xpc_vars->amos_page + partid; + + + xpc_IPI_receive(part_amo); + return part_amo; +} + + + +static inline enum xpc_retval +xpc_map_bte_errors(bte_result_t error) +{ + switch (error) { + case BTE_SUCCESS: return xpcSuccess; + case BTEFAIL_DIR: return xpcBteDirectoryError; + case BTEFAIL_POISON: return xpcBtePoisonError; + case BTEFAIL_WERR: return xpcBteWriteError; + case BTEFAIL_ACCESS: return xpcBteAccessError; + case BTEFAIL_PWERR: return xpcBtePWriteError; + case BTEFAIL_PRERR: return xpcBtePReadError; + case BTEFAIL_TOUT: return xpcBteTimeOutError; + case BTEFAIL_XTERR: return xpcBteXtalkError; + case BTEFAIL_NOTAVAIL: return xpcBteNotAvailable; + default: return xpcBteUnmappedError; + } +} + + + +static inline void * +xpc_kmalloc_cacheline_aligned(size_t size, int flags, void **base) +{ + /* see if kmalloc will give us cachline aligned memory by default */ + *base = kmalloc(size, flags); + if (*base == NULL) { + return NULL; + } + if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) { + return *base; + } + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kmalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) { + return NULL; + } + return (void *) L1_CACHE_ALIGN((u64) *base); +} + + +/* + * Check to see if there is any channel activity to/from the specified + * partition. + */ +static inline void +xpc_check_for_channel_activity(struct xpc_partition *part) +{ + u64 IPI_amo; + unsigned long irq_flags; + + + IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va); + if (IPI_amo == 0) { + return; + } + + spin_lock_irqsave(&part->IPI_lock, irq_flags); + part->local_IPI_amo |= IPI_amo; + spin_unlock_irqrestore(&part->IPI_lock, irq_flags); + + dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n", + XPC_PARTID(part), IPI_amo); + + xpc_wakeup_channel_mgr(part); +} + + +#endif /* _IA64_SN_KERNEL_XPC_H */ + diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c new file mode 100644 index 000000000000..0bf6fbcc46d2 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -0,0 +1,2297 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) channel support. + * + * This is the part of XPC that manages the channels and + * sends/receives messages across them to/from other partitions. + * + */ + + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/cache.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm/sn/bte.h> +#include <asm/sn/sn_sal.h> +#include "xpc.h" + + +/* + * Set up the initial values for the XPartition Communication channels. + */ +static void +xpc_initialize_channels(struct xpc_partition *part, partid_t partid) +{ + int ch_number; + struct xpc_channel *ch; + + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + ch->local_GP = &part->local_GPs[ch_number]; + ch->local_openclose_args = + &part->local_openclose_args[ch_number]; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + sema_init(&ch->msg_to_pull_sema, 1); /* mutex */ + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } +} + + +/* + * Setup the infrastructure necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +enum xpc_retval +xpc_setup_infrastructure(struct xpc_partition *part) +{ + int ret; + struct timer_list *timer; + partid_t partid = XPC_PARTID(part); + + + /* + * Zero out MOST of the entry for this partition. Only the fields + * starting with `nchannels' will be zeroed. The preceding fields must + * remain `viable' across partition ups and downs, since they may be + * referenced during this memset() operation. + */ + memset(&part->nchannels, 0, sizeof(struct xpc_partition) - + offsetof(struct xpc_partition, nchannels)); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ + part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpcNoMemory; + } + memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS); + + part->nchannels = XPC_NCHANNELS; + + + /* allocate all the required GET/PUT values */ + + part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, + GFP_KERNEL, &part->local_GPs_base); + if (part->local_GPs == NULL) { + kfree(part->channels); + part->channels = NULL; + dev_err(xpc_chan, "can't get memory for local get/put " + "values\n"); + return xpcNoMemory; + } + memset(part->local_GPs, 0, XPC_GP_SIZE); + + part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, + GFP_KERNEL, &part->remote_GPs_base); + if (part->remote_GPs == NULL) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + dev_err(xpc_chan, "can't get memory for remote get/put " + "values\n"); + return xpcNoMemory; + } + memset(part->remote_GPs, 0, XPC_GP_SIZE); + + + /* allocate all the required open and close args */ + + part->local_openclose_args = xpc_kmalloc_cacheline_aligned( + XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, + &part->local_openclose_args_base); + if (part->local_openclose_args == NULL) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + dev_err(xpc_chan, "can't get memory for local connect args\n"); + return xpcNoMemory; + } + memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); + + part->remote_openclose_args = xpc_kmalloc_cacheline_aligned( + XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, + &part->remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + return xpcNoMemory; + } + memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE); + + + xpc_initialize_channels(part, partid); + + atomic_set(&part->nchannels_active, 0); + + + /* local_IPI_amo were set to 0 by an earlier memset() */ + + /* Initialize this partitions AMO_t structure */ + part->local_IPI_amo_va = xpc_IPI_init(partid); + + spin_lock_init(&part->IPI_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + sprintf(part->IPI_owner, "xpc%02d", partid); + ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, + part->IPI_owner, (void *) (u64) partid); + if (ret != 0) { + kfree(part->channels); + part->channels = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " + "errno=%d\n", -ret); + return xpcLackOfResources; + } + + /* Setup a timer to check for dropped IPIs */ + timer = &part->dropped_IPI_timer; + init_timer(timer); + timer->function = (void (*)(unsigned long)) xpc_dropped_IPI_check; + timer->data = (unsigned long) part; + timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT; + add_timer(timer); + + /* + * With the setting of the partition setup_state to XPC_P_SETUP, we're + * declaring that this partition is ready to go. + */ + (volatile u8) part->setup_state = XPC_P_SETUP; + + + /* + * Setup the per partition specific variables required by the + * remote partition to establish channel connections with us. + * + * The setting of the magic # indicates that these per partition + * specific variables are ready to be used. + */ + xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs); + xpc_vars_part[partid].openclose_args_pa = + __pa(part->local_openclose_args); + xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va); + xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(smp_processor_id()); + xpc_vars_part[partid].IPI_phys_cpuid = + cpu_physical_id(smp_processor_id()); + xpc_vars_part[partid].nchannels = part->nchannels; + (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC1; + + return xpcSuccess; +} + + +/* + * Create a wrapper that hides the underlying mechanism for pulling a cacheline + * (or multiple cachelines) from a remote partition. + * + * src must be a cacheline aligned physical address on the remote partition. + * dst must be a cacheline aligned virtual address on this partition. + * cnt must be an cacheline sized + */ +static enum xpc_retval +xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst, + const void *src, size_t cnt) +{ + bte_result_t bte_ret; + + + DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src)); + DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst)); + DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); + + if (part->act_state == XPC_P_DEACTIVATING) { + return part->reason; + } + + bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst), + (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL); + if (bte_ret == BTE_SUCCESS) { + return xpcSuccess; + } + + dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n", + XPC_PARTID(part), bte_ret); + + return xpc_map_bte_errors(bte_ret); +} + + +/* + * Pull the remote per partititon specific variables from the specified + * partition. + */ +enum xpc_retval +xpc_pull_remote_vars_part(struct xpc_partition *part) +{ + u8 buffer[L1_CACHE_BYTES * 2]; + struct xpc_vars_part *pulled_entry_cacheline = + (struct xpc_vars_part *) L1_CACHE_ALIGN((u64) buffer); + struct xpc_vars_part *pulled_entry; + u64 remote_entry_cacheline_pa, remote_entry_pa; + partid_t partid = XPC_PARTID(part); + enum xpc_retval ret; + + + /* pull the cacheline that contains the variables we're interested in */ + + DBUG_ON(part->remote_vars_part_pa != + L1_CACHE_ALIGN(part->remote_vars_part_pa)); + DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2); + + remote_entry_pa = part->remote_vars_part_pa + + sn_partition_id * sizeof(struct xpc_vars_part); + + remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); + + pulled_entry = (struct xpc_vars_part *) ((u64) pulled_entry_cacheline + + (remote_entry_pa & (L1_CACHE_BYTES - 1))); + + ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline, + (void *) remote_entry_cacheline_pa, + L1_CACHE_BYTES); + if (ret != xpcSuccess) { + dev_dbg(xpc_chan, "failed to pull XPC vars_part from " + "partition %d, ret=%d\n", partid, ret); + return ret; + } + + + /* see if they've been set up yet */ + + if (pulled_entry->magic != XPC_VP_MAGIC1 && + pulled_entry->magic != XPC_VP_MAGIC2) { + + if (pulled_entry->magic != 0) { + dev_dbg(xpc_chan, "partition %d's XPC vars_part for " + "partition %d has bad magic value (=0x%lx)\n", + partid, sn_partition_id, pulled_entry->magic); + return xpcBadMagic; + } + + /* they've not been initialized yet */ + return xpcRetry; + } + + if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) { + + /* validate the variables */ + + if (pulled_entry->GPs_pa == 0 || + pulled_entry->openclose_args_pa == 0 || + pulled_entry->IPI_amo_pa == 0) { + + dev_err(xpc_chan, "partition %d's XPC vars_part for " + "partition %d are not valid\n", partid, + sn_partition_id); + return xpcInvalidAddress; + } + + /* the variables we imported look to be valid */ + + part->remote_GPs_pa = pulled_entry->GPs_pa; + part->remote_openclose_args_pa = + pulled_entry->openclose_args_pa; + part->remote_IPI_amo_va = + (AMO_t *) __va(pulled_entry->IPI_amo_pa); + part->remote_IPI_nasid = pulled_entry->IPI_nasid; + part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid; + + if (part->nchannels > pulled_entry->nchannels) { + part->nchannels = pulled_entry->nchannels; + } + + /* let the other side know that we've pulled their variables */ + + (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC2; + } + + if (pulled_entry->magic == XPC_VP_MAGIC1) { + return xpcRetry; + } + + return xpcSuccess; +} + + +/* + * Get the IPI flags and pull the openclose args and/or remote GPs as needed. + */ +static u64 +xpc_get_IPI_flags(struct xpc_partition *part) +{ + unsigned long irq_flags; + u64 IPI_amo; + enum xpc_retval ret; + + + /* + * See if there are any IPI flags to be handled. + */ + + spin_lock_irqsave(&part->IPI_lock, irq_flags); + if ((IPI_amo = part->local_IPI_amo) != 0) { + part->local_IPI_amo = 0; + } + spin_unlock_irqrestore(&part->IPI_lock, irq_flags); + + + if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) { + ret = xpc_pull_remote_cachelines(part, + part->remote_openclose_args, + (void *) part->remote_openclose_args_pa, + XPC_OPENCLOSE_ARGS_SIZE); + if (ret != xpcSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull openclose args from " + "partition %d, ret=%d\n", XPC_PARTID(part), + ret); + + /* don't bother processing IPIs anymore */ + IPI_amo = 0; + } + } + + if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) { + ret = xpc_pull_remote_cachelines(part, part->remote_GPs, + (void *) part->remote_GPs_pa, + XPC_GP_SIZE); + if (ret != xpcSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull GPs from partition " + "%d, ret=%d\n", XPC_PARTID(part), ret); + + /* don't bother processing IPIs anymore */ + IPI_amo = 0; + } + } + + return IPI_amo; +} + + +/* + * Allocate the local message queue and the notify queue. + */ +static enum xpc_retval +xpc_allocate_local_msgqueue(struct xpc_channel *ch) +{ + unsigned long irq_flags; + int nentries; + size_t nbytes; + + + // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between + // >>> iterations of the for-loop, bail if set? + + // >>> should we impose a minumum #of entries? like 4 or 8? + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->msg_size; + ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, + (GFP_KERNEL | GFP_DMA), + &ch->local_msgqueue_base); + if (ch->local_msgqueue == NULL) { + continue; + } + memset(ch->local_msgqueue, 0, nbytes); + + nbytes = nentries * sizeof(struct xpc_notify); + ch->notify_queue = kmalloc(nbytes, (GFP_KERNEL | GFP_DMA)); + if (ch->notify_queue == NULL) { + kfree(ch->local_msgqueue_base); + ch->local_msgqueue = NULL; + continue; + } + memset(ch->notify_queue, 0, nbytes); + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->local_nentries, ch->partid, ch->number); + + ch->local_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpcSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for local message queue and notify " + "queue, partid=%d, channel=%d\n", ch->partid, ch->number); + return xpcNoMemory; +} + + +/* + * Allocate the cached remote message queue. + */ +static enum xpc_retval +xpc_allocate_remote_msgqueue(struct xpc_channel *ch) +{ + unsigned long irq_flags; + int nentries; + size_t nbytes; + + + DBUG_ON(ch->remote_nentries <= 0); + + // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between + // >>> iterations of the for-loop, bail if set? + + // >>> should we impose a minumum #of entries? like 4 or 8? + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->msg_size; + ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, + (GFP_KERNEL | GFP_DMA), + &ch->remote_msgqueue_base); + if (ch->remote_msgqueue == NULL) { + continue; + } + memset(ch->remote_msgqueue, 0, nbytes); + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->remote_nentries, ch->partid, ch->number); + + ch->remote_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpcSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + return xpcNoMemory; +} + + +/* + * Allocate message queues and other stuff associated with a channel. + * + * Note: Assumes all of the channel sizes are filled in. + */ +static enum xpc_retval +xpc_allocate_msgqueues(struct xpc_channel *ch) +{ + unsigned long irq_flags; + int i; + enum xpc_retval ret; + + + DBUG_ON(ch->flags & XPC_C_SETUP); + + if ((ret = xpc_allocate_local_msgqueue(ch)) != xpcSuccess) { + return ret; + } + + if ((ret = xpc_allocate_remote_msgqueue(ch)) != xpcSuccess) { + kfree(ch->local_msgqueue_base); + ch->local_msgqueue = NULL; + kfree(ch->notify_queue); + ch->notify_queue = NULL; + return ret; + } + + for (i = 0; i < ch->local_nentries; i++) { + /* use a semaphore as an event wait queue */ + sema_init(&ch->notify_queue[i].sema, 0); + } + + sema_init(&ch->teardown_sema, 0); /* event wait */ + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_SETUP; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpcSuccess; +} + + +/* + * Process a connect message from a remote partition. + * + * Note: xpc_process_connect() is expecting to be called with the + * spin_lock_irqsave held and will leave it locked upon return. + */ +static void +xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + enum xpc_retval ret; + + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_ROPENREQUEST)) { + /* nothing more to do for now */ + return; + } + DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); + + if (!(ch->flags & XPC_C_SETUP)) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + ret = xpc_allocate_msgqueues(ch); + spin_lock_irqsave(&ch->lock, *irq_flags); + + if (ret != xpcSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + } + if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) { + return; + } + + DBUG_ON(!(ch->flags & XPC_C_SETUP)); + DBUG_ON(ch->local_msgqueue == NULL); + DBUG_ON(ch->remote_msgqueue == NULL); + } + + if (!(ch->flags & XPC_C_OPENREPLY)) { + ch->flags |= XPC_C_OPENREPLY; + xpc_IPI_send_openreply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENREPLY)) { + return; + } + + DBUG_ON(ch->remote_msgqueue_pa == 0); + + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ + + dev_info(xpc_chan, "channel %d to partition %d connected\n", + ch->number, ch->partid); + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_create_kthreads(ch, 1); + spin_lock_irqsave(&ch->lock, *irq_flags); +} + + +/* + * Free up message queues and other stuff that were allocated for the specified + * channel. + * + * Note: ch->reason and ch->reason_line are left set for debugging purposes, + * they're cleared when XPC_C_DISCONNECTED is cleared. + */ +static void +xpc_free_msgqueues(struct xpc_channel *ch) +{ + DBUG_ON(!spin_is_locked(&ch->lock)); + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + + ch->remote_msgqueue_pa = 0; + ch->func = NULL; + ch->key = NULL; + ch->msg_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + ch->local_GP->get = 0; + ch->local_GP->put = 0; + ch->remote_GP.get = 0; + ch->remote_GP.put = 0; + ch->w_local_GP.get = 0; + ch->w_local_GP.put = 0; + ch->w_remote_GP.get = 0; + ch->w_remote_GP.put = 0; + ch->next_msg_to_pull = 0; + + if (ch->flags & XPC_C_SETUP) { + ch->flags &= ~XPC_C_SETUP; + + dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", + ch->flags, ch->partid, ch->number); + + kfree(ch->local_msgqueue_base); + ch->local_msgqueue = NULL; + kfree(ch->remote_msgqueue_base); + ch->remote_msgqueue = NULL; + kfree(ch->notify_queue); + ch->notify_queue = NULL; + + /* in case someone is waiting for the teardown to complete */ + up(&ch->teardown_sema); + } +} + + +/* + * spin_lock_irqsave() is expected to be held on entry. + */ +static void +xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + u32 ch_flags = ch->flags; + + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + return; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + /* make sure all activity has settled down first */ + + if (atomic_read(&ch->references) > 0) { + return; + } + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + + /* it's now safe to free the channel's message queues */ + + xpc_free_msgqueues(ch); + DBUG_ON(ch->flags & XPC_C_SETUP); + + if (part->act_state != XPC_P_DEACTIVATING) { + + /* as long as the other side is up do the full protocol */ + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { + return; + } + + if (!(ch->flags & XPC_C_CLOSEREPLY)) { + ch->flags |= XPC_C_CLOSEREPLY; + xpc_IPI_send_closereply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_RCLOSEREPLY)) { + return; + } + } + + /* both sides are disconnected now */ + + ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */ + + atomic_dec(&part->nchannels_active); + + if (ch_flags & XPC_C_WASCONNECTED) { + dev_info(xpc_chan, "channel %d to partition %d disconnected, " + "reason=%d\n", ch->number, ch->partid, ch->reason); + } +} + + +/* + * Process a change in the channel's remote connection state. + */ +static void +xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number, + u8 IPI_flags) +{ + unsigned long irq_flags; + struct xpc_openclose_args *args = + &part->remote_openclose_args[ch_number]; + struct xpc_channel *ch = &part->channels[ch_number]; + enum xpc_retval reason; + + + + spin_lock_irqsave(&ch->lock, irq_flags); + + + if (IPI_flags & XPC_IPI_CLOSEREQUEST) { + + dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received " + "from partid=%d, channel=%d\n", args->reason, + ch->partid, ch->number); + + /* + * If RCLOSEREQUEST is set, we're probably waiting for + * RCLOSEREPLY. We should find it and a ROPENREQUEST packed + * with this RCLOSEQREUQEST in the IPI_flags. + */ + + if (ch->flags & XPC_C_RCLOSEREQUEST) { + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); + DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); + + DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY)); + IPI_flags &= ~XPC_IPI_CLOSEREPLY; + ch->flags |= XPC_C_RCLOSEREPLY; + + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + + if (ch->flags & XPC_C_DISCONNECTED) { + // >>> explain this section + + if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { + DBUG_ON(part->act_state != + XPC_P_DEACTIVATING); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); + } + + IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY); + + /* + * The meaningful CLOSEREQUEST connection state fields are: + * reason = reason connection is to be closed + */ + + ch->flags |= XPC_C_RCLOSEREQUEST; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + reason = args->reason; + if (reason <= xpcSuccess || reason > xpcUnknownReason) { + reason = xpcUnknownReason; + } else if (reason == xpcUnregistering) { + reason = xpcOtherUnregistering; + } + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + } else { + xpc_process_disconnect(ch, &irq_flags); + } + } + + + if (IPI_flags & XPC_IPI_CLOSEREPLY) { + + dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d," + " channel=%d\n", ch->partid, ch->number); + + if (ch->flags & XPC_C_DISCONNECTED) { + DBUG_ON(part->act_state != XPC_P_DEACTIVATING); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST)); + + ch->flags |= XPC_C_RCLOSEREPLY; + + if (ch->flags & XPC_C_CLOSEREPLY) { + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + } + + + if (IPI_flags & XPC_IPI_OPENREQUEST) { + + dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, " + "local_nentries=%d) received from partid=%d, " + "channel=%d\n", args->msg_size, args->local_nentries, + ch->partid, ch->number); + + if ((ch->flags & XPC_C_DISCONNECTING) || + part->act_state == XPC_P_DEACTIVATING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | + XPC_C_OPENREQUEST))); + DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_OPENREPLY | XPC_C_CONNECTED)); + + /* + * The meaningful OPENREQUEST connection state fields are: + * msg_size = size of channel's messages in bytes + * local_nentries = remote partition's local_nentries + */ + DBUG_ON(args->msg_size == 0); + DBUG_ON(args->local_nentries == 0); + + ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); + ch->remote_nentries = args->local_nentries; + + + if (ch->flags & XPC_C_OPENREQUEST) { + if (args->msg_size != ch->msg_size) { + XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + } else { + ch->msg_size = args->msg_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + } + + xpc_process_connect(ch, &irq_flags); + } + + + if (IPI_flags & XPC_IPI_OPENREPLY) { + + dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, " + "local_nentries=%d, remote_nentries=%d) received from " + "partid=%d, channel=%d\n", args->local_msgqueue_pa, + args->local_nentries, args->remote_nentries, + ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } + DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + + /* + * The meaningful OPENREPLY connection state fields are: + * local_msgqueue_pa = physical address of remote + * partition's local_msgqueue + * local_nentries = remote partition's local_nentries + * remote_nentries = remote partition's remote_nentries + */ + DBUG_ON(args->local_msgqueue_pa == 0); + DBUG_ON(args->local_nentries == 0); + DBUG_ON(args->remote_nentries == 0); + + ch->flags |= XPC_C_ROPENREPLY; + ch->remote_msgqueue_pa = args->local_msgqueue_pa; + + if (args->local_nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " + "remote_nentries=%d, old remote_nentries=%d, " + "partid=%d, channel=%d\n", + args->local_nentries, ch->remote_nentries, + ch->partid, ch->number); + + ch->remote_nentries = args->local_nentries; + } + if (args->remote_nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new " + "local_nentries=%d, old local_nentries=%d, " + "partid=%d, channel=%d\n", + args->remote_nentries, ch->local_nentries, + ch->partid, ch->number); + + ch->local_nentries = args->remote_nentries; + } + + xpc_process_connect(ch, &irq_flags); + } + + spin_unlock_irqrestore(&ch->lock, irq_flags); +} + + +/* + * Attempt to establish a channel connection to a remote partition. + */ +static enum xpc_retval +xpc_connect_channel(struct xpc_channel *ch) +{ + unsigned long irq_flags; + struct xpc_registration *registration = &xpc_registrations[ch->number]; + + + if (down_interruptible(®istration->sema) != 0) { + return xpcInterrupted; + } + + if (!XPC_CHANNEL_REGISTERED(ch->number)) { + up(®istration->sema); + return xpcUnregistered; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + up(®istration->sema); + return ch->reason; + } + + + /* add info from the channel connect registration to the channel */ + + ch->kthreads_assigned_limit = registration->assigned_limit; + ch->kthreads_idle_limit = registration->idle_limit; + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); + DBUG_ON(atomic_read(&ch->kthreads_active) != 0); + + ch->func = registration->func; + DBUG_ON(registration->func == NULL); + ch->key = registration->key; + + ch->local_nentries = registration->nentries; + + if (ch->flags & XPC_C_ROPENREQUEST) { + if (registration->msg_size != ch->msg_size) { + /* the local and remote sides aren't the same */ + + /* + * Because XPC_DISCONNECT_CHANNEL() can block we're + * forced to up the registration sema before we unlock + * the channel lock. But that's okay here because we're + * done with the part that required the registration + * sema. XPC_DISCONNECT_CHANNEL() requires that the + * channel lock be locked and will unlock and relock + * the channel lock as needed. + */ + up(®istration->sema); + XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpcUnequalMsgSizes; + } + } else { + ch->msg_size = registration->msg_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&xpc_partitions[ch->partid].nchannels_active); + } + + up(®istration->sema); + + + /* initiate the connection */ + + ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); + xpc_IPI_send_openrequest(ch, &irq_flags); + + xpc_process_connect(ch, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpcSuccess; +} + + +/* + * Notify those who wanted to be notified upon delivery of their message. + */ +static void +xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put) +{ + struct xpc_notify *notify; + u8 notify_type; + s64 get = ch->w_remote_GP.get - 1; + + + while (++get < put && atomic_read(&ch->n_to_notify) > 0) { + + notify = &ch->notify_queue[get % ch->local_nentries]; + + /* + * See if the notify entry indicates it was associated with + * a message who's sender wants to be notified. It is possible + * that it is, but someone else is doing or has done the + * notification. + */ + notify_type = notify->type; + if (notify_type == 0 || + cmpxchg(¬ify->type, notify_type, 0) != + notify_type) { + continue; + } + + DBUG_ON(notify_type != XPC_N_CALL); + + atomic_dec(&ch->n_to_notify); + + if (notify->func != NULL) { + dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", + (void *) notify, get, ch->partid, ch->number); + + notify->func(reason, ch->partid, ch->number, + notify->key); + + dev_dbg(xpc_chan, "notify->func() returned, " + "notify=0x%p, msg_number=%ld, partid=%d, " + "channel=%d\n", (void *) notify, get, + ch->partid, ch->number); + } + } +} + + +/* + * Clear some of the msg flags in the local message queue. + */ +static inline void +xpc_clear_local_msgqueue_flags(struct xpc_channel *ch) +{ + struct xpc_msg *msg; + s64 get; + + + get = ch->w_remote_GP.get; + do { + msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + + (get % ch->local_nentries) * ch->msg_size); + msg->flags = 0; + } while (++get < (volatile s64) ch->remote_GP.get); +} + + +/* + * Clear some of the msg flags in the remote message queue. + */ +static inline void +xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch) +{ + struct xpc_msg *msg; + s64 put; + + + put = ch->w_remote_GP.put; + do { + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + + (put % ch->remote_nentries) * ch->msg_size); + msg->flags = 0; + } while (++put < (volatile s64) ch->remote_GP.put); +} + + +static void +xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int nmsgs_sent; + + + ch->remote_GP = part->remote_GPs[ch_number]; + + + /* See what, if anything, has changed for each connected channel */ + + xpc_msgqueue_ref(ch); + + if (ch->w_remote_GP.get == ch->remote_GP.get && + ch->w_remote_GP.put == ch->remote_GP.put) { + /* nothing changed since GPs were last pulled */ + xpc_msgqueue_deref(ch); + return; + } + + if (!(ch->flags & XPC_C_CONNECTED)){ + xpc_msgqueue_deref(ch); + return; + } + + + /* + * First check to see if messages recently sent by us have been + * received by the other side. (The remote GET value will have + * changed since we last looked at it.) + */ + + if (ch->w_remote_GP.get != ch->remote_GP.get) { + + /* + * We need to notify any senders that want to be notified + * that their sent messages have been received by their + * intended recipients. We need to do this before updating + * w_remote_GP.get so that we don't allocate the same message + * queue entries prematurely (see xpc_allocate_msg()). + */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* + * Notify senders that messages sent have been + * received and delivered by the other side. + */ + xpc_notify_senders(ch, xpcMsgDelivered, + ch->remote_GP.get); + } + + /* + * Clear msg->flags in previously sent messages, so that + * they're ready for xpc_allocate_msg(). + */ + xpc_clear_local_msgqueue_flags(ch); + + (volatile s64) ch->w_remote_GP.get = ch->remote_GP.get; + + dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " + "channel=%d\n", ch->w_remote_GP.get, ch->partid, + ch->number); + + /* + * If anyone was waiting for message queue entries to become + * available, wake them up. + */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { + wake_up(&ch->msg_allocate_wq); + } + } + + + /* + * Now check for newly sent messages by the other side. (The remote + * PUT value will have changed since we last looked at it.) + */ + + if (ch->w_remote_GP.put != ch->remote_GP.put) { + /* + * Clear msg->flags in previously received messages, so that + * they're ready for xpc_get_deliverable_msg(). + */ + xpc_clear_remote_msgqueue_flags(ch); + + (volatile s64) ch->w_remote_GP.put = ch->remote_GP.put; + + dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " + "channel=%d\n", ch->w_remote_GP.put, ch->partid, + ch->number); + + nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get; + if (nmsgs_sent > 0) { + dev_dbg(xpc_chan, "msgs waiting to be copied and " + "delivered=%d, partid=%d, channel=%d\n", + nmsgs_sent, ch->partid, ch->number); + + if (ch->flags & XPC_C_CONNECTCALLOUT) { + xpc_activate_kthreads(ch, nmsgs_sent); + } + } + } + + xpc_msgqueue_deref(ch); +} + + +void +xpc_process_channel_activity(struct xpc_partition *part) +{ + unsigned long irq_flags; + u64 IPI_amo, IPI_flags; + struct xpc_channel *ch; + int ch_number; + + + IPI_amo = xpc_get_IPI_flags(part); + + /* + * Initiate channel connections for registered channels. + * + * For each connected channel that has pending messages activate idle + * kthreads and/or create new kthreads as needed. + */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + + /* + * Process any open or close related IPI flags, and then deal + * with connecting or disconnecting the channel as required. + */ + + IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number); + + if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags)) { + xpc_process_openclose_IPI(part, ch_number, IPI_flags); + } + + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_disconnect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + continue; + } + + if (part->act_state == XPC_P_DEACTIVATING) { + continue; + } + + if (!(ch->flags & XPC_C_CONNECTED)) { + if (!(ch->flags & XPC_C_OPENREQUEST)) { + DBUG_ON(ch->flags & XPC_C_SETUP); + (void) xpc_connect_channel(ch); + } else { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_connect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + continue; + } + + + /* + * Process any message related IPI flags, this may involve the + * activation of kthreads to deliver any pending messages sent + * from the other partition. + */ + + if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags)) { + xpc_process_msg_IPI(part, ch_number); + } + } +} + + +/* + * XPC's heartbeat code calls this function to inform XPC that a partition has + * gone down. XPC responds by tearing down the XPartition Communication + * infrastructure used for the just downed partition. + * + * XPC's heartbeat code will never call this function and xpc_partition_up() + * at the same time. Nor will it ever make multiple calls to either function + * at the same time. + */ +void +xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason) +{ + unsigned long irq_flags; + int ch_number; + struct xpc_channel *ch; + + + dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", + XPC_PARTID(part), reason); + + if (!xpc_part_ref(part)) { + /* infrastructure for this partition isn't currently set up */ + return; + } + + + /* disconnect all channels associated with the downed partition */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + + xpc_msgqueue_ref(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_msgqueue_deref(ch); + } + + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); +} + + +/* + * Teardown the infrastructure necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +void +xpc_teardown_infrastructure(struct xpc_partition *part) +{ + partid_t partid = XPC_PARTID(part); + + + /* + * We start off by making this partition inaccessible to local + * processes by marking it as no longer setup. Then we make it + * inaccessible to remote processes by clearing the XPC per partition + * specific variable's magic # (which indicates that these variables + * are no longer valid) and by ignoring all XPC notify IPIs sent to + * this partition. + */ + + DBUG_ON(atomic_read(&part->nchannels_active) != 0); + DBUG_ON(part->setup_state != XPC_P_SETUP); + part->setup_state = XPC_P_WTEARDOWN; + + xpc_vars_part[partid].magic = 0; + + + free_irq(SGI_XPC_NOTIFY, (void *) (u64) partid); + + + /* + * Before proceding with the teardown we have to wait until all + * existing references cease. + */ + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); + + + /* now we can begin tearing down the infrastructure */ + + part->setup_state = XPC_P_TORNDOWN; + + /* in case we've still got outstanding timers registered... */ + del_timer_sync(&part->dropped_IPI_timer); + + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->local_openclose_args_base); + part->local_openclose_args = NULL; + kfree(part->remote_GPs_base); + part->remote_GPs = NULL; + kfree(part->local_GPs_base); + part->local_GPs = NULL; + kfree(part->channels); + part->channels = NULL; + part->local_IPI_amo_va = NULL; +} + + +/* + * Called by XP at the time of channel connection registration to cause + * XPC to establish connections to all currently active partitions. + */ +void +xpc_initiate_connect(int ch_number) +{ + partid_t partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_SETUP); + + /* + * Initiate the establishment of a connection + * on the newly registered channel to the + * remote partition. + */ + xpc_wakeup_channel_mgr(part); + } + + xpc_part_deref(part); + } + } +} + + +void +xpc_connected_callout(struct xpc_channel *ch) +{ + unsigned long irq_flags; + + + /* let the registerer know that a connection has been established */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=xpcConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + ch->func(xpcConnected, ch->partid, ch->number, + (void *) (u64) ch->local_nentries, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); +} + + +/* + * Called by XP at the time of channel connection unregistration to cause + * XPC to teardown all current connections for the specified channel. + * + * Before returning xpc_initiate_disconnect() will wait until all connections + * on the specified channel have been closed/torndown. So the caller can be + * assured that they will not be receiving any more callouts from XPC to the + * function they registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_initiate_disconnect(int ch_number) +{ + unsigned long irq_flags; + partid_t partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + + DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS); + + /* initiate the channel disconnect for every active partition */ + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering, + &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + } + } + + xpc_disconnect_wait(ch_number); +} + + +/* + * To disconnect a channel, and reflect it back to all who may be waiting. + * + * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by + * >>> xpc_free_msgqueues(). + * + * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. + */ +void +xpc_disconnect_channel(const int line, struct xpc_channel *ch, + enum xpc_retval reason, unsigned long *irq_flags) +{ + u32 flags; + + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { + return; + } + DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); + + dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", + reason, line, ch->partid, ch->number); + + XPC_SET_REASON(ch, reason, line); + + flags = ch->flags; + /* some of these may not have been set */ + ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | + XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_CONNECTING | XPC_C_CONNECTED); + + ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); + xpc_IPI_send_closerequest(ch, irq_flags); + + if (flags & XPC_C_CONNECTED) { + ch->flags |= XPC_C_WASCONNECTED; + } + + if (atomic_read(&ch->kthreads_idle) > 0) { + /* wake all idle kthreads so they can exit */ + wake_up_all(&ch->idle_wq); + } + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + + /* wake those waiting to allocate an entry from the local msg queue */ + + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { + wake_up(&ch->msg_allocate_wq); + } + + /* wake those waiting for notify completion */ + + if (atomic_read(&ch->n_to_notify) > 0) { + xpc_notify_senders(ch, reason, ch->w_local_GP.put); + } + + spin_lock_irqsave(&ch->lock, *irq_flags); +} + + +void +xpc_disconnected_callout(struct xpc_channel *ch) +{ + /* + * Let the channel's registerer know that the channel is now + * disconnected. We don't want to do this if the registerer was never + * informed of a connection being made, unless the disconnect was for + * abnormal reasons. + */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " + "channel=%d\n", ch->reason, ch->partid, ch->number); + + ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " + "channel=%d\n", ch->reason, ch->partid, ch->number); + } +} + + +/* + * Wait for a message entry to become available for the specified channel, + * but don't wait any longer than 1 jiffy. + */ +static enum xpc_retval +xpc_allocate_msg_wait(struct xpc_channel *ch) +{ + enum xpc_retval ret; + + + if (ch->flags & XPC_C_DISCONNECTING) { + DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? + return ch->reason; + } + + atomic_inc(&ch->n_on_msg_allocate_wq); + ret = interruptible_sleep_on_timeout(&ch->msg_allocate_wq, 1); + atomic_dec(&ch->n_on_msg_allocate_wq); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + DBUG_ON(ch->reason == xpcInterrupted); // >>> Is this true? + } else if (ret == 0) { + ret = xpcTimeout; + } else { + ret = xpcInterrupted; + } + + return ret; +} + + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. + */ +static enum xpc_retval +xpc_allocate_msg(struct xpc_channel *ch, u32 flags, + struct xpc_msg **address_of_msg) +{ + struct xpc_msg *msg; + enum xpc_retval ret; + s64 put; + + + /* this reference will be dropped in xpc_send_msg() */ + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + xpc_msgqueue_deref(ch); + return ch->reason; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return xpcNotConnected; + } + + + /* + * Get the next available message entry from the local message queue. + * If none are available, we'll make sure that we grab the latest + * GP values. + */ + ret = xpcTimeout; + + while (1) { + + put = (volatile s64) ch->w_local_GP.put; + if (put - (volatile s64) ch->w_remote_GP.get < + ch->local_nentries) { + + /* There are available message entries. We need to try + * to secure one for ourselves. We'll do this by trying + * to increment w_local_GP.put as long as someone else + * doesn't beat us to it. If they do, we'll have to + * try again. + */ + if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == + put) { + /* we got the entry referenced by put */ + break; + } + continue; /* try again */ + } + + + /* + * There aren't any available msg entries at this time. + * + * In waiting for a message entry to become available, + * we set a timeout in case the other side is not + * sending completion IPIs. This lets us fake an IPI + * that will cause the IPI handler to fetch the latest + * GP values as if an IPI was sent by the other side. + */ + if (ret == xpcTimeout) { + xpc_IPI_send_local_msgrequest(ch); + } + + if (flags & XPC_NOWAIT) { + xpc_msgqueue_deref(ch); + return xpcNoWait; + } + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpcInterrupted && ret != xpcTimeout) { + xpc_msgqueue_deref(ch); + return ret; + } + } + + + /* get the message's address and initialize it */ + msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + + (put % ch->local_nentries) * ch->msg_size); + + + DBUG_ON(msg->flags != 0); + msg->number = put; + + dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", put + 1, + (void *) msg, msg->number, ch->partid, ch->number); + + *address_of_msg = msg; + + return xpcSuccess; +} + + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. NOTE that this routine can sleep waiting for a message + * entry to become available. To not sleep, pass in the XPC_NOWAIT flag. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel #. + * flags - see xpc.h for valid flags. + * payload - address of the allocated payload area pointer (filled in on + * return) in which the user-defined message is constructed. + */ +enum xpc_retval +xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xpc_retval ret = xpcUnknownReason; + struct xpc_msg *msg; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + *payload = NULL; + + if (xpc_part_ref(part)) { + ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg); + xpc_part_deref(part); + + if (msg != NULL) { + *payload = &msg->payload; + } + } + + return ret; +} + + +/* + * Now we actually send the messages that are ready to be sent by advancing + * the local message queue's Put value and then send an IPI to the recipient + * partition. + */ +static void +xpc_send_msgs(struct xpc_channel *ch, s64 initial_put) +{ + struct xpc_msg *msg; + s64 put = initial_put + 1; + int send_IPI = 0; + + + while (1) { + + while (1) { + if (put == (volatile s64) ch->w_local_GP.put) { + break; + } + + msg = (struct xpc_msg *) ((u64) ch->local_msgqueue + + (put % ch->local_nentries) * ch->msg_size); + + if (!(msg->flags & XPC_M_READY)) { + break; + } + + put++; + } + + if (put == initial_put) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) != + initial_put) { + /* someone else beat us to it */ + DBUG_ON((volatile s64) ch->local_GP->put < initial_put); + break; + } + + /* we just set the new value of local_GP->put */ + + dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, " + "channel=%d\n", put, ch->partid, ch->number); + + send_IPI = 1; + + /* + * We need to ensure that the message referenced by + * local_GP->put is not XPC_M_READY or that local_GP->put + * equals w_local_GP.put, so we'll go have a look. + */ + initial_put = put; + } + + if (send_IPI) { + xpc_IPI_send_msgrequest(ch); + } +} + + +/* + * Common code that does the actual sending of the message by advancing the + * local message queue's Put value and sends an IPI to the partition the + * message is being sent to. + */ +static enum xpc_retval +xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, + xpc_notify_func func, void *key) +{ + enum xpc_retval ret = xpcSuccess; + struct xpc_notify *notify = NULL; // >>> to keep the compiler happy!! + s64 put, msg_number = msg->number; + + + DBUG_ON(notify_type == XPC_N_CALL && func == NULL); + DBUG_ON((((u64) msg - (u64) ch->local_msgqueue) / ch->msg_size) != + msg_number % ch->local_nentries); + DBUG_ON(msg->flags & XPC_M_READY); + + if (ch->flags & XPC_C_DISCONNECTING) { + /* drop the reference grabbed in xpc_allocate_msg() */ + xpc_msgqueue_deref(ch); + return ch->reason; + } + + if (notify_type != 0) { + /* + * Tell the remote side to send an ACK interrupt when the + * message has been delivered. + */ + msg->flags |= XPC_M_INTERRUPT; + + atomic_inc(&ch->n_to_notify); + + notify = &ch->notify_queue[msg_number % ch->local_nentries]; + notify->func = func; + notify->key = key; + (volatile u8) notify->type = notify_type; + + // >>> is a mb() needed here? + + if (ch->flags & XPC_C_DISCONNECTING) { + /* + * An error occurred between our last error check and + * this one. We will try to clear the type field from + * the notify entry. If we succeed then + * xpc_disconnect_channel() didn't already process + * the notify entry. + */ + if (cmpxchg(¬ify->type, notify_type, 0) == + notify_type) { + atomic_dec(&ch->n_to_notify); + ret = ch->reason; + } + + /* drop the reference grabbed in xpc_allocate_msg() */ + xpc_msgqueue_deref(ch); + return ret; + } + } + + msg->flags |= XPC_M_READY; + + /* + * The preceding store of msg->flags must occur before the following + * load of ch->local_GP->put. + */ + mb(); + + /* see if the message is next in line to be sent, if so send it */ + + put = ch->local_GP->put; + if (put == msg_number) { + xpc_send_msgs(ch, put); + } + + /* drop the reference grabbed in xpc_allocate_msg() */ + xpc_msgqueue_deref(ch); + return ret; +} + + +/* + * Send a message previously allocated using xpc_initiate_allocate() on the + * specified channel connected to the specified partition. + * + * This routine will not wait for the message to be received, nor will + * notification be given when it does happen. Once this routine has returned + * the message entry allocated via xpc_initiate_allocate() is no longer + * accessable to the caller. + * + * This routine, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * payload - pointer to the payload area allocated via + * xpc_initiate_allocate(). + */ +enum xpc_retval +xpc_initiate_send(partid_t partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); + enum xpc_retval ret; + + + dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, + partid, ch_number); + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(msg == NULL); + + ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL); + + return ret; +} + + +/* + * Send a message previously allocated using xpc_initiate_allocate on the + * specified channel connected to the specified partition. + * + * This routine will not wait for the message to be sent. Once this routine + * has returned the message entry allocated via xpc_initiate_allocate() is no + * longer accessable to the caller. + * + * Once the remote end of the channel has received the message, the function + * passed as an argument to xpc_initiate_send_notify() will be called. This + * allows the sender to free up or re-use any buffers referenced by the + * message, but does NOT mean the message has been processed at the remote + * end by a receiver. + * + * If this routine returns an error, the caller's function will NOT be called. + * + * This routine, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * payload - pointer to the payload area allocated via + * xpc_initiate_allocate(). + * func - function to call with asynchronous notification of message + * receipt. THIS FUNCTION MUST BE NON-BLOCKING. + * key - user-defined key to be passed to the function when it's called. + */ +enum xpc_retval +xpc_initiate_send_notify(partid_t partid, int ch_number, void *payload, + xpc_notify_func func, void *key) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); + enum xpc_retval ret; + + + dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *) msg, + partid, ch_number); + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(msg == NULL); + DBUG_ON(func == NULL); + + ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL, + func, key); + return ret; +} + + +static struct xpc_msg * +xpc_pull_remote_msg(struct xpc_channel *ch, s64 get) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_msg *remote_msg, *msg; + u32 msg_index, nmsgs; + u64 msg_offset; + enum xpc_retval ret; + + + if (down_interruptible(&ch->msg_to_pull_sema) != 0) { + /* we were interrupted by a signal */ + return NULL; + } + + while (get >= ch->next_msg_to_pull) { + + /* pull as many messages as are ready and able to be pulled */ + + msg_index = ch->next_msg_to_pull % ch->remote_nentries; + + DBUG_ON(ch->next_msg_to_pull >= + (volatile s64) ch->w_remote_GP.put); + nmsgs = (volatile s64) ch->w_remote_GP.put - + ch->next_msg_to_pull; + if (msg_index + nmsgs > ch->remote_nentries) { + /* ignore the ones that wrap the msg queue for now */ + nmsgs = ch->remote_nentries - msg_index; + } + + msg_offset = msg_index * ch->msg_size; + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + + msg_offset); + remote_msg = (struct xpc_msg *) (ch->remote_msgqueue_pa + + msg_offset); + + if ((ret = xpc_pull_remote_cachelines(part, msg, remote_msg, + nmsgs * ch->msg_size)) != xpcSuccess) { + + dev_dbg(xpc_chan, "failed to pull %d msgs starting with" + " msg %ld from partition %d, channel=%d, " + "ret=%d\n", nmsgs, ch->next_msg_to_pull, + ch->partid, ch->number, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + + up(&ch->msg_to_pull_sema); + return NULL; + } + + mb(); /* >>> this may not be needed, we're not sure */ + + ch->next_msg_to_pull += nmsgs; + } + + up(&ch->msg_to_pull_sema); + + /* return the message we were looking for */ + msg_offset = (get % ch->remote_nentries) * ch->msg_size; + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + msg_offset); + + return msg; +} + + +/* + * Get a message to be delivered. + */ +static struct xpc_msg * +xpc_get_deliverable_msg(struct xpc_channel *ch) +{ + struct xpc_msg *msg = NULL; + s64 get; + + + do { + if ((volatile u32) ch->flags & XPC_C_DISCONNECTING) { + break; + } + + get = (volatile s64) ch->w_local_GP.get; + if (get == (volatile s64) ch->w_remote_GP.put) { + break; + } + + /* There are messages waiting to be pulled and delivered. + * We need to try to secure one for ourselves. We'll do this + * by trying to increment w_local_GP.get and hope that no one + * else beats us to it. If they do, we'll we'll simply have + * to try again for the next one. + */ + + if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) { + /* we got the entry referenced by get */ + + dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, " + "partid=%d, channel=%d\n", get + 1, + ch->partid, ch->number); + + /* pull the message from the remote partition */ + + msg = xpc_pull_remote_msg(ch, get); + + DBUG_ON(msg != NULL && msg->number != get); + DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE)); + DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY)); + + break; + } + + } while (1); + + return msg; +} + + +/* + * Deliver a message to its intended recipient. + */ +void +xpc_deliver_msg(struct xpc_channel *ch) +{ + struct xpc_msg *msg; + + + if ((msg = xpc_get_deliverable_msg(ch)) != NULL) { + + /* + * This ref is taken to protect the payload itself from being + * freed before the user is finished with it, which the user + * indicates by calling xpc_initiate_received(). + */ + xpc_msgqueue_ref(ch); + + atomic_inc(&ch->kthreads_active); + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", + (void *) msg, msg->number, ch->partid, + ch->number); + + /* deliver the message to its intended recipient */ + ch->func(xpcMsgReceived, ch->partid, ch->number, + &msg->payload, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, " + "msg_number=%ld, partid=%d, channel=%d\n", + (void *) msg, msg->number, ch->partid, + ch->number); + } + + atomic_dec(&ch->kthreads_active); + } +} + + +/* + * Now we actually acknowledge the messages that have been delivered and ack'd + * by advancing the cached remote message queue's Get value and if requested + * send an IPI to the message sender's partition. + */ +static void +xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) +{ + struct xpc_msg *msg; + s64 get = initial_get + 1; + int send_IPI = 0; + + + while (1) { + + while (1) { + if (get == (volatile s64) ch->w_local_GP.get) { + break; + } + + msg = (struct xpc_msg *) ((u64) ch->remote_msgqueue + + (get % ch->remote_nentries) * ch->msg_size); + + if (!(msg->flags & XPC_M_DONE)) { + break; + } + + msg_flags |= msg->flags; + get++; + } + + if (get == initial_get) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) != + initial_get) { + /* someone else beat us to it */ + DBUG_ON((volatile s64) ch->local_GP->get <= + initial_get); + break; + } + + /* we just set the new value of local_GP->get */ + + dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, " + "channel=%d\n", get, ch->partid, ch->number); + + send_IPI = (msg_flags & XPC_M_INTERRUPT); + + /* + * We need to ensure that the message referenced by + * local_GP->get is not XPC_M_DONE or that local_GP->get + * equals w_local_GP.get, so we'll go have a look. + */ + initial_get = get; + } + + if (send_IPI) { + xpc_IPI_send_msgrequest(ch); + } +} + + +/* + * Acknowledge receipt of a delivered message. + * + * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition + * that sent the message. + * + * This function, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # message received on. + * payload - pointer to the payload area allocated via + * xpc_initiate_allocate(). + */ +void +xpc_initiate_received(partid_t partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + struct xpc_msg *msg = XPC_MSG_ADDRESS(payload); + s64 get, msg_number = msg->number; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + ch = &part->channels[ch_number]; + + dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n", + (void *) msg, msg_number, ch->partid, ch->number); + + DBUG_ON((((u64) msg - (u64) ch->remote_msgqueue) / ch->msg_size) != + msg_number % ch->remote_nentries); + DBUG_ON(msg->flags & XPC_M_DONE); + + msg->flags |= XPC_M_DONE; + + /* + * The preceding store of msg->flags must occur before the following + * load of ch->local_GP->get. + */ + mb(); + + /* + * See if this message is next in line to be acknowledged as having + * been delivered. + */ + get = ch->local_GP->get; + if (get == msg_number) { + xpc_acknowledge_msgs(ch, get, msg->flags); + } + + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */ + xpc_msgqueue_deref(ch); +} + diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c new file mode 100644 index 000000000000..177ddb748ebe --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -0,0 +1,1064 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) support - standard version. + * + * XPC provides a message passing capability that crosses partition + * boundaries. This module is made up of two parts: + * + * partition This part detects the presence/absence of other + * partitions. It provides a heartbeat and monitors + * the heartbeats of other partitions. + * + * channel This part manages the channels and sends/receives + * messages across them to/from other partitions. + * + * There are a couple of additional functions residing in XP, which + * provide an interface to XPC for its users. + * + * + * Caveats: + * + * . We currently have no way to determine which nasid an IPI came + * from. Thus, xpc_IPI_send() does a remote AMO write followed by + * an IPI. The AMO indicates where data is to be pulled from, so + * after the IPI arrives, the remote partition checks the AMO word. + * The IPI can actually arrive before the AMO however, so other code + * must periodically check for this case. Also, remote AMO operations + * do not reliably time out. Thus we do a remote PIO read solely to + * know whether the remote partition is down and whether we should + * stop sending IPIs to it. This remote PIO read operation is set up + * in a special nofault region so SAL knows to ignore (and cleanup) + * any errors due to the remote AMO write, PIO read, and/or PIO + * write operations. + * + * If/when new hardware solves this IPI problem, we should abandon + * the current approach. + * + */ + + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/cache.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#include <asm/uaccess.h> +#include "xpc.h" + + +/* define two XPC debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpc_dbg_name = { + .name = "xpc" +}; + +struct device xpc_part_dbg_subname = { + .bus_id = {0}, /* set to "part" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device xpc_chan_dbg_subname = { + .bus_id = {0}, /* set to "chan" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device *xpc_part = &xpc_part_dbg_subname; +struct device *xpc_chan = &xpc_chan_dbg_subname; + + +/* systune related variables for /proc/sys directories */ + +static int xpc_hb_min = 1; +static int xpc_hb_max = 10; + +static int xpc_hb_check_min = 10; +static int xpc_hb_check_max = 120; + +static ctl_table xpc_sys_xpc_hb_dir[] = { + { + 1, + "hb_interval", + &xpc_hb_interval, + sizeof(int), + 0644, + NULL, + &proc_dointvec_minmax, + &sysctl_intvec, + NULL, + &xpc_hb_min, &xpc_hb_max + }, + { + 2, + "hb_check_interval", + &xpc_hb_check_interval, + sizeof(int), + 0644, + NULL, + &proc_dointvec_minmax, + &sysctl_intvec, + NULL, + &xpc_hb_check_min, &xpc_hb_check_max + }, + {0} +}; +static ctl_table xpc_sys_xpc_dir[] = { + { + 1, + "hb", + NULL, + 0, + 0555, + xpc_sys_xpc_hb_dir + }, + {0} +}; +static ctl_table xpc_sys_dir[] = { + { + 1, + "xpc", + NULL, + 0, + 0555, + xpc_sys_xpc_dir + }, + {0} +}; +static struct ctl_table_header *xpc_sysctl; + + +/* #of IRQs received */ +static atomic_t xpc_act_IRQ_rcvd; + +/* IRQ handler notifies this wait queue on receipt of an IRQ */ +static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq); + +static unsigned long xpc_hb_check_timeout; + +/* xpc_hb_checker thread exited notification */ +static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); + +/* xpc_discovery thread exited notification */ +static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); + + +static struct timer_list xpc_hb_timer; + + +static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); + + +/* + * Notify the heartbeat check thread that an IRQ has been received. + */ +static irqreturn_t +xpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs) +{ + atomic_inc(&xpc_act_IRQ_rcvd); + wake_up_interruptible(&xpc_act_IRQ_wq); + return IRQ_HANDLED; +} + + +/* + * Timer to produce the heartbeat. The timer structures function is + * already set when this is initially called. A tunable is used to + * specify when the next timeout should occur. + */ +static void +xpc_hb_beater(unsigned long dummy) +{ + xpc_vars->heartbeat++; + + if (jiffies >= xpc_hb_check_timeout) { + wake_up_interruptible(&xpc_act_IRQ_wq); + } + + xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); + add_timer(&xpc_hb_timer); +} + + +/* + * This thread is responsible for nearly all of the partition + * activation/deactivation. + */ +static int +xpc_hb_checker(void *ignore) +{ + int last_IRQ_count = 0; + int new_IRQ_count; + int force_IRQ=0; + + + /* this thread was marked active by xpc_hb_init() */ + + daemonize(XPC_HB_CHECK_THREAD_NAME); + + set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU)); + + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); + + while (!(volatile int) xpc_exiting) { + + /* wait for IRQ or timeout */ + (void) wait_event_interruptible(xpc_act_IRQ_wq, + (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) || + jiffies >= xpc_hb_check_timeout || + (volatile int) xpc_exiting)); + + dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " + "been received\n", + (int) (xpc_hb_check_timeout - jiffies), + atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); + + + /* checking of remote heartbeats is skewed by IRQ handling */ + if (jiffies >= xpc_hb_check_timeout) { + dev_dbg(xpc_part, "checking remote heartbeats\n"); + xpc_check_remote_hb(); + + /* + * We need to periodically recheck to ensure no + * IPI/AMO pairs have been missed. That check + * must always reset xpc_hb_check_timeout. + */ + force_IRQ = 1; + } + + + new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); + if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { + force_IRQ = 0; + + dev_dbg(xpc_part, "found an IRQ to process; will be " + "resetting xpc_hb_check_timeout\n"); + + last_IRQ_count += xpc_identify_act_IRQ_sender(); + if (last_IRQ_count < new_IRQ_count) { + /* retry once to help avoid missing AMO */ + (void) xpc_identify_act_IRQ_sender(); + } + last_IRQ_count = new_IRQ_count; + + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + } + } + + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); + + + /* mark this thread as inactive */ + up(&xpc_hb_checker_exited); + return 0; +} + + +/* + * This thread will attempt to discover other partitions to activate + * based on info provided by SAL. This new thread is short lived and + * will exit once discovery is complete. + */ +static int +xpc_initiate_discovery(void *ignore) +{ + daemonize(XPC_DISCOVERY_THREAD_NAME); + + xpc_discovery(); + + dev_dbg(xpc_part, "discovery thread is exiting\n"); + + /* mark this thread as inactive */ + up(&xpc_discovery_exited); + return 0; +} + + +/* + * Establish first contact with the remote partititon. This involves pulling + * the XPC per partition variables from the remote partition and waiting for + * the remote partition to pull ours. + */ +static enum xpc_retval +xpc_make_first_contact(struct xpc_partition *part) +{ + enum xpc_retval ret; + + + while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) { + if (ret != xpcRetry) { + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + dev_dbg(xpc_chan, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + set_current_state(TASK_INTERRUPTIBLE); + (void) schedule_timeout(0.25 * HZ); + + if (part->act_state == XPC_P_DEACTIVATING) { + return part->reason; + } + } + + return xpc_mark_partition_active(part); +} + + +/* + * The first kthread assigned to a newly activated partition is the one + * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to + * that kthread until the partition is brought down, at which time that kthread + * returns back to XPC HB. (The return of that kthread will signify to XPC HB + * that XPC has dismantled all communication infrastructure for the associated + * partition.) This kthread becomes the channel manager for that partition. + * + * Each active partition has a channel manager, who, besides connecting and + * disconnecting channels, will ensure that each of the partition's connected + * channels has the required number of assigned kthreads to get the work done. + */ +static void +xpc_channel_mgr(struct xpc_partition *part) +{ + while (part->act_state != XPC_P_DEACTIVATING || + atomic_read(&part->nchannels_active) > 0) { + + xpc_process_channel_activity(part); + + + /* + * Wait until we've been requested to activate kthreads or + * all of the channel's message queues have been torn down or + * a signal is pending. + * + * The channel_mgr_requests is set to 1 after being awakened, + * This is done to prevent the channel mgr from making one pass + * through the loop for each request, since he will + * be servicing all the requests in one pass. The reason it's + * set to 1 instead of 0 is so that other kthreads will know + * that the channel mgr is running and won't bother trying to + * wake him up. + */ + atomic_dec(&part->channel_mgr_requests); + (void) wait_event_interruptible(part->channel_mgr_wq, + (atomic_read(&part->channel_mgr_requests) > 0 || + (volatile u64) part->local_IPI_amo != 0 || + ((volatile u8) part->act_state == + XPC_P_DEACTIVATING && + atomic_read(&part->nchannels_active) == 0))); + atomic_set(&part->channel_mgr_requests, 1); + + // >>> Does it need to wakeup periodically as well? In case we + // >>> miscalculated the #of kthreads to wakeup or create? + } +} + + +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition until the partition + * goes down. At which time the kthread will teardown the XPC infrastructure + * and then exit. + * + * XPC HB will put the remote partition's XPC per partition specific variables + * physical address into xpc_partitions[partid].remote_vars_part_pa prior to + * calling xpc_partition_up(). + */ +static void +xpc_partition_up(struct xpc_partition *part) +{ + DBUG_ON(part->channels != NULL); + + dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); + + if (xpc_setup_infrastructure(part) != xpcSuccess) { + return; + } + + /* + * The kthread that XPC HB called us with will become the + * channel manager for this partition. It will not return + * back to XPC HB until the partition's XPC infrastructure + * has been dismantled. + */ + + (void) xpc_part_ref(part); /* this will always succeed */ + + if (xpc_make_first_contact(part) == xpcSuccess) { + xpc_channel_mgr(part); + } + + xpc_part_deref(part); + + xpc_teardown_infrastructure(part); +} + + +static int +xpc_activating(void *__partid) +{ + partid_t partid = (u64) __partid; + struct xpc_partition *part = &xpc_partitions[partid]; + unsigned long irq_flags; + struct sched_param param = { sched_priority: MAX_USER_RT_PRIO - 1 }; + int ret; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_DEACTIVATING) { + part->act_state = XPC_P_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + /* indicate the thread is activating */ + DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); + part->act_state = XPC_P_ACTIVATING; + + XPC_SET_REASON(part, 0, 0); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + dev_dbg(xpc_part, "bringing partition %d up\n", partid); + + daemonize("xpc%02d", partid); + + /* + * This thread needs to run at a realtime priority to prevent a + * significant performance degradation. + */ + ret = sched_setscheduler(current, SCHED_FIFO, ¶m); + if (ret != 0) { + dev_warn(xpc_part, "unable to set pid %d to a realtime " + "priority, ret=%d\n", current->pid, ret); + } + + /* allow this thread and its children to run on any CPU */ + set_cpus_allowed(current, CPU_MASK_ALL); + + /* + * Register the remote partition's AMOs with SAL so it can handle + * and cleanup errors within that address range should the remote + * partition go down. We don't unregister this range because it is + * difficult to tell when outstanding writes to the remote partition + * are finished and thus when it is safe to unregister. This should + * not result in wasted space in the SAL xp_addr_region table because + * we should get the same page for remote_amos_page_pa after module + * reloads and system reboots. + */ + if (sn_register_xp_addr_region(part->remote_amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " + "xp_addr region\n", partid); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_INACTIVE; + XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + XPC_ALLOW_HB(partid, xpc_vars); + xpc_IPI_send_activated(part); + + + /* + * xpc_partition_up() holds this thread and marks this partition as + * XPC_P_ACTIVE by calling xpc_hb_mark_active(). + */ + (void) xpc_partition_up(part); + + xpc_mark_partition_inactive(part); + + if (part->reason == xpcReactivating) { + /* interrupting ourselves results in activating partition */ + xpc_IPI_send_reactivate(part); + } + + return 0; +} + + +void +xpc_activate_partition(struct xpc_partition *part) +{ + partid_t partid = XPC_PARTID(part); + unsigned long irq_flags; + pid_t pid; + + + spin_lock_irqsave(&part->act_lock, irq_flags); + + pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); + + DBUG_ON(part->act_state != XPC_P_INACTIVE); + + if (pid > 0) { + part->act_state = XPC_P_ACTIVATION_REQ; + XPC_SET_REASON(part, xpcCloneKThread, __LINE__); + } else { + XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); + } + + spin_unlock_irqrestore(&part->act_lock, irq_flags); +} + + +/* + * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified + * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more + * than one partition, we use an AMO_t structure per partition to indicate + * whether a partition has sent an IPI or not. >>> If it has, then wake up the + * associated kthread to handle it. + * + * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC + * running on other partitions. + * + * Noteworthy Arguments: + * + * irq - Interrupt ReQuest number. NOT USED. + * + * dev_id - partid of IPI's potential sender. + * + * regs - processor's context before the processor entered + * interrupt code. NOT USED. + */ +irqreturn_t +xpc_notify_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs) +{ + partid_t partid = (partid_t) (u64) dev_id; + struct xpc_partition *part = &xpc_partitions[partid]; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + + if (xpc_part_ref(part)) { + xpc_check_for_channel_activity(part); + + xpc_part_deref(part); + } + return IRQ_HANDLED; +} + + +/* + * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor + * because the write to their associated IPI amo completed after the IRQ/IPI + * was received. + */ +void +xpc_dropped_IPI_check(struct xpc_partition *part) +{ + if (xpc_part_ref(part)) { + xpc_check_for_channel_activity(part); + + part->dropped_IPI_timer.expires = jiffies + + XPC_P_DROPPED_IPI_WAIT; + add_timer(&part->dropped_IPI_timer); + xpc_part_deref(part); + } +} + + +void +xpc_activate_kthreads(struct xpc_channel *ch, int needed) +{ + int idle = atomic_read(&ch->kthreads_idle); + int assigned = atomic_read(&ch->kthreads_assigned); + int wakeup; + + + DBUG_ON(needed <= 0); + + if (idle > 0) { + wakeup = (needed > idle) ? idle : needed; + needed -= wakeup; + + dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " + "channel=%d\n", wakeup, ch->partid, ch->number); + + /* only wakeup the requested number of kthreads */ + wake_up_nr(&ch->idle_wq, wakeup); + } + + if (needed <= 0) { + return; + } + + if (needed + assigned > ch->kthreads_assigned_limit) { + needed = ch->kthreads_assigned_limit - assigned; + // >>>should never be less than 0 + if (needed <= 0) { + return; + } + } + + dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", + needed, ch->partid, ch->number); + + xpc_create_kthreads(ch, needed); +} + + +/* + * This function is where XPC's kthreads wait for messages to deliver. + */ +static void +xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) +{ + do { + /* deliver messages to their intended recipients */ + + while ((volatile s64) ch->w_local_GP.get < + (volatile s64) ch->w_remote_GP.put && + !((volatile u32) ch->flags & + XPC_C_DISCONNECTING)) { + xpc_deliver_msg(ch); + } + + if (atomic_inc_return(&ch->kthreads_idle) > + ch->kthreads_idle_limit) { + /* too many idle kthreads on this channel */ + atomic_dec(&ch->kthreads_idle); + break; + } + + dev_dbg(xpc_chan, "idle kthread calling " + "wait_event_interruptible_exclusive()\n"); + + (void) wait_event_interruptible_exclusive(ch->idle_wq, + ((volatile s64) ch->w_local_GP.get < + (volatile s64) ch->w_remote_GP.put || + ((volatile u32) ch->flags & + XPC_C_DISCONNECTING))); + + atomic_dec(&ch->kthreads_idle); + + } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING)); +} + + +static int +xpc_daemonize_kthread(void *args) +{ + partid_t partid = XPC_UNPACK_ARG1(args); + u16 ch_number = XPC_UNPACK_ARG2(args); + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + int n_needed; + + + daemonize("xpc%02dc%d", partid, ch_number); + + dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", + partid, ch_number); + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + /* let registerer know that connection has been established */ + + if (atomic_read(&ch->kthreads_assigned) == 1) { + xpc_connected_callout(ch); + + /* + * It is possible that while the callout was being + * made that the remote partition sent some messages. + * If that is the case, we may need to activate + * additional kthreads to help deliver them. We only + * need one less than total #of messages to deliver. + */ + n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1; + if (n_needed > 0 && + !(ch->flags & XPC_C_DISCONNECTING)) { + xpc_activate_kthreads(ch, n_needed); + } + } + + xpc_kthread_waitmsgs(part, ch); + } + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + ((ch->flags & XPC_C_CONNECTCALLOUT) || + (ch->reason != xpcUnregistering && + ch->reason != xpcOtherUnregistering))) { + xpc_disconnected_callout(ch); + } + + + xpc_msgqueue_deref(ch); + + dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", + partid, ch_number); + + xpc_part_deref(part); + return 0; +} + + +/* + * For each partition that XPC has established communications with, there is + * a minimum of one kernel thread assigned to perform any operation that + * may potentially sleep or block (basically the callouts to the asynchronous + * functions registered via xpc_connect()). + * + * Additional kthreads are created and destroyed by XPC as the workload + * demands. + * + * A kthread is assigned to one of the active channels that exists for a given + * partition. + */ +void +xpc_create_kthreads(struct xpc_channel *ch, int needed) +{ + unsigned long irq_flags; + pid_t pid; + u64 args = XPC_PACK_ARGS(ch->partid, ch->number); + + + while (needed-- > 0) { + pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); + if (pid < 0) { + /* the fork failed */ + + if (atomic_read(&ch->kthreads_assigned) < + ch->kthreads_idle_limit) { + /* + * Flag this as an error only if we have an + * insufficient #of kthreads for the channel + * to function. + * + * No xpc_msgqueue_ref() is needed here since + * the channel mgr is doing this. + */ + spin_lock_irqsave(&ch->lock, irq_flags); + XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + break; + } + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + (void) xpc_part_ref(&xpc_partitions[ch->partid]); + xpc_msgqueue_ref(ch); + atomic_inc(&ch->kthreads_assigned); + ch->kthreads_created++; // >>> temporary debug only!!! + } +} + + +void +xpc_disconnect_wait(int ch_number) +{ + partid_t partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + + /* now wait for all callouts to the caller's function to cease */ + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + +// >>> how do we keep from falling into the window between our check and going +// >>> down and coming back up where sema is re-inited? + if (ch->flags & XPC_C_SETUP) { + (void) down(&ch->teardown_sema); + } + + xpc_part_deref(part); + } + } +} + + +static void +xpc_do_exit(void) +{ + partid_t partid; + int active_part_count; + struct xpc_partition *part; + + + /* now it's time to eliminate our heartbeat */ + del_timer_sync(&xpc_hb_timer); + xpc_vars->heartbeating_to_mask = 0; + + /* indicate to others that our reserved page is uninitialized */ + xpc_rsvd_page->vars_pa = 0; + + /* + * Ignore all incoming interrupts. Without interupts the heartbeat + * checker won't activate any new partitions that may come up. + */ + free_irq(SGI_XPC_ACTIVATE, NULL); + + /* + * Cause the heartbeat checker and the discovery threads to exit. + * We don't want them attempting to activate new partitions as we + * try to deactivate the existing ones. + */ + xpc_exiting = 1; + wake_up_interruptible(&xpc_act_IRQ_wq); + + /* wait for the heartbeat checker thread to mark itself inactive */ + down(&xpc_hb_checker_exited); + + /* wait for the discovery thread to mark itself inactive */ + down(&xpc_discovery_exited); + + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(0.3 * HZ); + set_current_state(TASK_RUNNING); + + + /* wait for all partitions to become inactive */ + + do { + active_part_count = 0; + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + if (part->act_state != XPC_P_INACTIVE) { + active_part_count++; + + XPC_DEACTIVATE_PARTITION(part, xpcUnloading); + } + } + + if (active_part_count) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(0.3 * HZ); + set_current_state(TASK_RUNNING); + } + + } while (active_part_count > 0); + + + /* close down protections for IPI operations */ + xpc_restrict_IPI_ops(); + + + /* clear the interface to XPC's functions */ + xpc_clear_interface(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } +} + + +int __init +xpc_init(void) +{ + int ret; + partid_t partid; + struct xpc_partition *part; + pid_t pid; + + + /* + * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng + * both a partition's reserved page and its XPC variables. Its size was + * based on the size of a reserved page. So we need to ensure that the + * XPC variables will fit as well. + */ + if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) { + dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n"); + return -EPERM; + } + DBUG_ON((u64) xpc_remote_copy_buffer != + L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer)); + + snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part"); + snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan"); + + xpc_sysctl = register_sysctl_table(xpc_sys_dir, 1); + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part)); + + part->act_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_INACTIVE; + XPC_SET_REASON(part, 0, 0); + part->setup_state = XPC_P_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + /* + * Open up protections for IPI operations (and AMO operations on + * Shub 1.1 systems). + */ + xpc_allow_IPI_ops(); + + /* + * Interrupts being processed will increment this atomic variable and + * awaken the heartbeat thread which will process the interrupts. + */ + atomic_set(&xpc_act_IRQ_rcvd, 0); + + /* + * This is safe to do before the xpc_hb_checker thread has started + * because the handler releases a wait queue. If an interrupt is + * received before the thread is waiting, it will not go to sleep, + * but rather immediately process the interrupt. + */ + ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0, + "xpc hb", NULL); + if (ret != 0) { + dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " + "errno=%d\n", -ret); + + xpc_restrict_IPI_ops(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } + return -EBUSY; + } + + /* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ + xpc_rsvd_page = xpc_rsvd_page_init(); + if (xpc_rsvd_page == NULL) { + dev_err(xpc_part, "could not setup our reserved page\n"); + + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_restrict_IPI_ops(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } + return -EBUSY; + } + + + /* + * Set the beating to other partitions into motion. This is + * the last requirement for other partitions' discovery to + * initiate communications with us. + */ + init_timer(&xpc_hb_timer); + xpc_hb_timer.function = xpc_hb_beater; + xpc_hb_beater(0); + + + /* + * The real work-horse behind xpc. This processes incoming + * interrupts and monitors remote heartbeats. + */ + pid = kernel_thread(xpc_hb_checker, NULL, 0); + if (pid < 0) { + dev_err(xpc_part, "failed while forking hb check thread\n"); + + /* indicate to others that our reserved page is uninitialized */ + xpc_rsvd_page->vars_pa = 0; + + del_timer_sync(&xpc_hb_timer); + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_restrict_IPI_ops(); + + if (xpc_sysctl) { + unregister_sysctl_table(xpc_sysctl); + } + return -EBUSY; + } + + + /* + * Startup a thread that will attempt to discover other partitions to + * activate based on info provided by SAL. This new thread is short + * lived and will exit once discovery is complete. + */ + pid = kernel_thread(xpc_initiate_discovery, NULL, 0); + if (pid < 0) { + dev_err(xpc_part, "failed while forking discovery thread\n"); + + /* mark this new thread as a non-starter */ + up(&xpc_discovery_exited); + + xpc_do_exit(); + return -EBUSY; + } + + + /* set the interface to point at XPC's functions */ + xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, + xpc_initiate_allocate, xpc_initiate_send, + xpc_initiate_send_notify, xpc_initiate_received, + xpc_initiate_partid_to_nasids); + + return 0; +} +module_init(xpc_init); + + +void __exit +xpc_exit(void) +{ + xpc_do_exit(); +} +module_exit(xpc_exit); + + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); +MODULE_LICENSE("GPL"); + +module_param(xpc_hb_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " + "heartbeat increments."); + +module_param(xpc_hb_check_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " + "heartbeat checks."); + diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c new file mode 100644 index 000000000000..2c3c4a8af553 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -0,0 +1,984 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + + +/* + * Cross Partition Communication (XPC) partition support. + * + * This is the part of XPC that detects the presence/absence of + * other partitions. It provides a heartbeat and monitors the + * heartbeats of other partitions. + * + */ + + +#include <linux/kernel.h> +#include <linux/sysctl.h> +#include <linux/cache.h> +#include <linux/mmzone.h> +#include <linux/nodemask.h> +#include <asm/sn/bte.h> +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/addrs.h> +#include "xpc.h" + + +/* XPC is exiting flag */ +int xpc_exiting; + + +/* SH_IPI_ACCESS shub register value on startup */ +static u64 xpc_sh1_IPI_access; +static u64 xpc_sh2_IPI_access0; +static u64 xpc_sh2_IPI_access1; +static u64 xpc_sh2_IPI_access2; +static u64 xpc_sh2_IPI_access3; + + +/* original protection values for each node */ +u64 xpc_prot_vec[MAX_COMPACT_NODES]; + + +/* this partition's reserved page */ +struct xpc_rsvd_page *xpc_rsvd_page; + +/* this partition's XPC variables (within the reserved page) */ +struct xpc_vars *xpc_vars; +struct xpc_vars_part *xpc_vars_part; + + +/* + * For performance reasons, each entry of xpc_partitions[] is cacheline + * aligned. And xpc_partitions[] is padded with an additional entry at the + * end so that the last legitimate entry doesn't share its cacheline with + * another variable. + */ +struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; + + +/* + * Generic buffer used to store a local copy of the remote partitions + * reserved page or XPC variables. + * + * xpc_discovery runs only once and is a seperate thread that is + * very likely going to be processing in parallel with receiving + * interrupts. + */ +char ____cacheline_aligned + xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE]; + + +/* systune related variables */ +int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; +int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT; + + +/* + * Given a nasid, get the physical address of the partition's reserved page + * for that nasid. This function returns 0 on any error. + */ +static u64 +xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) +{ + bte_result_t bte_res; + s64 status; + u64 cookie = 0; + u64 rp_pa = nasid; /* seed with nasid */ + u64 len = 0; + + + while (1) { + + status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, + &len); + + dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" + "0x%016lx, address=0x%016lx, len=0x%016lx\n", + status, cookie, rp_pa, len); + + if (status != SALRET_MORE_PASSES) { + break; + } + + if (len > buf_size) { + dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len); + status = SALRET_ERROR; + break; + } + + bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bte_res != BTE_SUCCESS) { + dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); + status = SALRET_ERROR; + break; + } + } + + if (status != SALRET_OK) { + rp_pa = 0; + } + dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); + return rp_pa; +} + + +/* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ +struct xpc_rsvd_page * +xpc_rsvd_page_init(void) +{ + struct xpc_rsvd_page *rp; + AMO_t *amos_page; + u64 rp_pa, next_cl, nasid_array = 0; + int i, ret; + + + /* get the local reserved page's address */ + + rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), + (u64) xpc_remote_copy_buffer, + XPC_RSVD_PAGE_ALIGNED_SIZE); + if (rp_pa == 0) { + dev_err(xpc_part, "SAL failed to locate the reserved page\n"); + return NULL; + } + rp = (struct xpc_rsvd_page *) __va(rp_pa); + + if (rp->partid != sn_partition_id) { + dev_err(xpc_part, "the reserved page's partid of %d should be " + "%d\n", rp->partid, sn_partition_id); + return NULL; + } + + rp->version = XPC_RP_VERSION; + + /* + * Place the XPC variables on the cache line following the + * reserved page structure. + */ + next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE; + xpc_vars = (struct xpc_vars *) next_cl; + + /* + * Before clearing xpc_vars, see if a page of AMOs had been previously + * allocated. If not we'll need to allocate one and set permissions + * so that cross-partition AMOs are allowed. + * + * The allocated AMO page needs MCA reporting to remain disabled after + * XPC has unloaded. To make this work, we keep a copy of the pointer + * to this page (i.e., amos_page) in the struct xpc_vars structure, + * which is pointed to by the reserved page, and re-use that saved copy + * on subsequent loads of XPC. This AMO page is never freed, and its + * memory protections are never restricted. + */ + if ((amos_page = xpc_vars->amos_page) == NULL) { + amos_page = (AMO_t *) mspec_kalloc_page(0); + if (amos_page == NULL) { + dev_err(xpc_part, "can't allocate page of AMOs\n"); + return NULL; + } + + /* + * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems + * when xpc_allow_IPI_ops() is called via xpc_hb_init(). + */ + if (!enable_shub_wars_1_1()) { + ret = sn_change_memprotect(ia64_tpa((u64) amos_page), + PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xpc_part, "can't change memory " + "protections\n"); + mspec_kfree_page((unsigned long) amos_page); + return NULL; + } + } + } else if (!IS_AMO_ADDRESS((u64) amos_page)) { + /* + * EFI's XPBOOT can also set amos_page in the reserved page, + * but it happens to leave it as an uncached physical address + * and we need it to be an uncached virtual, so we'll have to + * convert it. + */ + if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) { + dev_err(xpc_part, "previously used amos_page address " + "is bad = 0x%p\n", (void *) amos_page); + return NULL; + } + amos_page = (AMO_t *) TO_AMO((u64) amos_page); + } + + memset(xpc_vars, 0, sizeof(struct xpc_vars)); + + /* + * Place the XPC per partition specific variables on the cache line + * following the XPC variables structure. + */ + next_cl += XPC_VARS_ALIGNED_SIZE; + memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) * + XP_MAX_PARTITIONS); + xpc_vars_part = (struct xpc_vars_part *) next_cl; + xpc_vars->vars_part_pa = __pa(next_cl); + + xpc_vars->version = XPC_V_VERSION; + xpc_vars->act_nasid = cpuid_to_nasid(0); + xpc_vars->act_phys_cpuid = cpu_physical_id(0); + xpc_vars->amos_page = amos_page; /* save for next load of XPC */ + + + /* + * Initialize the activation related AMO variables. + */ + xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS); + for (i = 1; i < XP_NASID_MASK_WORDS; i++) { + xpc_IPI_init(i + XP_MAX_PARTITIONS); + } + /* export AMO page's physical address to other partitions */ + xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); + + /* + * This signifies to the remote partition that our reserved + * page is initialized. + */ + (volatile u64) rp->vars_pa = __pa(xpc_vars); + + return rp; +} + + +/* + * Change protections to allow IPI operations (and AMO operations on + * Shub 1.1 systems). + */ +void +xpc_allow_IPI_ops(void) +{ + int node; + int nasid; + + + // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. + + if (is_shub2()) { + xpc_sh2_IPI_access0 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); + xpc_sh2_IPI_access1 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); + xpc_sh2_IPI_access2 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); + xpc_sh2_IPI_access3 = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + -1UL); + } + + } else { + xpc_sh1_IPI_access = + (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + -1UL); + + /* + * Since the BIST collides with memory operations on + * SHUB 1.1 sn_change_memprotect() cannot be used. + */ + if (enable_shub_wars_1_1()) { + /* open up everything */ + xpc_prot_vec[node] = (u64) HUB_L((u64 *) + GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0)); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + -1UL); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + -1UL); + } + } + } +} + + +/* + * Restrict protections to disallow IPI operations (and AMO operations on + * Shub 1.1 systems). + */ +void +xpc_restrict_IPI_ops(void) +{ + int node; + int nasid; + + + // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. + + if (is_shub2()) { + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + xpc_sh2_IPI_access0); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + xpc_sh2_IPI_access1); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + xpc_sh2_IPI_access2); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + xpc_sh2_IPI_access3); + } + + } else { + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + xpc_sh1_IPI_access); + + if (enable_shub_wars_1_1()) { + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + xpc_prot_vec[node]); + HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + xpc_prot_vec[node]); + } + } + } +} + + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +void +xpc_check_remote_hb(void) +{ + struct xpc_vars *remote_vars; + struct xpc_partition *part; + partid_t partid; + bte_result_t bres; + + + remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; + + for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { + if (partid == sn_partition_id) { + continue; + } + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_INACTIVE || + part->act_state == XPC_P_DEACTIVATING) { + continue; + } + + /* pull the remote_hb cache line */ + bres = xp_bte_copy(part->remote_vars_pa, + ia64_tpa((u64) remote_vars), + XPC_VARS_ALIGNED_SIZE, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bres != BTE_SUCCESS) { + XPC_DEACTIVATE_PARTITION(part, + xpc_map_bte_errors(bres)); + continue; + } + + dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" + " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, + remote_vars->heartbeat, part->last_heartbeat, + remote_vars->kdb_status, + remote_vars->heartbeating_to_mask); + + if (((remote_vars->heartbeat == part->last_heartbeat) && + (remote_vars->kdb_status == 0)) || + !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { + + XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); + continue; + } + + part->last_heartbeat = remote_vars->heartbeat; + } +} + + +/* + * Get a copy of the remote partition's rsvd page. + * + * remote_rp points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE. + */ +static enum xpc_retval +xpc_get_remote_rp(int nasid, u64 *discovered_nasids, + struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) +{ + int bres, i; + + + /* get the reserved page's physical address */ + + *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, + XPC_RSVD_PAGE_ALIGNED_SIZE); + if (*remote_rsvd_page_pa == 0) { + return xpcNoRsvdPageAddr; + } + + + /* pull over the reserved page structure */ + + bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), + XPC_RSVD_PAGE_ALIGNED_SIZE, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bres != BTE_SUCCESS) { + return xpc_map_bte_errors(bres); + } + + + if (discovered_nasids != NULL) { + for (i = 0; i < XP_NASID_MASK_WORDS; i++) { + discovered_nasids[i] |= remote_rp->part_nasids[i]; + } + } + + + /* check that the partid is for another partition */ + + if (remote_rp->partid < 1 || + remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { + return xpcInvalidPartid; + } + + if (remote_rp->partid == sn_partition_id) { + return xpcLocalPartid; + } + + + if (XPC_VERSION_MAJOR(remote_rp->version) != + XPC_VERSION_MAJOR(XPC_RP_VERSION)) { + return xpcBadVersion; + } + + return xpcSuccess; +} + + +/* + * Get a copy of the remote partition's XPC variables. + * + * remote_vars points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_VARS_ALIGNED_SIZE. + */ +static enum xpc_retval +xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) +{ + int bres; + + + if (remote_vars_pa == 0) { + return xpcVarsNotSet; + } + + + /* pull over the cross partition variables */ + + bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars), + XPC_VARS_ALIGNED_SIZE, + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (bres != BTE_SUCCESS) { + return xpc_map_bte_errors(bres); + } + + if (XPC_VERSION_MAJOR(remote_vars->version) != + XPC_VERSION_MAJOR(XPC_V_VERSION)) { + return xpcBadVersion; + } + + return xpcSuccess; +} + + +/* + * Prior code has determine the nasid which generated an IPI. Inspect + * that nasid to determine if its partition needs to be activated or + * deactivated. + * + * A partition is consider "awaiting activation" if our partition + * flags indicate it is not active and it has a heartbeat. A + * partition is considered "awaiting deactivation" if our partition + * flags indicate it is active but it has no heartbeat or it is not + * sending its heartbeat to us. + * + * To determine the heartbeat, the remote nasid must have a properly + * initialized reserved page. + */ +static void +xpc_identify_act_IRQ_req(int nasid) +{ + struct xpc_rsvd_page *remote_rp; + struct xpc_vars *remote_vars; + u64 remote_rsvd_page_pa; + u64 remote_vars_pa; + partid_t partid; + struct xpc_partition *part; + enum xpc_retval ret; + + + /* pull over the reserved page structure */ + + remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; + + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); + if (ret != xpcSuccess) { + dev_warn(xpc_part, "unable to get reserved page from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + return; + } + + remote_vars_pa = remote_rp->vars_pa; + partid = remote_rp->partid; + part = &xpc_partitions[partid]; + + + /* pull over the cross partition variables */ + + remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; + + ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); + if (ret != xpcSuccess) { + + dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + return; + } + + + part->act_IRQ_rcvd++; + + dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " + "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, + remote_vars->heartbeat, remote_vars->heartbeating_to_mask); + + + if (part->act_state == XPC_P_INACTIVE) { + + part->remote_rp_pa = remote_rsvd_page_pa; + dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", + part->remote_rp_pa); + + part->remote_vars_pa = remote_vars_pa; + dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", + part->remote_vars_pa); + + part->last_heartbeat = remote_vars->heartbeat; + dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", + part->last_heartbeat); + + part->remote_vars_part_pa = remote_vars->vars_part_pa; + dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", + part->remote_vars_part_pa); + + part->remote_act_nasid = remote_vars->act_nasid; + dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", + part->remote_act_nasid); + + part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; + dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", + part->remote_act_phys_cpuid); + + part->remote_amos_page_pa = remote_vars->amos_page_pa; + dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", + part->remote_amos_page_pa); + + xpc_activate_partition(part); + + } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || + !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { + + part->reactivate_nasid = nasid; + XPC_DEACTIVATE_PARTITION(part, xpcReactivating); + } +} + + +/* + * Loop through the activation AMO variables and process any bits + * which are set. Each bit indicates a nasid sending a partition + * activation or deactivation request. + * + * Return #of IRQs detected. + */ +int +xpc_identify_act_IRQ_sender(void) +{ + int word, bit; + u64 nasid_mask; + u64 nasid; /* remote nasid */ + int n_IRQs_detected = 0; + AMO_t *act_amos; + struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; + + + act_amos = xpc_vars->act_amos; + + + /* scan through act AMO variable looking for non-zero entries */ + for (word = 0; word < XP_NASID_MASK_WORDS; word++) { + + nasid_mask = xpc_IPI_receive(&act_amos[word]); + if (nasid_mask == 0) { + /* no IRQs from nasids in this variable */ + continue; + } + + dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, + nasid_mask); + + + /* + * If this nasid has been added to the machine since + * our partition was reset, this will retain the + * remote nasid in our reserved pages machine mask. + * This is used in the event of module reload. + */ + rp->mach_nasids[word] |= nasid_mask; + + + /* locate the nasid(s) which sent interrupts */ + + for (bit = 0; bit < (8 * sizeof(u64)); bit++) { + if (nasid_mask & (1UL << bit)) { + n_IRQs_detected++; + nasid = XPC_NASID_FROM_W_B(word, bit); + dev_dbg(xpc_part, "interrupt from nasid %ld\n", + nasid); + xpc_identify_act_IRQ_req(nasid); + } + } + } + return n_IRQs_detected; +} + + +/* + * Mark specified partition as active. + */ +enum xpc_retval +xpc_mark_partition_active(struct xpc_partition *part) +{ + unsigned long irq_flags; + enum xpc_retval ret; + + + dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + if (part->act_state == XPC_P_ACTIVATING) { + part->act_state = XPC_P_ACTIVE; + ret = xpcSuccess; + } else { + DBUG_ON(part->reason == xpcSuccess); + ret = part->reason; + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + return ret; +} + + +/* + * Notify XPC that the partition is down. + */ +void +xpc_deactivate_partition(const int line, struct xpc_partition *part, + enum xpc_retval reason) +{ + unsigned long irq_flags; + partid_t partid = XPC_PARTID(part); + + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_INACTIVE) { + XPC_SET_REASON(part, reason, line); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + if (reason == xpcReactivating) { + /* we interrupt ourselves to reactivate partition */ + xpc_IPI_send_reactivate(part); + } + return; + } + if (part->act_state == XPC_P_DEACTIVATING) { + if ((part->reason == xpcUnloading && reason != xpcUnloading) || + reason == xpcReactivating) { + XPC_SET_REASON(part, reason, line); + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + return; + } + + part->act_state = XPC_P_DEACTIVATING; + XPC_SET_REASON(part, reason, line); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + XPC_DISALLOW_HB(partid, xpc_vars); + + dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, + reason); + + xpc_partition_down(part, reason); +} + + +/* + * Mark specified partition as active. + */ +void +xpc_mark_partition_inactive(struct xpc_partition *part) +{ + unsigned long irq_flags; + + + dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", + XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; +} + + +/* + * SAL has provided a partition and machine mask. The partition mask + * contains a bit for each even nasid in our partition. The machine + * mask contains a bit for each even nasid in the entire machine. + * + * Using those two bit arrays, we can determine which nasids are + * known in the machine. Each should also have a reserved page + * initialized if they are available for partitioning. + */ +void +xpc_discovery(void) +{ + void *remote_rp_base; + struct xpc_rsvd_page *remote_rp; + struct xpc_vars *remote_vars; + u64 remote_rsvd_page_pa; + u64 remote_vars_pa; + int region; + int max_regions; + int nasid; + struct xpc_rsvd_page *rp; + partid_t partid; + struct xpc_partition *part; + u64 *discovered_nasids; + enum xpc_retval ret; + + + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE, + GFP_KERNEL, &remote_rp_base); + if (remote_rp == NULL) { + return; + } + remote_vars = (struct xpc_vars *) remote_rp; + + + discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS, + GFP_KERNEL); + if (discovered_nasids == NULL) { + kfree(remote_rp_base); + return; + } + memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS); + + rp = (struct xpc_rsvd_page *) xpc_rsvd_page; + + /* + * The term 'region' in this context refers to the minimum number of + * nodes that can comprise an access protection grouping. The access + * protection is in regards to memory, IOI and IPI. + */ +//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or +//>>> include/asm-ia64/sn/addrs.h +#define SH1_MAX_REGIONS 64 +#define SH2_MAX_REGIONS 256 + max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS; + + for (region = 0; region < max_regions; region++) { + + if ((volatile int) xpc_exiting) { + break; + } + + dev_dbg(xpc_part, "searching region %d\n", region); + + for (nasid = (region * sn_region_size * 2); + nasid < ((region + 1) * sn_region_size * 2); + nasid += 2) { + + if ((volatile int) xpc_exiting) { + break; + } + + dev_dbg(xpc_part, "checking nasid %d\n", nasid); + + + if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) { + dev_dbg(xpc_part, "PROM indicates Nasid %d is " + "part of the local partition; skipping " + "region\n", nasid); + break; + } + + if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) { + dev_dbg(xpc_part, "PROM indicates Nasid %d was " + "not on Numa-Link network at reset\n", + nasid); + continue; + } + + if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { + dev_dbg(xpc_part, "Nasid %d is part of a " + "partition which was previously " + "discovered\n", nasid); + continue; + } + + + /* pull over the reserved page structure */ + + ret = xpc_get_remote_rp(nasid, discovered_nasids, + remote_rp, &remote_rsvd_page_pa); + if (ret != xpcSuccess) { + dev_dbg(xpc_part, "unable to get reserved page " + "from nasid %d, reason=%d\n", nasid, + ret); + + if (ret == xpcLocalPartid) { + break; + } + continue; + } + + remote_vars_pa = remote_rp->vars_pa; + + partid = remote_rp->partid; + part = &xpc_partitions[partid]; + + + /* pull over the cross partition variables */ + + ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); + if (ret != xpcSuccess) { + dev_dbg(xpc_part, "unable to get XPC variables " + "from nasid %d, reason=%d\n", nasid, + ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + continue; + } + + if (part->act_state != XPC_P_INACTIVE) { + dev_dbg(xpc_part, "partition %d on nasid %d is " + "already activating\n", partid, nasid); + break; + } + + /* + * Register the remote partition's AMOs with SAL so it + * can handle and cleanup errors within that address + * range should the remote partition go down. We don't + * unregister this range because it is difficult to + * tell when outstanding writes to the remote partition + * are finished and thus when it is thus safe to + * unregister. This should not result in wasted space + * in the SAL xp_addr_region table because we should + * get the same page for remote_act_amos_pa after + * module reloads and system reboots. + */ + if (sn_register_xp_addr_region( + remote_vars->amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_dbg(xpc_part, "partition %d failed to " + "register xp_addr region 0x%016lx\n", + partid, remote_vars->amos_page_pa); + + XPC_SET_REASON(part, xpcPhysAddrRegFailed, + __LINE__); + break; + } + + /* + * The remote nasid is valid and available. + * Send an interrupt to that nasid to notify + * it that we are ready to begin activation. + */ + dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " + "nasid %d, phys_cpuid 0x%x\n", + remote_vars->amos_page_pa, + remote_vars->act_nasid, + remote_vars->act_phys_cpuid); + + xpc_IPI_send_activate(remote_vars); + } + } + + kfree(discovered_nasids); + kfree(remote_rp_base); +} + + +/* + * Given a partid, get the nasids owned by that partition from the + * remote partition's reserved page. + */ +enum xpc_retval +xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) +{ + struct xpc_partition *part; + u64 part_nasid_pa; + int bte_res; + + + part = &xpc_partitions[partid]; + if (part->remote_rp_pa == 0) { + return xpcPartitionDown; + } + + part_nasid_pa = part->remote_rp_pa + + (u64) &((struct xpc_rsvd_page *) 0)->part_nasids; + + bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask), + L1_CACHE_ALIGN(XP_NASID_MASK_BYTES), + (BTE_NOTIFY | BTE_WACQUIRE), NULL); + + return xpc_map_bte_errors(bte_res); +} + diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c new file mode 100644 index 000000000000..78c13d676fa6 --- /dev/null +++ b/arch/ia64/sn/kernel/xpnet.c @@ -0,0 +1,715 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. + */ + + +/* + * Cross Partition Network Interface (XPNET) support + * + * XPNET provides a virtual network layered on top of the Cross + * Partition communication layer. + * + * XPNET provides direct point-to-point and broadcast-like support + * for an ethernet-like device. The ethernet broadcast medium is + * replaced with a point-to-point message structure which passes + * pointers to a DMA-capable block that a remote partition should + * retrieve and pass to the upper level networking layer. + * + */ + + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <asm/sn/bte.h> +#include <asm/sn/io.h> +#include <asm/sn/sn_sal.h> +#include <asm/types.h> +#include <asm/atomic.h> +#include <asm/sn/xp.h> + + +/* + * The message payload transferred by XPC. + * + * buf_pa is the physical address where the DMA should pull from. + * + * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a + * cacheline boundary. To accomplish this, we record the number of + * bytes from the beginning of the first cacheline to the first useful + * byte of the skb (leadin_ignore) and the number of bytes from the + * last useful byte of the skb to the end of the last cacheline + * (tailout_ignore). + * + * size is the number of bytes to transfer which includes the skb->len + * (useful bytes of the senders skb) plus the leadin and tailout + */ +struct xpnet_message { + u16 version; /* Version for this message */ + u16 embedded_bytes; /* #of bytes embedded in XPC message */ + u32 magic; /* Special number indicating this is xpnet */ + u64 buf_pa; /* phys address of buffer to retrieve */ + u32 size; /* #of bytes in buffer */ + u8 leadin_ignore; /* #of bytes to ignore at the beginning */ + u8 tailout_ignore; /* #of bytes to ignore at the end */ + unsigned char data; /* body of small packets */ +}; + +/* + * Determine the size of our message, the cacheline aligned size, + * and then the number of message will request from XPC. + * + * XPC expects each message to exist in an individual cacheline. + */ +#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET) +#define XPNET_MSG_DATA_MAX \ + (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data)) +#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE)) +#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE) + + +#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) +#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) + +/* + * Version number of XPNET implementation. XPNET can always talk to versions + * with same major #, and never talk to versions with a different version. + */ +#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) +#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) + +#define XPNET_VERSION _XPNET_VERSION(1,0) /* version 1.0 */ +#define XPNET_VERSION_EMBED _XPNET_VERSION(1,1) /* version 1.1 */ +#define XPNET_MAGIC 0x88786984 /* "XNET" */ + +#define XPNET_VALID_MSG(_m) \ + ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ + && (msg->magic == XPNET_MAGIC)) + +#define XPNET_DEVICE_NAME "xp0" + + +/* + * When messages are queued with xpc_send_notify, a kmalloc'd buffer + * of the following type is passed as a notification cookie. When the + * notification function is called, we use the cookie to decide + * whether all outstanding message sends have completed. The skb can + * then be released. + */ +struct xpnet_pending_msg { + struct list_head free_list; + struct sk_buff *skb; + atomic_t use_count; +}; + +/* driver specific structure pointed to by the device structure */ +struct xpnet_dev_private { + struct net_device_stats stats; +}; + +struct net_device *xpnet_device; + +/* + * When we are notified of other partitions activating, we add them to + * our bitmask of partitions to which we broadcast. + */ +static u64 xpnet_broadcast_partitions; +/* protect above */ +static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED; + +/* + * Since the Block Transfer Engine (BTE) is being used for the transfer + * and it relies upon cache-line size transfers, we need to reserve at + * least one cache-line for head and tail alignment. The BTE is + * limited to 8MB transfers. + * + * Testing has shown that changing MTU to greater than 64KB has no effect + * on TCP as the two sides negotiate a Max Segment Size that is limited + * to 64K. Other protocols May use packets greater than this, but for + * now, the default is 64KB. + */ +#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 32KB has been determined to be the ideal */ +#define XPNET_DEF_MTU (0x8000UL) + + +/* + * The partition id is encapsulated in the MAC address. The following + * define locates the octet the partid is in. + */ +#define XPNET_PARTID_OCTET 1 +#define XPNET_LICENSE_OCTET 2 + + +/* + * Define the XPNET debug device structure that is to be used with dev_dbg(), + * dev_err(), dev_warn(), and dev_info(). + */ +struct device_driver xpnet_dbg_name = { + .name = "xpnet" +}; + +struct device xpnet_dbg_subname = { + .bus_id = {0}, /* set to "" */ + .driver = &xpnet_dbg_name +}; + +struct device *xpnet = &xpnet_dbg_subname; + +/* + * Packet was recevied by XPC and forwarded to us. + */ +static void +xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg) +{ + struct sk_buff *skb; + bte_result_t bret; + struct xpnet_dev_private *priv = + (struct xpnet_dev_private *) xpnet_device->priv; + + + if (!XPNET_VALID_MSG(msg)) { + /* + * Packet with a different XPC version. Ignore. + */ + xpc_received(partid, channel, (void *) msg); + + priv->stats.rx_errors++; + + return; + } + dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + + /* reserve an extra cache line */ + skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); + if (!skb) { + dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", + msg->size + L1_CACHE_BYTES); + + xpc_received(partid, channel, (void *) msg); + + priv->stats.rx_errors++; + + return; + } + + /* + * The allocated skb has some reserved space. + * In order to use bte_copy, we need to get the + * skb->data pointer moved forward. + */ + skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & + (L1_CACHE_BYTES - 1)) + + msg->leadin_ignore)); + + /* + * Update the tail pointer to indicate data actually + * transferred. + */ + skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); + + /* + * Move the data over from the the other side. + */ + if ((XPNET_VERSION_MINOR(msg->version) == 1) && + (msg->embedded_bytes != 0)) { + dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " + "%lu)\n", skb->data, &msg->data, + (size_t) msg->embedded_bytes); + + memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes); + } else { + dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" + "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, + (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), + msg->size); + + bret = bte_copy(msg->buf_pa, + __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)), + msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL); + + if (bret != BTE_SUCCESS) { + // >>> Need better way of cleaning skb. Currently skb + // >>> appears in_use and we can't just call + // >>> dev_kfree_skb. + dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned " + "error=0x%x\n", (void *)msg->buf_pa, + (void *)__pa((u64)skb->data & + ~(L1_CACHE_BYTES - 1)), + msg->size, bret); + + xpc_received(partid, channel, (void *) msg); + + priv->stats.rx_errors++; + + return; + } + } + + dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *) skb->head, + (void *) skb->data, (void *) skb->tail, (void *) skb->end, + skb->len); + + skb->dev = xpnet_device; + skb->protocol = eth_type_trans(skb, xpnet_device); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p " + "skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n", + (void *) skb->head, (void *) skb->data, (void *) skb->tail, + (void *) skb->end, skb->len); + + + xpnet_device->last_rx = jiffies; + priv->stats.rx_packets++; + priv->stats.rx_bytes += skb->len + ETH_HLEN; + + netif_rx_ni(skb); + xpc_received(partid, channel, (void *) msg); +} + + +/* + * This is the handler which XPC calls during any sort of change in + * state or message reception on a connection. + */ +static void +xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel, + void *data, void *key) +{ + long bp; + + + DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); + DBUG_ON(channel != XPC_NET_CHANNEL); + + switch(reason) { + case xpcMsgReceived: /* message received */ + DBUG_ON(data == NULL); + + xpnet_receive(partid, channel, (struct xpnet_message *) data); + break; + + case xpcConnected: /* connection completed to a partition */ + spin_lock_bh(&xpnet_broadcast_lock); + xpnet_broadcast_partitions |= 1UL << (partid -1 ); + bp = xpnet_broadcast_partitions; + spin_unlock_bh(&xpnet_broadcast_lock); + + netif_carrier_on(xpnet_device); + + dev_dbg(xpnet, "%s connection created to partition %d; " + "xpnet_broadcast_partitions=0x%lx\n", + xpnet_device->name, partid, bp); + break; + + default: + spin_lock_bh(&xpnet_broadcast_lock); + xpnet_broadcast_partitions &= ~(1UL << (partid -1 )); + bp = xpnet_broadcast_partitions; + spin_unlock_bh(&xpnet_broadcast_lock); + + if (bp == 0) { + netif_carrier_off(xpnet_device); + } + + dev_dbg(xpnet, "%s disconnected from partition %d; " + "xpnet_broadcast_partitions=0x%lx\n", + xpnet_device->name, partid, bp); + break; + + } +} + + +static int +xpnet_dev_open(struct net_device *dev) +{ + enum xpc_retval ret; + + + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, " + "%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, + XPNET_MAX_IDLE_KTHREADS); + + ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, + XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); + if (ret != xpcSuccess) { + dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " + "ret=%d\n", dev->name, ret); + + return -ENOMEM; + } + + dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); + + return 0; +} + + +static int +xpnet_dev_stop(struct net_device *dev) +{ + xpc_disconnect(XPC_NET_CHANNEL); + + dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); + + return 0; +} + + +static int +xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) +{ + /* 68 comes from min TCP+IP+MAC header */ + if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { + dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " + "between 68 and %ld\n", dev->name, new_mtu, + XPNET_MAX_MTU); + return -EINVAL; + } + + dev->mtu = new_mtu; + dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); + return 0; +} + + +/* + * Required for the net_device structure. + */ +static int +xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map) +{ + return 0; +} + + +/* + * Return statistics to the caller. + */ +static struct net_device_stats * +xpnet_dev_get_stats(struct net_device *dev) +{ + struct xpnet_dev_private *priv; + + + priv = (struct xpnet_dev_private *) dev->priv; + + return &priv->stats; +} + + +/* + * Notification that the other end has received the message and + * DMA'd the skb information. At this point, they are done with + * our side. When all recipients are done processing, we + * release the skb and then release our pending message structure. + */ +static void +xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel, + void *__qm) +{ + struct xpnet_pending_msg *queued_msg = + (struct xpnet_pending_msg *) __qm; + + + DBUG_ON(queued_msg == NULL); + + dev_dbg(xpnet, "message to %d notified with reason %d\n", + partid, reason); + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "all acks for skb->head=-x%p\n", + (void *) queued_msg->skb->head); + + dev_kfree_skb_any(queued_msg->skb); + kfree(queued_msg); + } +} + + +/* + * Network layer has formatted a packet (skb) and is ready to place it + * "on the wire". Prepare and send an xpnet_message to all partitions + * which have connected with us and are targets of this packet. + * + * MAC-NOTE: For the XPNET driver, the MAC address contains the + * destination partition_id. If the destination partition id word + * is 0xff, this packet is to broadcast to all partitions. + */ +static int +xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xpnet_pending_msg *queued_msg; + enum xpc_retval ret; + struct xpnet_message *msg; + u64 start_addr, end_addr; + long dp; + u8 second_mac_octet; + partid_t dest_partid; + struct xpnet_dev_private *priv; + u16 embedded_bytes; + + + priv = (struct xpnet_dev_private *) dev->priv; + + + dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *) skb->head, + (void *) skb->data, (void *) skb->tail, (void *) skb->end, + skb->len); + + + /* + * The xpnet_pending_msg tracks how many outstanding + * xpc_send_notifies are relying on this skb. When none + * remain, release the skb. + */ + queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); + if (queued_msg == NULL) { + dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " + "packet\n", sizeof(struct xpnet_pending_msg)); + + priv->stats.tx_errors++; + + return -ENOMEM; + } + + + /* get the beginning of the first cacheline and end of last */ + start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1)); + end_addr = L1_CACHE_ALIGN((u64) skb->tail); + + /* calculate how many bytes to embed in the XPC message */ + embedded_bytes = 0; + if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { + /* skb->data does fit so embed */ + embedded_bytes = skb->len; + } + + + /* + * Since the send occurs asynchronously, we set the count to one + * and begin sending. Any sends that happen to complete before + * we are done sending will not free the skb. We will be left + * with that task during exit. This also handles the case of + * a packet destined for a partition which is no longer up. + */ + atomic_set(&queued_msg->use_count, 1); + queued_msg->skb = skb; + + + second_mac_octet = skb->data[XPNET_PARTID_OCTET]; + if (second_mac_octet == 0xff) { + /* we are being asked to broadcast to all partitions */ + dp = xpnet_broadcast_partitions; + } else if (second_mac_octet != 0) { + dp = xpnet_broadcast_partitions & + (1UL << (second_mac_octet - 1)); + } else { + /* 0 is an invalid partid. Ignore */ + dp = 0; + } + dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); + + /* + * If we wanted to allow promiscous mode to work like an + * unswitched network, this would be a good point to OR in a + * mask of partitions which should be receiving all packets. + */ + + /* + * Main send loop. + */ + for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS; + dest_partid++) { + + + if (!(dp & (1UL << (dest_partid - 1)))) { + /* not destined for this partition */ + continue; + } + + /* remove this partition from the destinations mask */ + dp &= ~(1UL << (dest_partid - 1)); + + + /* found a partition to send to */ + + ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL, + XPC_NOWAIT, (void **)&msg); + if (unlikely(ret != xpcSuccess)) { + continue; + } + + msg->embedded_bytes = embedded_bytes; + if (unlikely(embedded_bytes != 0)) { + msg->version = XPNET_VERSION_EMBED; + dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", + &msg->data, skb->data, (size_t) embedded_bytes); + memcpy(&msg->data, skb->data, (size_t) embedded_bytes); + } else { + msg->version = XPNET_VERSION; + } + msg->magic = XPNET_MAGIC; + msg->size = end_addr - start_addr; + msg->leadin_ignore = (u64) skb->data - start_addr; + msg->tailout_ignore = end_addr - (u64) skb->tail; + msg->buf_pa = __pa(start_addr); + + dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa=" + "0x%lx, msg->size=%u, msg->leadin_ignore=%u, " + "msg->tailout_ignore=%u\n", dest_partid, + XPC_NET_CHANNEL, msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + + atomic_inc(&queued_msg->use_count); + + ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg, + xpnet_send_completed, queued_msg); + if (unlikely(ret != xpcSuccess)) { + atomic_dec(&queued_msg->use_count); + continue; + } + + } + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "no partitions to receive packet destined for " + "%d\n", dest_partid); + + + dev_kfree_skb(skb); + kfree(queued_msg); + } + + priv->stats.tx_packets++; + priv->stats.tx_bytes += skb->len; + + return 0; +} + + +/* + * Deal with transmit timeouts coming from the network layer. + */ +static void +xpnet_dev_tx_timeout (struct net_device *dev) +{ + struct xpnet_dev_private *priv; + + + priv = (struct xpnet_dev_private *) dev->priv; + + priv->stats.tx_errors++; + return; +} + + +static int __init +xpnet_init(void) +{ + int i; + u32 license_num; + int result = -ENOMEM; + + + dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); + + /* + * use ether_setup() to init the majority of our device + * structure and then override the necessary pieces. + */ + xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private), + XPNET_DEVICE_NAME, ether_setup); + if (xpnet_device == NULL) { + return -ENOMEM; + } + + netif_carrier_off(xpnet_device); + + xpnet_device->mtu = XPNET_DEF_MTU; + xpnet_device->change_mtu = xpnet_dev_change_mtu; + xpnet_device->open = xpnet_dev_open; + xpnet_device->get_stats = xpnet_dev_get_stats; + xpnet_device->stop = xpnet_dev_stop; + xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit; + xpnet_device->tx_timeout = xpnet_dev_tx_timeout; + xpnet_device->set_config = xpnet_dev_set_config; + + /* + * Multicast assumes the LSB of the first octet is set for multicast + * MAC addresses. We chose the first octet of the MAC to be unlikely + * to collide with any vendor's officially issued MAC. + */ + xpnet_device->dev_addr[0] = 0xfe; + xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id; + license_num = sn_partition_serial_number_val(); + for (i = 3; i >= 0; i--) { + xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] = + license_num & 0xff; + license_num = license_num >> 8; + } + + /* + * ether_setup() sets this to a multicast device. We are + * really not supporting multicast at this time. + */ + xpnet_device->flags &= ~IFF_MULTICAST; + + /* + * No need to checksum as it is a DMA transfer. The BTE will + * report an error if the data is not retrievable and the + * packet will be dropped. + */ + xpnet_device->features = NETIF_F_NO_CSUM; + + result = register_netdev(xpnet_device); + if (result != 0) { + free_netdev(xpnet_device); + } + + return result; +} +module_init(xpnet_init); + + +static void __exit +xpnet_exit(void) +{ + dev_info(xpnet, "unregistering network device %s\n", + xpnet_device[0].name); + + unregister_netdev(xpnet_device); + + free_netdev(xpnet_device); +} +module_exit(xpnet_exit); + + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); +MODULE_LICENSE("GPL"); + diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile index b5dca0097a8e..2f915bce25f9 100644 --- a/arch/ia64/sn/pci/Makefile +++ b/arch/ia64/sn/pci/Makefile @@ -7,4 +7,4 @@ # # Makefile for the sn pci general routines. -obj-y := pci_dma.o pcibr/ +obj-y := pci_dma.o tioca_provider.o pcibr/ diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index f680824f819d..5da9bdbde7cb 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -12,9 +12,8 @@ #include <linux/module.h> #include <asm/dma.h> #include <asm/sn/sn_sal.h> -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" -#include "pci/pcibr_provider.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) #define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) @@ -79,7 +78,8 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size, { void *cpuaddr; unsigned long phys_addr; - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); @@ -102,8 +102,7 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size, * resources. */ - *dma_handle = pcibr_dma_map(pcidev_info, phys_addr, size, - SN_PCIDMA_CONSISTENT); + *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size); if (!*dma_handle) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); free_pages((unsigned long)cpuaddr, get_order(size)); @@ -127,11 +126,12 @@ EXPORT_SYMBOL(sn_dma_alloc_coherent); void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); - pcibr_dma_unmap(pcidev_info, dma_handle, 0); + provider->dma_unmap(pdev, dma_handle, 0); free_pages((unsigned long)cpu_addr, get_order(size)); } EXPORT_SYMBOL(sn_dma_free_coherent); @@ -159,12 +159,13 @@ dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size, { dma_addr_t dma_addr; unsigned long phys_addr; - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); phys_addr = __pa(cpu_addr); - dma_addr = pcibr_dma_map(pcidev_info, phys_addr, size, 0); + dma_addr = provider->dma_map(pdev, phys_addr, size); if (!dma_addr) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); return 0; @@ -187,10 +188,12 @@ EXPORT_SYMBOL(sn_dma_map_single); void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) { - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); - pcibr_dma_unmap(pcidev_info, dma_addr, direction); + + provider->dma_unmap(pdev, dma_addr, direction); } EXPORT_SYMBOL(sn_dma_unmap_single); @@ -207,12 +210,13 @@ void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, int direction) { int i; - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); BUG_ON(dev->bus != &pci_bus_type); for (i = 0; i < nhwentries; i++, sg++) { - pcibr_dma_unmap(pcidev_info, sg->dma_address, direction); + provider->dma_unmap(pdev, sg->dma_address, direction); sg->dma_address = (dma_addr_t) NULL; sg->dma_length = 0; } @@ -233,7 +237,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries, { unsigned long phys_addr; struct scatterlist *saved_sg = sg; - struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); int i; BUG_ON(dev->bus != &pci_bus_type); @@ -243,8 +248,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries, */ for (i = 0; i < nhwentries; i++, sg++) { phys_addr = SG_ENT_PHYS_ADDRESS(sg); - sg->dma_address = pcibr_dma_map(pcidev_info, phys_addr, - sg->length, 0); + sg->dma_address = provider->dma_map(pdev, + phys_addr, sg->length); if (!sg->dma_address) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 9d6854666f9b..0e47bce85f2d 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -8,8 +8,8 @@ #include <linux/types.h> #include <asm/sn/sn_sal.h> -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include "pci/pcibr_provider.h" int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index b1d66ac065c8..64af2b2c1787 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -12,8 +12,8 @@ #include <asm/sn/geo.h> #include "xtalk/xwidgetdev.h" #include "xtalk/hubdev.h" -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include "pci/tiocp.h" #include "pci/pic.h" #include "pci/pcibr_provider.h" @@ -40,7 +40,7 @@ extern int sn_ioif_inited; * we do not have to allocate entries in the PMU. */ -static uint64_t +static dma_addr_t pcibr_dmamap_ate32(struct pcidev_info *info, uint64_t paddr, size_t req_size, uint64_t flags) { @@ -109,7 +109,7 @@ pcibr_dmamap_ate32(struct pcidev_info *info, return pci_addr; } -static uint64_t +static dma_addr_t pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr, uint64_t dma_attributes) { @@ -141,7 +141,7 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr, } -static uint64_t +static dma_addr_t pcibr_dmatrans_direct32(struct pcidev_info * info, uint64_t paddr, size_t req_size, uint64_t flags) { @@ -180,11 +180,11 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, * DMA mappings for Direct 64 and 32 do not have any DMA maps. */ void -pcibr_dma_unmap(struct pcidev_info *pcidev_info, dma_addr_t dma_handle, - int direction) +pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction) { - struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> - pdi_pcibus_info; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + struct pcibus_info *pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_pcibus_info; if (IS_PCI32_MAPPED(dma_handle)) { int ate_index; @@ -301,7 +301,7 @@ void sn_dma_flush(uint64_t addr) spin_lock_irqsave(&((struct sn_flush_device_list *)p)-> sfdl_flush_lock, flags); - p->sfdl_flush_value = 0; + *p->sfdl_flush_addr = 0; /* force an interrupt. */ *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; @@ -316,64 +316,63 @@ void sn_dma_flush(uint64_t addr) } /* - * Wrapper DMA interface. Called from pci_dma.c routines. + * DMA interfaces. Called from pci_dma.c routines. */ -uint64_t -pcibr_dma_map(struct pcidev_info * pcidev_info, unsigned long phys_addr, - size_t size, unsigned int flags) +dma_addr_t +pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) { dma_addr_t dma_handle; - struct pci_dev *pcidev = pcidev_info->pdi_linux_pcidev; - - if (flags & SN_PCIDMA_CONSISTENT) { - /* sn_pci_alloc_consistent interfaces */ - if (pcidev->dev.coherent_dma_mask == ~0UL) { - dma_handle = - pcibr_dmatrans_direct64(pcidev_info, phys_addr, - PCI64_ATTR_BAR); - } else { - dma_handle = - (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, - phys_addr, size, - PCI32_ATE_BAR); - } - } else { - /* map_sg/map_single interfaces */ + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); - /* SN cannot support DMA addresses smaller than 32 bits. */ - if (pcidev->dma_mask < 0x7fffffff) { - return 0; - } + /* SN cannot support DMA addresses smaller than 32 bits. */ + if (hwdev->dma_mask < 0x7fffffff) { + return 0; + } - if (pcidev->dma_mask == ~0UL) { + if (hwdev->dma_mask == ~0UL) { + /* + * Handle the most common case: 64 bit cards. This + * call should always succeed. + */ + + dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_PREF); + } else { + /* Handle 32-63 bit cards via direct mapping */ + dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr, + size, 0); + if (!dma_handle) { /* - * Handle the most common case: 64 bit cards. This - * call should always succeed. + * It is a 32 bit card and we cannot do direct mapping, + * so we use an ATE. */ - dma_handle = - pcibr_dmatrans_direct64(pcidev_info, phys_addr, - PCI64_ATTR_PREF); - } else { - /* Handle 32-63 bit cards via direct mapping */ - dma_handle = - pcibr_dmatrans_direct32(pcidev_info, phys_addr, - size, 0); - if (!dma_handle) { - /* - * It is a 32 bit card and we cannot do direct mapping, - * so we use an ATE. - */ - - dma_handle = - pcibr_dmamap_ate32(pcidev_info, phys_addr, - size, PCI32_ATE_PREF); - } + dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr, + size, PCI32_ATE_PREF); } } return dma_handle; } +dma_addr_t +pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr, + size_t size) +{ + dma_addr_t dma_handle; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + + if (hwdev->dev.coherent_dma_mask == ~0UL) { + dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_BAR); + } else { + dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, + phys_addr, size, + PCI32_ATE_BAR); + } + + return dma_handle; +} + EXPORT_SYMBOL(sn_dma_flush); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 92bd278cf7ff..3893999d23d8 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -13,8 +13,8 @@ #include "xtalk/xwidgetdev.h" #include <asm/sn/geo.h> #include "xtalk/hubdev.h" -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include "pci/pcibr_provider.h" #include <asm/sn/addrs.h> @@ -168,3 +168,23 @@ void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info) pcibr_force_interrupt(sn_irq_info); } } + +/* + * Provider entries for PIC/CP + */ + +struct sn_pcibus_provider pcibr_provider = { + .dma_map = pcibr_dma_map, + .dma_map_consistent = pcibr_dma_map_consistent, + .dma_unmap = pcibr_dma_unmap, + .bus_fixup = pcibr_bus_fixup, +}; + +int +pcibr_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_PIC] = &pcibr_provider; + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCP] = &pcibr_provider; + + return 0; +} diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c index 74a74a7d2a13..865c11c3b50a 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c @@ -8,8 +8,8 @@ #include <linux/types.h> #include <linux/interrupt.h> -#include "pci/pcibus_provider_defs.h" -#include "pci/pcidev.h" +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include "pci/tiocp.h" #include "pci/pic.h" #include "pci/pcibr_provider.h" diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c new file mode 100644 index 000000000000..8dae9eb45456 --- /dev/null +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -0,0 +1,668 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/tioca_provider.h> + +uint32_t tioca_gart_found; +EXPORT_SYMBOL(tioca_gart_found); /* used by agp-sgi */ + +LIST_HEAD(tioca_list); +EXPORT_SYMBOL(tioca_list); /* used by agp-sgi */ + +static int tioca_gart_init(struct tioca_kernel *); + +/** + * tioca_gart_init - Initialize SGI TIOCA GART + * @tioca_common: ptr to common prom/kernel struct identifying the + * + * If the indicated tioca has devices present, initialize its associated + * GART MMR's and kernel memory. + */ +static int +tioca_gart_init(struct tioca_kernel *tioca_kern) +{ + uint64_t ap_reg; + uint64_t offset; + struct page *tmp; + struct tioca_common *tioca_common; + volatile struct tioca *ca_base; + + tioca_common = tioca_kern->ca_common; + ca_base = (struct tioca *)tioca_common->ca_common.bs_base; + + if (list_empty(tioca_kern->ca_devices)) + return 0; + + ap_reg = 0; + + /* + * Validate aperature size + */ + + switch (CA_APERATURE_SIZE >> 20) { + case 4: + ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT); /* 4MB */ + break; + case 8: + ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT); /* 8MB */ + break; + case 16: + ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT); /* 16MB */ + break; + case 32: + ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT); /* 32 MB */ + break; + case 64: + ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT); /* 64 MB */ + break; + case 128: + ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT); /* 128 MB */ + break; + case 256: + ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT); /* 256 MB */ + break; + case 512: + ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT); /* 512 MB */ + break; + case 1024: + ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT); /* 1GB */ + break; + case 2048: + ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT); /* 2GB */ + break; + case 4096: + ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT); /* 4 GB */ + break; + default: + printk(KERN_ERR "%s: Invalid CA_APERATURE_SIZE " + "0x%lx\n", __FUNCTION__, (ulong) CA_APERATURE_SIZE); + return -1; + } + + /* + * Set up other aperature parameters + */ + + if (PAGE_SIZE >= 16384) { + tioca_kern->ca_ap_pagesize = 16384; + ap_reg |= CA_GART_PAGE_SIZE; + } else { + tioca_kern->ca_ap_pagesize = 4096; + } + + tioca_kern->ca_ap_size = CA_APERATURE_SIZE; + tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE; + tioca_kern->ca_gart_entries = + tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize; + + ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI); + ap_reg |= tioca_kern->ca_ap_bus_base; + + /* + * Allocate and set up the GART + */ + + tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64); + tmp = + alloc_pages_node(tioca_kern->ca_closest_node, + GFP_KERNEL | __GFP_ZERO, + get_order(tioca_kern->ca_gart_size)); + + if (!tmp) { + printk(KERN_ERR "%s: Could not allocate " + "%lu bytes (order %d) for GART\n", + __FUNCTION__, + tioca_kern->ca_gart_size, + get_order(tioca_kern->ca_gart_size)); + return -ENOMEM; + } + + tioca_kern->ca_gart = page_address(tmp); + tioca_kern->ca_gart_coretalk_addr = + PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart)); + + /* + * Compute PCI/AGP convenience fields + */ + + offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE; + tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE; + tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE; + tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize; + tioca_kern->ca_pcigart_base = + tioca_kern->ca_gart_coretalk_addr + offset; + tioca_kern->ca_pcigart = + &tioca_kern->ca_gart[tioca_kern->ca_pcigart_start]; + tioca_kern->ca_pcigart_entries = + tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize; + tioca_kern->ca_pcigart_pagemap = + kcalloc(1, tioca_kern->ca_pcigart_entries / 8, GFP_KERNEL); + if (!tioca_kern->ca_pcigart_pagemap) { + free_pages((unsigned long)tioca_kern->ca_gart, + get_order(tioca_kern->ca_gart_size)); + return -1; + } + + offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE; + tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE; + tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE; + tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize; + tioca_kern->ca_gfxgart_base = + tioca_kern->ca_gart_coretalk_addr + offset; + tioca_kern->ca_gfxgart = + &tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start]; + tioca_kern->ca_gfxgart_entries = + tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize; + + /* + * various control settings: + * use agp op-combining + * use GET semantics to fetch memory + * participate in coherency domain + * DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029 + */ + + ca_base->ca_control1 |= CA_AGPDMA_OP_ENB_COMBDELAY; /* PV895469 ? */ + ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM); + ca_base->ca_control2 |= (0x2ull << CA_GART_MEM_PARAM_SHFT); + tioca_kern->ca_gart_iscoherent = 1; + ca_base->ca_control2 &= + ~(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB); + + /* + * Unmask GART fetch error interrupts. Clear residual errors first. + */ + + ca_base->ca_int_status_alias = CA_GART_FETCH_ERR; + ca_base->ca_mult_error_alias = CA_GART_FETCH_ERR; + ca_base->ca_int_mask &= ~CA_GART_FETCH_ERR; + + /* + * Program the aperature and gart registers in TIOCA + */ + + ca_base->ca_gart_aperature = ap_reg; + ca_base->ca_gart_ptr_table = tioca_kern->ca_gart_coretalk_addr | 1; + + return 0; +} + +/** + * tioca_fastwrite_enable - enable AGP FW for a tioca and its functions + * @tioca_kernel: structure representing the CA + * + * Given a CA, scan all attached functions making sure they all support + * FastWrite. If so, enable FastWrite for all functions and the CA itself. + */ + +void +tioca_fastwrite_enable(struct tioca_kernel *tioca_kern) +{ + int cap_ptr; + uint64_t ca_control1; + uint32_t reg; + struct tioca *tioca_base; + struct pci_dev *pdev; + struct tioca_common *common; + + common = tioca_kern->ca_common; + + /* + * Scan all vga controllers on this bus making sure they all + * suport FW. If not, return. + */ + + list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) { + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8)) + continue; + + cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); + if (!cap_ptr) + return; /* no AGP CAP means no FW */ + + pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, ®); + if (!(reg & PCI_AGP_STATUS_FW)) + return; /* function doesn't support FW */ + } + + /* + * Set fw for all vga fn's + */ + + list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) { + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8)) + continue; + + cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); + pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, ®); + reg |= PCI_AGP_COMMAND_FW; + pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg); + } + + /* + * Set ca's fw to match + */ + + tioca_base = (struct tioca *)common->ca_common.bs_base; + ca_control1 = tioca_base->ca_control1; + ca_control1 |= CA_AGP_FW_ENABLE; + tioca_base->ca_control1 = ca_control1; +} + +EXPORT_SYMBOL(tioca_fastwrite_enable); /* used by agp-sgi */ + +/** + * tioca_dma_d64 - create a DMA mapping using 64-bit direct mode + * @paddr: system physical address + * + * Map @paddr into 64-bit CA bus space. No device context is necessary. + * Bits 53:0 come from the coretalk address. We just need to mask in the + * following optional bits of the 64-bit pci address: + * + * 63:60 - Coretalk Packet Type - 0x1 for Mem Get/Put (coherent) + * 0x2 for PIO (non-coherent) + * We will always use 0x1 + * 55:55 - Swap bytes Currently unused + */ +static uint64_t +tioca_dma_d64(unsigned long paddr) +{ + dma_addr_t bus_addr; + + bus_addr = PHYS_TO_TIODMA(paddr); + + BUG_ON(!bus_addr); + BUG_ON(bus_addr >> 54); + + /* Set upper nibble to Cache Coherent Memory op */ + bus_addr |= (1UL << 60); + + return bus_addr; +} + +/** + * tioca_dma_d48 - create a DMA mapping using 48-bit direct mode + * @pdev: linux pci_dev representing the function + * @paddr: system physical address + * + * Map @paddr into 64-bit bus space of the CA associated with @pcidev_info. + * + * The CA agp 48 bit direct address falls out as follows: + * + * When direct mapping AGP addresses, the 48 bit AGP address is + * constructed as follows: + * + * [47:40] - Low 8 bits of the page Node ID extracted from coretalk + * address [47:40]. The upper 8 node bits are fixed + * and come from the xxx register bits [5:0] + * [39:38] - Chiplet ID extracted from coretalk address [39:38] + * [37:00] - node offset extracted from coretalk address [37:00] + * + * Since the node id in general will be non-zero, and the chiplet id + * will always be non-zero, it follows that the device must support + * a dma mask of at least 0xffffffffff (40 bits) to target node 0 + * and in general should be 0xffffffffffff (48 bits) to target nodes + * up to 255. Nodes above 255 need the support of the xxx register, + * and so a given CA can only directly target nodes in the range + * xxx - xxx+255. + */ +static uint64_t +tioca_dma_d48(struct pci_dev *pdev, uint64_t paddr) +{ + struct tioca_common *tioca_common; + struct tioca *ca_base; + uint64_t ct_addr; + dma_addr_t bus_addr; + uint32_t node_upper; + uint64_t agp_dma_extn; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); + + tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; + ca_base = (struct tioca *)tioca_common->ca_common.bs_base; + + ct_addr = PHYS_TO_TIODMA(paddr); + if (!ct_addr) + return 0; + + bus_addr = (dma_addr_t) (ct_addr & 0xffffffffffff); + node_upper = ct_addr >> 48; + + if (node_upper > 64) { + printk(KERN_ERR "%s: coretalk addr 0x%p node id out " + "of range\n", __FUNCTION__, (void *)ct_addr); + return 0; + } + + agp_dma_extn = ca_base->ca_agp_dma_addr_extn; + if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) { + printk(KERN_ERR "%s: coretalk upper node (%u) " + "mismatch with ca_agp_dma_addr_extn (%lu)\n", + __FUNCTION__, + node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)); + return 0; + } + + return bus_addr; +} + +/** + * tioca_dma_mapped - create a DMA mapping using a CA GART + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @req_size: len (bytes) to map + * + * Map @paddr into CA address space using the GART mechanism. The mapped + * dma_addr_t is guarenteed to be contiguous in CA bus space. + */ +static dma_addr_t +tioca_dma_mapped(struct pci_dev *pdev, uint64_t paddr, size_t req_size) +{ + int i, ps, ps_shift, entry, entries, mapsize, last_entry; + uint64_t xio_addr, end_xio_addr; + struct tioca_common *tioca_common; + struct tioca_kernel *tioca_kern; + dma_addr_t bus_addr = 0; + struct tioca_dmamap *ca_dmamap; + void *map; + unsigned long flags; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);; + + tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; + tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private; + + xio_addr = PHYS_TO_TIODMA(paddr); + if (!xio_addr) + return 0; + + spin_lock_irqsave(&tioca_kern->ca_lock, flags); + + /* + * allocate a map struct + */ + + ca_dmamap = kcalloc(1, sizeof(struct tioca_dmamap), GFP_ATOMIC); + if (!ca_dmamap) + goto map_return; + + /* + * Locate free entries that can hold req_size. Account for + * unaligned start/length when allocating. + */ + + ps = tioca_kern->ca_ap_pagesize; /* will be power of 2 */ + ps_shift = ffs(ps) - 1; + end_xio_addr = xio_addr + req_size - 1; + + entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1; + + map = tioca_kern->ca_pcigart_pagemap; + mapsize = tioca_kern->ca_pcigart_entries; + + entry = find_first_zero_bit(map, mapsize); + while (entry < mapsize) { + last_entry = find_next_bit(map, mapsize, entry); + + if (last_entry - entry >= entries) + break; + + entry = find_next_zero_bit(map, mapsize, last_entry); + } + + if (entry > mapsize) + goto map_return; + + for (i = 0; i < entries; i++) + set_bit(entry + i, map); + + bus_addr = tioca_kern->ca_pciap_base + (entry * ps); + + ca_dmamap->cad_dma_addr = bus_addr; + ca_dmamap->cad_gart_size = entries; + ca_dmamap->cad_gart_entry = entry; + list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps); + + if (xio_addr % ps) { + tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr); + bus_addr += xio_addr & (ps - 1); + xio_addr &= ~(ps - 1); + xio_addr += ps; + entry++; + } + + while (xio_addr < end_xio_addr) { + tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr); + xio_addr += ps; + entry++; + } + + tioca_tlbflush(tioca_kern); + +map_return: + spin_unlock_irqrestore(&tioca_kern->ca_lock, flags); + return bus_addr; +} + +/** + * tioca_dma_unmap - release CA mapping resources + * @pdev: linux pci_dev representing the function + * @bus_addr: bus address returned by an earlier tioca_dma_map + * @dir: mapping direction (unused) + * + * Locate mapping resources associated with @bus_addr and release them. + * For mappings created using the direct modes (64 or 48) there are no + * resources to release. + */ +void +tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) +{ + int i, entry; + struct tioca_common *tioca_common; + struct tioca_kernel *tioca_kern; + struct tioca_dmamap *map; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev); + unsigned long flags; + + tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info; + tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private; + + /* return straight away if this isn't be a mapped address */ + + if (bus_addr < tioca_kern->ca_pciap_base || + bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size)) + return; + + spin_lock_irqsave(&tioca_kern->ca_lock, flags); + + list_for_each_entry(map, &tioca_kern->ca_dmamaps, cad_list) + if (map->cad_dma_addr == bus_addr) + break; + + BUG_ON(map == NULL); + + entry = map->cad_gart_entry; + + for (i = 0; i < map->cad_gart_size; i++, entry++) { + clear_bit(entry, tioca_kern->ca_pcigart_pagemap); + tioca_kern->ca_pcigart[entry] = 0; + } + tioca_tlbflush(tioca_kern); + + list_del(&map->cad_list); + spin_unlock_irqrestore(&tioca_kern->ca_lock, flags); + kfree(map); +} + +/** + * tioca_dma_map - map pages for PCI DMA + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @byte_count: bytes to map + * + * This is the main wrapper for mapping host physical pages to CA PCI space. + * The mapping mode used is based on the devices dma_mask. As a last resort + * use the GART mapped mode. + */ +uint64_t +tioca_dma_map(struct pci_dev *pdev, uint64_t paddr, size_t byte_count) +{ + uint64_t mapaddr; + + /* + * If card is 64 or 48 bit addresable, use a direct mapping. 32 + * bit direct is so restrictive w.r.t. where the memory resides that + * we don't use it even though CA has some support. + */ + + if (pdev->dma_mask == ~0UL) + mapaddr = tioca_dma_d64(paddr); + else if (pdev->dma_mask == 0xffffffffffffUL) + mapaddr = tioca_dma_d48(pdev, paddr); + else + mapaddr = 0; + + /* Last resort ... use PCI portion of CA GART */ + + if (mapaddr == 0) + mapaddr = tioca_dma_mapped(pdev, paddr, byte_count); + + return mapaddr; +} + +/** + * tioca_error_intr_handler - SGI TIO CA error interrupt handler + * @irq: unused + * @arg: pointer to tioca_common struct for the given CA + * @pt: unused + * + * Handle a CA error interrupt. Simply a wrapper around a SAL call which + * defers processing to the SGI prom. + */ +static irqreturn_t +tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt) +{ + struct tioca_common *soft = arg; + struct ia64_sal_retval ret_stuff; + uint64_t segment; + uint64_t busnum; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = 0; + busnum = soft->ca_common.bs_persist_busnum; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + segment, busnum, 0, 0, 0, 0, 0); + + return IRQ_HANDLED; +} + +/** + * tioca_bus_fixup - perform final PCI fixup for a TIO CA bus + * @prom_bussoft: Common prom/kernel struct representing the bus + * + * Replicates the tioca_common pointed to by @prom_bussoft in kernel + * space. Allocates and initializes a kernel-only area for a given CA, + * and sets up an irq for handling CA error interrupts. + * + * On successful setup, returns the kernel version of tioca_common back to + * the caller. + */ +void * +tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft) +{ + struct tioca_common *tioca_common; + struct tioca_kernel *tioca_kern; + struct pci_bus *bus; + + /* sanity check prom rev */ + + if (sn_sal_rev_major() < 4 || + (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) { + printk + (KERN_ERR "%s: SGI prom rev 4.06 or greater required " + "for tioca support\n", __FUNCTION__); + return NULL; + } + + /* + * Allocate kernel bus soft and copy from prom. + */ + + tioca_common = kcalloc(1, sizeof(struct tioca_common), GFP_KERNEL); + if (!tioca_common) + return NULL; + + memcpy(tioca_common, prom_bussoft, sizeof(struct tioca_common)); + tioca_common->ca_common.bs_base |= __IA64_UNCACHED_OFFSET; + + /* init kernel-private area */ + + tioca_kern = kcalloc(1, sizeof(struct tioca_kernel), GFP_KERNEL); + if (!tioca_kern) { + kfree(tioca_common); + return NULL; + } + + tioca_kern->ca_common = tioca_common; + spin_lock_init(&tioca_kern->ca_lock); + INIT_LIST_HEAD(&tioca_kern->ca_dmamaps); + tioca_kern->ca_closest_node = + nasid_to_cnodeid(tioca_common->ca_closest_nasid); + tioca_common->ca_kernel_private = (uint64_t) tioca_kern; + + bus = pci_find_bus(0, tioca_common->ca_common.bs_persist_busnum); + BUG_ON(!bus); + tioca_kern->ca_devices = &bus->devices; + + /* init GART */ + + if (tioca_gart_init(tioca_kern) < 0) { + kfree(tioca_kern); + kfree(tioca_common); + return NULL; + } + + tioca_gart_found++; + list_add(&tioca_kern->ca_list, &tioca_list); + + if (request_irq(SGI_TIOCA_ERROR, + tioca_error_intr_handler, + SA_SHIRQ, "TIOCA error", (void *)tioca_common)) + printk(KERN_WARNING + "%s: Unable to get irq %d. " + "Error interrupts won't be routed for TIOCA bus %d\n", + __FUNCTION__, SGI_TIOCA_ERROR, + (int)tioca_common->ca_common.bs_persist_busnum); + + return tioca_common; +} + +static struct sn_pcibus_provider tioca_pci_interfaces = { + .dma_map = tioca_dma_map, + .dma_map_consistent = tioca_dma_map, + .dma_unmap = tioca_dma_unmap, + .bus_fixup = tioca_bus_fixup, +}; + +/** + * tioca_init_provider - init SN PCI provider ops for TIO CA + */ +int +tioca_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces; + return 0; +} diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 8b40f362dd6f..124f7c1b775e 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -24,6 +24,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/string.h> +#include <linux/signal.h> #include <asm/cacheflush.h> #include <asm/io.h> @@ -665,7 +666,7 @@ do_ptrace(long request, struct task_struct *child, long addr, long data) case PTRACE_SYSCALL: case PTRACE_CONT: ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -700,7 +701,7 @@ do_ptrace(long request, struct task_struct *child, long addr, long data) unsigned long pc, insn; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); if ((child->ptrace & PT_DTRACE) == 0) { diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 0beb53333ba3..f4e1e5eb8e12 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -19,6 +19,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/config.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -251,7 +252,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) long tmp; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) { child->thread.work.syscall_trace = ~0; @@ -292,7 +293,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) long tmp; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; child->thread.work.syscall_trace = 0; tmp = get_reg(child, PT_SR) | (TRACE_BITS << 16); diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index fc4615b6d3a9..e729bd280623 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -534,6 +534,11 @@ endchoice endmenu +config ISA_DMA_API + bool + depends on !M5272 + default y + menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" config PCI diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 15cf79080b15..9724e1cd82e5 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -19,6 +19,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/config.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -240,7 +241,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) long tmp; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -278,7 +279,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) long tmp; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); tmp = get_reg(child, PT_SR) | (TRACE_BITS << 16); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5e666aad8815..ab9944693f1f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1656,3 +1656,7 @@ config GENERIC_HARDIRQS config GENERIC_IRQ_PROBE bool default y + +config ISA_DMA_API + bool + default y diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 92f2c39afe27..92e70ca3bff9 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -26,6 +26,7 @@ #include <linux/smp_lock.h> #include <linux/user.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/cpu.h> #include <asm/fpu.h> @@ -257,7 +258,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -300,25 +301,38 @@ out: return ret; } +static inline int audit_arch(void) +{ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#ifdef CONFIG_MIPS64 + if (!(current->thread.mflags & MF_32BIT_REGS)) + return AUDIT_ARCH_MIPSEL64; +#endif /* MIPS64 */ + return AUDIT_ARCH_MIPSEL; + +#else /* big endian... */ +#ifdef CONFIG_MIPS64 + if (!(current->thread.mflags & MF_32BIT_REGS)) + return AUDIT_ARCH_MIPS64; +#endif /* MIPS64 */ + return AUDIT_ARCH_MIPS; + +#endif /* endian */ +} + /* * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) { - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(current, regs->regs[2], - regs->regs[4], regs->regs[5], - regs->regs[6], regs->regs[7]); - else - audit_syscall_exit(current, regs->regs[2]); - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->regs[2]), regs->regs[2]); if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + goto out; if (!(current->ptrace & PT_PTRACED)) - return; + goto out; /* The 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ @@ -334,4 +348,9 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(current, audit_arch(), regs->regs[2], + regs->regs[4], regs->regs[5], + regs->regs[6], regs->regs[7]); } diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 611dee919d50..eee207969c21 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -24,6 +24,7 @@ #include <linux/smp_lock.h> #include <linux/user.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/cpu.h> #include <asm/fpu.h> @@ -241,7 +242,7 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned int) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 598bfe7426a2..ae2a1312d4ef 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -374,22 +374,6 @@ asmlinkage int sys_ipc (uint call, int first, int second, } /* - * Native ABI that is O32 or N64 version - */ -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, - int shmflg, unsigned long *addr) -{ - unsigned long raddr; - int err; - - err = do_shmat(shmid, shmaddr, shmflg, &raddr); - if (err) - return err; - - return put_user(raddr, addr); -} - -/* * No implemented yet ... */ asmlinkage int sys_cachectl(char *addr, int nbytes, int op) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5b5cd00d98ca..e7e7c56fc212 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -45,6 +45,10 @@ config GENERIC_IRQ_PROBE config PM bool +config ISA_DMA_API + bool + default y + source "init/Kconfig" diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 2937a9236384..c07db9dff7cd 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -17,6 +17,7 @@ #include <linux/personality.h> #include <linux/security.h> #include <linux/compat.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -285,7 +286,7 @@ long sys_ptrace(long request, pid_t pid, long addr, long data) ret = -EIO; DBG("sys_ptrace(%s)\n", request == PTRACE_SYSCALL ? "SYSCALL" : "CONT"); - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) goto out_tsk; child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP); if (request == PTRACE_SYSCALL) { @@ -311,7 +312,7 @@ long sys_ptrace(long request, pid_t pid, long addr, long data) case PTRACE_SINGLEBLOCK: DBG("sys_ptrace(SINGLEBLOCK)\n"); ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) goto out_tsk; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->ptrace &= ~PT_SINGLESTEP; @@ -328,7 +329,7 @@ long sys_ptrace(long request, pid_t pid, long addr, long data) case PTRACE_SINGLESTEP: DBG("sys_ptrace(SINGLESTEP)\n"); ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) goto out_tsk; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 7958cd8c8bf8..d15a1d53e101 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -161,17 +161,6 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, } } -long sys_shmat_wrapper(int shmid, char __user *shmaddr, int shmflag) -{ - unsigned long raddr; - int r; - - r = do_shmat(shmid, shmaddr, shmflag, &raddr); - if (r < 0) - return r; - return raddr; -} - /* Fucking broken ABI */ #ifdef CONFIG_64BIT diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 779b537100ec..dcfa4d3d0e7d 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -297,7 +297,7 @@ ENTRY_DIFF(msgrcv) ENTRY_SAME(msgget) /* 190 */ ENTRY_SAME(msgctl) - ENTRY_SAME(shmat_wrapper) + ENTRY_SAME(shmat) ENTRY_SAME(shmdt) ENTRY_SAME(shmget) ENTRY_SAME(shmctl) /* 195 */ diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 74aa1e92a395..600f23d7fd33 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -43,6 +43,10 @@ config GENERIC_NVRAM bool default y +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + source "init/Kconfig" menu "Processor" @@ -53,6 +57,7 @@ choice config 6xx bool "6xx/7xx/74xx/52xx/82xx/83xx" + select PPC_FPU help There are four types of PowerPC chips supported. The more common types (601, 603, 604, 740, 750, 7400), the Motorola embedded @@ -72,9 +77,11 @@ config 44x bool "44x" config POWER3 + select PPC_FPU bool "POWER3" config POWER4 + select PPC_FPU bool "POWER4 and 970 (G5)" config 8xx @@ -86,6 +93,9 @@ config E500 endchoice +config PPC_FPU + bool + config BOOKE bool depends on E500 @@ -1075,6 +1085,10 @@ source kernel/power/Kconfig endmenu +config ISA_DMA_API + bool + default y + menu "Bus options" config ISA diff --git a/arch/ppc/Makefile b/arch/ppc/Makefile index 73cbdda5b597..0432a25b4735 100644 --- a/arch/ppc/Makefile +++ b/arch/ppc/Makefile @@ -53,6 +53,7 @@ head-$(CONFIG_FSL_BOOKE) := arch/ppc/kernel/head_fsl_booke.o head-$(CONFIG_6xx) += arch/ppc/kernel/idle_6xx.o head-$(CONFIG_POWER4) += arch/ppc/kernel/idle_power4.o +head-$(CONFIG_PPC_FPU) += arch/ppc/kernel/fpu.o core-y += arch/ppc/kernel/ arch/ppc/platforms/ \ arch/ppc/mm/ arch/ppc/lib/ arch/ppc/syslib/ diff --git a/arch/ppc/boot/images/Makefile b/arch/ppc/boot/images/Makefile index 774de8e23871..f850fb0fb511 100644 --- a/arch/ppc/boot/images/Makefile +++ b/arch/ppc/boot/images/Makefile @@ -20,8 +20,9 @@ quiet_cmd_uimage = UIMAGE $@ targets += uImage $(obj)/uImage: $(obj)/vmlinux.gz + $(Q)rm -f $@ $(call if_changed,uimage) - @echo ' Image $@ is ready' + @echo ' Image: $@' $(if $(wildcard $@),'is ready','not made') # Files generated that shall be removed upon make clean clean-files := sImage vmapus vmlinux* miboot* zImage* uImage diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index 86bc878cb3ee..b284451802c9 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -9,6 +9,7 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-$(CONFIG_6xx) += idle_6xx.o extra-$(CONFIG_POWER4) += idle_power4.o +extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds obj-y := entry.o traps.o irq.o idle.o time.o misc.o \ diff --git a/arch/ppc/kernel/align.c b/arch/ppc/kernel/align.c index 79c929475037..ff81da9598d8 100644 --- a/arch/ppc/kernel/align.c +++ b/arch/ppc/kernel/align.c @@ -290,6 +290,10 @@ fix_alignment(struct pt_regs *regs) /* lwm, stmw */ nb = (32 - reg) * 4; } + + if (!access_ok((flags & ST? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0)) + return -EFAULT; /* bad address */ + rptr = (unsigned char *) ®s->gpr[reg]; if (flags & LD) { for (i = 0; i < nb; ++i) @@ -368,16 +372,24 @@ fix_alignment(struct pt_regs *regs) /* Single-precision FP load and store require conversions... */ case LD+F+S: +#ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); cvt_fd(&data.f, &data.d, ¤t->thread.fpscr); preempt_enable(); +#else + return 0; +#endif break; case ST+F+S: +#ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); cvt_df(&data.d, &data.f, ¤t->thread.fpscr); preempt_enable(); +#else + return 0; +#endif break; } diff --git a/arch/ppc/kernel/cpu_setup_6xx.S b/arch/ppc/kernel/cpu_setup_6xx.S index 74f781b486a3..468721d9ebd2 100644 --- a/arch/ppc/kernel/cpu_setup_6xx.S +++ b/arch/ppc/kernel/cpu_setup_6xx.S @@ -30,12 +30,14 @@ _GLOBAL(__setup_cpu_604) blr _GLOBAL(__setup_cpu_750) mflr r4 + bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 mtlr r4 blr _GLOBAL(__setup_cpu_750cx) mflr r4 + bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 bl setup_750cx @@ -43,6 +45,7 @@ _GLOBAL(__setup_cpu_750cx) blr _GLOBAL(__setup_cpu_750fx) mflr r4 + bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 bl setup_750fx @@ -50,6 +53,7 @@ _GLOBAL(__setup_cpu_750fx) blr _GLOBAL(__setup_cpu_7400) mflr r4 + bl __init_fpu_registers bl setup_7400_workarounds bl setup_common_caches bl setup_750_7400_hid0 @@ -57,6 +61,7 @@ _GLOBAL(__setup_cpu_7400) blr _GLOBAL(__setup_cpu_7410) mflr r4 + bl __init_fpu_registers bl setup_7410_workarounds bl setup_common_caches bl setup_750_7400_hid0 @@ -80,7 +85,7 @@ setup_common_caches: bne 1f /* don't invalidate the D-cache */ ori r8,r8,HID0_DCI /* unless it wasn't enabled */ 1: sync - mtspr SPRN_HID0,r8 /* enable and invalidate caches */ + mtspr SPRN_HID0,r8 /* enable and invalidate caches */ sync mtspr SPRN_HID0,r11 /* enable caches */ sync @@ -152,9 +157,13 @@ setup_7410_workarounds: setup_750_7400_hid0: mfspr r11,SPRN_HID0 ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC + oris r11,r11,HID0_DPM@h BEGIN_FTR_SECTION - oris r11,r11,HID0_DPM@h /* enable dynamic power mgmt */ -END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) li r3,HID0_SPD andc r11,r11,r3 /* clear SPD: enable speculative */ li r3,0 @@ -218,13 +227,15 @@ setup_745x_specifics: /* All of the bits we have to set..... */ - ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE | HID0_LRSTK | HID0_BTIC + ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE + ori r11,r11,HID0_LRSTK | HID0_BTIC + oris r11,r11,HID0_DPM@h BEGIN_FTR_SECTION xori r11,r11,HID0_BTIC END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) BEGIN_FTR_SECTION - oris r11,r11,HID0_DPM@h /* enable dynamic power mgmt */ -END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) /* All of the bits we have to clear.... */ @@ -248,6 +259,25 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) isync blr +/* + * Initialize the FPU registers. This is needed to work around an errata + * in some 750 cpus where using a not yet initialized FPU register after + * power on reset may hang the CPU + */ +_GLOBAL(__init_fpu_registers) + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + addis r9,r3,empty_zero_page@ha + addi r9,r9,empty_zero_page@l + REST_32FPRS(0,r9) + sync + mtmsr r10 + isync + blr + + /* Definitions for the table use to save CPU states */ #define CS_HID0 0 #define CS_HID1 4 diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index 035217d6c0f1..5f075dbc4ee7 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -563,6 +563,65 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) addi r1,r1,INT_FRAME_SIZE blr + .globl fast_exception_return +fast_exception_return: +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + andi. r10,r9,MSR_RI /* check for recoverable interrupt */ + beq 1f /* if not, we've got problems */ +#endif + +2: REST_4GPRS(3, r11) + lwz r10,_CCR(r11) + REST_GPR(1, r11) + mtcr r10 + lwz r10,_LINK(r11) + mtlr r10 + REST_GPR(10, r11) + mtspr SPRN_SRR1,r9 + mtspr SPRN_SRR0,r12 + REST_GPR(9, r11) + REST_GPR(12, r11) + lwz r11,GPR11(r11) + SYNC + RFI + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +/* check if the exception happened in a restartable section */ +1: lis r3,exc_exit_restart_end@ha + addi r3,r3,exc_exit_restart_end@l + cmplw r12,r3 + bge 3f + lis r4,exc_exit_restart@ha + addi r4,r4,exc_exit_restart@l + cmplw r12,r4 + blt 3f + lis r3,fee_restarts@ha + tophys(r3,r3) + lwz r5,fee_restarts@l(r3) + addi r5,r5,1 + stw r5,fee_restarts@l(r3) + mr r12,r4 /* restart at exc_exit_restart */ + b 2b + + .comm fee_restarts,4 + +/* aargh, a nonrecoverable interrupt, panic */ +/* aargh, we don't know which trap this is */ +/* but the 601 doesn't implement the RI bit, so assume it's OK */ +3: +BEGIN_FTR_SECTION + b 2b +END_FTR_SECTION_IFSET(CPU_FTR_601) + li r10,-1 + stw r10,TRAP(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + lis r10,MSR_KERNEL@h + ori r10,r10,MSR_KERNEL@l + bl transfer_to_handler_full + .long nonrecoverable_exception + .long ret_from_except +#endif + .globl sigreturn_exit sigreturn_exit: subi r1,r3,STACK_FRAME_OVERHEAD diff --git a/arch/ppc/kernel/fpu.S b/arch/ppc/kernel/fpu.S new file mode 100644 index 000000000000..6189b26f640f --- /dev/null +++ b/arch/ppc/kernel/fpu.S @@ -0,0 +1,133 @@ +/* + * FPU support code, moved here from head.S so that it can be used + * by chips which use other head-whatever.S files. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/offsets.h> + +/* + * This task wants to use the FPU now. + * On UP, disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Load up this task's FP registers from its thread_struct, + * enable the FPU for the current task and return to the task. + */ + .globl load_up_fpu +load_up_fpu: + mfmsr r5 + ori r5,r5,MSR_FP +#ifdef CONFIG_PPC64BRIDGE + clrldi r5,r5,1 /* turn off 64-bit mode */ +#endif /* CONFIG_PPC64BRIDGE */ + SYNC + MTMSRD(r5) /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + */ +#ifndef CONFIG_SMP + tophys(r6,0) /* get __pa constant */ + addis r3,r6,last_task_used_math@ha + lwz r4,last_task_used_math@l(r3) + cmpwi 0,r4,0 + beq 1f + add r4,r4,r6 + addi r4,r4,THREAD /* want last_task_used_math->thread */ + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r4) + lwz r5,PT_REGS(r4) + add r5,r5,r6 + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r10,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r10 /* disable FP for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + lwz r4,THREAD_FPEXC_MODE(r5) + ori r9,r9,MSR_FP /* enable FP for current */ + or r9,r9,r4 + lfd fr0,THREAD_FPSCR-4(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + sub r4,r4,r6 + stw r4,last_task_used_math@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + b fast_exception_return + +/* + * FP unavailable trap from kernel - print a message, but let + * the task use FP in the kernel until it returns to user mode. + */ + .globl KernelFP +KernelFP: + lwz r3,_MSR(r1) + ori r3,r3,MSR_FP + stw r3,_MSR(r1) /* enable use of FP after return */ + lis r3,86f@h + ori r3,r3,86f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +86: .string "floating point used in kernel (task=%p, pc=%x)\n" + .align 4,0 + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ + .globl giveup_fpu +giveup_fpu: + mfmsr r5 + ori r5,r5,MSR_FP + SYNC_601 + ISYNC_601 + MTMSRD(r5) /* enable use of fpu now */ + SYNC_601 + isync + cmpwi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpwi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r3) + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_math@ha + stw r5,last_task_used_math@l(r4) +#endif /* CONFIG_SMP */ + blr diff --git a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S index 1a89a71e0acc..a931d773715f 100644 --- a/arch/ppc/kernel/head.S +++ b/arch/ppc/kernel/head.S @@ -775,133 +775,6 @@ InstructionSegment: EXC_XFER_STD(0x480, UnknownException) #endif /* CONFIG_PPC64BRIDGE */ -/* - * This task wants to use the FPU now. - * On UP, disable FP for the task which had the FPU previously, - * and save its floating-point registers in its thread_struct. - * Load up this task's FP registers from its thread_struct, - * enable the FPU for the current task and return to the task. - */ -load_up_fpu: - mfmsr r5 - ori r5,r5,MSR_FP -#ifdef CONFIG_PPC64BRIDGE - clrldi r5,r5,1 /* turn off 64-bit mode */ -#endif /* CONFIG_PPC64BRIDGE */ - SYNC - MTMSRD(r5) /* enable use of fpu now */ - isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. - */ -#ifndef CONFIG_SMP - tophys(r6,0) /* get __pa constant */ - addis r3,r6,last_task_used_math@ha - lwz r4,last_task_used_math@l(r3) - cmpwi 0,r4,0 - beq 1f - add r4,r4,r6 - addi r4,r4,THREAD /* want last_task_used_math->thread */ - SAVE_32FPRS(0, r4) - mffs fr0 - stfd fr0,THREAD_FPSCR-4(r4) - lwz r5,PT_REGS(r4) - add r5,r5,r6 - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r10,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r10 /* disable FP for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* enable use of FP after return */ - mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ - lwz r4,THREAD_FPEXC_MODE(r5) - ori r9,r9,MSR_FP /* enable FP for current */ - or r9,r9,r4 - lfd fr0,THREAD_FPSCR-4(r5) - mtfsf 0xff,fr0 - REST_32FPRS(0, r5) -#ifndef CONFIG_SMP - subi r4,r5,THREAD - sub r4,r4,r6 - stw r4,last_task_used_math@l(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - /* we haven't used ctr or xer or lr */ - /* fall through to fast_exception_return */ - - .globl fast_exception_return -fast_exception_return: - andi. r10,r9,MSR_RI /* check for recoverable interrupt */ - beq 1f /* if not, we've got problems */ -2: REST_4GPRS(3, r11) - lwz r10,_CCR(r11) - REST_GPR(1, r11) - mtcr r10 - lwz r10,_LINK(r11) - mtlr r10 - REST_GPR(10, r11) - mtspr SPRN_SRR1,r9 - mtspr SPRN_SRR0,r12 - REST_GPR(9, r11) - REST_GPR(12, r11) - lwz r11,GPR11(r11) - SYNC - RFI - -/* check if the exception happened in a restartable section */ -1: lis r3,exc_exit_restart_end@ha - addi r3,r3,exc_exit_restart_end@l - cmplw r12,r3 - bge 3f - lis r4,exc_exit_restart@ha - addi r4,r4,exc_exit_restart@l - cmplw r12,r4 - blt 3f - lis r3,fee_restarts@ha - tophys(r3,r3) - lwz r5,fee_restarts@l(r3) - addi r5,r5,1 - stw r5,fee_restarts@l(r3) - mr r12,r4 /* restart at exc_exit_restart */ - b 2b - - .comm fee_restarts,4 - -/* aargh, a nonrecoverable interrupt, panic */ -/* aargh, we don't know which trap this is */ -/* but the 601 doesn't implement the RI bit, so assume it's OK */ -3: -BEGIN_FTR_SECTION - b 2b -END_FTR_SECTION_IFSET(CPU_FTR_601) - li r10,-1 - stw r10,TRAP(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - li r10,MSR_KERNEL - bl transfer_to_handler_full - .long nonrecoverable_exception - .long ret_from_except - -/* - * FP unavailable trap from kernel - print a message, but let - * the task use FP in the kernel until it returns to user mode. - */ -KernelFP: - lwz r3,_MSR(r1) - ori r3,r3,MSR_FP - stw r3,_MSR(r1) /* enable use of FP after return */ - lis r3,86f@h - ori r3,r3,86f@l - mr r4,r2 /* current */ - lwz r5,_NIP(r1) - bl printk - b ret_from_except -86: .string "floating point used in kernel (task=%p, pc=%x)\n" - .align 4,0 - #ifdef CONFIG_ALTIVEC /* Note that the AltiVec support is closely modeled after the FP * support. Changes to one are likely to be applicable to the @@ -1016,42 +889,6 @@ giveup_altivec: #endif /* CONFIG_ALTIVEC */ /* - * giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - */ - .globl giveup_fpu -giveup_fpu: - mfmsr r5 - ori r5,r5,MSR_FP - SYNC_601 - ISYNC_601 - MTMSRD(r5) /* enable use of fpu now */ - SYNC_601 - isync - cmpwi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - lwz r5,PT_REGS(r3) - cmpwi 0,r5,0 - SAVE_32FPRS(0, r3) - mffs fr0 - stfd fr0,THREAD_FPSCR-4(r3) - beq 1f - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r3 /* disable FP for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - lis r4,last_task_used_math@ha - stw r5,last_task_used_math@l(r4) -#endif /* CONFIG_SMP */ - blr - -/* * This code is jumped to from the startup code to copy * the kernel image to physical address 0. */ diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S index 9ed8165a3d6c..9b6a8e513657 100644 --- a/arch/ppc/kernel/head_44x.S +++ b/arch/ppc/kernel/head_44x.S @@ -426,7 +426,11 @@ interrupt_base: PROGRAM_EXCEPTION /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) +#endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -686,8 +690,10 @@ _GLOBAL(giveup_altivec) * * The 44x core does not have an FPU. */ +#ifndef CONFIG_PPC_FPU _GLOBAL(giveup_fpu) blr +#endif /* * extern void abort(void) diff --git a/arch/ppc/kernel/head_booke.h b/arch/ppc/kernel/head_booke.h index 884dac916bce..f213d12eec08 100644 --- a/arch/ppc/kernel/head_booke.h +++ b/arch/ppc/kernel/head_booke.h @@ -337,4 +337,11 @@ label: addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_LITE(0x0900, timer_interrupt) +#define FP_UNAVAILABLE_EXCEPTION \ + START_EXCEPTION(FloatingPointUnavailable) \ + NORMAL_EXCEPTION_PROLOG; \ + bne load_up_fpu; /* if from user, just load it up */ \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_EE_LITE(0x800, KernelFP) + #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S index d64bf61d2b1f..f22ddce36135 100644 --- a/arch/ppc/kernel/head_fsl_booke.S +++ b/arch/ppc/kernel/head_fsl_booke.S @@ -504,7 +504,11 @@ interrupt_base: PROGRAM_EXCEPTION /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) +#endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -916,10 +920,12 @@ _GLOBAL(giveup_spe) /* * extern void giveup_fpu(struct task_struct *prev) * - * The e500 core does not have an FPU. + * Not all FSL Book-E cores have an FPU */ +#ifndef CONFIG_PPC_FPU _GLOBAL(giveup_fpu) blr +#endif /* * extern void abort(void) diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 73f7c23b0dd4..e4f1615ec13f 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -1096,17 +1096,7 @@ _GLOBAL(_get_SP) * and exceptions as if the cpu had performed the load or store. */ -#if defined(CONFIG_4xx) || defined(CONFIG_E500) -_GLOBAL(cvt_fd) - lfs 0,0(r3) - stfd 0,0(r4) - blr - -_GLOBAL(cvt_df) - lfd 0,0(r3) - stfs 0,0(r4) - blr -#else +#ifdef CONFIG_PPC_FPU _GLOBAL(cvt_fd) lfd 0,-4(r5) /* load up fpscr value */ mtfsf 0xff,0 diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c index 98f94b60204c..47a15306823a 100644 --- a/arch/ppc/kernel/pci.c +++ b/arch/ppc/kernel/pci.c @@ -1432,7 +1432,7 @@ pci_bus_to_hose(int bus) return NULL; } -void* +void __iomem * pci_bus_io_base(unsigned int bus) { struct pci_controller *hose; diff --git a/arch/ppc/kernel/ptrace.c b/arch/ppc/kernel/ptrace.c index 426b6f7d9de3..59d59a8dc249 100644 --- a/arch/ppc/kernel/ptrace.c +++ b/arch/ppc/kernel/ptrace.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -356,7 +357,7 @@ int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -389,7 +390,7 @@ int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_single_step(child); diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index e97ce635b99e..5dfb42f1a152 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -221,27 +221,26 @@ int show_cpuinfo(struct seq_file *m, void *v) return err; } - switch (PVR_VER(pvr)) { - case 0x0020: /* 403 family */ - maj = PVR_MAJ(pvr) + 1; - min = PVR_MIN(pvr); - break; - case 0x1008: /* 740P/750P ?? */ - maj = ((pvr >> 8) & 0xFF) - 1; - min = pvr & 0xFF; - break; - case 0x8083: /* e300 */ - maj = PVR_MAJ(pvr); - min = PVR_MIN(pvr); - break; - case 0x8020: /* e500 */ + /* If we are a Freescale core do a simple check so + * we dont have to keep adding cases in the future */ + if ((PVR_VER(pvr) & 0x8000) == 0x8000) { maj = PVR_MAJ(pvr); min = PVR_MIN(pvr); - break; - default: - maj = (pvr >> 8) & 0xFF; - min = pvr & 0xFF; - break; + } else { + switch (PVR_VER(pvr)) { + case 0x0020: /* 403 family */ + maj = PVR_MAJ(pvr) + 1; + min = PVR_MIN(pvr); + break; + case 0x1008: /* 740P/750P ?? */ + maj = ((pvr >> 8) & 0xFF) - 1; + min = pvr & 0xFF; + break; + default: + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + break; + } } seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", diff --git a/arch/ppc/kernel/signal.c b/arch/ppc/kernel/signal.c index 645eae19805c..7c8437da09d5 100644 --- a/arch/ppc/kernel/signal.c +++ b/arch/ppc/kernel/signal.c @@ -511,7 +511,7 @@ int sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, } int sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op *dbg, + int ndbg, struct sig_dbg_op __user *dbg, int r6, int r7, int r8, struct pt_regs *regs) { @@ -632,7 +632,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, if (__put_user((unsigned long) ka->sa.sa_handler, &sc->handler) || __put_user(oldset->sig[0], &sc->oldmask) || __put_user(oldset->sig[1], &sc->_unused[3]) - || __put_user((struct pt_regs *)frame, &sc->regs) + || __put_user((struct pt_regs __user *)frame, &sc->regs) || __put_user(sig, &sc->signal)) goto badframe; diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index 002322a1f3ce..f8e7e324a173 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -176,7 +176,7 @@ static inline int check_io_access(struct pt_regs *regs) #else #define get_mc_reason(regs) (mfspr(SPRN_MCSR)) #endif -#define REASON_FP 0 +#define REASON_FP ESR_FP #define REASON_ILLEGAL ESR_PIL #define REASON_PRIVILEGED ESR_PPR #define REASON_TRAP ESR_PTR @@ -403,7 +403,7 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword) u8 rA = (instword >> 16) & 0x1f; u8 NB_RB = (instword >> 11) & 0x1f; u32 num_bytes; - u32 EA; + unsigned long EA; int pos = 0; /* Early out if we are an invalid form of lswx */ diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 0c0e714b84de..9353584fb710 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS __init_end = .; . = ALIGN(4096); + _sextratext = .; __pmac_begin = .; .pmac.text : { *(.pmac.text) } .pmac.data : { *(.pmac.data) } @@ -171,6 +172,7 @@ SECTIONS .openfirmware.data : { *(.openfirmware.data) } . = ALIGN(4096); __openfirmware_end = .; + _eextratext = .; __bss_start = .; .bss : diff --git a/arch/ppc/platforms/4xx/ebony.c b/arch/ppc/platforms/4xx/ebony.c index f63bca83e757..cd11734ef7c5 100644 --- a/arch/ppc/platforms/4xx/ebony.c +++ b/arch/ppc/platforms/4xx/ebony.c @@ -149,7 +149,7 @@ ebony_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) static void __init ebony_setup_pcix(void) { - void *pcix_reg_base; + void __iomem *pcix_reg_base; pcix_reg_base = ioremap64(PCIX0_REG_BASE, PCIX_REG_SIZE); @@ -210,9 +210,8 @@ ebony_setup_hose(void) hose->io_space.end = EBONY_PCI_UPPER_IO; hose->mem_space.start = EBONY_PCI_LOWER_MEM; hose->mem_space.end = EBONY_PCI_UPPER_MEM; - isa_io_base = - (unsigned long)ioremap64(EBONY_PCI_IO_BASE, EBONY_PCI_IO_SIZE); - hose->io_base_virt = (void *)isa_io_base; + hose->io_base_virt = ioremap64(EBONY_PCI_IO_BASE, EBONY_PCI_IO_SIZE); + isa_io_base = (unsigned long)hose->io_base_virt; setup_indirect_pci(hose, EBONY_PCI_CFGA_PLB32, diff --git a/arch/ppc/platforms/4xx/luan.c b/arch/ppc/platforms/4xx/luan.c index 1df2339f1f6c..95359f748e7b 100644 --- a/arch/ppc/platforms/4xx/luan.c +++ b/arch/ppc/platforms/4xx/luan.c @@ -223,9 +223,8 @@ luan_setup_hose(struct pci_controller *hose, hose->io_space.end = LUAN_PCIX_UPPER_IO; hose->mem_space.start = lower_mem; hose->mem_space.end = upper_mem; - isa_io_base = - (unsigned long)ioremap64(pcix_io_base, PCIX_IO_SIZE); - hose->io_base_virt = (void *)isa_io_base; + hose->io_base_virt = ioremap64(pcix_io_base, PCIX_IO_SIZE); + isa_io_base = (unsigned long) hose->io_base_virt; setup_indirect_pci(hose, cfga, cfgd); hose->set_cfg_type = 1; diff --git a/arch/ppc/platforms/4xx/ocotea.c b/arch/ppc/platforms/4xx/ocotea.c index 28de707434f1..5f82a6bc7046 100644 --- a/arch/ppc/platforms/4xx/ocotea.c +++ b/arch/ppc/platforms/4xx/ocotea.c @@ -227,9 +227,8 @@ ocotea_setup_hose(void) hose->io_space.end = OCOTEA_PCI_UPPER_IO; hose->mem_space.start = OCOTEA_PCI_LOWER_MEM; hose->mem_space.end = OCOTEA_PCI_UPPER_MEM; - isa_io_base = - (unsigned long)ioremap64(OCOTEA_PCI_IO_BASE, OCOTEA_PCI_IO_SIZE); - hose->io_base_virt = (void *)isa_io_base; + hose->io_base_virt = ioremap64(OCOTEA_PCI_IO_BASE, OCOTEA_PCI_IO_SIZE); + isa_io_base = (unsigned long) hose->io_base_virt; setup_indirect_pci(hose, OCOTEA_PCI_CFGA_PLB32, diff --git a/arch/ppc/platforms/chrp_pci.c b/arch/ppc/platforms/chrp_pci.c index 5bb6492ecf8c..7d0ee308f662 100644 --- a/arch/ppc/platforms/chrp_pci.c +++ b/arch/ppc/platforms/chrp_pci.c @@ -129,7 +129,7 @@ static struct pci_ops rtas_pci_ops = rtas_write_config }; -volatile struct Hydra *Hydra = NULL; +volatile struct Hydra __iomem *Hydra = NULL; int __init hydra_init(void) @@ -175,13 +175,14 @@ chrp_pcibios_fixup(void) static void __init setup_python(struct pci_controller *hose, struct device_node *dev) { - u32 *reg, val; + u32 __iomem *reg; + u32 val; unsigned long addr = dev->addrs[0].address; setup_indirect_pci(hose, addr + 0xf8000, addr + 0xf8010); /* Clear the magic go-slow bit */ - reg = (u32 *) ioremap(dev->addrs[0].address + 0xf6000, 0x40); + reg = ioremap(dev->addrs[0].address + 0xf6000, 0x40); val = in_be32(®[12]); if (val & PRG_CL_RESET_VALID) { out_be32(®[12], val & ~PRG_CL_RESET_VALID); diff --git a/arch/ppc/platforms/chrp_setup.c b/arch/ppc/platforms/chrp_setup.c index f23c4f320760..57f29ab29bda 100644 --- a/arch/ppc/platforms/chrp_setup.c +++ b/arch/ppc/platforms/chrp_setup.c @@ -356,7 +356,7 @@ static void __init chrp_find_openpic(void) struct device_node *np; int len, i; unsigned int *iranges; - void *isu; + void __iomem *isu; np = find_type_devices("open-pic"); if (np == NULL || np->n_addrs == 0) diff --git a/arch/ppc/platforms/pmac_cache.S b/arch/ppc/platforms/pmac_cache.S index da34a9bc9299..fb977de6b704 100644 --- a/arch/ppc/platforms/pmac_cache.S +++ b/arch/ppc/platforms/pmac_cache.S @@ -64,27 +64,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtspr SPRN_HID0,r4 /* Disable DPM */ sync - /* disp-flush L1 */ - li r4,0x4000 - mtctr r4 + /* Disp-flush L1. We have a weird problem here that I never + * totally figured out. On 750FX, using the ROM for the flush + * results in a non-working flush. We use that workaround for + * now until I finally understand what's going on. --BenH + */ + + /* ROM base by default */ lis r4,0xfff0 -1: lwzx r0,r0,r4 + mfpvr r3 + srwi r3,r3,16 + cmplwi cr0,r3,0x7000 + bne+ 1f + /* RAM base on 750FX */ + li r4,0 +1: li r4,0x4000 + mtctr r4 +1: lwz r0,0(r4) addi r4,r4,32 bdnz 1b sync isync - /* disable / invalidate / enable L1 data */ + /* Disable / invalidate / enable L1 data */ mfspr r3,SPRN_HID0 - rlwinm r0,r0,0,~HID0_DCE + rlwinm r3,r3,0,~(HID0_DCE | HID0_ICE) mtspr SPRN_HID0,r3 sync isync - ori r3,r3,HID0_DCE|HID0_DCI + ori r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI) sync isync mtspr SPRN_HID0,r3 - xori r3,r3,HID0_DCI + xori r3,r3,(HID0_DCI|HID0_ICFI) mtspr SPRN_HID0,r3 sync @@ -110,11 +122,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lis r4,2 mtctr r4 lis r4,0xfff0 -1: lwzx r0,r0,r4 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: dcbf 0,r4 addi r4,r4,32 bdnz 1b sync isync + /* now disable L2 */ rlwinm r5,r5,0,~L2CR_L2E b 2f @@ -135,6 +156,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtspr SPRN_L2CR,r4 sync isync + + /* Wait for the invalidation to complete */ +1: mfspr r3,SPRN_L2CR + rlwinm. r0,r3,0,31,31 + bne 1b + + /* Clear L2I */ xoris r4,r4,L2CR_L2I@h sync mtspr SPRN_L2CR,r4 @@ -142,14 +170,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* now disable the L1 data cache */ mfspr r0,SPRN_HID0 - rlwinm r0,r0,0,~HID0_DCE + rlwinm r0,r0,0,~(HID0_DCE|HID0_ICE) mtspr SPRN_HID0,r0 sync isync /* Restore HID0[DPM] to whatever it was before */ sync - mtspr SPRN_HID0,r8 + mfspr r0,SPRN_HID0 + rlwimi r0,r8,0,11,11 /* Turn back HID0[DPM] */ + mtspr SPRN_HID0,r0 sync /* restore DR and EE */ @@ -201,7 +231,7 @@ flush_disable_745x: mtctr r4 li r4,0 1: - lwzx r0,r0,r4 + lwz r0,0(r4) addi r4,r4,32 /* Go to start of next cache line */ bdnz 1b isync diff --git a/arch/ppc/platforms/pmac_feature.c b/arch/ppc/platforms/pmac_feature.c index 46cbf36722db..867336ad5d36 100644 --- a/arch/ppc/platforms/pmac_feature.c +++ b/arch/ppc/platforms/pmac_feature.c @@ -1590,6 +1590,114 @@ intrepid_shutdown(struct macio_chip* macio, int sleep_mode) mdelay(10); } + +void __pmac pmac_tweak_clock_spreading(int enable) +{ + struct macio_chip* macio = &macio_chips[0]; + + /* Hack for doing clock spreading on some machines PowerBooks and + * iBooks. This implements the "platform-do-clockspreading" OF + * property as decoded manually on various models. For safety, we also + * check the product ID in the device-tree in cases we'll whack the i2c + * chip to make reasonably sure we won't set wrong values in there + * + * Of course, ultimately, we have to implement a real parser for + * the platform-do-* stuff... + */ + + if (macio->type == macio_intrepid) { + if (enable) + UN_OUT(UNI_N_CLOCK_SPREADING, 2); + else + UN_OUT(UNI_N_CLOCK_SPREADING, 0); + mdelay(40); + } + + while (machine_is_compatible("PowerBook5,2") || + machine_is_compatible("PowerBook5,3") || + machine_is_compatible("PowerBook6,2") || + machine_is_compatible("PowerBook6,3")) { + struct device_node *ui2c = of_find_node_by_type(NULL, "i2c"); + struct device_node *dt = of_find_node_by_name(NULL, "device-tree"); + u8 buffer[9]; + u32 *productID; + int i, rc, changed = 0; + + if (dt == NULL) + break; + productID = (u32 *)get_property(dt, "pid#", NULL); + if (productID == NULL) + break; + while(ui2c) { + struct device_node *p = of_get_parent(ui2c); + if (p && !strcmp(p->name, "uni-n")) + break; + ui2c = of_find_node_by_type(ui2c, "i2c"); + } + if (ui2c == NULL) + break; + DBG("Trying to bump clock speed for PID: %08x...\n", *productID); + rc = pmac_low_i2c_open(ui2c, 1); + if (rc != 0) + break; + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); + DBG("read result: %d,", rc); + if (rc != 0) { + pmac_low_i2c_close(ui2c); + break; + } + for (i=0; i<9; i++) + DBG(" %02x", buffer[i]); + DBG("\n"); + + switch(*productID) { + case 0x1182: /* AlBook 12" rev 2 */ + case 0x1183: /* iBook G4 12" */ + buffer[0] = (buffer[0] & 0x8f) | 0x70; + buffer[2] = (buffer[2] & 0x7f) | 0x00; + buffer[5] = (buffer[5] & 0x80) | 0x31; + buffer[6] = (buffer[6] & 0x40) | 0xb0; + buffer[7] = (buffer[7] & 0x00) | (enable ? 0xc0 : 0xba); + buffer[8] = (buffer[8] & 0x00) | 0x30; + changed = 1; + break; + case 0x3142: /* AlBook 15" (ATI M10) */ + case 0x3143: /* AlBook 17" (ATI M10) */ + buffer[0] = (buffer[0] & 0xaf) | 0x50; + buffer[2] = (buffer[2] & 0x7f) | 0x00; + buffer[5] = (buffer[5] & 0x80) | 0x31; + buffer[6] = (buffer[6] & 0x40) | 0xb0; + buffer[7] = (buffer[7] & 0x00) | (enable ? 0xd0 : 0xc0); + buffer[8] = (buffer[8] & 0x00) | 0x30; + changed = 1; + break; + default: + DBG("i2c-hwclock: Machine model not handled\n"); + break; + } + if (!changed) { + pmac_low_i2c_close(ui2c); + break; + } + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9); + DBG("write result: %d,", rc); + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); + DBG("read result: %d,", rc); + if (rc != 0) { + pmac_low_i2c_close(ui2c); + break; + } + for (i=0; i<9; i++) + DBG(" %02x", buffer[i]); + pmac_low_i2c_close(ui2c); + break; + } +} + + static int __pmac core99_sleep(void) { @@ -1601,12 +1709,6 @@ core99_sleep(void) macio->type != macio_intrepid) return -ENODEV; - /* The device-tree contains that in the hwclock node */ - if (macio->type == macio_intrepid) { - UN_OUT(UNI_N_CLOCK_SPREADING, 0); - mdelay(40); - } - /* We power off the wireless slot in case it was not done * by the driver. We don't power it on automatically however */ @@ -1749,12 +1851,6 @@ core99_wake_up(void) UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl); udelay(100); - /* Restore clock spreading */ - if (macio->type == macio_intrepid) { - UN_OUT(UNI_N_CLOCK_SPREADING, 2); - mdelay(40); - } - return 0; } @@ -2149,7 +2245,7 @@ static struct pmac_mb_def pmac_mb_defs[] __pmacdata = { }, { "PowerBook1,1", "PowerBook 101 (Lombard)", PMAC_TYPE_101_PBOOK, paddington_features, - PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE }, { "PowerBook2,1", "iBook (first generation)", PMAC_TYPE_ORIG_IBOOK, core99_features, @@ -2718,97 +2814,11 @@ set_initial_features(void) MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); } - /* Hack for bumping clock speed on the new PowerBooks and the - * iBook G4. This implements the "platform-do-clockspreading" OF - * property. For safety, we also check the product ID in the - * device-tree to make reasonably sure we won't set wrong values - * in the clock chip. - * - * Of course, ultimately, we have to implement a real parser for - * the platform-do-* stuff... + /* Some machine models need the clock chip to be properly setup for + * clock spreading now. This should be a platform function but we + * don't do these at the moment */ - while (machine_is_compatible("PowerBook5,2") || - machine_is_compatible("PowerBook5,3") || - machine_is_compatible("PowerBook6,2") || - machine_is_compatible("PowerBook6,3")) { - struct device_node *ui2c = of_find_node_by_type(NULL, "i2c"); - struct device_node *dt = of_find_node_by_name(NULL, "device-tree"); - u8 buffer[9]; - u32 *productID; - int i, rc, changed = 0; - - if (dt == NULL) - break; - productID = (u32 *)get_property(dt, "pid#", NULL); - if (productID == NULL) - break; - while(ui2c) { - struct device_node *p = of_get_parent(ui2c); - if (p && !strcmp(p->name, "uni-n")) - break; - ui2c = of_find_node_by_type(ui2c, "i2c"); - } - if (ui2c == NULL) - break; - DBG("Trying to bump clock speed for PID: %08x...\n", *productID); - rc = pmac_low_i2c_open(ui2c, 1); - if (rc != 0) - break; - pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); - rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); - DBG("read result: %d,", rc); - if (rc != 0) { - pmac_low_i2c_close(ui2c); - break; - } - for (i=0; i<9; i++) - DBG(" %02x", buffer[i]); - DBG("\n"); - - switch(*productID) { - case 0x1182: /* AlBook 12" rev 2 */ - case 0x1183: /* iBook G4 12" */ - buffer[0] = (buffer[0] & 0x8f) | 0x70; - buffer[2] = (buffer[2] & 0x7f) | 0x00; - buffer[5] = (buffer[5] & 0x80) | 0x31; - buffer[6] = (buffer[6] & 0x40) | 0xb0; - buffer[7] = (buffer[7] & 0x00) | 0xc0; - buffer[8] = (buffer[8] & 0x00) | 0x30; - changed = 1; - break; - case 0x3142: /* AlBook 15" (ATI M10) */ - case 0x3143: /* AlBook 17" (ATI M10) */ - buffer[0] = (buffer[0] & 0xaf) | 0x50; - buffer[2] = (buffer[2] & 0x7f) | 0x00; - buffer[5] = (buffer[5] & 0x80) | 0x31; - buffer[6] = (buffer[6] & 0x40) | 0xb0; - buffer[7] = (buffer[7] & 0x00) | 0xd0; - buffer[8] = (buffer[8] & 0x00) | 0x30; - changed = 1; - break; - default: - DBG("i2c-hwclock: Machine model not handled\n"); - break; - } - if (!changed) { - pmac_low_i2c_close(ui2c); - break; - } - pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub); - rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9); - DBG("write result: %d,", rc); - pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); - rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); - DBG("read result: %d,", rc); - if (rc != 0) { - pmac_low_i2c_close(ui2c); - break; - } - for (i=0; i<9; i++) - DBG(" %02x", buffer[i]); - pmac_low_i2c_close(ui2c); - break; - } + pmac_tweak_clock_spreading(1); #endif /* CONFIG_POWER4 */ diff --git a/arch/ppc/platforms/pmac_low_i2c.c b/arch/ppc/platforms/pmac_low_i2c.c index d07579f2b8b9..08583fce1692 100644 --- a/arch/ppc/platforms/pmac_low_i2c.c +++ b/arch/ppc/platforms/pmac_low_i2c.c @@ -54,7 +54,7 @@ struct low_i2c_host int mode; /* Current mode */ int channel; /* Current channel */ int num_channels; /* Number of channels */ - unsigned long base; /* For keywest-i2c, base address */ + void __iomem * base; /* For keywest-i2c, base address */ int bsteps; /* And register stepping */ int speed; /* And speed */ }; @@ -154,14 +154,12 @@ static const char *__kw_state_names[] = { static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg) { - return in_8(((volatile u8 *)host->base) - + (((unsigned)reg) << host->bsteps)); + return in_8(host->base + (((unsigned)reg) << host->bsteps)); } static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val) { - out_8(((volatile u8 *)host->base) - + (((unsigned)reg) << host->bsteps), val); + out_8(host->base + (((unsigned)reg) << host->bsteps), val); (void)__kw_read_reg(host, reg_subaddr); } @@ -370,7 +368,7 @@ static void keywest_low_i2c_add(struct device_node *np) break; } host->mode = pmac_low_i2c_mode_std; - host->base = (unsigned long)ioremap(np->addrs[0].address + aoffset, + host->base = ioremap(np->addrs[0].address + aoffset, np->addrs[0].size); host->func = keywest_low_i2c_func; } diff --git a/arch/ppc/platforms/pmac_sleep.S b/arch/ppc/platforms/pmac_sleep.S index 3139b6766ad3..f459ade1bd63 100644 --- a/arch/ppc/platforms/pmac_sleep.S +++ b/arch/ppc/platforms/pmac_sleep.S @@ -267,6 +267,10 @@ grackle_wake_up: /* Restore various CPU config stuffs */ bl __restore_cpu_setup + /* Make sure all FPRs have been initialized */ + bl reloc_offset + bl __init_fpu_registers + /* Invalidate & enable L1 cache, we don't care about * whatever the ROM may have tried to write to memory */ diff --git a/arch/ppc/platforms/pmac_smp.c b/arch/ppc/platforms/pmac_smp.c index 731841f9a5b8..8e049dab4e63 100644 --- a/arch/ppc/platforms/pmac_smp.c +++ b/arch/ppc/platforms/pmac_smp.c @@ -91,11 +91,11 @@ extern void __secondary_start_psurge3(void); /* Temporary horrible hack */ #define PSURGE_QUAD_BIC(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v))) /* virtual addresses for the above */ -static volatile u8 *hhead_base; -static volatile u8 *quad_base; -static volatile u32 *psurge_pri_intr; -static volatile u8 *psurge_sec_intr; -static volatile u32 *psurge_start; +static volatile u8 __iomem *hhead_base; +static volatile u8 __iomem *quad_base; +static volatile u32 __iomem *psurge_pri_intr; +static volatile u8 __iomem *psurge_sec_intr; +static volatile u32 __iomem *psurge_start; /* values for psurge_type */ #define PSURGE_NONE -1 @@ -322,10 +322,10 @@ static int __init smp_psurge_probe(void) /* All released cards using this HW design have 4 CPUs */ ncpus = 4; } else { - iounmap((void *) quad_base); + iounmap(quad_base); if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) { /* not a dual-cpu card */ - iounmap((void *) hhead_base); + iounmap(hhead_base); psurge_type = PSURGE_NONE; return 1; } diff --git a/arch/ppc/platforms/pmac_time.c b/arch/ppc/platforms/pmac_time.c index 09636546f44e..de60ccc7db9f 100644 --- a/arch/ppc/platforms/pmac_time.c +++ b/arch/ppc/platforms/pmac_time.c @@ -165,7 +165,7 @@ int __init via_calibrate_decr(void) { struct device_node *vias; - volatile unsigned char *via; + volatile unsigned char __iomem *via; int count = VIA_TIMER_FREQ_6 / 100; unsigned int dstart, dend; @@ -176,8 +176,7 @@ via_calibrate_decr(void) vias = find_devices("via"); if (vias == 0 || vias->n_addrs == 0) return 0; - via = (volatile unsigned char *) - ioremap(vias->addrs[0].address, vias->addrs[0].size); + via = ioremap(vias->addrs[0].address, vias->addrs[0].size); /* set timer 1 for continuous interrupts */ out_8(&via[ACR], (via[ACR] & ~T1MODE) | T1MODE_CONT); @@ -202,7 +201,7 @@ via_calibrate_decr(void) printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", tb_ticks_per_jiffy, dstart - dend); - iounmap((void*)via); + iounmap(via); return 1; } diff --git a/arch/ppc/platforms/radstone_ppc7d.c b/arch/ppc/platforms/radstone_ppc7d.c index 2a99b43737a8..c30607a972d8 100644 --- a/arch/ppc/platforms/radstone_ppc7d.c +++ b/arch/ppc/platforms/radstone_ppc7d.c @@ -68,6 +68,7 @@ #define PPC7D_RST_PIN 17 /* GPP17 */ extern u32 mv64360_irq_base; +extern spinlock_t rtc_lock; static struct mv64x60_handle bh; static int ppc7d_has_alma; @@ -75,6 +76,11 @@ static int ppc7d_has_alma; extern void gen550_progress(char *, unsigned short); extern void gen550_init(int, struct uart_port *); +/* FIXME - move to h file */ +extern int ds1337_do_command(int id, int cmd, void *arg); +#define DS1337_GET_DATE 0 +#define DS1337_SET_DATE 1 + /* residual data */ unsigned char __res[sizeof(bd_t)]; @@ -253,6 +259,8 @@ static int ppc7d_show_cpuinfo(struct seq_file *m) u8 val1, val2; static int flash_sizes[4] = { 64, 32, 0, 16 }; static int flash_banks[4] = { 4, 3, 2, 1 }; + static int sdram_bank_sizes[4] = { 128, 256, 512, 1 }; + int sdram_num_banks = 2; static char *pci_modes[] = { "PCI33", "PCI66", "Unknown", "Unknown", "PCIX33", "PCIX66", @@ -279,13 +287,17 @@ static int ppc7d_show_cpuinfo(struct seq_file *m) (val1 == PPC7D_CPLD_MB_TYPE_PLL_100) ? 100 : (val1 == PPC7D_CPLD_MB_TYPE_PLL_64) ? 64 : 0); + val = inb(PPC7D_CPLD_MEM_CONFIG); + if (val & PPC7D_CPLD_SDRAM_BANK_NUM_MASK) sdram_num_banks--; + val = inb(PPC7D_CPLD_MEM_CONFIG_EXTEND); - val1 = val & PPC7D_CPLD_SDRAM_BANK_SIZE_MASK; - seq_printf(m, "SDRAM\t\t: %d%c", - (val1 == PPC7D_CPLD_SDRAM_BANK_SIZE_128M) ? 128 : - (val1 == PPC7D_CPLD_SDRAM_BANK_SIZE_256M) ? 256 : - (val1 == PPC7D_CPLD_SDRAM_BANK_SIZE_512M) ? 512 : 1, - (val1 == PPC7D_CPLD_SDRAM_BANK_SIZE_1G) ? 'G' : 'M'); + val1 = (val & PPC7D_CPLD_SDRAM_BANK_SIZE_MASK) >> 6; + seq_printf(m, "SDRAM\t\t: %d banks of %d%c, total %d%c", + sdram_num_banks, + sdram_bank_sizes[val1], + (sdram_bank_sizes[val1] < 128) ? 'G' : 'M', + sdram_num_banks * sdram_bank_sizes[val1], + (sdram_bank_sizes[val1] < 128) ? 'G' : 'M'); if (val2 & PPC7D_CPLD_MB_TYPE_ECC_FITTED_MASK) { seq_printf(m, " [ECC %sabled]", (val2 & PPC7D_CPLD_MB_TYPE_ECC_ENABLE_MASK) ? "en" : @@ -1236,6 +1248,38 @@ static void __init ppc7d_setup_arch(void) printk(KERN_INFO "Radstone Technology PPC7D\n"); if (ppc_md.progress) ppc_md.progress("ppc7d_setup_arch: exit", 0); + +} + +/* Real Time Clock support. + * PPC7D has a DS1337 accessed by I2C. + */ +static ulong ppc7d_get_rtc_time(void) +{ + struct rtc_time tm; + int result; + + spin_lock(&rtc_lock); + result = ds1337_do_command(0, DS1337_GET_DATE, &tm); + spin_unlock(&rtc_lock); + + if (result == 0) + result = mktime(tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); + + return result; +} + +static int ppc7d_set_rtc_time(unsigned long nowtime) +{ + struct rtc_time tm; + int result; + + spin_lock(&rtc_lock); + to_tm(nowtime, &tm); + result = ds1337_do_command(0, DS1337_SET_DATE, &tm); + spin_unlock(&rtc_lock); + + return result; } /* This kernel command line parameter can be used to have the target @@ -1293,6 +1337,10 @@ static void ppc7d_init2(void) data8 |= 0x07; outb(data8, PPC7D_CPLD_LEDS); + /* Hook up RTC. We couldn't do this earlier because we need the I2C subsystem */ + ppc_md.set_rtc_time = ppc7d_set_rtc_time; + ppc_md.get_rtc_time = ppc7d_get_rtc_time; + pr_debug("%s: exit\n", __FUNCTION__); } diff --git a/arch/ppc/platforms/radstone_ppc7d.h b/arch/ppc/platforms/radstone_ppc7d.h index 4546fff2b0c3..938375510be4 100644 --- a/arch/ppc/platforms/radstone_ppc7d.h +++ b/arch/ppc/platforms/radstone_ppc7d.h @@ -240,6 +240,7 @@ #define PPC7D_CPLD_FLASH_CNTL 0x086E /* MEMORY_CONFIG_EXTEND */ +#define PPC7D_CPLD_SDRAM_BANK_NUM_MASK 0x02 #define PPC7D_CPLD_SDRAM_BANK_SIZE_MASK 0xc0 #define PPC7D_CPLD_SDRAM_BANK_SIZE_128M 0 #define PPC7D_CPLD_SDRAM_BANK_SIZE_256M 0x40 diff --git a/arch/ppc/syslib/cpm2_pic.c b/arch/ppc/syslib/cpm2_pic.c index 954b07fc1df3..c867be6981cb 100644 --- a/arch/ppc/syslib/cpm2_pic.c +++ b/arch/ppc/syslib/cpm2_pic.c @@ -107,6 +107,11 @@ static void cpm2_end_irq(unsigned int irq_nr) simr = &(cpm2_immr->im_intctl.ic_simrh); ppc_cached_irq_mask[word] |= 1 << bit; simr[word] = ppc_cached_irq_mask[word]; + /* + * Work around large numbers of spurious IRQs on PowerPC 82xx + * systems. + */ + mb(); } } diff --git a/arch/ppc/syslib/m8260_pci.c b/arch/ppc/syslib/m8260_pci.c index bd564fb35ab6..057cc3f8ff37 100644 --- a/arch/ppc/syslib/m8260_pci.c +++ b/arch/ppc/syslib/m8260_pci.c @@ -171,10 +171,9 @@ void __init m8260_find_bridges(void) m8260_setup_pci(hose); hose->pci_mem_offset = MPC826x_PCI_MEM_OFFSET; - isa_io_base = - (unsigned long) ioremap(MPC826x_PCI_IO_BASE, + hose->io_base_virt = ioremap(MPC826x_PCI_IO_BASE, MPC826x_PCI_IO_SIZE); - hose->io_base_virt = (void *) isa_io_base; + isa_io_base = (unsigned long) hose->io_base_virt; /* setup resources */ pci_init_resource(&hose->mem_resources[0], diff --git a/arch/ppc/syslib/mpc52xx_pci.c b/arch/ppc/syslib/mpc52xx_pci.c index c723efd954a6..59cf3e8bd1a0 100644 --- a/arch/ppc/syslib/mpc52xx_pci.c +++ b/arch/ppc/syslib/mpc52xx_pci.c @@ -205,13 +205,11 @@ mpc52xx_find_bridges(void) hose->pci_mem_offset = MPC52xx_PCI_MEM_OFFSET; - isa_io_base = - (unsigned long) ioremap(MPC52xx_PCI_IO_BASE, - MPC52xx_PCI_IO_SIZE); - hose->io_base_virt = (void *) isa_io_base; + hose->io_base_virt = ioremap(MPC52xx_PCI_IO_BASE, MPC52xx_PCI_IO_SIZE); + isa_io_base = (unsigned long) hose->io_base_virt; hose->cfg_addr = &pci_regs->car; - hose->cfg_data = (void __iomem *) isa_io_base; + hose->cfg_data = hose->io_base_virt; /* Setup resources */ pci_init_resource(&hose->mem_resources[0], diff --git a/arch/ppc/syslib/ppc85xx_setup.c b/arch/ppc/syslib/ppc85xx_setup.c index 81f1968c3269..152c3ef1312a 100644 --- a/arch/ppc/syslib/ppc85xx_setup.c +++ b/arch/ppc/syslib/ppc85xx_setup.c @@ -280,16 +280,14 @@ mpc85xx_setup_hose(void) hose_a->io_space.end = MPC85XX_PCI1_UPPER_IO; hose_a->io_base_phys = MPC85XX_PCI1_IO_BASE; #ifdef CONFIG_85xx_PCI2 - isa_io_base = - (unsigned long) ioremap(MPC85XX_PCI1_IO_BASE, + hose_a->io_base_virt = ioremap(MPC85XX_PCI1_IO_BASE, MPC85XX_PCI1_IO_SIZE + MPC85XX_PCI2_IO_SIZE); #else - isa_io_base = - (unsigned long) ioremap(MPC85XX_PCI1_IO_BASE, + hose_a->io_base_virt = ioremap(MPC85XX_PCI1_IO_BASE, MPC85XX_PCI1_IO_SIZE); #endif - hose_a->io_base_virt = (void *) isa_io_base; + isa_io_base = (unsigned long)hose_a->io_base_virt; /* setup resources */ pci_init_resource(&hose_a->mem_resources[0], @@ -329,8 +327,8 @@ mpc85xx_setup_hose(void) hose_b->io_space.start = MPC85XX_PCI2_LOWER_IO; hose_b->io_space.end = MPC85XX_PCI2_UPPER_IO; hose_b->io_base_phys = MPC85XX_PCI2_IO_BASE; - hose_b->io_base_virt = (void *) isa_io_base + MPC85XX_PCI1_IO_SIZE; - + hose_b->io_base_virt = hose_a->io_base_virt + MPC85XX_PCI1_IO_SIZE; + /* setup resources */ pci_init_resource(&hose_b->mem_resources[0], MPC85XX_PCI2_LOWER_MEM, diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index ef1f05e437c4..5cb343883e4d 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -40,6 +40,10 @@ config COMPAT bool default y +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + # We optimistically allocate largepages from the VM, so make the limit # large enough (16MB). This badly named config option is actually # max order + 1 @@ -258,6 +262,7 @@ config PPC_RTAS config RTAS_PROC bool "Proc interface to RTAS" depends on PPC_RTAS + default y config RTAS_FLASH tristate "Firmware flash interface" @@ -293,6 +298,9 @@ config SECCOMP endmenu +config ISA_DMA_API + bool + default y menu "General setup" diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug index e341a129da80..46b1ce58da3b 100644 --- a/arch/ppc64/Kconfig.debug +++ b/arch/ppc64/Kconfig.debug @@ -5,6 +5,9 @@ source "lib/Kconfig.debug" config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + help + This option will cause messages to be printed if free stack space + drops below a certain limit. config KPROBES bool "Kprobes" diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index d33e20bcc52f..691f3008e698 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -56,13 +56,20 @@ LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) CFLAGS += -msoft-float -pipe -mminimal-toc -mtraceback=none \ -mcall-aixdesc +GCC_VERSION := $(call cc-version) +GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;) + ifeq ($(CONFIG_POWER4_ONLY),y) ifeq ($(CONFIG_ALTIVEC),y) +ifeq ($(GCC_BROKEN_VEC),y) CFLAGS += $(call cc-option,-mcpu=970) else CFLAGS += $(call cc-option,-mcpu=power4) endif else + CFLAGS += $(call cc-option,-mcpu=power4) +endif +else CFLAGS += $(call cc-option,-mtune=power4) endif diff --git a/arch/ppc64/boot/addnote.c b/arch/ppc64/boot/addnote.c index 66ff8103bf4d..719663a694bb 100644 --- a/arch/ppc64/boot/addnote.c +++ b/arch/ppc64/boot/addnote.c @@ -19,6 +19,7 @@ #include <unistd.h> #include <string.h> +/* CHRP note section */ char arch[] = "PowerPC"; #define N_DESCR 6 @@ -31,6 +32,29 @@ unsigned int descr[N_DESCR] = { 0x4000, /* load-base */ }; +/* RPA note section */ +char rpaname[] = "IBM,RPA-Client-Config"; + +/* + * Note: setting ignore_my_client_config *should* mean that OF ignores + * all the other fields, but there is a firmware bug which means that + * it looks at the splpar field at least. So these values need to be + * reasonable. + */ +#define N_RPA_DESCR 8 +unsigned int rpanote[N_RPA_DESCR] = { + 0, /* lparaffinity */ + 64, /* min_rmo_size */ + 0, /* min_rmo_percent */ + 40, /* max_pft_size */ + 1, /* splpar */ + -1, /* min_load */ + 0, /* new_mem_def */ + 1, /* ignore_my_client_config */ +}; + +#define ROUNDUP(len) (((len) + 3) & ~3) + unsigned char buf[512]; #define GET_16BE(off) ((buf[off] << 8) + (buf[(off)+1])) @@ -69,7 +93,7 @@ main(int ac, char **av) { int fd, n, i; int ph, ps, np; - int nnote, ns; + int nnote, nnote2, ns; if (ac != 2) { fprintf(stderr, "Usage: %s elf-file\n", av[0]); @@ -81,7 +105,8 @@ main(int ac, char **av) exit(1); } - nnote = strlen(arch) + 1 + (N_DESCR + 3) * 4; + nnote = 12 + ROUNDUP(strlen(arch) + 1) + sizeof(descr); + nnote2 = 12 + ROUNDUP(strlen(rpaname) + 1) + sizeof(rpanote); n = read(fd, buf, sizeof(buf)); if (n < 0) { @@ -104,7 +129,7 @@ main(int ac, char **av) np = GET_16BE(E_PHNUM); if (ph < E_HSIZE || ps < PH_HSIZE || np < 1) goto notelf; - if (ph + (np + 1) * ps + nnote > n) + if (ph + (np + 2) * ps + nnote + nnote2 > n) goto nospace; for (i = 0; i < np; ++i) { @@ -117,12 +142,12 @@ main(int ac, char **av) } /* XXX check that the area we want to use is all zeroes */ - for (i = 0; i < ps + nnote; ++i) + for (i = 0; i < 2 * ps + nnote + nnote2; ++i) if (buf[ph + i] != 0) goto nospace; /* fill in the program header entry */ - ns = ph + ps; + ns = ph + 2 * ps; PUT_32BE(ph + PH_TYPE, PT_NOTE); PUT_32BE(ph + PH_OFFSET, ns); PUT_32BE(ph + PH_FILESZ, nnote); @@ -134,11 +159,26 @@ main(int ac, char **av) PUT_32BE(ns + 8, 0x1275); strcpy(&buf[ns + 12], arch); ns += 12 + strlen(arch) + 1; - for (i = 0; i < N_DESCR; ++i) - PUT_32BE(ns + i * 4, descr[i]); + for (i = 0; i < N_DESCR; ++i, ns += 4) + PUT_32BE(ns, descr[i]); + + /* fill in the second program header entry and the RPA note area */ + ph += ps; + PUT_32BE(ph + PH_TYPE, PT_NOTE); + PUT_32BE(ph + PH_OFFSET, ns); + PUT_32BE(ph + PH_FILESZ, nnote2); + + /* fill in the note area we point to */ + PUT_32BE(ns, strlen(rpaname) + 1); + PUT_32BE(ns + 4, sizeof(rpanote)); + PUT_32BE(ns + 8, 0x12759999); + strcpy(&buf[ns + 12], rpaname); + ns += 12 + ROUNDUP(strlen(rpaname) + 1); + for (i = 0; i < N_RPA_DESCR; ++i, ns += 4) + PUT_32BE(ns, rpanote[i]); /* Update the number of program headers */ - PUT_16BE(E_PHNUM, np + 1); + PUT_16BE(E_PHNUM, np + 2); /* write back */ lseek(fd, (long) 0, SEEK_SET); @@ -155,11 +195,11 @@ main(int ac, char **av) exit(0); notelf: - fprintf(stderr, "%s does not appear to be an ELF file\n", av[0]); + fprintf(stderr, "%s does not appear to be an ELF file\n", av[1]); exit(1); nospace: fprintf(stderr, "sorry, I can't find space in %s to put the note\n", - av[0]); + av[1]); exit(1); } diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index b0fa86ad8b1b..da12ea2ca464 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -14,7 +14,6 @@ #include <linux/string.h> #include <asm/processor.h> #include <asm/page.h> -#include <asm/bootinfo.h> extern void *finddevice(const char *); extern int getprop(void *, const char *, void *, int); diff --git a/arch/ppc64/boot/start.c b/arch/ppc64/boot/start.c deleted file mode 100644 index ea247e79b55e..000000000000 --- a/arch/ppc64/boot/start.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Copyright (C) Paul Mackerras 1997. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <stdarg.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/ctype.h> - -#include <asm/div64.h> - -int (*prom)(void *); - -void *chosen_handle; -void *stdin; -void *stdout; -void *stderr; - -void exit(void); -void *finddevice(const char *name); -int getprop(void *phandle, const char *name, void *buf, int buflen); -void chrpboot(int a1, int a2, void *prom); /* in main.c */ - -void printk(char *fmt, ...); - -void -start(int a1, int a2, void *promptr) -{ - prom = (int (*)(void *)) promptr; - chosen_handle = finddevice("/chosen"); - if (chosen_handle == (void *) -1) - exit(); - if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) - exit(); - stderr = stdout; - if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) - exit(); - - chrpboot(a1, a2, promptr); - for (;;) - exit(); -} - -int -write(void *handle, void *ptr, int nb) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *ihandle; - void *addr; - int len; - int actual; - } args; - - args.service = "write"; - args.nargs = 3; - args.nret = 1; - args.ihandle = handle; - args.addr = ptr; - args.len = nb; - args.actual = -1; - (*prom)(&args); - return args.actual; -} - -int -read(void *handle, void *ptr, int nb) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *ihandle; - void *addr; - int len; - int actual; - } args; - - args.service = "read"; - args.nargs = 3; - args.nret = 1; - args.ihandle = handle; - args.addr = ptr; - args.len = nb; - args.actual = -1; - (*prom)(&args); - return args.actual; -} - -void -exit() -{ - struct prom_args { - char *service; - } args; - - for (;;) { - args.service = "exit"; - (*prom)(&args); - } -} - -void -pause(void) -{ - struct prom_args { - char *service; - } args; - - args.service = "enter"; - (*prom)(&args); -} - -void * -finddevice(const char *name) -{ - struct prom_args { - char *service; - int nargs; - int nret; - const char *devspec; - void *phandle; - } args; - - args.service = "finddevice"; - args.nargs = 1; - args.nret = 1; - args.devspec = name; - args.phandle = (void *) -1; - (*prom)(&args); - return args.phandle; -} - -void * -claim(unsigned long virt, unsigned long size, unsigned long align) -{ - struct prom_args { - char *service; - int nargs; - int nret; - unsigned int virt; - unsigned int size; - unsigned int align; - void *ret; - } args; - - args.service = "claim"; - args.nargs = 3; - args.nret = 1; - args.virt = virt; - args.size = size; - args.align = align; - (*prom)(&args); - return args.ret; -} - -int -getprop(void *phandle, const char *name, void *buf, int buflen) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *phandle; - const char *name; - void *buf; - int buflen; - int size; - } args; - - args.service = "getprop"; - args.nargs = 4; - args.nret = 1; - args.phandle = phandle; - args.name = name; - args.buf = buf; - args.buflen = buflen; - args.size = -1; - (*prom)(&args); - return args.size; -} - -int -putc(int c, void *f) -{ - char ch = c; - - if (c == '\n') - putc('\r', f); - return write(f, &ch, 1) == 1? c: -1; -} - -int -putchar(int c) -{ - return putc(c, stdout); -} - -int -fputs(char *str, void *f) -{ - int n = strlen(str); - - return write(f, str, n) == n? 0: -1; -} - -int -readchar(void) -{ - char ch; - - for (;;) { - switch (read(stdin, &ch, 1)) { - case 1: - return ch; - case -1: - printk("read(stdin) returned -1\r\n"); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int -getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - putchar('\a'); - else { - putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - - - -/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ -unsigned char _ctype[] = { -_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ -_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ -_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ -_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ -_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ -_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ - -size_t strnlen(const char * s, size_t count) -{ - const char *sc; - - for (sc = s; count-- && *sc != '\0'; ++sc) - /* nothing */; - return sc - s; -} - -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0,value; - - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -long simple_strtol(const char *cp,char **endp,unsigned int base) -{ - if(*cp=='-') - return -simple_strtoul(cp+1,endp,base); - return simple_strtoul(cp,endp,base); -} - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define ZEROPAD 1 /* pad with zero */ -#define SIGN 2 /* unsigned/signed long */ -#define PLUS 4 /* show plus */ -#define SPACE 8 /* space if plus */ -#define LEFT 16 /* left justified */ -#define SPECIAL 32 /* 0x */ -#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * str, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; - int i; - - if (type & LARGE) - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - if (type & LEFT) - type &= ~ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & ZEROPAD) ? '0' : ' '; - sign = 0; - if (type & SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & PLUS) { - sign = '+'; - size--; - } else if (type & SPACE) { - sign = ' '; - size--; - } - } - if (type & SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(ZEROPAD+LEFT))) - while(size-->0) - *str++ = ' '; - if (sign) - *str++ = sign; - if (type & SPECIAL) { - if (base==8) - *str++ = '0'; - else if (base==16) { - *str++ = '0'; - *str++ = digits[33]; - } - } - if (!(type & LEFT)) - while (size-- > 0) - *str++ = c; - while (i < precision--) - *str++ = '0'; - while (i-- > 0) - *str++ = tmp[i]; - while (size-- > 0) - *str++ = ' '; - return str; -} - -/* Forward decl. needed for IP address printing stuff... */ -int sprintf(char * buf, const char *fmt, ...); - -int vsprintf(char *buf, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char * str; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. */ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - - for (str=buf ; *fmt ; ++fmt) { - if (*fmt != '%') { - *str++ = *fmt; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= LEFT; goto repeat; - case '+': flags |= PLUS; goto repeat; - case ' ': flags |= SPACE; goto repeat; - case '#': flags |= SPECIAL; goto repeat; - case '0': flags |= ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & LEFT)) - while (--field_width > 0) - *str++ = ' '; - *str++ = (unsigned char) va_arg(args, int); - while (--field_width > 0) - *str++ = ' '; - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = "<NULL>"; - - len = strnlen(s, precision); - - if (!(flags & LEFT)) - while (len < field_width--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = *s++; - while (len < field_width--) - *str++ = ' '; - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, - (unsigned long) va_arg(args, void *), 16, - field_width, precision, flags); - continue; - - - case 'n': - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - *str++ = '%'; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= SIGN; - case 'u': - break; - - default: - *str++ = '%'; - if (*fmt) - *str++ = *fmt; - else - --fmt; - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & SIGN) - num = (signed int) num; - } - str = number(str, num, base, field_width, precision, flags); - } - *str = '\0'; - return str-buf; -} - -int sprintf(char * buf, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i=vsprintf(buf,fmt,args); - va_end(args); - return i; -} - -static char sprint_buf[1024]; - -void -printk(char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); -} - -int -printf(char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); - return n; -} diff --git a/arch/ppc64/kernel/HvLpEvent.c b/arch/ppc64/kernel/HvLpEvent.c index 9802beefa217..f8f19637f73f 100644 --- a/arch/ppc64/kernel/HvLpEvent.c +++ b/arch/ppc64/kernel/HvLpEvent.c @@ -45,7 +45,7 @@ int HvLpEvent_unregisterHandler( HvLpEvent_Type eventType ) /* We now sleep until all other CPUs have scheduled. This ensures that * the deletion is seen by all other CPUs, and that the deleted handler * isn't still running on another CPU when we return. */ - synchronize_kernel(); + synchronize_rcu(); } } return rc; diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index 90b41f48d21c..b944717c1dbd 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -32,7 +32,7 @@ .text /* - * Returns (address we're running at) - (address we were linked at) + * Returns (address we were linked at) - (address we are running at) * for use before the text and data are mapped to KERNELBASE. */ diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c index b9069c2d1933..4e71781a4414 100644 --- a/arch/ppc64/kernel/nvram.c +++ b/arch/ppc64/kernel/nvram.c @@ -339,9 +339,9 @@ static int nvram_remove_os_partition(void) static int nvram_create_os_partition(void) { struct list_head * p; - struct nvram_partition * part; - struct nvram_partition * new_part = NULL; - struct nvram_partition * free_part = NULL; + struct nvram_partition *part = NULL; + struct nvram_partition *new_part = NULL; + struct nvram_partition *free_part = NULL; int seq_init[2] = { 0, 0 }; loff_t tmp_index; long size = 0; @@ -364,13 +364,11 @@ static int nvram_create_os_partition(void) free_part = part; } } - if (!size) { + if (!size) return -ENOSPC; - } /* Create our OS partition */ - new_part = (struct nvram_partition *) - kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); if (!new_part) { printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n"); return -ENOMEM; @@ -379,7 +377,7 @@ static int nvram_create_os_partition(void) new_part->index = free_part->index; new_part->header.signature = NVRAM_SIG_OS; new_part->header.length = size; - sprintf(new_part->header.name, "ppc64,linux"); + strcpy(new_part->header.name, "ppc64,linux"); new_part->header.checksum = nvram_checksum(&new_part->header); rc = nvram_write_header(new_part); @@ -394,7 +392,8 @@ static int nvram_create_os_partition(void) tmp_index = new_part->index + NVRAM_HEADER_LEN; rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write failed (%d)\n", rc); + printk(KERN_ERR "nvram_create_os_partition: nvram_write " + "failed (%d)\n", rc); return rc; } diff --git a/arch/ppc64/kernel/pSeries_hvCall.S b/arch/ppc64/kernel/pSeries_hvCall.S index 0715d3038019..176e8da76466 100644 --- a/arch/ppc64/kernel/pSeries_hvCall.S +++ b/arch/ppc64/kernel/pSeries_hvCall.S @@ -28,6 +28,8 @@ unsigned long *out3); R10 */ _GLOBAL(plpar_hcall) + HMT_MEDIUM + mfcr r0 std r8,STK_PARM(r8)(r1) /* Save out ptrs */ @@ -53,6 +55,8 @@ _GLOBAL(plpar_hcall) /* Simple interface with no output values (other than status) */ _GLOBAL(plpar_hcall_norets) + HMT_MEDIUM + mfcr r0 stw r0,8(r1) @@ -75,6 +79,8 @@ _GLOBAL(plpar_hcall_norets) unsigned long *out1); 120(R1) */ _GLOBAL(plpar_hcall_8arg_2ret) + HMT_MEDIUM + mfcr r0 ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ stw r0,8(r1) @@ -99,6 +105,8 @@ _GLOBAL(plpar_hcall_8arg_2ret) unsigned long *out4); 112(R1) */ _GLOBAL(plpar_hcall_4out) + HMT_MEDIUM + mfcr r0 stw r0,8(r1) diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index c60d8cb2b84d..fbad349ec58c 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -326,13 +326,6 @@ static void __devinit smp_xics_setup_cpu(int cpu) cpu_clear(cpu, of_spin_map); - /* - * Put the calling processor into the GIQ. This is really only - * necessary from a secondary thread as the OF start-cpu interface - * performs this function for us on primary threads. - */ - rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); } static DEFINE_SPINLOCK(timebase_lock); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index be3cc387c1ec..d786d4b6af0b 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -438,7 +438,7 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, int i; if (page_is_ram(offset >> PAGE_SHIFT)) - return prot; + return __pgprot(prot); prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 45a4ad08fbc2..eb6538b58008 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -321,6 +321,10 @@ static int __devinit finish_node_interrupts(struct device_node *np, char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; + else if (!(name && !strcmp(name, "mac-io"))) + /* ignore other cascaded controllers, such as + the k2-sata-root */ + break; } np->intrs[intrcount].sense = 1; if (n > 1) @@ -830,7 +834,7 @@ void __init unflatten_device_tree(void) { unsigned long start, mem, size; struct device_node **allnextp = &allnodes; - char *p; + char *p = NULL; int l = 0; DBG(" -> unflatten_device_tree()\n"); diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index 8dffa9ae2623..35ec42de962e 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -493,6 +493,113 @@ static void __init early_cmdline_parse(void) } /* + * To tell the firmware what our capabilities are, we have to pass + * it a fake 32-bit ELF header containing a couple of PT_NOTE sections + * that contain structures that contain the actual values. + */ +static struct fake_elf { + Elf32_Ehdr elfhdr; + Elf32_Phdr phdr[2]; + struct chrpnote { + u32 namesz; + u32 descsz; + u32 type; + char name[8]; /* "PowerPC" */ + struct chrpdesc { + u32 real_mode; + u32 real_base; + u32 real_size; + u32 virt_base; + u32 virt_size; + u32 load_base; + } chrpdesc; + } chrpnote; + struct rpanote { + u32 namesz; + u32 descsz; + u32 type; + char name[24]; /* "IBM,RPA-Client-Config" */ + struct rpadesc { + u32 lpar_affinity; + u32 min_rmo_size; + u32 min_rmo_percent; + u32 max_pft_size; + u32 splpar; + u32 min_load; + u32 new_mem_def; + u32 ignore_me; + } rpadesc; + } rpanote; +} fake_elf = { + .elfhdr = { + .e_ident = { 0x7f, 'E', 'L', 'F', + ELFCLASS32, ELFDATA2MSB, EV_CURRENT }, + .e_type = ET_EXEC, /* yeah right */ + .e_machine = EM_PPC, + .e_version = EV_CURRENT, + .e_phoff = offsetof(struct fake_elf, phdr), + .e_phentsize = sizeof(Elf32_Phdr), + .e_phnum = 2 + }, + .phdr = { + [0] = { + .p_type = PT_NOTE, + .p_offset = offsetof(struct fake_elf, chrpnote), + .p_filesz = sizeof(struct chrpnote) + }, [1] = { + .p_type = PT_NOTE, + .p_offset = offsetof(struct fake_elf, rpanote), + .p_filesz = sizeof(struct rpanote) + } + }, + .chrpnote = { + .namesz = sizeof("PowerPC"), + .descsz = sizeof(struct chrpdesc), + .type = 0x1275, + .name = "PowerPC", + .chrpdesc = { + .real_mode = ~0U, /* ~0 means "don't care" */ + .real_base = ~0U, + .real_size = ~0U, + .virt_base = ~0U, + .virt_size = ~0U, + .load_base = ~0U + }, + }, + .rpanote = { + .namesz = sizeof("IBM,RPA-Client-Config"), + .descsz = sizeof(struct rpadesc), + .type = 0x12759999, + .name = "IBM,RPA-Client-Config", + .rpadesc = { + .lpar_affinity = 0, + .min_rmo_size = 64, /* in megabytes */ + .min_rmo_percent = 0, + .max_pft_size = 48, /* 2^48 bytes max PFT size */ + .splpar = 1, + .min_load = ~0U, + .new_mem_def = 0 + } + } +}; + +static void __init prom_send_capabilities(void) +{ + unsigned long offset = reloc_offset(); + ihandle elfloader; + int ret; + + elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader")); + if (elfloader == 0) { + prom_printf("couldn't open /packages/elf-loader\n"); + return; + } + ret = call_prom("call-method", 3, 1, ADDR("process-elf-header"), + elfloader, ADDR(&fake_elf)); + call_prom("close", 1, 0, elfloader); +} + +/* * Memory allocation strategy... our layout is normally: * * at 14Mb or more we vmlinux, then a gap and initrd. In some rare cases, initrd @@ -1448,6 +1555,12 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, } } +/* + * The Open Firmware 1275 specification states properties must be 31 bytes or + * less, however not all firmwares obey this. Make it 64 bytes to be safe. + */ +#define MAX_PROPERTY_NAME 64 + static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long *mem_end) { @@ -1457,7 +1570,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long soff; unsigned char *valp; unsigned long offset = reloc_offset(); - char pname[32]; + char pname[MAX_PROPERTY_NAME]; char *path; path = RELOC(prom_scratch); diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c index 354a287c67eb..9f8c6087ae56 100644 --- a/arch/ppc64/kernel/ptrace.c +++ b/arch/ppc64/kernel/ptrace.c @@ -28,6 +28,7 @@ #include <linux/security.h> #include <linux/audit.h> #include <linux/seccomp.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -162,7 +163,7 @@ int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -194,7 +195,7 @@ int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_single_step(child); @@ -304,14 +305,17 @@ static void do_syscall_trace(void) void do_syscall_trace_enter(struct pt_regs *regs) { + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + do_syscall_trace(); + if (unlikely(current->audit_context)) - audit_syscall_entry(current, regs->gpr[0], + audit_syscall_entry(current, + test_thread_flag(TIF_32BIT)?AUDIT_ARCH_PPC:AUDIT_ARCH_PPC64, + regs->gpr[0], regs->gpr[3], regs->gpr[4], regs->gpr[5], regs->gpr[6]); - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - do_syscall_trace(); } void do_syscall_trace_leave(struct pt_regs *regs) @@ -319,7 +323,9 @@ void do_syscall_trace_leave(struct pt_regs *regs) secure_computing(regs->gpr[0]); if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs->result); + audit_syscall_exit(current, + (regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, + regs->result); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/arch/ppc64/kernel/ptrace32.c b/arch/ppc64/kernel/ptrace32.c index ee81b1b776cc..16436426c7e2 100644 --- a/arch/ppc64/kernel/ptrace32.c +++ b/arch/ppc64/kernel/ptrace32.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/user.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -293,7 +294,7 @@ int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -325,7 +326,7 @@ int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data) case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_single_step(child); diff --git a/arch/ppc64/kernel/rtas_flash.c b/arch/ppc64/kernel/rtas_flash.c index 3213837282ca..923e2e201a70 100644 --- a/arch/ppc64/kernel/rtas_flash.c +++ b/arch/ppc64/kernel/rtas_flash.c @@ -218,7 +218,7 @@ static void get_flash_status_msg(int status, char *buf) } /* Reading the proc file will show status (not the firmware contents) */ -static ssize_t rtas_flash_read(struct file *file, char *buf, +static ssize_t rtas_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); @@ -256,7 +256,7 @@ static ssize_t rtas_flash_read(struct file *file, char *buf, * count is. If the system is low on memory it will be just as well * that we fail.... */ -static ssize_t rtas_flash_write(struct file *file, const char *buffer, +static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, size_t count, loff_t *off) { struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); @@ -356,7 +356,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf) args_buf->status = rc; } -static ssize_t manage_flash_read(struct file *file, char *buf, +static ssize_t manage_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); @@ -386,7 +386,7 @@ static ssize_t manage_flash_read(struct file *file, char *buf, return msglen; } -static ssize_t manage_flash_write(struct file *file, const char *buf, +static ssize_t manage_flash_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); @@ -466,7 +466,7 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, return n; } -static ssize_t validate_flash_read(struct file *file, char *buf, +static ssize_t validate_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); @@ -494,7 +494,7 @@ static ssize_t validate_flash_read(struct file *file, char *buf, return msglen; } -static ssize_t validate_flash_write(struct file *file, const char *buf, +static ssize_t validate_flash_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); diff --git a/arch/ppc64/kernel/scanlog.c b/arch/ppc64/kernel/scanlog.c index 189b81a41987..4d70736619c7 100644 --- a/arch/ppc64/kernel/scanlog.c +++ b/arch/ppc64/kernel/scanlog.c @@ -43,7 +43,7 @@ static int scanlog_debug; static unsigned int ibm_scan_log_dump; /* RTAS token */ static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ -static ssize_t scanlog_read(struct file *file, char *buf, +static ssize_t scanlog_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_dentry->d_inode; @@ -129,7 +129,7 @@ static ssize_t scanlog_read(struct file *file, char *buf, /*NOTREACHED*/ } -static ssize_t scanlog_write(struct file * file, const char * buf, +static ssize_t scanlog_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { char stkbuf[20]; diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c index a95a2b49a1d5..bf782276984c 100644 --- a/arch/ppc64/kernel/signal.c +++ b/arch/ppc64/kernel/signal.c @@ -42,11 +42,7 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -#ifndef MIN -#define MIN(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -#define GP_REGS_SIZE MIN(sizeof(elf_gregset_t), sizeof(struct pt_regs)) +#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) #define TRAMP_TRACEBACK 3 diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c index b0e167db6af9..3c2fa5c284c0 100644 --- a/arch/ppc64/kernel/signal32.c +++ b/arch/ppc64/kernel/signal32.c @@ -657,7 +657,7 @@ static int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; - if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) + if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; if (vdso32_rt_sigtramp && current->thread.vdso_base) { @@ -842,7 +842,7 @@ static int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->link = (unsigned long) frame->mctx.tramp; } - if (put_user(regs->gpr[1], (unsigned long __user *)newsp)) + if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; regs->gpr[1] = (unsigned long) newsp; regs->gpr[3] = sig; diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c index 1c92da3e4525..3b906cd94037 100644 --- a/arch/ppc64/kernel/smp.c +++ b/arch/ppc64/kernel/smp.c @@ -125,7 +125,7 @@ void __devinit smp_generic_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca[nr].cpu_start = 1; - mb(); + smp_mb(); } #endif /* CONFIG_PPC_MULTIPLATFORM */ @@ -256,7 +256,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, } call_data = &data; - wmb(); + smp_wmb(); /* Send a message to all other CPUs and wait for them to respond */ smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION); @@ -431,7 +431,7 @@ int generic_cpu_enable(unsigned int cpu) /* get the target out of it's holding state */ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - wmb(); + smp_wmb(); while (!cpu_online(cpu)) cpu_relax(); @@ -447,7 +447,7 @@ void generic_cpu_die(unsigned int cpu) int i; for (i = 0; i < 100; i++) { - rmb(); + smp_rmb(); if (per_cpu(cpu_state, cpu) == CPU_DEAD) return; msleep(100); @@ -463,7 +463,7 @@ void generic_mach_cpu_die(void) cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); __get_cpu_var(cpu_state) = CPU_DEAD; - wmb(); + smp_wmb(); while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); @@ -515,7 +515,7 @@ int __devinit __cpu_up(unsigned int cpu) * be written out to main store before we release * the processor. */ - mb(); + smp_mb(); /* wake up cpus */ DBG("smp: kicking cpu %d\n", cpu); diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 77ded5a363b6..772a465b49f9 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -221,15 +221,15 @@ static __inline__ void timer_recalc_offset(unsigned long cur_tb) temp_varp->tb_to_xs = do_gtod.varp->tb_to_xs; temp_varp->tb_orig_stamp = new_tb_orig_stamp; temp_varp->stamp_xsec = new_stamp_xsec; - mb(); + smp_mb(); do_gtod.varp = temp_varp; do_gtod.var_idx = temp_idx; ++(systemcfg->tb_update_count); - wmb(); + smp_wmb(); systemcfg->tb_orig_stamp = new_tb_orig_stamp; systemcfg->stamp_xsec = new_stamp_xsec; - wmb(); + smp_wmb(); ++(systemcfg->tb_update_count); } @@ -648,7 +648,7 @@ void ppc_adjtimex(void) temp_varp->tb_to_xs = new_tb_to_xs; temp_varp->stamp_xsec = new_stamp_xsec; temp_varp->tb_orig_stamp = do_gtod.varp->tb_orig_stamp; - mb(); + smp_mb(); do_gtod.varp = temp_varp; do_gtod.var_idx = temp_idx; @@ -662,10 +662,10 @@ void ppc_adjtimex(void) * loops back and reads them again until this criteria is met. */ ++(systemcfg->tb_update_count); - wmb(); + smp_wmb(); systemcfg->tb_to_xs = new_tb_to_xs; systemcfg->stamp_xsec = new_stamp_xsec; - wmb(); + smp_wmb(); ++(systemcfg->tb_update_count); write_sequnlock_irqrestore( &xtime_lock, flags ); diff --git a/arch/ppc64/kernel/vdso32/Makefile b/arch/ppc64/kernel/vdso32/Makefile index ede2f7e477c2..0b1b0df973eb 100644 --- a/arch/ppc64/kernel/vdso32/Makefile +++ b/arch/ppc64/kernel/vdso32/Makefile @@ -1,7 +1,7 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o # Build rules diff --git a/arch/ppc64/kernel/vdso32/cacheflush.S b/arch/ppc64/kernel/vdso32/cacheflush.S index c74fddb6afd4..0ed7ea721715 100644 --- a/arch/ppc64/kernel/vdso32/cacheflush.S +++ b/arch/ppc64/kernel/vdso32/cacheflush.S @@ -47,6 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) addi r6,r6,128 bdnz 1b isync + li r3,0 blr .cfi_endproc V_FUNCTION_END(__kernel_sync_dicache) @@ -59,6 +60,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) .cfi_startproc sync isync + li r3,0 blr .cfi_endproc V_FUNCTION_END(__kernel_sync_dicache_p5) diff --git a/arch/ppc64/kernel/vdso32/gettimeofday.S b/arch/ppc64/kernel/vdso32/gettimeofday.S index ca7f415195c4..2b48bf1fb109 100644 --- a/arch/ppc64/kernel/vdso32/gettimeofday.S +++ b/arch/ppc64/kernel/vdso32/gettimeofday.S @@ -58,6 +58,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) stw r5,TZONE_TZ_DSTTIME(r11) 1: mtlr r12 + li r3,0 blr 2: mr r3,r10 diff --git a/arch/ppc64/kernel/vdso32/note.S b/arch/ppc64/kernel/vdso32/note.S new file mode 100644 index 000000000000..d4b5be4f3d5f --- /dev/null +++ b/arch/ppc64/kernel/vdso32/note.S @@ -0,0 +1,25 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + + ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE + ASM_ELF_NOTE_END diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S index cca27bd03a57..11290c902ba3 100644 --- a/arch/ppc64/kernel/vdso32/vdso32.lds.S +++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S @@ -20,6 +20,8 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } + .note : { *(.note.*) } :text :note + . = ALIGN (16); .text : { @@ -87,6 +89,7 @@ SECTIONS PHDRS { text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ } diff --git a/arch/ppc64/kernel/vdso64/Makefile b/arch/ppc64/kernel/vdso64/Makefile index bd3f70b1a384..ab39988452cc 100644 --- a/arch/ppc64/kernel/vdso64/Makefile +++ b/arch/ppc64/kernel/vdso64/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o # Build rules diff --git a/arch/ppc64/kernel/vdso64/cacheflush.S b/arch/ppc64/kernel/vdso64/cacheflush.S index d9696ffcf334..e0725b7b7003 100644 --- a/arch/ppc64/kernel/vdso64/cacheflush.S +++ b/arch/ppc64/kernel/vdso64/cacheflush.S @@ -47,6 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) addi r6,r6,128 bdnz 1b isync + li r3,0 blr .cfi_endproc V_FUNCTION_END(__kernel_sync_dicache) @@ -59,6 +60,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) .cfi_startproc sync isync + li r3,0 blr .cfi_endproc V_FUNCTION_END(__kernel_sync_dicache_p5) diff --git a/arch/ppc64/kernel/vdso64/note.S b/arch/ppc64/kernel/vdso64/note.S new file mode 100644 index 000000000000..dc2a509f7e8a --- /dev/null +++ b/arch/ppc64/kernel/vdso64/note.S @@ -0,0 +1 @@ +#include "../vdso32/note.S" diff --git a/arch/ppc64/kernel/vdso64/vdso64.lds.S b/arch/ppc64/kernel/vdso64/vdso64.lds.S index 942c815c7bc7..9cb28181da80 100644 --- a/arch/ppc64/kernel/vdso64/vdso64.lds.S +++ b/arch/ppc64/kernel/vdso64/vdso64.lds.S @@ -18,12 +18,14 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } + .note : { *(.note.*) } :text :note + . = ALIGN (16); .text : { *(.text .stub .text.* .gnu.linkonce.t.*) *(.sfpr .glink) - } + } :text PROVIDE (__etext = .); PROVIDE (_etext = .); PROVIDE (etext = .); @@ -88,6 +90,7 @@ SECTIONS PHDRS { text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ } diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c index eedd1d3c2a10..879f39b90a33 100644 --- a/arch/ppc64/kernel/xics.c +++ b/arch/ppc64/kernel/xics.c @@ -432,6 +432,7 @@ void xics_cause_IPI(int cpu) { ops->qirr_info(cpu, IPI_PRIORITY); } +#endif /* CONFIG_SMP */ void xics_setup_cpu(void) { @@ -439,9 +440,17 @@ void xics_setup_cpu(void) ops->cppr_info(cpu, 0xff); iosync(); -} -#endif /* CONFIG_SMP */ + /* + * Put the calling processor into the GIQ. This is really only + * necessary from a secondary thread as the OF start-cpu interface + * performs this function for us on primary threads. + * + * XXX: undo of teardown on kexec needs this too, as may hotplug + */ + rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); +} void xics_init_IRQ(void) { @@ -563,8 +572,7 @@ nextnode: for (; i < NR_IRQS; ++i) get_irq_desc(i)->handler = &xics_pic; - ops->cppr_info(boot_cpuid, 0xff); - iosync(); + xics_setup_cpu(); ppc64_boot_msg(0x21, "XICS Done"); } diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S index 8c0156a37001..c23d46956dd9 100644 --- a/arch/ppc64/mm/hash_low.S +++ b/arch/ppc64/mm/hash_low.S @@ -85,7 +85,10 @@ _GLOBAL(__hash_page) bne- htab_wrong_access /* Check if PTE is busy */ andi. r0,r31,_PAGE_BUSY - bne- 1b + /* If so, just bail out and refault if needed. Someone else + * is changing this PTE anyway and might hash it. + */ + bne- bail_ok /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY,HASHPTE and ACCESSED) */ @@ -215,6 +218,10 @@ _GLOBAL(htab_call_hpte_remove) /* Try all again */ b htab_insert_pte +bail_ok: + li r3,0 + b bail + htab_pte_insert_ok: /* Insert slot number & secondary bit in PTE */ rldimi r30,r3,12,63-15 diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index 144657e0c3d5..52b6b9305341 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -320,8 +320,7 @@ static void native_flush_hash_range(unsigned long context, j = 0; for (i = 0; i < number; i++) { - if ((batch->addr[i] >= USER_START) && - (batch->addr[i] <= USER_END)) + if (batch->addr[i] < KERNELBASE) vsid = get_vsid(context, batch->addr[i]); else vsid = get_kernel_vsid(batch->addr[i]); diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index e48be12f518c..0a0f97008d02 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -298,24 +298,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int local = 0; cpumask_t tmp; + if ((ea & ~REGION_MASK) > EADDR_MASK) + return 1; + switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; mm = current->mm; - if ((ea > USER_END) || (! mm)) + if (! mm) return 1; vsid = get_vsid(mm->context.id, ea); break; case IO_REGION_ID: - if (ea > IMALLOC_END) - return 1; mm = &ioremap_mm; vsid = get_kernel_vsid(ea); break; case VMALLOC_REGION_ID: - if (ea > VMALLOC_END) - return 1; mm = &init_mm; vsid = get_kernel_vsid(ea); break; @@ -362,7 +361,7 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte, unsigned long vsid, vpn, va, hash, secondary, slot; unsigned long huge = pte_huge(pte); - if ((ea >= USER_START) && (ea <= USER_END)) + if (ea < KERNELBASE) vsid = get_vsid(context, ea); else vsid = get_kernel_vsid(ea); diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index 390296efe3e0..d3bf86a5c1ad 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -42,7 +42,7 @@ static inline int hugepgd_index(unsigned long addr) return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; } -static pgd_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) +static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) { int index; @@ -52,21 +52,21 @@ static pgd_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) index = hugepgd_index(addr); BUG_ON(index >= PTRS_PER_HUGEPGD); - return mm->context.huge_pgdir + index; + return (pud_t *)(mm->context.huge_pgdir + index); } -static inline pte_t *hugepte_offset(pgd_t *dir, unsigned long addr) +static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) { int index; - if (pgd_none(*dir)) + if (pud_none(*dir)) return NULL; index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; - return (pte_t *)pgd_page(*dir) + index; + return (pte_t *)pud_page(*dir) + index; } -static pgd_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) +static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) { BUG_ON(! in_hugepage_area(mm->context, addr)); @@ -90,10 +90,9 @@ static pgd_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) return hugepgd_offset(mm, addr); } -static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir, - unsigned long addr) +static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) { - if (! pgd_present(*dir)) { + if (! pud_present(*dir)) { pte_t *new; spin_unlock(&mm->page_table_lock); @@ -104,7 +103,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir, * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ - if (pgd_present(*dir)) { + if (pud_present(*dir)) { if (new) kmem_cache_free(zero_cache, new); } else { @@ -115,7 +114,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir, ptepage = virt_to_page(new); ptepage->mapping = (void *) mm; ptepage->index = addr & HUGEPGDIR_MASK; - pgd_populate(mm, dir, new); + pud_populate(mm, dir, new); } } @@ -124,28 +123,28 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pgd_t *dir, static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { - pgd_t *pgd; + pud_t *pud; BUG_ON(! in_hugepage_area(mm->context, addr)); - pgd = hugepgd_offset(mm, addr); - if (! pgd) + pud = hugepgd_offset(mm, addr); + if (! pud) return NULL; - return hugepte_offset(pgd, addr); + return hugepte_offset(pud, addr); } static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { - pgd_t *pgd; + pud_t *pud; BUG_ON(! in_hugepage_area(mm->context, addr)); - pgd = hugepgd_alloc(mm, addr); - if (! pgd) + pud = hugepgd_alloc(mm, addr); + if (! pud) return NULL; - return hugepte_alloc(mm, pgd, addr); + return hugepte_alloc(mm, pud, addr); } static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, @@ -709,10 +708,10 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm) /* cleanup any hugepte pages leftover */ for (i = 0; i < PTRS_PER_HUGEPGD; i++) { - pgd_t *pgd = pgdir + i; + pud_t *pud = (pud_t *)(pgdir + i); - if (! pgd_none(*pgd)) { - pte_t *pte = (pte_t *)pgd_page(*pgd); + if (! pud_none(*pud)) { + pte_t *pte = (pte_t *)pud_page(*pud); struct page *ptepage = virt_to_page(pte); ptepage->mapping = NULL; @@ -720,7 +719,7 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm) BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); kmem_cache_free(zero_cache, pte); } - pgd_clear(pgd); + pud_clear(pud); } BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index 9d92b0d9cde5..cb8727f3267a 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c @@ -14,6 +14,7 @@ #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/semaphore.h> +#include <asm/imalloc.h> static DECLARE_MUTEX(imlist_sem); struct vm_struct * imlist = NULL; @@ -23,11 +24,11 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) unsigned long addr; struct vm_struct **p, *tmp; - addr = IMALLOC_START; + addr = ioremap_bot; for (p = &imlist; (tmp = *p) ; p = &tmp->next) { if (size + addr < (unsigned long) tmp->addr) break; - if ((unsigned long)tmp->addr >= IMALLOC_START) + if ((unsigned long)tmp->addr >= ioremap_bot) addr = tmp->size + (unsigned long) tmp->addr; if (addr > IMALLOC_END-size) return 1; diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index a7149b9fc35c..4b42aff74d73 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -64,6 +64,7 @@ #include <asm/iommu.h> #include <asm/abs_addr.h> #include <asm/vdso.h> +#include <asm/imalloc.h> int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; @@ -136,14 +137,78 @@ void iounmap(volatile void __iomem *addr) #else +static void unmap_im_area_pte(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + pte_t ptent = ptep_get_and_clear(&ioremap_mm, addr, pte); + WARN_ON(!pte_none(ptent) && !pte_present(ptent)); + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static inline void unmap_im_area_pmd(pud_t *pud, unsigned long addr, + unsigned long end) +{ + pmd_t *pmd; + unsigned long next; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + unmap_im_area_pte(pmd, addr, next); + } while (pmd++, addr = next, addr != end); +} + +static inline void unmap_im_area_pud(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + pud_t *pud; + unsigned long next; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + unmap_im_area_pmd(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +static void unmap_im_area(unsigned long addr, unsigned long end) +{ + struct mm_struct *mm = &ioremap_mm; + unsigned long next; + pgd_t *pgd; + + spin_lock(&mm->page_table_lock); + + pgd = pgd_offset_i(addr); + flush_cache_vunmap(addr, end); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + unmap_im_area_pud(pgd, addr, next); + } while (pgd++, addr = next, addr != end); + flush_tlb_kernel_range(start, end); + + spin_unlock(&mm->page_table_lock); +} + /* * map_io_page currently only called by __ioremap * map_io_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -static void map_io_page(unsigned long ea, unsigned long pa, int flags) +static int map_io_page(unsigned long ea, unsigned long pa, int flags) { pgd_t *pgdp; + pud_t *pudp; pmd_t *pmdp; pte_t *ptep; unsigned long vsid; @@ -151,9 +216,15 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags) if (mem_init_done) { spin_lock(&ioremap_mm.page_table_lock); pgdp = pgd_offset_i(ea); - pmdp = pmd_alloc(&ioremap_mm, pgdp, ea); + pudp = pud_alloc(&ioremap_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&ioremap_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea); - + if (!ptep) + return -ENOMEM; pa = abs_to_phys(pa); set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); @@ -181,6 +252,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags) panic("map_io_page: could not insert mapping"); } } + return 0; } @@ -194,9 +266,14 @@ static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, flags |= pgprot_val(PAGE_KERNEL); for (i = 0; i < size; i += PAGE_SIZE) - map_io_page(ea+i, pa+i, flags); + if (map_io_page(ea+i, pa+i, flags)) + goto failure; return (void __iomem *) (ea + (addr & ~PAGE_MASK)); + failure: + if (mem_init_done) + unmap_im_area(ea, ea + size); + return NULL; } @@ -206,10 +283,11 @@ ioremap(unsigned long addr, unsigned long size) return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); } -void __iomem * -__ioremap(unsigned long addr, unsigned long size, unsigned long flags) +void __iomem * __ioremap(unsigned long addr, unsigned long size, + unsigned long flags) { unsigned long pa, ea; + void __iomem *ret; /* * Choose an address to map it to. @@ -232,12 +310,16 @@ __ioremap(unsigned long addr, unsigned long size, unsigned long flags) if (area == NULL) return NULL; ea = (unsigned long)(area->addr); + ret = __ioremap_com(addr, pa, ea, size, flags); + if (!ret) + im_free(area->addr); } else { ea = ioremap_bot; - ioremap_bot += size; + ret = __ioremap_com(addr, pa, ea, size, flags); + if (ret) + ioremap_bot += size; } - - return __ioremap_com(addr, pa, ea, size, flags); + return ret; } #define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) @@ -246,6 +328,7 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea, unsigned long size, unsigned long flags) { struct vm_struct *area; + void __iomem *ret; /* For now, require page-aligned values for pa, ea, and size */ if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || @@ -276,7 +359,12 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea, } } - if (__ioremap_com(pa, pa, ea, size, flags) != (void *) ea) { + ret = __ioremap_com(pa, pa, ea, size, flags); + if (ret == NULL) { + printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); + return 1; + } + if (ret != (void *) ea) { printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); return 1; } @@ -284,69 +372,6 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea, return 0; } -static void unmap_im_area_pte(pmd_t *pmd, unsigned long address, - unsigned long size) -{ - unsigned long base, end; - pte_t *pte; - - if (pmd_none(*pmd)) - return; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return; - } - - pte = pte_offset_kernel(pmd, address); - base = address & PMD_MASK; - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - - do { - pte_t page; - page = ptep_get_and_clear(&ioremap_mm, base + address, pte); - address += PAGE_SIZE; - pte++; - if (pte_none(page)) - continue; - if (pte_present(page)) - continue; - printk(KERN_CRIT "Whee.. Swapped out page in kernel page" - " table\n"); - } while (address < end); -} - -static void unmap_im_area_pmd(pgd_t *dir, unsigned long address, - unsigned long size) -{ - unsigned long base, end; - pmd_t *pmd; - - if (pgd_none(*dir)) - return; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return; - } - - pmd = pmd_offset(dir, address); - base = address & PGDIR_MASK; - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - - do { - unmap_im_area_pte(pmd, base + address, end - address); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); -} - /* * Unmap an IO region and remove it from imalloc'd list. * Access to IO memory should be serialized by driver. @@ -356,39 +381,19 @@ static void unmap_im_area_pmd(pgd_t *dir, unsigned long address, */ void iounmap(volatile void __iomem *token) { - unsigned long address, start, end, size; - struct mm_struct *mm; - pgd_t *dir; + unsigned long address, size; void *addr; - if (!mem_init_done) { + if (!mem_init_done) return; - } addr = (void *) ((unsigned long __force) token & PAGE_MASK); - if ((size = im_free(addr)) == 0) { + if ((size = im_free(addr)) == 0) return; - } address = (unsigned long)addr; - start = address; - end = address + size; - - mm = &ioremap_mm; - spin_lock(&mm->page_table_lock); - - dir = pgd_offset_i(address); - flush_cache_vunmap(address, end); - do { - unmap_im_area_pmd(dir, address, end - address); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - flush_tlb_kernel_range(start, end); - - spin_unlock(&mm->page_table_lock); - return; + unmap_im_area(address, address + size); } static int iounmap_subset_regions(unsigned long addr, unsigned long size) @@ -664,7 +669,7 @@ void __init paging_init(void) zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; - free_area_init_node(0, &contig_page_data, zones_size, + free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); } #endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/ppc64/mm/slb.c b/arch/ppc64/mm/slb.c index 6a20773f695d..244150a0bc18 100644 --- a/arch/ppc64/mm/slb.c +++ b/arch/ppc64/mm/slb.c @@ -33,8 +33,8 @@ static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; } -static inline void create_slbe(unsigned long ea, unsigned long vsid, - unsigned long flags, unsigned long entry) +static inline void create_slbe(unsigned long ea, unsigned long flags, + unsigned long entry) { asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, flags)), @@ -145,9 +145,8 @@ void slb_initialize(void) asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, get_kernel_vsid(KERNELBASE), flags, 0); - create_slbe(VMALLOCBASE, get_kernel_vsid(KERNELBASE), - SLB_VSID_KERNEL, 1); + create_slbe(KERNELBASE, flags, 0); + create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. By the time it goes * elsewhere, we'll call _switch() which will bolt in the new diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c index 31491131d5e4..df4bbe14153c 100644 --- a/arch/ppc64/mm/stab.c +++ b/arch/ppc64/mm/stab.c @@ -19,6 +19,11 @@ #include <asm/paca.h> #include <asm/cputable.h> +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; + /* Both the segment table and SLB code uses the following cache */ #define NR_STAB_CACHE_ENTRIES 8 DEFINE_PER_CPU(long, stab_cache_ptr); diff --git a/arch/ppc64/xmon/ppc-opc.c b/arch/ppc64/xmon/ppc-opc.c index 1e4e7e319970..5ee8fc32f824 100644 --- a/arch/ppc64/xmon/ppc-opc.c +++ b/arch/ppc64/xmon/ppc-opc.c @@ -20,6 +20,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <linux/stddef.h> #include "nonstdio.h" #include "ppc.h" @@ -110,12 +111,12 @@ const struct powerpc_operand powerpc_operands[] = /* The zero index is used to indicate the end of the list of operands. */ #define UNUSED 0 - { 0, 0, 0, 0, 0 }, + { 0, 0, NULL, NULL, 0 }, /* The BA field in an XL form instruction. */ #define BA UNUSED + 1 #define BA_MASK (0x1f << 16) - { 5, 16, 0, 0, PPC_OPERAND_CR }, + { 5, 16, NULL, NULL, PPC_OPERAND_CR }, /* The BA field in an XL form instruction when it must be the same as the BT field in the same instruction. */ @@ -125,7 +126,7 @@ const struct powerpc_operand powerpc_operands[] = /* The BB field in an XL form instruction. */ #define BB BAT + 1 #define BB_MASK (0x1f << 11) - { 5, 11, 0, 0, PPC_OPERAND_CR }, + { 5, 11, NULL, NULL, PPC_OPERAND_CR }, /* The BB field in an XL form instruction when it must be the same as the BA field in the same instruction. */ @@ -168,21 +169,21 @@ const struct powerpc_operand powerpc_operands[] = /* The BF field in an X or XL form instruction. */ #define BF BDPA + 1 - { 3, 23, 0, 0, PPC_OPERAND_CR }, + { 3, 23, NULL, NULL, PPC_OPERAND_CR }, /* An optional BF field. This is used for comparison instructions, in which an omitted BF field is taken as zero. */ #define OBF BF + 1 - { 3, 23, 0, 0, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + { 3, 23, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, /* The BFA field in an X or XL form instruction. */ #define BFA OBF + 1 - { 3, 18, 0, 0, PPC_OPERAND_CR }, + { 3, 18, NULL, NULL, PPC_OPERAND_CR }, /* The BI field in a B form or XL form instruction. */ #define BI BFA + 1 #define BI_MASK (0x1f << 16) - { 5, 16, 0, 0, PPC_OPERAND_CR }, + { 5, 16, NULL, NULL, PPC_OPERAND_CR }, /* The BO field in a B form instruction. Certain values are illegal. */ @@ -197,36 +198,36 @@ const struct powerpc_operand powerpc_operands[] = /* The BT field in an X or XL form instruction. */ #define BT BOE + 1 - { 5, 21, 0, 0, PPC_OPERAND_CR }, + { 5, 21, NULL, NULL, PPC_OPERAND_CR }, /* The condition register number portion of the BI field in a B form or XL form instruction. This is used for the extended conditional branch mnemonics, which set the lower two bits of the BI field. This field is optional. */ #define CR BT + 1 - { 3, 18, 0, 0, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + { 3, 18, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, /* The CRB field in an X form instruction. */ #define CRB CR + 1 - { 5, 6, 0, 0, 0 }, + { 5, 6, NULL, NULL, 0 }, /* The CRFD field in an X form instruction. */ #define CRFD CRB + 1 - { 3, 23, 0, 0, PPC_OPERAND_CR }, + { 3, 23, NULL, NULL, PPC_OPERAND_CR }, /* The CRFS field in an X form instruction. */ #define CRFS CRFD + 1 - { 3, 0, 0, 0, PPC_OPERAND_CR }, + { 3, 0, NULL, NULL, PPC_OPERAND_CR }, /* The CT field in an X form instruction. */ #define CT CRFS + 1 - { 5, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + { 5, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The D field in a D form instruction. This is a displacement off a register, and implies that the next operand is a register in parentheses. */ #define D CT + 1 - { 16, 0, 0, 0, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + { 16, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, /* The DE field in a DE form instruction. This is like D, but is 12 bits only. */ @@ -252,40 +253,40 @@ const struct powerpc_operand powerpc_operands[] = /* The E field in a wrteei instruction. */ #define E DS + 1 - { 1, 15, 0, 0, 0 }, + { 1, 15, NULL, NULL, 0 }, /* The FL1 field in a POWER SC form instruction. */ #define FL1 E + 1 - { 4, 12, 0, 0, 0 }, + { 4, 12, NULL, NULL, 0 }, /* The FL2 field in a POWER SC form instruction. */ #define FL2 FL1 + 1 - { 3, 2, 0, 0, 0 }, + { 3, 2, NULL, NULL, 0 }, /* The FLM field in an XFL form instruction. */ #define FLM FL2 + 1 - { 8, 17, 0, 0, 0 }, + { 8, 17, NULL, NULL, 0 }, /* The FRA field in an X or A form instruction. */ #define FRA FLM + 1 #define FRA_MASK (0x1f << 16) - { 5, 16, 0, 0, PPC_OPERAND_FPR }, + { 5, 16, NULL, NULL, PPC_OPERAND_FPR }, /* The FRB field in an X or A form instruction. */ #define FRB FRA + 1 #define FRB_MASK (0x1f << 11) - { 5, 11, 0, 0, PPC_OPERAND_FPR }, + { 5, 11, NULL, NULL, PPC_OPERAND_FPR }, /* The FRC field in an A form instruction. */ #define FRC FRB + 1 #define FRC_MASK (0x1f << 6) - { 5, 6, 0, 0, PPC_OPERAND_FPR }, + { 5, 6, NULL, NULL, PPC_OPERAND_FPR }, /* The FRS field in an X form instruction or the FRT field in a D, X or A form instruction. */ #define FRS FRC + 1 #define FRT FRS - { 5, 21, 0, 0, PPC_OPERAND_FPR }, + { 5, 21, NULL, NULL, PPC_OPERAND_FPR }, /* The FXM field in an XFX instruction. */ #define FXM FRS + 1 @@ -298,11 +299,11 @@ const struct powerpc_operand powerpc_operands[] = /* The L field in a D or X form instruction. */ #define L FXM4 + 1 - { 1, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + { 1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The LEV field in a POWER SC form instruction. */ #define LEV L + 1 - { 7, 5, 0, 0, 0 }, + { 7, 5, NULL, NULL, 0 }, /* The LI field in an I form instruction. The lower two bits are forced to zero. */ @@ -316,24 +317,24 @@ const struct powerpc_operand powerpc_operands[] = /* The LS field in an X (sync) form instruction. */ #define LS LIA + 1 - { 2, 21, 0, 0, PPC_OPERAND_OPTIONAL }, + { 2, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The MB field in an M form instruction. */ #define MB LS + 1 #define MB_MASK (0x1f << 6) - { 5, 6, 0, 0, 0 }, + { 5, 6, NULL, NULL, 0 }, /* The ME field in an M form instruction. */ #define ME MB + 1 #define ME_MASK (0x1f << 1) - { 5, 1, 0, 0, 0 }, + { 5, 1, NULL, NULL, 0 }, /* The MB and ME fields in an M form instruction expressed a single operand which is a bitmask indicating which bits to select. This is a two operand form using PPC_OPERAND_NEXT. See the description in opcode/ppc.h for what this means. */ #define MBE ME + 1 - { 5, 6, 0, 0, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, + { 5, 6, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, { 32, 0, insert_mbe, extract_mbe, 0 }, /* The MB or ME field in an MD or MDS form instruction. The high @@ -345,7 +346,7 @@ const struct powerpc_operand powerpc_operands[] = /* The MO field in an mbar instruction. */ #define MO MB6 + 1 - { 5, 21, 0, 0, 0 }, + { 5, 21, NULL, NULL, 0 }, /* The NB field in an X form instruction. The value 32 is stored as 0. */ @@ -361,34 +362,34 @@ const struct powerpc_operand powerpc_operands[] = /* The RA field in an D, DS, DQ, X, XO, M, or MDS form instruction. */ #define RA NSI + 1 #define RA_MASK (0x1f << 16) - { 5, 16, 0, 0, PPC_OPERAND_GPR }, + { 5, 16, NULL, NULL, PPC_OPERAND_GPR }, /* The RA field in the DQ form lq instruction, which has special value restrictions. */ #define RAQ RA + 1 - { 5, 16, insert_raq, 0, PPC_OPERAND_GPR }, + { 5, 16, insert_raq, NULL, PPC_OPERAND_GPR }, /* The RA field in a D or X form instruction which is an updating load, which means that the RA field may not be zero and may not equal the RT field. */ #define RAL RAQ + 1 - { 5, 16, insert_ral, 0, PPC_OPERAND_GPR }, + { 5, 16, insert_ral, NULL, PPC_OPERAND_GPR }, /* The RA field in an lmw instruction, which has special value restrictions. */ #define RAM RAL + 1 - { 5, 16, insert_ram, 0, PPC_OPERAND_GPR }, + { 5, 16, insert_ram, NULL, PPC_OPERAND_GPR }, /* The RA field in a D or X form instruction which is an updating store or an updating floating point load, which means that the RA field may not be zero. */ #define RAS RAM + 1 - { 5, 16, insert_ras, 0, PPC_OPERAND_GPR }, + { 5, 16, insert_ras, NULL, PPC_OPERAND_GPR }, /* The RB field in an X, XO, M, or MDS form instruction. */ #define RB RAS + 1 #define RB_MASK (0x1f << 11) - { 5, 11, 0, 0, PPC_OPERAND_GPR }, + { 5, 11, NULL, NULL, PPC_OPERAND_GPR }, /* The RB field in an X form instruction when it must be the same as the RS field in the instruction. This is used for extended @@ -402,22 +403,22 @@ const struct powerpc_operand powerpc_operands[] = #define RS RBS + 1 #define RT RS #define RT_MASK (0x1f << 21) - { 5, 21, 0, 0, PPC_OPERAND_GPR }, + { 5, 21, NULL, NULL, PPC_OPERAND_GPR }, /* The RS field of the DS form stq instruction, which has special value restrictions. */ #define RSQ RS + 1 - { 5, 21, insert_rsq, 0, PPC_OPERAND_GPR }, + { 5, 21, insert_rsq, NULL, PPC_OPERAND_GPR }, /* The RT field of the DQ form lq instruction, which has special value restrictions. */ #define RTQ RSQ + 1 - { 5, 21, insert_rtq, 0, PPC_OPERAND_GPR }, + { 5, 21, insert_rtq, NULL, PPC_OPERAND_GPR }, /* The SH field in an X or M form instruction. */ #define SH RTQ + 1 #define SH_MASK (0x1f << 11) - { 5, 11, 0, 0, 0 }, + { 5, 11, NULL, NULL, 0 }, /* The SH field in an MD form instruction. This is split. */ #define SH6 SH + 1 @@ -426,12 +427,12 @@ const struct powerpc_operand powerpc_operands[] = /* The SI field in a D form instruction. */ #define SI SH6 + 1 - { 16, 0, 0, 0, PPC_OPERAND_SIGNED }, + { 16, 0, NULL, NULL, PPC_OPERAND_SIGNED }, /* The SI field in a D form instruction when we accept a wide range of positive values. */ #define SISIGNOPT SI + 1 - { 16, 0, 0, 0, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + { 16, 0, NULL, NULL, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, /* The SPR field in an XFX form instruction. This is flipped--the lower 5 bits are stored in the upper 5 and vice- versa. */ @@ -443,25 +444,25 @@ const struct powerpc_operand powerpc_operands[] = /* The BAT index number in an XFX form m[ft]ibat[lu] instruction. */ #define SPRBAT SPR + 1 #define SPRBAT_MASK (0x3 << 17) - { 2, 17, 0, 0, 0 }, + { 2, 17, NULL, NULL, 0 }, /* The SPRG register number in an XFX form m[ft]sprg instruction. */ #define SPRG SPRBAT + 1 #define SPRG_MASK (0x3 << 16) - { 2, 16, 0, 0, 0 }, + { 2, 16, NULL, NULL, 0 }, /* The SR field in an X form instruction. */ #define SR SPRG + 1 - { 4, 16, 0, 0, 0 }, + { 4, 16, NULL, NULL, 0 }, /* The STRM field in an X AltiVec form instruction. */ #define STRM SR + 1 #define STRM_MASK (0x3 << 21) - { 2, 21, 0, 0, 0 }, + { 2, 21, NULL, NULL, 0 }, /* The SV field in a POWER SC form instruction. */ #define SV STRM + 1 - { 14, 2, 0, 0, 0 }, + { 14, 2, NULL, NULL, 0 }, /* The TBR field in an XFX form instruction. This is like the SPR field, but it is optional. */ @@ -471,52 +472,52 @@ const struct powerpc_operand powerpc_operands[] = /* The TO field in a D or X form instruction. */ #define TO TBR + 1 #define TO_MASK (0x1f << 21) - { 5, 21, 0, 0, 0 }, + { 5, 21, NULL, NULL, 0 }, /* The U field in an X form instruction. */ #define U TO + 1 - { 4, 12, 0, 0, 0 }, + { 4, 12, NULL, NULL, 0 }, /* The UI field in a D form instruction. */ #define UI U + 1 - { 16, 0, 0, 0, 0 }, + { 16, 0, NULL, NULL, 0 }, /* The VA field in a VA, VX or VXR form instruction. */ #define VA UI + 1 #define VA_MASK (0x1f << 16) - { 5, 16, 0, 0, PPC_OPERAND_VR }, + { 5, 16, NULL, NULL, PPC_OPERAND_VR }, /* The VB field in a VA, VX or VXR form instruction. */ #define VB VA + 1 #define VB_MASK (0x1f << 11) - { 5, 11, 0, 0, PPC_OPERAND_VR }, + { 5, 11, NULL, NULL, PPC_OPERAND_VR }, /* The VC field in a VA form instruction. */ #define VC VB + 1 #define VC_MASK (0x1f << 6) - { 5, 6, 0, 0, PPC_OPERAND_VR }, + { 5, 6, NULL, NULL, PPC_OPERAND_VR }, /* The VD or VS field in a VA, VX, VXR or X form instruction. */ #define VD VC + 1 #define VS VD #define VD_MASK (0x1f << 21) - { 5, 21, 0, 0, PPC_OPERAND_VR }, + { 5, 21, NULL, NULL, PPC_OPERAND_VR }, /* The SIMM field in a VX form instruction. */ #define SIMM VD + 1 - { 5, 16, 0, 0, PPC_OPERAND_SIGNED}, + { 5, 16, NULL, NULL, PPC_OPERAND_SIGNED}, /* The UIMM field in a VX form instruction. */ #define UIMM SIMM + 1 - { 5, 16, 0, 0, 0 }, + { 5, 16, NULL, NULL, 0 }, /* The SHB field in a VA form instruction. */ #define SHB UIMM + 1 - { 4, 6, 0, 0, 0 }, + { 4, 6, NULL, NULL, 0 }, /* The other UIMM field in a EVX form instruction. */ #define EVUIMM SHB + 1 - { 5, 11, 0, 0, 0 }, + { 5, 11, NULL, NULL, 0 }, /* The other UIMM field in a half word EVX form instruction. */ #define EVUIMM_2 EVUIMM + 1 @@ -533,11 +534,11 @@ const struct powerpc_operand powerpc_operands[] = /* The WS field. */ #define WS EVUIMM_8 + 1 #define WS_MASK (0x7 << 11) - { 3, 11, 0, 0, 0 }, + { 3, 11, NULL, NULL, 0 }, /* The L field in an mtmsrd instruction */ #define MTMSRD_L WS + 1 - { 1, 16, 0, 0, PPC_OPERAND_OPTIONAL }, + { 1, 16, NULL, NULL, PPC_OPERAND_OPTIONAL }, }; diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 1358b4201701..07fd0414a4bf 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.11 -# Wed Mar 2 16:57:55 2005 +# Linux kernel version: 2.6.12-rc3 +# Fri Apr 22 15:30:58 2005 # CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y @@ -15,6 +15,7 @@ CONFIG_UID16=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -26,24 +27,25 @@ CONFIG_SYSVIPC=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=17 CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set # CONFIG_EMBEDDED is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SHMEM=y CONFIG_CC_ALIGN_FUNCTIONS=0 CONFIG_CC_ALIGN_LABELS=0 CONFIG_CC_ALIGN_LOOPS=0 CONFIG_CC_ALIGN_JUMPS=0 # CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 # # Loadable module support @@ -261,7 +263,6 @@ CONFIG_NET=y # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y CONFIG_NET_KEY=y CONFIG_INET=y @@ -329,6 +330,7 @@ CONFIG_NET_SCH_DSMARK=m CONFIG_NET_QOS=y CONFIG_NET_ESTIMATOR=y CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_ROUTE=y @@ -338,6 +340,7 @@ CONFIG_NET_CLS_U32=m # CONFIG_NET_CLS_IND is not set CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m +# CONFIG_NET_EMATCH is not set # CONFIG_NET_CLS_ACT is not set CONFIG_NET_CLS_POLICE=y @@ -393,6 +396,8 @@ CONFIG_CTC=m CONFIG_IUCV=m # CONFIG_NETIUCV is not set # CONFIG_SMSGIUCV is not set +# CONFIG_CLAW is not set +# CONFIG_MPC is not set CONFIG_QETH=y # @@ -532,10 +537,13 @@ CONFIG_MSDOS_PARTITION=y # # Kernel hacking # +# CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set @@ -560,6 +568,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set # CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_DES_Z990 is not set # CONFIG_CRYPTO_BLOWFISH is not set diff --git a/arch/s390/kernel/compat_ioctl.c b/arch/s390/kernel/compat_ioctl.c index 96571ff7115d..03d03c6d3cbb 100644 --- a/arch/s390/kernel/compat_ioctl.c +++ b/arch/s390/kernel/compat_ioctl.c @@ -16,6 +16,7 @@ #define CODE #include "../../../fs/compat_ioctl.c" #include <asm/dasd.h> +#include <asm/cmb.h> #include <asm/tape390.h> static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, @@ -58,7 +59,11 @@ COMPATIBLE_IOCTL(BIODASDPRRD) COMPATIBLE_IOCTL(BIODASDPSRD) COMPATIBLE_IOCTL(BIODASDGATTR) COMPATIBLE_IOCTL(BIODASDSATTR) - +#if defined(CONFIG_DASD_CMB) || defined(CONFIG_DASD_CMB_MODULE) +COMPATIBLE_IOCTL(BIODASDCMFENABLE) +COMPATIBLE_IOCTL(BIODASDCMFDISABLE) +COMPATIBLE_IOCTL(BIODASDREADALLCMB) +#endif #endif #if defined(CONFIG_S390_TAPE) || defined(CONFIG_S390_TAPE_MODULE) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 647233c02fc8..26889366929a 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -32,6 +32,7 @@ #include <linux/user.h> #include <linux/security.h> #include <linux/audit.h> +#include <linux/signal.h> #include <asm/segment.h> #include <asm/page.h> @@ -609,7 +610,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data) /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ - if ((unsigned long) data >= _NSIG) + if (!valid_signal(data)) return -EIO; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -637,7 +638,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_SINGLESTEP: /* set the trap flag. */ - if ((unsigned long) data >= _NSIG) + if (!valid_signal(data)) return -EIO; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; @@ -711,18 +712,13 @@ out: asmlinkage void syscall_trace(struct pt_regs *regs, int entryexit) { - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(current, regs->gprs[2], - regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5]); - else - audit_syscall_exit(current, regs->gprs[2]); - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); + if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + goto out; if (!(current->ptrace & PT_PTRACED)) - return; + goto out; ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); @@ -735,4 +731,10 @@ syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(current, + test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, regs->gprs[3], + regs->gprs[4], regs->gprs[5]); } diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 11fd6d556d8f..bee654abb6d3 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -34,7 +34,6 @@ EXPORT_SYMBOL(__clear_user_asm); EXPORT_SYMBOL(__strncpy_from_user_asm); EXPORT_SYMBOL(__strnlen_user_asm); EXPORT_SYMBOL(diag10); -EXPORT_SYMBOL(default_storage_key); /* * semaphore ops diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c879c40aa7a5..df83215beac3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -44,6 +44,8 @@ #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/irq.h> +#include <asm/page.h> +#include <asm/ptrace.h> /* * Machine setup.. @@ -53,13 +55,14 @@ unsigned int console_devno = -1; unsigned int console_irq = -1; unsigned long memory_size = 0; unsigned long machine_flags = 0; -unsigned int default_storage_key = 0; struct { unsigned long addr, size, type; } memory_chunk[MEMORY_CHUNKS] = { { 0 } }; #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ +unsigned long __initdata zholes_size[MAX_NR_ZONES]; +static unsigned long __initdata memory_end; /* * Setup options @@ -78,11 +81,15 @@ static char command_line[COMMAND_LINE_SIZE] = { 0, }; static struct resource code_resource = { .name = "Kernel code", + .start = (unsigned long) &_text, + .end = (unsigned long) &_etext - 1, .flags = IORESOURCE_BUSY | IORESOURCE_MEM, }; static struct resource data_resource = { .name = "Kernel data", + .start = (unsigned long) &_etext, + .end = (unsigned long) &_edata - 1, .flags = IORESOURCE_BUSY | IORESOURCE_MEM, }; @@ -310,90 +317,50 @@ void machine_power_off(void) EXPORT_SYMBOL(machine_power_off); -/* - * Setup function called from init/main.c just after the banner - * was printed. - */ -extern char _pstart, _pend, _stext; +static void __init +add_memory_hole(unsigned long start, unsigned long end) +{ + unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; + + if (end <= dma_pfn) + zholes_size[ZONE_DMA] += end - start + 1; + else if (start > dma_pfn) + zholes_size[ZONE_NORMAL] += end - start + 1; + else { + zholes_size[ZONE_DMA] += dma_pfn - start + 1; + zholes_size[ZONE_NORMAL] += end - dma_pfn; + } +} -void __init setup_arch(char **cmdline_p) +static void __init +parse_cmdline_early(char **cmdline_p) { - unsigned long bootmap_size; - unsigned long memory_start, memory_end; - char c = ' ', cn, *to = command_line, *from = COMMAND_LINE; - unsigned long start_pfn, end_pfn; - static unsigned int smptrap=0; - unsigned long delay = 0; - struct _lowcore *lc; - int i; + char c = ' ', cn, *to = command_line, *from = COMMAND_LINE; + unsigned long delay = 0; - if (smptrap) - return; - smptrap=1; + /* Save unparsed command line copy for /proc/cmdline */ + memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - /* - * print what head.S has found out about the machine - */ -#ifndef CONFIG_ARCH_S390X - printk((MACHINE_IS_VM) ? - "We are running under VM (31 bit mode)\n" : - "We are running native (31 bit mode)\n"); - printk((MACHINE_HAS_IEEE) ? - "This machine has an IEEE fpu\n" : - "This machine has no IEEE fpu\n"); -#else /* CONFIG_ARCH_S390X */ - printk((MACHINE_IS_VM) ? - "We are running under VM (64 bit mode)\n" : - "We are running native (64 bit mode)\n"); -#endif /* CONFIG_ARCH_S390X */ - - ROOT_DEV = Root_RAM0; - memory_start = (unsigned long) &_end; /* fixit if use $CODELO etc*/ -#ifndef CONFIG_ARCH_S390X - memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */ - /* - * We need some free virtual space to be able to do vmalloc. - * On a machine with 2GB memory we make sure that we have at - * least 128 MB free space for vmalloc. - */ - if (memory_end > 1920*1024*1024) - memory_end = 1920*1024*1024; -#else /* CONFIG_ARCH_S390X */ - memory_end = memory_size & ~0x200000UL; /* detected in head.s */ -#endif /* CONFIG_ARCH_S390X */ - init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; - - code_resource.start = (unsigned long) &_text; - code_resource.end = (unsigned long) &_etext - 1; - data_resource.start = (unsigned long) &_etext; - data_resource.end = (unsigned long) &_edata - 1; - - /* Save unparsed command line copy for /proc/cmdline */ - memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - - for (;;) { - /* - * "mem=XXX[kKmM]" sets memsize - */ - if (c == ' ' && strncmp(from, "mem=", 4) == 0) { - memory_end = simple_strtoul(from+4, &from, 0); - if ( *from == 'K' || *from == 'k' ) { - memory_end = memory_end << 10; - from++; - } else if ( *from == 'M' || *from == 'm' ) { - memory_end = memory_end << 20; - from++; - } - } - /* - * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes - */ - if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) { - delay = simple_strtoul(from+9, &from, 0); + for (;;) { + /* + * "mem=XXX[kKmM]" sets memsize + */ + if (c == ' ' && strncmp(from, "mem=", 4) == 0) { + memory_end = simple_strtoul(from+4, &from, 0); + if ( *from == 'K' || *from == 'k' ) { + memory_end = memory_end << 10; + from++; + } else if ( *from == 'M' || *from == 'm' ) { + memory_end = memory_end << 20; + from++; + } + } + /* + * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes + */ + if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) { + delay = simple_strtoul(from+9, &from, 0); if (*from == 's' || *from == 'S') { delay = delay*1000000; from++; @@ -403,24 +370,110 @@ void __init setup_arch(char **cmdline_p) } /* now wait for the requested amount of time */ udelay(delay); - } - cn = *(from++); - if (!cn) - break; - if (cn == '\n') - cn = ' '; /* replace newlines with space */ + } + cn = *(from++); + if (!cn) + break; + if (cn == '\n') + cn = ' '; /* replace newlines with space */ if (cn == 0x0d) cn = ' '; /* replace 0x0d with space */ - if (cn == ' ' && c == ' ') - continue; /* remove additional spaces */ - c = cn; - if (to - command_line >= COMMAND_LINE_SIZE) - break; - *(to++) = c; - } - if (c == ' ' && to > command_line) to--; - *to = '\0'; - *cmdline_p = command_line; + if (cn == ' ' && c == ' ') + continue; /* remove additional spaces */ + c = cn; + if (to - command_line >= COMMAND_LINE_SIZE) + break; + *(to++) = c; + } + if (c == ' ' && to > command_line) to--; + *to = '\0'; + *cmdline_p = command_line; +} + +static void __init +setup_lowcore(void) +{ + struct _lowcore *lc; + int lc_pages; + + /* + * Setup lowcore for boot cpu + */ + lc_pages = sizeof(void *) == 8 ? 2 : 1; + lc = (struct _lowcore *) + __alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0); + memset(lc, 0, lc_pages * PAGE_SIZE); + lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.addr = + PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + lc->external_new_psw.mask = PSW_KERNEL_BITS; + lc->external_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) ext_int_handler; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; + lc->program_new_psw.mask = PSW_KERNEL_BITS; + lc->program_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; + lc->mcck_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; + lc->io_new_psw.mask = PSW_KERNEL_BITS; + lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; + lc->ipl_device = S390_lowcore.ipl_device; + lc->jiffy_timer = -1LL; + lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; + lc->async_stack = (unsigned long) + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; +#ifdef CONFIG_CHECK_STACK + lc->panic_stack = (unsigned long) + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; +#endif + lc->current_task = (unsigned long) init_thread_union.thread_info.task; + lc->thread_info = (unsigned long) &init_thread_union; +#ifdef CONFIG_ARCH_S390X + if (MACHINE_HAS_DIAG44) + lc->diag44_opcode = 0x83000044; + else + lc->diag44_opcode = 0x07000700; +#endif /* CONFIG_ARCH_S390X */ + set_prefix((u32)(unsigned long) lc); +} + +static void __init +setup_resources(void) +{ + struct resource *res; + int i; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + res = alloc_bootmem_low(sizeof(struct resource)); + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + switch (memory_chunk[i].type) { + case CHUNK_READ_WRITE: + res->name = "System RAM"; + break; + case CHUNK_READ_ONLY: + res->name = "System ROM"; + res->flags |= IORESOURCE_READONLY; + break; + default: + res->name = "reserved"; + } + res->start = memory_chunk[i].addr; + res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; + request_resource(&iomem_resource, res); + request_resource(res, &code_resource); + request_resource(res, &data_resource); + } +} + +static void __init +setup_memory(void) +{ + unsigned long bootmap_size; + unsigned long start_pfn, end_pfn, init_pfn; + unsigned long last_rw_end; + int i; /* * partially used pages are not usable - thus @@ -429,6 +482,10 @@ void __init setup_arch(char **cmdline_p) start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = max_pfn = memory_end >> PAGE_SHIFT; + /* Initialize storage key for kernel pages */ + for (init_pfn = 0 ; init_pfn < start_pfn; init_pfn++) + page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); + /* * Initialize the boot-time allocator (with low memory only): */ @@ -437,7 +494,9 @@ void __init setup_arch(char **cmdline_p) /* * Register RAM areas with the bootmem allocator. */ - for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) { + last_rw_end = start_pfn; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { unsigned long start_chunk, end_chunk; if (memory_chunk[i].type != CHUNK_READ_WRITE) @@ -450,102 +509,98 @@ void __init setup_arch(char **cmdline_p) start_chunk = start_pfn; if (end_chunk > end_pfn) end_chunk = end_pfn; - if (start_chunk < end_chunk) + if (start_chunk < end_chunk) { + /* Initialize storage key for RAM pages */ + for (init_pfn = start_chunk ; init_pfn < end_chunk; + init_pfn++) + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY); free_bootmem(start_chunk << PAGE_SHIFT, (end_chunk - start_chunk) << PAGE_SHIFT); + if (last_rw_end < start_chunk) + add_memory_hole(last_rw_end, start_chunk - 1); + last_rw_end = end_chunk; + } } - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size); + psw_set_key(PAGE_DEFAULT_KEY); + + if (last_rw_end < end_pfn - 1) + add_memory_hole(last_rw_end, end_pfn - 1); + + /* + * Reserve the bootmem bitmap itself as well. We do this in two + * steps (first step was init_bootmem()) because this catches + * the (very unlikely) case of us accidentally initializing the + * bootmem allocator with an invalid RAM area. + */ + reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size); #ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START) { + if (INITRD_START) { if (INITRD_START + INITRD_SIZE <= memory_end) { reserve_bootmem(INITRD_START, INITRD_SIZE); initrd_start = INITRD_START; initrd_end = initrd_start + INITRD_SIZE; } else { - printk("initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - initrd_start + INITRD_SIZE, memory_end); - initrd_start = initrd_end = 0; + printk("initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + initrd_start + INITRD_SIZE, memory_end); + initrd_start = initrd_end = 0; } - } + } #endif +} - for (i = 0; i < 16 && memory_chunk[i].size > 0; i++) { - struct resource *res; - - res = alloc_bootmem_low(sizeof(struct resource)); - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; - - switch (memory_chunk[i].type) { - case CHUNK_READ_WRITE: - res->name = "System RAM"; - break; - case CHUNK_READ_ONLY: - res->name = "System ROM"; - res->flags |= IORESOURCE_READONLY; - break; - default: - res->name = "reserved"; - } - res->start = memory_chunk[i].addr; - res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; - request_resource(&iomem_resource, res); - request_resource(res, &code_resource); - request_resource(res, &data_resource); - } +/* + * Setup function called from init/main.c just after the banner + * was printed. + */ +void __init +setup_arch(char **cmdline_p) +{ /* - * Setup lowcore for boot cpu + * print what head.S has found out about the machine */ #ifndef CONFIG_ARCH_S390X - lc = (struct _lowcore *) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); - memset(lc, 0, PAGE_SIZE); + printk((MACHINE_IS_VM) ? + "We are running under VM (31 bit mode)\n" : + "We are running native (31 bit mode)\n"); + printk((MACHINE_HAS_IEEE) ? + "This machine has an IEEE fpu\n" : + "This machine has no IEEE fpu\n"); #else /* CONFIG_ARCH_S390X */ - lc = (struct _lowcore *) __alloc_bootmem(2*PAGE_SIZE, 2*PAGE_SIZE, 0); - memset(lc, 0, 2*PAGE_SIZE); + printk((MACHINE_IS_VM) ? + "We are running under VM (64 bit mode)\n" : + "We are running native (64 bit mode)\n"); #endif /* CONFIG_ARCH_S390X */ - lc->restart_psw.mask = PSW_BASE_BITS; - lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS; - lc->external_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; - lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS; - lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = PSW_KERNEL_BITS; - lc->mcck_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS; - lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; - lc->ipl_device = S390_lowcore.ipl_device; - lc->jiffy_timer = -1LL; - lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; - lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; -#ifdef CONFIG_CHECK_STACK - lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; -#endif - lc->current_task = (unsigned long) init_thread_union.thread_info.task; - lc->thread_info = (unsigned long) &init_thread_union; -#ifdef CONFIG_ARCH_S390X - if (MACHINE_HAS_DIAG44) - lc->diag44_opcode = 0x83000044; - else - lc->diag44_opcode = 0x07000700; + + ROOT_DEV = Root_RAM0; +#ifndef CONFIG_ARCH_S390X + memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */ + /* + * We need some free virtual space to be able to do vmalloc. + * On a machine with 2GB memory we make sure that we have at + * least 128 MB free space for vmalloc. + */ + if (memory_end > 1920*1024*1024) + memory_end = 1920*1024*1024; +#else /* CONFIG_ARCH_S390X */ + memory_end = memory_size & ~0x200000UL; /* detected in head.s */ #endif /* CONFIG_ARCH_S390X */ - set_prefix((u32)(unsigned long) lc); + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + parse_cmdline_early(cmdline_p); + + setup_memory(); + setup_resources(); + setup_lowcore(); + cpu_init(); __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 061e81138dc2..8ca485676780 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -244,7 +244,7 @@ int sysctl_hz_timer = 1; */ static inline void stop_hz_timer(void) { - __u64 timer; + __u64 timer, todval; if (sysctl_hz_timer != 0) return; @@ -265,8 +265,14 @@ static inline void stop_hz_timer(void) * for the next event. */ timer = (__u64) (next_timer_interrupt() - jiffies) + jiffies_64; - timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY; - asm volatile ("SCKC %0" : : "m" (timer)); + todval = -1ULL; + /* Be careful about overflows. */ + if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) { + timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY; + if (timer >= jiffies_timer_cc) + todval = timer; + } + asm volatile ("SCKC %0" : : "m" (todval)); } /* diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index bb6cf02418a2..fa0726507b3d 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -122,12 +122,17 @@ static void start_cpu_timer(void) struct vtimer_queue *vt_list; vt_list = &per_cpu(virt_cpu_timer, smp_processor_id()); - set_vtimer(vt_list->idle); + + /* CPU timer interrupt is pending, don't reprogramm it */ + if (vt_list->idle & 1LL<<63) + return; + + if (!list_empty(&vt_list->list)) + set_vtimer(vt_list->idle); } static void stop_cpu_timer(void) { - __u64 done; struct vtimer_queue *vt_list; vt_list = &per_cpu(virt_cpu_timer, smp_processor_id()); @@ -138,21 +143,17 @@ static void stop_cpu_timer(void) goto fire; } - /* store progress */ - asm volatile ("STPT %0" : "=m" (done)); + /* store the actual expire value */ + asm volatile ("STPT %0" : "=m" (vt_list->idle)); /* - * If done is negative we do not stop the CPU timer - * because we will get instantly an interrupt that - * will start the CPU timer again. + * If the CPU timer is negative we don't reprogramm + * it because we will get instantly an interrupt. */ - if (done & 1LL<<63) + if (vt_list->idle & 1LL<<63) return; - else - vt_list->offset += vt_list->to_expire - done; - /* save the actual expire value */ - vt_list->idle = done; + vt_list->offset += vt_list->to_expire - vt_list->idle; /* * We cannot halt the CPU timer, we just write a value that diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index d30cdb4248a9..f5a5bc09b8fa 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -20,6 +20,11 @@ #include <asm/pgalloc.h> #include <asm/uaccess.h> +static char *sender = "VMRMSVM"; +module_param(sender, charp, 0); +MODULE_PARM_DESC(sender, + "Guest name that may send SMSG messages (default VMRMSVM)"); + #include "../../../drivers/s390/net/smsgiucv.h" #define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2) @@ -367,10 +372,12 @@ static struct ctl_table cmm_dir_table[] = { #ifdef CONFIG_CMM_IUCV #define SMSG_PREFIX "CMM" static void -cmm_smsg_target(char *msg) +cmm_smsg_target(char *from, char *msg) { long pages, seconds; + if (strlen(sender) > 0 && strcmp(from, sender) != 0) + return; if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg)) return; if (strncmp(msg, "SHRINK", 6) == 0) { diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8e723bc7f795..6ec5cd981e74 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -101,6 +101,7 @@ extern unsigned long _end; extern unsigned long __init_begin; extern unsigned long __init_end; +extern unsigned long __initdata zholes_size[]; /* * paging_init() sets up the page tables */ @@ -163,10 +164,13 @@ void __init paging_init(void) local_flush_tlb(); { - unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES]; + memset(zones_size, 0, sizeof(zones_size)); zones_size[ZONE_DMA] = max_low_pfn; - free_area_init(zones_size); + free_area_init_node(0, &contig_page_data, zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, + zholes_size); } return; } @@ -184,9 +188,10 @@ void __init paging_init(void) _KERN_REGION_TABLE; static const int ssm_mask = 0x04000000L; - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES]; unsigned long dma_pfn, high_pfn; + memset(zones_size, 0, sizeof(zones_size)); dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; high_pfn = max_low_pfn; @@ -198,8 +203,8 @@ void __init paging_init(void) } /* Initialize mem_map[]. */ - free_area_init(zones_size); - + free_area_init_node(0, &contig_page_data, zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); /* * map whole physical memory to virtual memory (identity mapping) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 722ea1d63c94..3468d5127223 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -693,6 +693,10 @@ config RTC_9701JE endmenu +config ISA_DMA_API + bool + depends on MPC1211 + default y menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff --git a/arch/sh/kernel/ptrace.c b/arch/sh/kernel/ptrace.c index 1b0dfb4d8ea4..b28919b65682 100644 --- a/arch/sh/kernel/ptrace.c +++ b/arch/sh/kernel/ptrace.c @@ -20,6 +20,7 @@ #include <linux/user.h> #include <linux/slab.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -197,7 +198,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -228,7 +229,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) struct pt_regs *dummy = NULL; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); if ((child->ptrace & PT_DTRACE) == 0) { diff --git a/arch/sh64/kernel/ptrace.c b/arch/sh64/kernel/ptrace.c index 800288c1562b..fd2000956dae 100644 --- a/arch/sh64/kernel/ptrace.c +++ b/arch/sh64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/signal.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -255,7 +256,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); @@ -285,7 +286,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) struct pt_regs *regs; ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); if ((child->ptrace & PT_DTRACE) == 0) { diff --git a/arch/sh64/kernel/sys_sh64.c b/arch/sh64/kernel/sys_sh64.c index 4546845b9caf..58ff7d522d81 100644 --- a/arch/sh64/kernel/sys_sh64.c +++ b/arch/sh64/kernel/sys_sh64.c @@ -283,18 +283,3 @@ asmlinkage int sys_uname(struct old_utsname * name) up_read(&uts_sem); return err?-EFAULT:0; } - -/* Copy from mips version */ -asmlinkage long sys_shmatcall(int shmid, char __user *shmaddr, - int shmflg) -{ - unsigned long raddr; - int err; - - err = do_shmat(shmid, shmaddr, shmflg, &raddr); - if (err) - return err; - - err = raddr; - return err; -} diff --git a/arch/sh64/kernel/syscalls.S b/arch/sh64/kernel/syscalls.S index 8ed417df3dc6..6aabc63e4518 100644 --- a/arch/sh64/kernel/syscalls.S +++ b/arch/sh64/kernel/syscalls.S @@ -268,7 +268,7 @@ sys_call_table: .long sys_msgrcv .long sys_msgget .long sys_msgctl - .long sys_shmatcall + .long sys_shmat .long sys_shmdt /* 245 */ .long sys_shmget .long sys_shmctl diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 143fe2f3c1c4..2c216ffeea90 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -83,9 +83,6 @@ void default_idle(void) */ void cpu_idle(void) { - if (current->pid != 0) - goto out; - /* endless idle loop with no priority at all */ for (;;) { if (ARCH_SUN4C_SUN4) { @@ -126,8 +123,6 @@ void cpu_idle(void) schedule(); check_pgt_cache(); } -out: - return; } #else @@ -333,6 +328,17 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) printk("\n"); } +void dump_stack(void) +{ + unsigned long *ksp; + + __asm__ __volatile__("mov %%fp, %0" + : "=r" (ksp)); + show_stack(current, ksp); +} + +EXPORT_SYMBOL(dump_stack); + /* * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. */ diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c index c4f93bd2daf2..475c4c13462c 100644 --- a/arch/sparc/kernel/ptrace.c +++ b/arch/sparc/kernel/ptrace.c @@ -18,6 +18,7 @@ #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -526,7 +527,7 @@ asmlinkage void do_ptrace(struct pt_regs *regs) addr = 1; case PTRACE_CONT: { /* restart after signal. */ - if (data > _NSIG) { + if (!valid_signal(data)) { pt_error_return(regs, EIO); goto out_tsk; } diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c index f91b0e8d0dc8..1bd430d0ca06 100644 --- a/arch/sparc/kernel/sparc_ksyms.c +++ b/arch/sparc/kernel/sparc_ksyms.c @@ -20,6 +20,7 @@ #include <linux/in6.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/syscalls.h> #ifdef CONFIG_PCI #include <linux/pci.h> #endif @@ -89,6 +90,9 @@ extern void ___atomic24_sub(void); extern void ___set_bit(void); extern void ___clear_bit(void); extern void ___change_bit(void); +extern void ___rw_read_enter(void); +extern void ___rw_read_exit(void); +extern void ___rw_write_enter(void); /* Alias functions whose names begin with "." and export the aliases. * The module references will be fixed up by module_frob_arch_sections. @@ -121,9 +125,9 @@ EXPORT_SYMBOL(_do_write_unlock); #endif #else // XXX find what uses (or used) these. -// EXPORT_SYMBOL_PRIVATE(_rw_read_enter); -// EXPORT_SYMBOL_PRIVATE(_rw_read_exit); -// EXPORT_SYMBOL_PRIVATE(_rw_write_enter); +EXPORT_SYMBOL(___rw_read_enter); +EXPORT_SYMBOL(___rw_read_exit); +EXPORT_SYMBOL(___rw_write_enter); #endif /* semaphores */ EXPORT_SYMBOL(__up); @@ -144,6 +148,9 @@ EXPORT_SYMBOL(___set_bit); EXPORT_SYMBOL(___clear_bit); EXPORT_SYMBOL(___change_bit); +/* Per-CPU information table */ +EXPORT_PER_CPU_SYMBOL(__cpu_data); + #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); @@ -151,6 +158,10 @@ EXPORT_SYMBOL(synchronize_irq); /* Misc SMP information */ EXPORT_SYMBOL(__cpu_number_map); EXPORT_SYMBOL(__cpu_logical_map); + +/* CPU online map and active count. */ +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); @@ -332,3 +343,6 @@ EXPORT_SYMBOL(do_BUG); /* Sun Power Management Idle Handler */ EXPORT_SYMBOL(pm_idle); + +/* Binfmt_misc needs this */ +EXPORT_SYMBOL(sys_close); diff --git a/arch/sparc/prom/memory.c b/arch/sparc/prom/memory.c index 46aa51afec14..c20e5309f8aa 100644 --- a/arch/sparc/prom/memory.c +++ b/arch/sparc/prom/memory.c @@ -47,9 +47,9 @@ prom_sortmemlist(struct linux_mlist_v0 *thislist) char *tmpaddr; char *lowest; - for(i=0; thislist[i].theres_more != 0; i++) { + for(i=0; thislist[i].theres_more; i++) { lowest = thislist[i].start_adr; - for(mitr = i+1; thislist[mitr-1].theres_more != 0; mitr++) + for(mitr = i+1; thislist[mitr-1].theres_more; mitr++) if(thislist[mitr].start_adr < lowest) { lowest = thislist[mitr].start_adr; swapi = mitr; @@ -85,7 +85,7 @@ void __init prom_meminit(void) prom_phys_total[iter].num_bytes = mptr->num_bytes; prom_phys_total[iter].theres_more = &prom_phys_total[iter+1]; } - prom_phys_total[iter-1].theres_more = 0x0; + prom_phys_total[iter-1].theres_more = NULL; /* Second, the total prom taken descriptors. */ for(mptr = (*(romvec->pv_v0mem.v0_prommap)), iter=0; mptr; mptr=mptr->theres_more, iter++) { @@ -93,7 +93,7 @@ void __init prom_meminit(void) prom_prom_taken[iter].num_bytes = mptr->num_bytes; prom_prom_taken[iter].theres_more = &prom_prom_taken[iter+1]; } - prom_prom_taken[iter-1].theres_more = 0x0; + prom_prom_taken[iter-1].theres_more = NULL; /* Last, the available physical descriptors. */ for(mptr = (*(romvec->pv_v0mem.v0_available)), iter=0; mptr; mptr=mptr->theres_more, iter++) { @@ -101,7 +101,7 @@ void __init prom_meminit(void) prom_phys_avail[iter].num_bytes = mptr->num_bytes; prom_phys_avail[iter].theres_more = &prom_phys_avail[iter+1]; } - prom_phys_avail[iter-1].theres_more = 0x0; + prom_phys_avail[iter-1].theres_more = NULL; /* Sort all the lists. */ prom_sortmemlist(prom_phys_total); prom_sortmemlist(prom_prom_taken); @@ -124,7 +124,7 @@ void __init prom_meminit(void) prom_phys_avail[iter].theres_more = &prom_phys_avail[iter+1]; } - prom_phys_avail[iter-1].theres_more = 0x0; + prom_phys_avail[iter-1].theres_more = NULL; num_regs = prom_getproperty(node, "reg", (char *) prom_reg_memlist, @@ -138,7 +138,7 @@ void __init prom_meminit(void) prom_phys_total[iter].theres_more = &prom_phys_total[iter+1]; } - prom_phys_total[iter-1].theres_more = 0x0; + prom_phys_total[iter-1].theres_more = NULL; node = prom_getchild(prom_root_node); node = prom_searchsiblings(node, "virtual-memory"); @@ -158,7 +158,7 @@ void __init prom_meminit(void) prom_prom_taken[iter].theres_more = &prom_prom_taken[iter+1]; } - prom_prom_taken[iter-1].theres_more = 0x0; + prom_prom_taken[iter-1].theres_more = NULL; prom_sortmemlist(prom_prom_taken); @@ -182,15 +182,15 @@ void __init prom_meminit(void) case PROM_SUN4: #ifdef CONFIG_SUN4 /* how simple :) */ - prom_phys_total[0].start_adr = 0x0; + prom_phys_total[0].start_adr = NULL; prom_phys_total[0].num_bytes = *(sun4_romvec->memorysize); - prom_phys_total[0].theres_more = 0x0; - prom_prom_taken[0].start_adr = 0x0; + prom_phys_total[0].theres_more = NULL; + prom_prom_taken[0].start_adr = NULL; prom_prom_taken[0].num_bytes = 0x0; - prom_prom_taken[0].theres_more = 0x0; - prom_phys_avail[0].start_adr = 0x0; + prom_prom_taken[0].theres_more = NULL; + prom_phys_avail[0].start_adr = NULL; prom_phys_avail[0].num_bytes = *(sun4_romvec->memoryavail); - prom_phys_avail[0].theres_more = 0x0; + prom_phys_avail[0].theres_more = NULL; #endif break; diff --git a/arch/sparc/prom/sun4prom.c b/arch/sparc/prom/sun4prom.c index 69ca735f0d4e..00390a2652aa 100644 --- a/arch/sparc/prom/sun4prom.c +++ b/arch/sparc/prom/sun4prom.c @@ -151,7 +151,7 @@ struct linux_romvec * __init sun4_prom_init(void) * have more time, we can teach the penguin to say "By your * command" or "Activating turbo boost, Michael". :-) */ - sun4_romvec->setLEDs(0x0); + sun4_romvec->setLEDs(NULL); printk("PROMLIB: Old Sun4 boot PROM monitor %s, romvec version %d\n", sun4_romvec->monid, diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index fb1189641c74..a72fd15d5ea8 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -118,6 +118,7 @@ config VT_CONSOLE config HW_CONSOLE bool + depends on VT default y config SMP diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index a38cb5036df0..4dcb8af94090 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -756,7 +756,7 @@ void handler_irq(int irq, struct pt_regs *regs) clear_softint(clr_mask); } #else - int should_forward = 1; + int should_forward = 0; clear_softint(1 << irq); #endif @@ -1007,10 +1007,10 @@ static int retarget_one_irq(struct irqaction *p, int goal_cpu) } upa_writel(tid | IMAP_VALID, imap); - while (!cpu_online(goal_cpu)) { + do { if (++goal_cpu >= NR_CPUS) goal_cpu = 0; - } + } while (!cpu_online(goal_cpu)); return goal_cpu; } diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 26d3ec41da1c..a0cd2b2494d6 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -62,9 +62,6 @@ void default_idle(void) */ void cpu_idle(void) { - if (current->pid != 0) - return; - /* endless idle loop with no priority at all */ for (;;) { /* If current->work.need_resched is zero we should really @@ -80,7 +77,6 @@ void cpu_idle(void) schedule(); check_pgt_cache(); } - return; } #else diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 5f080cf04b33..80a76e2ad732 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -19,6 +19,7 @@ #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/security.h> +#include <linux/signal.h> #include <asm/asi.h> #include <asm/pgtable.h> @@ -559,7 +560,7 @@ asmlinkage void do_ptrace(struct pt_regs *regs) addr = 1; case PTRACE_CONT: { /* restart after signal. */ - if (data > _NSIG) { + if (!valid_signal(data)) { pt_error_return(regs, EIO); goto out_tsk; } diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index cad5a1122800..e78cc53594fa 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -278,7 +278,7 @@ EXPORT_SYMBOL(verify_compat_iovec); EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(__pte_alloc_one_kernel); +EXPORT_SYMBOL(pte_alloc_one_kernel); #ifndef CONFIG_SMP EXPORT_SYMBOL(pgt_quicklists); #endif diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index 6a717d4d2bc5..71b4e3807694 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -48,7 +48,7 @@ DEFINE_SPINLOCK(mostek_lock); DEFINE_SPINLOCK(rtc_lock); -unsigned long mstk48t02_regs = 0UL; +void __iomem *mstk48t02_regs = NULL; #ifdef CONFIG_PCI unsigned long ds1287_regs = 0UL; #endif @@ -59,8 +59,8 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); -static unsigned long mstk48t08_regs = 0UL; -static unsigned long mstk48t59_regs = 0UL; +static void __iomem *mstk48t08_regs; +static void __iomem *mstk48t59_regs; static int set_rtc_mmss(unsigned long); @@ -520,7 +520,7 @@ void timer_tick_interrupt(struct pt_regs *regs) /* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). */ static void __init kick_start_clock(void) { - unsigned long regs = mstk48t02_regs; + void __iomem *regs = mstk48t02_regs; u8 sec, tmp; int i, count; @@ -604,7 +604,7 @@ static void __init kick_start_clock(void) /* Return nonzero if the clock chip battery is low. */ static int __init has_low_battery(void) { - unsigned long regs = mstk48t02_regs; + void __iomem *regs = mstk48t02_regs; u8 data1, data2; spin_lock_irq(&mostek_lock); @@ -623,7 +623,7 @@ static int __init has_low_battery(void) static void __init set_system_time(void) { unsigned int year, mon, day, hour, min, sec; - unsigned long mregs = mstk48t02_regs; + void __iomem *mregs = mstk48t02_regs; #ifdef CONFIG_PCI unsigned long dregs = ds1287_regs; #else @@ -843,7 +843,8 @@ void __init clock_probe(void) !strcmp(model, "m5823")) { ds1287_regs = edev->resource[0].start; } else { - mstk48t59_regs = edev->resource[0].start; + mstk48t59_regs = (void __iomem *) + edev->resource[0].start; mstk48t02_regs = mstk48t59_regs + MOSTEK_48T59_48T02; } break; @@ -865,7 +866,8 @@ try_isa_clock: !strcmp(model, "m5823")) { ds1287_regs = isadev->resource.start; } else { - mstk48t59_regs = isadev->resource.start; + mstk48t59_regs = (void __iomem *) + isadev->resource.start; mstk48t02_regs = mstk48t59_regs + MOSTEK_48T59_48T02; } break; @@ -893,21 +895,24 @@ try_isa_clock: } if(model[5] == '0' && model[6] == '2') { - mstk48t02_regs = (((u64)clk_reg[0].phys_addr) | - (((u64)clk_reg[0].which_io)<<32UL)); + mstk48t02_regs = (void __iomem *) + (((u64)clk_reg[0].phys_addr) | + (((u64)clk_reg[0].which_io)<<32UL)); } else if(model[5] == '0' && model[6] == '8') { - mstk48t08_regs = (((u64)clk_reg[0].phys_addr) | - (((u64)clk_reg[0].which_io)<<32UL)); + mstk48t08_regs = (void __iomem *) + (((u64)clk_reg[0].phys_addr) | + (((u64)clk_reg[0].which_io)<<32UL)); mstk48t02_regs = mstk48t08_regs + MOSTEK_48T08_48T02; } else { - mstk48t59_regs = (((u64)clk_reg[0].phys_addr) | - (((u64)clk_reg[0].which_io)<<32UL)); + mstk48t59_regs = (void __iomem *) + (((u64)clk_reg[0].phys_addr) | + (((u64)clk_reg[0].which_io)<<32UL)); mstk48t02_regs = mstk48t59_regs + MOSTEK_48T59_48T02; } break; } - if (mstk48t02_regs != 0UL) { + if (mstk48t02_regs != NULL) { /* Report a low battery voltage condition. */ if (has_low_battery()) prom_printf("NVRAM: Low battery voltage!\n"); @@ -1087,7 +1092,7 @@ unsigned long long sched_clock(void) static int set_rtc_mmss(unsigned long nowtime) { int real_seconds, real_minutes, chip_minutes; - unsigned long mregs = mstk48t02_regs; + void __iomem *mregs = mstk48t02_regs; #ifdef CONFIG_PCI unsigned long dregs = ds1287_regs; #else diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index db6fa77b4dab..9c5222075da9 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1114,7 +1114,7 @@ struct pgtable_cache_struct pgt_quicklists; #else #define DC_ALIAS_SHIFT 0 #endif -pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { struct page *page; unsigned long color; diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 9a23df182123..c5292181a664 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -244,6 +244,7 @@ config KERNEL_HALF_GIGS config HIGHMEM bool "Highmem support" + depends on !64BIT config KERNEL_STACK_ORDER int "Kernel stack size order" diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig_i386 index 203c242201b6..e41f3748d30f 100644 --- a/arch/um/Kconfig_i386 +++ b/arch/um/Kconfig_i386 @@ -1,4 +1,8 @@ -config 64_BIT +config UML_X86 + bool + default y + +config 64BIT bool default n diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 index 768dc6626a8d..fd8d7e8982b1 100644 --- a/arch/um/Kconfig_x86_64 +++ b/arch/um/Kconfig_x86_64 @@ -1,4 +1,8 @@ -config 64_BIT +config UML_X86 + bool + default y + +config 64BIT bool default y diff --git a/arch/um/Makefile b/arch/um/Makefile index 97bca6b5ca95..f2a0c40a9204 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -17,7 +17,7 @@ core-y += $(ARCH_DIR)/kernel/ \ # Have to precede the include because the included Makefiles reference them. SYMLINK_HEADERS := archparam.h system.h sigcontext.h processor.h ptrace.h \ - arch-signal.h module.h vm-flags.h + module.h vm-flags.h elf.h SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header)) # XXX: The "os" symlink is only used by arch/um/include/os.h, which includes @@ -44,6 +44,11 @@ ifneq ($(MAKEFILES-INCL),) endif ARCH_INCLUDE := -I$(ARCH_DIR)/include +ifneq ($(KBUILD_SRC),) +ARCH_INCLUDE += -I$(ARCH_DIR)/include2 +ARCH_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include +MRPROPER_DIRS += $(ARCH_DIR)/include2 +endif SYS_DIR := $(ARCH_DIR)/include/sysdep-$(SUBARCH) include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH) @@ -94,17 +99,18 @@ define archhelp echo ' find in the kernel root.' endef +ifneq ($(KBUILD_SRC),) +$(shell mkdir -p $(ARCH_DIR) && ln -fsn $(srctree)/$(ARCH_DIR)/Kconfig_$(SUBARCH) $(ARCH_DIR)/Kconfig_arch) +CLEAN_FILES += $(ARCH_DIR)/Kconfig_arch +else $(shell cd $(ARCH_DIR) && ln -sf Kconfig_$(SUBARCH) Kconfig_arch) +endif -prepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) \ - $(ARCH_DIR)/kernel/vmlinux.lds.S +prepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib -LD_SCRIPT-$(CONFIG_LD_SCRIPT_STATIC) := uml.lds.S -LD_SCRIPT-$(CONFIG_LD_SCRIPT_DYN) := dyn.lds.S - CPP_MODE-$(CONFIG_MODE_TT) := -DMODE_TT CONFIG_KERNEL_STACK_ORDER ?= 2 STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) @@ -126,7 +132,7 @@ define cmd_vmlinux__ $(CC) $(CFLAGS_vmlinux) -o $@ \ -Wl,-T,$(vmlinux-lds) $(vmlinux-init) \ -Wl,--start-group $(vmlinux-main) -Wl,--end-group \ - -L/usr/lib -lutil \ + -lutil \ $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) \ FORCE ,$^) ; rm -f linux endef @@ -145,31 +151,42 @@ archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '*.gcov' \) -type f -print | xargs rm -f -#We need to re-preprocess this when the symlink dest changes. -#So we touch it when needed. -$(ARCH_DIR)/kernel/vmlinux.lds.S: FORCE - $(Q)if [ "$(shell readlink $@)" != "$(LD_SCRIPT-y)" ]; then \ - echo ' SYMLINK $@'; \ - ln -sf $(LD_SCRIPT-y) $@; \ - touch $@; \ - fi; - $(SYMLINK_HEADERS): @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + ln -fsn $(srctree)/include/asm-um/$(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $@ +else $(Q)cd $(TOPDIR)/$(dir $@) ; \ ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) +endif include/asm-um/arch: @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p include/asm-um + $(Q)ln -fsn $(srctree)/include/asm-$(SUBARCH) include/asm-um/arch +else $(Q)cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(SUBARCH) arch +endif $(ARCH_DIR)/include/sysdep: @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + $(Q)mkdir -p $(ARCH_DIR)/include + $(Q)mkdir -p $(ARCH_DIR)/include2 + $(Q)ln -fsn sysdep-$(SUBARCH) $(ARCH_DIR)/include/sysdep + $(Q)ln -fsn $(srctree)/$(ARCH_DIR)/include/sysdep-$(SUBARCH) $(ARCH_DIR)/include2/sysdep +else $(Q)cd $(ARCH_DIR)/include && ln -sf sysdep-$(SUBARCH) sysdep +endif $(ARCH_DIR)/os: @echo ' SYMLINK $@' +ifneq ($(KBUILD_SRC),) + $(Q)ln -fsn $(srctree)/$(ARCH_DIR)/os-$(OS) $(ARCH_DIR)/os +else $(Q)cd $(ARCH_DIR) && ln -sf os-$(OS) os +endif # Generated files define filechk_umlconfig @@ -179,10 +196,31 @@ endef $(ARCH_DIR)/include/uml-config.h : include/linux/autoconf.h $(call filechk,umlconfig) +$(ARCH_DIR)/user-offsets.s: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.c + $(CC) $(USER_CFLAGS) -S -o $@ $< + +$(ARCH_DIR)/user-offsets.h: $(ARCH_DIR)/user-offsets.s + $(call filechk,gen-asm-offsets) + +CLEAN_FILES += $(ARCH_DIR)/user-offsets.s $(ARCH_DIR)/user-offsets.h + +$(ARCH_DIR)/kernel-offsets.s: $(ARCH_DIR)/sys-$(SUBARCH)/kernel-offsets.c \ + $(ARCH_SYMLINKS) \ + $(SYS_DIR)/sc.h \ + include/asm include/linux/version.h \ + include/config/MARKER \ + $(ARCH_DIR)/include/user_constants.h + $(CC) $(CFLAGS) $(NOSTDINC_FLAGS) $(CPPFLAGS) -S -o $@ $< + +$(ARCH_DIR)/kernel-offsets.h: $(ARCH_DIR)/kernel-offsets.s + $(call filechk,gen-asm-offsets) + +CLEAN_FILES += $(ARCH_DIR)/kernel-offsets.s $(ARCH_DIR)/kernel-offsets.h + $(ARCH_DIR)/include/task.h: $(ARCH_DIR)/util/mk_task $(call filechk,gen_header) -$(ARCH_DIR)/include/user_constants.h: $(ARCH_DIR)/os/util/mk_user_constants +$(ARCH_DIR)/include/user_constants.h: $(ARCH_DIR)/os-$(OS)/util/mk_user_constants $(call filechk,gen_header) $(ARCH_DIR)/include/kern_constants.h: $(ARCH_DIR)/util/mk_constants @@ -191,20 +229,20 @@ $(ARCH_DIR)/include/kern_constants.h: $(ARCH_DIR)/util/mk_constants $(ARCH_DIR)/include/skas_ptregs.h: $(ARCH_DIR)/kernel/skas/util/mk_ptregs $(call filechk,gen_header) -$(ARCH_DIR)/os/util/mk_user_constants: $(ARCH_DIR)/os/util FORCE ; +$(ARCH_DIR)/os-$(OS)/util/mk_user_constants: $(ARCH_DIR)/os-$(OS)/util FORCE ; $(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants: $(ARCH_DIR)/include/user_constants.h $(ARCH_DIR)/util \ FORCE ; $(ARCH_DIR)/kernel/skas/util/mk_ptregs: $(ARCH_DIR)/kernel/skas/util FORCE ; -$(ARCH_DIR)/util: scripts_basic $(SYS_DIR)/sc.h FORCE +$(ARCH_DIR)/util: scripts_basic $(SYS_DIR)/sc.h $(ARCH_DIR)/kernel-offsets.h FORCE $(Q)$(MAKE) $(build)=$@ -$(ARCH_DIR)/kernel/skas/util: scripts_basic FORCE +$(ARCH_DIR)/kernel/skas/util: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE $(Q)$(MAKE) $(build)=$@ -$(ARCH_DIR)/os/util: scripts_basic FORCE +$(ARCH_DIR)/os-$(OS)/util: scripts_basic FORCE $(Q)$(MAKE) $(build)=$@ export SUBARCH USER_CFLAGS OS diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 index 97b223bfa78e..29e182d5a83a 100644 --- a/arch/um/Makefile-i386 +++ b/arch/um/Makefile-i386 @@ -1,4 +1,4 @@ -SUBARCH_CORE := arch/um/sys-i386/ +SUBARCH_CORE := arch/um/sys-i386/ arch/i386/crypto/ TOP_ADDR := $(CONFIG_TOP_ADDR) @@ -32,10 +32,10 @@ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread $(call filechk,gen_header) -$(SYS_UTIL_DIR)/mk_sc: scripts_basic FORCE +$(SYS_UTIL_DIR)/mk_sc: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ -$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE +$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(ARCH_DIR)/kernel-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ $(SYS_UTIL_DIR): scripts_basic include/asm FORCE diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index a77971133e91..32144562c279 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -23,10 +23,10 @@ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread $(call filechk,gen_header) -$(SYS_UTIL_DIR)/mk_sc: scripts_basic FORCE +$(SYS_UTIL_DIR)/mk_sc: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ -$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE +$(SYS_UTIL_DIR)/mk_thread: scripts_basic $(GEN_HEADERS) $(ARCH_DIR)/kernel-offsets.h FORCE $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ CLEAN_FILES += $(SYS_HEADERS) diff --git a/arch/um/defconfig b/arch/um/defconfig index fc3075c589d8..4067c3aa5b60 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk1 -# Sun Mar 20 16:53:00 2005 +# Linux kernel version: 2.6.12-rc3-skas3-v9-pre2 +# Sun Apr 24 19:46:10 2005 # CONFIG_GENERIC_HARDIRQS=y CONFIG_UML=y @@ -15,7 +15,8 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y # CONFIG_MODE_TT=y CONFIG_MODE_SKAS=y -# CONFIG_64_BIT is not set +CONFIG_UML_X86=y +# CONFIG_64BIT is not set CONFIG_TOP_ADDR=0xc0000000 # CONFIG_3_LEVEL_PGTABLES is not set CONFIG_ARCH_HAS_SC_SIGNALS=y @@ -41,6 +42,7 @@ CONFIG_UML_REAL_TIME_CLOCK=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -158,7 +160,6 @@ CONFIG_UML_NET_SLIRP=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y @@ -412,6 +413,5 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_FS is not set CONFIG_FRAME_POINTER=y CONFIG_PT_PROXY=y -# CONFIG_GPROF is not set # CONFIG_GCOV is not set # CONFIG_SYSCALL_DEBUG is not set diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 1f77deb3fd23..0150038af795 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -22,7 +22,7 @@ #ifdef CONFIG_NOCONFIG_CHAN static void *not_configged_init(char *str, int device, struct chan_opts *opts) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(NULL); } @@ -30,27 +30,27 @@ static void *not_configged_init(char *str, int device, struct chan_opts *opts) static int not_configged_open(int input, int output, int primary, void *data, char **dev_out) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-ENODEV); } static void not_configged_close(int fd, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); } static int not_configged_read(int fd, char *c_out, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-EIO); } static int not_configged_write(int fd, const char *buf, int len, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-EIO); } @@ -58,7 +58,7 @@ static int not_configged_write(int fd, const char *buf, int len, void *data) static int not_configged_console_write(int fd, const char *buf, int len, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-EIO); } @@ -66,14 +66,14 @@ static int not_configged_console_write(int fd, const char *buf, int len, static int not_configged_window_size(int fd, void *data, unsigned short *rows, unsigned short *cols) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); return(-ENODEV); } static void not_configged_free(void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + printf(KERN_ERR "Using a channel type which is configured out of " "UML\n"); } diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 6924f273ced9..025d3be8aca4 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -39,19 +39,69 @@ static void line_timer_cb(void *arg) line_interrupt(line->driver->read_irq, arg, NULL); } -static int write_room(struct line *dev) +/* Returns the free space inside the ring buffer of this line. + * + * Should be called while holding line->lock (this does not modify datas). + */ +static int write_room(struct line *line) { int n; - if (dev->buffer == NULL) - return (LINE_BUFSIZE - 1); + if (line->buffer == NULL) + return LINE_BUFSIZE - 1; + + /* This is for the case where the buffer is wrapped! */ + n = line->head - line->tail; - n = dev->head - dev->tail; if (n <= 0) - n = LINE_BUFSIZE + n; - return (n - 1); + n = LINE_BUFSIZE + n; /* The other case */ + return n - 1; +} + +int line_write_room(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + int room; + + if (tty->stopped) + return 0; + + spin_lock_irqsave(&line->lock, flags); + room = write_room(line); + spin_unlock_irqrestore(&line->lock, flags); + + /*XXX: Warning to remove */ + if (0 == room) + printk(KERN_DEBUG "%s: %s: no room left in buffer\n", + __FUNCTION__,tty->name); + return room; +} + +int line_chars_in_buffer(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + int ret; + + spin_lock_irqsave(&line->lock, flags); + + /*write_room subtracts 1 for the needed NULL, so we readd it.*/ + ret = LINE_BUFSIZE - (write_room(line) + 1); + spin_unlock_irqrestore(&line->lock, flags); + + return ret; } +/* + * This copies the content of buf into the circular buffer associated with + * this line. + * The return value is the number of characters actually copied, i.e. the ones + * for which there was space: this function is not supposed to ever flush out + * the circular buffer. + * + * Must be called while holding line->lock! + */ static int buffer_data(struct line *line, const char *buf, int len) { int end, room; @@ -70,48 +120,95 @@ static int buffer_data(struct line *line, const char *buf, int len) len = (len > room) ? room : len; end = line->buffer + LINE_BUFSIZE - line->tail; - if(len < end){ + + if (len < end){ memcpy(line->tail, buf, len); line->tail += len; - } - else { + } else { + /* The circular buffer is wrapping */ memcpy(line->tail, buf, end); buf += end; memcpy(line->buffer, buf, len - end); line->tail = line->buffer + len - end; } - return(len); + return len; } +/* + * Flushes the ring buffer to the output channels. That is, write_chan is + * called, passing it line->head as buffer, and an appropriate count. + * + * On exit, returns 1 when the buffer is empty, + * 0 when the buffer is not empty on exit, + * and -errno when an error occurred. + * + * Must be called while holding line->lock!*/ static int flush_buffer(struct line *line) { int n, count; if ((line->buffer == NULL) || (line->head == line->tail)) - return(1); + return 1; if (line->tail < line->head) { + /* line->buffer + LINE_BUFSIZE is the end of the buffer! */ count = line->buffer + LINE_BUFSIZE - line->head; + n = write_chan(&line->chan_list, line->head, count, line->driver->write_irq); if (n < 0) - return(n); - if (n == count) + return n; + if (n == count) { + /* We have flushed from ->head to buffer end, now we + * must flush only from the beginning to ->tail.*/ line->head = line->buffer; - else { + } else { line->head += n; - return(0); + return 0; } } count = line->tail - line->head; n = write_chan(&line->chan_list, line->head, count, line->driver->write_irq); - if(n < 0) return(n); + + if(n < 0) + return n; line->head += n; - return(line->head == line->tail); + return line->head == line->tail; +} + +void line_flush_buffer(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + int err; + + /*XXX: copied from line_write, verify if it is correct!*/ + if(tty->stopped) + return; + //return 0; + + spin_lock_irqsave(&line->lock, flags); + err = flush_buffer(line); + /*if (err == 1) + err = 0;*/ + spin_unlock_irqrestore(&line->lock, flags); + //return err; +} + +/* We map both ->flush_chars and ->put_char (which go in pair) onto ->flush_buffer + * and ->write. Hope it's not that bad.*/ +void line_flush_chars(struct tty_struct *tty) +{ + line_flush_buffer(tty); +} + +void line_put_char(struct tty_struct *tty, unsigned char ch) +{ + line_write(tty, &ch, sizeof(ch)); } int line_write(struct tty_struct *tty, const unsigned char *buf, int len) @@ -120,38 +217,31 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len) unsigned long flags; int n, err, ret = 0; - if(tty->stopped) return 0; + if(tty->stopped) + return 0; - down(&line->sem); - if(line->head != line->tail){ - local_irq_save(flags); + spin_lock_irqsave(&line->lock, flags); + if (line->head != line->tail) { ret = buffer_data(line, buf, len); err = flush_buffer(line); - local_irq_restore(flags); - if(err <= 0 && (err != -EAGAIN || !ret)) + if (err <= 0 && (err != -EAGAIN || !ret)) ret = err; - } - else { + } else { n = write_chan(&line->chan_list, buf, len, line->driver->write_irq); - if(n < 0){ + if (n < 0) { ret = n; goto out_up; } len -= n; ret += n; - if(len > 0) + if (len > 0) ret += buffer_data(line, buf + n, len); } - out_up: - up(&line->sem); - return(ret); -} - -void line_put_char(struct tty_struct *tty, unsigned char ch) -{ - line_write(tty, &ch, sizeof(ch)); +out_up: + spin_unlock_irqrestore(&line->lock, flags); + return ret; } void line_set_termios(struct tty_struct *tty, struct termios * old) @@ -159,11 +249,6 @@ void line_set_termios(struct tty_struct *tty, struct termios * old) /* nothing */ } -int line_chars_in_buffer(struct tty_struct *tty) -{ - return 0; -} - static struct { int cmd; char *level; @@ -250,7 +335,7 @@ int line_ioctl(struct tty_struct *tty, struct file * file, ret = -ENOIOCTLCMD; break; } - return(ret); + return ret; } static irqreturn_t line_write_interrupt(int irq, void *data, @@ -260,18 +345,23 @@ static irqreturn_t line_write_interrupt(int irq, void *data, struct line *line = tty->driver_data; int err; + /* Interrupts are enabled here because we registered the interrupt with + * SA_INTERRUPT (see line_setup_irq).*/ + + spin_lock_irq(&line->lock); err = flush_buffer(line); - if(err == 0) - return(IRQ_NONE); - else if(err < 0){ + if (err == 0) { + return IRQ_NONE; + } else if(err < 0) { line->head = line->buffer; line->tail = line->buffer; } + spin_unlock_irq(&line->lock); if(tty == NULL) - return(IRQ_NONE); + return IRQ_NONE; - if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && + if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && (tty->ldisc.write_wakeup != NULL)) (tty->ldisc.write_wakeup)(tty); @@ -281,9 +371,9 @@ static irqreturn_t line_write_interrupt(int irq, void *data, * writes. */ - if(waitqueue_active(&tty->write_wait)) + if (waitqueue_active(&tty->write_wait)) wake_up_interruptible(&tty->write_wait); - return(IRQ_HANDLED); + return IRQ_HANDLED; } int line_setup_irq(int fd, int input, int output, struct tty_struct *tty) @@ -292,15 +382,18 @@ int line_setup_irq(int fd, int input, int output, struct tty_struct *tty) struct line_driver *driver = line->driver; int err = 0, flags = SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM; - if(input) err = um_request_irq(driver->read_irq, fd, IRQ_READ, + if (input) + err = um_request_irq(driver->read_irq, fd, IRQ_READ, line_interrupt, flags, driver->read_irq_name, tty); - if(err) return(err); - if(output) err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, + if (err) + return err; + if (output) + err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, line_write_interrupt, flags, driver->write_irq_name, tty); line->have_irq = 1; - return(err); + return err; } void line_disable(struct tty_struct *tty, int current_irq) @@ -336,7 +429,9 @@ int line_open(struct line *lines, struct tty_struct *tty, line = &lines[tty->index]; tty->driver_data = line; - down(&line->sem); + /* The IRQ which takes this lock is not yet enabled and won't be run + * before the end, so we don't need to use spin_lock_irq.*/ + spin_lock(&line->lock); if (tty->count == 1) { if (!line->valid) { err = -ENODEV; @@ -349,6 +444,7 @@ int line_open(struct line *lines, struct tty_struct *tty, err = open_chan(&line->chan_list); if(err) goto out; } + /* Here the interrupt is registered.*/ enable_chan(&line->chan_list, tty); INIT_WORK(&line->task, line_timer_cb, tty); } @@ -362,21 +458,36 @@ int line_open(struct line *lines, struct tty_struct *tty, line->count++; out: - up(&line->sem); - return(err); + spin_unlock(&line->lock); + return err; } +static void unregister_winch(struct tty_struct *tty); + void line_close(struct tty_struct *tty, struct file * filp) { struct line *line = tty->driver_data; - down(&line->sem); + /* XXX: I assume this should be called in process context, not with + * interrupts disabled! + */ + spin_lock_irq(&line->lock); + + /* We ignore the error anyway! */ + flush_buffer(line); + line->count--; if (tty->count == 1) { line_disable(tty, -1); tty->driver_data = NULL; } - up(&line->sem); + + if((line->count == 0) && line->sigio){ + unregister_winch(tty); + line->sigio = 0; + } + + spin_unlock_irq(&line->lock); } void close_lines(struct line *lines, int nlines) @@ -387,31 +498,41 @@ void close_lines(struct line *lines, int nlines) close_chan(&lines[i].chan_list); } -int line_setup(struct line *lines, int num, char *init, int all_allowed) +/* Common setup code for both startup command line and mconsole initialization. + * @lines contains the the array (of size @num) to modify; + * @init is the setup string; + * @all_allowed is a boolean saying if we can setup the whole @lines + * at once. For instance, it will be usually true for startup init. (where we + * can use con=xterm) and false for mconsole.*/ + +int line_setup(struct line *lines, unsigned int num, char *init, int all_allowed) { int i, n; char *end; - if(*init == '=') n = -1; - else { + if(*init == '=') { + /* We said con=/ssl= instead of con#=, so we are configuring all + * consoles at once.*/ + n = -1; + } else { n = simple_strtoul(init, &end, 0); if(*end != '='){ printk(KERN_ERR "line_setup failed to parse \"%s\"\n", init); - return(0); + return 0; } init = end; } init++; - if((n >= 0) && (n >= num)){ + + if (n >= (signed int) num) { printk("line_setup - %d out of range ((0 ... %d) allowed)\n", n, num - 1); - return(0); - } - else if (n >= 0){ + return 0; + } else if (n >= 0){ if (lines[n].count > 0) { printk("line_setup - device %d is open\n", n); - return(0); + return 0; } if (lines[n].init_pri <= INIT_ONE){ lines[n].init_pri = INIT_ONE; @@ -422,13 +543,11 @@ int line_setup(struct line *lines, int num, char *init, int all_allowed) lines[n].valid = 1; } } - } - else if(!all_allowed){ + } else if(!all_allowed){ printk("line_setup - can't configure all devices from " "mconsole\n"); - return(0); - } - else { + return 0; + } else { for(i = 0; i < num; i++){ if(lines[i].init_pri <= INIT_ALL){ lines[i].init_pri = INIT_ALL; @@ -440,21 +559,21 @@ int line_setup(struct line *lines, int num, char *init, int all_allowed) } } } - return(1); + return 1; } -int line_config(struct line *lines, int num, char *str) +int line_config(struct line *lines, unsigned int num, char *str) { char *new = uml_strdup(str); if(new == NULL){ printk("line_config - uml_strdup failed\n"); - return(-ENOMEM); + return -ENOMEM; } - return(!line_setup(lines, num, new, 0)); + return !line_setup(lines, num, new, 0); } -int line_get_config(char *name, struct line *lines, int num, char *str, +int line_get_config(char *name, struct line *lines, unsigned int num, char *str, int size, char **error_out) { struct line *line; @@ -464,47 +583,33 @@ int line_get_config(char *name, struct line *lines, int num, char *str, dev = simple_strtoul(name, &end, 0); if((*end != '\0') || (end == name)){ *error_out = "line_get_config failed to parse device number"; - return(0); + return 0; } if((dev < 0) || (dev >= num)){ - *error_out = "device number of of range"; - return(0); + *error_out = "device number out of range"; + return 0; } line = &lines[dev]; - down(&line->sem); + spin_lock(&line->lock); if(!line->valid) CONFIG_CHUNK(str, size, n, "none", 1); else if(line->count == 0) CONFIG_CHUNK(str, size, n, line->init_str, 1); else n = chan_config_string(&line->chan_list, str, size, error_out); - up(&line->sem); + spin_unlock(&line->lock); - return(n); + return n; } -int line_remove(struct line *lines, int num, char *str) +int line_remove(struct line *lines, unsigned int num, char *str) { char config[sizeof("conxxxx=none\0")]; sprintf(config, "%s=none", str); - return(!line_setup(lines, num, config, 0)); -} - -int line_write_room(struct tty_struct *tty) -{ - struct line *dev = tty->driver_data; - int room; - - if (tty->stopped) - return 0; - room = write_room(dev); - if (0 == room) - printk(KERN_DEBUG "%s: %s: no room left in buffer\n", - __FUNCTION__,tty->name); - return room; + return !line_setup(lines, num, config, 0); } struct tty_driver *line_register_devfs(struct lines *set, @@ -553,7 +658,7 @@ void lines_init(struct line *lines, int nlines) for(i = 0; i < nlines; i++){ line = &lines[i]; INIT_LIST_HEAD(&line->chan_list); - sema_init(&line->sem, 1); + spin_lock_init(&line->lock); if(line->init_str != NULL){ line->init_str = uml_strdup(line->init_str); if(line->init_str == NULL) @@ -587,7 +692,7 @@ irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) "errno = %d\n", -err); printk("fd %d is losing SIGWINCH support\n", winch->tty_fd); - return(IRQ_HANDLED); + return IRQ_HANDLED; } goto out; } @@ -603,7 +708,7 @@ irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) out: if(winch->fd != -1) reactivate_fd(winch->fd, WINCH_IRQ); - return(IRQ_HANDLED); + return IRQ_HANDLED; } DECLARE_MUTEX(winch_handler_sem); @@ -625,7 +730,7 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty) .pid = pid, .tty = tty }); list_add(&winch->list, &winch_handlers); - if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, + if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, "winch", winch) < 0) printk("register_winch_irq - failed to register IRQ\n"); @@ -633,6 +738,34 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty) up(&winch_handler_sem); } +static void unregister_winch(struct tty_struct *tty) +{ + struct list_head *ele; + struct winch *winch, *found = NULL; + + down(&winch_handler_sem); + list_for_each(ele, &winch_handlers){ + winch = list_entry(ele, struct winch, list); + if(winch->tty == tty){ + found = winch; + break; + } + } + + if(found == NULL) + goto out; + + if(winch->pid != -1) + os_kill_process(winch->pid, 1); + + free_irq_by_irq_and_dev(WINCH_IRQ, winch); + free_irq(WINCH_IRQ, winch); + list_del(&winch->list); + kfree(winch); + out: + up(&winch_handler_sem); +} + static void winch_cleanup(void) { struct list_head *ele; @@ -656,26 +789,16 @@ char *add_xterm_umid(char *base) int len; umid = get_umid(1); - if(umid == NULL) return(base); + if(umid == NULL) + return base; len = strlen(base) + strlen(" ()") + strlen(umid) + 1; title = kmalloc(len, GFP_KERNEL); if(title == NULL){ printk("Failed to allocate buffer for xterm title\n"); - return(base); + return base; } snprintf(title, len, "%s (%s)", base, umid); - return(title); + return title; } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index c5839c3141f8..a2bac429f3d4 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -107,11 +107,6 @@ int ssl_open(struct tty_struct *tty, struct file *filp) } #if 0 -static int ssl_chars_in_buffer(struct tty_struct *tty) -{ - return(0); -} - static void ssl_flush_buffer(struct tty_struct *tty) { return; @@ -149,11 +144,11 @@ static struct tty_operations ssl_ops = { .put_char = line_put_char, .write_room = line_write_room, .chars_in_buffer = line_chars_in_buffer, + .flush_buffer = line_flush_buffer, + .flush_chars = line_flush_chars, .set_termios = line_set_termios, .ioctl = line_ioctl, #if 0 - .flush_chars = ssl_flush_chars, - .flush_buffer = ssl_flush_buffer, .throttle = ssl_throttle, .unthrottle = ssl_unthrottle, .stop = ssl_stop, @@ -171,10 +166,11 @@ static void ssl_console_write(struct console *c, const char *string, unsigned len) { struct line *line = &serial_lines[c->index]; + unsigned long flags; - down(&line->sem); + spin_lock_irqsave(&line->lock, flags); console_write_chan(&line->chan_list, string, len); - up(&line->sem); + spin_unlock_irqrestore(&line->lock, flags); } static struct tty_driver *ssl_console_device(struct console *c, int *index) @@ -238,14 +234,3 @@ static int ssl_chan_setup(char *str) __setup("ssl", ssl_chan_setup); __channel_help(ssl_chan_setup, "ssl"); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index e604d7c87695..361d0be342b3 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -116,8 +116,11 @@ static struct tty_operations console_ops = { .open = con_open, .close = line_close, .write = line_write, + .put_char = line_put_char, .write_room = line_write_room, .chars_in_buffer = line_chars_in_buffer, + .flush_buffer = line_flush_buffer, + .flush_chars = line_flush_chars, .set_termios = line_set_termios, .ioctl = line_ioctl, }; @@ -126,10 +129,11 @@ static void uml_console_write(struct console *console, const char *string, unsigned len) { struct line *line = &vts[console->index]; + unsigned long flags; - down(&line->sem); + spin_lock_irqsave(&line->lock, flags); console_write_chan(&line->chan_list, string, len); - up(&line->sem); + spin_unlock_irqrestore(&line->lock, flags); } static struct tty_driver *uml_console_device(struct console *c, int *index) @@ -192,14 +196,3 @@ static int console_chan_setup(char *str) } __setup("con", console_chan_setup); __channel_help(console_chan_setup, "con"); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 4d8b165bfa48..9a56ff94308d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -156,6 +156,7 @@ static struct gendisk *fake_gendisk[MAX_DEV]; static struct openflags global_openflags = OPEN_FLAGS; struct cow { + /* This is the backing file, actually */ char *file; int fd; unsigned long *bitmap; @@ -927,10 +928,14 @@ static int ubd_open(struct inode *inode, struct file *filp) } } dev->count++; - if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){ + set_disk_ro(disk, !dev->openflags.w); + + /* This should no more be needed. And it didn't work anyway to exclude + * read-write remounting of filesystems.*/ + /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){ if(--dev->count == 0) ubd_close(dev); err = -EROFS; - } + }*/ out: return(err); } @@ -1096,6 +1101,7 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) if(req->rq_status == RQ_INACTIVE) return(1); + /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); @@ -1243,6 +1249,7 @@ static int ubd_check_remapped(int fd, unsigned long address, int is_write, /* It's a write to a ubd device */ + /* This should be impossible now */ if(!dev->openflags.w){ /* It's a write access on a read-only device - probably * shouldn't happen. If the kernel is trying to change @@ -1605,8 +1612,7 @@ void do_io(struct io_thread_req *req) } } while((n < len) && (n != 0)); if (n < len) memset(&buf[n], 0, len - n); - } - else { + } else { n = os_write_file(req->fds[bit], buf, len); if(n != len){ printk("do_io - write failed err = %d " diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c index 7917b9d1cec8..a4fdf3584ad2 100644 --- a/arch/um/drivers/xterm_kern.c +++ b/arch/um/drivers/xterm_kern.c @@ -7,7 +7,6 @@ #include "linux/slab.h" #include "linux/signal.h" #include "linux/interrupt.h" -#include "asm/semaphore.h" #include "asm/irq.h" #include "irq_user.h" #include "irq_kern.h" diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h new file mode 100644 index 000000000000..d705daa2d854 --- /dev/null +++ b/arch/um/include/common-offsets.h @@ -0,0 +1,14 @@ +/* for use by sys-$SUBARCH/kernel-offsets.c */ + +OFFSET(TASK_REGS, task_struct, thread.regs); +OFFSET(TASK_PID, task_struct, pid); +DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); +DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); +DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); +DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); +DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); +DEFINE_STR(UM_KERN_ERR, KERN_ERR); +DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); +DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); +DEFINE_STR(UM_KERN_INFO, KERN_INFO); +DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 15389c886b41..e5fec5570199 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -8,6 +8,7 @@ #include "linux/threads.h" #include "sysdep/ptrace.h" +#include "sysdep/faultinfo.h" extern int ncpus; extern char *linux_prog; @@ -31,8 +32,8 @@ extern int current_pid(void); extern unsigned long alloc_stack(int order, int atomic); extern int do_signal(void); extern int is_stack_fault(unsigned long sp); -extern unsigned long segv(unsigned long address, unsigned long ip, - int is_write, int is_user, void *sc); +extern unsigned long segv(struct faultinfo fi, unsigned long ip, + int is_user, void *sc); extern int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out); extern void syscall_ready(void); @@ -82,7 +83,7 @@ extern void timer_irq(union uml_pt_regs *regs); extern void unprotect_stack(unsigned long stack); extern void do_uml_exitcalls(void); extern int attach_debugger(int idle_pid, int pid, int stop); -extern void bad_segv(unsigned long address, unsigned long ip, int is_write); +extern void bad_segv(struct faultinfo fi, unsigned long ip); extern int config_gdb(char *str); extern int remove_gdb(void); extern char *uml_strdup(char *string); diff --git a/arch/um/include/line.h b/arch/um/include/line.h index 6d81ecc17be5..4c5e92c04ccb 100644 --- a/arch/um/include/line.h +++ b/arch/um/include/line.h @@ -10,7 +10,7 @@ #include "linux/workqueue.h" #include "linux/tty.h" #include "linux/interrupt.h" -#include "asm/semaphore.h" +#include "linux/spinlock.h" #include "chan_user.h" #include "mconsole_kern.h" @@ -37,10 +37,18 @@ struct line { struct list_head chan_list; int valid; int count; - struct semaphore sem; + /*This lock is actually, mostly, local to*/ + spinlock_t lock; + + /* Yes, this is a real circular buffer. + * XXX: And this should become a struct kfifo! + * + * buffer points to a buffer allocated on demand, of length + * LINE_BUFSIZE, head to the start of the ring, tail to the end.*/ char *buffer; char *head; char *tail; + int sigio; struct work_struct task; struct line_driver *driver; @@ -52,7 +60,6 @@ struct line { init_pri : INIT_STATIC, \ chan_list : { }, \ valid : 1, \ - sem : { }, \ buffer : NULL, \ head : NULL, \ tail : NULL, \ @@ -69,15 +76,18 @@ struct lines { extern void line_close(struct tty_struct *tty, struct file * filp); extern int line_open(struct line *lines, struct tty_struct *tty, struct chan_opts *opts); -extern int line_setup(struct line *lines, int num, char *init, +extern int line_setup(struct line *lines, unsigned int sizeof_lines, char *init, int all_allowed); extern int line_write(struct tty_struct *tty, const unsigned char *buf, int len); extern void line_put_char(struct tty_struct *tty, unsigned char ch); extern void line_set_termios(struct tty_struct *tty, struct termios * old); extern int line_chars_in_buffer(struct tty_struct *tty); +extern void line_flush_buffer(struct tty_struct *tty); +extern void line_flush_chars(struct tty_struct *tty); extern int line_write_room(struct tty_struct *tty); extern int line_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg); + extern char *add_xterm_umid(char *base); extern int line_setup_irq(int fd, int input, int output, struct tty_struct *tty); extern void line_close_chan(struct line *line); @@ -89,20 +99,10 @@ extern struct tty_driver * line_register_devfs(struct lines *set, int nlines); extern void lines_init(struct line *lines, int nlines); extern void close_lines(struct line *lines, int nlines); -extern int line_config(struct line *lines, int num, char *str); -extern int line_remove(struct line *lines, int num, char *str); -extern int line_get_config(char *dev, struct line *lines, int num, char *str, + +extern int line_config(struct line *lines, unsigned int sizeof_lines, char *str); +extern int line_remove(struct line *lines, unsigned int sizeof_lines, char *str); +extern int line_get_config(char *dev, struct line *lines, unsigned int sizeof_lines, char *str, int size, char **error_out); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 07340c8cf203..d246d5a24609 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -160,6 +160,7 @@ extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); extern void os_usr1_process(int pid); extern int os_getpid(void); +extern int os_getpgrp(void); extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); diff --git a/arch/um/include/skas_ptrace.h b/arch/um/include/skas_ptrace.h index cfb5fb4f5b91..cd2327d09c8d 100644 --- a/arch/um/include/skas_ptrace.h +++ b/arch/um/include/skas_ptrace.h @@ -6,22 +6,11 @@ #ifndef __SKAS_PTRACE_H #define __SKAS_PTRACE_H -struct ptrace_faultinfo { - int is_write; - unsigned long addr; -}; - -struct ptrace_ldt { - int func; - void *ptr; - unsigned long bytecount; -}; - #define PTRACE_FAULTINFO 52 -#define PTRACE_SIGPENDING 53 -#define PTRACE_LDT 54 #define PTRACE_SWITCH_MM 55 +#include "sysdep/skas_ptrace.h" + #endif /* diff --git a/arch/um/include/sysdep-i386/checksum.h b/arch/um/include/sysdep-i386/checksum.h index 3a2a45811aa3..764ba4db4788 100644 --- a/arch/um/include/sysdep-i386/checksum.h +++ b/arch/um/include/sysdep-i386/checksum.h @@ -24,19 +24,6 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum); /* - * the same as csum_partial, but copies from src while it - * checksums, and handles user-space pointer exceptions correctly, when needed. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -unsigned int csum_partial_copy_to(const unsigned char *src, unsigned char *dst, - int len, int sum, int *err_ptr); -unsigned int csum_partial_copy_from(const unsigned char *src, unsigned char *dst, - int len, int sum, int *err_ptr); - -/* * Note: when you get a NULL pointer exception here this means someone * passed in an incorrect kernel address to one of these functions. * @@ -52,11 +39,24 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char * return(csum_partial(dst, len, sum)); } +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + static __inline__ unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst, int len, int sum, int *err_ptr) { - return csum_partial_copy_from(src, dst, len, sum, err_ptr); + if(copy_from_user(dst, src, len)){ + *err_ptr = -EFAULT; + return(-1); + } + + return csum_partial(dst, len, sum); } /* @@ -67,7 +67,6 @@ unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char */ #define csum_partial_copy_fromuser csum_partial_copy_from_user -unsigned int csum_partial_copy(const unsigned char *src, unsigned char *dst, int len, int sum); /* * This is a version of ip_compute_csum() optimized for IP headers, @@ -196,8 +195,14 @@ static __inline__ unsigned int csum_and_copy_to_user(const unsigned char *src, unsigned char *dst, int len, int sum, int *err_ptr) { - if (access_ok(VERIFY_WRITE, dst, len)) - return(csum_partial_copy_to(src, dst, len, sum, err_ptr)); + if (access_ok(VERIFY_WRITE, dst, len)){ + if(copy_to_user(dst, src, len)){ + *err_ptr = -EFAULT; + return(-1); + } + + return csum_partial(src, len, sum); + } if (len) *err_ptr = -EFAULT; diff --git a/arch/um/include/sysdep-i386/faultinfo.h b/arch/um/include/sysdep-i386/faultinfo.h new file mode 100644 index 000000000000..db437cc373bc --- /dev/null +++ b/arch/um/include/sysdep-i386/faultinfo.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_I386_H +#define __FAULTINFO_I386_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +#define PTRACE_FULL_FAULTINFO 0 + +#endif diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h index 661d495e2044..84ec7ff5cf8c 100644 --- a/arch/um/include/sysdep-i386/ptrace.h +++ b/arch/um/include/sysdep-i386/ptrace.h @@ -31,6 +31,7 @@ extern int sysemu_supported; #ifdef UML_CONFIG_MODE_SKAS #include "skas_ptregs.h" +#include "sysdep/faultinfo.h" #define REGS_IP(r) ((r)[HOST_IP]) #define REGS_SP(r) ((r)[HOST_SP]) @@ -53,12 +54,6 @@ extern int sysemu_supported; #define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) -#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type) - -#define REGS_FAULT_ADDR(r) ((r)->fault_addr) - -#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) - #endif #ifndef PTRACE_SYSEMU_SINGLESTEP #define PTRACE_SYSEMU_SINGLESTEP 32 @@ -71,6 +66,7 @@ union uml_pt_regs { struct tt_regs { long syscall; void *sc; + struct faultinfo faultinfo; } tt; #endif #ifdef UML_CONFIG_MODE_SKAS @@ -78,9 +74,7 @@ union uml_pt_regs { unsigned long regs[HOST_FRAME_SIZE]; unsigned long fp[HOST_FP_SIZE]; unsigned long xfp[HOST_XFP_SIZE]; - unsigned long fault_addr; - unsigned long fault_type; - unsigned long trap_type; + struct faultinfo faultinfo; long syscall; int is_user; } skas; @@ -217,15 +211,8 @@ struct syscall_args { #define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) #define UPT_SYSCALL_RET(r) UPT_EAX(r) -#define UPT_SEGV_IS_FIXABLE(r) \ - CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ - REGS_SEGV_IS_FIXABLE(&r->skas)) - -#define UPT_FAULT_ADDR(r) \ - __CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) - -#define UPT_FAULT_WRITE(r) \ - CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) +#define UPT_FAULTINFO(r) \ + CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo)) #endif diff --git a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h index dfee589de360..1fe729265167 100644 --- a/arch/um/include/sysdep-i386/sigcontext.h +++ b/arch/um/include/sysdep-i386/sigcontext.h @@ -13,15 +13,12 @@ #define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc)) #define SC_SET_SYSCALL_RETURN(sc, result) SC_EAX(sc) = (result) -#define SC_FAULT_ADDR(sc) SC_CR2(sc) -#define SC_FAULT_TYPE(sc) SC_ERR(sc) - -#define FAULT_WRITE(err) (err & 2) -#define TO_SC_ERR(is_write) ((is_write) ? 2 : 0) - -#define SC_FAULT_WRITE(sc) (FAULT_WRITE(SC_ERR(sc))) - -#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) +#define GET_FAULTINFO_FROM_SC(fi,sc) \ + { \ + (fi).cr2 = SC_CR2(sc); \ + (fi).error_code = SC_ERR(sc); \ + (fi).trap_no = SC_TRAPNO(sc); \ + } /* ptrace expects that, at the start of a system call, %eax contains * -ENOSYS, so this makes it so. @@ -29,9 +26,7 @@ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) /* This is Page Fault */ -#define SEGV_IS_FIXABLE(trap) (trap == 14) - -#define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) extern unsigned long *sc_sigmask(void *sc_ptr); extern int sc_get_fpregs(unsigned long buf, void *sc_ptr); diff --git a/arch/um/include/sysdep-i386/signal.h b/arch/um/include/sysdep-i386/signal.h index b1e1f7a77499..07518b162136 100644 --- a/arch/um/include/sysdep-i386/signal.h +++ b/arch/um/include/sysdep-i386/signal.h @@ -8,6 +8,8 @@ #include <signal.h> +#define ARCH_SIGHDLR_PARAM int sig + #define ARCH_GET_SIGCONTEXT(sc, sig) \ do sc = (struct sigcontext *) (&sig + 1); while(0) diff --git a/arch/um/include/sysdep-i386/skas_ptrace.h b/arch/um/include/sysdep-i386/skas_ptrace.h new file mode 100644 index 000000000000..e27b8a791773 --- /dev/null +++ b/arch/um/include/sysdep-i386/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_SKAS_PTRACE_H +#define __SYSDEP_I386_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h index 5db81ec9087d..be0a3e3469eb 100644 --- a/arch/um/include/sysdep-i386/syscalls.h +++ b/arch/um/include/sysdep-i386/syscalls.h @@ -22,102 +22,3 @@ extern syscall_handler_t old_mmap_i386; extern long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); - -/* On i386 they choose a meaningless naming.*/ -#define __NR_kexec_load __NR_sys_kexec_load - -#define ARCH_SYSCALLS \ - [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, \ - [ __NR_break ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_oldstat ] = (syscall_handler_t *) sys_stat, \ - [ __NR_umount ] = (syscall_handler_t *) sys_oldumount, \ - [ __NR_stime ] = um_stime, \ - [ __NR_oldfstat ] = (syscall_handler_t *) sys_fstat, \ - [ __NR_stty ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_gtty ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_nice ] = (syscall_handler_t *) sys_nice, \ - [ __NR_ftime ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_prof ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_signal ] = (syscall_handler_t *) sys_signal, \ - [ __NR_lock ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_mpx ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_ulimit ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_oldolduname ] = (syscall_handler_t *) sys_olduname, \ - [ __NR_sigaction ] = (syscall_handler_t *) sys_sigaction, \ - [ __NR_sgetmask ] = (syscall_handler_t *) sys_sgetmask, \ - [ __NR_ssetmask ] = (syscall_handler_t *) sys_ssetmask, \ - [ __NR_sigsuspend ] = (syscall_handler_t *) sys_sigsuspend, \ - [ __NR_sigpending ] = (syscall_handler_t *) sys_sigpending, \ - [ __NR_oldlstat ] = (syscall_handler_t *) sys_lstat, \ - [ __NR_readdir ] = old_readdir, \ - [ __NR_profil ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_socketcall ] = (syscall_handler_t *) sys_socketcall, \ - [ __NR_olduname ] = (syscall_handler_t *) sys_uname, \ - [ __NR_iopl ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_idle ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_ipc ] = (syscall_handler_t *) sys_ipc, \ - [ __NR_sigreturn ] = (syscall_handler_t *) sys_sigreturn, \ - [ __NR_sigprocmask ] = (syscall_handler_t *) sys_sigprocmask, \ - [ __NR_bdflush ] = (syscall_handler_t *) sys_bdflush, \ - [ __NR__llseek ] = (syscall_handler_t *) sys_llseek, \ - [ __NR__newselect ] = (syscall_handler_t *) sys_select, \ - [ __NR_vm86 ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_mmap ] = (syscall_handler_t *) old_mmap_i386, \ - [ __NR_ugetrlimit ] = (syscall_handler_t *) sys_getrlimit, \ - [ __NR_mmap2 ] = (syscall_handler_t *) sys_mmap2, \ - [ __NR_truncate64 ] = (syscall_handler_t *) sys_truncate64, \ - [ __NR_ftruncate64 ] = (syscall_handler_t *) sys_ftruncate64, \ - [ __NR_stat64 ] = (syscall_handler_t *) sys_stat64, \ - [ __NR_lstat64 ] = (syscall_handler_t *) sys_lstat64, \ - [ __NR_fstat64 ] = (syscall_handler_t *) sys_fstat64, \ - [ __NR_fcntl64 ] = (syscall_handler_t *) sys_fcntl64, \ - [ __NR_sendfile64 ] = (syscall_handler_t *) sys_sendfile64, \ - [ __NR_statfs64 ] = (syscall_handler_t *) sys_statfs64, \ - [ __NR_fstatfs64 ] = (syscall_handler_t *) sys_fstatfs64, \ - [ __NR_fadvise64_64 ] = (syscall_handler_t *) sys_fadvise64_64, \ - [ __NR_select ] = (syscall_handler_t *) old_select, \ - [ __NR_vm86old ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_modify_ldt ] = (syscall_handler_t *) sys_modify_ldt, \ - [ __NR_lchown32 ] = (syscall_handler_t *) sys_lchown, \ - [ __NR_getuid32 ] = (syscall_handler_t *) sys_getuid, \ - [ __NR_getgid32 ] = (syscall_handler_t *) sys_getgid, \ - [ __NR_geteuid32 ] = (syscall_handler_t *) sys_geteuid, \ - [ __NR_getegid32 ] = (syscall_handler_t *) sys_getegid, \ - [ __NR_setreuid32 ] = (syscall_handler_t *) sys_setreuid, \ - [ __NR_setregid32 ] = (syscall_handler_t *) sys_setregid, \ - [ __NR_getgroups32 ] = (syscall_handler_t *) sys_getgroups, \ - [ __NR_setgroups32 ] = (syscall_handler_t *) sys_setgroups, \ - [ __NR_fchown32 ] = (syscall_handler_t *) sys_fchown, \ - [ __NR_setresuid32 ] = (syscall_handler_t *) sys_setresuid, \ - [ __NR_getresuid32 ] = (syscall_handler_t *) sys_getresuid, \ - [ __NR_setresgid32 ] = (syscall_handler_t *) sys_setresgid, \ - [ __NR_getresgid32 ] = (syscall_handler_t *) sys_getresgid, \ - [ __NR_chown32 ] = (syscall_handler_t *) sys_chown, \ - [ __NR_setuid32 ] = (syscall_handler_t *) sys_setuid, \ - [ __NR_setgid32 ] = (syscall_handler_t *) sys_setgid, \ - [ __NR_setfsuid32 ] = (syscall_handler_t *) sys_setfsuid, \ - [ __NR_setfsgid32 ] = (syscall_handler_t *) sys_setfsgid, \ - [ __NR_pivot_root ] = (syscall_handler_t *) sys_pivot_root, \ - [ __NR_mincore ] = (syscall_handler_t *) sys_mincore, \ - [ __NR_madvise ] = (syscall_handler_t *) sys_madvise, \ - [ 222 ] = (syscall_handler_t *) sys_ni_syscall, \ - [ 223 ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_set_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_get_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \ - [ 251 ] = (syscall_handler_t *) sys_ni_syscall, \ - [ 285 ] = (syscall_handler_t *) sys_ni_syscall, - -/* 222 doesn't yet have a name in include/asm-i386/unistd.h */ - -#define LAST_ARCH_SYSCALL 285 - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-ia64/skas_ptrace.h b/arch/um/include/sysdep-ia64/skas_ptrace.h new file mode 100644 index 000000000000..25a38e715702 --- /dev/null +++ b/arch/um/include/sysdep-ia64/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SKAS_PTRACE_H +#define __SYSDEP_IA64_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-ppc/skas_ptrace.h b/arch/um/include/sysdep-ppc/skas_ptrace.h new file mode 100644 index 000000000000..d9fbbac10de0 --- /dev/null +++ b/arch/um/include/sysdep-ppc/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_PPC_SKAS_PTRACE_H +#define __SYSDEP_PPC_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-x86_64/faultinfo.h b/arch/um/include/sysdep-x86_64/faultinfo.h new file mode 100644 index 000000000000..cb917b0d5660 --- /dev/null +++ b/arch/um/include/sysdep-x86_64/faultinfo.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_X86_64_H +#define __FAULTINFO_X86_64_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +#define PTRACE_FULL_FAULTINFO 1 + +#endif diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index 915c82daffbd..348e8fcd513f 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -9,6 +9,7 @@ #include "uml-config.h" #include "user_constants.h" +#include "sysdep/faultinfo.h" #define MAX_REG_OFFSET (UM_FRAME_SIZE) #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) @@ -83,6 +84,7 @@ union uml_pt_regs { long syscall; unsigned long orig_rax; void *sc; + struct faultinfo faultinfo; } tt; #endif #ifdef UML_CONFIG_MODE_SKAS @@ -90,9 +92,7 @@ union uml_pt_regs { /* XXX */ unsigned long regs[27]; unsigned long fp[65]; - unsigned long fault_addr; - unsigned long fault_type; - unsigned long trap_type; + struct faultinfo faultinfo; long syscall; int is_user; } skas; @@ -241,14 +241,8 @@ struct syscall_args { CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ REGS_SEGV_IS_FIXABLE(&r->skas)) -#define UPT_FAULT_ADDR(r) \ - __CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) - -#define UPT_FAULT_WRITE(r) \ - CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) - -#define UPT_TRAP(r) __CHOOSE_MODE(SC_TRAP_TYPE(UPT_SC(r)), REGS_TRAP(&r->skas)) -#define UPT_ERR(r) __CHOOSE_MODE(SC_FAULT_TYPE(UPT_SC(r)), REGS_ERR(&r->skas)) +#define UPT_FAULTINFO(r) \ + CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo)) #endif diff --git a/arch/um/include/sysdep-x86_64/sigcontext.h b/arch/um/include/sysdep-x86_64/sigcontext.h index 1e38a54ff4cf..2a78260d15a0 100644 --- a/arch/um/include/sysdep-x86_64/sigcontext.h +++ b/arch/um/include/sysdep-x86_64/sigcontext.h @@ -17,11 +17,12 @@ #define SC_FAULT_ADDR(sc) SC_CR2(sc) #define SC_FAULT_TYPE(sc) SC_ERR(sc) -#define FAULT_WRITE(err) ((err) & 2) - -#define SC_FAULT_WRITE(sc) FAULT_WRITE(SC_FAULT_TYPE(sc)) - -#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) +#define GET_FAULTINFO_FROM_SC(fi,sc) \ + { \ + (fi).cr2 = SC_CR2(sc); \ + (fi).error_code = SC_ERR(sc); \ + (fi).trap_no = SC_TRAPNO(sc); \ + } /* ptrace expects that, at the start of a system call, %eax contains * -ENOSYS, so this makes it so. @@ -29,8 +30,8 @@ #define SC_START_SYSCALL(sc) do SC_RAX(sc) = -ENOSYS; while(0) -#define SEGV_IS_FIXABLE(trap) ((trap) == 14) -#define SC_SEGV_IS_FIXABLE(sc) SEGV_IS_FIXABLE(SC_TRAP_TYPE(sc)) +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) extern unsigned long *sc_sigmask(void *sc_ptr); diff --git a/arch/um/include/sysdep-x86_64/signal.h b/arch/um/include/sysdep-x86_64/signal.h index e5e52756fab4..6142897af3d1 100644 --- a/arch/um/include/sysdep-x86_64/signal.h +++ b/arch/um/include/sysdep-x86_64/signal.h @@ -6,6 +6,8 @@ #ifndef __X86_64_SIGNAL_H_ #define __X86_64_SIGNAL_H_ +#define ARCH_SIGHDLR_PARAM int sig + #define ARCH_GET_SIGCONTEXT(sc, sig_addr) \ do { \ struct ucontext *__uc; \ diff --git a/arch/um/include/sysdep-x86_64/skas_ptrace.h b/arch/um/include/sysdep-x86_64/skas_ptrace.h new file mode 100644 index 000000000000..95db4be786e4 --- /dev/null +++ b/arch/um/include/sysdep-x86_64/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SKAS_PTRACE_H +#define __SYSDEP_X86_64_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/include/sysdep-x86_64/syscalls.h b/arch/um/include/sysdep-x86_64/syscalls.h index b187a4157ff3..67923cca5691 100644 --- a/arch/um/include/sysdep-x86_64/syscalls.h +++ b/arch/um/include/sysdep-x86_64/syscalls.h @@ -26,66 +26,9 @@ extern syscall_handler_t *ia32_sys_call_table[]; extern long old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); -extern syscall_handler_t wrap_sys_shmat; extern syscall_handler_t sys_modify_ldt; extern syscall_handler_t sys_arch_prctl; -#define ARCH_SYSCALLS \ - [ __NR_mmap ] = (syscall_handler_t *) old_mmap, \ - [ __NR_select ] = (syscall_handler_t *) sys_select, \ - [ __NR_mincore ] = (syscall_handler_t *) sys_mincore, \ - [ __NR_madvise ] = (syscall_handler_t *) sys_madvise, \ - [ __NR_shmget ] = (syscall_handler_t *) sys_shmget, \ - [ __NR_shmat ] = (syscall_handler_t *) wrap_sys_shmat, \ - [ __NR_shmctl ] = (syscall_handler_t *) sys_shmctl, \ - [ __NR_semop ] = (syscall_handler_t *) sys_semop, \ - [ __NR_semget ] = (syscall_handler_t *) sys_semget, \ - [ __NR_semctl ] = (syscall_handler_t *) sys_semctl, \ - [ __NR_shmdt ] = (syscall_handler_t *) sys_shmdt, \ - [ __NR_msgget ] = (syscall_handler_t *) sys_msgget, \ - [ __NR_msgsnd ] = (syscall_handler_t *) sys_msgsnd, \ - [ __NR_msgrcv ] = (syscall_handler_t *) sys_msgrcv, \ - [ __NR_msgctl ] = (syscall_handler_t *) sys_msgctl, \ - [ __NR_pivot_root ] = (syscall_handler_t *) sys_pivot_root, \ - [ __NR_tuxcall ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_security ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_epoll_ctl_old ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_epoll_wait_old ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_modify_ldt ] = (syscall_handler_t *) sys_modify_ldt, \ - [ __NR_arch_prctl ] = (syscall_handler_t *) sys_arch_prctl, \ - [ __NR_socket ] = (syscall_handler_t *) sys_socket, \ - [ __NR_connect ] = (syscall_handler_t *) sys_connect, \ - [ __NR_accept ] = (syscall_handler_t *) sys_accept, \ - [ __NR_recvfrom ] = (syscall_handler_t *) sys_recvfrom, \ - [ __NR_recvmsg ] = (syscall_handler_t *) sys_recvmsg, \ - [ __NR_sendmsg ] = (syscall_handler_t *) sys_sendmsg, \ - [ __NR_bind ] = (syscall_handler_t *) sys_bind, \ - [ __NR_listen ] = (syscall_handler_t *) sys_listen, \ - [ __NR_getsockname ] = (syscall_handler_t *) sys_getsockname, \ - [ __NR_getpeername ] = (syscall_handler_t *) sys_getpeername, \ - [ __NR_socketpair ] = (syscall_handler_t *) sys_socketpair, \ - [ __NR_sendto ] = (syscall_handler_t *) sys_sendto, \ - [ __NR_shutdown ] = (syscall_handler_t *) sys_shutdown, \ - [ __NR_setsockopt ] = (syscall_handler_t *) sys_setsockopt, \ - [ __NR_getsockopt ] = (syscall_handler_t *) sys_getsockopt, \ - [ __NR_iopl ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_set_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_get_thread_area ] = (syscall_handler_t *) sys_ni_syscall, \ - [ __NR_semtimedop ] = (syscall_handler_t *) sys_semtimedop, \ - [ 251 ] = (syscall_handler_t *) sys_ni_syscall, - -#define LAST_ARCH_SYSCALL 251 -#define NR_syscalls 1024 +#define NR_syscalls (__NR_syscall_max + 1) #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h index 103cd320386c..b8c5b8a95250 100644 --- a/arch/um/include/user_util.h +++ b/arch/um/include/user_util.h @@ -67,7 +67,6 @@ extern void *um_kmalloc(int size); extern int switcheroo(int fd, int prot, void *from, void *to, int size); extern void setup_machinename(char *machine_out); extern void setup_hostinfo(void); -extern void add_arg(char *arg); extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); extern void init_new_thread_signals(int altstack); extern void do_exec(int old_pid, int new_pid); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index dc796c1bf39e..9736ca27c5f0 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -4,13 +4,13 @@ # extra-y := vmlinux.lds -clean-files := vmlinux.lds.S config.tmp +clean-files := -obj-y = checksum.o config.o exec_kern.o exitcode.o \ +obj-y = config.o exec_kern.o exitcode.o \ helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \ physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ - syscall_kern.o sysrq.o sys_call_table.o tempfile.o time.o time_kern.o \ + syscall_kern.o sysrq.o tempfile.o time.o time_kern.o \ tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ user_util.o @@ -23,18 +23,14 @@ obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ -# This needs be compiled with frame pointers regardless of how the rest of the -# kernel is built. -CFLAGS_frame.o := -fno-omit-frame-pointer - user-objs-$(CONFIG_TTY_LOG) += tty_log.o USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \ - time.o tty_log.o umid.o user_util.o frame.o + time.o tty_log.o umid.o user_util.o include arch/um/scripts/Makefile.rules -targets += config.c +targets := config.c config.tmp # Be careful with the below Sed code - sed is pitfall-rich! # We use sed to lower build requirements, for "embedded" builders for instance. @@ -53,6 +49,7 @@ quiet_cmd_quote2 = QUOTE $@ cmd_quote2 = sed -e '/CONFIG/{' \ -e 's/"CONFIG"\;/""/' \ -e 'r $(obj)/config.tmp' \ - -e 'a""\;' \ + -e 'a \' \ + -e '""\;' \ -e '}' \ $< > $@ diff --git a/arch/um/kernel/checksum.c b/arch/um/kernel/checksum.c index e69b2be951d1..e69de29bb2d1 100644 --- a/arch/um/kernel/checksum.c +++ b/arch/um/kernel/checksum.c @@ -1,36 +0,0 @@ -#include "asm/uaccess.h" -#include "linux/errno.h" -#include "linux/module.h" - -unsigned int arch_csum_partial(const unsigned char *buff, int len, int sum); - -unsigned int csum_partial(unsigned char *buff, int len, int sum) -{ - return arch_csum_partial(buff, len, sum); -} - -EXPORT_SYMBOL(csum_partial); - -unsigned int csum_partial_copy_to(const unsigned char *src, - unsigned char __user *dst, int len, int sum, - int *err_ptr) -{ - if(copy_to_user(dst, src, len)){ - *err_ptr = -EFAULT; - return(-1); - } - - return(arch_csum_partial(src, len, sum)); -} - -unsigned int csum_partial_copy_from(const unsigned char __user *src, - unsigned char *dst, int len, int sum, - int *err_ptr) -{ - if(copy_from_user(dst, src, len)){ - *err_ptr = -EFAULT; - return(-1); - } - - return arch_csum_partial(dst, len, sum); -} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index d71e8f00810f..d44fb5282547 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -163,7 +163,6 @@ void __init init_IRQ(void) irq_desc[i].handler = &SIGIO_irq_type; enable_irq(i); } - init_irq_signals(0); } /* diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index b41d3397d07b..78d69dc74b26 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -10,7 +10,6 @@ #include "linux/spinlock.h" #include "linux/highmem.h" #include "asm/current.h" -#include "asm/delay.h" #include "asm/processor.h" #include "asm/unistd.h" #include "asm/pgalloc.h" @@ -28,8 +27,6 @@ EXPORT_SYMBOL(uml_physmem); EXPORT_SYMBOL(set_signals); EXPORT_SYMBOL(get_signals); EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(sys_waitpid); EXPORT_SYMBOL(task_size); EXPORT_SYMBOL(flush_tlb_range); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index f76a2692adca..51f8e5a8ac6a 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -65,8 +65,6 @@ void init_new_thread_signals(int altstack) SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGWINCH, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); set_handler(SIGUSR2, (__sighandler_t) sig_handler, flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); signal(SIGHUP, SIG_IGN); diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 1d719d5b4bb9..c1adf7ba3fd1 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -115,16 +115,6 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) return(pid); } -void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (prev != next) - cpu_clear(cpu, prev->cpu_vm_mask); - cpu_set(cpu, next->cpu_vm_mask); -} - void set_current(void *t) { struct task_struct *task = t; @@ -152,7 +142,6 @@ void release_thread(struct task_struct *task) void exit_thread(void) { - CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); unprotect_stack((unsigned long) current_thread); } @@ -161,10 +150,6 @@ void *get_current(void) return(current); } -void prepare_to_copy(struct task_struct *tsk) -{ -} - int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct * p, struct pt_regs *regs) @@ -480,12 +465,21 @@ int singlestepping(void * t) return 2; } +/* + * Only x86 and x86_64 have an arch_align_stack(). + * All other arches have "#define arch_align_stack(x) (x)" + * in their asm/system.h + * As this is included in UML from asm-um/system-generic.h, + * we can use it to behave as the subarch does. + */ +#ifndef arch_align_stack unsigned long arch_align_stack(unsigned long sp) { if (randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } +#endif /* diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 3a99ee6d94eb..2b75d8d9ba73 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -19,15 +19,30 @@ #include "skas_ptrace.h" #include "sysdep/ptrace.h" +static inline void set_singlestepping(struct task_struct *child, int on) +{ + if (on) + child->ptrace |= PT_DTRACE; + else + child->ptrace &= ~PT_DTRACE; + child->thread.singlestep_syscall = 0; + +#ifdef SUBARCH_SET_SINGLESTEPPING + SUBARCH_SET_SINGLESTEPPING(child, on) +#endif + } + /* * Called by kernel/ptrace.c when detaching.. */ void ptrace_disable(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child,0); } +extern int peek_user(struct task_struct * child, long addr, long data); +extern int poke_user(struct task_struct * child, long addr, long data); + long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; @@ -67,6 +82,10 @@ long sys_ptrace(long request, long pid, long addr, long data) goto out_tsk; } +#ifdef SUBACH_PTRACE_SPECIAL + SUBARCH_PTRACE_SPECIAL(child,request,addr,data) +#endif + ret = ptrace_check_attach(child, request == PTRACE_KILL); if (ret < 0) goto out_tsk; @@ -87,26 +106,9 @@ long sys_ptrace(long request, long pid, long addr, long data) } /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 3) || addr < 0) - break; - - tmp = 0; /* Default return condition */ - if(addr < MAX_REG_OFFSET){ - tmp = getreg(child, addr); - } - else if((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ - addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; - tmp = child->thread.arch.debugregs[addr]; - } - ret = put_user(tmp, (unsigned long __user *) data); - break; - } + case PTRACE_PEEKUSR: + ret = peek_user(child, addr, data); + break; /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ @@ -119,35 +121,16 @@ long sys_ptrace(long request, long pid, long addr, long data) break; case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0) - break; - - if (addr < MAX_REG_OFFSET) { - ret = putreg(child, addr, data); - break; - } -#if 0 /* XXX x86_64 */ - else if((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ - addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; - if((addr == 4) || (addr == 5)) break; - child->thread.arch.debugregs[addr] = data; - ret = 0; - } -#endif - - break; + ret = poke_user(child, addr, data); + break; case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; - child->ptrace &= ~PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child, 0); if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } @@ -170,8 +153,7 @@ long sys_ptrace(long request, long pid, long addr, long data) if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; - child->ptrace &= ~PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child, 0); child->exit_code = SIGKILL; wake_up_process(child); break; @@ -179,11 +161,10 @@ long sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->ptrace |= PT_DTRACE; - child->thread.singlestep_syscall = 0; + set_singlestepping(child, 1); child->exit_code = data; /* give it a chance to run. */ wake_up_process(child); @@ -250,23 +231,19 @@ long sys_ptrace(long request, long pid, long addr, long data) break; #endif case PTRACE_FAULTINFO: { - struct ptrace_faultinfo fault; - - fault = ((struct ptrace_faultinfo) - { .is_write = child->thread.err, - .addr = child->thread.cr2 }); - ret = copy_to_user((unsigned long __user *) data, &fault, - sizeof(fault)); + /* Take the info from thread->arch->faultinfo, + * but transfer max. sizeof(struct ptrace_faultinfo). + * On i386, ptrace_faultinfo is smaller! + */ + ret = copy_to_user((unsigned long __user *) data, + &child->thread.arch.faultinfo, + sizeof(struct ptrace_faultinfo)); if(ret) break; break; } - case PTRACE_SIGPENDING: - ret = copy_to_user((unsigned long __user *) data, - &child->pending.signal, - sizeof(child->pending.signal)); - break; +#ifdef PTRACE_LDT case PTRACE_LDT: { struct ptrace_ldt ldt; @@ -282,6 +259,7 @@ long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } +#endif #ifdef CONFIG_PROC_MM case PTRACE_SWITCH_MM: { struct mm_struct *old = child->mm; @@ -337,15 +315,18 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit) if (unlikely(current->audit_context)) { if (!entryexit) - audit_syscall_entry(current, - UPT_SYSCALL_NR(®s->regs), - UPT_SYSCALL_ARG1(®s->regs), - UPT_SYSCALL_ARG2(®s->regs), - UPT_SYSCALL_ARG3(®s->regs), - UPT_SYSCALL_ARG4(®s->regs)); - else - audit_syscall_exit(current, - UPT_SYSCALL_RET(®s->regs)); + audit_syscall_entry(current, + HOST_AUDIT_ARCH, + UPT_SYSCALL_NR(regs), + UPT_SYSCALL_ARG1(regs), + UPT_SYSCALL_ARG2(regs), + UPT_SYSCALL_ARG3(regs), + UPT_SYSCALL_ARG4(regs)); + else { + int res = UPT_SYSCALL_RET(regs); + audit_syscall_exit(current, AUDITSC_RESULT(res), + res); + } } /* Fake a debug trap */ diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c index 668df13d8c9d..e89218958f38 100644 --- a/arch/um/kernel/sigio_user.c +++ b/arch/um/kernel/sigio_user.c @@ -182,6 +182,7 @@ static int write_sigio_thread(void *unused) int i, n, respond_fd; char c; + signal(SIGWINCH, SIG_IGN); fds = ¤t_poll; while(1){ n = poll(fds->poll, fds->used, -1); diff --git a/arch/um/kernel/skas/include/mode_kern-skas.h b/arch/um/kernel/skas/include/mode_kern-skas.h index 94c564962378..e48490028111 100644 --- a/arch/um/kernel/skas/include/mode_kern-skas.h +++ b/arch/um/kernel/skas/include/mode_kern-skas.h @@ -18,7 +18,6 @@ extern int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct *p, struct pt_regs *regs); extern void release_thread_skas(struct task_struct *task); -extern void exit_thread_skas(void); extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); extern void init_idle_skas(void); extern void flush_tlb_kernel_range_skas(unsigned long start, diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index f0702c2c7204..96b51dba3471 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -27,9 +27,10 @@ extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, extern int unmap(int fd, void *addr, unsigned long len); extern int protect(int fd, unsigned long addr, unsigned long len, int r, int w, int x); -extern void user_signal(int sig, union uml_pt_regs *regs); +extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); extern void start_userspace(int cpu); +extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); #endif diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h index 11986c9b9ddf..cd6c280482cb 100644 --- a/arch/um/kernel/skas/include/uaccess-skas.h +++ b/arch/um/kernel/skas/include/uaccess-skas.h @@ -19,7 +19,7 @@ ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))) static inline int verify_area_skas(int type, const void * addr, - unsigned long size) + unsigned long size) { return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index b4ffaaa81241..773cd2b525fc 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -4,6 +4,7 @@ */ #include <stdlib.h> +#include <string.h> #include <unistd.h> #include <errno.h> #include <signal.h> @@ -27,27 +28,37 @@ #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "process.h" int is_skas_winch(int pid, int fd, void *data) { - if(pid != os_getpid()) + if(pid != os_getpgrp()) return(0); register_winch_irq(-1, fd, -1, data); return(1); } -static void handle_segv(int pid) +void get_skas_faultinfo(int pid, struct faultinfo * fi) { - struct ptrace_faultinfo fault; int err; - err = ptrace(PTRACE_FAULTINFO, pid, 0, &fault); + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); if(err) - panic("handle_segv - PTRACE_FAULTINFO failed, errno = %d\n", - errno); + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); +} - segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); +static void handle_segv(int pid, union uml_pt_regs * regs) +{ + get_skas_faultinfo(pid, ®s->skas.faultinfo); + segv(regs->skas.faultinfo, 0, 1, NULL); } /*To use the same value of using_sysemu as the caller, ask it that value (in local_using_sysemu)*/ @@ -163,7 +174,7 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid); + handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -177,7 +188,7 @@ void userspace(union uml_pt_regs *regs) case SIGBUS: case SIGFPE: case SIGWINCH: - user_signal(WSTOPSIG(status), regs); + user_signal(WSTOPSIG(status), regs, pid); break; default: printk("userspace - child stopped with signal " @@ -190,6 +201,11 @@ void userspace(union uml_pt_regs *regs) } } } +#define INIT_JMP_NEW_THREAD 0 +#define INIT_JMP_REMOVE_SIGSTACK 1 +#define INIT_JMP_CALLBACK 2 +#define INIT_JMP_HALT 3 +#define INIT_JMP_REBOOT 4 void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) @@ -225,7 +241,7 @@ void thread_wait(void *sw, void *fb) *switch_buf = &buf; fork_buf = fb; if(sigsetjmp(buf, 1) == 0) - siglongjmp(*fork_buf, 1); + siglongjmp(*fork_buf, INIT_JMP_REMOVE_SIGSTACK); } void switch_threads(void *me, void *next) @@ -249,23 +265,31 @@ int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr) sigjmp_buf **switch_buf = switch_buf_ptr; int n; + set_handler(SIGWINCH, (__sighandler_t) sig_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGALRM, + SIGVTALRM, -1); + *fork_buf_ptr = &initial_jmpbuf; n = sigsetjmp(initial_jmpbuf, 1); - if(n == 0) - new_thread_proc((void *) stack, new_thread_handler); - else if(n == 1) - remove_sigstack(); - else if(n == 2){ + switch(n){ + case INIT_JMP_NEW_THREAD: + new_thread_proc((void *) stack, new_thread_handler); + break; + case INIT_JMP_REMOVE_SIGSTACK: + remove_sigstack(); + break; + case INIT_JMP_CALLBACK: (*cb_proc)(cb_arg); siglongjmp(*cb_back, 1); - } - else if(n == 3){ + break; + case INIT_JMP_HALT: kmalloc_ok = 0; return(0); - } - else if(n == 4){ + case INIT_JMP_REBOOT: kmalloc_ok = 0; return(1); + default: + panic("Bad sigsetjmp return in start_idle_thread - %d\n", n); } siglongjmp(**switch_buf, 1); } @@ -290,7 +314,7 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) block_signals(); if(sigsetjmp(here, 1) == 0) - siglongjmp(initial_jmpbuf, 2); + siglongjmp(initial_jmpbuf, INIT_JMP_CALLBACK); unblock_signals(); cb_proc = NULL; @@ -301,13 +325,13 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) void halt_skas(void) { block_signals(); - siglongjmp(initial_jmpbuf, 3); + siglongjmp(initial_jmpbuf, INIT_JMP_HALT); } void reboot_skas(void) { block_signals(); - siglongjmp(initial_jmpbuf, 4); + siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } void switch_mm_skas(int mm_fd) diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 5d096ea63b97..ab5d3271da0b 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -83,10 +83,6 @@ void release_thread_skas(struct task_struct *task) { } -void exit_thread_skas(void) -{ -} - void fork_handler(int sig) { change_sig(SIGUSR1, 1); diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c index 8e9b46d4702e..0dee1d95c806 100644 --- a/arch/um/kernel/skas/trap_user.c +++ b/arch/um/kernel/skas/trap_user.c @@ -5,12 +5,15 @@ #include <signal.h> #include <errno.h> -#include "sysdep/ptrace.h" #include "signal_user.h" #include "user_util.h" #include "kern_util.h" #include "task.h" #include "sigcontext.h" +#include "skas.h" +#include "ptrace_user.h" +#include "sysdep/ptrace.h" +#include "sysdep/ptrace_user.h" void sig_handler_common_skas(int sig, void *sc_ptr) { @@ -31,9 +34,11 @@ void sig_handler_common_skas(int sig, void *sc_ptr) r = &TASK_REGS(get_current())->skas; save_user = r->is_user; r->is_user = 0; - r->fault_addr = SC_FAULT_ADDR(sc); - r->fault_type = SC_FAULT_TYPE(sc); - r->trap_type = SC_TRAP_TYPE(sc); + if ( sig == SIGFPE || sig == SIGSEGV || + sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP ) { + GET_FAULTINFO_FROM_SC(r->faultinfo, sc); + } change_sig(SIGUSR1, 1); info = &sig_info[sig]; @@ -45,14 +50,17 @@ void sig_handler_common_skas(int sig, void *sc_ptr) r->is_user = save_user; } -void user_signal(int sig, union uml_pt_regs *regs) +extern int ptrace_faultinfo; + +void user_signal(int sig, union uml_pt_regs *regs, int pid) { struct signal_info *info; + int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || + (sig == SIGILL) || (sig == SIGTRAP)); regs->skas.is_user = 1; - regs->skas.fault_addr = 0; - regs->skas.fault_type = 0; - regs->skas.trap_type = 0; + if (segv) + get_skas_faultinfo(pid, ®s->skas.faultinfo); info = &sig_info[sig]; (*info->handler)(sig, regs); diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 7575ec489b63..75195281081e 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -3,6 +3,7 @@ * Licensed under the GPL */ +#include "linux/compiler.h" #include "linux/stddef.h" #include "linux/kernel.h" #include "linux/string.h" @@ -28,9 +29,12 @@ static unsigned long maybe_map(unsigned long virt, int is_write) if(IS_ERR(phys) || (is_write && !pte_write(pte))){ err = handle_page_fault(virt, 0, is_write, 1, &dummy_code); if(err) - return(0); + return(-1UL); phys = um_virt_to_phys(current, virt, NULL); } + if(IS_ERR(phys)) + phys = (void *) -1; + return((unsigned long) phys); } @@ -41,7 +45,7 @@ static int do_op(unsigned long addr, int len, int is_write, int n; addr = maybe_map(addr, is_write); - if(addr == -1) + if(addr == -1UL) return(-1); page = phys_to_page(addr); @@ -61,8 +65,7 @@ static void do_buffer_op(void *jmpbuf, void *arg_ptr) void *arg; int *res; - /* Some old gccs recognize __va_copy, but not va_copy */ - __va_copy(args, *(va_list *)arg_ptr); + va_copy(args, *(va_list *)arg_ptr); addr = va_arg(args, unsigned long); len = va_arg(args, int); is_write = va_arg(args, int); diff --git a/arch/um/kernel/skas/util/Makefile b/arch/um/kernel/skas/util/Makefile index 17f5909d60f7..f7b7eba83340 100644 --- a/arch/um/kernel/skas/util/Makefile +++ b/arch/um/kernel/skas/util/Makefile @@ -2,3 +2,4 @@ hostprogs-y := mk_ptregs always := $(hostprogs-y) mk_ptregs-objs := mk_ptregs-$(SUBARCH).o +HOSTCFLAGS_mk_ptregs-$(SUBARCH).o := -I$(objtree)/arch/um diff --git a/arch/um/kernel/skas/util/mk_ptregs-i386.c b/arch/um/kernel/skas/util/mk_ptregs-i386.c index 0788dd05bcac..1f96e1eeb8a7 100644 --- a/arch/um/kernel/skas/util/mk_ptregs-i386.c +++ b/arch/um/kernel/skas/util/mk_ptregs-i386.c @@ -1,8 +1,7 @@ #include <stdio.h> -#include <asm/ptrace.h> -#include <asm/user.h> +#include <user-offsets.h> -#define PRINT_REG(name, val) printf("#define HOST_%s %d\n", (name), (val)) +#define SHOW(name) printf("#define %s %d\n", #name, name) int main(int argc, char **argv) { @@ -12,28 +11,27 @@ int main(int argc, char **argv) printf("#ifndef __SKAS_PT_REGS_\n"); printf("#define __SKAS_PT_REGS_\n"); printf("\n"); - printf("#define HOST_FRAME_SIZE %d\n", FRAME_SIZE); - printf("#define HOST_FP_SIZE %d\n", - sizeof(struct user_i387_struct) / sizeof(unsigned long)); - printf("#define HOST_XFP_SIZE %d\n", - sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); + SHOW(HOST_FRAME_SIZE); + SHOW(HOST_FP_SIZE); + SHOW(HOST_XFP_SIZE); + + SHOW(HOST_IP); + SHOW(HOST_SP); + SHOW(HOST_EFLAGS); + SHOW(HOST_EAX); + SHOW(HOST_EBX); + SHOW(HOST_ECX); + SHOW(HOST_EDX); + SHOW(HOST_ESI); + SHOW(HOST_EDI); + SHOW(HOST_EBP); + SHOW(HOST_CS); + SHOW(HOST_SS); + SHOW(HOST_DS); + SHOW(HOST_FS); + SHOW(HOST_ES); + SHOW(HOST_GS); - PRINT_REG("IP", EIP); - PRINT_REG("SP", UESP); - PRINT_REG("EFLAGS", EFL); - PRINT_REG("EAX", EAX); - PRINT_REG("EBX", EBX); - PRINT_REG("ECX", ECX); - PRINT_REG("EDX", EDX); - PRINT_REG("ESI", ESI); - PRINT_REG("EDI", EDI); - PRINT_REG("EBP", EBP); - PRINT_REG("CS", CS); - PRINT_REG("SS", SS); - PRINT_REG("DS", DS); - PRINT_REG("FS", FS); - PRINT_REG("ES", ES); - PRINT_REG("GS", GS); printf("\n"); printf("#endif\n"); return(0); diff --git a/arch/um/kernel/skas/util/mk_ptregs-x86_64.c b/arch/um/kernel/skas/util/mk_ptregs-x86_64.c index 67aee92a70ef..5fccbfe35f78 100644 --- a/arch/um/kernel/skas/util/mk_ptregs-x86_64.c +++ b/arch/um/kernel/skas/util/mk_ptregs-x86_64.c @@ -5,11 +5,10 @@ */ #include <stdio.h> -#define __FRAME_OFFSETS -#include <asm/ptrace.h> +#include <user-offsets.h> -#define PRINT_REG(name, val) \ - printf("#define HOST_%s (%d / sizeof(unsigned long))\n", (name), (val)) +#define SHOW(name) \ + printf("#define %s (%d / sizeof(unsigned long))\n", #name, name) int main(int argc, char **argv) { @@ -18,36 +17,35 @@ int main(int argc, char **argv) printf("\n"); printf("#ifndef __SKAS_PT_REGS_\n"); printf("#define __SKAS_PT_REGS_\n"); - printf("#define HOST_FRAME_SIZE (%d / sizeof(unsigned long))\n", - FRAME_SIZE); - PRINT_REG("RBX", RBX); - PRINT_REG("RCX", RCX); - PRINT_REG("RDI", RDI); - PRINT_REG("RSI", RSI); - PRINT_REG("RDX", RDX); - PRINT_REG("RBP", RBP); - PRINT_REG("RAX", RAX); - PRINT_REG("R8", R8); - PRINT_REG("R9", R9); - PRINT_REG("R10", R10); - PRINT_REG("R11", R11); - PRINT_REG("R12", R12); - PRINT_REG("R13", R13); - PRINT_REG("R14", R14); - PRINT_REG("R15", R15); - PRINT_REG("ORIG_RAX", ORIG_RAX); - PRINT_REG("CS", CS); - PRINT_REG("SS", SS); - PRINT_REG("EFLAGS", EFLAGS); + SHOW(HOST_FRAME_SIZE); + SHOW(HOST_RBX); + SHOW(HOST_RCX); + SHOW(HOST_RDI); + SHOW(HOST_RSI); + SHOW(HOST_RDX); + SHOW(HOST_RBP); + SHOW(HOST_RAX); + SHOW(HOST_R8); + SHOW(HOST_R9); + SHOW(HOST_R10); + SHOW(HOST_R11); + SHOW(HOST_R12); + SHOW(HOST_R13); + SHOW(HOST_R14); + SHOW(HOST_R15); + SHOW(HOST_ORIG_RAX); + SHOW(HOST_CS); + SHOW(HOST_SS); + SHOW(HOST_EFLAGS); #if 0 - PRINT_REG("FS", FS); - PRINT_REG("GS", GS); - PRINT_REG("DS", DS); - PRINT_REG("ES", ES); + SHOW(HOST_FS); + SHOW(HOST_GS); + SHOW(HOST_DS); + SHOW(HOST_ES); #endif - PRINT_REG("IP", RIP); - PRINT_REG("SP", RSP); + SHOW(HOST_IP); + SHOW(HOST_SP); printf("#define HOST_FP_SIZE 0\n"); printf("#define HOST_XFP_SIZE 0\n"); printf("\n"); diff --git a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c deleted file mode 100644 index 7fc06c85b29d..000000000000 --- a/arch/um/kernel/sys_call_table.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Copyright 2003 PathScale, Inc. - * Licensed under the GPL - */ - -#include "linux/config.h" -#include "linux/unistd.h" -#include "linux/sys.h" -#include "linux/swap.h" -#include "linux/syscalls.h" -#include "linux/sysctl.h" -#include "asm/signal.h" -#include "sysdep/syscalls.h" -#include "kern_util.h" - -#ifdef CONFIG_NFSD -#define NFSSERVCTL sys_nfsservctl -#else -#define NFSSERVCTL sys_ni_syscall -#endif - -#define LAST_GENERIC_SYSCALL __NR_keyctl - -#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL -#define LAST_SYSCALL LAST_GENERIC_SYSCALL -#else -#define LAST_SYSCALL LAST_ARCH_SYSCALL -#endif - -extern syscall_handler_t sys_fork; -extern syscall_handler_t sys_execve; -extern syscall_handler_t um_time; -extern syscall_handler_t um_stime; -extern syscall_handler_t sys_pipe; -extern syscall_handler_t sys_olduname; -extern syscall_handler_t sys_sigaction; -extern syscall_handler_t sys_sigsuspend; -extern syscall_handler_t old_readdir; -extern syscall_handler_t sys_uname; -extern syscall_handler_t sys_ipc; -extern syscall_handler_t sys_sigreturn; -extern syscall_handler_t sys_clone; -extern syscall_handler_t sys_rt_sigreturn; -extern syscall_handler_t sys_sigaltstack; -extern syscall_handler_t sys_vfork; -extern syscall_handler_t old_select; -extern syscall_handler_t sys_modify_ldt; -extern syscall_handler_t sys_rt_sigsuspend; -extern syscall_handler_t sys_mbind; -extern syscall_handler_t sys_get_mempolicy; -extern syscall_handler_t sys_set_mempolicy; -extern syscall_handler_t sys_sys_setaltroot; - -syscall_handler_t *sys_call_table[] = { - [ __NR_restart_syscall ] = (syscall_handler_t *) sys_restart_syscall, - [ __NR_exit ] = (syscall_handler_t *) sys_exit, - [ __NR_fork ] = (syscall_handler_t *) sys_fork, - [ __NR_read ] = (syscall_handler_t *) sys_read, - [ __NR_write ] = (syscall_handler_t *) sys_write, - - /* These three are declared differently in asm/unistd.h */ - [ __NR_open ] = (syscall_handler_t *) sys_open, - [ __NR_close ] = (syscall_handler_t *) sys_close, - [ __NR_creat ] = (syscall_handler_t *) sys_creat, - [ __NR_link ] = (syscall_handler_t *) sys_link, - [ __NR_unlink ] = (syscall_handler_t *) sys_unlink, - [ __NR_execve ] = (syscall_handler_t *) sys_execve, - - /* declared differently in kern_util.h */ - [ __NR_chdir ] = (syscall_handler_t *) sys_chdir, - [ __NR_time ] = um_time, - [ __NR_mknod ] = (syscall_handler_t *) sys_mknod, - [ __NR_chmod ] = (syscall_handler_t *) sys_chmod, - [ __NR_lchown ] = (syscall_handler_t *) sys_lchown16, - [ __NR_lseek ] = (syscall_handler_t *) sys_lseek, - [ __NR_getpid ] = (syscall_handler_t *) sys_getpid, - [ __NR_mount ] = (syscall_handler_t *) sys_mount, - [ __NR_setuid ] = (syscall_handler_t *) sys_setuid16, - [ __NR_getuid ] = (syscall_handler_t *) sys_getuid16, - [ __NR_ptrace ] = (syscall_handler_t *) sys_ptrace, - [ __NR_alarm ] = (syscall_handler_t *) sys_alarm, - [ __NR_pause ] = (syscall_handler_t *) sys_pause, - [ __NR_utime ] = (syscall_handler_t *) sys_utime, - [ __NR_access ] = (syscall_handler_t *) sys_access, - [ __NR_sync ] = (syscall_handler_t *) sys_sync, - [ __NR_kill ] = (syscall_handler_t *) sys_kill, - [ __NR_rename ] = (syscall_handler_t *) sys_rename, - [ __NR_mkdir ] = (syscall_handler_t *) sys_mkdir, - [ __NR_rmdir ] = (syscall_handler_t *) sys_rmdir, - - /* Declared differently in asm/unistd.h */ - [ __NR_dup ] = (syscall_handler_t *) sys_dup, - [ __NR_pipe ] = (syscall_handler_t *) sys_pipe, - [ __NR_times ] = (syscall_handler_t *) sys_times, - [ __NR_brk ] = (syscall_handler_t *) sys_brk, - [ __NR_setgid ] = (syscall_handler_t *) sys_setgid16, - [ __NR_getgid ] = (syscall_handler_t *) sys_getgid16, - [ __NR_geteuid ] = (syscall_handler_t *) sys_geteuid16, - [ __NR_getegid ] = (syscall_handler_t *) sys_getegid16, - [ __NR_acct ] = (syscall_handler_t *) sys_acct, - [ __NR_umount2 ] = (syscall_handler_t *) sys_umount, - [ __NR_ioctl ] = (syscall_handler_t *) sys_ioctl, - [ __NR_fcntl ] = (syscall_handler_t *) sys_fcntl, - [ __NR_setpgid ] = (syscall_handler_t *) sys_setpgid, - [ __NR_umask ] = (syscall_handler_t *) sys_umask, - [ __NR_chroot ] = (syscall_handler_t *) sys_chroot, - [ __NR_ustat ] = (syscall_handler_t *) sys_ustat, - [ __NR_dup2 ] = (syscall_handler_t *) sys_dup2, - [ __NR_getppid ] = (syscall_handler_t *) sys_getppid, - [ __NR_getpgrp ] = (syscall_handler_t *) sys_getpgrp, - [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, - [ __NR_setreuid ] = (syscall_handler_t *) sys_setreuid16, - [ __NR_setregid ] = (syscall_handler_t *) sys_setregid16, - [ __NR_sethostname ] = (syscall_handler_t *) sys_sethostname, - [ __NR_setrlimit ] = (syscall_handler_t *) sys_setrlimit, - [ __NR_getrlimit ] = (syscall_handler_t *) sys_old_getrlimit, - [ __NR_getrusage ] = (syscall_handler_t *) sys_getrusage, - [ __NR_gettimeofday ] = (syscall_handler_t *) sys_gettimeofday, - [ __NR_settimeofday ] = (syscall_handler_t *) sys_settimeofday, - [ __NR_getgroups ] = (syscall_handler_t *) sys_getgroups16, - [ __NR_setgroups ] = (syscall_handler_t *) sys_setgroups16, - [ __NR_symlink ] = (syscall_handler_t *) sys_symlink, - [ __NR_readlink ] = (syscall_handler_t *) sys_readlink, - [ __NR_uselib ] = (syscall_handler_t *) sys_uselib, - [ __NR_swapon ] = (syscall_handler_t *) sys_swapon, - [ __NR_reboot ] = (syscall_handler_t *) sys_reboot, - [ __NR_munmap ] = (syscall_handler_t *) sys_munmap, - [ __NR_truncate ] = (syscall_handler_t *) sys_truncate, - [ __NR_ftruncate ] = (syscall_handler_t *) sys_ftruncate, - [ __NR_fchmod ] = (syscall_handler_t *) sys_fchmod, - [ __NR_fchown ] = (syscall_handler_t *) sys_fchown16, - [ __NR_getpriority ] = (syscall_handler_t *) sys_getpriority, - [ __NR_setpriority ] = (syscall_handler_t *) sys_setpriority, - [ __NR_statfs ] = (syscall_handler_t *) sys_statfs, - [ __NR_fstatfs ] = (syscall_handler_t *) sys_fstatfs, - [ __NR_ioperm ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_syslog ] = (syscall_handler_t *) sys_syslog, - [ __NR_setitimer ] = (syscall_handler_t *) sys_setitimer, - [ __NR_getitimer ] = (syscall_handler_t *) sys_getitimer, - [ __NR_stat ] = (syscall_handler_t *) sys_newstat, - [ __NR_lstat ] = (syscall_handler_t *) sys_newlstat, - [ __NR_fstat ] = (syscall_handler_t *) sys_newfstat, - [ __NR_vhangup ] = (syscall_handler_t *) sys_vhangup, - [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, - [ __NR_swapoff ] = (syscall_handler_t *) sys_swapoff, - [ __NR_sysinfo ] = (syscall_handler_t *) sys_sysinfo, - [ __NR_fsync ] = (syscall_handler_t *) sys_fsync, - [ __NR_clone ] = (syscall_handler_t *) sys_clone, - [ __NR_setdomainname ] = (syscall_handler_t *) sys_setdomainname, - [ __NR_uname ] = (syscall_handler_t *) sys_newuname, - [ __NR_adjtimex ] = (syscall_handler_t *) sys_adjtimex, - [ __NR_mprotect ] = (syscall_handler_t *) sys_mprotect, - [ __NR_create_module ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_init_module ] = (syscall_handler_t *) sys_init_module, - [ __NR_delete_module ] = (syscall_handler_t *) sys_delete_module, - [ __NR_get_kernel_syms ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_quotactl ] = (syscall_handler_t *) sys_quotactl, - [ __NR_getpgid ] = (syscall_handler_t *) sys_getpgid, - [ __NR_fchdir ] = (syscall_handler_t *) sys_fchdir, - [ __NR_sysfs ] = (syscall_handler_t *) sys_sysfs, - [ __NR_personality ] = (syscall_handler_t *) sys_personality, - [ __NR_afs_syscall ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_setfsuid ] = (syscall_handler_t *) sys_setfsuid16, - [ __NR_setfsgid ] = (syscall_handler_t *) sys_setfsgid16, - [ __NR_getdents ] = (syscall_handler_t *) sys_getdents, - [ __NR_flock ] = (syscall_handler_t *) sys_flock, - [ __NR_msync ] = (syscall_handler_t *) sys_msync, - [ __NR_readv ] = (syscall_handler_t *) sys_readv, - [ __NR_writev ] = (syscall_handler_t *) sys_writev, - [ __NR_getsid ] = (syscall_handler_t *) sys_getsid, - [ __NR_fdatasync ] = (syscall_handler_t *) sys_fdatasync, - [ __NR__sysctl ] = (syscall_handler_t *) sys_sysctl, - [ __NR_mlock ] = (syscall_handler_t *) sys_mlock, - [ __NR_munlock ] = (syscall_handler_t *) sys_munlock, - [ __NR_mlockall ] = (syscall_handler_t *) sys_mlockall, - [ __NR_munlockall ] = (syscall_handler_t *) sys_munlockall, - [ __NR_sched_setparam ] = (syscall_handler_t *) sys_sched_setparam, - [ __NR_sched_getparam ] = (syscall_handler_t *) sys_sched_getparam, - [ __NR_sched_setscheduler ] = (syscall_handler_t *) sys_sched_setscheduler, - [ __NR_sched_getscheduler ] = (syscall_handler_t *) sys_sched_getscheduler, - [ __NR_sched_yield ] = (syscall_handler_t *) yield, - [ __NR_sched_get_priority_max ] = (syscall_handler_t *) sys_sched_get_priority_max, - [ __NR_sched_get_priority_min ] = (syscall_handler_t *) sys_sched_get_priority_min, - [ __NR_sched_rr_get_interval ] = (syscall_handler_t *) sys_sched_rr_get_interval, - [ __NR_nanosleep ] = (syscall_handler_t *) sys_nanosleep, - [ __NR_mremap ] = (syscall_handler_t *) sys_mremap, - [ __NR_setresuid ] = (syscall_handler_t *) sys_setresuid16, - [ __NR_getresuid ] = (syscall_handler_t *) sys_getresuid16, - [ __NR_query_module ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_poll ] = (syscall_handler_t *) sys_poll, - [ __NR_nfsservctl ] = (syscall_handler_t *) NFSSERVCTL, - [ __NR_setresgid ] = (syscall_handler_t *) sys_setresgid16, - [ __NR_getresgid ] = (syscall_handler_t *) sys_getresgid16, - [ __NR_prctl ] = (syscall_handler_t *) sys_prctl, - [ __NR_rt_sigreturn ] = (syscall_handler_t *) sys_rt_sigreturn, - [ __NR_rt_sigaction ] = (syscall_handler_t *) sys_rt_sigaction, - [ __NR_rt_sigprocmask ] = (syscall_handler_t *) sys_rt_sigprocmask, - [ __NR_rt_sigpending ] = (syscall_handler_t *) sys_rt_sigpending, - [ __NR_rt_sigtimedwait ] = (syscall_handler_t *) sys_rt_sigtimedwait, - [ __NR_rt_sigqueueinfo ] = (syscall_handler_t *) sys_rt_sigqueueinfo, - [ __NR_rt_sigsuspend ] = (syscall_handler_t *) sys_rt_sigsuspend, - [ __NR_pread64 ] = (syscall_handler_t *) sys_pread64, - [ __NR_pwrite64 ] = (syscall_handler_t *) sys_pwrite64, - [ __NR_chown ] = (syscall_handler_t *) sys_chown16, - [ __NR_getcwd ] = (syscall_handler_t *) sys_getcwd, - [ __NR_capget ] = (syscall_handler_t *) sys_capget, - [ __NR_capset ] = (syscall_handler_t *) sys_capset, - [ __NR_sigaltstack ] = (syscall_handler_t *) sys_sigaltstack, - [ __NR_sendfile ] = (syscall_handler_t *) sys_sendfile, - [ __NR_getpmsg ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_putpmsg ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_vfork ] = (syscall_handler_t *) sys_vfork, - [ __NR_getdents64 ] = (syscall_handler_t *) sys_getdents64, - [ __NR_gettid ] = (syscall_handler_t *) sys_gettid, - [ __NR_readahead ] = (syscall_handler_t *) sys_readahead, - [ __NR_setxattr ] = (syscall_handler_t *) sys_setxattr, - [ __NR_lsetxattr ] = (syscall_handler_t *) sys_lsetxattr, - [ __NR_fsetxattr ] = (syscall_handler_t *) sys_fsetxattr, - [ __NR_getxattr ] = (syscall_handler_t *) sys_getxattr, - [ __NR_lgetxattr ] = (syscall_handler_t *) sys_lgetxattr, - [ __NR_fgetxattr ] = (syscall_handler_t *) sys_fgetxattr, - [ __NR_listxattr ] = (syscall_handler_t *) sys_listxattr, - [ __NR_llistxattr ] = (syscall_handler_t *) sys_llistxattr, - [ __NR_flistxattr ] = (syscall_handler_t *) sys_flistxattr, - [ __NR_removexattr ] = (syscall_handler_t *) sys_removexattr, - [ __NR_lremovexattr ] = (syscall_handler_t *) sys_lremovexattr, - [ __NR_fremovexattr ] = (syscall_handler_t *) sys_fremovexattr, - [ __NR_tkill ] = (syscall_handler_t *) sys_tkill, - [ __NR_futex ] = (syscall_handler_t *) sys_futex, - [ __NR_sched_setaffinity ] = (syscall_handler_t *) sys_sched_setaffinity, - [ __NR_sched_getaffinity ] = (syscall_handler_t *) sys_sched_getaffinity, - [ __NR_io_setup ] = (syscall_handler_t *) sys_io_setup, - [ __NR_io_destroy ] = (syscall_handler_t *) sys_io_destroy, - [ __NR_io_getevents ] = (syscall_handler_t *) sys_io_getevents, - [ __NR_io_submit ] = (syscall_handler_t *) sys_io_submit, - [ __NR_io_cancel ] = (syscall_handler_t *) sys_io_cancel, - [ __NR_exit_group ] = (syscall_handler_t *) sys_exit_group, - [ __NR_lookup_dcookie ] = (syscall_handler_t *) sys_lookup_dcookie, - [ __NR_epoll_create ] = (syscall_handler_t *) sys_epoll_create, - [ __NR_epoll_ctl ] = (syscall_handler_t *) sys_epoll_ctl, - [ __NR_epoll_wait ] = (syscall_handler_t *) sys_epoll_wait, - [ __NR_remap_file_pages ] = (syscall_handler_t *) sys_remap_file_pages, - [ __NR_set_tid_address ] = (syscall_handler_t *) sys_set_tid_address, - [ __NR_timer_create ] = (syscall_handler_t *) sys_timer_create, - [ __NR_timer_settime ] = (syscall_handler_t *) sys_timer_settime, - [ __NR_timer_gettime ] = (syscall_handler_t *) sys_timer_gettime, - [ __NR_timer_getoverrun ] = (syscall_handler_t *) sys_timer_getoverrun, - [ __NR_timer_delete ] = (syscall_handler_t *) sys_timer_delete, - [ __NR_clock_settime ] = (syscall_handler_t *) sys_clock_settime, - [ __NR_clock_gettime ] = (syscall_handler_t *) sys_clock_gettime, - [ __NR_clock_getres ] = (syscall_handler_t *) sys_clock_getres, - [ __NR_clock_nanosleep ] = (syscall_handler_t *) sys_clock_nanosleep, - [ __NR_tgkill ] = (syscall_handler_t *) sys_tgkill, - [ __NR_utimes ] = (syscall_handler_t *) sys_utimes, - [ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64, - [ __NR_vserver ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_mbind ] = (syscall_handler_t *) sys_mbind, - [ __NR_get_mempolicy ] = (syscall_handler_t *) sys_get_mempolicy, - [ __NR_set_mempolicy ] = (syscall_handler_t *) sys_set_mempolicy, - [ __NR_mq_open ] = (syscall_handler_t *) sys_mq_open, - [ __NR_mq_unlink ] = (syscall_handler_t *) sys_mq_unlink, - [ __NR_mq_timedsend ] = (syscall_handler_t *) sys_mq_timedsend, - [ __NR_mq_timedreceive ] = (syscall_handler_t *) sys_mq_timedreceive, - [ __NR_mq_notify ] = (syscall_handler_t *) sys_mq_notify, - [ __NR_mq_getsetattr ] = (syscall_handler_t *) sys_mq_getsetattr, - [ __NR_kexec_load ] = (syscall_handler_t *) sys_ni_syscall, - [ __NR_waitid ] = (syscall_handler_t *) sys_waitid, - [ __NR_add_key ] = (syscall_handler_t *) sys_add_key, - [ __NR_request_key ] = (syscall_handler_t *) sys_request_key, - [ __NR_keyctl ] = (syscall_handler_t *) sys_keyctl, - - ARCH_SYSCALLS - [ LAST_SYSCALL + 1 ... NR_syscalls ] = - (syscall_handler_t *) sys_ni_syscall -}; diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index 42731e04f50f..b7a55251e897 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -17,7 +17,6 @@ #include "linux/utime.h" #include "asm/mman.h" #include "asm/uaccess.h" -#include "asm/ipc.h" #include "kern_util.h" #include "user_util.h" #include "sysdep/syscalls.h" diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index 2461cd73ca87..6516fc52afe0 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -48,8 +48,6 @@ static unsigned long long prev_usecs; static long long delta; /* Deviation per interval */ #endif -#define MILLION 1000000 - void timer_irq(union uml_pt_regs *regs) { unsigned long long ticks = 0; @@ -136,22 +134,6 @@ long um_stime(int __user *tptr) return 0; } -void __udelay(unsigned long usecs) -{ - int i, n; - - n = (loops_per_jiffy * HZ * usecs) / MILLION; - for(i=0;i<n;i++) ; -} - -void __const_udelay(unsigned long usecs) -{ - int i, n; - - n = (loops_per_jiffy * HZ * usecs) / MILLION; - for(i=0;i<n;i++) ; -} - void timer_handler(int sig, union uml_pt_regs *regs) { local_irq_disable(); diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index 47e766e6ba10..5fca2c61eb98 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -48,7 +48,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, goto good_area; else if(!(vma->vm_flags & VM_GROWSDOWN)) goto out; - else if(!ARCH_IS_STACKGROW(address)) + else if(is_user && !ARCH_IS_STACKGROW(address)) goto out; else if(expand_stack(vma, address)) goto out; @@ -133,12 +133,19 @@ static int check_remapped_addr(unsigned long address, int is_write) return(0); } -unsigned long segv(unsigned long address, unsigned long ip, int is_write, - int is_user, void *sc) +/* + * We give a *copy* of the faultinfo in the regs to segv. + * This must be done, since nesting SEGVs could overwrite + * the info in the regs. A pointer to the info then would + * give us bad data! + */ +unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) { struct siginfo si; void *catcher; int err; + int is_write = FAULT_WRITE(fi); + unsigned long address = FAULT_ADDRESS(fi); if(!is_user && (address >= start_vm) && (address < end_vm)){ flush_tlb_kernel_vm(); @@ -159,7 +166,7 @@ unsigned long segv(unsigned long address, unsigned long ip, int is_write, } else if(current->thread.fault_addr != NULL) panic("fault_addr set but no fault catcher"); - else if(arch_fixup(ip, sc)) + else if(!is_user && arch_fixup(ip, sc)) return(0); if(!is_user) @@ -171,6 +178,7 @@ unsigned long segv(unsigned long address, unsigned long ip, int is_write, si.si_errno = 0; si.si_code = BUS_ADRERR; si.si_addr = (void *)address; + current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); } else if(err == -ENOMEM){ @@ -180,22 +188,20 @@ unsigned long segv(unsigned long address, unsigned long ip, int is_write, else { si.si_signo = SIGSEGV; si.si_addr = (void *) address; - current->thread.cr2 = address; - current->thread.err = is_write; + current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } return(0); } -void bad_segv(unsigned long address, unsigned long ip, int is_write) +void bad_segv(struct faultinfo fi, unsigned long ip) { struct siginfo si; si.si_signo = SIGSEGV; si.si_code = SEGV_ACCERR; - si.si_addr = (void *) address; - current->thread.cr2 = address; - current->thread.err = is_write; + si.si_addr = (void *) FAULT_ADDRESS(fi); + current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } @@ -204,6 +210,7 @@ void relay_signal(int sig, union uml_pt_regs *regs) if(arch_handle_signal(sig, regs)) return; if(!UPT_IS_USER(regs)) panic("Kernel mode signal %d", sig); + current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); force_sig(sig, current); } diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c index 50a4042a509f..f825a6eda3f5 100644 --- a/arch/um/kernel/trap_user.c +++ b/arch/um/kernel/trap_user.c @@ -54,23 +54,22 @@ struct { void segv_handler(int sig, union uml_pt_regs *regs) { int index, max; + struct faultinfo * fi = UPT_FAULTINFO(regs); - if(UPT_IS_USER(regs) && !UPT_SEGV_IS_FIXABLE(regs)){ - bad_segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), - UPT_FAULT_WRITE(regs)); + if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ + bad_segv(*fi, UPT_IP(regs)); return; } max = sizeof(segfault_record)/sizeof(segfault_record[0]); index = next_trap_index(max); nsegfaults++; - segfault_record[index].address = UPT_FAULT_ADDR(regs); + segfault_record[index].address = FAULT_ADDRESS(*fi); segfault_record[index].pid = os_getpid(); - segfault_record[index].is_write = UPT_FAULT_WRITE(regs); + segfault_record[index].is_write = FAULT_WRITE(*fi); segfault_record[index].sp = UPT_SP(regs); segfault_record[index].is_user = UPT_IS_USER(regs); - segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), UPT_FAULT_WRITE(regs), - UPT_IS_USER(regs), regs); + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); } void usr2_handler(int sig, union uml_pt_regs *regs) diff --git a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile index 3d5177df3504..c3faea21a996 100644 --- a/arch/um/kernel/tt/Makefile +++ b/arch/um/kernel/tt/Makefile @@ -4,6 +4,7 @@ # extra-y := unmap_fin.o +targets := unmap.o clean-files := unmap_tmp.o obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ diff --git a/arch/um/kernel/tt/include/mode_kern-tt.h b/arch/um/kernel/tt/include/mode_kern-tt.h index 28aaab3448fa..e0ca0e0b2516 100644 --- a/arch/um/kernel/tt/include/mode_kern-tt.h +++ b/arch/um/kernel/tt/include/mode_kern-tt.h @@ -19,7 +19,6 @@ extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct *p, struct pt_regs *regs); extern void release_thread_tt(struct task_struct *task); -extern void exit_thread_tt(void); extern void initial_thread_cb_tt(void (*proc)(void *), void *arg); extern void init_idle_tt(void); extern void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end); diff --git a/arch/um/kernel/tt/include/uaccess-tt.h b/arch/um/kernel/tt/include/uaccess-tt.h index f0bad010cebd..3fbb5fe26f49 100644 --- a/arch/um/kernel/tt/include/uaccess-tt.h +++ b/arch/um/kernel/tt/include/uaccess-tt.h @@ -34,7 +34,7 @@ extern unsigned long uml_physmem; (under_task_size(addr, size) || is_stack(addr, size)))) static inline int verify_area_tt(int type, const void * addr, - unsigned long size) + unsigned long size) { return(access_ok_tt(type, addr, size) ? 0 : -EFAULT); } diff --git a/arch/um/kernel/tt/mem.c b/arch/um/kernel/tt/mem.c index 74346a04a2b2..bcb8796c3cb1 100644 --- a/arch/um/kernel/tt/mem.c +++ b/arch/um/kernel/tt/mem.c @@ -21,14 +21,8 @@ void before_mem_tt(unsigned long brk_start) remap_data(UML_ROUND_DOWN(&__bss_start), UML_ROUND_UP(&_end), 1); } -#ifdef CONFIG_HOST_2G_2G -#define TOP 0x80000000 -#else -#define TOP 0xc0000000 -#endif - #define SIZE ((CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS) * 0x20000000) -#define START (TOP - SIZE) +#define START (CONFIG_TOP_ADDR - SIZE) unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, unsigned long *task_size_out) diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index f19f7c18febe..df810ca8fc12 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -65,8 +65,7 @@ void *switch_to_tt(void *prev, void *next, void *last) panic("write of switch_pipe failed, err = %d", -err); reading = 1; - if((from->exit_state == EXIT_ZOMBIE) || - (from->exit_state == EXIT_DEAD)) + if(from->thread.mode.tt.switch_pipe[0] == -1) os_kill_process(os_getpid(), 0); err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); @@ -81,8 +80,7 @@ void *switch_to_tt(void *prev, void *next, void *last) * in case it has not already killed itself. */ prev_sched = current->thread.prev_sched; - if((prev_sched->exit_state == EXIT_ZOMBIE) || - (prev_sched->exit_state == EXIT_DEAD)) + if(prev_sched->thread.mode.tt.switch_pipe[0] == -1) os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1); change_sig(SIGVTALRM, vtalrm); @@ -101,14 +99,18 @@ void release_thread_tt(struct task_struct *task) { int pid = task->thread.mode.tt.extern_pid; + /* + * We first have to kill the other process, before + * closing its switch_pipe. Else it might wake up + * and receive "EOF" before we could kill it. + */ if(os_getpid() != pid) os_kill_process(pid, 0); -} -void exit_thread_tt(void) -{ - os_close_file(current->thread.mode.tt.switch_pipe[0]); - os_close_file(current->thread.mode.tt.switch_pipe[1]); + os_close_file(task->thread.mode.tt.switch_pipe[0]); + os_close_file(task->thread.mode.tt.switch_pipe[1]); + /* use switch_pipe as flag: thread is released */ + task->thread.mode.tt.switch_pipe[0] = -1; } void suspend_new_thread(int fd) diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c index e4e7e9c2224c..b218316cfdb2 100644 --- a/arch/um/kernel/tt/syscall_user.c +++ b/arch/um/kernel/tt/syscall_user.c @@ -63,6 +63,10 @@ void do_syscall(void *task, int pid, int local_using_sysemu) UPT_SYSCALL_NR(TASK_REGS(task)) = PT_SYSCALL_NR(proc_regs); +#ifdef UPT_ORIGGPR2 + UPT_ORIGGPR2(TASK_REGS(task)) = REGS_ORIGGPR2(proc_regs); +#endif + if(((unsigned long *) PT_IP(proc_regs) >= &_stext) && ((unsigned long *) PT_IP(proc_regs) <= &_etext)) tracer_panic("I'm tracing myself and I can't get out"); diff --git a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c index 7b5d937e5955..d11e7399d7a1 100644 --- a/arch/um/kernel/tt/tracer.c +++ b/arch/um/kernel/tt/tracer.c @@ -26,6 +26,7 @@ #include "kern_util.h" #include "chan_user.h" #include "ptrace_user.h" +#include "irq_user.h" #include "mode.h" #include "tt.h" @@ -33,7 +34,7 @@ static int tracer_winch[2]; int is_tracer_winch(int pid, int fd, void *data) { - if(pid != tracing_pid) + if(pid != os_getpgrp()) return(0); register_winch_irq(tracer_winch[0], fd, -1, data); @@ -89,8 +90,10 @@ void tracer_panic(char *format, ...) static void tracer_segv(int sig, struct sigcontext sc) { + struct faultinfo fi; + GET_FAULTINFO_FROM_SC(fi, &sc); printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n", - SC_FAULT_ADDR(&sc), SC_IP(&sc)); + FAULT_ADDRESS(fi), SC_IP(&sc)); while(1) pause(); } @@ -117,6 +120,7 @@ static int signal_tramp(void *arg) signal(SIGSEGV, (__sighandler_t) sig_handler); set_cmdline("(idle thread)"); set_init_pid(os_getpid()); + init_irq_signals(0); proc = arg; return((*proc)(NULL)); } diff --git a/arch/um/kernel/tt/trap_user.c b/arch/um/kernel/tt/trap_user.c index 92a3820ca543..fc108615beaf 100644 --- a/arch/um/kernel/tt/trap_user.c +++ b/arch/um/kernel/tt/trap_user.c @@ -7,6 +7,7 @@ #include <errno.h> #include <signal.h> #include "sysdep/ptrace.h" +#include "sysdep/sigcontext.h" #include "signal_user.h" #include "user_util.h" #include "kern_util.h" @@ -28,6 +29,11 @@ void sig_handler_common_tt(int sig, void *sc_ptr) change_sig(SIGSEGV, 1); r = &TASK_REGS(get_current())->tt; + if ( sig == SIGFPE || sig == SIGSEGV || + sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP ) { + GET_FAULTINFO_FROM_SC(r->faultinfo, sc); + } save_regs = *r; is_user = user_context(SC_SP(sc)); r->sc = sc; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 5c49d88eed3d..4d10ec372a67 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -23,6 +23,7 @@ #include "asm/ptrace.h" #include "asm/elf.h" #include "asm/user.h" +#include "asm/setup.h" #include "ubd_user.h" #include "asm/current.h" #include "asm/setup.h" @@ -42,9 +43,9 @@ #define DEFAULT_COMMAND_LINE "root=98:0" /* Changed in linux_main and setup_arch, which run before SMP is started */ -char command_line[COMMAND_LINE_SIZE] = { 0 }; +static char command_line[COMMAND_LINE_SIZE] = { 0 }; -void add_arg(char *arg) +static void add_arg(char *arg) { if (strlen(command_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { printf("add_arg: Too many command line arguments!\n"); @@ -449,7 +450,7 @@ void __init setup_arch(char **cmdline_p) { notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); paging_init(); - strlcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); + strlcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; setup_hostinfo(); } diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..1660a769674b --- /dev/null +++ b/arch/um/kernel/vmlinux.lds.S @@ -0,0 +1,6 @@ +#include <linux/config.h> +#ifdef CONFIG_LD_SCRIPT_STATIC +#include "uml.lds.S" +#else +#include "dyn.lds.S" +#endif diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index ba9ca1cc790a..1e126bfd31a7 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -123,6 +123,11 @@ int os_getpid(void) return(getpid()); } +int os_getpgrp(void) +{ + return getpgrp(); +} + int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 7eac1baf5975..c7bfd5ee3925 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -8,7 +8,7 @@ #include "mode.h" #include "sysdep/signal.h" -void sig_handler(int sig) +void sig_handler(ARCH_SIGHDLR_PARAM) { struct sigcontext *sc; @@ -19,7 +19,7 @@ void sig_handler(int sig) extern int timer_irq_inited; -void alarm_handler(int sig) +void alarm_handler(ARCH_SIGHDLR_PARAM) { struct sigcontext *sc; diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 148645b14480..9a0ad094d926 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -105,14 +105,15 @@ void init_registers(int pid) panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", err); + errno = 0; err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs); if(!err) return; + if(errno != EIO) + panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", + errno); have_fpx_regs = 0; - if(err != EIO) - panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", - err); err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs); if(err) diff --git a/arch/um/os-Linux/util/Makefile b/arch/um/os-Linux/util/Makefile index fb00ddf969bd..9778aed0c314 100644 --- a/arch/um/os-Linux/util/Makefile +++ b/arch/um/os-Linux/util/Makefile @@ -1,4 +1,4 @@ hostprogs-y := mk_user_constants always := $(hostprogs-y) -mk_user_constants-objs := mk_user_constants.o +HOSTCFLAGS_mk_user_constants.o := -I$(objtree)/arch/um diff --git a/arch/um/os-Linux/util/mk_user_constants.c b/arch/um/os-Linux/util/mk_user_constants.c index 0933518aa8bd..4838f30eecf0 100644 --- a/arch/um/os-Linux/util/mk_user_constants.c +++ b/arch/um/os-Linux/util/mk_user_constants.c @@ -1,11 +1,5 @@ #include <stdio.h> -#include <asm/types.h> -/* For some reason, x86_64 nowhere defines u64 and u32, even though they're - * used throughout the headers. - */ -typedef __u64 u64; -typedef __u32 u32; -#include <asm/user.h> +#include <user-offsets.h> int main(int argc, char **argv) { @@ -20,7 +14,7 @@ int main(int argc, char **argv) * x86_64 (216 vs 168 bytes). user_regs_struct is the correct size on * both x86_64 and i386. */ - printf("#define UM_FRAME_SIZE %d\n", (int) sizeof(struct user_regs_struct)); + printf("#define UM_FRAME_SIZE %d\n", __UM_FRAME_SIZE); printf("\n"); printf("#endif\n"); diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index 143f6fea0763..0b2491883d9c 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -2,12 +2,27 @@ # arch/um: Generic definitions # =========================================================================== -USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) -USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) - +USER_SINGLE_OBJS := \ + $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) +USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) -$(USER_OBJS): c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(notdir $@)) +$(USER_OBJS) : c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \ + $(CFLAGS_$(notdir $@)) quiet_cmd_make_link = SYMLINK $@ -cmd_make_link = rm -f $@; ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ +cmd_make_link = ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ + +# this needs to be before the foreach, because targets does not accept +# complete paths like $(obj)/$(f). To make sure this works, use a := assignment, +# or we will get $(obj)/$(f) in the "targets" value. +# Also, this forces you to use the := syntax when assigning to targets. +# Otherwise the line below will cause an infinite loop (if you don't know why, +# just do it). + +targets := $(targets) $(SYMLINKS) + +SYMLINKS := $(foreach f,$(SYMLINKS),$(obj)/$(f)) + +$(SYMLINKS): FORCE + $(call if_changed,make_link) diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 71b47e618605..fcd67c3414e4 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,29 +1,19 @@ obj-y = bitops.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o semaphore.o signal.o sigcontext.o syscalls.o sysrq.o + ptrace_user.o semaphore.o signal.o sigcontext.o syscalls.o sysrq.o \ + sys_call_table.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_MODULES) += module.o USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o -include arch/um/scripts/Makefile.rules - SYMLINKS = bitops.c semaphore.c highmem.c module.c -# this needs to be before the foreach, because clean-files does not accept -# complete paths like $(src)/$f. -clean-files := $(SYMLINKS) - -targets += $(SYMLINKS) - -SYMLINKS := $(foreach f,$(SYMLINKS),$(obj)/$f) - bitops.c-dir = lib semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel -$(SYMLINKS): FORCE - $(call if_changed,make_link) - subdir- := util + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/sys-i386/checksum.S b/arch/um/sys-i386/checksum.S index a11171fb6223..d98b2fff3d08 100644 --- a/arch/um/sys-i386/checksum.S +++ b/arch/um/sys-i386/checksum.S @@ -38,7 +38,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) .text .align 4 -.globl arch_csum_partial +.globl csum_partial #ifndef CONFIG_X86_USE_PPRO_CHECKSUM @@ -49,7 +49,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ -arch_csum_partial: +csum_partial: pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum @@ -119,7 +119,7 @@ arch_csum_partial: /* Version for PentiumII/PPro */ -arch_csum_partial: +csum_partial: pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c index 20d37dbbaf08..e9892eef51ce 100644 --- a/arch/um/sys-i386/delay.c +++ b/arch/um/sys-i386/delay.c @@ -1,3 +1,6 @@ +#include "linux/delay.h" +#include "asm/param.h" + void __delay(unsigned long time) { /* Stolen from the i386 __loop_delay */ @@ -12,3 +15,18 @@ void __delay(unsigned long time) :"0" (time)); } +void __udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) ; +} + +void __const_udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) ; +} diff --git a/arch/um/sys-i386/kernel-offsets.c b/arch/um/sys-i386/kernel-offsets.c new file mode 100644 index 000000000000..9f8ecd1fdd96 --- /dev/null +++ b/arch/um/sys-i386/kernel-offsets.c @@ -0,0 +1,25 @@ +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <asm/page.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define STR(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(TASK_DEBUGREGS, task_struct, thread.arch.debugregs); +#ifdef CONFIG_MODE_TT + OFFSET(TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif +#include <common-offsets.h> +} diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c index 74f70a120458..db524ab3f743 100644 --- a/arch/um/sys-i386/ksyms.c +++ b/arch/um/sys-i386/ksyms.c @@ -2,6 +2,7 @@ #include "linux/in6.h" #include "linux/rwsem.h" #include "asm/byteorder.h" +#include "asm/delay.h" #include "asm/semaphore.h" #include "asm/uaccess.h" #include "asm/checksum.h" @@ -13,5 +14,8 @@ EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_from); -EXPORT_SYMBOL(csum_partial_copy_to); +EXPORT_SYMBOL(csum_partial); + +/* delay core functions */ +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index 31bcb2f997d4..dc755b0b9db8 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -25,7 +25,7 @@ int sys_modify_ldt_tt(int func, void __user *ptr, unsigned long bytecount) #endif #ifdef CONFIG_MODE_SKAS -extern int userspace_pid; +extern int userspace_pid[]; #include "skas_ptrace.h" @@ -56,7 +56,8 @@ int sys_modify_ldt_skas(int func, void __user *ptr, unsigned long bytecount) ldt = ((struct ptrace_ldt) { .func = func, .ptr = buf, .bytecount = bytecount }); - res = ptrace(PTRACE_LDT, userspace_pid, 0, (unsigned long) &ldt); +#warning Need to look up userspace_pid by cpu + res = ptrace(PTRACE_LDT, userspace_pid[0], 0, (unsigned long) &ldt); if(res < 0) goto out; diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index e470d28cdf84..e839ce65ad28 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -73,6 +73,25 @@ int putreg(struct task_struct *child, int regno, unsigned long value) return 0; } +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + unsigned long getreg(struct task_struct *child, int regno) { unsigned long retval = ~0UL; @@ -93,6 +112,27 @@ unsigned long getreg(struct task_struct *child, int regno) return retval; } +int peek_user(struct task_struct *child, long addr, long data) +{ +/* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + struct i387_fxsave_struct { unsigned short cwd; unsigned short swd; diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 76ba87254b25..03913ca5d256 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -47,9 +47,6 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, REGS_CS(regs->regs.skas.regs) = sc.cs; REGS_EFLAGS(regs->regs.skas.regs) = sc.eflags; REGS_SS(regs->regs.skas.regs) = sc.ss; - regs->regs.skas.fault_addr = sc.cr2; - regs->regs.skas.fault_type = FAULT_WRITE(sc.err); - regs->regs.skas.trap_type = sc.trapno; err = restore_fp_registers(userspace_pid[0], fpregs); if(err < 0){ @@ -62,11 +59,11 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, } int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, - struct pt_regs *regs, unsigned long fault_addr, - int fault_type) + struct pt_regs *regs) { struct sigcontext sc; unsigned long fpregs[HOST_FP_SIZE]; + struct faultinfo * fi = ¤t->thread.arch.faultinfo; int err; sc.gs = REGS_GS(regs->regs.skas.regs); @@ -86,9 +83,9 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, sc.eflags = REGS_EFLAGS(regs->regs.skas.regs); sc.esp_at_signal = regs->regs.skas.regs[UESP]; sc.ss = regs->regs.skas.regs[SS]; - sc.cr2 = fault_addr; - sc.err = TO_SC_ERR(fault_type); - sc.trapno = regs->regs.skas.trap_type; + sc.cr2 = fi->cr2; + sc.err = fi->error_code; + sc.trapno = fi->trap_no; err = save_fp_registers(userspace_pid[0], fpregs); if(err < 0){ @@ -167,9 +164,7 @@ static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp, { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), sizeof(*fp)), - copy_sc_to_user_skas(to, fp, from, - current->thread.cr2, - current->thread.err))); + copy_sc_to_user_skas(to, fp, from))); } static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp, diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S new file mode 100644 index 000000000000..ad75c27afe38 --- /dev/null +++ b/arch/um/sys-i386/sys_call_table.S @@ -0,0 +1,16 @@ +#include <linux/linkage.h> +/* Steal i386 syscall table for our purposes, but with some slight changes.*/ + +#define sys_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +#define sys_vm86old sys_ni_syscall +#define sys_vm86 sys_ni_syscall +#define sys_set_thread_area sys_ni_syscall +#define sys_get_thread_area sys_ni_syscall + +#define sys_stime um_stime +#define sys_time um_time +#define old_mmap old_mmap_i386 + +#include "../../i386/kernel/syscall_table.S" diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c new file mode 100644 index 000000000000..3ceaabceb3d7 --- /dev/null +++ b/arch/um/sys-i386/user-offsets.c @@ -0,0 +1,69 @@ +#include <stdio.h> +#include <signal.h> +#include <asm/ptrace.h> +#include <asm/user.h> +#include <linux/stddef.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(SC_IP, sigcontext, eip); + OFFSET(SC_SP, sigcontext, esp); + OFFSET(SC_FS, sigcontext, fs); + OFFSET(SC_GS, sigcontext, gs); + OFFSET(SC_DS, sigcontext, ds); + OFFSET(SC_ES, sigcontext, es); + OFFSET(SC_SS, sigcontext, ss); + OFFSET(SC_CS, sigcontext, cs); + OFFSET(SC_EFLAGS, sigcontext, eflags); + OFFSET(SC_EAX, sigcontext, eax); + OFFSET(SC_EBX, sigcontext, ebx); + OFFSET(SC_ECX, sigcontext, ecx); + OFFSET(SC_EDX, sigcontext, edx); + OFFSET(SC_EDI, sigcontext, edi); + OFFSET(SC_ESI, sigcontext, esi); + OFFSET(SC_EBP, sigcontext, ebp); + OFFSET(SC_TRAPNO, sigcontext, trapno); + OFFSET(SC_ERR, sigcontext, err); + OFFSET(SC_CR2, sigcontext, cr2); + OFFSET(SC_FPSTATE, sigcontext, fpstate); + OFFSET(SC_SIGMASK, sigcontext, oldmask); + OFFSET(SC_FP_CW, _fpstate, cw); + OFFSET(SC_FP_SW, _fpstate, sw); + OFFSET(SC_FP_TAG, _fpstate, tag); + OFFSET(SC_FP_IPOFF, _fpstate, ipoff); + OFFSET(SC_FP_CSSEL, _fpstate, cssel); + OFFSET(SC_FP_DATAOFF, _fpstate, dataoff); + OFFSET(SC_FP_DATASEL, _fpstate, datasel); + OFFSET(SC_FP_ST, _fpstate, _st); + OFFSET(SC_FXSR_ENV, _fpstate, _fxsr_env); + + DEFINE(HOST_FRAME_SIZE, FRAME_SIZE); + DEFINE(HOST_FP_SIZE, + sizeof(struct user_i387_struct) / sizeof(unsigned long)); + DEFINE(HOST_XFP_SIZE, + sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); + + DEFINE(HOST_IP, EIP); + DEFINE(HOST_SP, UESP); + DEFINE(HOST_EFLAGS, EFL); + DEFINE(HOST_EAX, EAX); + DEFINE(HOST_EBX, EBX); + DEFINE(HOST_ECX, ECX); + DEFINE(HOST_EDX, EDX); + DEFINE(HOST_ESI, ESI); + DEFINE(HOST_EDI, EDI); + DEFINE(HOST_EBP, EBP); + DEFINE(HOST_CS, CS); + DEFINE(HOST_SS, SS); + DEFINE(HOST_DS, DS); + DEFINE(HOST_FS, FS); + DEFINE(HOST_ES, ES); + DEFINE(HOST_GS, GS); + DEFINE(__UM_FRAME_SIZE, sizeof(struct user_regs_struct)); +} diff --git a/arch/um/sys-i386/util/Makefile b/arch/um/sys-i386/util/Makefile index 34860f9ca7b0..bf61afd0b045 100644 --- a/arch/um/sys-i386/util/Makefile +++ b/arch/um/sys-i386/util/Makefile @@ -1,8 +1,5 @@ - hostprogs-y := mk_sc mk_thread always := $(hostprogs-y) -mk_thread-objs := mk_thread_kern.o mk_thread_user.o - -HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS) $(CPPFLAGS) -HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS) +HOSTCFLAGS_mk_sc.o := -I$(objtree)/arch/um +HOSTCFLAGS_mk_thread.o := -I$(objtree)/arch/um diff --git a/arch/um/sys-i386/util/mk_sc.c b/arch/um/sys-i386/util/mk_sc.c index 85cbd30396f7..04c0d73433aa 100644 --- a/arch/um/sys-i386/util/mk_sc.c +++ b/arch/um/sys-i386/util/mk_sc.c @@ -1,52 +1,51 @@ #include <stdio.h> -#include <signal.h> -#include <linux/stddef.h> +#include <user-offsets.h> #define SC_OFFSET(name, field) \ - printf("#define " name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ - offsetof(struct sigcontext, field)) + printf("#define " #name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ + name) #define SC_FP_OFFSET(name, field) \ - printf("#define " name \ + printf("#define " #name \ "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ - offsetof(struct _fpstate, field)) + name) #define SC_FP_OFFSET_PTR(name, field, type) \ - printf("#define " name \ + printf("#define " #name \ "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ - offsetof(struct _fpstate, field)) + name) int main(int argc, char **argv) { - SC_OFFSET("SC_IP", eip); - SC_OFFSET("SC_SP", esp); - SC_OFFSET("SC_FS", fs); - SC_OFFSET("SC_GS", gs); - SC_OFFSET("SC_DS", ds); - SC_OFFSET("SC_ES", es); - SC_OFFSET("SC_SS", ss); - SC_OFFSET("SC_CS", cs); - SC_OFFSET("SC_EFLAGS", eflags); - SC_OFFSET("SC_EAX", eax); - SC_OFFSET("SC_EBX", ebx); - SC_OFFSET("SC_ECX", ecx); - SC_OFFSET("SC_EDX", edx); - SC_OFFSET("SC_EDI", edi); - SC_OFFSET("SC_ESI", esi); - SC_OFFSET("SC_EBP", ebp); - SC_OFFSET("SC_TRAPNO", trapno); - SC_OFFSET("SC_ERR", err); - SC_OFFSET("SC_CR2", cr2); - SC_OFFSET("SC_FPSTATE", fpstate); - SC_OFFSET("SC_SIGMASK", oldmask); - SC_FP_OFFSET("SC_FP_CW", cw); - SC_FP_OFFSET("SC_FP_SW", sw); - SC_FP_OFFSET("SC_FP_TAG", tag); - SC_FP_OFFSET("SC_FP_IPOFF", ipoff); - SC_FP_OFFSET("SC_FP_CSSEL", cssel); - SC_FP_OFFSET("SC_FP_DATAOFF", dataoff); - SC_FP_OFFSET("SC_FP_DATASEL", datasel); - SC_FP_OFFSET_PTR("SC_FP_ST", _st, "struct _fpstate"); - SC_FP_OFFSET_PTR("SC_FXSR_ENV", _fxsr_env, "void"); + SC_OFFSET(SC_IP, eip); + SC_OFFSET(SC_SP, esp); + SC_OFFSET(SC_FS, fs); + SC_OFFSET(SC_GS, gs); + SC_OFFSET(SC_DS, ds); + SC_OFFSET(SC_ES, es); + SC_OFFSET(SC_SS, ss); + SC_OFFSET(SC_CS, cs); + SC_OFFSET(SC_EFLAGS, eflags); + SC_OFFSET(SC_EAX, eax); + SC_OFFSET(SC_EBX, ebx); + SC_OFFSET(SC_ECX, ecx); + SC_OFFSET(SC_EDX, edx); + SC_OFFSET(SC_EDI, edi); + SC_OFFSET(SC_ESI, esi); + SC_OFFSET(SC_EBP, ebp); + SC_OFFSET(SC_TRAPNO, trapno); + SC_OFFSET(SC_ERR, err); + SC_OFFSET(SC_CR2, cr2); + SC_OFFSET(SC_FPSTATE, fpstate); + SC_OFFSET(SC_SIGMASK, oldmask); + SC_FP_OFFSET(SC_FP_CW, cw); + SC_FP_OFFSET(SC_FP_SW, sw); + SC_FP_OFFSET(SC_FP_TAG, tag); + SC_FP_OFFSET(SC_FP_IPOFF, ipoff); + SC_FP_OFFSET(SC_FP_CSSEL, cssel); + SC_FP_OFFSET(SC_FP_DATAOFF, dataoff); + SC_FP_OFFSET(SC_FP_DATASEL, datasel); + SC_FP_OFFSET_PTR(SC_FP_ST, _st, "struct _fpstate"); + SC_FP_OFFSET_PTR(SC_FXSR_ENV, _fxsr_env, "void"); return(0); } diff --git a/arch/um/sys-i386/util/mk_thread.c b/arch/um/sys-i386/util/mk_thread.c new file mode 100644 index 000000000000..7470d0dda67e --- /dev/null +++ b/arch/um/sys-i386/util/mk_thread.c @@ -0,0 +1,22 @@ +#include <stdio.h> +#include <kernel-offsets.h> + +int main(int argc, char **argv) +{ + printf("/*\n"); + printf(" * Generated by mk_thread\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef __UM_THREAD_H\n"); + printf("#define __UM_THREAD_H\n"); + printf("\n"); + printf("#define TASK_DEBUGREGS(task) ((unsigned long *) " + "&(((char *) (task))[%d]))\n", TASK_DEBUGREGS); +#ifdef TASK_EXTERN_PID + printf("#define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[%d]))\n", + TASK_EXTERN_PID); +#endif + printf("\n"); + printf("#endif\n"); + return(0); +} diff --git a/arch/um/sys-i386/util/mk_thread_kern.c b/arch/um/sys-i386/util/mk_thread_kern.c deleted file mode 100644 index 948b1ce85230..000000000000 --- a/arch/um/sys-i386/util/mk_thread_kern.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "linux/config.h" -#include "linux/stddef.h" -#include "linux/sched.h" - -extern void print_head(void); -extern void print_constant_ptr(char *name, int value); -extern void print_constant(char *name, char *type, int value); -extern void print_tail(void); - -#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) - -int main(int argc, char **argv) -{ - print_head(); - print_constant_ptr("TASK_DEBUGREGS", THREAD_OFFSET(arch.debugregs)); -#ifdef CONFIG_MODE_TT - print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -#endif - print_tail(); - return(0); -} - diff --git a/arch/um/sys-i386/util/mk_thread_user.c b/arch/um/sys-i386/util/mk_thread_user.c deleted file mode 100644 index 2620cd6aa1f1..000000000000 --- a/arch/um/sys-i386/util/mk_thread_user.c +++ /dev/null @@ -1,30 +0,0 @@ -#include <stdio.h> - -void print_head(void) -{ - printf("/*\n"); - printf(" * Generated by mk_thread\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef __UM_THREAD_H\n"); - printf("#define __UM_THREAD_H\n"); - printf("\n"); -} - -void print_constant_ptr(char *name, int value) -{ - printf("#define %s(task) ((unsigned long *) " - "&(((char *) (task))[%d]))\n", name, value); -} - -void print_constant(char *name, char *type, int value) -{ - printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, - value); -} - -void print_tail(void) -{ - printf("\n"); - printf("#endif\n"); -} diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c index a971366d3277..8e71b47f2b8e 100644 --- a/arch/um/sys-ppc/ptrace.c +++ b/arch/um/sys-ppc/ptrace.c @@ -8,6 +8,25 @@ int putreg(struct task_struct *child, unsigned long regno, return 0; } +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + unsigned long getreg(struct task_struct *child, unsigned long regno) { unsigned long retval = ~0UL; @@ -16,6 +35,27 @@ unsigned long getreg(struct task_struct *child, unsigned long regno) return retval; } +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 2129e3143559..3d7da911cc8c 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -4,24 +4,20 @@ # Licensed under the GPL # +#XXX: why into lib-y? lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \ ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o \ - syscalls.o sysrq.o thunk.o + syscalls.o sysrq.o thunk.o syscall_table.o + +obj-y := ksyms.o +obj-$(CONFIG_MODULES) += module.o um_module.o USER_OBJS := ptrace_user.o sigcontext.o include arch/um/scripts/Makefile.rules SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \ - semaphore.c thunk.S - -# this needs to be before the foreach, because clean-files does not accept -# complete paths like $(src)/$f. -clean-files := $(SYMLINKS) - -targets += $(SYMLINKS) - -SYMLINKS := $(foreach f,$(SYMLINKS),$(obj)/$f) + semaphore.c thunk.S module.c bitops.c-dir = lib csum-copy.S-dir = lib @@ -30,8 +26,8 @@ csum-wrappers.c-dir = lib memcpy.S-dir = lib semaphore.c-dir = kernel thunk.S-dir = lib - -$(SYMLINKS): FORCE - $(call if_changed,make_link) +module.c-dir = kernel CFLAGS_csum-partial.o := -Dcsum_partial=arch_csum_partial + +subdir- := util diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c index f3b5187942b4..651332aeec22 100644 --- a/arch/um/sys-x86_64/delay.c +++ b/arch/um/sys-x86_64/delay.c @@ -5,7 +5,9 @@ * Licensed under the GPL */ +#include "linux/delay.h" #include "asm/processor.h" +#include "asm/param.h" void __delay(unsigned long loops) { @@ -14,6 +16,22 @@ void __delay(unsigned long loops) for(i = 0; i < loops; i++) ; } +void __udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) ; +} + +void __const_udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) ; +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/sys-x86_64/kernel-offsets.c b/arch/um/sys-x86_64/kernel-offsets.c new file mode 100644 index 000000000000..220e875cbe29 --- /dev/null +++ b/arch/um/sys-x86_64/kernel-offsets.c @@ -0,0 +1,24 @@ +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <asm/page.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define DEFINE_STR1(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " DEFINE_STR1(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ +#ifdef CONFIG_MODE_TT + OFFSET(TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid); +#endif +#include <common-offsets.h> +} diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c new file mode 100644 index 000000000000..a27f0ee6a4f6 --- /dev/null +++ b/arch/um/sys-x86_64/ksyms.c @@ -0,0 +1,20 @@ +#include "linux/module.h" +#include "linux/in6.h" +#include "linux/rwsem.h" +#include "asm/byteorder.h" +#include "asm/semaphore.h" +#include "asm/uaccess.h" +#include "asm/checksum.h" +#include "asm/errno.h" + +EXPORT_SYMBOL(__down_failed); +EXPORT_SYMBOL(__down_failed_interruptible); +EXPORT_SYMBOL(__down_failed_trylock); +EXPORT_SYMBOL(__up_wakeup); + +/*XXX: we need them because they would be exported by x86_64 */ +EXPORT_SYMBOL(__memcpy); + +/* Networking helper routines. */ +/*EXPORT_SYMBOL(csum_partial_copy_from); +EXPORT_SYMBOL(csum_partial_copy_to);*/ diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index 8c146b2a1e00..b593bb256f2c 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -62,6 +62,27 @@ int putreg(struct task_struct *child, int regno, unsigned long value) return 0; } +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + +#if 0 /* Need x86_64 debugregs handling */ + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } +#endif + return -EIO; +} + unsigned long getreg(struct task_struct *child, int regno) { unsigned long retval = ~0UL; @@ -84,6 +105,29 @@ unsigned long getreg(struct task_struct *child, int regno) return retval; } +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } +#if 0 /* Need x86_64 debugregs handling */ + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } +#endif + return put_user(tmp, (unsigned long *) data); +} + void arch_switch(void) { /* XXX diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c index 5bc5a0d796e5..73a7926f7370 100644 --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -57,7 +57,7 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, struct pt_regs *regs, unsigned long mask) { - unsigned long eflags; + struct faultinfo * fi = ¤t->thread.arch.faultinfo; int err = 0; err |= __put_user(0, &to->gs); @@ -84,14 +84,16 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, err |= PUTREG(regs, R14, to, r14); err |= PUTREG(regs, R15, to, r15); err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */ - err |= __put_user(current->thread.err, &to->err); - err |= __put_user(current->thread.trap_no, &to->trapno); + + err |= __put_user(fi->cr2, &to->cr2); + err |= __put_user(fi->error_code, &to->err); + err |= __put_user(fi->trap_no, &to->trapno); + err |= PUTREG(regs, RIP, to, rip); err |= PUTREG(regs, EFLAGS, to, eflags); #undef PUTREG err |= __put_user(mask, &to->oldmask); - err |= __put_user(current->thread.cr2, &to->cr2); return(err); } @@ -166,7 +168,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16) - 8; - frame -= 128; + ((unsigned char *) frame) -= 128; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto out; diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c new file mode 100644 index 000000000000..34b2e842864f --- /dev/null +++ b/arch/um/sys-x86_64/syscall_table.c @@ -0,0 +1,59 @@ +/* System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c + * with some changes for UML. */ + +#include <linux/linkage.h> +#include <linux/sys.h> +#include <linux/cache.h> +#include <linux/config.h> + +#define __NO_STUBS + +/* Below you can see, in terms of #define's, the differences between the x86-64 + * and the UML syscall table. */ + +/* Not going to be implemented by UML, since we have no hardware. */ +#define stub_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +/* The UML TLS problem. Note that x86_64 does not implement this, so the below + * is needed only for the ia32 compatibility. */ +/*#define sys_set_thread_area sys_ni_syscall +#define sys_get_thread_area sys_ni_syscall*/ + +/* For __NR_time. The x86-64 name hopefully will change from sys_time64 to + * sys_time (since the current situation is bogus). I've sent a patch to cleanup + * this. Remove below the obsoleted line. */ +#define sys_time64 um_time +#define sys_time um_time + +/* On UML we call it this way ("old" means it's not mmap2) */ +#define sys_mmap old_mmap +/* On x86-64 sys_uname is actually sys_newuname plus a compatibility trick. + * See arch/x86_64/kernel/sys_x86_64.c */ +#define sys_uname sys_uname64 + +#define stub_clone sys_clone +#define stub_fork sys_fork +#define stub_vfork sys_vfork +#define stub_execve sys_execve +#define stub_rt_sigsuspend sys_rt_sigsuspend +#define stub_sigaltstack sys_sigaltstack +#define stub_rt_sigreturn sys_rt_sigreturn + +#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; +#undef _ASM_X86_64_UNISTD_H_ +#include <asm-x86_64/unistd.h> + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [ nr ] = sym, +#undef _ASM_X86_64_UNISTD_H_ + +typedef void (*sys_call_ptr_t)(void); + +extern void sys_ni_syscall(void); + +sys_call_ptr_t sys_call_table[__NR_syscall_max+1] __cacheline_aligned = { + /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */ + [0 ... __NR_syscall_max] = &sys_ni_syscall, +#include <asm-x86_64/unistd.h> +}; diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c index 68205a03364c..dd9914642b8e 100644 --- a/arch/um/sys-x86_64/syscalls.c +++ b/arch/um/sys-x86_64/syscalls.c @@ -7,6 +7,8 @@ #include "linux/linkage.h" #include "linux/slab.h" #include "linux/shm.h" +#include "linux/utsname.h" +#include "linux/personality.h" #include "asm/uaccess.h" #define __FRAME_OFFSETS #include "asm/ptrace.h" @@ -14,11 +16,15 @@ #include "asm/prctl.h" /* XXX This should get the constants from libc */ #include "choose-mode.h" -asmlinkage long wrap_sys_shmat(int shmid, char __user *shmaddr, int shmflg) +asmlinkage long sys_uname64(struct new_utsname __user * name) { - unsigned long raddr; - - return do_shmat(shmid, shmaddr, shmflg, &raddr) ?: (long) raddr; + int err; + down_read(&uts_sem); + err = copy_to_user(name, &system_utsname, sizeof (*name)); + up_read(&uts_sem); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); + return err ? -EFAULT : 0; } #ifdef CONFIG_MODE_TT @@ -38,6 +44,8 @@ long sys_modify_ldt_tt(int func, void *ptr, unsigned long bytecount) #ifdef CONFIG_MODE_SKAS extern int userspace_pid[]; +#include "skas_ptrace.h" + long sys_modify_ldt_skas(int func, void *ptr, unsigned long bytecount) { struct ptrace_ldt ldt; diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c new file mode 100644 index 000000000000..8b8eff1bd977 --- /dev/null +++ b/arch/um/sys-x86_64/um_module.c @@ -0,0 +1,19 @@ +#include <linux/vmalloc.h> +#include <linux/moduleloader.h> + +/*Copied from i386 arch/i386/kernel/module.c */ +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc_exec(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c new file mode 100644 index 000000000000..5e14792e4838 --- /dev/null +++ b/arch/um/sys-x86_64/user-offsets.c @@ -0,0 +1,78 @@ +#include <stdio.h> +#include <stddef.h> +#include <signal.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/user.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(SC_RBX, sigcontext, rbx); + OFFSET(SC_RCX, sigcontext, rcx); + OFFSET(SC_RDX, sigcontext, rdx); + OFFSET(SC_RSI, sigcontext, rsi); + OFFSET(SC_RDI, sigcontext, rdi); + OFFSET(SC_RBP, sigcontext, rbp); + OFFSET(SC_RAX, sigcontext, rax); + OFFSET(SC_R8, sigcontext, r8); + OFFSET(SC_R9, sigcontext, r9); + OFFSET(SC_R10, sigcontext, r10); + OFFSET(SC_R11, sigcontext, r11); + OFFSET(SC_R12, sigcontext, r12); + OFFSET(SC_R13, sigcontext, r13); + OFFSET(SC_R14, sigcontext, r14); + OFFSET(SC_R15, sigcontext, r15); + OFFSET(SC_IP, sigcontext, rip); + OFFSET(SC_SP, sigcontext, rsp); + OFFSET(SC_CR2, sigcontext, cr2); + OFFSET(SC_ERR, sigcontext, err); + OFFSET(SC_TRAPNO, sigcontext, trapno); + OFFSET(SC_CS, sigcontext, cs); + OFFSET(SC_FS, sigcontext, fs); + OFFSET(SC_GS, sigcontext, gs); + OFFSET(SC_EFLAGS, sigcontext, eflags); + OFFSET(SC_SIGMASK, sigcontext, oldmask); +#if 0 + OFFSET(SC_ORIG_RAX, sigcontext, orig_rax); + OFFSET(SC_DS, sigcontext, ds); + OFFSET(SC_ES, sigcontext, es); + OFFSET(SC_SS, sigcontext, ss); +#endif + + DEFINE(HOST_FRAME_SIZE, FRAME_SIZE); + DEFINE(HOST_RBX, RBX); + DEFINE(HOST_RCX, RCX); + DEFINE(HOST_RDI, RDI); + DEFINE(HOST_RSI, RSI); + DEFINE(HOST_RDX, RDX); + DEFINE(HOST_RBP, RBP); + DEFINE(HOST_RAX, RAX); + DEFINE(HOST_R8, R8); + DEFINE(HOST_R9, R9); + DEFINE(HOST_R10, R10); + DEFINE(HOST_R11, R11); + DEFINE(HOST_R12, R12); + DEFINE(HOST_R13, R13); + DEFINE(HOST_R14, R14); + DEFINE(HOST_R15, R15); + DEFINE(HOST_ORIG_RAX, ORIG_RAX); + DEFINE(HOST_CS, CS); + DEFINE(HOST_SS, SS); + DEFINE(HOST_EFLAGS, EFLAGS); +#if 0 + DEFINE(HOST_FS, FS); + DEFINE(HOST_GS, GS); + DEFINE(HOST_DS, DS); + DEFINE(HOST_ES, ES); +#endif + + DEFINE(HOST_IP, RIP); + DEFINE(HOST_SP, RSP); + DEFINE(__UM_FRAME_SIZE, sizeof(struct user_regs_struct)); +} diff --git a/arch/um/sys-x86_64/util/Makefile b/arch/um/sys-x86_64/util/Makefile index 002607980864..75b052cfc206 100644 --- a/arch/um/sys-x86_64/util/Makefile +++ b/arch/um/sys-x86_64/util/Makefile @@ -4,7 +4,5 @@ hostprogs-y := mk_sc mk_thread always := $(hostprogs-y) -mk_thread-objs := mk_thread_kern.o mk_thread_user.o - -HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS) $(CPPFLAGS) -HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS) +HOSTCFLAGS_mk_sc.o := -I$(objtree)/arch/um +HOSTCFLAGS_mk_thread.o := -I$(objtree)/arch/um diff --git a/arch/um/sys-x86_64/util/mk_sc.c b/arch/um/sys-x86_64/util/mk_sc.c index c236e213918d..7619bc377c1f 100644 --- a/arch/um/sys-x86_64/util/mk_sc.c +++ b/arch/um/sys-x86_64/util/mk_sc.c @@ -3,56 +3,45 @@ */ #include <stdio.h> -#include <signal.h> -#include <linux/stddef.h> +#include <user-offsets.h> -#define SC_OFFSET(name, field) \ - printf("#define " name \ - "(sc) *((unsigned long *) &(((char *) (sc))[%ld]))\n",\ - offsetof(struct sigcontext, field)) - -#define SC_FP_OFFSET(name, field) \ - printf("#define " name \ - "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%ld]))\n",\ - offsetof(struct _fpstate, field)) - -#define SC_FP_OFFSET_PTR(name, field, type) \ - printf("#define " name \ - "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ - offsetof(struct _fpstate, field)) +#define SC_OFFSET(name) \ + printf("#define " #name \ + "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ + name) int main(int argc, char **argv) { - SC_OFFSET("SC_RBX", rbx); - SC_OFFSET("SC_RCX", rcx); - SC_OFFSET("SC_RDX", rdx); - SC_OFFSET("SC_RSI", rsi); - SC_OFFSET("SC_RDI", rdi); - SC_OFFSET("SC_RBP", rbp); - SC_OFFSET("SC_RAX", rax); - SC_OFFSET("SC_R8", r8); - SC_OFFSET("SC_R9", r9); - SC_OFFSET("SC_R10", r10); - SC_OFFSET("SC_R11", r11); - SC_OFFSET("SC_R12", r12); - SC_OFFSET("SC_R13", r13); - SC_OFFSET("SC_R14", r14); - SC_OFFSET("SC_R15", r15); - SC_OFFSET("SC_IP", rip); - SC_OFFSET("SC_SP", rsp); - SC_OFFSET("SC_CR2", cr2); - SC_OFFSET("SC_ERR", err); - SC_OFFSET("SC_TRAPNO", trapno); - SC_OFFSET("SC_CS", cs); - SC_OFFSET("SC_FS", fs); - SC_OFFSET("SC_GS", gs); - SC_OFFSET("SC_EFLAGS", eflags); - SC_OFFSET("SC_SIGMASK", oldmask); + SC_OFFSET(SC_RBX); + SC_OFFSET(SC_RCX); + SC_OFFSET(SC_RDX); + SC_OFFSET(SC_RSI); + SC_OFFSET(SC_RDI); + SC_OFFSET(SC_RBP); + SC_OFFSET(SC_RAX); + SC_OFFSET(SC_R8); + SC_OFFSET(SC_R9); + SC_OFFSET(SC_R10); + SC_OFFSET(SC_R11); + SC_OFFSET(SC_R12); + SC_OFFSET(SC_R13); + SC_OFFSET(SC_R14); + SC_OFFSET(SC_R15); + SC_OFFSET(SC_IP); + SC_OFFSET(SC_SP); + SC_OFFSET(SC_CR2); + SC_OFFSET(SC_ERR); + SC_OFFSET(SC_TRAPNO); + SC_OFFSET(SC_CS); + SC_OFFSET(SC_FS); + SC_OFFSET(SC_GS); + SC_OFFSET(SC_EFLAGS); + SC_OFFSET(SC_SIGMASK); #if 0 - SC_OFFSET("SC_ORIG_RAX", orig_rax); - SC_OFFSET("SC_DS", ds); - SC_OFFSET("SC_ES", es); - SC_OFFSET("SC_SS", ss); + SC_OFFSET(SC_ORIG_RAX); + SC_OFFSET(SC_DS); + SC_OFFSET(SC_ES); + SC_OFFSET(SC_SS); #endif return(0); } diff --git a/arch/um/sys-x86_64/util/mk_thread.c b/arch/um/sys-x86_64/util/mk_thread.c new file mode 100644 index 000000000000..15517396e9cf --- /dev/null +++ b/arch/um/sys-x86_64/util/mk_thread.c @@ -0,0 +1,20 @@ +#include <stdio.h> +#include <kernel-offsets.h> + +int main(int argc, char **argv) +{ + printf("/*\n"); + printf(" * Generated by mk_thread\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef __UM_THREAD_H\n"); + printf("#define __UM_THREAD_H\n"); + printf("\n"); +#ifdef TASK_EXTERN_PID + printf("#define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[%d]))\n", + TASK_EXTERN_PID); +#endif + printf("\n"); + printf("#endif\n"); + return(0); +} diff --git a/arch/um/sys-x86_64/util/mk_thread_kern.c b/arch/um/sys-x86_64/util/mk_thread_kern.c deleted file mode 100644 index a281673f02b2..000000000000 --- a/arch/um/sys-x86_64/util/mk_thread_kern.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "linux/config.h" -#include "linux/stddef.h" -#include "linux/sched.h" - -extern void print_head(void); -extern void print_constant_ptr(char *name, int value); -extern void print_constant(char *name, char *type, int value); -extern void print_tail(void); - -#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) - -int main(int argc, char **argv) -{ - print_head(); -#ifdef CONFIG_MODE_TT - print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -#endif - print_tail(); - return(0); -} - diff --git a/arch/um/sys-x86_64/util/mk_thread_user.c b/arch/um/sys-x86_64/util/mk_thread_user.c deleted file mode 100644 index 7989725568b8..000000000000 --- a/arch/um/sys-x86_64/util/mk_thread_user.c +++ /dev/null @@ -1,30 +0,0 @@ -#include <stdio.h> - -void print_head(void) -{ - printf("/*\n"); - printf(" * Generated by mk_thread\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef __UM_THREAD_H\n"); - printf("#define __UM_THREAD_H\n"); - printf("\n"); -} - -void print_constant_ptr(char *name, int value) -{ - printf("#define %s(task) ((unsigned long *) " - "&(((char *) (task))[%d]))\n", name, value); -} - -void print_constant(char *name, char *type, int value) -{ - printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, - value); -} - -void print_tail(void) -{ - printf("\n"); - printf("#endif\n"); -} diff --git a/arch/um/util/Makefile b/arch/um/util/Makefile index e2ab71209f3f..4c7551c28033 100644 --- a/arch/um/util/Makefile +++ b/arch/um/util/Makefile @@ -1,8 +1,5 @@ hostprogs-y := mk_task mk_constants always := $(hostprogs-y) -mk_task-objs := mk_task_user.o mk_task_kern.o -mk_constants-objs := mk_constants_user.o mk_constants_kern.o - -HOSTCFLAGS_mk_task_kern.o := $(CFLAGS) $(CPPFLAGS) -HOSTCFLAGS_mk_constants_kern.o := $(CFLAGS) $(CPPFLAGS) +HOSTCFLAGS_mk_task.o := -I$(objtree)/arch/um +HOSTCFLAGS_mk_constants.o := -I$(objtree)/arch/um diff --git a/arch/um/util/mk_constants.c b/arch/um/util/mk_constants.c new file mode 100644 index 000000000000..ab217becc36a --- /dev/null +++ b/arch/um/util/mk_constants.c @@ -0,0 +1,32 @@ +#include <stdio.h> +#include <kernel-offsets.h> + +#define SHOW_INT(sym) printf("#define %s %d\n", #sym, sym) +#define SHOW_STR(sym) printf("#define %s %s\n", #sym, sym) + +int main(int argc, char **argv) +{ + printf("/*\n"); + printf(" * Generated by mk_constants\n"); + printf(" */\n"); + printf("\n"); + printf("#ifndef __UM_CONSTANTS_H\n"); + printf("#define __UM_CONSTANTS_H\n"); + printf("\n"); + + SHOW_INT(UM_KERN_PAGE_SIZE); + + SHOW_STR(UM_KERN_EMERG); + SHOW_STR(UM_KERN_ALERT); + SHOW_STR(UM_KERN_CRIT); + SHOW_STR(UM_KERN_ERR); + SHOW_STR(UM_KERN_WARNING); + SHOW_STR(UM_KERN_NOTICE); + SHOW_STR(UM_KERN_INFO); + SHOW_STR(UM_KERN_DEBUG); + + SHOW_INT(UM_NSEC_PER_SEC); + printf("\n"); + printf("#endif\n"); + return(0); +} diff --git a/arch/um/util/mk_constants_kern.c b/arch/um/util/mk_constants_kern.c deleted file mode 100644 index cdcb1232a1ea..000000000000 --- a/arch/um/util/mk_constants_kern.c +++ /dev/null @@ -1,28 +0,0 @@ -#include "linux/kernel.h" -#include "linux/stringify.h" -#include "linux/time.h" -#include "asm/page.h" - -extern void print_head(void); -extern void print_constant_str(char *name, char *value); -extern void print_constant_int(char *name, int value); -extern void print_tail(void); - -int main(int argc, char **argv) -{ - print_head(); - print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); - - print_constant_str("UM_KERN_EMERG", KERN_EMERG); - print_constant_str("UM_KERN_ALERT", KERN_ALERT); - print_constant_str("UM_KERN_CRIT", KERN_CRIT); - print_constant_str("UM_KERN_ERR", KERN_ERR); - print_constant_str("UM_KERN_WARNING", KERN_WARNING); - print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); - print_constant_str("UM_KERN_INFO", KERN_INFO); - print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); - - print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC); - print_tail(); - return(0); -} diff --git a/arch/um/util/mk_constants_user.c b/arch/um/util/mk_constants_user.c deleted file mode 100644 index 8f4d7e50be7c..000000000000 --- a/arch/um/util/mk_constants_user.c +++ /dev/null @@ -1,28 +0,0 @@ -#include <stdio.h> - -void print_head(void) -{ - printf("/*\n"); - printf(" * Generated by mk_constants\n"); - printf(" */\n"); - printf("\n"); - printf("#ifndef __UM_CONSTANTS_H\n"); - printf("#define __UM_CONSTANTS_H\n"); - printf("\n"); -} - -void print_constant_str(char *name, char *value) -{ - printf("#define %s \"%s\"\n", name, value); -} - -void print_constant_int(char *name, int value) -{ - printf("#define %s %d\n", name, value); -} - -void print_tail(void) -{ - printf("\n"); - printf("#endif\n"); -} diff --git a/arch/um/util/mk_task_user.c b/arch/um/util/mk_task.c index 9db849f3f3ac..36c9606505e2 100644 --- a/arch/um/util/mk_task_user.c +++ b/arch/um/util/mk_task.c @@ -1,18 +1,19 @@ #include <stdio.h> +#include <kernel-offsets.h> -void print(char *name, char *type, int offset) +void print_ptr(char *name, char *type, int offset) { - printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, + printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, offset); } -void print_ptr(char *name, char *type, int offset) +void print(char *name, char *type, int offset) { - printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, + printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, offset); } -void print_head(void) +int main(int argc, char **argv) { printf("/*\n"); printf(" * Generated by mk_task\n"); @@ -21,10 +22,9 @@ void print_head(void) printf("#ifndef __TASK_H\n"); printf("#define __TASK_H\n"); printf("\n"); -} - -void print_tail(void) -{ + print_ptr("TASK_REGS", "union uml_pt_regs", TASK_REGS); + print("TASK_PID", "int", TASK_PID); printf("\n"); printf("#endif\n"); + return(0); } diff --git a/arch/um/util/mk_task_kern.c b/arch/um/util/mk_task_kern.c deleted file mode 100644 index c218103315ed..000000000000 --- a/arch/um/util/mk_task_kern.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "linux/sched.h" -#include "linux/stddef.h" - -extern void print(char *name, char *type, int offset); -extern void print_ptr(char *name, char *type, int offset); -extern void print_head(void); -extern void print_tail(void); - -int main(int argc, char **argv) -{ - print_head(); - print_ptr("TASK_REGS", "union uml_pt_regs", - offsetof(struct task_struct, thread.regs)); - print("TASK_PID", "int", offsetof(struct task_struct, pid)); - print_tail(); - return(0); -} diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c index 8fa780757dcd..4726b87f5e5a 100644 --- a/arch/v850/kernel/ptrace.c +++ b/arch/v850/kernel/ptrace.c @@ -23,6 +23,7 @@ #include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/ptrace.h> +#include <linux/signal.h> #include <asm/errno.h> #include <asm/ptrace.h> @@ -208,7 +209,7 @@ int sys_ptrace(long request, long pid, long addr, long data) /* Execute a single instruction. */ case PTRACE_SINGLESTEP: rval = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; /* Turn CHILD's single-step flag on or off. */ diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 80c38c5d71fe..44ee7f6acf7b 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -379,6 +379,11 @@ config GENERIC_IRQ_PROBE bool default y +# we have no ISA slots, but we do have ISA-style DMA. +config ISA_DMA_API + bool + default y + menu "Power management options" source kernel/power/Kconfig diff --git a/arch/x86_64/boot/bootsect.S b/arch/x86_64/boot/bootsect.S index bb15d406ee95..011b7a4993d4 100644 --- a/arch/x86_64/boot/bootsect.S +++ b/arch/x86_64/boot/bootsect.S @@ -63,7 +63,7 @@ msg_loop: jz die movb $0xe, %ah movw $7, %bx - int $0x10 + int $0x10 jmp msg_loop die: @@ -71,7 +71,7 @@ die: xorw %ax, %ax int $0x16 int $0x19 - + # int 0x19 should never return. In case it does anyway, # invoke the BIOS reset code... ljmp $0xf000,$0xfff0 diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S index 3e838be9dbe7..75d4d2ad93b3 100644 --- a/arch/x86_64/boot/setup.S +++ b/arch/x86_64/boot/setup.S @@ -160,7 +160,7 @@ ramdisk_max: .long 0xffffffff trampoline: call start_of_setup .align 16 # The offset at this point is 0x240 - .space (0x7ff-0x240+1) # E820 & EDD space (ending at 0x7ff) + .space (0xeff-0x240+1) # E820 & EDD space (ending at 0xeff) # End of setup header ##################################################### start_of_setup: @@ -412,9 +412,9 @@ jmpe820: # sizeof(e820rec). # good820: - movb (E820NR), %al # up to 32 entries + movb (E820NR), %al # up to 128 entries cmpb $E820MAX, %al - jnl bail820 + jae bail820 incb (E820NR) movw %di, %ax diff --git a/arch/x86_64/ia32/vsyscall.lds b/arch/x86_64/ia32/vsyscall.lds index fa4b4dd4a9ff..f2e75ed4c6c7 100644 --- a/arch/x86_64/ia32/vsyscall.lds +++ b/arch/x86_64/ia32/vsyscall.lds @@ -36,6 +36,7 @@ SECTIONS .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090 + .note : { *(.note.*) } :text :note .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .dynamic : { *(.dynamic) } :text :dynamic @@ -55,6 +56,7 @@ PHDRS { text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ } diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 56516ac92e5d..7c154dfff64a 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -2,6 +2,12 @@ * Handle the memory map. * The functions here do the job until bootmem takes over. * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $ + * + * Getting sanitize_e820_map() in sync with i386 version by applying change: + * - Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach <xela@slit.de>, December 2002. + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * */ #include <linux/config.h> #include <linux/kernel.h> @@ -277,7 +283,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) int chgidx, still_changing; int overlap_entries; int new_bios_entry; - int old_nr, new_nr; + int old_nr, new_nr, chg_nr; int i; /* @@ -331,20 +337,24 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) for (i=0; i < 2*old_nr; i++) change_point[i] = &change_point_list[i]; - /* record all known change-points (starting and ending addresses) */ + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ chgidx = 0; for (i=0; i < old_nr; i++) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } } + chg_nr = chgidx; /* sort change-point list by memory addresses (low -> high) */ still_changing = 1; while (still_changing) { still_changing = 0; - for (i=1; i < 2*old_nr; i++) { + for (i=1; i < chg_nr; i++) { /* if <current_addr> > <last_addr>, swap */ /* or, if current=<start_addr> & last=<end_addr>, swap */ if ((change_point[i]->addr < change_point[i-1]->addr) || @@ -367,7 +377,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) last_type = 0; /* start with undefined memory type */ last_addr = 0; /* start with 0 as last starting address */ /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < 2*old_nr; chgidx++) + for (chgidx=0; chgidx < chg_nr; chgidx++) { /* keep track of all overlapping bios entries */ if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 750bcd0655dc..e3a19e8ebbf8 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -60,7 +60,7 @@ static struct console early_vga_console = { /* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ -int early_serial_base = 0x3f8; /* ttyS0 */ +static int early_serial_base = 0x3f8; /* ttyS0 */ #define XMTRDY 0x20 diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 3233a15cc4e0..1086b5fcac21 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -296,6 +296,7 @@ int_very_careful: call syscall_trace_leave popq %rdi andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi + cli jmp int_restore_rest int_signal: @@ -307,6 +308,7 @@ int_signal: 1: movl $_TIF_NEED_RESCHED,%edi int_restore_rest: RESTORE_REST + cli jmp int_with_check CFI_ENDPROC @@ -490,7 +492,8 @@ retint_signal: call do_notify_resume RESTORE_REST cli - GET_THREAD_INFO(%rcx) + GET_THREAD_INFO(%rcx) + movl $_TIF_WORK_MASK,%edi jmp retint_check #ifdef CONFIG_PREEMPT diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 6cad46c98a23..0f8c78dcd38c 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -29,8 +29,6 @@ static void __init clear_bss(void) (unsigned long) __bss_end - (unsigned long) __bss_start); } -extern char x86_boot_params[2048]; - #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -44,7 +42,7 @@ static void __init copy_bootdata(char *real_mode_data) int new_data; char * command_line; - memcpy(x86_boot_params, real_mode_data, 2048); + memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); if (!new_data) { if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { @@ -93,9 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) #ifdef CONFIG_SMP cpu_set(0, cpu_online_map); #endif - /* default console: */ - if (!strstr(saved_command_line, "console=")) - strcat(saved_command_line, " console=tty0"); s = strstr(saved_command_line, "earlyprintk="); if (s != NULL) setup_early_printk(s); diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 29a257295484..60be58617eb9 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1607,7 +1607,6 @@ static inline void check_timer(void) disable_8259A_irq(0); setup_nmi(); enable_8259A_irq(0); - check_nmi_watchdog(); } return; } @@ -1627,7 +1626,6 @@ static inline void check_timer(void) nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); - check_nmi_watchdog(); } return; } diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 4f2a852299b6..f77f8a0ff187 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -355,6 +355,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) *tos &= ~(TF_MASK | IF_MASK); *tos |= kprobe_old_rflags; break; + case 0xc3: /* ret/lret */ + case 0xcb: + case 0xc2: + case 0xca: + regs->eflags &= ~TF_MASK; + /* rip is already adjusted, no more changes required*/ + return; case 0xe8: /* call relative - Fix return addr */ *tos = orig_rip + (*tos - copy_rip); break; diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c index c2ffea8845ed..bac195c74bcc 100644 --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c @@ -30,9 +30,12 @@ #define DEBUGP(fmt...) +#ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ } void *module_alloc(unsigned long size) @@ -51,6 +54,7 @@ void *module_alloc(unsigned long size) return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); } +#endif /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index e00d4adec36b..61de0b34a01e 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -112,17 +112,20 @@ static __init int cpu_has_lapic(void) } } -int __init check_nmi_watchdog (void) +static int __init check_nmi_watchdog (void) { int counts[NR_CPUS]; int cpu; + if (nmi_watchdog == NMI_NONE) + return 0; + if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { nmi_watchdog = NMI_NONE; return -1; } - printk(KERN_INFO "testing NMI watchdog ... "); + printk(KERN_INFO "Testing NMI watchdog ... "); for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; @@ -148,6 +151,8 @@ int __init check_nmi_watchdog (void) return 0; } +/* Have this called later during boot so counters are updating */ +late_initcall(check_nmi_watchdog); int __init setup_nmi_watchdog(char *str) { diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 9922d2ba24a3..761b6d35e338 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -402,10 +402,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; - asm("movl %%gs,%0" : "=m" (p->thread.gsindex)); - asm("movl %%fs,%0" : "=m" (p->thread.fsindex)); - asm("movl %%es,%0" : "=m" (p->thread.es)); - asm("movl %%ds,%0" : "=m" (p->thread.ds)); + asm("mov %%gs,%0" : "=m" (p->thread.gsindex)); + asm("mov %%fs,%0" : "=m" (p->thread.fsindex)); + asm("mov %%es,%0" : "=m" (p->thread.es)); + asm("mov %%ds,%0" : "=m" (p->thread.ds)); if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -468,11 +468,11 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * * Switch DS and ES. * This won't pick up thread selector changes, but I guess that is ok. */ - asm volatile("movl %%es,%0" : "=m" (prev->es)); + asm volatile("mov %%es,%0" : "=m" (prev->es)); if (unlikely(next->es | prev->es)) loadsegment(es, next->es); - asm volatile ("movl %%ds,%0" : "=m" (prev->ds)); + asm volatile ("mov %%ds,%0" : "=m" (prev->ds)); if (unlikely(next->ds | prev->ds)) loadsegment(ds, next->ds); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index c7011675007d..e26e86bb56fe 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -18,6 +18,7 @@ #include <linux/security.h> #include <linux/audit.h> #include <linux/seccomp.h> +#include <linux/signal.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -467,7 +468,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data case PTRACE_CONT: /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); @@ -529,7 +530,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data case PTRACE_SINGLESTEP: /* set the trap flag. */ ret = -EIO; - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) break; clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); set_singlestep(child); @@ -634,20 +635,29 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) /* do the secure computing check first */ secure_computing(regs->orig_rax); - if (unlikely(current->audit_context)) - audit_syscall_entry(current, regs->orig_rax, - regs->rdi, regs->rsi, - regs->rdx, regs->r10); - if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) syscall_trace(regs); + + if (unlikely(current->audit_context)) { + if (test_thread_flag(TIF_IA32)) { + audit_syscall_entry(current, AUDIT_ARCH_I386, + regs->orig_rax, + regs->rbx, regs->rcx, + regs->rdx, regs->rsi); + } else { + audit_syscall_entry(current, AUDIT_ARCH_X86_64, + regs->orig_rax, + regs->rdi, regs->rsi, + regs->rdx, regs->r10); + } + } } asmlinkage void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, regs->rax); + audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index b18c114c7648..2129cf9ba6b2 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -977,7 +977,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) if ((xlvl & 0xffff0000) == 0x80000000) { if (xlvl >= 0x80000001) { c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[5] = cpuid_ecx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); } if (xlvl >= 0x80000004) get_model_name(c); /* Default name */ @@ -1076,7 +1076,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL, /* AMD-defined */ - "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", @@ -1100,11 +1100,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /* AMD-defined (#2) */ "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static char *x86_power_flags[] = { "ts", /* temperature sensor */ diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index b5305b04bc40..678b7ac33b8b 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/bootmem.h> #include <linux/bitops.h> +#include <asm/bootsetup.h> #include <asm/pda.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -26,7 +27,7 @@ #include <asm/mman.h> #include <asm/numa.h> -char x86_boot_params[2048] __initdata = {0,}; +char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 7760224cdfe3..d439ced150c6 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -83,7 +83,7 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct rt_sigframe { - char *pretcode; + char __user *pretcode; struct ucontext uc; struct siginfo info; }; diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index 477d8be57d64..dbebd5ccba6b 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c @@ -152,12 +152,6 @@ asmlinkage long sys_uname(struct new_utsname __user * name) return err ? -EFAULT : 0; } -asmlinkage long wrap_sys_shmat(int shmid, char __user *shmaddr, int shmflg) -{ - unsigned long raddr; - return do_shmat(shmid,shmaddr,shmflg,&raddr) ?: (long)raddr; -} - asmlinkage long sys_time64(long __user * tloc) { struct timeval now; diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 88626e626886..a43dedb58fa2 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -139,35 +139,23 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); #undef memmove #undef memchr #undef strlen -#undef strcpy #undef strncmp #undef strncpy #undef strchr -#undef strcmp -#undef strcpy -#undef strcat -#undef memcmp extern void * memset(void *,int,__kernel_size_t); extern size_t strlen(const char *); extern void * memmove(void * dest,const void *src,size_t count); -extern char * strcpy(char * dest,const char *src); -extern int strcmp(const char * cs,const char * ct); extern void *memchr(const void *s, int c, size_t n); extern void * memcpy(void *,const void *,__kernel_size_t); extern void * __memcpy(void *,const void *,__kernel_size_t); -extern char * strcat(char *, const char *); -extern int memcmp(const void * cs,const void * ct,size_t count); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strcat); EXPORT_SYMBOL(strncat); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(strrchr); @@ -175,7 +163,6 @@ EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); -EXPORT_SYMBOL(memcmp); #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM /* prototypes are wrong, these are assembly with custom calling functions */ diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S index d9d3e5ec9ad1..5828b8191667 100644 --- a/arch/x86_64/lib/putuser.S +++ b/arch/x86_64/lib/putuser.S @@ -49,8 +49,8 @@ __put_user_2: jc 20f cmpq threadinfo_addr_limit(%r8),%rcx jae 20f -2: decq %rcx - movw %dx,(%rcx) + decq %rcx +2: movw %dx,(%rcx) xorl %eax,%eax ret 20: decq %rcx @@ -64,8 +64,8 @@ __put_user_4: jc 30f cmpq threadinfo_addr_limit(%r8),%rcx jae 30f -3: subq $3,%rcx - movl %edx,(%rcx) + subq $3,%rcx +3: movl %edx,(%rcx) xorl %eax,%eax ret 30: subq $3,%rcx @@ -79,8 +79,8 @@ __put_user_8: jc 40f cmpq threadinfo_addr_limit(%r8),%rcx jae 40f -4: subq $7,%rcx - movq %rdx,(%rcx) + subq $7,%rcx +4: movq %rdx,(%rcx) xorl %eax,%eax ret 40: subq $7,%rcx |