diff options
Diffstat (limited to 'arch/powerpc/kernel')
71 files changed, 4202 insertions, 1497 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f3f5e2641432..64f5948ebc9d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -50,16 +50,15 @@ obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_E500) += idle_e500.o obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o \ swsusp_$(CONFIG_WORD_SIZE).o obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o -obj-$(CONFIG_MODULES) += module_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o -ifeq ($(CONFIG_PPC_MERGE),y) - extra-$(CONFIG_PPC_STD_MMU) := head_32.o extra-$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_40x.o @@ -73,6 +72,7 @@ obj-y += time.o prom.o traps.o setup-common.o \ misc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma_64.o iommu.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o @@ -98,12 +98,6 @@ ifneq ($(CONFIG_PPC_INDIRECT_IO),y) obj-y += iomap.o endif -else -# stuff used from here for ARCH=ppc -smpobj-$(CONFIG_SMP) += smp.o - -endif - obj-$(CONFIG_PPC64) += $(obj64-y) extra-$(CONFIG_PPC_FPU) += fpu.o @@ -119,9 +113,6 @@ PHONY += systbl_chk systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i $(call cmd,systbl_chk) - -ifeq ($(CONFIG_PPC_MERGE),y) - $(obj)/built-in.o: prom_init_check quiet_cmd_prom_init_check = CALL $< @@ -131,7 +122,4 @@ PHONY += prom_init_check prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o $(call cmd,prom_init_check) -endif - - clean-files := vmlinux.lds diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index e06f75daeba3..367129789cc0 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -48,6 +48,7 @@ struct aligninfo { #define HARD 0x80 /* string, stwcx. */ #define E4 0x40 /* SPE endianness is word */ #define E8 0x80 /* SPE endianness is double word */ +#define SPLT 0x80 /* VSX SPLAT load */ /* DSISR bits reported for a DCBZ instruction: */ #define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ @@ -363,10 +364,10 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, * Only POWER6 has these instructions, and it does true little-endian, * so we don't need the address swizzling. */ -static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr, - unsigned int reg, unsigned int flags) +static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, + unsigned int flags) { - char *ptr = (char *) ¤t->thread.fpr[reg]; + char *ptr = (char *) ¤t->thread.TS_FPR(reg); int i, ret; if (!(flags & F)) @@ -637,6 +638,36 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } #endif /* CONFIG_SPE */ +#ifdef CONFIG_VSX +/* + * Emulate VSX instructions... + */ +static int emulate_vsx(unsigned char __user *addr, unsigned int reg, + unsigned int areg, struct pt_regs *regs, + unsigned int flags, unsigned int length) +{ + char *ptr = (char *) ¤t->thread.TS_FPR(reg); + int ret; + + flush_vsx_to_thread(current); + + if (flags & ST) + ret = __copy_to_user(addr, ptr, length); + else { + if (flags & SPLT){ + ret = __copy_from_user(ptr, addr, length); + ptr += length; + } + ret |= __copy_from_user(ptr, addr, length); + } + if (flags & U) + regs->gpr[areg] = regs->dar; + if (ret) + return -EFAULT; + return 1; +} +#endif + /* * Called on alignment exception. Attempts to fixup * @@ -647,7 +678,7 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, int fix_alignment(struct pt_regs *regs) { - unsigned int instr, nb, flags; + unsigned int instr, nb, flags, instruction = 0; unsigned int reg, areg; unsigned int dsisr; unsigned char __user *addr; @@ -689,6 +720,7 @@ int fix_alignment(struct pt_regs *regs) if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE)) instr = cpu_to_le32(instr); dsisr = make_dsisr(instr); + instruction = instr; } /* extract the operation and registers from the dsisr */ @@ -728,6 +760,30 @@ int fix_alignment(struct pt_regs *regs) /* DAR has the operand effective address */ addr = (unsigned char __user *)regs->dar; +#ifdef CONFIG_VSX + if ((instruction & 0xfc00003e) == 0x7c000018) { + /* Additional register addressing bit (64 VSX vs 32 FPR/GPR */ + reg |= (instruction & 0x1) << 5; + /* Simple inline decoder instead of a table */ + if (instruction & 0x200) + nb = 16; + else if (instruction & 0x080) + nb = 8; + else + nb = 4; + flags = 0; + if (instruction & 0x100) + flags |= ST; + if (instruction & 0x040) + flags |= U; + /* splat load needs a special decoder */ + if ((instruction & 0x400) == 0){ + flags |= SPLT; + nb = 8; + } + return emulate_vsx(addr, reg, areg, regs, flags, nb); + } +#endif /* A size of 0 indicates an instruction we don't support, with * the exception of DCBZ which is handled as a special case here */ @@ -759,7 +815,7 @@ int fix_alignment(struct pt_regs *regs) /* Special case for 16-byte FP loads and stores */ if (nb == 16) - return emulate_fp_pair(regs, addr, reg, flags); + return emulate_fp_pair(addr, reg, flags); /* If we are loading, get the data from user space, else * get it from register values @@ -784,7 +840,7 @@ int fix_alignment(struct pt_regs *regs) return -EFAULT; } } else if (flags & F) { - data.dd = current->thread.fpr[reg]; + data.dd = current->thread.TS_FPR(reg); if (flags & S) { /* Single-precision FP store requires conversion... */ #ifdef CONFIG_PPC_FPU @@ -862,7 +918,7 @@ int fix_alignment(struct pt_regs *regs) if (unlikely(ret)) return -EFAULT; } else if (flags & F) - current->thread.fpr[reg] = data.dd; + current->thread.TS_FPR(reg) = data.dd; else regs->gpr[reg] = data.ll; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ec9228d687b0..92768d3006f7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -52,6 +52,10 @@ #include <asm/iseries/alpaca.h> #endif +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#include "head_booke.h" +#endif + int main(void) { DEFINE(THREAD, offsetof(struct task_struct, thread)); @@ -74,6 +78,10 @@ int main(void) DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr)); + DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); +#endif /* CONFIG_VSX */ #ifdef CONFIG_PPC64 DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); #else /* CONFIG_PPC64 */ @@ -242,6 +250,25 @@ int main(void) DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); + DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); + /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */ + DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); + DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1)); + DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2)); + DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3)); + DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6)); + DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7)); + DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0)); + DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1)); + DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0)); + DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); + DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); + DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); + DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); +#endif + DEFINE(CLONE_VM, CLONE_VM); DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S index e3623e3e3451..80cac984d85d 100644 --- a/arch/powerpc/kernel/cpu_setup_44x.S +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -33,17 +33,12 @@ _GLOBAL(__setup_cpu_440grx) mtlr r4 blr _GLOBAL(__setup_cpu_460ex) +_GLOBAL(__setup_cpu_460gt) b __init_fpu_44x _GLOBAL(__setup_cpu_440gx) _GLOBAL(__setup_cpu_440spe) b __fixup_440A_mcheck - /* Temporary fixup for arch/ppc until we kill the whole thing */ -#ifndef CONFIG_PPC_MERGE -_GLOBAL(__fixup_440A_mcheck) - blr -#endif - /* enable APU between CPU and FPU */ _GLOBAL(__init_fpu_44x) mfspr r3,SPRN_CCR0 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e44d5530f0a6..25c273c761d1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -23,6 +23,9 @@ struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); +/* The platform string corresponding to the real PVR */ +const char *powerpc_base_platform; + /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's * the responsibility of the appropriate CPU save/restore functions to @@ -37,6 +40,7 @@ extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); @@ -52,6 +56,8 @@ extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); +extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power7(void); #endif /* CONFIG_PPC64 */ /* This table only contains "desktop" CPUs, it need to be filled with embedded @@ -67,7 +73,12 @@ extern void __restore_cpu_ppc970(void); PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) #define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE) + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) @@ -347,6 +358,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .machine_check = machine_check_generic, + .oprofile_cpu_type = "ppc64/compat-power5+", .platform = "power5+", }, { /* Power6 */ @@ -378,8 +390,41 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .machine_check = machine_check_generic, + .oprofile_cpu_type = "ppc64/compat-power6", .platform = "power6", }, + { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000003, + .cpu_name = "POWER7 (architected)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .machine_check = machine_check_generic, + .oprofile_cpu_type = "ppc64/compat-power7", + .platform = "power7", + }, + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .oprofile_cpu_type = "ppc64/power7", + .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV, + .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, + .oprofile_mmcra_clear = POWER6_MMCRA_THRM | + POWER6_MMCRA_OTHER, + .platform = "power7", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, @@ -1410,6 +1455,16 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 440 in Xilinx Virtex-5 FXT */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x7ff21910, + .cpu_name = "440 in Virtex-5 FXT", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .icache_bsize = 32, + .dcache_bsize = 32, + .platform = "ppc440", + }, { /* 460EX */ .pvr_mask = 0xffff0002, .pvr_value = 0x13020002, @@ -1427,9 +1482,10 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_value = 0x13020000, .cpu_name = "460GT", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .icache_bsize = 32, .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, .machine_check = machine_check_440A, .platform = "ppc440", }, @@ -1491,7 +1547,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, .cpu_name = "e500", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ .cpu_features = CPU_FTRS_E500, .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | @@ -1508,7 +1563,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0xffff0000, .pvr_value = 0x80210000, .cpu_name = "e500v2", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ .cpu_features = CPU_FTRS_E500_2, .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | @@ -1522,6 +1576,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "ppc8548", }, + { /* e500mc */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80230000, + .cpu_name = "e500mc", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */ + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .machine_check = machine_check_e500, + .platform = "ppce500mc", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1567,9 +1635,34 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) t->cpu_setup = s->cpu_setup; t->cpu_restore = s->cpu_restore; t->platform = s->platform; + /* + * If we have passed through this logic once + * before and have pulled the default case + * because the real PVR was not found inside + * cpu_specs[], then we are possibly running in + * compatibility mode. In that case, let the + * oprofiler know which set of compatibility + * counters to pull from by making sure the + * oprofile_cpu_type string is set to that of + * compatibility mode. If the oprofile_cpu_type + * already has a value, then we are possibly + * overriding a real PVR with a logical one, and, + * in that case, keep the current value for + * oprofile_cpu_type. + */ + if (t->oprofile_cpu_type == NULL) + t->oprofile_cpu_type = s->oprofile_cpu_type; } else *t = *s; *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; + + /* + * Set the base platform string once; assumes + * we're called with real pvr first. + */ + if (*PTRRELOC(&powerpc_base_platform) == NULL) + *PTRRELOC(&powerpc_base_platform) = t->platform; + #if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) /* ppc64 and booke expect identify_cpu to also call * setup_cpu for that processor. I will consolidate @@ -1587,38 +1680,3 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) BUG(); return NULL; } - -void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) -{ - struct fixup_entry { - unsigned long mask; - unsigned long value; - long start_off; - long end_off; - } *fcur, *fend; - - fcur = fixup_start; - fend = fixup_end; - - for (; fcur < fend; fcur++) { - unsigned int *pstart, *pend, *p; - - if ((value & fcur->mask) == fcur->value) - continue; - - /* These PTRRELOCs will disappear once the new scheme for - * modules and vdso is implemented - */ - pstart = ((unsigned int *)fcur) + (fcur->start_off / 4); - pend = ((unsigned int *)fcur) + (fcur->end_off / 4); - - for (p = pstart; p < pend; p++) { - *p = 0x60000000u; - asm volatile ("dcbst 0, %0" : : "r" (p)); - } - asm volatile ("sync" : : : "memory"); - for (p = pstart; p < pend; p++) - asm volatile ("icbi 0,%0" : : "r" (p)); - asm volatile ("sync; isync" : : : "memory"); - } -} diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index eae401de3f76..0a8439aafdd1 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -48,7 +48,7 @@ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; cpumask_t cpus_in_sr = CPU_MASK_NONE; -#define CRASH_HANDLER_MAX 1 +#define CRASH_HANDLER_MAX 2 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9ee3c5278db0..e0debcca0bfa 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -14,6 +14,7 @@ #include <linux/crash_dump.h> #include <linux/bootmem.h> #include <linux/lmb.h> +#include <asm/code-patching.h> #include <asm/kdump.h> #include <asm/prom.h> #include <asm/firmware.h> @@ -33,6 +34,8 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { + unsigned int *p = (unsigned int *)addr; + /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we * need to branch to current address + 32 MB. So we insert a nop at @@ -41,8 +44,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. */ - create_instruction(addr, 0x60000000); /* nop */ - create_branch(addr + 4, addr + PHYSICAL_START, 0); + patch_instruction(p, PPC_NOP_INSTR); + patch_branch(++p, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c index 3a317cb0636a..ae5708e3a312 100644 --- a/arch/powerpc/kernel/dma_64.c +++ b/arch/powerpc/kernel/dma_64.c @@ -15,15 +15,6 @@ * Generic iommu implementation */ -static inline unsigned long device_to_mask(struct device *dev) -{ - if (dev->dma_mask && *dev->dma_mask) - return *dev->dma_mask; - /* Assume devices without mask can take 32 bit addresses */ - return 0xfffffffful; -} - - /* Allocates a contiguous real buffer and creates mappings over it. * Returns the virtual address of the buffer and sets dma_handle * to the dma address (mapping) of the first page. @@ -50,32 +41,38 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, */ static dma_addr_t dma_iommu_map_single(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { return iommu_map_single(dev, dev->archdata.dma_data, vaddr, size, - device_to_mask(dev), direction); + device_to_mask(dev), direction, attrs); } static void dma_iommu_unmap_single(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { - iommu_unmap_single(dev->archdata.dma_data, dma_handle, size, direction); + iommu_unmap_single(dev->archdata.dma_data, dma_handle, size, direction, + attrs); } static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { - return iommu_map_sg(dev, sglist, nelems, - device_to_mask(dev), direction); + return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, + device_to_mask(dev), direction, attrs); } static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { - iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction); + iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, + attrs); } /* We support DMA to/from any memory page via the iommu */ @@ -148,19 +145,22 @@ static void dma_direct_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_direct_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { return virt_to_abs(ptr) + get_dma_direct_offset(dev); } static void dma_direct_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { } static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -174,7 +174,8 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, } static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 7231a708af0d..1cbbf7033641 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -45,29 +45,54 @@ #endif #ifdef CONFIG_BOOKE -#include "head_booke.h" -#define TRANSFER_TO_HANDLER_EXC_LEVEL(exc_level) \ - mtspr exc_level##_SPRG,r8; \ - BOOKE_LOAD_EXC_LEVEL_STACK(exc_level); \ - lwz r0,GPR10-INT_FRAME_SIZE(r8); \ - stw r0,GPR10(r11); \ - lwz r0,GPR11-INT_FRAME_SIZE(r8); \ - stw r0,GPR11(r11); \ - mfspr r8,exc_level##_SPRG - .globl mcheck_transfer_to_handler mcheck_transfer_to_handler: - TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK) - b transfer_to_handler_full + mfspr r0,SPRN_DSRR0 + stw r0,_DSRR0(r11) + mfspr r0,SPRN_DSRR1 + stw r0,_DSRR1(r11) + /* fall through */ .globl debug_transfer_to_handler debug_transfer_to_handler: - TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG) - b transfer_to_handler_full + mfspr r0,SPRN_CSRR0 + stw r0,_CSRR0(r11) + mfspr r0,SPRN_CSRR1 + stw r0,_CSRR1(r11) + /* fall through */ .globl crit_transfer_to_handler crit_transfer_to_handler: - TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT) +#ifdef CONFIG_FSL_BOOKE + mfspr r0,SPRN_MAS0 + stw r0,MAS0(r11) + mfspr r0,SPRN_MAS1 + stw r0,MAS1(r11) + mfspr r0,SPRN_MAS2 + stw r0,MAS2(r11) + mfspr r0,SPRN_MAS3 + stw r0,MAS3(r11) + mfspr r0,SPRN_MAS6 + stw r0,MAS6(r11) +#ifdef CONFIG_PHYS_64BIT + mfspr r0,SPRN_MAS7 + stw r0,MAS7(r11) +#endif /* CONFIG_PHYS_64BIT */ +#endif /* CONFIG_FSL_BOOKE */ +#ifdef CONFIG_44x + mfspr r0,SPRN_MMUCR + stw r0,MMUCR(r11) +#endif + mfspr r0,SPRN_SRR0 + stw r0,_SRR0(r11) + mfspr r0,SPRN_SRR1 + stw r0,_SRR1(r11) + + mfspr r8,SPRN_SPRG3 + lwz r0,KSP_LIMIT(r8) + stw r0,SAVED_KSP_LIMIT(r11) + rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + stw r0,KSP_LIMIT(r8) /* fall through */ #endif @@ -78,6 +103,16 @@ crit_transfer_to_handler: stw r0,GPR10(r11) lwz r0,crit_r11@l(0) stw r0,GPR11(r11) + mfspr r0,SPRN_SRR0 + stw r0,crit_srr0@l(0) + mfspr r0,SPRN_SRR1 + stw r0,crit_srr1@l(0) + + mfspr r8,SPRN_SPRG3 + lwz r0,KSP_LIMIT(r8) + stw r0,saved_ksp_limit@l(0) + rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + stw r0,KSP_LIMIT(r8) /* fall through */ #endif @@ -142,13 +177,14 @@ transfer_to_handler: cmplw r1,r9 /* if r1 <= ksp_limit */ ble- stack_ovf /* then the kernel stack overflowed */ 5: -#ifdef CONFIG_6xx +#if defined(CONFIG_6xx) || defined(CONFIG_E500) rlwinm r9,r1,0,0,31-THREAD_SHIFT tophys(r9,r9) /* check local flags */ lwz r12,TI_LOCAL_FLAGS(r9) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f -#endif /* CONFIG_6xx */ + bt- 31-TLF_SLEEPING,7f +#endif /* CONFIG_6xx || CONFIG_E500 */ .globl transfer_to_handler_cont transfer_to_handler_cont: 3: @@ -161,10 +197,17 @@ transfer_to_handler_cont: SYNC RFI /* jump to handler, enable MMU */ -#ifdef CONFIG_6xx +#if defined (CONFIG_6xx) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING stw r12,TI_LOCAL_FLAGS(r9) - b power_save_6xx_restore + b power_save_ppc32_restore + +7: rlwinm r12,r12,0,~_TLF_SLEEPING + stw r12,TI_LOCAL_FLAGS(r9) + lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ + rlwinm r9,r9,0,~MSR_EE + lwz r12,_LINK(r11) /* and return to address in LR */ + b fast_exception_return #endif /* @@ -300,7 +343,12 @@ syscall_dotrace: stw r0,_TRAP(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_enter - lwz r0,GPR0(r1) /* Restore original registers */ + /* + * Restore argument registers possibly just changed. + * We use the return value of do_syscall_trace_enter + * for call number to look up in the table (r0). + */ + mr r0,r3 lwz r3,GPR3(r1) lwz r4,GPR4(r1) lwz r5,GPR5(r1) @@ -669,7 +717,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) + andi. r0,r9,_TIF_USER_WORK_MASK bne do_work restore_user: @@ -860,17 +908,90 @@ exc_exit_restart_end: exc_lvl_rfi; \ b .; /* prevent prefetch past exc_lvl_rfi */ +#define RESTORE_xSRR(exc_lvl_srr0, exc_lvl_srr1) \ + lwz r9,_##exc_lvl_srr0(r1); \ + lwz r10,_##exc_lvl_srr1(r1); \ + mtspr SPRN_##exc_lvl_srr0,r9; \ + mtspr SPRN_##exc_lvl_srr1,r10; + +#if defined(CONFIG_FSL_BOOKE) +#ifdef CONFIG_PHYS_64BIT +#define RESTORE_MAS7 \ + lwz r11,MAS7(r1); \ + mtspr SPRN_MAS7,r11; +#else +#define RESTORE_MAS7 +#endif /* CONFIG_PHYS_64BIT */ +#define RESTORE_MMU_REGS \ + lwz r9,MAS0(r1); \ + lwz r10,MAS1(r1); \ + lwz r11,MAS2(r1); \ + mtspr SPRN_MAS0,r9; \ + lwz r9,MAS3(r1); \ + mtspr SPRN_MAS1,r10; \ + lwz r10,MAS6(r1); \ + mtspr SPRN_MAS2,r11; \ + mtspr SPRN_MAS3,r9; \ + mtspr SPRN_MAS6,r10; \ + RESTORE_MAS7; +#elif defined(CONFIG_44x) +#define RESTORE_MMU_REGS \ + lwz r9,MMUCR(r1); \ + mtspr SPRN_MMUCR,r9; +#else +#define RESTORE_MMU_REGS +#endif + +#ifdef CONFIG_40x .globl ret_from_crit_exc ret_from_crit_exc: + mfspr r9,SPRN_SPRG3 + lis r10,saved_ksp_limit@ha; + lwz r10,saved_ksp_limit@l(r10); + tovirt(r9,r9); + stw r10,KSP_LIMIT(r9) + lis r9,crit_srr0@ha; + lwz r9,crit_srr0@l(r9); + lis r10,crit_srr1@ha; + lwz r10,crit_srr1@l(r10); + mtspr SPRN_SRR0,r9; + mtspr SPRN_SRR1,r10; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI) +#endif /* CONFIG_40x */ #ifdef CONFIG_BOOKE + .globl ret_from_crit_exc +ret_from_crit_exc: + mfspr r9,SPRN_SPRG3 + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_MMU_REGS; + RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI) + .globl ret_from_debug_exc ret_from_debug_exc: + mfspr r9,SPRN_SPRG3 + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + lwz r9,THREAD_INFO-THREAD(r9) + rlwinm r10,r1,0,0,(31-THREAD_SHIFT) + lwz r10,TI_PREEMPT(r10) + stw r10,TI_PREEMPT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_xSRR(CSRR0,CSRR1); + RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, RFDI) .globl ret_from_mcheck_exc ret_from_mcheck_exc: + mfspr r9,SPRN_SPRG3 + lwz r10,SAVED_KSP_LIMIT(r1) + stw r10,KSP_LIMIT(r9) + RESTORE_xSRR(SRR0,SRR1); + RESTORE_xSRR(CSRR0,CSRR1); + RESTORE_xSRR(DSRR0,DSRR1); + RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, RFMCI) #endif /* CONFIG_BOOKE */ @@ -926,7 +1047,7 @@ recheck: lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched - andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK + andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user do_user_signal: /* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE @@ -939,8 +1060,8 @@ do_user_signal: /* r10 contains MSR_KERNEL here */ SAVE_NVGPRS(r1) rlwinm r3,r3,0,0,30 stw r3,_TRAP(r1) -2: li r3,0 - addi r4,r1,STACK_FRAME_OVERHEAD +2: addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r9 bl do_signal REST_NVGPRS(r1) b recheck diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2f511a969d2c..2d802e97097c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -214,7 +214,12 @@ syscall_dotrace: bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .do_syscall_trace_enter - ld r0,GPR0(r1) /* Restore original registers */ + /* + * Restore argument registers possibly just changed. + * We use the return value of do_syscall_trace_enter + * for the call number to look up in the table (r0). + */ + mr r0,r3 ld r3,GPR3(r1) ld r4,GPR4(r1) ld r5,GPR5(r1) @@ -354,6 +359,11 @@ _GLOBAL(_switch) mflr r20 /* Return to switch caller */ mfmsr r22 li r0, MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r0,r0,MSR_VSX@h /* Disable VSX */ +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION oris r0,r0,MSR_VEC@h /* Disable altivec */ @@ -384,16 +394,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r8,KSP(r4) /* new stack pointer */ BEGIN_FTR_SECTION - b 2f -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) -BEGIN_FTR_SECTION + BEGIN_FTR_SECTION_NESTED(95) clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ -END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) -BEGIN_FTR_SECTION + FTR_SECTION_ELSE_NESTED(95) clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95) +FTR_SECTION_ELSE + b 2f +ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -633,8 +643,7 @@ user_work: b .ret_from_except_lite 1: bl .save_nvgprs - li r3,0 - addi r4,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_FRAME_OVERHEAD bl .do_signal b .ret_from_except diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 821e152e093c..a088c064ae40 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -24,6 +24,29 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#ifdef CONFIG_VSX +#define REST_32FPVSRS(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_32FPRS(n,base); \ + b 3f; \ +2: REST_32VSRS(n,c,base); \ +3: + +#define SAVE_32FPVSRS(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + SAVE_32FPRS(n,base); \ + b 3f; \ +2: SAVE_32VSRS(n,c,base); \ +3: +#else +#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) +#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) +#endif + /* * This task wants to use the FPU now. * On UP, disable FP for the task which had the FPU previously, @@ -34,6 +57,11 @@ _GLOBAL(load_up_fpu) mfmsr r5 ori r5,r5,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif SYNC MTMSRD(r5) /* enable use of fpu now */ isync @@ -50,7 +78,7 @@ _GLOBAL(load_up_fpu) beq 1f toreal(r4) addi r4,r4,THREAD /* want last_task_used_math->thread */ - SAVE_32FPRS(0, r4) + SAVE_32FPVSRS(0, r5, r4) mffs fr0 stfd fr0,THREAD_FPSCR(r4) PPC_LL r5,PT_REGS(r4) @@ -77,7 +105,7 @@ _GLOBAL(load_up_fpu) #endif lfd fr0,THREAD_FPSCR(r5) MTFSF_L(fr0) - REST_32FPRS(0, r5) + REST_32FPVSRS(0, r4, r5) #ifndef CONFIG_SMP subi r4,r5,THREAD fromreal(r4) @@ -85,7 +113,7 @@ _GLOBAL(load_up_fpu) #endif /* CONFIG_SMP */ /* restore registers and return */ /* we haven't used ctr or xer or lr */ - b fast_exception_return + blr /* * giveup_fpu(tsk) @@ -96,6 +124,11 @@ _GLOBAL(load_up_fpu) _GLOBAL(giveup_fpu) mfmsr r5 ori r5,r5,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif SYNC_601 ISYNC_601 MTMSRD(r5) /* enable use of fpu now */ @@ -106,7 +139,7 @@ _GLOBAL(giveup_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 - SAVE_32FPRS(0, r3) + SAVE_32FPVSRS(0, r4 ,r3) mffs fr0 stfd fr0,THREAD_FPSCR(r3) beq 1f diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 785af9b56591..99ee2f0f0f2b 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -421,8 +421,10 @@ BEGIN_FTR_SECTION b ProgramCheck END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) EXCEPTION_PROLOG - bne load_up_fpu /* if from user, just load it up */ - addi r3,r1,STACK_FRAME_OVERHEAD + beq 1f + bl load_up_fpu /* if from user, just load it up */ + b fast_exception_return +1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) /* Decrementer */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 8552e67e3a8b..56d8e5d90c5b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -93,6 +93,12 @@ _ENTRY(crit_r10) .space 4 _ENTRY(crit_r11) .space 4 +_ENTRY(crit_srr0) + .space 4 +_ENTRY(crit_srr1) + .space 4 +_ENTRY(saved_ksp_limit) + .space 4 /* * Exception vector entry code. This code runs with address translation @@ -148,14 +154,14 @@ _ENTRY(crit_r11) mfcr r10; /* save CR in r10 for now */\ mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ andi. r11,r11,MSR_PR; \ - lis r11,critical_stack_top@h; \ - ori r11,r11,critical_stack_top@l; \ + lis r11,critirq_ctx@ha; \ + tophys(r11,r11); \ + lwz r11,critirq_ctx@l(r11); \ beq 1f; \ /* COMING FROM USER MODE */ \ mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,THREAD_SIZE; \ -1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ +1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ tophys(r11,r11); \ stw r10,_CCR(r11); /* save various registers */\ stw r12,GPR12(r11); \ @@ -996,16 +1002,6 @@ empty_zero_page: swapper_pg_dir: .space PGD_TABLE_SIZE - -/* Stack for handling critical exceptions from kernel mode */ - .section .bss - .align 12 -exception_stack_bottom: - .space 4096 -critical_stack_top: - .globl exception_stack_top -exception_stack_top: - /* Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 22b5d2c459a3..f3a1ea9d7fe4 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -293,119 +293,9 @@ interrupt_base: MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception) /* Data Storage Interrupt */ - START_EXCEPTION(DataStorage) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 - mtspr SPRN_SPRG4W, r12 - mtspr SPRN_SPRG5W, r13 - mfcr r11 - mtspr SPRN_SPRG7W, r11 - - /* - * Check if it was a store fault, if not then bail - * because a user tried to access a kernel or - * read-protected page. Otherwise, get the - * offending address and handle it. - */ - mfspr r10, SPRN_ESR - andis. r10, r10, ESR_ST@h - beq 2f - - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - - mfspr r12,SPRN_MMUCR - rlwinm r12,r12,0,0,23 /* Clear TID */ - - b 4f - - /* Get the PGD for the current thread */ -3: - mfspr r11,SPRN_SPRG3 - lwz r11,PGDIR(r11) - - /* Load PID into MMUCR TID */ - mfspr r12,SPRN_MMUCR /* Get MMUCR */ - mfspr r13,SPRN_PID /* Get PID */ - rlwimi r12,r13,0,24,31 /* Set TID */ - -4: - mtspr SPRN_MMUCR,r12 - - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ - lwzx r11, r12, r11 /* Get pgd/pmd entry */ - rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ - beq 2f /* Bail if no table */ - - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ - lwz r11, 4(r12) /* Get pte entry */ - - andi. r13, r11, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail if not */ - - /* Update 'changed'. - */ - ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE - stw r11, 4(r12) /* Update Linux page table */ - - li r13, PPC44x_TLB_SR@l /* Set SR */ - rlwimi r13, r11, 29, 29, 29 /* SX = _PAGE_HWEXEC */ - rlwimi r13, r11, 0, 30, 30 /* SW = _PAGE_RW */ - rlwimi r13, r11, 29, 28, 28 /* UR = _PAGE_USER */ - rlwimi r12, r11, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */ - rlwimi r12, r11, 29, 30, 30 /* (_PAGE_USER>>3)->r12 */ - and r12, r12, r11 /* HWEXEC/RW & USER */ - rlwimi r13, r12, 0, 26, 26 /* UX = HWEXEC & USER */ - rlwimi r13, r12, 3, 27, 27 /* UW = RW & USER */ - - rlwimi r11,r13,0,26,31 /* Insert static perms */ - - /* - * Clear U0-U3 and WL1 IL1I IL1D IL2I IL2D bits which are added - * on newer 440 cores like the 440x6 used on AMCC 460EX/460GT (see - * include/asm-powerpc/pgtable-ppc32.h for details). - */ - rlwinm r11,r11,0,20,10 - - /* find the TLB index that caused the fault. It has to be here. */ - tlbsx r10, 0, r10 - - tlbwe r11, r10, PPC44x_TLB_ATTRIB /* Write ATTRIB */ - - /* Done...restore registers and get out of here. - */ - mfspr r11, SPRN_SPRG7R - mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R + DATA_STORAGE_EXCEPTION - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 - rfi /* Force context change */ - -2: - /* - * The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mfspr r11, SPRN_SPRG7R - mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 - b data_access - - /* Instruction Storage Interrupt */ + /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ @@ -423,7 +313,6 @@ interrupt_base: #else EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif - /* System Call Interrupt */ START_EXCEPTION(SystemCall) NORMAL_EXCEPTION_PROLOG @@ -484,18 +373,57 @@ interrupt_base: 4: mtspr SPRN_MMUCR,r12 + /* Mask of required permission bits. Note that while we + * do copy ESR:ST to _PAGE_RW position as trying to write + * to an RO page is pretty common, we don't do it with + * _PAGE_DIRTY. We could do it, but it's a fairly rare + * event so I'd rather take the overhead when it happens + * rather than adding an instruction here. We should measure + * whether the whole thing is worth it in the first place + * as we could avoid loading SPRN_ESR completely in the first + * place... + * + * TODO: Is it worth doing that mfspr & rlwimi in the first + * place or can we save a couple of instructions here ? + */ + mfspr r12,SPRN_ESR + li r13,_PAGE_PRESENT|_PAGE_ACCESSED + rlwimi r13,r12,10,30,30 + + /* Load the PTE */ rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ lwzx r11, r12, r11 /* Get pgd/pmd entry */ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ beq 2f /* Bail if no table */ rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ - lwz r11, 4(r12) /* Get pte entry */ - andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ - beq 2f /* Bail if not present */ + lwz r11, 0(r12) /* Get high word of pte entry */ + lwz r12, 4(r12) /* Get low word of pte entry */ - ori r11, r11, _PAGE_ACCESSED - stw r11, 4(r12) + lis r10,tlb_44x_index@ha + + andc. r13,r13,r12 /* Check permission */ + + /* Load the next available TLB index */ + lwz r13,tlb_44x_index@l(r10) + + bne 2f /* Bail if permission mismach */ + + /* Increment, rollover, and store TLB index */ + addi r13,r13,1 + + /* Compare with watermark (instruction gets patched) */ + .globl tlb_44x_patch_hwater_D +tlb_44x_patch_hwater_D: + cmpwi 0,r13,1 /* reserve entries */ + ble 5f + li r13,0 +5: + /* Store the next available TLB index */ + stw r13,tlb_44x_index@l(r10) + + /* Re-load the faulting address */ + mfspr r10,SPRN_DEAR /* Jump to common tlb load */ b finish_tlb_load @@ -510,7 +438,7 @@ interrupt_base: mfspr r12, SPRN_SPRG4R mfspr r11, SPRN_SPRG1 mfspr r10, SPRN_SPRG0 - b data_access + b DataStorage /* Instruction TLB Error Interrupt */ /* @@ -554,18 +482,42 @@ interrupt_base: 4: mtspr SPRN_MMUCR,r12 + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ lwzx r11, r12, r11 /* Get pgd/pmd entry */ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ beq 2f /* Bail if no table */ rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ - lwz r11, 4(r12) /* Get pte entry */ - andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ - beq 2f /* Bail if not present */ + lwz r11, 0(r12) /* Get high word of pte entry */ + lwz r12, 4(r12) /* Get low word of pte entry */ - ori r11, r11, _PAGE_ACCESSED - stw r11, 4(r12) + lis r10,tlb_44x_index@ha + + andc. r13,r13,r12 /* Check permission */ + + /* Load the next available TLB index */ + lwz r13,tlb_44x_index@l(r10) + + bne 2f /* Bail if permission mismach */ + + /* Increment, rollover, and store TLB index */ + addi r13,r13,1 + + /* Compare with watermark (instruction gets patched) */ + .globl tlb_44x_patch_hwater_I +tlb_44x_patch_hwater_I: + cmpwi 0,r13,1 /* reserve entries */ + ble 5f + li r13,0 +5: + /* Store the next available TLB index */ + stw r13,tlb_44x_index@l(r10) + + /* Re-load the faulting address */ + mfspr r10,SPRN_SRR0 /* Jump to common TLB load point */ b finish_tlb_load @@ -587,86 +539,40 @@ interrupt_base: /* * Local functions - */ - /* - * Data TLB exceptions will bail out to this point - * if they can't resolve the lightweight TLB fault. - */ -data_access: - NORMAL_EXCEPTION_PROLOG - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ - stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - EXC_XFER_EE_LITE(0x0300, handle_page_fault) + */ /* * Both the instruction and data TLB miss get to this * point to load the TLB. * r10 - EA of fault - * r11 - available to use - * r12 - Pointer to the 64-bit PTE - * r13 - available to use + * r11 - PTE high word value + * r12 - PTE low word value + * r13 - TLB index * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ finish_tlb_load: - /* - * We set execute, because we don't have the granularity to - * properly set this at the page level (Linux problem). - * If shared is set, we cause a zero PID->TID load. - * Many of these bits are software only. Bits we don't set - * here we (properly should) assume have the appropriate value. - */ - - /* Load the next available TLB index */ - lis r13, tlb_44x_index@ha - lwz r13, tlb_44x_index@l(r13) - /* Load the TLB high watermark */ - lis r11, tlb_44x_hwater@ha - lwz r11, tlb_44x_hwater@l(r11) - - /* Increment, rollover, and store TLB index */ - addi r13, r13, 1 - cmpw 0, r13, r11 /* reserve entries */ - ble 7f - li r13, 0 -7: - /* Store the next available TLB index */ - lis r11, tlb_44x_index@ha - stw r13, tlb_44x_index@l(r11) - - lwz r11, 0(r12) /* Get MS word of PTE */ - lwz r12, 4(r12) /* Get LS word of PTE */ - rlwimi r11, r12, 0, 0 , 19 /* Insert RPN */ - tlbwe r11, r13, PPC44x_TLB_XLAT /* Write XLAT */ + /* Combine RPN & ERPN an write WS 0 */ + rlwimi r11,r12,0,0,19 + tlbwe r11,r13,PPC44x_TLB_XLAT /* - * Create PAGEID. This is the faulting address, + * Create WS1. This is the faulting address (EPN), * page size, and valid flag. */ - li r11, PPC44x_TLB_VALID | PPC44x_TLB_4K - rlwimi r10, r11, 0, 20, 31 /* Insert valid and page size */ - tlbwe r10, r13, PPC44x_TLB_PAGEID /* Write PAGEID */ - - li r10, PPC44x_TLB_SR@l /* Set SR */ - rlwimi r10, r12, 0, 30, 30 /* Set SW = _PAGE_RW */ - rlwimi r10, r12, 29, 29, 29 /* SX = _PAGE_HWEXEC */ - rlwimi r10, r12, 29, 28, 28 /* UR = _PAGE_USER */ - rlwimi r11, r12, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */ - and r11, r12, r11 /* HWEXEC & USER */ - rlwimi r10, r11, 0, 26, 26 /* UX = HWEXEC & USER */ - - rlwimi r12, r10, 0, 26, 31 /* Insert static perms */ - - /* - * Clear U0-U3 and WL1 IL1I IL1D IL2I IL2D bits which are added - * on newer 440 cores like the 440x6 used on AMCC 460EX/460GT (see - * include/asm-powerpc/pgtable-ppc32.h for details). - */ - rlwinm r12, r12, 0, 20, 10 - - tlbwe r12, r13, PPC44x_TLB_ATTRIB /* Write ATTRIB */ + li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K + rlwimi r10,r11,0,20,31 /* Insert valid and page size*/ + tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ + + /* And WS 2 */ + li r10,0xf85 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + and r11,r12,r10 /* Mask PTE bits to keep */ + andi. r10,r12,_PAGE_USER /* User page ? */ + beq 1f /* nope, leave U bits empty */ + rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. */ @@ -742,15 +648,6 @@ empty_zero_page: swapper_pg_dir: .space PGD_TABLE_SIZE -/* Reserved 4k for the critical exception stack & 4k for the machine - * check stack per CPU for kernel mode exceptions */ - .section .bss - .align 12 -exception_stack_bottom: - .space BOOKE_EXCEPTION_STACK_SIZE - .globl exception_stack_top -exception_stack_top: - /* * Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 25e84c0e1166..cc8fb474d520 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -275,7 +275,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) . = 0xf00 b performance_monitor_pSeries - STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable) + . = 0xf20 + b altivec_unavailable_pSeries + + . = 0xf40 + b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error) @@ -295,6 +299,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) /* moved from 0xf00 */ STD_EXCEPTION_PSERIES(., performance_monitor) + STD_EXCEPTION_PSERIES(., altivec_unavailable) + STD_EXCEPTION_PSERIES(., vsx_unavailable) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -739,7 +745,8 @@ fp_unavailable_common: ENABLE_INTS bl .kernel_fp_unavailable_exception BUG_OPCODE -1: b .load_up_fpu +1: bl .load_up_fpu + b fast_exception_return .align 7 .globl altivec_unavailable_common @@ -747,7 +754,10 @@ altivec_unavailable_common: EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION - bne .load_up_altivec /* if from user, just load it up */ + beq 1f + bl .load_up_altivec + b fast_exception_return +1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif bl .save_nvgprs @@ -827,9 +837,70 @@ _STATIC(load_up_altivec) std r4,0(r3) #endif /* CONFIG_SMP */ /* restore registers and return */ - b fast_exception_return + blr #endif /* CONFIG_ALTIVEC */ + .align 7 + .globl vsx_unavailable_common +vsx_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + bne .load_up_vsx +1: +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .vsx_unavailable_exception + b .ret_from_except + +#ifdef CONFIG_VSX +/* + * load_up_vsx(unused, unused, tsk) + * Disable VSX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Reuse the fp and vsx saves, but first check to see if they have + * been saved already. + * On entry: r13 == 'current' && last_task_used_vsx != 'current' + */ +_STATIC(load_up_vsx) +/* Load FP and VSX registers if they haven't been done yet */ + andi. r5,r12,MSR_FP + beql+ load_up_fpu /* skip if already loaded */ + andis. r5,r12,MSR_VEC@h + beql+ load_up_altivec /* skip if already loaded */ + +#ifndef CONFIG_SMP + ld r3,last_task_used_vsx@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Disable VSX for last_task_used_vsx */ + addi r4,r4,THREAD + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VSX@h + andc r6,r4,r6 + std r6,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + ld r4,PACACURRENT(r13) + addi r4,r4,THREAD /* Get THREAD */ + li r6,1 + stw r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */ + /* enable use of VSX after return */ + oris r12,r12,MSR_VSX@h + std r12,_MSR(r1) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + ld r4,PACACURRENT(r13) + std r4,0(r3) +#endif /* CONFIG_SMP */ + b fast_exception_return +#endif /* CONFIG_VSX */ + /* * Hash table stuff */ @@ -1127,7 +1198,6 @@ _GLOBAL(generic_secondary_smp_init) 3: HMT_LOW lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ /* start. */ - sync #ifndef CONFIG_SMP b 3b /* Never go on non-SMP */ @@ -1135,6 +1205,8 @@ _GLOBAL(generic_secondary_smp_init) cmpwi 0,r23,0 beq 3b /* Loop until told to go */ + sync /* order paca.run and cur_cpu_spec */ + /* See if we need to call a cpu state restore handler */ LOAD_REG_IMMEDIATE(r23, cur_cpu_spec) ld r23,0(r23) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index aefafc6330c9..fce2df988504 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -43,9 +43,7 @@ SAVE_2GPRS(7, r11) /* To handle the additional exception priority levels on 40x and Book-E - * processors we allocate a 4k stack per additional priority level. The various - * head_xxx.S files allocate space (exception_stack_top) for each priority's - * stack times the number of CPUs + * processors we allocate a stack per additional priority level. * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check @@ -61,36 +59,37 @@ * going to critical or their own debug level we aren't currently * providing configurations that micro-optimize space usage. */ -#ifdef CONFIG_44x -#define NUM_EXCEPTION_LVLS 2 -#else -#define NUM_EXCEPTION_LVLS 3 -#endif -#define BOOKE_EXCEPTION_STACK_SIZE (4096 * NUM_EXCEPTION_LVLS) /* CRIT_SPRG only used in critical exception handling */ #define CRIT_SPRG SPRN_SPRG2 /* MCHECK_SPRG only used in machine check exception handling */ #define MCHECK_SPRG SPRN_SPRG6W -#define MCHECK_STACK_TOP (exception_stack_top - 4096) -#define CRIT_STACK_TOP (exception_stack_top) +#define MCHECK_STACK_BASE mcheckirq_ctx +#define CRIT_STACK_BASE critirq_ctx -/* only on e200 for now */ -#define DEBUG_STACK_TOP (exception_stack_top - 8192) +/* only on e500mc/e200 */ +#define DEBUG_STACK_BASE dbgirq_ctx +#ifdef CONFIG_PPC_E500MC +#define DEBUG_SPRG SPRN_SPRG9 +#else #define DEBUG_SPRG SPRN_SPRG6W +#endif + +#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ - mulli r8,r8,BOOKE_EXCEPTION_STACK_SIZE; \ - neg r8,r8; \ - addis r8,r8,level##_STACK_TOP@ha; \ - addi r8,r8,level##_STACK_TOP@l + slwi r8,r8,2; \ + addis r8,r8,level##_STACK_BASE@ha; \ + lwz r8,level##_STACK_BASE@l(r8); \ + addi r8,r8,EXC_LVL_FRAME_OVERHEAD; #else #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ - lis r8,level##_STACK_TOP@h; \ - ori r8,r8,level##_STACK_TOP@l + lis r8,level##_STACK_BASE@ha; \ + lwz r8,level##_STACK_BASE@l(r8); \ + addi r8,r8,EXC_LVL_FRAME_OVERHEAD; #endif /* @@ -104,22 +103,36 @@ #define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ mtspr exc_level##_SPRG,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ - stw r10,GPR10-INT_FRAME_SIZE(r8); \ - stw r11,GPR11-INT_FRAME_SIZE(r8); \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,exc_level_srr1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ - mr r11,r8; \ - mfspr r8,exc_level##_SPRG; \ - beq 1f; \ - /* COMING FROM USER MODE */ \ + stw r9,GPR9(r8); /* save various registers */\ + mfcr r9; /* save CR in r9 for now */\ + stw r10,GPR10(r8); \ + stw r11,GPR11(r8); \ + stw r9,_CCR(r8); /* save CR on stack */\ + mfspr r10,exc_level_srr1; /* check whether user or kernel */\ + andi. r10,r10,MSR_PR; \ mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,THREAD_SIZE; \ -1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ + addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + beq 1f; \ + /* COMING FROM USER MODE */ \ + stw r9,_CCR(r11); /* save CR */\ + lwz r10,GPR10(r8); /* copy regs from exception stack */\ + lwz r9,GPR9(r8); \ + stw r10,GPR10(r11); \ + lwz r10,GPR11(r8); \ stw r9,GPR9(r11); \ + stw r10,GPR11(r11); \ + b 2f; \ + /* COMING FROM PRIV MODE */ \ +1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \ + lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \ + stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \ + stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \ + lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \ + stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \ + mr r11,r8; \ +2: mfspr r8,exc_level##_SPRG; \ + stw r12,GPR12(r11); /* save various registers */\ mflr r10; \ stw r10,_LINK(r11); \ mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ @@ -231,7 +244,7 @@ label: * the code where the exception occurred (since exception entry \ * doesn't turn off DE automatically). We simulate the effect \ * of turning off DE on entry to an exception handler by turning \ - * off DE in the CSRR1 value and clearing the debug status. \ + * off DE in the DSRR1 value and clearing the debug status. \ */ \ mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ andis. r10,r10,DBSR_IC@h; \ @@ -262,17 +275,17 @@ label: lwz r12,GPR12(r11); \ mtspr DEBUG_SPRG,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \ - lwz r10,GPR10-INT_FRAME_SIZE(r8); \ - lwz r11,GPR11-INT_FRAME_SIZE(r8); \ + lwz r10,GPR10(r8); \ + lwz r11,GPR11(r8); \ mfspr r8,DEBUG_SPRG; \ \ RFDI; \ b .; \ \ - /* continue normal handling for a critical exception... */ \ + /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ @@ -315,8 +328,8 @@ label: lwz r12,GPR12(r11); \ mtspr CRIT_SPRG,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */ \ - lwz r10,GPR10-INT_FRAME_SIZE(r8); \ - lwz r11,GPR11-INT_FRAME_SIZE(r8); \ + lwz r10,GPR10(r8); \ + lwz r11,GPR11(r8); \ mfspr r8,CRIT_SPRG; \ \ rfci; \ @@ -327,6 +340,14 @@ label: addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) +#define DATA_STORAGE_EXCEPTION \ + START_EXCEPTION(DataStorage) \ + NORMAL_EXCEPTION_PROLOG; \ + mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ + stw r5,_ESR(r11); \ + mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ + EXC_XFER_EE_LITE(0x0300, handle_page_fault) + #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ NORMAL_EXCEPTION_PROLOG; \ @@ -363,8 +384,31 @@ label: #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ NORMAL_EXCEPTION_PROLOG; \ - bne load_up_fpu; /* if from user, just load it up */ \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ + beq 1f; \ + bl load_up_fpu; /* if from user, just load it up */ \ + b fast_exception_return; \ +1: addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) +#ifndef __ASSEMBLY__ +struct exception_regs { + unsigned long mas0; + unsigned long mas1; + unsigned long mas2; + unsigned long mas3; + unsigned long mas6; + unsigned long mas7; + unsigned long srr0; + unsigned long srr1; + unsigned long csrr0; + unsigned long csrr1; + unsigned long dsrr0; + unsigned long dsrr1; + unsigned long saved_ksp_limit; +}; + +/* ensure this structure is always sized to a multiple of the stack alignment */ +#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16) + +#endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index e581524d85bc..3cb52fa0eda3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -39,6 +39,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> #include "head_booke.h" /* As with the other PowerPC ports, it is expected that when code @@ -150,16 +151,11 @@ skpinv: addi r6,r6,1 /* Increment */ /* Invalidate TLB0 */ li r6,0x04 tlbivax 0,r6 -#ifdef CONFIG_SMP - tlbsync -#endif + TLBSYNC /* Invalidate TLB1 */ li r6,0x0c tlbivax 0,r6 -#ifdef CONFIG_SMP - tlbsync -#endif - msync + TLBSYNC /* 3. Setup a temp mapping and jump to it */ andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */ @@ -237,10 +233,7 @@ skpinv: addi r6,r6,1 /* Increment */ /* Invalidate TLB1 */ li r9,0x0c tlbivax 0,r9 -#ifdef CONFIG_SMP - tlbsync -#endif - msync + TLBSYNC /* 6. Setup KERNELBASE mapping in TLB1[0] */ lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ @@ -282,10 +275,7 @@ skpinv: addi r6,r6,1 /* Increment */ /* Invalidate TLB1 */ li r9,0x0c tlbivax 0,r9 -#ifdef CONFIG_SMP - tlbsync -#endif - msync + TLBSYNC /* Establish the interrupt vector offsets */ SET_IVOR(0, CriticalInput); @@ -304,7 +294,7 @@ skpinv: addi r6,r6,1 /* Increment */ SET_IVOR(13, DataTLBError); SET_IVOR(14, InstructionTLBError); SET_IVOR(15, DebugDebug); -#if defined(CONFIG_E500) +#if defined(CONFIG_E500) && !defined(CONFIG_PPC_E500MC) SET_IVOR(15, DebugCrit); #endif SET_IVOR(32, SPEUnavailable); @@ -313,6 +303,9 @@ skpinv: addi r6,r6,1 /* Increment */ #ifndef CONFIG_E200 SET_IVOR(35, PerformanceMonitor); #endif +#ifdef CONFIG_PPC_E500MC + SET_IVOR(36, Doorbell); +#endif /* Establish the interrupt vector base */ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ @@ -479,90 +472,16 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - mtspr SPRN_SPRG0, r10 /* Save some working registers */ - mtspr SPRN_SPRG1, r11 - mtspr SPRN_SPRG4W, r12 - mtspr SPRN_SPRG5W, r13 - mfcr r11 - mtspr SPRN_SPRG7W, r11 - - /* - * Check if it was a store fault, if not then bail - * because a user tried to access a kernel or - * read-protected page. Otherwise, get the - * offending address and handle it. - */ - mfspr r10, SPRN_ESR - andis. r10, r10, ESR_ST@h - beq 2f - - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw 0, r10, r11 - bge 2f - - /* Get the PGD for the current thread */ -3: - mfspr r11,SPRN_SPRG3 - lwz r11,PGDIR(r11) -4: - FIND_PTE - - /* Are _PAGE_USER & _PAGE_RW set & _PAGE_HWWRITE not? */ - andi. r13, r11, _PAGE_RW|_PAGE_USER|_PAGE_HWWRITE - cmpwi 0, r13, _PAGE_RW|_PAGE_USER - bne 2f /* Bail if not */ - - /* Update 'changed'. */ - ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE - stw r11, PTE_FLAGS_OFFSET(r12) /* Update Linux page table */ - - /* MAS2 not updated as the entry does exist in the tlb, this - fault taken to detect state transition (eg: COW -> DIRTY) - */ - andi. r11, r11, _PAGE_HWEXEC - rlwimi r11, r11, 31, 27, 27 /* SX <- _PAGE_HWEXEC */ - ori r11, r11, (MAS3_UW|MAS3_SW|MAS3_UR|MAS3_SR)@l /* set static perms */ - - /* update search PID in MAS6, AS = 0 */ - mfspr r12, SPRN_PID0 - slwi r12, r12, 16 - mtspr SPRN_MAS6, r12 - - /* find the TLB index that caused the fault. It has to be here. */ - tlbsx 0, r10 - - /* only update the perm bits, assume the RPN is fine */ - mfspr r12, SPRN_MAS3 - rlwimi r12, r11, 0, 20, 31 - mtspr SPRN_MAS3,r12 - tlbwe - - /* Done...restore registers and get out of here. */ - mfspr r11, SPRN_SPRG7R - mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 - rfi /* Force context change */ - -2: - /* - * The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mfspr r11, SPRN_SPRG7R - mtcr r11 - mfspr r13, SPRN_SPRG5R - mfspr r12, SPRN_SPRG4R - mfspr r11, SPRN_SPRG1 - mfspr r10, SPRN_SPRG0 - b data_access + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + andis. r10,r5,(ESR_ILK|ESR_DLK)@h + bne 1f + EXC_XFER_EE_LITE(0x0300, handle_page_fault) +1: + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x0300, CacheLockingException) /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION @@ -641,15 +560,30 @@ interrupt_base: lwz r11,PGDIR(r11) 4: + /* Mask of required permission bits. Note that while we + * do copy ESR:ST to _PAGE_RW position as trying to write + * to an RO page is pretty common, we don't do it with + * _PAGE_DIRTY. We could do it, but it's a fairly rare + * event so I'd rather take the overhead when it happens + * rather than adding an instruction here. We should measure + * whether the whole thing is worth it in the first place + * as we could avoid loading SPRN_ESR completely in the first + * place... + * + * TODO: Is it worth doing that mfspr & rlwimi in the first + * place or can we save a couple of instructions here ? + */ + mfspr r12,SPRN_ESR + li r13,_PAGE_PRESENT|_PAGE_ACCESSED + rlwimi r13,r12,11,29,29 + FIND_PTE - andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ - beq 2f /* Bail if not present */ + andc. r13,r13,r11 /* Check permission */ + bne 2f /* Bail if permission mismach */ #ifdef CONFIG_PTE_64BIT lwz r13, 0(r12) #endif - ori r11, r11, _PAGE_ACCESSED - stw r11, PTE_FLAGS_OFFSET(r12) /* Jump to common tlb load */ b finish_tlb_load @@ -663,7 +597,7 @@ interrupt_base: mfspr r12, SPRN_SPRG4R mfspr r11, SPRN_SPRG1 mfspr r10, SPRN_SPRG0 - b data_access + b DataStorage /* Instruction TLB Error Interrupt */ /* @@ -701,15 +635,16 @@ interrupt_base: lwz r11,PGDIR(r11) 4: + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC + FIND_PTE - andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ - beq 2f /* Bail if not present */ + andc. r13,r13,r11 /* Check permission */ + bne 2f /* Bail if permission mismach */ #ifdef CONFIG_PTE_64BIT lwz r13, 0(r12) #endif - ori r11, r11, _PAGE_ACCESSED - stw r11, PTE_FLAGS_OFFSET(r12) /* Jump to common TLB load point */ b finish_tlb_load @@ -750,10 +685,13 @@ interrupt_base: /* Performance Monitor */ EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) +#ifdef CONFIG_PPC_E500MC + EXCEPTION(0x2070, Doorbell, unknown_exception, EXC_XFER_EE) +#endif /* Debug Interrupt */ DEBUG_DEBUG_EXCEPTION -#if defined(CONFIG_E500) +#if defined(CONFIG_E500) && !defined(CONFIG_PPC_E500MC) DEBUG_CRIT_EXCEPTION #endif @@ -761,29 +699,13 @@ interrupt_base: * Local functions */ - /* - * Data TLB exceptions will bail out to this point - * if they can't resolve the lightweight TLB fault. - */ -data_access: - NORMAL_EXCEPTION_PROLOG - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ - stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - andis. r10,r5,(ESR_ILK|ESR_DLK)@h - bne 1f - EXC_XFER_EE_LITE(0x0300, handle_page_fault) -1: - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x0300, CacheLockingException) - /* - * Both the instruction and data TLB miss get to this * point to load the TLB. * r10 - EA of fault * r11 - TLB (info from Linux PTE) - * r12, r13 - available to use + * r12 - available to use + * r13 - upper bits of PTE (if PTE_64BIT) or available to use * CR5 - results of addr >= PAGE_OFFSET * MAS0, MAS1 - loaded with proper value when we get here * MAS2, MAS3 - will need additional info from Linux PTE @@ -805,20 +727,14 @@ finish_tlb_load: #endif mtspr SPRN_MAS2, r12 - bge 5, 1f - - /* is user addr */ - andi. r12, r11, (_PAGE_USER | _PAGE_HWWRITE | _PAGE_HWEXEC) + li r10, (_PAGE_HWEXEC | _PAGE_PRESENT) + rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ + and r12, r11, r10 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ - srwi r10, r12, 1 - or r12, r12, r10 /* Copy user perms into supervisor */ - iseleq r12, 0, r12 - b 2f - - /* is kernel addr */ -1: rlwinm r12, r11, 31, 29, 29 /* Extract _PAGE_HWWRITE into SW */ - ori r12, r12, (MAS3_SX | MAS3_SR) - + slwi r10, r12, 1 + or r10, r10, r12 + iseleq r12, r12, r10 + #ifdef CONFIG_PTE_64BIT 2: rlwimi r12, r13, 24, 0, 7 /* grab RPN[32:39] */ rlwimi r12, r11, 24, 8, 19 /* grab RPN[40:51] */ @@ -1065,6 +981,52 @@ _GLOBAL(set_context) isync /* Force context change */ blr +_GLOBAL(flush_dcache_L1) + mfspr r3,SPRN_L1CFG0 + + rlwinm r5,r3,9,3 /* Extract cache block size */ + twlgti r5,1 /* Only 32 and 64 byte cache blocks + * are currently defined. + */ + li r4,32 + subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - + * log2(number of ways) + */ + slw r5,r4,r5 /* r5 = cache block size */ + + rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ + mulli r7,r7,13 /* An 8-way cache will require 13 + * loads per set. + */ + slw r7,r7,r6 + + /* save off HID0 and set DCFA */ + mfspr r8,SPRN_HID0 + ori r9,r8,HID0_DCFA@l + mtspr SPRN_HID0,r9 + isync + + lis r4,KERNELBASE@h + mtctr r7 + +1: lwz r3,0(r4) /* Load... */ + add r4,r4,r5 + bdnz 1b + + msync + lis r4,KERNELBASE@h + mtctr r7 + +1: dcbf 0,r4 /* ...and flush. */ + add r4,r4,r5 + bdnz 1b + + /* restore HID0 */ + mtspr SPRN_HID0,r8 + isync + + blr + /* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. @@ -1080,15 +1042,6 @@ empty_zero_page: swapper_pg_dir: .space PGD_TABLE_SIZE -/* Reserved 4k for the critical exception stack & 4k for the machine - * check stack per CPU for kernel mode exceptions */ - .section .bss - .align 12 -exception_stack_bottom: - .space BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS - .globl exception_stack_top -exception_stack_top: - /* * Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 9971159c8040..9d42eb57aea3 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -53,7 +53,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */ struct bus_type ibmebus_bus_type; /* These devices will automatically be added to the bus during init */ -static struct of_device_id __initdata builtin_matches[] = { +static struct of_device_id __initdata ibmebus_matches[] = { { .compatible = "IBM,lhca" }, { .compatible = "IBM,lhea" }, {}, @@ -82,7 +82,8 @@ static void ibmebus_free_coherent(struct device *dev, static dma_addr_t ibmebus_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { return (dma_addr_t)(ptr); } @@ -90,14 +91,16 @@ static dma_addr_t ibmebus_map_single(struct device *dev, static void ibmebus_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { return; } static int ibmebus_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -112,7 +115,8 @@ static int ibmebus_map_sg(struct device *dev, static void ibmebus_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { return; } @@ -350,7 +354,7 @@ static int __init ibmebus_bus_init(void) return err; } - err = ibmebus_create_devices(builtin_matches); + err = ibmebus_create_devices(ibmebus_matches); if (err) { device_unregister(&ibmebus_bus_device); bus_unregister(&ibmebus_bus_type); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index c3cf0e8f3ac1..d308a9f70f1b 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -60,7 +60,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(); + tick_nohz_stop_sched_tick(1); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 01bcd52bbf8e..019b02d8844f 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -153,7 +153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * address of current. R11 points to the exception frame (physical * address). We have to preserve r10. */ -_GLOBAL(power_save_6xx_restore) +_GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */ stw r9,_NIP(r11) /* make it do a blr */ diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S new file mode 100644 index 000000000000..06304034b393 --- /dev/null +++ b/arch/powerpc/kernel/idle_e500.S @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Dave Liu <daveliu@freescale.com> + * copy from idle_6xx.S and modify for e500 based processor, + * implement the power_save function in idle. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + + .text + +_GLOBAL(e500_idle) + rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4,r4,_TLF_NAPPING /* so when we take an exception */ + stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + + /* Check if we can nap or doze, put HID0 mask in r3 */ + lis r3,0 +BEGIN_FTR_SECTION + lis r3,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + +BEGIN_FTR_SECTION + /* Now check if user enabled NAP mode */ + lis r4,powersave_nap@ha + lwz r4,powersave_nap@l(r4) + cmpwi 0,r4,0 + beq 1f + stwu r1,-16(r1) + mflr r0 + stw r0,20(r1) + bl flush_dcache_L1 + lwz r0,20(r1) + addi r1,r1,16 + mtlr r0 + lis r3,HID0_NAP@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) +BEGIN_FTR_SECTION + msync + li r7,L2CSR0_L2FL@l + mtspr SPRN_L2CSR0,r7 +2: + mfspr r7,SPRN_L2CSR0 + andi. r4,r7,L2CSR0_L2FL@l + bne 2b +END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP) +1: + /* Go to NAP or DOZE now */ + mfspr r4,SPRN_HID0 + rlwinm r4,r4,0,~(HID0_DOZE|HID0_NAP|HID0_SLEEP) + or r4,r4,r3 + isync + mtspr SPRN_HID0,r4 + isync + + mfmsr r7 + oris r7,r7,MSR_WE@h + ori r7,r7,MSR_EE + msync + mtmsr r7 + isync +2: b 2b + +/* + * Return from NAP/DOZE mode, restore some CPU specific registers, + * r2 containing physical address of current. + * r11 points to the exception frame (physical address). + * We have to preserve r10. + */ +_GLOBAL(power_save_ppc32_restore) + lwz r9,_LINK(r11) /* interrupted in e500_idle */ + stw r9,_NIP(r11) /* make it do a blr */ + +#ifdef CONFIG_SMP + mfspr r12,SPRN_SPRG3 + lwz r11,TI_CPU(r12) /* get cpu number * 4 */ + slwi r11,r11,2 +#else + li r11,0 +#endif + b transfer_to_handler_cont diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0c663669bc32..550a19399bfa 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -49,6 +49,8 @@ static int novmerge = 1; static int protect4gb = 1; +static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); + static inline unsigned long iommu_num_pages(unsigned long vaddr, unsigned long slen) { @@ -186,10 +188,12 @@ static unsigned long iommu_range_alloc(struct device *dev, static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *page, unsigned int npages, enum dma_data_direction direction, - unsigned long mask, unsigned int align_order) + unsigned long mask, unsigned int align_order, + struct dma_attrs *attrs) { unsigned long entry, flags; dma_addr_t ret = DMA_ERROR_CODE; + int build_fail; spin_lock_irqsave(&(tbl->it_lock), flags); @@ -204,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ /* Put the TCEs in the HW table */ - ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, - direction); + build_fail = ppc_md.tce_build(tbl, entry, npages, + (unsigned long)page & IOMMU_PAGE_MASK, + direction, attrs); + + /* ppc_md.tce_build() only returns non-zero for transient errors. + * Clean up the table bitmap in this case and return + * DMA_ERROR_CODE. For all other errors the functionality is + * not altered. + */ + if (unlikely(build_fail)) { + __iommu_free(tbl, ret, npages); + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } /* Flush/invalidate TLB caches if necessary */ if (ppc_md.tce_flush) @@ -267,15 +283,15 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, spin_unlock_irqrestore(&(tbl->it_lock), flags); } -int iommu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, unsigned long mask, - enum dma_data_direction direction) +int iommu_map_sg(struct device *dev, struct iommu_table *tbl, + struct scatterlist *sglist, int nelems, + unsigned long mask, enum dma_data_direction direction, + struct dma_attrs *attrs) { - struct iommu_table *tbl = dev->archdata.dma_data; dma_addr_t dma_next = 0, dma_addr; unsigned long flags; struct scatterlist *s, *outs, *segstart; - int outcount, incount, i; + int outcount, incount, i, build_fail = 0; unsigned int align; unsigned long handle; unsigned int max_seg_size; @@ -336,7 +352,11 @@ int iommu_map_sg(struct device *dev, struct scatterlist *sglist, npages, entry, dma_addr); /* Insert into HW table */ - ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, direction); + build_fail = ppc_md.tce_build(tbl, entry, npages, + vaddr & IOMMU_PAGE_MASK, + direction, attrs); + if(unlikely(build_fail)) + goto failure; /* If we are in an open segment, try merging */ if (segstart != s) { @@ -412,7 +432,8 @@ int iommu_map_sg(struct device *dev, struct scatterlist *sglist, void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *sg; unsigned long flags; @@ -554,7 +575,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) */ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl, void *vaddr, size_t size, unsigned long mask, - enum dma_data_direction direction) + enum dma_data_direction direction, struct dma_attrs *attrs) { dma_addr_t dma_handle = DMA_ERROR_CODE; unsigned long uaddr; @@ -572,7 +593,8 @@ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl, align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, - mask >> IOMMU_PAGE_SHIFT, align); + mask >> IOMMU_PAGE_SHIFT, align, + attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { printk(KERN_INFO "iommu_alloc failed, " @@ -587,7 +609,8 @@ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl, } void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) { unsigned int npages; @@ -640,7 +663,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, nio_pages = size >> IOMMU_PAGE_SHIFT; io_order = get_iommu_order(size); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> IOMMU_PAGE_SHIFT, io_order); + mask >> IOMMU_PAGE_SHIFT, io_order, NULL); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index dcc946e67099..d972decf0324 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -77,22 +77,12 @@ static int ppc_spurious_interrupts; EXPORT_SYMBOL(__irq_offset_value); atomic_t ppc_n_lost_interrupts; -#ifndef CONFIG_PPC_MERGE -#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) -unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; -#endif - #ifdef CONFIG_TAU_INT extern int tau_initialized; extern int tau_interrupts(int); #endif #endif /* CONFIG_PPC32 */ -#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE) -extern atomic_t ipi_recv; -extern atomic_t ipi_sent; -#endif - #ifdef CONFIG_PPC64 EXPORT_SYMBOL(irq_desc); @@ -216,21 +206,14 @@ int show_interrupts(struct seq_file *p, void *v) skip: spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { -#ifdef CONFIG_PPC32 -#ifdef CONFIG_TAU_INT +#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized){ seq_puts(p, "TAU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", tau_interrupts(j)); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } -#endif -#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE) - /* should this be per processor send/receive? */ - seq_printf(p, "IPI (recv/sent): %10u/%u\n", - atomic_read(&ipi_recv), atomic_read(&ipi_sent)); -#endif -#endif /* CONFIG_PPC32 */ +#endif /* CONFIG_PPC32 && CONFIG_TAU_INT*/ seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); } return 0; @@ -356,9 +339,42 @@ void __init init_IRQ(void) { if (ppc_md.init_IRQ) ppc_md.init_IRQ(); + + exc_lvl_ctx_init(); + irq_ctx_init(); } +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; +struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; + +void exc_lvl_ctx_init(void) +{ + struct thread_info *tp; + int i; + + for_each_possible_cpu(i) { + memset((void *)critirq_ctx[i], 0, THREAD_SIZE); + tp = critirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = 0; + +#ifdef CONFIG_BOOKE + memset((void *)dbgirq_ctx[i], 0, THREAD_SIZE); + tp = dbgirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = 0; + + memset((void *)mcheckirq_ctx[i], 0, THREAD_SIZE); + tp = mcheckirq_ctx[i]; + tp->cpu = i; + tp->preempt_count = HARDIRQ_OFFSET; +#endif + } +} +#endif #ifdef CONFIG_IRQSTACKS struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; @@ -421,8 +437,6 @@ void do_softirq(void) * IRQ controller and virtual interrupts */ -#ifdef CONFIG_PPC_MERGE - static LIST_HEAD(irq_hosts); static DEFINE_SPINLOCK(irq_big_lock); static DEFINE_PER_CPU(unsigned int, irq_radix_reader); @@ -465,7 +479,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, host->revmap_type = revmap_type; host->inval_irq = inval_irq; host->ops = ops; - host->of_node = of_node; + host->of_node = of_node_get(of_node); if (host->ops->match == NULL) host->ops->match = default_irq_host_match; @@ -1081,8 +1095,6 @@ static int __init irq_debugfs_init(void) __initcall(irq_debugfs_init); #endif /* CONFIG_VIRQ_DEBUG */ -#endif /* CONFIG_PPC_MERGE */ - #ifdef CONFIG_PPC64 static int __init setup_noirqdistrib(char *str) { diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c new file mode 100644 index 000000000000..b4fdf2f2743c --- /dev/null +++ b/arch/powerpc/kernel/kgdb.c @@ -0,0 +1,410 @@ +/* + * PowerPC backend to the KGDB stub. + * + * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) + * Copyright (C) 2003 Timesys Corporation. + * Copyright (C) 2004-2006 MontaVista Software, Inc. + * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) + * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and + * Sergei Shtylyov <sshtylyov@ru.mvista.com> + * Copyright (C) 2007-2008 Wind River Systems, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program as licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/kgdb.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/ptrace.h> +#include <asm/current.h> +#include <asm/processor.h> +#include <asm/machdep.h> + +/* + * This table contains the mapping between PowerPC hardware trap types, and + * signals, which are primarily what GDB understands. GDB and the kernel + * don't always agree on values, so we use constants taken from gdb-6.2. + */ +static struct hard_trap_info +{ + unsigned int tt; /* Trap type code for powerpc */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ + { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ + { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ + { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ + { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ + { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ + { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ + { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ + { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ +#if defined(CONFIG_FSL_BOOKE) + { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ + { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ + { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ + { 0x2060, 0x0e /* SIGILL */ }, /* performace monitor */ + { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ + { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ +#else /* ! CONFIG_FSL_BOOKE */ + { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */ + { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ + { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ + { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ + { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ +#endif +#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ + { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ +#if defined(CONFIG_8xx) + { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ +#else /* ! CONFIG_8xx */ + { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ + { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ + { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ +#if defined(CONFIG_PPC64) + { 0x1200, 0x05 /* SIGILL */ }, /* system error */ + { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */ + { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */ + { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1800, 0x04 /* SIGILL */ }, /* thermal */ +#else /* ! CONFIG_PPC64 */ + { 0x1400, 0x02 /* SIGINT */ }, /* SMI */ + { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */ + { 0x1700, 0x04 /* SIGILL */ }, /* TAU */ + { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */ +#endif +#endif +#endif + { 0x0000, 0x00 } /* Must be last */ +}; + +static int computeSignal(unsigned int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGHUP; /* default for things we don't know about */ +} + +static int kgdb_call_nmi_hook(struct pt_regs *regs) +{ + kgdb_nmicallback(raw_smp_processor_id(), regs); + return 0; +} + +#ifdef CONFIG_SMP +void kgdb_roundup_cpus(unsigned long flags) +{ + smp_send_debugger_break(MSG_ALL_BUT_SELF); +} +#endif + +/* KGDB functions to use existing PowerPC64 hooks. */ +static int kgdb_debugger(struct pt_regs *regs) +{ + return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); +} + +static int kgdb_handle_breakpoint(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0) + return 0; + + if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) + regs->nip += 4; + + return 1; +} + +static int kgdb_singlestep(struct pt_regs *regs) +{ + struct thread_info *thread_info, *exception_thread_info; + + if (user_mode(regs)) + return 0; + + /* + * On Book E and perhaps other processsors, singlestep is handled on + * the critical exception stack. This causes current_thread_info() + * to fail, since it it locates the thread_info by masking off + * the low bits of the current stack pointer. We work around + * this issue by copying the thread_info from the kernel stack + * before calling kgdb_handle_exception, and copying it back + * afterwards. On most processors the copy is avoided since + * exception_thread_info == thread_info. + */ + thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); + exception_thread_info = current_thread_info(); + + if (thread_info != exception_thread_info) + memcpy(exception_thread_info, thread_info, sizeof *thread_info); + + kgdb_handle_exception(0, SIGTRAP, 0, regs); + + if (thread_info != exception_thread_info) + memcpy(thread_info, exception_thread_info, sizeof *thread_info); + + return 1; +} + +static int kgdb_iabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0) + return 0; + return 1; +} + +static int kgdb_dabr_match(struct pt_regs *regs) +{ + if (user_mode(regs)) + return 0; + + if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0) + return 0; + return 1; +} + +#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0) + +#define PACK32(ptr, src) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + *(ptr32++) = (src); \ + ptr = (unsigned long *)ptr32; \ + } while (0) + + +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + for (reg = 0; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, current->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + + STACK_FRAME_OVERHEAD); + unsigned long *ptr = gdb_regs; + int reg; + + memset(gdb_regs, 0, NUMREGBYTES); + + /* Regs GPR0-2 */ + for (reg = 0; reg < 3; reg++) + PACK64(ptr, regs->gpr[reg]); + + /* Regs GPR3-13 are caller saved, not in regs->gpr[] */ + ptr += 11; + + /* Regs GPR14-31 */ + for (reg = 14; reg < 32; reg++) + PACK64(ptr, regs->gpr[reg]); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + PACK64(ptr, p->thread.evr[reg]); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(long); +#endif + + PACK64(ptr, regs->nip); + PACK64(ptr, regs->msr); + PACK32(ptr, regs->ccr); + PACK64(ptr, regs->link); + PACK64(ptr, regs->ctr); + PACK32(ptr, regs->xer); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +#define UNPACK64(dest, ptr) do { dest = *(ptr++); } while (0) + +#define UNPACK32(dest, ptr) do { \ + u32 *ptr32; \ + ptr32 = (u32 *)ptr; \ + dest = *(ptr32++); \ + ptr = (unsigned long *)ptr32; \ + } while (0) + +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + unsigned long *ptr = gdb_regs; + int reg; +#ifdef CONFIG_SPE + union { + u32 v32[2]; + u64 v64; + } acc; +#endif + + for (reg = 0; reg < 32; reg++) + UNPACK64(regs->gpr[reg], ptr); + +#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_SPE + for (reg = 0; reg < 32; reg++) + UNPACK64(current->thread.evr[reg], ptr); +#else + ptr += 32; +#endif +#else + /* fp registers not used by kernel, leave zero */ + ptr += 32 * 8 / sizeof(int); +#endif + + UNPACK64(regs->nip, ptr); + UNPACK64(regs->msr, ptr); + UNPACK32(regs->ccr, ptr); + UNPACK64(regs->link, ptr); + UNPACK64(regs->ctr, ptr); + UNPACK32(regs->xer, ptr); + + BUG_ON((unsigned long)ptr > + (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); +} + +/* + * This function does PowerPC specific procesing for interfacing to gdb. + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long addr; + + switch (remcom_in_buffer[0]) { + /* + * sAA..AA Step one instruction from AA..AA + * This will return an error to gdb .. + */ + case 's': + case 'c': + /* handle the optional parameter */ + if (kgdb_hex2long(&ptr, &addr)) + linux_regs->nip = addr; + + atomic_set(&kgdb_cpu_doing_single_step, -1); + /* set the trace bit if we're stepping */ + if (remcom_in_buffer[0] == 's') { +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, + mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); + linux_regs->msr |= MSR_DE; +#else + linux_regs->msr |= MSR_SE; +#endif + kgdb_single_step = 1; + if (kgdb_contthread) + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); + } + return 0; + } + + return -1; +} + +/* + * Global data + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, +}; + +static int kgdb_not_implemented(struct pt_regs *regs) +{ + return 0; +} + +static void *old__debugger_ipi; +static void *old__debugger; +static void *old__debugger_bpt; +static void *old__debugger_sstep; +static void *old__debugger_iabr_match; +static void *old__debugger_dabr_match; +static void *old__debugger_fault_handler; + +int kgdb_arch_init(void) +{ + old__debugger_ipi = __debugger_ipi; + old__debugger = __debugger; + old__debugger_bpt = __debugger_bpt; + old__debugger_sstep = __debugger_sstep; + old__debugger_iabr_match = __debugger_iabr_match; + old__debugger_dabr_match = __debugger_dabr_match; + old__debugger_fault_handler = __debugger_fault_handler; + + __debugger_ipi = kgdb_call_nmi_hook; + __debugger = kgdb_debugger; + __debugger_bpt = kgdb_handle_breakpoint; + __debugger_sstep = kgdb_singlestep; + __debugger_iabr_match = kgdb_iabr_match; + __debugger_dabr_match = kgdb_dabr_match; + __debugger_fault_handler = kgdb_not_implemented; + + return 0; +} + +void kgdb_arch_exit(void) +{ + __debugger_ipi = old__debugger_ipi; + __debugger = old__debugger; + __debugger_bpt = old__debugger_bpt; + __debugger_sstep = old__debugger_sstep; + __debugger_iabr_match = old__debugger_iabr_match; + __debugger_dabr_match = old__debugger_dabr_match; + __debugger_fault_handler = old__debugger_fault_handler; +} diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c176c513566b..de79915452c8 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -34,6 +34,13 @@ #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> +#include <asm/system.h> + +#ifdef CONFIG_BOOKE +#define MSR_SINGLESTEP (MSR_DE) +#else +#define MSR_SINGLESTEP (MSR_SE) +#endif DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -53,7 +60,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) ret = -EINVAL; } - /* insn must be on a special executable page on ppc64 */ + /* insn must be on a special executable page on ppc64. This is + * not explicitly required on ppc32 (right now), but it doesn't hurt */ if (!ret) { p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -95,7 +103,16 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - regs->msr |= MSR_SE; + /* We turn off async exceptions to ensure that the single step will + * be for the instruction we have the kprobe on, if we dont its + * possible we'd get the single step reported for an exception handler + * like Decrementer or External Interrupt */ + regs->msr &= ~MSR_EE; + regs->msr |= MSR_SINGLESTEP; +#ifdef CONFIG_BOOKE + regs->msr &= ~MSR_CE; + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#endif /* * On powerpc we should single step on the original @@ -127,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_msr = regs->msr; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -158,7 +174,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) kprobe_opcode_t insn = *p->ainsn.insn; if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { - regs->msr &= ~MSR_SE; + /* Turn off 'trace' bits */ + regs->msr &= ~MSR_SINGLESTEP; regs->msr |= kcb->kprobe_saved_msr; goto no_kprobe; } @@ -294,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -334,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->nip = orig_ret_address; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { @@ -376,6 +392,10 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) if (!cur) return 0; + /* make sure we got here for instruction we have a kprobe on */ + if (((unsigned long)cur->ainsn.insn + 4) != regs->nip) + return 0; + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; cur->post_handler(cur, regs, 0); @@ -395,10 +415,10 @@ out: /* * if somebody else is singlestepping across a probe point, msr - * will have SE set, in which case, continue the remaining processing + * will have DE/SE set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->msr & MSR_SE) + if (regs->msr & MSR_SINGLESTEP) return 0; return 1; @@ -421,7 +441,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->nip = (unsigned long)cur->addr; - regs->msr &= ~MSR_SE; + regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */ regs->msr |= kcb->kprobe_saved_msr; if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); @@ -498,7 +518,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, #ifdef CONFIG_PPC64 unsigned long arch_deref_entry_point(void *entry) { - return (unsigned long)(((func_descr_t *)entry)->entry); + return ((func_descr_t *)entry)->entry; } #endif diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 4d96e1db55ee..9ddfaef1a184 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -493,18 +493,18 @@ static int __init serial_dev_init(void) device_initcall(serial_dev_init); +#ifdef CONFIG_SERIAL_8250_CONSOLE /* * This is called very early, as part of console_init() (typically just after * time_init()). This function is respondible for trying to find a good * default console on serial ports. It tries to match the open firmware - * default output with one of the available serial console drivers, either - * one of the platform serial ports that have been probed earlier by - * find_legacy_serial_ports() or some more platform specific ones. + * default output with one of the available serial console drivers that have + * been probed earlier by find_legacy_serial_ports() */ static int __init check_legacy_serial_console(void) { struct device_node *prom_stdout = NULL; - int speed = 0, offset = 0; + int i, speed = 0, offset = 0; const char *name; const u32 *spd; @@ -548,31 +548,20 @@ static int __init check_legacy_serial_console(void) if (spd) speed = *spd; - if (0) - ; -#ifdef CONFIG_SERIAL_8250_CONSOLE - else if (strcmp(name, "serial") == 0) { - int i; - /* Look for it in probed array */ - for (i = 0; i < legacy_serial_count; i++) { - if (prom_stdout != legacy_serial_infos[i].np) - continue; - offset = i; - speed = legacy_serial_infos[i].speed; - break; - } - if (i >= legacy_serial_count) - goto not_found; + if (strcmp(name, "serial") != 0) + goto not_found; + + /* Look for it in probed array */ + for (i = 0; i < legacy_serial_count; i++) { + if (prom_stdout != legacy_serial_infos[i].np) + continue; + offset = i; + speed = legacy_serial_infos[i].speed; + break; } -#endif /* CONFIG_SERIAL_8250_CONSOLE */ -#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE - else if (strcmp(name, "ch-a") == 0) - offset = 0; - else if (strcmp(name, "ch-b") == 0) - offset = 1; -#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ - else + if (i >= legacy_serial_count) goto not_found; + of_node_put(prom_stdout); DBG("Found serial console at ttyS%d\n", offset); @@ -591,3 +580,4 @@ static int __init check_legacy_serial_console(void) } console_initcall(check_legacy_serial_console); +#endif /* CONFIG_SERIAL_8250_CONSOLE */ diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 1e656b43ad7f..1a09719c7628 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -34,8 +34,9 @@ #include <asm/time.h> #include <asm/prom.h> #include <asm/vdso_datapage.h> +#include <asm/vio.h> -#define MODULE_VERS "1.7" +#define MODULE_VERS "1.8" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -static void log_plpar_hcall_return(unsigned long rc, char *tag) +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) { - switch(rc) { - case 0: - return; - case H_HARDWARE: - printk(KERN_INFO "plpar-hcall (%s) " - "Hardware fault\n", tag); - return; - case H_FUNCTION: - printk(KERN_INFO "plpar-hcall (%s) " - "Function not allowed\n", tag); - return; - case H_AUTHORITY: - printk(KERN_INFO "plpar-hcall (%s) " - "Not authorized to this function\n", tag); - return; - case H_PARAMETER: - printk(KERN_INFO "plpar-hcall (%s) " - "Bad parameter(s)\n",tag); - return; - default: - printk(KERN_INFO "plpar-hcall (%s) " - "Unexpected rc(0x%lx)\n", tag, rc); - } + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; } +EXPORT_SYMBOL(h_get_mpp); + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; +}; /* * H_GET_PPP hcall returns info in 4 parms. @@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag) * XXXX - Active processors in Physical Processor Pool. * XXXX - Processors active on platform. */ -static unsigned int h_get_ppp(unsigned long *entitled, - unsigned long *unallocated, - unsigned long *aggregation, - unsigned long *resource) +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) { unsigned long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; rc = plpar_hcall(H_GET_PPP, retbuf); - *entitled = retbuf[0]; - *unallocated = retbuf[1]; - *aggregation = retbuf[2]; - *resource = retbuf[3]; + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; - log_plpar_hcall_return(rc, "H_GET_PPP"); + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; + + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; return rc; } -static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) { unsigned long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) *pool_idle_time = retbuf[0]; *num_procs = retbuf[1]; - if (rc != H_AUTHORITY) - log_plpar_hcall_return(rc, "H_PIC"); + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%ld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are apropriate for shared configs */ + if (lppaca[0].shared_proc) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%ld\n", + ppp_data.unallocated_entitlement); +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } #define SPLPAR_CHARACTERISTICS_TOKEN 20 @@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void) return count; } +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += lppaca[cpu].cmo_faults; + cmo_fault_time += lppaca[cpu].cmo_fault_time; + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - unsigned long h_entitled, h_unallocated; - unsigned long h_aggregation, h_resource; - unsigned long pool_idle_time, pool_procs; - unsigned long purr; - - h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, - &h_resource); - - seq_printf(m, "R4=0x%lx\n", h_entitled); - seq_printf(m, "R5=0x%lx\n", h_unallocated); - seq_printf(m, "R6=0x%lx\n", h_aggregation); - seq_printf(m, "R7=0x%lx\n", h_resource); - - purr = get_purr(); - /* this call handles the ibm,get-system-parameter contents */ parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + pseries_cmo_data(m); - seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); - - seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); - - seq_printf(m, "system_active_processors=%ld\n", - (h_resource >> 0 * 8) & 0xffff); - - /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { - - h_pic(&pool_idle_time, &pool_procs); - - seq_printf(m, "pool=%ld\n", - (h_aggregation >> 0 * 8) & 0xffff); - - /* report pool_capacity in percentage */ - seq_printf(m, "pool_capacity=%ld\n", - ((h_resource >> 2 * 8) & 0xffff) * 100); - - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); - } - - seq_printf(m, "unallocated_capacity_weight=%ld\n", - (h_resource >> 4 * 8) & 0xFF); - - seq_printf(m, "capacity_weight=%ld\n", - (h_resource >> 5 * 8) & 0xFF); - - seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); - - seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); - - seq_printf(m, "purr=%ld\n", purr); - + seq_printf(m, "purr=%ld\n", get_purr()); } else { /* non SPLPAR case */ seq_printf(m, "system_active_processors=%d\n", @@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) return 0; } +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight = ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __FUNCTION__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + __FUNCTION__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + __FUNCTION__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + /* * Interface for changing system parameters (variable capacity weight * and entitled capacity). Format of input is "param_name=value"; @@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) static ssize_t lparcfg_write(struct file *file, const char __user * buf, size_t count, loff_t * off) { - char *kbuf; + int kbuf_sz = 64; + char kbuf[kbuf_sz]; char *tmp; u64 new_entitled, *new_entitled_ptr = &new_entitled; u8 new_weight, *new_weight_ptr = &new_weight; - - unsigned long current_entitled; /* parameters for h_get_ppp */ - unsigned long dummy; - unsigned long resource; - u8 current_weight; - - ssize_t retval = -ENOMEM; + ssize_t retval; if (!firmware_has_feature(FW_FEATURE_SPLPAR) || firmware_has_feature(FW_FEATURE_ISERIES)) return -EINVAL; - kbuf = kmalloc(count, GFP_KERNEL); - if (!kbuf) - goto out; + if (count > kbuf_sz) + return -EINVAL; - retval = -EFAULT; if (copy_from_user(kbuf, buf, count)) - goto out; + return -EFAULT; - retval = -EINVAL; kbuf[count - 1] = '\0'; tmp = strchr(kbuf, '='); if (!tmp) - goto out; + return -EINVAL; *tmp++ = '\0'; @@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, char *endp; *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); if (endp == tmp) - goto out; - new_weight_ptr = ¤t_weight; + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); } else if (!strcmp(kbuf, "capacity_weight")) { char *endp; *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); if (endp == tmp) - goto out; - new_entitled_ptr = ¤t_entitled; - } else - goto out; - - /* Get our current parameters */ - retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); - if (retval) { - retval = -EIO; - goto out; - } - - current_weight = (resource >> 5 * 8) & 0xFF; + return -EINVAL; - pr_debug("%s: current_entitled = %lu, current_weight = %u\n", - __func__, current_entitled, current_weight); + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; - pr_debug("%s: new_entitled = %lu, new_weight = %u\n", - __func__, *new_entitled_ptr, *new_weight_ptr); + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; - retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, - *new_weight_ptr); + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; if (retval == H_SUCCESS || retval == H_CONSTRAINED) { retval = count; @@ -500,14 +636,8 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, retval = -EIO; } else if (retval == H_PARAMETER) { retval = -EINVAL; - } else { - printk(KERN_WARNING "%s: received unknown hv return code %ld", - __func__, retval); - retval = -EIO; } -out: - kfree(kbuf); return retval; } @@ -573,7 +703,7 @@ static int lparcfg_open(struct inode *inode, struct file *file) return single_open(file, lparcfg_data, NULL); } -const struct file_operations lparcfg_fops = { +static const struct file_operations lparcfg_fops = { .owner = THIS_MODULE, .read = seq_read, .write = lparcfg_write, @@ -581,7 +711,7 @@ const struct file_operations lparcfg_fops = { .release = single_release, }; -int __init lparcfg_init(void) +static int __init lparcfg_init(void) { struct proc_dir_entry *ent; mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; @@ -601,7 +731,7 @@ int __init lparcfg_init(void) return 0; } -void __exit lparcfg_cleanup(void) +static void __exit lparcfg_cleanup(void) { if (proc_ppc64_lparcfg) remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 29a0e039d436..aab76887a842 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -48,7 +48,7 @@ void machine_kexec_cleanup(struct kimage *image) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { if (ppc_md.machine_kexec) ppc_md.machine_kexec(image); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 704375bda73a..a168514d8609 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -158,7 +158,7 @@ void kexec_copy_flush(struct kimage *image) * on calling the interrupts, but we would like to call it off irq level * so that the interrupt controller is clean. */ -void kexec_smp_down(void *arg) +static void kexec_smp_down(void *arg) { if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); @@ -172,7 +172,7 @@ static void kexec_prepare_cpus(void) { int my_cpu, i, notified=-1; - smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); + smp_call_function(kexec_smp_down, NULL, /* wait */0); my_cpu = get_cpu(); /* check the others cpus are now down (via paca hw cpu id == -1) */ @@ -249,7 +249,7 @@ static void kexec_prepare_cpus(void) * We could use a smaller stack if we don't care about anything using * current, but that audit has not been performed. */ -union thread_union kexec_stack +static union thread_union kexec_stack __attribute__((__section__(".data.init_task"))) = { }; /* Our assembly helper, in kexec_stub.S */ diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 7b9160220698..85cb6f340846 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -116,3 +116,8 @@ _GLOBAL(longjmp) mtlr r0 mr r3,r4 blr + +_GLOBAL(__setup_cpu_power7) +_GLOBAL(__restore_cpu_power7) + /* place holder */ + blr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 89aaaa6f3561..6321ae36f729 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) * * flush_icache_range(unsigned long start, unsigned long stop) */ -_GLOBAL(__flush_icache_range) +_KPROBE(__flush_icache_range) BEGIN_FTR_SECTION blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 942951e76586..4dd70cf7bb4e 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -506,6 +506,39 @@ _GLOBAL(giveup_altivec) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +/* + * __giveup_vsx(tsk) + * Disable VSX for the task given as the argument. + * Does NOT save vsx registers. + * Enables the VSX for use in the kernel on return. + */ +_GLOBAL(__giveup_vsx) + mfmsr r5 + oris r5,r5,MSR_VSX@h + mtmsrd r5 /* enable use of VSX now */ + isync + + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VSX@h + andc r4,r4,r3 /* disable VSX for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_vsx@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_VSX */ + /* kexec_wait(phys_cpu) * * wait for the flag to change, indicating this kernel is going away but diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c new file mode 100644 index 000000000000..af07003573c4 --- /dev/null +++ b/arch/powerpc/kernel/module.c @@ -0,0 +1,116 @@ +/* Kernel module help for powerpc. + Copyright (C) 2001, 2003 Rusty Russell IBM Corporation. + Copyright (C) 2008 Freescale Semiconductor, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/module.h> +#include <linux/elf.h> +#include <linux/moduleloader.h> +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/bug.h> +#include <asm/module.h> +#include <asm/uaccess.h> +#include <asm/firmware.h> +#include <linux/sort.h> + +#include "setup.h" + +LIST_HEAD(module_bug_list); + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + + return vmalloc_exec(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + char *secstrings; + unsigned int i; + + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) + if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0) + return &sechdrs[i]; + return NULL; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *me) +{ + const Elf_Shdr *sect; + int err; + + err = module_bug_finalize(hdr, sechdrs, me); + if (err) + return err; + + /* Apply feature fixups */ + sect = find_section(hdr, sechdrs, "__ftr_fixup"); + if (sect != NULL) + do_feature_fixups(cur_cpu_spec->cpu_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); + +#ifdef CONFIG_PPC64 + sect = find_section(hdr, sechdrs, "__fw_ftr_fixup"); + if (sect != NULL) + do_feature_fixups(powerpc_firmware_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); +#endif + + sect = find_section(hdr, sechdrs, "__lwsync_fixup"); + if (sect != NULL) + do_lwsync_fixups(cur_cpu_spec->cpu_features, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); + + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ + module_bug_cleanup(mod); +} + +struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct mod_arch_specific *mod; + unsigned int i; + struct bug_entry *bug; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + bug = mod->bug_table; + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug->bug_addr) + return bug; + } + return NULL; +} diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index eab313858315..2df91a03462a 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -34,23 +34,6 @@ #define DEBUGP(fmt , ...) #endif -LIST_HEAD(module_bug_list); - -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return vmalloc(size); -} - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} - /* Count how many different relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) @@ -325,58 +308,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } return 0; } - -static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *name) -{ - char *secstrings; - unsigned int i; - - secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (i = 1; i < hdr->e_shnum; i++) - if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0) - return &sechdrs[i]; - return NULL; -} - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *sect; - int err; - - err = module_bug_finalize(hdr, sechdrs, me); - if (err) /* never true, currently */ - return err; - - /* Apply feature fixups */ - sect = find_section(hdr, sechdrs, "__ftr_fixup"); - if (sect != NULL) - do_feature_fixups(cur_cpu_spec->cpu_features, - (void *)sect->sh_addr, - (void *)sect->sh_addr + sect->sh_size); - - return 0; -} - -void module_arch_cleanup(struct module *mod) -{ - module_bug_cleanup(mod); -} - -struct bug_entry *module_find_bug(unsigned long bugaddr) -{ - struct mod_arch_specific *mod; - unsigned int i; - struct bug_entry *bug; - - list_for_each_entry(mod, &module_bug_list, bug_list) { - bug = mod->bug_table; - for (i = 0; i < mod->num_bugs; ++i, ++bug) - if (bugaddr == bug->bug_addr) - return bug; - } - return NULL; -} diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 3a82b02b784b..ee6a2982d567 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -24,6 +24,7 @@ #include <asm/module.h> #include <asm/uaccess.h> #include <asm/firmware.h> +#include <asm/code-patching.h> #include <linux/sort.h> #include "setup.h" @@ -101,22 +102,6 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) return _count_relocs; } -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - - return vmalloc_exec(size); -} - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} - static int relacmp(const void *_x, const void *_y) { const Elf64_Rela *x, *y; @@ -346,7 +331,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, restore r2. */ static int restore_r2(u32 *instruction, struct module *me) { - if (*instruction != 0x60000000) { + if (*instruction != PPC_NOP_INSTR) { printk("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; @@ -466,65 +451,3 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; } - -LIST_HEAD(module_bug_list); - -static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *name) -{ - char *secstrings; - unsigned int i; - - secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (i = 1; i < hdr->e_shnum; i++) - if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0) - return &sechdrs[i]; - return NULL; -} - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, struct module *me) -{ - const Elf_Shdr *sect; - int err; - - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - - /* Apply feature fixups */ - sect = find_section(hdr, sechdrs, "__ftr_fixup"); - if (sect != NULL) - do_feature_fixups(cur_cpu_spec->cpu_features, - (void *)sect->sh_addr, - (void *)sect->sh_addr + sect->sh_size); - - sect = find_section(hdr, sechdrs, "__fw_ftr_fixup"); - if (sect != NULL) - do_feature_fixups(powerpc_firmware_features, - (void *)sect->sh_addr, - (void *)sect->sh_addr + sect->sh_size); - - return 0; -} - -void module_arch_cleanup(struct module *mod) -{ - module_bug_cleanup(mod); -} - -struct bug_entry *module_find_bug(unsigned long bugaddr) -{ - struct mod_arch_specific *mod; - unsigned int i; - struct bug_entry *bug; - - list_for_each_entry(mod, &module_bug_list, bug_list) { - bug = mod->bug_table; - for (i = 0; i < mod->num_bugs; ++i, ++bug) - if (bugaddr == bug->bug_addr) - return bug; - } - return NULL; -} diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index c62d1012c013..3bb7d3dd28be 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -34,5 +34,5 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) void arch_teardown_msi_irqs(struct pci_dev *dev) { - return ppc_md.teardown_msi_irqs(dev); + ppc_md.teardown_msi_irqs(dev); } diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index 5748ddb47d9f..e9be908f199b 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -89,54 +89,6 @@ struct of_device *of_device_alloc(struct device_node *np, } EXPORT_SYMBOL(of_device_alloc); -ssize_t of_device_get_modalias(struct of_device *ofdev, - char *str, ssize_t len) -{ - const char *compat; - int cplen, i; - ssize_t tsize, csize, repend; - - /* Name & Type */ - csize = snprintf(str, len, "of:N%sT%s", - ofdev->node->name, ofdev->node->type); - - /* Get compatible property if any */ - compat = of_get_property(ofdev->node, "compatible", &cplen); - if (!compat) - return csize; - - /* Find true end (we tolerate multiple \0 at the end */ - for (i=(cplen-1); i>=0 && !compat[i]; i--) - cplen--; - if (!cplen) - return csize; - cplen++; - - /* Check space (need cplen+1 chars including final \0) */ - tsize = csize + cplen; - repend = tsize; - - if (csize>=len) /* @ the limit, all is already filled */ - return tsize; - - if (tsize>=len) { /* limit compat list */ - cplen = len-csize-1; - repend = len; - } - - /* Copy and do char replacement */ - memcpy(&str[csize+1], compat, cplen); - for (i=csize; i<repend; i++) { - char c = str[i]; - if (c=='\0') - str[i] = 'C'; - else if (c==' ') - str[i] = '_'; - } - - return tsize; -} - int of_device_uevent(struct device *dev, struct kobj_uevent_env *env) { struct of_device *ofdev; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 063cdd413049..224e9a11765c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -598,6 +598,7 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, res->start = pci_addr; break; case 2: /* PCI Memory space */ + case 3: /* PCI 64 bits Memory space */ printk(KERN_INFO " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n", cpu_addr, cpu_addr + size - 1, pci_addr, diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h index 90e562771791..dc16aefe1dd0 100644 --- a/arch/powerpc/kernel/ppc32.h +++ b/arch/powerpc/kernel/ppc32.h @@ -120,6 +120,7 @@ struct mcontext32 { elf_fpregset_t mc_fregs; unsigned int mc_pad[2]; elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16))); + elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16))); }; struct ucontext32 { diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index a8d02506468a..e1ea4fe5cfbd 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -107,6 +107,9 @@ EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +EXPORT_SYMBOL(giveup_vsx); +#endif /* CONFIG_VSX */ #ifdef CONFIG_SPE EXPORT_SYMBOL(giveup_spe); #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7de41c3948ec..957bded0020d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -47,12 +47,15 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #endif +#include <linux/kprobes.h> +#include <linux/kdebug.h> extern unsigned long _get_SP(void); #ifndef CONFIG_SMP struct task_struct *last_task_used_math = NULL; struct task_struct *last_task_used_altivec = NULL; +struct task_struct *last_task_used_vsx = NULL; struct task_struct *last_task_used_spe = NULL; #endif @@ -104,17 +107,6 @@ void enable_kernel_fp(void) } EXPORT_SYMBOL(enable_kernel_fp); -int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) -{ - if (!tsk->thread.regs) - return 0; - flush_fp_to_thread(current); - - memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs)); - - return 1; -} - #ifdef CONFIG_ALTIVEC void enable_kernel_altivec(void) { @@ -148,36 +140,48 @@ void flush_altivec_to_thread(struct task_struct *tsk) preempt_enable(); } } +#endif /* CONFIG_ALTIVEC */ -int dump_task_altivec(struct task_struct *tsk, elf_vrregset_t *vrregs) +#ifdef CONFIG_VSX +#if 0 +/* not currently used, but some crazy RAID module might want to later */ +void enable_kernel_vsx(void) { - /* ELF_NVRREG includes the VSCR and VRSAVE which we need to save - * separately, see below */ - const int nregs = ELF_NVRREG - 2; - elf_vrreg_t *reg; - u32 *dest; - - if (tsk == current) - flush_altivec_to_thread(tsk); - - reg = (elf_vrreg_t *)vrregs; - - /* copy the 32 vr registers */ - memcpy(reg, &tsk->thread.vr[0], nregs * sizeof(*reg)); - reg += nregs; + WARN_ON(preemptible()); - /* copy the vscr */ - memcpy(reg, &tsk->thread.vscr, sizeof(*reg)); - reg++; +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) + giveup_vsx(current); + else + giveup_vsx(NULL); /* just enable vsx for kernel - force */ +#else + giveup_vsx(last_task_used_vsx); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_vsx); +#endif - /* vrsave is stored in the high 32bit slot of the final 128bits */ - memset(reg, 0, sizeof(*reg)); - dest = (u32 *)reg; - *dest = tsk->thread.vrsave; +void giveup_vsx(struct task_struct *tsk) +{ + giveup_fpu(tsk); + giveup_altivec(tsk); + __giveup_vsx(tsk); +} - return 1; +void flush_vsx_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_VSX) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_vsx(tsk); + } + preempt_enable(); + } } -#endif /* CONFIG_ALTIVEC */ +#endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -209,14 +213,6 @@ void flush_spe_to_thread(struct task_struct *tsk) preempt_enable(); } } - -int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs) -{ - flush_spe_to_thread(current); - /* We copy u32 evr[32] + u64 acc + u32 spefscr -> 35 */ - memcpy(evrregs, ¤t->thread.evr[0], sizeof(u32) * 35); - return 1; -} #endif /* CONFIG_SPE */ #ifndef CONFIG_SMP @@ -233,6 +229,10 @@ void discard_lazy_cpu_state(void) if (last_task_used_altivec == current) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + if (last_task_used_vsx == current) + last_task_used_vsx = NULL; +#endif /* CONFIG_VSX */ #ifdef CONFIG_SPE if (last_task_used_spe == current) last_task_used_spe = NULL; @@ -241,21 +241,53 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ +void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DAC and struct entries. One shot trigger */ +#if defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W + | DBCR0_IDM)); +#endif + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} + static DEFINE_PER_CPU(unsigned long, current_dabr); int set_dabr(unsigned long dabr) { __get_cpu_var(current_dabr) = dabr; -#ifdef CONFIG_PPC_MERGE /* XXX for now */ if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr); -#endif /* XXX should we have a CPU_FTR_HAS_DABR ? */ #if defined(CONFIG_PPC64) || defined(CONFIG_6xx) mtspr(SPRN_DABR, dabr); #endif + +#if defined(CONFIG_BOOKE) + mtspr(SPRN_DAC1, dabr); +#endif + return 0; } @@ -297,6 +329,11 @@ struct task_struct *__switch_to(struct task_struct *prev, if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) giveup_altivec(prev); #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX)) + /* VMX and FPU registers are already save here */ + __giveup_vsx(prev); +#endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* * If the previous thread used spe in the last quantum @@ -317,6 +354,10 @@ struct task_struct *__switch_to(struct task_struct *prev, if (new->thread.regs && last_task_used_altivec == new) new->thread.regs->msr |= MSR_VEC; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + if (new->thread.regs && last_task_used_vsx == new) + new->thread.regs->msr |= MSR_VSX; +#endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* Avoid the trap. On smp this this never happens since * we don't set last_task_used_spe @@ -330,6 +371,12 @@ struct task_struct *__switch_to(struct task_struct *prev, if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); +#if defined(CONFIG_BOOKE) + /* If new thread DAC (HW breakpoint) is the same then leave it */ + if (new->thread.dabr) + set_dabr(new->thread.dabr); +#endif + new_thread = &new->thread; old_thread = ¤t->thread; @@ -417,6 +464,8 @@ static struct regbit { {MSR_EE, "EE"}, {MSR_PR, "PR"}, {MSR_FP, "FP"}, + {MSR_VEC, "VEC"}, + {MSR_VSX, "VSX"}, {MSR_ME, "ME"}, {MSR_IR, "IR"}, {MSR_DR, "DR"}, @@ -484,10 +533,8 @@ void show_regs(struct pt_regs * regs) * Lookup NIP late so we have the best change of getting the * above info out without failing */ - printk("NIP ["REG"] ", regs->nip); - print_symbol("%s\n", regs->nip); - printk("LR ["REG"] ", regs->link); - print_symbol("%s\n", regs->link); + printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); + printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) @@ -518,6 +565,10 @@ void flush_thread(void) if (current->thread.dabr) { current->thread.dabr = 0; set_dabr(0); + +#if defined(CONFIG_BOOKE) + current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W); +#endif } } @@ -534,6 +585,7 @@ void prepare_to_copy(struct task_struct *tsk) { flush_fp_to_thread(current); flush_altivec_to_thread(current); + flush_vsx_to_thread(current); flush_spe_to_thread(current); } @@ -689,6 +741,9 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #endif discard_lazy_cpu_state(); +#ifdef CONFIG_VSX + current->thread.used_vsr = 0; +#endif memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); current->thread.fpscr.val = 0; #ifdef CONFIG_ALTIVEC @@ -971,8 +1026,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; if (!firstframe || ip != lr) { - printk("["REG"] ["REG"] ", sp, ip); - print_symbol("%s", ip); + printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); if (firstframe) printk(" (unreliable)"); printk("\n"); @@ -987,10 +1041,9 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); - printk("--- Exception: %lx", regs->trap); - print_symbol(" at %s\n", regs->nip); lr = regs->link; - print_symbol(" LR = %s\n", lr); + printk("--- Exception: %lx at %pS\n LR = %pS\n", + regs->trap, (void *)regs->nip, (void *)lr); firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2aefe2a4129a..87d83c56b31e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -609,6 +609,10 @@ static struct feature_property { {"altivec", 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC}, {"ibm,vmx", 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC}, #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + /* Yes, this _really_ is ibm,vmx == 2 to enable VSX */ + {"ibm,vmx", 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX}, +#endif /* CONFIG_VSX */ #ifdef CONFIG_PPC64 {"ibm,dfp", 1, 0, PPC_FEATURE_HAS_DFP}, {"ibm,purr", 1, CPU_FTR_PURR, 0}, diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 6d6df1e60325..b72849ac7db3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -205,8 +205,6 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; -#define MAX_CPU_THREADS 2 - /* * Error results ... some OF calls will return "-1" on error, some * will return 0, some will return either. To simplify, here are @@ -620,6 +618,7 @@ static void __init early_cmdline_parse(void) #define OV1_PPC_2_03 0x10 /* set if we support PowerPC 2.03 */ #define OV1_PPC_2_04 0x08 /* set if we support PowerPC 2.04 */ #define OV1_PPC_2_05 0x04 /* set if we support PowerPC 2.05 */ +#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */ /* Option vector 2: Open Firmware options supported */ #define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ @@ -642,6 +641,11 @@ static void __init early_cmdline_parse(void) #else #define OV5_MSI 0x00 #endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_PPC_SMLPAR +#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#else +#define OV5_CMO 0x00 +#endif /* * The architecture vector has an array of PVR mask/value pairs, @@ -650,6 +654,8 @@ static void __init early_cmdline_parse(void) static unsigned char ibm_architecture_vec[] = { W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ W(0xffff0000), W(0x003e0000), /* POWER6 */ + W(0xffff0000), W(0x003f0000), /* POWER7 */ + W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ 5 - 1, /* 5 option vectors */ @@ -658,7 +664,7 @@ static unsigned char ibm_architecture_vec[] = { 3 - 2, /* length */ 0, /* don't ignore, don't halt */ OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | - OV1_PPC_2_04 | OV1_PPC_2_05, + OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06, /* option vector 2: Open Firmware options supported */ 34 - 2, /* length */ @@ -684,10 +690,12 @@ static unsigned char ibm_architecture_vec[] = { 0, /* don't halt */ /* option vector 5: PAPR/OF options */ - 3 - 2, /* length */ + 5 - 2, /* length */ 0, /* don't ignore, don't halt */ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, + 0, + OV5_CMO, }; /* Old method - ELF header with PT_NOTE sections */ @@ -1329,10 +1337,6 @@ static void __init prom_hold_cpus(void) unsigned int reg; phandle node; char type[64]; - int cpuid = 0; - unsigned int interrupt_server[MAX_CPU_THREADS]; - unsigned int cpu_threads, hw_cpu_num; - int propsize; struct prom_t *_prom = &RELOC(prom); unsigned long *spinloop = (void *) LOW_ADDR(__secondary_hold_spinloop); @@ -1376,7 +1380,6 @@ static void __init prom_hold_cpus(void) reg = -1; prom_getprop(node, "reg", ®, sizeof(reg)); - prom_debug("\ncpuid = 0x%x\n", cpuid); prom_debug("cpu hw idx = 0x%x\n", reg); /* Init the acknowledge var which will be reset by @@ -1385,28 +1388,9 @@ static void __init prom_hold_cpus(void) */ *acknowledge = (unsigned long)-1; - propsize = prom_getprop(node, "ibm,ppc-interrupt-server#s", - &interrupt_server, - sizeof(interrupt_server)); - if (propsize < 0) { - /* no property. old hardware has no SMT */ - cpu_threads = 1; - interrupt_server[0] = reg; /* fake it with phys id */ - } else { - /* We have a threaded processor */ - cpu_threads = propsize / sizeof(u32); - if (cpu_threads > MAX_CPU_THREADS) { - prom_printf("SMT: too many threads!\n" - "SMT: found %x, max is %x\n", - cpu_threads, MAX_CPU_THREADS); - cpu_threads = 1; /* ToDo: panic? */ - } - } - - hw_cpu_num = interrupt_server[0]; - if (hw_cpu_num != _prom->cpu) { + if (reg != _prom->cpu) { /* Primary Thread of non-boot cpu */ - prom_printf("%x : starting cpu hw idx %x... ", cpuid, reg); + prom_printf("starting cpu hw idx %x... ", reg); call_prom("start-cpu", 3, 0, node, secondary_hold, reg); @@ -1421,17 +1405,10 @@ static void __init prom_hold_cpus(void) } #ifdef CONFIG_SMP else - prom_printf("%x : boot cpu %x\n", cpuid, reg); + prom_printf("boot cpu hw idx %x\n", reg); #endif /* CONFIG_SMP */ - - /* Reserve cpu #s for secondary threads. They start later. */ - cpuid += cpu_threads; } - if (cpuid > NR_CPUS) - prom_printf("WARNING: maximum CPUs (" __stringify(NR_CPUS) - ") exceeded: ignoring extras\n"); - prom_debug("prom_hold_cpus: end...\n"); } diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 90eb3a3e383e..bc1fb27368af 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -128,12 +128,35 @@ static void of_bus_pci_count_cells(struct device_node *np, *sizec = 2; } +static unsigned int of_bus_pci_get_flags(const u32 *addr) +{ + unsigned int flags = 0; + u32 w = addr[0]; + + switch((w >> 24) & 0x03) { + case 0x01: + flags |= IORESOURCE_IO; + break; + case 0x02: /* 32 bits */ + case 0x03: /* 64 bits */ + flags |= IORESOURCE_MEM; + break; + } + if (w & 0x40000000) + flags |= IORESOURCE_PREFETCH; + return flags; +} + static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna) { u64 cp, s, da; + unsigned int af, rf; + + af = of_bus_pci_get_flags(addr); + rf = of_bus_pci_get_flags(range); /* Check address type match */ - if ((addr[0] ^ range[0]) & 0x03000000) + if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO)) return OF_BAD_ADDR; /* Read address values, skipping high cell */ @@ -153,25 +176,6 @@ static int of_bus_pci_translate(u32 *addr, u64 offset, int na) return of_bus_default_translate(addr + 1, offset, na - 1); } -static unsigned int of_bus_pci_get_flags(const u32 *addr) -{ - unsigned int flags = 0; - u32 w = addr[0]; - - switch((w >> 24) & 0x03) { - case 0x01: - flags |= IORESOURCE_IO; - break; - case 0x02: /* 32 bits */ - case 0x03: /* 64 bits */ - flags |= IORESOURCE_MEM; - break; - } - if (w & 0x40000000) - flags |= IORESOURCE_PREFETCH; - return flags; -} - const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, unsigned int *flags) { diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2a9fe97e4521..3635be61f899 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -22,6 +22,7 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/regset.h> +#include <linux/tracehook.h> #include <linux/elf.h> #include <linux/user.h> #include <linux/security.h> @@ -215,29 +216,56 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { +#ifdef CONFIG_VSX + double buf[33]; + int i; +#endif flush_fp_to_thread(target); +#ifdef CONFIG_VSX + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + memcpy(&buf[32], &target->thread.fpscr, sizeof(double)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + +#else BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != - offsetof(struct thread_struct, fpr[32])); + offsetof(struct thread_struct, TS_FPR(32))); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpr, 0, -1); +#endif } static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { +#ifdef CONFIG_VSX + double buf[33]; + int i; +#endif flush_fp_to_thread(target); +#ifdef CONFIG_VSX + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + memcpy(&target->thread.fpscr, &buf[32], sizeof(double)); + return 0; +#else BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) != - offsetof(struct thread_struct, fpr[32])); + offsetof(struct thread_struct, TS_FPR(32))); return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fpr, 0, -1); +#endif } - #ifdef CONFIG_ALTIVEC /* * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. @@ -323,6 +351,56 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, } #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +/* + * Currently to set and and get all the vsx state, you need to call + * the fp and VMX calls aswell. This only get/sets the lower 32 + * 128bit VSX registers. + */ + +static int vsr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +static int vsr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + double buf[32]; + int ret, i; + + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +static int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + double buf[32]; + int ret,i; + + flush_vsx_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + for (i = 0; i < 32 ; i++) + target->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + + return ret; +} +#endif /* CONFIG_VSX */ + #ifdef CONFIG_SPE /* @@ -399,6 +477,9 @@ enum powerpc_regset { #ifdef CONFIG_ALTIVEC REGSET_VMX, #endif +#ifdef CONFIG_VSX + REGSET_VSX, +#endif #ifdef CONFIG_SPE REGSET_SPE, #endif @@ -422,6 +503,13 @@ static const struct user_regset native_regsets[] = { .active = vr_active, .get = vr_get, .set = vr_set }, #endif +#ifdef CONFIG_VSX + [REGSET_VSX] = { + .core_note_type = NT_PPC_VSX, .n = 32, + .size = sizeof(double), .align = sizeof(double), + .active = vsr_active, .get = vsr_get, .set = vsr_set + }, +#endif #ifdef CONFIG_SPE [REGSET_SPE] = { .n = 35, @@ -616,7 +704,7 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC; + task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; #else regs->msr |= MSR_SE; @@ -629,9 +717,16 @@ void user_disable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; + +#if defined(CONFIG_BOOKE) + /* If DAC then do not single step, skip */ + if (task->thread.dabr) + return; +#endif + if (regs != NULL) { #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - task->thread.dbcr0 = 0; + task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM); regs->msr &= ~MSR_DE; #else regs->msr &= ~MSR_SE; @@ -640,22 +735,76 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } -static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { - /* We only support one DABR and no IABRS at the moment */ + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ if (addr > 0) return -EINVAL; - /* The bottom 3 bits are flags */ + /* The bottom 3 bits in dabr are flags */ if ((data & ~0x7UL) >= TASK_SIZE) return -EIO; - /* Ensure translation is on */ +#ifndef CONFIG_BOOKE + + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. + * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. + */ + + /* Ensure breakpoint translation bit is set */ if (data && !(data & DABR_TRANSLATION)) return -EIO; + /* Move contents to the DABR register */ task->thread.dabr = data; + +#endif +#if defined(CONFIG_BOOKE) + + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. + */ + + /* DAC's hold the whole address without any mode flags */ + task->thread.dabr = data & ~0x3UL; + + if (task->thread.dabr == 0) { + task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM); + task->thread.regs->msr &= ~MSR_DE; + return 0; + } + + /* Read or Write bits must be set */ + + if (!(data & 0x3UL)) + return -EINVAL; + + /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 + register */ + task->thread.dbcr0 = DBCR0_IDM; + + /* Check for write and read flags and set DBCR0 + accordingly */ + if (data & 0x1UL) + task->thread.dbcr0 |= DBSR_DAC1R; + if (data & 0x2UL) + task->thread.dbcr0 |= DBSR_DAC1W; + + task->thread.regs->msr |= MSR_DE; +#endif return 0; } @@ -728,7 +877,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp = ptrace_get_reg(child, (int) index); } else { flush_fp_to_thread(child); - tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; + tmp = ((unsigned long *)child->thread.fpr) + [TS_FPRWIDTH * (index - PT_FPR0)]; } ret = put_user(tmp,(unsigned long __user *) data); break; @@ -755,7 +905,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = ptrace_put_reg(child, index, data); } else { flush_fp_to_thread(child); - ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; + ((unsigned long *)child->thread.fpr) + [TS_FPRWIDTH * (index - PT_FPR0)] = data; ret = 0; } break; @@ -820,6 +971,19 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) sizeof(u32)), (const void __user *) data); #endif +#ifdef CONFIG_VSX + case PTRACE_GETVSRREGS: + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_VSX, + 0, 32 * sizeof(double), + (void __user *) data); + + case PTRACE_SETVSRREGS: + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_VSX, + 0, 32 * sizeof(double), + (const void __user *) data); +#endif #ifdef CONFIG_SPE case PTRACE_GETEVRREGS: /* Get the child spe register state. */ @@ -849,31 +1013,24 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return ret; } -static void do_syscall_trace(void) +/* + * We must return the syscall number to actually look up in the table. + * This can be -1L to skip running any syscall at all. + */ +long do_syscall_trace_enter(struct pt_regs *regs) { - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} + long ret = 0; -void do_syscall_trace_enter(struct pt_regs *regs) -{ secure_computing(regs->gpr[0]); - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - do_syscall_trace(); + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(regs)) + /* + * Tracing decided this syscall should not happen. + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in regs->gpr[0]. + */ + ret = -1L; if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 @@ -891,16 +1048,19 @@ void do_syscall_trace_enter(struct pt_regs *regs) regs->gpr[5] & 0xffffffff, regs->gpr[6] & 0xffffffff); } + + return ret ?: regs->gpr[0]; } void do_syscall_trace_leave(struct pt_regs *regs) { + int step; + if (unlikely(current->audit_context)) audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, regs->result); - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - do_syscall_trace(); + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); } diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 4c1de6af4c09..197d49c790ad 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -64,6 +64,11 @@ static long compat_ptrace_old(struct task_struct *child, long request, return -EPERM; } +/* Macros to workout the correct index for the FPR in the thread struct */ +#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1) +#define FPRHALF(i) (((i) - PT_FPR0) & 1) +#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) + FPRHALF(i) + long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { @@ -122,7 +127,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, * to be an array of unsigned int (32 bits) - the * index passed in is based on this assumption. */ - tmp = ((unsigned int *)child->thread.fpr)[index - PT_FPR0]; + tmp = ((unsigned int *)child->thread.fpr) + [FPRINDEX(index)]; } ret = put_user((unsigned int)tmp, (u32 __user *)data); break; @@ -162,7 +168,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, CHECK_FULL_REGS(child->thread.regs); if (numReg >= PT_FPR0) { flush_fp_to_thread(child); - tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0]; + tmp = ((unsigned long int *)child->thread.fpr) + [FPRINDEX(numReg)]; } else { /* register within PT_REGS struct */ tmp = ptrace_get_reg(child, numReg); } @@ -217,7 +224,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, * to be an array of unsigned int (32 bits) - the * index passed in is based on this assumption. */ - ((unsigned int *)child->thread.fpr)[index - PT_FPR0] = data; + ((unsigned int *)child->thread.fpr) + [FPRINDEX(index)] = data; ret = 0; } break; @@ -286,6 +294,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, case PTRACE_SETFPREGS: case PTRACE_GETVRREGS: case PTRACE_SETVRREGS: + case PTRACE_GETVSRREGS: + case PTRACE_SETVSRREGS: case PTRACE_GETREGS64: case PTRACE_SETREGS64: case PPC_PTRACE_GETFPREGS: diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index f9c6abc84a94..1be9fe38bcb5 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -160,7 +160,7 @@ static int sensors_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_sensors_show, NULL); } -const struct file_operations ppc_rtas_sensors_operations = { +static const struct file_operations ppc_rtas_sensors_operations = { .open = sensors_open, .read = seq_read, .llseek = seq_lseek, @@ -172,7 +172,7 @@ static int poweron_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_poweron_show, NULL); } -const struct file_operations ppc_rtas_poweron_operations = { +static const struct file_operations ppc_rtas_poweron_operations = { .open = poweron_open, .read = seq_read, .llseek = seq_lseek, @@ -185,7 +185,7 @@ static int progress_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_progress_show, NULL); } -const struct file_operations ppc_rtas_progress_operations = { +static const struct file_operations ppc_rtas_progress_operations = { .open = progress_open, .read = seq_read, .llseek = seq_lseek, @@ -198,7 +198,7 @@ static int clock_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_clock_show, NULL); } -const struct file_operations ppc_rtas_clock_operations = { +static const struct file_operations ppc_rtas_clock_operations = { .open = clock_open, .read = seq_read, .llseek = seq_lseek, @@ -211,7 +211,7 @@ static int tone_freq_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_freq_show, NULL); } -const struct file_operations ppc_rtas_tone_freq_operations = { +static const struct file_operations ppc_rtas_tone_freq_operations = { .open = tone_freq_open, .read = seq_read, .llseek = seq_lseek, @@ -224,7 +224,7 @@ static int tone_volume_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_volume_show, NULL); } -const struct file_operations ppc_rtas_tone_volume_operations = { +static const struct file_operations ppc_rtas_tone_volume_operations = { .open = tone_volume_open, .read = seq_read, .llseek = seq_lseek, @@ -237,7 +237,7 @@ static int rmo_buf_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_rmo_buf_show, NULL); } -const struct file_operations ppc_rtas_rmo_buf_ops = { +static const struct file_operations ppc_rtas_rmo_buf_ops = { .open = rmo_buf_open, .read = seq_read, .llseek = seq_lseek, diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 34843c318419..c680f1bbd387 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -340,8 +340,8 @@ int rtas_get_error_log_max(void) EXPORT_SYMBOL(rtas_get_error_log_max); -char rtas_err_buf[RTAS_ERROR_LOG_MAX]; -int rtas_last_error_token; +static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; +static int rtas_last_error_token; /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text @@ -484,7 +484,7 @@ unsigned int rtas_busy_delay(int status) } EXPORT_SYMBOL(rtas_busy_delay); -int rtas_error_rc(int rtas_rc) +static int rtas_error_rc(int rtas_rc) { int rc; @@ -747,7 +747,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) /* Call function on all CPUs. One of us will make the * rtas call */ - if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0)) + if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) data.error = -EINVAL; wait_for_completion(&done); diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 0a5e22b22729..149cb112cd1a 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, } /* constructor for flash_block_cache */ -void rtas_block_ctor(struct kmem_cache *cache, void *ptr) +void rtas_block_ctor(void *ptr) { memset(ptr, 0, RTAS_BLK_SIZE); } @@ -731,7 +731,7 @@ static const struct file_operations validate_flash_operations = { .release = validate_flash_release, }; -int __init rtas_flash_init(void) +static int __init rtas_flash_init(void) { int rc; @@ -817,7 +817,7 @@ cleanup: return rc; } -void __exit rtas_flash_cleanup(void) +static void __exit rtas_flash_cleanup(void) { rtas_flash_term_hook = NULL; diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 3ab88a9dc70d..589a2797eac2 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -155,12 +155,12 @@ static int rtas_pci_write_config(struct pci_bus *bus, return PCIBIOS_DEVICE_NOT_FOUND; } -struct pci_ops rtas_pci_ops = { +static struct pci_ops rtas_pci_ops = { .read = rtas_pci_read_config, .write = rtas_pci_write_config, }; -int is_python(struct device_node *dev) +static int is_python(struct device_node *dev) { const char *model = of_get_property(dev, "model", NULL); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index db540eab09f4..9cc5a52711e5 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -367,7 +367,6 @@ static void __init cpu_init_thread_core_maps(int tpc) * setup_cpu_maps - initialize the following cpu maps: * cpu_possible_map * cpu_present_map - * cpu_sibling_map * * Having the possible map set up early allows us to restrict allocations * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. @@ -475,31 +474,9 @@ void __init smp_setup_cpu_maps(void) */ cpu_init_thread_core_maps(nthreads); } - -/* - * Being that cpu_sibling_map is now a per_cpu array, then it cannot - * be initialized until the per_cpu areas have been created. This - * function is now called from setup_per_cpu_areas(). - */ -void __init smp_setup_cpu_sibling_map(void) -{ -#ifdef CONFIG_PPC64 - int i, cpu, base; - - for_each_possible_cpu(cpu) { - DBG("Sibling map for CPU %d:", cpu); - base = cpu_first_thread_in_core(cpu); - for (i = 0; i < threads_per_core; i++) { - cpu_set(base + i, per_cpu(cpu_sibling_map, cpu)); - DBG(" %d", base + i); - } - DBG("\n"); - } - -#endif /* CONFIG_PPC64 */ -} #endif /* CONFIG_SMP */ +#ifdef CONFIG_PCSPKR_PLATFORM static __init int add_pcspkr(void) { struct device_node *np; @@ -522,6 +499,7 @@ static __init int add_pcspkr(void) return ret; } device_initcall(add_pcspkr); +#endif /* CONFIG_PCSPKR_PLATFORM */ void probe_machine(void) { diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 19e8fcb9cea8..066e65c59b58 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -43,10 +43,6 @@ #define DBG(fmt...) -#if defined CONFIG_KGDB -#include <asm/kgdb.h> -#endif - extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; @@ -101,6 +97,10 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) PTRRELOC(&__start___ftr_fixup), PTRRELOC(&__stop___ftr_fixup)); + do_lwsync_fixups(spec->cpu_features, + PTRRELOC(&__start___lwsync_fixup), + PTRRELOC(&__stop___lwsync_fixup)); + return KERNELBASE + offset; } @@ -127,6 +127,11 @@ notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys) ppc_md.power_save = ppc6xx_idle; #endif +#ifdef CONFIG_E500 + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = e500_idle; +#endif if (ppc_md.progress) ppc_md.progress("id mach(): done", 0x200); } @@ -248,6 +253,28 @@ static void __init irqstack_early_init(void) #define irqstack_early_init() #endif +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +static void __init exc_lvl_early_init(void) +{ + unsigned int i; + + /* interrupt stacks must be in lowmem, we get that for free on ppc32 + * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + for_each_possible_cpu(i) { + critirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +#ifdef CONFIG_BOOKE + dbgirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + mcheckirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +#endif + } +} +#else +#define exc_lvl_early_init() +#endif + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -271,18 +298,6 @@ void __init setup_arch(char **cmdline_p) xmon_setup(); -#if defined(CONFIG_KGDB) - if (ppc_md.kgdb_map_scc) - ppc_md.kgdb_map_scc(); - set_debug_traps(); - if (strstr(cmd_line, "gdb")) { - if (ppc_md.progress) - ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); - printk("kgdb breakpoint activated\n"); - breakpoint(); - } -#endif - /* * Set cache line size based on type of cpu as a default. * Systems with OF can look in the properties on the cpu node(s) @@ -305,6 +320,8 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; + exc_lvl_early_init(); + irqstack_early_init(); /* set up the bootmem stuff with available memory */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 098fd96a394a..8b25f51f03bf 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -363,6 +363,8 @@ void __init setup_system(void) &__start___ftr_fixup, &__stop___ftr_fixup); do_feature_fixups(powerpc_firmware_features, &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); + do_lwsync_fixups(cur_cpu_spec->cpu_features, + &__start___lwsync_fixup, &__stop___lwsync_fixup); /* * Unflatten the device-tree passed by prom_init or kexec @@ -609,9 +611,6 @@ void __init setup_per_cpu_areas(void) paca[i].data_offset = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } - - /* Now that per_cpu is setup, initialize cpu_sibling_map */ - smp_setup_cpu_sibling_map(); } #endif diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a65a44fbe523..a54405ebd7b0 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -9,7 +9,7 @@ * this archive for more details. */ -#include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/signal.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -112,7 +112,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, } } -int do_signal(sigset_t *oldset, struct pt_regs *regs) +static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) { siginfo_t info; int signr; @@ -120,7 +120,7 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) int ret; int is32 = is_32bit_task(); - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else if (!oldset) oldset = ¤t->blocked; @@ -131,9 +131,10 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) check_syscall_restart(regs, &ka, signr > 0); if (signr <= 0) { + struct thread_info *ti = current_thread_info(); /* No signal to deliver -- put the saved sigmask back */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (ti->local_flags & _TLF_RESTORE_SIGMASK) { + ti->local_flags &= ~_TLF_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } return 0; /* no signals delivered */ @@ -144,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) + if (current->thread.dabr) { set_dabr(current->thread.dabr); +#if defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, current->thread.dbcr0); +#endif + } if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) @@ -169,15 +174,31 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) /* * A signal was successfully delivered; the saved sigmask is in - * its frame, and we can clear the TIF_RESTORE_SIGMASK flag. + * its frame, and we can clear the TLF_RESTORE_SIGMASK flag. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK; + + /* + * Let tracing know that we've done the handler setup. + */ + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return ret; } +void do_signal(struct pt_regs *regs, unsigned long thread_info_flags) +{ + if (thread_info_flags & _TIF_SIGPENDING) + do_signal_pending(NULL, regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } +} + long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, unsigned long r6, unsigned long r7, unsigned long r8, struct pt_regs *regs) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 77efb3d5465a..28f4b9f5fe5e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -24,6 +24,16 @@ extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +extern unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task); +extern unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from); +#ifdef CONFIG_VSX +extern unsigned long copy_vsx_to_user(void __user *to, + struct task_struct *task); +extern unsigned long copy_vsx_from_user(struct task_struct *task, + void __user *from); +#endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ad6943468ee9..3e80aa32b8b0 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -68,6 +68,13 @@ #define ucontext ucontext32 /* + * Userspace code may pass a ucontext which doesn't include VSX added + * at the end. We need to check for this case. + */ +#define UCONTEXTSIZEWITHOUTVSX \ + (sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32)) + +/* * Returning 0 means we return to userspace via * ret_from_except and thus restore all user * registers from *regs. This is what we need @@ -243,7 +250,7 @@ long sys_sigsuspend(old_sigset_t mask) current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } @@ -328,6 +335,75 @@ struct rt_sigframe { int abigap[56]; }; +#ifdef CONFIG_VSX +unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NFPREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + buf[i] = task->thread.TS_FPR(i); + memcpy(&buf[i], &task->thread.fpscr, sizeof(double)); + return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); +} + +unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NFPREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) + return 1; + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + task->thread.TS_FPR(i) = buf[i]; + memcpy(&task->thread.fpscr, &buf[i], sizeof(double)); + + return 0; +} + +unsigned long copy_vsx_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < ELF_NVSRHALFREG; i++) + buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET]; + return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); +} + +unsigned long copy_vsx_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) + return 1; + for (i = 0; i < ELF_NVSRHALFREG ; i++) + task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + return 0; +} +#else +inline unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fpr, + ELF_NFPREG * sizeof(double)); +} + +inline unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from) +{ + return __copy_from_user(task->thread.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif + /* * Save the current user registers on the user stack. * We only save the altivec/spe registers if the process has used @@ -336,13 +412,13 @@ struct rt_sigframe { static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, int sigret) { + unsigned long msr = regs->msr; + /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); - /* save general and floating-point registers */ - if (save_general_regs(regs, frame) || - __copy_to_user(&frame->mc_fregs, current->thread.fpr, - ELF_NFPREG * sizeof(double))) + /* save general registers */ + if (save_general_regs(regs, frame)) return 1; #ifdef CONFIG_ALTIVEC @@ -354,8 +430,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, return 1; /* set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data */ - if (__put_user(regs->msr | MSR_VEC, &frame->mc_gregs[PT_MSR])) - return 1; + msr |= MSR_VEC; } /* else assert((regs->msr & MSR_VEC) == 0) */ @@ -367,7 +442,22 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; #endif /* CONFIG_ALTIVEC */ - + if (copy_fpr_to_user(&frame->mc_fregs, current)) + return 1; +#ifdef CONFIG_VSX + /* + * Copy VSR 0-31 upper half from thread_struct to local + * buffer, then write that to userspace. Also set MSR_VSX in + * the saved MSR value to indicate that frame->mc_vregs + * contains valid data + */ + if (current->thread.used_vsr) { + __giveup_vsx(current); + if (copy_vsx_to_user(&frame->mc_vsregs, current)) + return 1; + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ if (current->thread.used_spe) { @@ -377,8 +467,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, return 1; /* set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data */ - if (__put_user(regs->msr | MSR_SPE, &frame->mc_gregs[PT_MSR])) - return 1; + msr |= MSR_SPE; } /* else assert((regs->msr & MSR_SPE) == 0) */ @@ -387,6 +476,8 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, return 1; #endif /* CONFIG_SPE */ + if (__put_user(msr, &frame->mc_gregs[PT_MSR])) + return 1; if (sigret) { /* Set up the sigreturn trampoline: li r0,sigret; sc */ if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) @@ -409,6 +500,9 @@ static long restore_user_regs(struct pt_regs *regs, long err; unsigned int save_r2 = 0; unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif /* * restore general registers but not including MSR or SOFTE. Also @@ -436,16 +530,11 @@ static long restore_user_regs(struct pt_regs *regs, */ discard_lazy_cpu_state(); - /* force the process to reload the FP registers from - current->thread when it next does FP instructions */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); - if (__copy_from_user(current->thread.fpr, &sr->mc_fregs, - sizeof(sr->mc_fregs))) - return 1; - #ifdef CONFIG_ALTIVEC - /* force the process to reload the altivec registers from - current->thread when it next does altivec instructions */ + /* + * Force the process to reload the altivec registers from + * current->thread when it next does altivec instructions + */ regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ @@ -459,6 +548,31 @@ static long restore_user_regs(struct pt_regs *regs, if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) return 1; #endif /* CONFIG_ALTIVEC */ + if (copy_fpr_from_user(current, &sr->mc_fregs)) + return 1; + +#ifdef CONFIG_VSX + /* + * Force the process to reload the VSX registers from + * current->thread when it next does VSX instruction. + */ + regs->msr &= ~MSR_VSX; + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + if (copy_vsx_from_user(current, &sr->mc_vsregs)) + return 1; + } else if (current->thread.used_vsr) + for (i = 0; i < 32 ; i++) + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; +#endif /* CONFIG_VSX */ + /* + * force the process to reload the FP registers from + * current->thread when it next does FP instructions + */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); #ifdef CONFIG_SPE /* force the process to reload the spe registers from @@ -823,12 +937,42 @@ long sys_swapcontext(struct ucontext __user *old_ctx, { unsigned char tmp; +#ifdef CONFIG_PPC64 + unsigned long new_msr = 0; + + if (new_ctx && + __get_user(new_msr, &new_ctx->uc_mcontext.mc_gregs[PT_MSR])) + return -EFAULT; + /* + * Check that the context is not smaller than the original + * size (with VMX but without VSX) + */ + if (ctx_size < UCONTEXTSIZEWITHOUTVSX) + return -EINVAL; + /* + * If the new context state sets the MSR VSX bits but + * it doesn't provide VSX state. + */ + if ((ctx_size < sizeof(struct ucontext)) && + (new_msr & MSR_VSX)) + return -EINVAL; +#ifdef CONFIG_VSX + /* + * If userspace doesn't provide enough room for VSX data, + * but current thread has used VSX, we don't have anywhere + * to store the full context back into. + */ + if ((ctx_size < sizeof(struct ucontext)) && + (current->thread.used_vsr && old_ctx)) + return -EINVAL; +#endif +#else /* Context size is for future use. Right now, we only make sure * we are passed something we understand */ if (ctx_size < sizeof(struct ucontext)) return -EINVAL; - +#endif if (old_ctx != NULL) { struct mcontext __user *mctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index da7c058e3731..65ad925c3a8f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -112,11 +112,29 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ + flush_fp_to_thread(current); + /* copy fpr regs and fpscr */ + err |= copy_fpr_to_user(&sc->fp_regs, current); +#ifdef CONFIG_VSX + /* + * Copy VSX low doubleword to local buffer for formatting, + * then out to userspace. Update v_regs to point after the + * VMX data. + */ + if (current->thread.used_vsr) { + __giveup_vsx(current); + v_regs += ELF_NVRREG; + err |= copy_vsx_to_user(v_regs, current); + /* set MSR_VSX in the MSR value in the frame to + * indicate that sc->vs_reg) contains valid data. + */ + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ err |= __put_user(&sc->gp_regs, &sc->regs); WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __put_user(msr, &sc->gp_regs[PT_MSR]); - err |= __copy_to_user(&sc->fp_regs, ¤t->thread.fpr, FP_REGS_SIZE); err |= __put_user(signr, &sc->signal); err |= __put_user(handler, &sc->handler); if (set != NULL) @@ -137,29 +155,32 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, #endif unsigned long err = 0; unsigned long save_r13 = 0; - elf_greg_t *gregs = (elf_greg_t *)regs; unsigned long msr; +#ifdef CONFIG_VSX int i; +#endif /* If this is not a signal return, we preserve the TLS in r13 */ if (!sig) save_r13 = regs->gpr[13]; - /* copy everything before MSR */ - err |= __copy_from_user(regs, &sc->gp_regs, - PT_MSR*sizeof(unsigned long)); - + /* copy the GPRs */ + err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr)); + err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]); /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); if (sig) regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - + err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]); + err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]); + err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); + err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); + err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); /* skip SOFTE */ - for (i = PT_MSR+1; i <= PT_RESULT; i++) { - if (i == PT_SOFTE) - continue; - err |= __get_user(gregs[i], &sc->gp_regs[i]); - } + err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); + err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); + err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); + err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); if (!sig) regs->gpr[13] = save_r13; @@ -180,9 +201,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, * This has to be done before copying stuff into current->thread.fpr/vr * for the reasons explained in the previous comment. */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC); - - err |= __copy_from_user(¤t->thread.fpr, &sc->fp_regs, FP_REGS_SIZE); + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); #ifdef CONFIG_ALTIVEC err |= __get_user(v_regs, &sc->v_regs); @@ -202,7 +221,23 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, else current->thread.vrsave = 0; #endif /* CONFIG_ALTIVEC */ + /* restore floating point */ + err |= copy_fpr_from_user(current, &sc->fp_regs); +#ifdef CONFIG_VSX + /* + * Get additional VSX data. Update v_regs to point after the + * VMX data. Copy VSX low doubleword from userspace to local + * buffer for formatting, then into the taskstruct. + */ + v_regs += ELF_NVRREG; + if ((msr & MSR_VSX) != 0) + err |= copy_vsx_from_user(current, v_regs); + else + for (i = 0; i < 32 ; i++) + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; +#else +#endif return err; } @@ -233,6 +268,13 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) } /* + * Userspace code may pass a ucontext which doesn't include VSX added + * at the end. We need to check for this case. + */ +#define UCONTEXTSIZEWITHOUTVSX \ + (sizeof(struct ucontext) - 32*sizeof(long)) + +/* * Handle {get,set,swap}_context operations */ int sys_swapcontext(struct ucontext __user *old_ctx, @@ -241,13 +283,34 @@ int sys_swapcontext(struct ucontext __user *old_ctx, { unsigned char tmp; sigset_t set; + unsigned long new_msr = 0; - /* Context size is for future use. Right now, we only make sure - * we are passed something we understand + if (new_ctx && + __get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR])) + return -EFAULT; + /* + * Check that the context is not smaller than the original + * size (with VMX but without VSX) */ - if (ctx_size < sizeof(struct ucontext)) + if (ctx_size < UCONTEXTSIZEWITHOUTVSX) return -EINVAL; - + /* + * If the new context state sets the MSR VSX bits but + * it doesn't provide VSX state. + */ + if ((ctx_size < sizeof(struct ucontext)) && + (new_msr & MSR_VSX)) + return -EINVAL; +#ifdef CONFIG_VSX + /* + * If userspace doesn't provide enough room for VSX data, + * but current thread has used VSX, we don't have anywhere + * to store the full context back into. + */ + if ((ctx_size < sizeof(struct ucontext)) && + (current->thread.used_vsr && old_ctx)) + return -EINVAL; +#endif if (old_ctx != NULL) { if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx)) || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1457aa0a08f1..5337ca7bb649 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -41,6 +41,7 @@ #include <asm/smp.h> #include <asm/time.h> #include <asm/machdep.h> +#include <asm/cputhreads.h> #include <asm/cputable.h> #include <asm/system.h> #include <asm/mpic.h> @@ -62,22 +63,20 @@ struct thread_info *secondary_ti; cpumask_t cpu_possible_map = CPU_MASK_NONE; cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; +DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; static volatile unsigned int cpu_callin_map[NR_CPUS]; -void smp_call_function_interrupt(void); - int smt_enabled_at_boot = 1; -static int ipi_fail_ok; - static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; #ifdef CONFIG_PPC64 @@ -99,12 +98,15 @@ void smp_message_recv(int msg) { switch(msg) { case PPC_MSG_CALL_FUNCTION: - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); break; case PPC_MSG_RESCHEDULE: /* XXX Do we have to do this? */ set_need_resched(); break; + case PPC_MSG_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; case PPC_MSG_DEBUGGER_BREAK: if (crash_ipi_function_ptr) { crash_ipi_function_ptr(get_irq_regs()); @@ -128,6 +130,19 @@ void smp_send_reschedule(int cpu) smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +void arch_send_call_function_single_ipi(int cpu) +{ + smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); +} + +void arch_send_call_function_ipi(cpumask_t mask) +{ + unsigned int cpu; + + for_each_cpu_mask(cpu, mask) + smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); +} + #ifdef CONFIG_DEBUGGER void smp_send_debugger_break(int cpu) { @@ -154,223 +169,13 @@ static void stop_this_cpu(void *dummy) ; } -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. - * Stolen from the i386 version. - */ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); - -static struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -} *call_data; - -/* delay of at least 8 seconds */ -#define SMP_CALL_TIMEOUT 8 - -/* - * These functions send a 'generic call function' IPI to other online - * CPUS in the system. - * - * [SUMMARY] Run a function on other CPUs. - * <func> The function to run. This must be fast and non-blocking. - * <info> An arbitrary pointer to pass to the function. - * <nonatomic> currently unused. - * <wait> If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute <<func>> or are or have executed. - * <map> is a cpu map of the cpus to send IPI to. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -static int __smp_call_function_map(void (*func) (void *info), void *info, - int nonatomic, int wait, cpumask_t map) -{ - struct call_data_struct data; - int ret = -1, num_cpus; - int cpu; - u64 timeout; - - if (unlikely(smp_ops == NULL)) - return ret; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - /* remove 'self' from the map */ - if (cpu_isset(smp_processor_id(), map)) - cpu_clear(smp_processor_id(), map); - - /* sanity check the map, remove any non-online processors. */ - cpus_and(map, map, cpu_online_map); - - num_cpus = cpus_weight(map); - if (!num_cpus) - goto done; - - call_data = &data; - smp_wmb(); - /* Send a message to all CPUs in the map */ - for_each_cpu_mask(cpu, map) - smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); - - timeout = get_tb() + (u64) SMP_CALL_TIMEOUT * tb_ticks_per_sec; - - /* Wait for indication that they have received the message */ - while (atomic_read(&data.started) != num_cpus) { - HMT_low(); - if (get_tb() >= timeout) { - printk("smp_call_function on cpu %d: other cpus not " - "responding (%d)\n", smp_processor_id(), - atomic_read(&data.started)); - if (!ipi_fail_ok) - debugger(NULL); - goto out; - } - } - - /* optionally wait for the CPUs to complete */ - if (wait) { - while (atomic_read(&data.finished) != num_cpus) { - HMT_low(); - if (get_tb() >= timeout) { - printk("smp_call_function on cpu %d: other " - "cpus not finishing (%d/%d)\n", - smp_processor_id(), - atomic_read(&data.finished), - atomic_read(&data.started)); - debugger(NULL); - goto out; - } - } - } - - done: - ret = 0; - - out: - call_data = NULL; - HMT_medium(); - return ret; -} - -static int __smp_call_function(void (*func)(void *info), void *info, - int nonatomic, int wait) -{ - int ret; - spin_lock(&call_lock); - ret =__smp_call_function_map(func, info, nonatomic, wait, - cpu_online_map); - spin_unlock(&call_lock); - return ret; -} - -int smp_call_function(void (*func) (void *info), void *info, int nonatomic, - int wait) -{ - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - return __smp_call_function(func, info, nonatomic, wait); -} -EXPORT_SYMBOL(smp_call_function); - -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - cpumask_t map = CPU_MASK_NONE; - int ret = 0; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - if (!cpu_online(cpu)) - return -EINVAL; - - cpu_set(cpu, map); - if (cpu != get_cpu()) { - spin_lock(&call_lock); - ret = __smp_call_function_map(func, info, nonatomic, wait, map); - spin_unlock(&call_lock); - } else { - local_irq_disable(); - func(info); - local_irq_enable(); - } - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); - void smp_send_stop(void) { - int nolock; - - /* It's OK to fail sending the IPI, since the alternative is to - * be stuck forever waiting on the other CPU to take the interrupt. - * - * It's better to at least continue and go through reboot, since this - * function is usually called at panic or reboot time in the first - * place. - */ - ipi_fail_ok = 1; - - /* Don't deadlock in case we got called through panic */ - nolock = !spin_trylock(&call_lock); - __smp_call_function_map(stop_this_cpu, NULL, 1, 0, cpu_online_map); - if (!nolock) - spin_unlock(&call_lock); -} - -void smp_call_function_interrupt(void) -{ - void (*func) (void *info); - void *info; - int wait; - - /* call_data will be NULL if the sender timed out while - * waiting on us to receive the call. - */ - if (!call_data) - return; - - func = call_data->func; - info = call_data->info; - wait = call_data->wait; - - if (!wait) - smp_mb__before_atomic_inc(); - - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - (*func)(info); - if (wait) { - smp_mb__before_atomic_inc(); - atomic_inc(&call_data->finished); - } + smp_call_function(stop_this_cpu, NULL, 0); } -extern struct gettimeofday_struct do_gtod; - struct thread_info *current_set[NR_CPUS]; -DECLARE_PER_CPU(unsigned int, pvr); - static void __devinit smp_store_cpu_info(int id) { per_cpu(pvr, id) = mfspr(SPRN_PVR); @@ -426,6 +231,8 @@ void __devinit smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != boot_cpuid); cpu_set(boot_cpuid, cpu_online_map); + cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid)); + cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid)); #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif @@ -573,11 +380,60 @@ int __cpuinit __cpu_up(unsigned int cpu) return 0; } +/* Return the value of the reg property corresponding to the given + * logical cpu. + */ +int cpu_to_core_id(int cpu) +{ + struct device_node *np; + const int *reg; + int id = -1; + + np = of_get_cpu_node(cpu, NULL); + if (!np) + goto out; + + reg = of_get_property(np, "reg", NULL); + if (!reg) + goto out; + + id = *reg; +out: + of_node_put(np); + return id; +} + +/* Must be called when no change can occur to cpu_present_map, + * i.e. during cpu online or offline. + */ +static struct device_node *cpu_to_l2cache(int cpu) +{ + struct device_node *np; + const phandle *php; + phandle ph; + + if (!cpu_present(cpu)) + return NULL; + + np = of_get_cpu_node(cpu, NULL); + if (np == NULL) + return NULL; + + php = of_get_property(np, "l2-cache", NULL); + if (php == NULL) + return NULL; + ph = *php; + of_node_put(np); + + return of_find_node_by_phandle(ph); +} /* Activate a secondary processor. */ int __devinit start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); + struct device_node *l2_cache; + int i, base; atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; @@ -596,9 +452,36 @@ int __devinit start_secondary(void *unused) secondary_cpu_time_init(); - spin_lock(&call_lock); + ipi_call_lock(); cpu_set(cpu, cpu_online_map); - spin_unlock(&call_lock); + /* Update sibling maps */ + base = cpu_first_thread_in_core(cpu); + for (i = 0; i < threads_per_core; i++) { + if (cpu_is_offline(base + i)) + continue; + cpu_set(cpu, per_cpu(cpu_sibling_map, base + i)); + cpu_set(base + i, per_cpu(cpu_sibling_map, cpu)); + + /* cpu_core_map should be a superset of + * cpu_sibling_map even if we don't have cache + * information, so update the former here, too. + */ + cpu_set(cpu, per_cpu(cpu_core_map, base +i)); + cpu_set(base + i, per_cpu(cpu_core_map, cpu)); + } + l2_cache = cpu_to_l2cache(cpu); + for_each_online_cpu(i) { + struct device_node *np = cpu_to_l2cache(i); + if (!np) + continue; + if (np == l2_cache) { + cpu_set(cpu, per_cpu(cpu_core_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + } + of_node_put(np); + } + of_node_put(l2_cache); + ipi_call_unlock(); local_irq_enable(); @@ -635,10 +518,42 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { - if (smp_ops->cpu_disable) - return smp_ops->cpu_disable(); + struct device_node *l2_cache; + int cpu = smp_processor_id(); + int base, i; + int err; - return -ENOSYS; + if (!smp_ops->cpu_disable) + return -ENOSYS; + + err = smp_ops->cpu_disable(); + if (err) + return err; + + /* Update sibling maps */ + base = cpu_first_thread_in_core(cpu); + for (i = 0; i < threads_per_core; i++) { + cpu_clear(cpu, per_cpu(cpu_sibling_map, base + i)); + cpu_clear(base + i, per_cpu(cpu_sibling_map, cpu)); + cpu_clear(cpu, per_cpu(cpu_core_map, base +i)); + cpu_clear(base + i, per_cpu(cpu_core_map, cpu)); + } + + l2_cache = cpu_to_l2cache(cpu); + for_each_present_cpu(i) { + struct device_node *np = cpu_to_l2cache(i); + if (!np) + continue; + if (np == l2_cache) { + cpu_clear(cpu, per_cpu(cpu_core_map, i)); + cpu_clear(i, per_cpu(cpu_core_map, cpu)); + } + of_node_put(np); + } + of_node_put(l2_cache); + + + return 0; } void __cpu_die(unsigned int cpu) diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c index 67d6f6890edc..c906c4bf6835 100644 --- a/arch/powerpc/kernel/softemu8xx.c +++ b/arch/powerpc/kernel/softemu8xx.c @@ -124,7 +124,7 @@ int Soft_emulate_8xx(struct pt_regs *regs) disp = instword & 0xffff; ea = (u32 *)(regs->gpr[idxreg] + disp); - ip = (u32 *)¤t->thread.fpr[flreg]; + ip = (u32 *)¤t->thread.TS_FPR(flreg); switch ( inst ) { @@ -168,7 +168,7 @@ int Soft_emulate_8xx(struct pt_regs *regs) break; case FMR: /* assume this is a fp move -- Cort */ - memcpy(ip, ¤t->thread.fpr[(instword>>11)&0x1f], + memcpy(ip, ¤t->thread.TS_FPR((instword>>11)&0x1f), sizeof(double)); break; default: diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 3cf0d94ba340..b0dbb1daa4df 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -10,34 +10,34 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/module.h> #include <linux/sched.h> #include <linux/stacktrace.h> -#include <linux/module.h> #include <asm/ptrace.h> +#include <asm/processor.h> /* * Save stack-backtrace addresses into a stack_trace buffer. */ -void save_stack_trace(struct stack_trace *trace) +static void save_context_stack(struct stack_trace *trace, unsigned long sp, + struct task_struct *tsk, int savesched) { - unsigned long sp; - - asm("mr %0,1" : "=r" (sp)); - for (;;) { unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) return; newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; + if (savesched || !in_sched_functions(ip)) { + if (!trace->skip) + trace->entries[trace->nr_entries++] = ip; + else + trace->skip--; + } if (trace->nr_entries >= trace->max_entries) return; @@ -45,4 +45,19 @@ void save_stack_trace(struct stack_trace *trace) sp = newsp; } } + +void save_stack_trace(struct stack_trace *trace) +{ + unsigned long sp; + + asm("mr %0,1" : "=r" (sp)); + + save_context_stack(trace, sp, current, 1); +} EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + save_context_stack(trace, tsk->thread.ksp, tsk, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 8cee57107541..6fc6328dc626 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -7,6 +7,7 @@ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> */ +#include <linux/mm.h> #include <asm/page.h> /* References to section boundaries */ diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 4fe69ca24481..c04832c4a02e 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -143,6 +143,9 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len, struct file * file = NULL; unsigned long ret = -EINVAL; + if (!arch_validate_prot(prot)) + goto out; + if (shift) { if (off & ((1 << shift) - 1)) goto out; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c8127f832df0..56d172d16e56 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -22,13 +22,17 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); +static DEFINE_PER_CPU(struct kobject *, cache_toplevel); + /* SMT stuff */ #ifdef CONFIG_PPC_MULTIPLATFORM /* Time in microseconds we delay before sleeping in the idle loop */ DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 }; -static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, +static ssize_t store_smt_snooze_delay(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -44,7 +48,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, return count; } -static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) +static ssize_t show_smt_snooze_delay(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -152,14 +158,17 @@ static unsigned long write_##NAME(unsigned long val) \ mtspr(ADDRESS, val); \ return 0; \ } \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ +static ssize_t show_##NAME(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ return sprintf(buf, "%lx\n", val); \ } \ static ssize_t __used \ - store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ + store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \ + const char *buf, size_t count) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val; \ @@ -290,8 +299,289 @@ static struct sysdev_attribute pa6t_attrs[] = { #endif /* CONFIG_DEBUG_KERNEL */ }; +struct cache_desc { + struct kobject kobj; + struct cache_desc *next; + const char *type; /* Instruction, Data, or Unified */ + u32 size; /* total cache size in KB */ + u32 line_size; /* in bytes */ + u32 nr_sets; /* number of sets */ + u32 level; /* e.g. 1, 2, 3... */ + u32 associativity; /* e.g. 8-way... 0 is fully associative */ +}; + +DEFINE_PER_CPU(struct cache_desc *, cache_desc); + +static struct cache_desc *kobj_to_cache_desc(struct kobject *k) +{ + return container_of(k, struct cache_desc, kobj); +} + +static void cache_desc_release(struct kobject *k) +{ + struct cache_desc *desc = kobj_to_cache_desc(k); + + pr_debug("%s: releasing %s\n", __func__, kobject_name(k)); + + if (desc->next) + kobject_put(&desc->next->kobj); + + kfree(kobj_to_cache_desc(k)); +} + +static ssize_t cache_desc_show(struct kobject *k, struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + + kobj_attr = container_of(attr, struct kobj_attribute, attr); + + return kobj_attr->show(k, kobj_attr, buf); +} + +static struct sysfs_ops cache_desc_sysfs_ops = { + .show = cache_desc_show, +}; + +static struct kobj_type cache_desc_type = { + .release = cache_desc_release, + .sysfs_ops = &cache_desc_sysfs_ops, +}; + +static ssize_t cache_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_desc *cache = kobj_to_cache_desc(k); + + return sprintf(buf, "%uK\n", cache->size); +} + +static struct kobj_attribute cache_size_attr = + __ATTR(size, 0444, cache_size_show, NULL); + +static ssize_t cache_line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_desc *cache = kobj_to_cache_desc(k); + + return sprintf(buf, "%u\n", cache->line_size); +} + +static struct kobj_attribute cache_line_size_attr = + __ATTR(coherency_line_size, 0444, cache_line_size_show, NULL); + +static ssize_t cache_nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_desc *cache = kobj_to_cache_desc(k); + + return sprintf(buf, "%u\n", cache->nr_sets); +} + +static struct kobj_attribute cache_nr_sets_attr = + __ATTR(number_of_sets, 0444, cache_nr_sets_show, NULL); + +static ssize_t cache_type_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_desc *cache = kobj_to_cache_desc(k); + + return sprintf(buf, "%s\n", cache->type); +} + +static struct kobj_attribute cache_type_attr = + __ATTR(type, 0444, cache_type_show, NULL); + +static ssize_t cache_level_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_desc *cache = kobj_to_cache_desc(k); + + return sprintf(buf, "%u\n", cache->level); +} + +static struct kobj_attribute cache_level_attr = + __ATTR(level, 0444, cache_level_show, NULL); + +static ssize_t cache_assoc_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + struct cache_desc *cache = kobj_to_cache_desc(k); + + return sprintf(buf, "%u\n", cache->associativity); +} + +static struct kobj_attribute cache_assoc_attr = + __ATTR(ways_of_associativity, 0444, cache_assoc_show, NULL); + +struct cache_desc_info { + const char *type; + const char *size_prop; + const char *line_size_prop; + const char *nr_sets_prop; +}; + +/* PowerPC Processor binding says the [di]-cache-* must be equal on + * unified caches, so just use d-cache properties. */ +static struct cache_desc_info ucache_info = { + .type = "Unified", + .size_prop = "d-cache-size", + .line_size_prop = "d-cache-line-size", + .nr_sets_prop = "d-cache-sets", +}; -static void register_cpu_online(unsigned int cpu) +static struct cache_desc_info dcache_info = { + .type = "Data", + .size_prop = "d-cache-size", + .line_size_prop = "d-cache-line-size", + .nr_sets_prop = "d-cache-sets", +}; + +static struct cache_desc_info icache_info = { + .type = "Instruction", + .size_prop = "i-cache-size", + .line_size_prop = "i-cache-line-size", + .nr_sets_prop = "i-cache-sets", +}; + +static struct cache_desc * __cpuinit create_cache_desc(struct device_node *np, struct kobject *parent, int index, int level, struct cache_desc_info *info) +{ + const u32 *cache_line_size; + struct cache_desc *new; + const u32 *cache_size; + const u32 *nr_sets; + int rc; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + rc = kobject_init_and_add(&new->kobj, &cache_desc_type, parent, + "index%d", index); + if (rc) + goto err; + + /* type */ + new->type = info->type; + rc = sysfs_create_file(&new->kobj, &cache_type_attr.attr); + WARN_ON(rc); + + /* level */ + new->level = level; + rc = sysfs_create_file(&new->kobj, &cache_level_attr.attr); + WARN_ON(rc); + + /* size */ + cache_size = of_get_property(np, info->size_prop, NULL); + if (cache_size) { + new->size = *cache_size / 1024; + rc = sysfs_create_file(&new->kobj, + &cache_size_attr.attr); + WARN_ON(rc); + } + + /* coherency_line_size */ + cache_line_size = of_get_property(np, info->line_size_prop, NULL); + if (cache_line_size) { + new->line_size = *cache_line_size; + rc = sysfs_create_file(&new->kobj, + &cache_line_size_attr.attr); + WARN_ON(rc); + } + + /* number_of_sets */ + nr_sets = of_get_property(np, info->nr_sets_prop, NULL); + if (nr_sets) { + new->nr_sets = *nr_sets; + rc = sysfs_create_file(&new->kobj, + &cache_nr_sets_attr.attr); + WARN_ON(rc); + } + + /* ways_of_associativity */ + if (new->nr_sets == 1) { + /* fully associative */ + new->associativity = 0; + goto create_assoc; + } + + if (new->nr_sets && new->size && new->line_size) { + /* If we have values for all of these we can derive + * the associativity. */ + new->associativity = + ((new->size * 1024) / new->nr_sets) / new->line_size; +create_assoc: + rc = sysfs_create_file(&new->kobj, + &cache_assoc_attr.attr); + WARN_ON(rc); + } + + return new; +err: + kfree(new); + return NULL; +} + +static bool cache_is_unified(struct device_node *np) +{ + return of_get_property(np, "cache-unified", NULL); +} + +static struct cache_desc * __cpuinit create_cache_index_info(struct device_node *np, struct kobject *parent, int index, int level) +{ + const phandle *next_cache_phandle; + struct device_node *next_cache; + struct cache_desc *new, **end; + + pr_debug("%s(node = %s, index = %d)\n", __func__, np->full_name, index); + + if (cache_is_unified(np)) { + new = create_cache_desc(np, parent, index, level, + &ucache_info); + } else { + new = create_cache_desc(np, parent, index, level, + &dcache_info); + if (new) { + index++; + new->next = create_cache_desc(np, parent, index, level, + &icache_info); + } + } + if (!new) + return NULL; + + end = &new->next; + while (*end) + end = &(*end)->next; + + next_cache_phandle = of_get_property(np, "l2-cache", NULL); + if (!next_cache_phandle) + goto out; + + next_cache = of_find_node_by_phandle(*next_cache_phandle); + if (!next_cache) + goto out; + + *end = create_cache_index_info(next_cache, parent, ++index, ++level); + + of_node_put(next_cache); +out: + return new; +} + +static void __cpuinit create_cache_info(struct sys_device *sysdev) +{ + struct kobject *cache_toplevel; + struct device_node *np = NULL; + int cpu = sysdev->id; + + cache_toplevel = kobject_create_and_add("cache", &sysdev->kobj); + if (!cache_toplevel) + return; + per_cpu(cache_toplevel, cpu) = cache_toplevel; + np = of_get_cpu_node(cpu, NULL); + if (np != NULL) { + per_cpu(cache_desc, cpu) = + create_cache_index_info(np, cache_toplevel, 0, 1); + of_node_put(np); + } + return; +} + +static void __cpuinit register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); struct sys_device *s = &c->sysdev; @@ -339,9 +629,33 @@ static void register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_create_file(s, &attr_dscr); + + create_cache_info(s); } #ifdef CONFIG_HOTPLUG_CPU +static void remove_cache_info(struct sys_device *sysdev) +{ + struct kobject *cache_toplevel; + struct cache_desc *cache_desc; + int cpu = sysdev->id; + + cache_desc = per_cpu(cache_desc, cpu); + if (cache_desc != NULL) { + sysfs_remove_file(&cache_desc->kobj, &cache_size_attr.attr); + sysfs_remove_file(&cache_desc->kobj, &cache_line_size_attr.attr); + sysfs_remove_file(&cache_desc->kobj, &cache_type_attr.attr); + sysfs_remove_file(&cache_desc->kobj, &cache_level_attr.attr); + sysfs_remove_file(&cache_desc->kobj, &cache_nr_sets_attr.attr); + sysfs_remove_file(&cache_desc->kobj, &cache_assoc_attr.attr); + + kobject_put(&cache_desc->kobj); + } + cache_toplevel = per_cpu(cache_toplevel, cpu); + if (cache_toplevel != NULL) + kobject_put(cache_toplevel); +} + static void unregister_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -392,6 +706,8 @@ static void unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_remove_file(s, &attr_dscr); + + remove_cache_info(s); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -522,7 +838,8 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, char *buf) +static ssize_t show_physical_id(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 368a4934f7ee..c3a56d65c5a9 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -192,7 +192,7 @@ static void tau_timeout_smp(unsigned long unused) /* schedule ourselves to be run again */ mod_timer(&tau_timer, jiffies + shrink_timer) ; - on_each_cpu(tau_timeout, NULL, 1, 0); + on_each_cpu(tau_timeout, NULL, 0); } /* @@ -234,7 +234,7 @@ int __init TAU_init(void) tau_timer.expires = jiffies + shrink_timer; add_timer(&tau_timer); - on_each_cpu(TAU_init_smp, NULL, 1, 0); + on_each_cpu(TAU_init_smp, NULL, 0); printk("Thermal assist unit "); #ifdef CONFIG_TAU_INT diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 73401e83739a..e2ee66b5831d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -129,7 +129,7 @@ static unsigned long __initdata iSeries_recal_titan; static signed long __initdata iSeries_recal_tb; /* Forward declaration is only needed for iSereis compiles */ -void __init clocksource_init(void); +static void __init clocksource_init(void); #endif #define XSEC_PER_SEC (1024*1024) @@ -150,8 +150,8 @@ u64 tb_to_xs; unsigned tb_to_us; #define TICKLEN_SCALE NTP_SCALE_SHIFT -u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ -u64 ticklen_to_xs; /* 0.64 fraction */ +static u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ +static u64 ticklen_to_xs; /* 0.64 fraction */ /* If last_tick_len corresponds to about 1/HZ seconds, then last_tick_len << TICKLEN_SHIFT will be about 2^63. */ @@ -164,7 +164,7 @@ static u64 tb_to_ns_scale __read_mostly; static unsigned tb_to_ns_shift __read_mostly; static unsigned long boot_tb __read_mostly; -struct gettimeofday_struct do_gtod; +static struct gettimeofday_struct do_gtod; extern struct timezone sys_tz; static long timezone_offset; @@ -322,7 +322,7 @@ void snapshot_timebases(void) { if (!cpu_has_feature(CPU_FTR_PURR)) return; - on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1); + on_each_cpu(snapshot_tb_and_purr, NULL, 1); } /* @@ -742,10 +742,6 @@ void __init generic_calibrate_decr(void) } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - /* Set the time base to zero */ - mtspr(SPRN_TBWL, 0); - mtspr(SPRN_TBWU, 0); - /* Clear any pending timer interrupts */ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); @@ -832,7 +828,7 @@ void update_vsyscall_tz(void) ++vdso_data->tb_update_count; } -void __init clocksource_init(void) +static void __init clocksource_init(void) { struct clocksource *clock; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4b5b7ff4f78b..81ccb8dd1a54 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -967,6 +967,20 @@ void altivec_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); } +void vsx_unavailable_exception(struct pt_regs *regs) +{ + if (user_mode(regs)) { + /* A user program has executed an vsx instruction, + but this kernel doesn't support vsx. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + + printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); +} + void performance_monitor_exception(struct pt_regs *regs) { perf_irq(regs); @@ -1030,21 +1044,45 @@ void SoftwareEmulation(struct pt_regs *regs) #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) -void DebugException(struct pt_regs *regs, unsigned long debug_status) +void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) { if (debug_status & DBSR_IC) { /* instruction completion */ regs->msr &= ~MSR_DE; + + /* Disable instruction completion */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); + /* Clear the instruction completion event */ + mtspr(SPRN_DBSR, DBSR_IC); + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) { + return; + } + + if (debugger_sstep(regs)) + return; + if (user_mode(regs)) { current->thread.dbcr0 &= ~DBCR0_IC; + } + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + regs->msr &= ~MSR_DE; + + if (user_mode(regs)) { + current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | + DBCR0_IDM); } else { - /* Disable instruction completion */ - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); - /* Clear the instruction completion event */ - mtspr(SPRN_DBSR, DBSR_IC); - if (debugger_sstep(regs)) - return; + /* Disable DAC interupts */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | + DBSR_DAC1W | DBCR0_IDM)); + + /* Clear the DAC event */ + mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W)); } - _exception(SIGTRAP, regs, TRAP_TRACE, 0); + /* Setup and send the trap to the handler */ + do_dabr(regs, mfspr(SPRN_DAC1), debug_status); } } #endif /* CONFIG_4xx || CONFIG_BOOKE */ @@ -1091,6 +1129,21 @@ void altivec_assist_exception(struct pt_regs *regs) } #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX +void vsx_assist_exception(struct pt_regs *regs) +{ + if (!user_mode(regs)) { + printk(KERN_EMERG "VSX assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel VSX assist exception", regs, SIGILL); + } + + flush_vsx_to_thread(current); + printk(KERN_INFO "VSX assist not supported at %lx\n", regs->nip); + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +} +#endif /* CONFIG_VSX */ + #ifdef CONFIG_FSL_BOOKE void CacheLockingException(struct pt_regs *regs, unsigned long address, unsigned long error_code) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ce245a850db2..65639a43e644 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -571,6 +571,11 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, if (start64) do_feature_fixups(powerpc_firmware_features, start64, start64 + size64); + + start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64); + if (start64) + do_lwsync_fixups(cur_cpu_spec->cpu_features, + start64, start64 + size64); #endif /* CONFIG_PPC64 */ start32 = find_section32(v32->hdr, "__ftr_fixup", &size32); @@ -585,6 +590,11 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, start32, start32 + size32); #endif /* CONFIG_PPC64 */ + start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32); + if (start32) + do_lwsync_fixups(cur_cpu_spec->cpu_features, + start32, start32 + size32); + return 0; } @@ -778,9 +788,7 @@ static int __init vdso_init(void) return 0; } -#ifdef CONFIG_PPC_MERGE arch_initcall(vdso_init); -#endif int in_gate_area_no_task(unsigned long addr) { diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 9352ab5200e5..be3b6a41dc09 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -24,7 +24,7 @@ SECTIONS . = ALIGN(16); .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) + *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) } PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -33,6 +33,9 @@ SECTIONS . = ALIGN(8); __ftr_fixup : { *(__ftr_fixup) } + . = ALIGN(8); + __lwsync_fixup : { *(__lwsync_fixup) } + #ifdef CONFIG_PPC64 . = ALIGN(8); __fw_ftr_fixup : { *(__fw_ftr_fixup) } diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 932b3fdb34b9..d0b2526dd38d 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -24,7 +24,7 @@ SECTIONS . = ALIGN(16); .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) + *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) *(.sfpr .glink) } :text PROVIDE(__etext = .); @@ -35,6 +35,9 @@ SECTIONS __ftr_fixup : { *(__ftr_fixup) } . = ALIGN(8); + __lwsync_fixup : { *(__lwsync_fixup) } + + . = ALIGN(8); __fw_ftr_fixup : { *(__fw_ftr_fixup) } /* @@ -43,15 +46,15 @@ SECTIONS .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } .rodata1 : { *(.rodata1) } + .dynamic : { *(.dynamic) } :text :dynamic + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .gcc_except_table : { *(.gcc_except_table) } + .rela.dyn ALIGN(8) : { *(.rela.dyn) } .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - - .dynamic : { *(.dynamic) } :text :dynamic _end = .; PROVIDE(end = .); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b77f8af7ddde..22a3c33fd751 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1,11 +1,12 @@ /* * IBM PowerPC Virtual I/O Infrastructure Support. * - * Copyright (c) 2003-2005 IBM Corp. + * Copyright (c) 2003,2008 IBM Corp. * Dave Engebretsen engebret@us.ibm.com * Santiago Leon santil@us.ibm.com * Hollis Blanchard <hollisb@us.ibm.com> * Stephen Rothwell + * Robert Jennings <rcjenn@us.ibm.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */ .dev.bus = &vio_bus_type, }; +#ifdef CONFIG_PPC_SMLPAR +/** + * vio_cmo_pool - A pool of IO memory for CMO use + * + * @size: The size of the pool in bytes + * @free: The amount of free memory in the pool + */ +struct vio_cmo_pool { + size_t size; + size_t free; +}; + +/* How many ms to delay queued balance work */ +#define VIO_CMO_BALANCE_DELAY 100 + +/* Portion out IO memory to CMO devices by this chunk size */ +#define VIO_CMO_BALANCE_CHUNK 131072 + +/** + * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement + * + * @vio_dev: struct vio_dev pointer + * @list: pointer to other devices on bus that are being tracked + */ +struct vio_cmo_dev_entry { + struct vio_dev *viodev; + struct list_head list; +}; + +/** + * vio_cmo - VIO bus accounting structure for CMO entitlement + * + * @lock: spinlock for entire structure + * @balance_q: work queue for balancing system entitlement + * @device_list: list of CMO-enabled devices requiring entitlement + * @entitled: total system entitlement in bytes + * @reserve: pool of memory from which devices reserve entitlement, incl. spare + * @excess: pool of excess entitlement not needed for device reserves or spare + * @spare: IO memory for device hotplug functionality + * @min: minimum necessary for system operation + * @desired: desired memory for system operation + * @curr: bytes currently allocated + * @high: high water mark for IO data usage + */ +struct vio_cmo { + spinlock_t lock; + struct delayed_work balance_q; + struct list_head device_list; + size_t entitled; + struct vio_cmo_pool reserve; + struct vio_cmo_pool excess; + size_t spare; + size_t min; + size_t desired; + size_t curr; + size_t high; +} vio_cmo; + +/** + * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows + */ +static int vio_cmo_num_OF_devs(void) +{ + struct device_node *node_vroot; + int count = 0; + + /* + * Count the number of vdevice entries with an + * ibm,my-dma-window OF property + */ + node_vroot = of_find_node_by_name(NULL, "vdevice"); + if (node_vroot) { + struct device_node *of_node; + struct property *prop; + + for_each_child_of_node(node_vroot, of_node) { + prop = of_find_property(of_node, "ibm,my-dma-window", + NULL); + if (prop) + count++; + } + } + of_node_put(node_vroot); + return count; +} + +/** + * vio_cmo_alloc - allocate IO memory for CMO-enable devices + * + * @viodev: VIO device requesting IO memory + * @size: size of allocation requested + * + * Allocations come from memory reserved for the devices and any excess + * IO memory available to all devices. The spare pool used to service + * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be + * made available. + * + * Return codes: + * 0 for successful allocation and -ENOMEM for a failure + */ +static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t reserve_free = 0; + size_t excess_free = 0; + int ret = -ENOMEM; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Determine the amount of free entitlement available in reserve */ + if (viodev->cmo.entitled > viodev->cmo.allocated) + reserve_free = viodev->cmo.entitled - viodev->cmo.allocated; + + /* If spare is not fulfilled, the excess pool can not be used. */ + if (vio_cmo.spare >= VIO_CMO_MIN_ENT) + excess_free = vio_cmo.excess.free; + + /* The request can be satisfied */ + if ((reserve_free + excess_free) >= size) { + vio_cmo.curr += size; + if (vio_cmo.curr > vio_cmo.high) + vio_cmo.high = vio_cmo.curr; + viodev->cmo.allocated += size; + size -= min(reserve_free, size); + vio_cmo.excess.free -= size; + ret = 0; + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return ret; +} + +/** + * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices + * @viodev: VIO device freeing IO memory + * @size: size of deallocation + * + * IO memory is freed by the device back to the correct memory pools. + * The spare pool is replenished first from either memory pool, then + * the reserve pool is used to reduce device entitlement, the excess + * pool is used to increase the reserve pool toward the desired entitlement + * target, and then the remaining memory is returned to the pools. + * + */ +static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t spare_needed = 0; + size_t excess_freed = 0; + size_t reserve_freed = size; + size_t tmp; + int balance = 0; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.curr -= size; + + /* Amount of memory freed from the excess pool */ + if (viodev->cmo.allocated > viodev->cmo.entitled) { + excess_freed = min(reserve_freed, (viodev->cmo.allocated - + viodev->cmo.entitled)); + reserve_freed -= excess_freed; + } + + /* Remove allocation from device */ + viodev->cmo.allocated -= (reserve_freed + excess_freed); + + /* Spare is a subset of the reserve pool, replenish it first. */ + spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare; + + /* + * Replenish the spare in the reserve pool from the excess pool. + * This moves entitlement into the reserve pool. + */ + if (spare_needed && excess_freed) { + tmp = min(excess_freed, spare_needed); + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + vio_cmo.spare += tmp; + excess_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Replenish the spare in the reserve pool from the reserve pool. + * This removes entitlement from the device down to VIO_CMO_MIN_ENT, + * if needed, and gives it to the spare pool. The amount of used + * memory in this pool does not change. + */ + if (spare_needed && reserve_freed) { + tmp = min(spare_needed, min(reserve_freed, + (viodev->cmo.entitled - + VIO_CMO_MIN_ENT))); + + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + reserve_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Increase the reserve pool until the desired allocation is met. + * Move an allocation freed from the excess pool into the reserve + * pool and schedule a balance operation. + */ + if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) { + tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size)); + + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + excess_freed -= tmp; + balance = 1; + } + + /* Return memory from the excess pool to that pool */ + if (excess_freed) + vio_cmo.excess.free += excess_freed; + + if (balance) + schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_entitlement_update - Manage system entitlement changes + * + * @new_entitlement: new system entitlement to attempt to accommodate + * + * Increases in entitlement will be used to fulfill the spare entitlement + * and the rest is given to the excess pool. Decreases, if they are + * possible, come from the excess pool and from unused device entitlement + * + * Returns: 0 on success, -ENOMEM when change can not be made + */ +int vio_cmo_entitlement_update(size_t new_entitlement) +{ + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail, delta, tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Entitlement increases */ + if (new_entitlement > vio_cmo.entitled) { + delta = new_entitlement - vio_cmo.entitled; + + /* Fulfill spare allocation */ + if (vio_cmo.spare < VIO_CMO_MIN_ENT) { + tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare)); + vio_cmo.spare += tmp; + vio_cmo.reserve.size += tmp; + delta -= tmp; + } + + /* Remaining new allocation goes to the excess pool */ + vio_cmo.entitled += delta; + vio_cmo.excess.size += delta; + vio_cmo.excess.free += delta; + + goto out; + } + + /* Entitlement decreases */ + delta = vio_cmo.entitled - new_entitlement; + avail = vio_cmo.excess.free; + + /* + * Need to check how much unused entitlement each device can + * sacrifice to fulfill entitlement change. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (avail >= delta) + break; + + viodev = dev_ent->viodev; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + avail += viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + } + + if (delta <= avail) { + vio_cmo.entitled -= delta; + + /* Take entitlement from the excess pool first */ + tmp = min(vio_cmo.excess.free, delta); + vio_cmo.excess.size -= tmp; + vio_cmo.excess.free -= tmp; + delta -= tmp; + + /* + * Remove all but VIO_CMO_MIN_ENT bytes from devices + * until entitlement change is served + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (!delta) + break; + + viodev = dev_ent->viodev; + tmp = 0; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + tmp = viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + viodev->cmo.entitled -= min(tmp, delta); + delta -= min(tmp, delta); + } + } else { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + +out: + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_balance - Balance entitlement among devices + * + * @work: work queue structure for this operation + * + * Any system entitlement above the minimum needed for devices, or + * already allocated to devices, can be distributed to the devices. + * The list of devices is iterated through to recalculate the desired + * entitlement level and to determine how much entitlement above the + * minimum entitlement is allocated to devices. + * + * Small chunks of the available entitlement are given to devices until + * their requirements are fulfilled or there is no entitlement left to give. + * Upon completion sizes of the reserve and excess pools are calculated. + * + * The system minimum entitlement level is also recalculated here. + * Entitlement will be reserved for devices even after vio_bus_remove to + * accommodate reloading the driver. The OF tree is walked to count the + * number of devices present and this will remove entitlement for devices + * that have actually left the system after having vio_bus_remove called. + */ +static void vio_cmo_balance(struct work_struct *work) +{ + struct vio_cmo *cmo; + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail = 0, level, chunk, need; + int devcount = 0, fulfilled; + + cmo = container_of(work, struct vio_cmo, balance_q.work); + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Calculate minimum entitlement and fulfill spare */ + cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT; + BUG_ON(cmo->min > cmo->entitled); + cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min)); + cmo->min += cmo->spare; + cmo->desired = cmo->min; + + /* + * Determine how much entitlement is available and reset device + * entitlements + */ + avail = cmo->entitled - cmo->spare; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + devcount++; + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT); + avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT); + } + + /* + * Having provided each device with the minimum entitlement, loop + * over the devices portioning out the remaining entitlement + * until there is nothing left. + */ + level = VIO_CMO_MIN_ENT; + while (avail) { + fulfilled = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + + if (viodev->cmo.desired <= level) { + fulfilled++; + continue; + } + + /* + * Give the device up to VIO_CMO_BALANCE_CHUNK + * bytes of entitlement, but do not exceed the + * desired level of entitlement for the device. + */ + chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK); + chunk = min(chunk, (viodev->cmo.desired - + viodev->cmo.entitled)); + viodev->cmo.entitled += chunk; + + /* + * If the memory for this entitlement increase was + * already allocated to the device it does not come + * from the available pool being portioned out. + */ + need = max(viodev->cmo.allocated, viodev->cmo.entitled)- + max(viodev->cmo.allocated, level); + avail -= need; + + } + if (fulfilled == devcount) + break; + level += VIO_CMO_BALANCE_CHUNK; + } + + /* Calculate new reserve and excess pool sizes */ + cmo->reserve.size = cmo->min; + cmo->excess.free = 0; + cmo->excess.size = 0; + need = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + /* Calculated reserve size above the minimum entitlement */ + if (viodev->cmo.entitled) + cmo->reserve.size += (viodev->cmo.entitled - + VIO_CMO_MIN_ENT); + /* Calculated used excess entitlement */ + if (viodev->cmo.allocated > viodev->cmo.entitled) + need += viodev->cmo.allocated - viodev->cmo.entitled; + } + cmo->excess.size = cmo->entitled - cmo->reserve.size; + cmo->excess.free = cmo->excess.size - need; + + cancel_delayed_work(container_of(work, struct delayed_work, work)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + struct vio_dev *viodev = to_vio_dev(dev); + void *ret; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return NULL; + } + + ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag); + if (unlikely(ret == NULL)) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + dma_addr_t ret = DMA_ERROR_CODE; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return ret; + } + + ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs); + if (unlikely(dma_mapping_error(dev, ret))) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_unmap_single(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + int ret, count = 0; + size_t alloc_size = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + + if (vio_cmo_alloc(viodev, alloc_size)) { + atomic_inc(&viodev->cmo.allocs_failed); + return 0; + } + + ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs); + + if (unlikely(!ret)) { + vio_cmo_dealloc(viodev, alloc_size); + atomic_inc(&viodev->cmo.allocs_failed); + } + + for (sgl = sglist, count = 0; count < ret; count++, sgl++) + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + if (alloc_size) + vio_cmo_dealloc(viodev, alloc_size); + + return ret; +} + +static void vio_dma_iommu_unmap_sg(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + size_t alloc_size = 0; + int count = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + + dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); + + vio_cmo_dealloc(viodev, alloc_size); +} + +struct dma_mapping_ops vio_dma_mapping_ops = { + .alloc_coherent = vio_dma_iommu_alloc_coherent, + .free_coherent = vio_dma_iommu_free_coherent, + .map_single = vio_dma_iommu_map_single, + .unmap_single = vio_dma_iommu_unmap_single, + .map_sg = vio_dma_iommu_map_sg, + .unmap_sg = vio_dma_iommu_unmap_sg, +}; + +/** + * vio_cmo_set_dev_desired - Set desired entitlement for a device + * + * @viodev: struct vio_dev for device to alter + * @new_desired: new desired entitlement level in bytes + * + * For use by devices to request a change to their entitlement at runtime or + * through sysfs. The desired entitlement level is changed and a balancing + * of system resources is scheduled to run in the future. + */ +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) +{ + unsigned long flags; + struct vio_cmo_dev_entry *dev_ent; + int found = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (desired < VIO_CMO_MIN_ENT) + desired = VIO_CMO_MIN_ENT; + + /* + * Changes will not be made for devices not in the device list. + * If it is not in the device list, then no driver is loaded + * for the device and it can not receive entitlement. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + found = 1; + break; + } + if (!found) + return; + + /* Increase/decrease in desired device entitlement */ + if (desired >= viodev->cmo.desired) { + /* Just bump the bus and device values prior to a balance*/ + vio_cmo.desired += desired - viodev->cmo.desired; + viodev->cmo.desired = desired; + } else { + /* Decrease bus and device values for desired entitlement */ + vio_cmo.desired -= viodev->cmo.desired - desired; + viodev->cmo.desired = desired; + /* + * If less entitlement is desired than current entitlement, move + * any reserve memory in the change region to the excess pool. + */ + if (viodev->cmo.entitled > desired) { + vio_cmo.reserve.size -= viodev->cmo.entitled - desired; + vio_cmo.excess.size += viodev->cmo.entitled - desired; + /* + * If entitlement moving from the reserve pool to the + * excess pool is currently unused, add to the excess + * free counter. + */ + if (viodev->cmo.allocated < viodev->cmo.entitled) + vio_cmo.excess.free += viodev->cmo.entitled - + max(viodev->cmo.allocated, desired); + viodev->cmo.entitled = desired; + } + } + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_bus_probe - Handle CMO specific bus probe activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Determine the devices IO memory entitlement needs, attempting + * to satisfy the system minimum entitlement at first and scheduling + * a balance operation to take care of the rest at a later time. + * + * Returns: 0 on success, -EINVAL when device doesn't support CMO, and + * -ENOMEM when entitlement is not available for device or + * device entry. + * + */ +static int vio_cmo_bus_probe(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + struct device *dev = &viodev->dev; + struct vio_driver *viodrv = to_vio_driver(dev->driver); + unsigned long flags; + size_t size; + + /* + * Check to see that device has a DMA window and configure + * entitlement for the device. + */ + if (of_get_property(viodev->dev.archdata.of_node, + "ibm,my-dma-window", NULL)) { + /* Check that the driver is CMO enabled and get desired DMA */ + if (!viodrv->get_desired_dma) { + dev_err(dev, "%s: device driver does not support CMO\n", + __func__); + return -EINVAL; + } + + viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + if (viodev->cmo.desired < VIO_CMO_MIN_ENT) + viodev->cmo.desired = VIO_CMO_MIN_ENT; + size = VIO_CMO_MIN_ENT; + + dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry), + GFP_KERNEL); + if (!dev_ent) + return -ENOMEM; + + dev_ent->viodev = viodev; + spin_lock_irqsave(&vio_cmo.lock, flags); + list_add(&dev_ent->list, &vio_cmo.device_list); + } else { + viodev->cmo.desired = 0; + size = 0; + spin_lock_irqsave(&vio_cmo.lock, flags); + } + + /* + * If the needs for vio_cmo.min have not changed since they + * were last set, the number of devices in the OF tree has + * been constant and the IO memory for this is already in + * the reserve pool. + */ + if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) * + VIO_CMO_MIN_ENT)) { + /* Updated desired entitlement if device requires it */ + if (size) + vio_cmo.desired += (viodev->cmo.desired - + VIO_CMO_MIN_ENT); + } else { + size_t tmp; + + tmp = vio_cmo.spare + vio_cmo.excess.free; + if (tmp < size) { + dev_err(dev, "%s: insufficient free " + "entitlement to add device. " + "Need %lu, have %lu\n", __func__, + size, (vio_cmo.spare + tmp)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + + /* Use excess pool first to fulfill request */ + tmp = min(size, vio_cmo.excess.free); + vio_cmo.excess.free -= tmp; + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + + /* Use spare if excess pool was insufficient */ + vio_cmo.spare -= size - tmp; + + /* Update bus accounting */ + vio_cmo.min += size; + vio_cmo.desired += viodev->cmo.desired; + } + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_bus_remove - Handle CMO specific bus removal activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Remove the device from the cmo device list. The minimum entitlement + * will be reserved for the device as long as it is in the system. The + * rest of the entitlement the device had been allocated will be returned + * to the system. + */ +static void vio_cmo_bus_remove(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (viodev->cmo.allocated) { + dev_err(&viodev->dev, "%s: device had %lu bytes of IO " + "allocated after remove operation.\n", + __func__, viodev->cmo.allocated); + BUG(); + } + + /* + * Remove the device from the device list being maintained for + * CMO enabled devices. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + list_del(&dev_ent->list); + kfree(dev_ent); + break; + } + + /* + * Devices may not require any entitlement and they do not need + * to be processed. Otherwise, return the device's entitlement + * back to the pools. + */ + if (viodev->cmo.entitled) { + /* + * This device has not yet left the OF tree, it's + * minimum entitlement remains in vio_cmo.min and + * vio_cmo.desired + */ + vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT); + + /* + * Save min allocation for device in reserve as long + * as it exists in OF tree as determined by later + * balance operation + */ + viodev->cmo.entitled -= VIO_CMO_MIN_ENT; + + /* Replenish spare from freed reserve pool */ + if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) { + tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT - + vio_cmo.spare)); + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + } + + /* Remaining reserve goes to excess pool */ + vio_cmo.excess.size += viodev->cmo.entitled; + vio_cmo.excess.free += viodev->cmo.entitled; + vio_cmo.reserve.size -= viodev->cmo.entitled; + + /* + * Until the device is removed it will keep a + * minimum entitlement; this will guarantee that + * a module unload/load will result in a success. + */ + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + viodev->cmo.desired = VIO_CMO_MIN_ENT; + atomic_set(&viodev->cmo.allocs_failed, 0); + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) +{ + vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported; + viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops; +} + +/** + * vio_cmo_bus_init - CMO entitlement initialization at bus init time + * + * Set up the reserve and excess entitlement pools based on available + * system entitlement and the number of devices in the OF tree that + * require entitlement in the reserve pool. + */ +static void vio_cmo_bus_init(void) +{ + struct hvcall_mpp_data mpp_data; + int err; + + memset(&vio_cmo, 0, sizeof(struct vio_cmo)); + spin_lock_init(&vio_cmo.lock); + INIT_LIST_HEAD(&vio_cmo.device_list); + INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance); + + /* Get current system entitlement */ + err = h_get_mpp(&mpp_data); + + /* + * On failure, continue with entitlement set to 0, will panic() + * later when spare is reserved. + */ + if (err != H_SUCCESS) { + printk(KERN_ERR "%s: unable to determine system IO "\ + "entitlement. (%d)\n", __func__, err); + vio_cmo.entitled = 0; + } else { + vio_cmo.entitled = mpp_data.entitled_mem; + } + + /* Set reservation and check against entitlement */ + vio_cmo.spare = VIO_CMO_MIN_ENT; + vio_cmo.reserve.size = vio_cmo.spare; + vio_cmo.reserve.size += (vio_cmo_num_OF_devs() * + VIO_CMO_MIN_ENT); + if (vio_cmo.reserve.size > vio_cmo.entitled) { + printk(KERN_ERR "%s: insufficient system entitlement\n", + __func__); + panic("%s: Insufficient system entitlement", __func__); + } + + /* Set the remaining accounting variables */ + vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size; + vio_cmo.excess.free = vio_cmo.excess.size; + vio_cmo.min = vio_cmo.reserve.size; + vio_cmo.desired = vio_cmo.reserve.size; +} + +/* sysfs device functions and data structures for CMO */ + +#define viodev_cmo_rd_attr(name) \ +static ssize_t viodev_cmo_##name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \ +} + +static ssize_t viodev_cmo_allocs_failed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *viodev = to_vio_dev(dev); + return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed)); +} + +static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + atomic_set(&viodev->cmo.allocs_failed, 0); + return count; +} + +static ssize_t viodev_cmo_desired_set(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + size_t new_desired; + int ret; + + ret = strict_strtoul(buf, 10, &new_desired); + if (ret) + return ret; + + vio_cmo_set_dev_desired(viodev, new_desired); + return count; +} + +viodev_cmo_rd_attr(desired); +viodev_cmo_rd_attr(entitled); +viodev_cmo_rd_attr(allocated); + +static ssize_t name_show(struct device *, struct device_attribute *, char *); +static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static struct device_attribute vio_cmo_dev_attrs[] = { + __ATTR_RO(name), + __ATTR_RO(devspec), + __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_desired_show, viodev_cmo_desired_set), + __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), + __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL), + __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset), + __ATTR_NULL +}; + +/* sysfs bus functions and data structures for CMO */ + +#define viobus_cmo_rd_attr(name) \ +static ssize_t \ +viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name); \ +} + +#define viobus_cmo_pool_rd_attr(name, var) \ +static ssize_t \ +viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name.var); \ +} + +static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf, + size_t count) +{ + unsigned long flags; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.high = vio_cmo.curr; + spin_unlock_irqrestore(&vio_cmo.lock, flags); + + return count; +} + +viobus_cmo_rd_attr(entitled); +viobus_cmo_pool_rd_attr(reserve, size); +viobus_cmo_pool_rd_attr(excess, size); +viobus_cmo_pool_rd_attr(excess, free); +viobus_cmo_rd_attr(spare); +viobus_cmo_rd_attr(min); +viobus_cmo_rd_attr(desired); +viobus_cmo_rd_attr(curr); +viobus_cmo_rd_attr(high); + +static struct bus_attribute vio_cmo_bus_attrs[] = { + __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL), + __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL), + __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL), + __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL), + __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL), + __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL), + __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL), + __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL), + __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viobus_cmo_high_show, viobus_cmo_high_reset), + __ATTR_NULL +}; + +static void vio_cmo_sysfs_init(void) +{ + vio_bus_type.dev_attrs = vio_cmo_dev_attrs; + vio_bus_type.bus_attrs = vio_cmo_bus_attrs; +} +#else /* CONFIG_PPC_SMLPAR */ +/* Dummy functions for iSeries platform */ +int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} +static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } +static void vio_cmo_bus_remove(struct vio_dev *viodev) {} +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} +static void vio_cmo_bus_init(void) {} +static void vio_cmo_sysfs_init(void) { } +#endif /* CONFIG_PPC_SMLPAR */ +EXPORT_SYMBOL(vio_cmo_entitlement_update); +EXPORT_SYMBOL(vio_cmo_set_dev_desired); + static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) { const unsigned char *dma_window; @@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev) return error; id = vio_match_device(viodrv->id_table, viodev); - if (id) + if (id) { + memset(&viodev->cmo, 0, sizeof(viodev->cmo)); + if (firmware_has_feature(FW_FEATURE_CMO)) { + error = vio_cmo_bus_probe(viodev); + if (error) + return error; + } error = viodrv->probe(viodev, id); + if (error) + vio_cmo_bus_remove(viodev); + } return error; } @@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev) { struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); + struct device *devptr; + int ret = 1; + + /* + * Hold a reference to the device after the remove function is called + * to allow for CMO accounting cleanup for the device. + */ + devptr = get_device(dev); if (viodrv->remove) - return viodrv->remove(viodev); + ret = viodrv->remove(viodev); + + if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); - /* driver can't remove */ - return 1; + put_device(devptr); + return ret; } /** @@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->unit_address = *unit_address; } viodev->dev.archdata.of_node = of_node_get(of_node); - viodev->dev.archdata.dma_ops = &dma_iommu_ops; + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + viodev->dev.archdata.dma_ops = &dma_iommu_ops; viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); viodev->dev.archdata.numa_node = of_node_to_nid(of_node); @@ -245,6 +1260,9 @@ static int __init vio_bus_init(void) int err; struct device_node *node_vroot; + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_sysfs_init(); + err = bus_register(&vio_bus_type); if (err) { printk(KERN_ERR "failed to register VIO bus\n"); @@ -262,6 +1280,9 @@ static int __init vio_bus_init(void) return err; } + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_init(); + node_vroot = of_find_node_by_name(NULL, "vdevice"); if (node_vroot) { struct device_node *of_node; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 53d57d17a894..4a8ce62fe112 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -9,6 +9,25 @@ ENTRY(_stext) +PHDRS { + kernel PT_LOAD FLAGS(7); /* RWX */ + notes PT_NOTE FLAGS(0); + dummy PT_NOTE FLAGS(0); + + /* binutils < 2.18 has a bug that makes it misbehave when taking an + ELF file with all segments at load address 0 as input. This + happens when running "strip" on vmlinux, because of the AT() magic + in this linker script. People using GCC >= 4.2 won't run into + this problem, because the "build-id" support will put some data + into the "notes" segment (at a non-zero load address). + + To work around this, we force some data into both the "dummy" + segment and the kernel segment, so the dummy segment will get a + non-zero load address. It's not enough to always create the + "notes" segment, since if nothing gets assigned to it, its load + address will be zero. */ +} + #ifdef CONFIG_PPC64 OUTPUT_ARCH(powerpc:common64) jiffies = jiffies_64; @@ -35,7 +54,7 @@ SECTIONS ALIGN_FUNCTION(); *(.text.head) _text = .; - *(.text .fixup .text.init.refok .exit.text.refok) + *(.text .fixup .text.init.refok .exit.text.refok __ftr_alt_*) SCHED_TEXT LOCK_TEXT KPROBES_TEXT @@ -50,7 +69,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _etext = .; PROVIDE32 (etext = .); - } + } :kernel /* Read-only data */ RODATA @@ -62,7 +81,13 @@ SECTIONS __stop___ex_table = .; } - NOTES + NOTES :kernel :notes + + /* The dummy segment contents for the bug workaround mentioned above + near PHDRS. */ + .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) { + LONG(0xf177) + } :kernel :dummy /* * Init sections discarded at runtime @@ -74,7 +99,7 @@ SECTIONS _sinittext = .; INIT_TEXT _einittext = .; - } + } :kernel /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table @@ -125,6 +150,12 @@ SECTIONS *(__ftr_fixup) __stop___ftr_fixup = .; } + . = ALIGN(8); + __lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) { + __start___lwsync_fixup = .; + *(__lwsync_fixup) + __stop___lwsync_fixup = .; + } #ifdef CONFIG_PPC64 . = ALIGN(8); __fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) { |