diff options
Diffstat (limited to 'arch/powerpc/kernel')
81 files changed, 4803 insertions, 2664 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2da380fcc34c..1925341dbb9c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) @@ -31,8 +36,7 @@ obj-y := cputable.o ptrace.o syscalls.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ - udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o \ + udbg.o misc.o io.o dma.o misc_$(BITS).o \ of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ @@ -42,12 +46,11 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o -obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o -obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o +obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o @@ -71,31 +74,27 @@ obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o ifeq ($(CONFIG_FSL_BOOKE),y) obj-$(CONFIG_HIBERNATION) += swsusp_booke.o else -obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o -obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_MODULES) += module.o module_$(BITS).o obj-$(CONFIG_44x) += cpu_setup_44x.o obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -extra-y := head_$(CONFIG_WORD_SIZE).o +extra-y := head_$(BITS).o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o +obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o -obj-$(CONFIG_MODULES) += ppc_ksyms.o -ifeq ($(CONFIG_PPC32),y) -obj-$(CONFIG_MODULES) += ppc_ksyms_32.o -endif obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o @@ -105,11 +104,11 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o -obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ +obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ - machine_kexec_$(CONFIG_WORD_SIZE).o + machine_kexec_$(BITS).o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 8e7cb8e2b21a..033f3385fa49 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -26,6 +26,7 @@ #include <asm/emulated_ops.h> #include <asm/switch_to.h> #include <asm/disassemble.h> +#include <asm/cpu_has_feature.h> struct aligninfo { unsigned char len; @@ -228,9 +229,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) #else #define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) #endif -#endif - -#ifdef __LITTLE_ENDIAN__ +#else #define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3)))) #endif @@ -875,6 +874,20 @@ int fix_alignment(struct pt_regs *regs) return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); } #endif + + /* + * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment + * check. + * + * Send a SIGBUS to the process that caused the fault. + * + * We do not emulate these because paste may contain additional metadata + * when pasting to a co-processor. Furthermore, paste_last is the + * synchronisation point for preceding copy/paste sequences. + */ + if ((instruction & 0xfc0006fe) == PPC_INST_COPY) + return -EIO; + /* A size of 0 indicates an instruction we don't support, with * the exception of DCBZ which is handled as a special case here */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9ea09551a2cd..caec7bf3b99a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -68,17 +68,18 @@ #include "../mm/mmu_decl.h" #endif +#ifdef CONFIG_PPC_8xx +#include <asm/fixmap.h> +#endif + int main(void) { DEFINE(THREAD, offsetof(struct task_struct, thread)); DEFINE(MM, offsetof(struct task_struct, mm)); DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 - DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); - DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); - DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); @@ -132,17 +133,6 @@ int main(void) DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar)); - DEFINE(THREAD_BESCR, offsetof(struct thread_struct, bescr)); - DEFINE(THREAD_EBBHR, offsetof(struct thread_struct, ebbhr)); - DEFINE(THREAD_EBBRR, offsetof(struct thread_struct, ebbrr)); - DEFINE(THREAD_SIAR, offsetof(struct thread_struct, siar)); - DEFINE(THREAD_SDAR, offsetof(struct thread_struct, sdar)); - DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); - DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); - DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); @@ -152,12 +142,12 @@ int main(void) DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); - DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct, - transact_vr)); - DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct, - transact_vrsave)); - DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct, - transact_fp)); + DEFINE(THREAD_CKVRSTATE, offsetof(struct thread_struct, + ckvr_state)); + DEFINE(THREAD_CKVRSAVE, offsetof(struct thread_struct, + ckvrsave)); + DEFINE(THREAD_CKFPSTATE, offsetof(struct thread_struct, + ckfp_state)); /* Local pt_regs on stack for Transactional Memory funcs. */ DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); @@ -178,7 +168,6 @@ int main(void) DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); @@ -255,13 +244,28 @@ int main(void) DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); - DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); - DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); - DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); - DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); + DEFINE(ACCOUNT_STARTTIME, + offsetof(struct paca_struct, accounting.starttime)); + DEFINE(ACCOUNT_STARTTIME_USER, + offsetof(struct paca_struct, accounting.starttime_user)); + DEFINE(ACCOUNT_USER_TIME, + offsetof(struct paca_struct, accounting.user_time)); + DEFINE(ACCOUNT_SYSTEM_TIME, + offsetof(struct paca_struct, accounting.system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); +#else /* CONFIG_PPC64 */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + DEFINE(ACCOUNT_STARTTIME, + offsetof(struct thread_info, accounting.starttime)); + DEFINE(ACCOUNT_STARTTIME_USER, + offsetof(struct thread_info, accounting.starttime_user)); + DEFINE(ACCOUNT_USER_TIME, + offsetof(struct thread_info, accounting.user_time)); + DEFINE(ACCOUNT_SYSTEM_TIME, + offsetof(struct thread_info, accounting.system_time)); +#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -275,12 +279,6 @@ int main(void) /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); - - /* hcall statistics */ - DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats)); - DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls)); - DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total)); - DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total)); #endif /* CONFIG_PPC64 */ DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); @@ -298,23 +296,6 @@ int main(void) DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); #ifndef CONFIG_PPC64 DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); - DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); - DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); - DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17])); - DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18])); - DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19])); - DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); - DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); - DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); - DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); - DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24])); - DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25])); - DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26])); - DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27])); - DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28])); - DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); - DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); - DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); #endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special @@ -332,7 +313,6 @@ int main(void) DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); #ifndef CONFIG_PPC64 - DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); /* * The PowerPC 400-class & Book-E processors have neither the DAR * nor the DSISR SPRs. Hence, we overload them to hold the similar @@ -369,8 +349,6 @@ int main(void) DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif - DEFINE(CLONE_VM, CLONE_VM); - DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); #ifndef CONFIG_PPC64 DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); @@ -380,7 +358,6 @@ int main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); - DEFINE(CPU_DOWN_FLUSH, offsetof(struct cpu_spec, cpu_down_flush)); DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); @@ -395,7 +372,6 @@ int main(void) DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); @@ -517,7 +493,6 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); - DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); @@ -528,11 +503,9 @@ int main(void) DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); - DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb)); DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); @@ -566,7 +539,6 @@ int main(void) DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); - DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr)); DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); @@ -576,7 +548,6 @@ int main(void) DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); - DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); @@ -585,6 +556,7 @@ int main(void) DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); + DEFINE(VCORE_VTB, offsetof(struct kvmppc_vcore, vtb)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); @@ -693,7 +665,6 @@ int main(void) DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar)); DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); - DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size)); DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped)); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -756,7 +727,6 @@ int main(void) #ifdef CONFIG_KVM_BOOKE_HV DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); - DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc)); #endif #ifdef CONFIG_KVM_EXIT_TIMING @@ -783,5 +753,9 @@ int main(void) DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); +#ifdef CONFIG_PPC_8xx + DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8cd9fba4d35..c5e5a94d9892 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -156,7 +156,7 @@ setup_7410_workarounds: blr /* 740/750/7400/7410 - * Enable Store Gathering (SGE), Address Brodcast (ABE), + * Enable Store Gathering (SGE), Address Broadcast (ABE), * Branch History Table (BHTE), Branch Target ICache (BTIC) * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 584e119fa8b0..52ff3f025437 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -51,6 +51,7 @@ _GLOBAL(__setup_cpu_power8) mflr r11 bl __init_FSCR bl __init_PMU + bl __init_PMU_ISA207 bl __init_hvmode_206 mtlr r11 beqlr @@ -62,6 +63,7 @@ _GLOBAL(__setup_cpu_power8) bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV + bl __init_PMU_HV_ISA207 mtlr r11 blr @@ -69,6 +71,7 @@ _GLOBAL(__restore_cpu_power8) mflr r11 bl __init_FSCR bl __init_PMU + bl __init_PMU_ISA207 mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 @@ -81,12 +84,14 @@ _GLOBAL(__restore_cpu_power8) bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV + bl __init_PMU_HV_ISA207 mtlr r11 blr _GLOBAL(__setup_cpu_power9) mflr r11 bl __init_FSCR + bl __init_PMU bl __init_hvmode_206 mtlr r11 beqlr @@ -94,15 +99,18 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 + bl __init_PMU_HV mtlr r11 blr _GLOBAL(__restore_cpu_power9) mflr r11 bl __init_FSCR + bl __init_PMU mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 @@ -111,9 +119,11 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 + bl __init_PMU_HV mtlr r11 blr @@ -208,14 +218,22 @@ __init_tlb_power9: __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 + blr + +__init_PMU_HV_ISA207: + li r5,0 mtspr SPRN_MMCRH,r5 blr __init_PMU: li r5,0 - mtspr SPRN_MMCRS,r5 mtspr SPRN_MMCRA,r5 mtspr SPRN_MMCR0,r5 mtspr SPRN_MMCR1,r5 mtspr SPRN_MMCR2,r5 blr + +__init_PMU_ISA207: + li r5,0 + mtspr SPRN_MMCRS,r5 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index eeeacf6235a3..6a82ef039c50 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -15,6 +15,7 @@ #include <linux/threads.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/jump_label.h> #include <asm/oprofile_impl.h> #include <asm/cputable.h> @@ -137,7 +138,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -152,7 +153,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -505,6 +506,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power9 DD1*/ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004e0100, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Power9 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000, @@ -1228,6 +1248,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTR_TYPE_8xx, .icache_bsize = 16, .dcache_bsize = 16, + .machine_check = machine_check_8xx, .platform = "ppc823", }, #endif /* CONFIG_8xx */ @@ -2224,3 +2245,39 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) return NULL; } + +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS +struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = { + [0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT +}; +EXPORT_SYMBOL_GPL(cpu_feature_keys); + +void __init cpu_feature_keys_init(void) +{ + int i; + + for (i = 0; i < NUM_CPU_FTR_KEYS; i++) { + unsigned long f = 1ul << i; + + if (!(cur_cpu_spec->cpu_features & f)) + static_branch_disable(&cpu_feature_keys[i]); + } +} + +struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = { + [0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT +}; +EXPORT_SYMBOL_GPL(mmu_feature_keys); + +void __init mmu_feature_keys_init(void) +{ + int i; + + for (i = 0; i < NUM_MMU_FTR_KEYS; i++) { + unsigned long f = 1ul << i; + + if (!(cur_cpu_spec->mmu_features & f)) + static_branch_disable(&mmu_feature_keys[i]); + } +} +#endif diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 2bb252c01f07..47b63de81f9b 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -48,8 +48,8 @@ int crashing_cpu = -1; static int time_to_dump; #define CRASH_HANDLER_MAX 3 -/* NULL terminated list of shutdown handles */ -static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; +/* List of shutdown handles */ +static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; static DEFINE_SPINLOCK(crash_handlers_lock); static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; @@ -65,7 +65,7 @@ static int handle_fault(struct pt_regs *regs) #ifdef CONFIG_SMP static atomic_t cpus_in_crash; -void crash_ipi_callback(struct pt_regs *regs) +static void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; @@ -288,9 +288,14 @@ int crash_shutdown_unregister(crash_shutdown_t handler) rc = 1; } else { /* Shift handles down */ - for (; crash_shutdown_handles[i]; i++) + for (; i < (CRASH_HANDLER_MAX - 1); i++) crash_shutdown_handles[i] = crash_shutdown_handles[i+1]; + /* + * Reset last entry to NULL now that it has been shifted down, + * this will allow new handles to be added here. + */ + crash_shutdown_handles[i] = NULL; rc = 0; } @@ -346,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; crash_shutdown_cpu = smp_processor_id(); - for (i = 0; crash_shutdown_handles[i]; i++) { + for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* * Insert syncs and delay to ensure diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 41a7d9d49a5a..fb7cbaa37658 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -18,7 +18,7 @@ */ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, dev->coherent_dma_mask, flag, @@ -27,7 +27,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); } @@ -40,7 +40,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, size, device_to_mask(dev), direction, attrs); @@ -49,7 +49,7 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, attrs); @@ -58,7 +58,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); @@ -66,7 +66,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 3f1472a78f39..e64a6016fba7 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -64,7 +64,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) void *__dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; #ifdef CONFIG_NOT_COHERENT_CACHE @@ -121,7 +121,7 @@ void *__dma_direct_alloc_coherent(struct device *dev, size_t size, void __dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef CONFIG_NOT_COHERENT_CACHE __dma_free_coherent(size, vaddr); @@ -132,7 +132,7 @@ void __dma_direct_free_coherent(struct device *dev, size_t size, static void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *iommu; @@ -156,7 +156,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size, static void dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *iommu; @@ -177,7 +177,7 @@ static void dma_direct_free_coherent(struct device *dev, size_t size, int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long pfn; @@ -195,7 +195,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -211,7 +211,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { } @@ -232,7 +232,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(dir == DMA_NONE); __dma_sync_page(page, offset, size, dir); @@ -243,7 +243,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c9bc78e9c610..f25731627d7f 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -116,6 +116,7 @@ struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); +EXPORT_SYMBOL_GPL(confirm_error_lock); /* Lock to protect passed flags */ static DEFINE_MUTEX(eeh_dev_mutex); @@ -168,10 +169,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); - pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", + pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); @@ -1044,7 +1045,7 @@ int eeh_init(void) if (eeh_enabled()) pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else - pr_warn("EEH: No capable adapters found\n"); + pr_info("EEH: No capable adapters found\n"); return ret; } @@ -1502,6 +1503,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) break; case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: + case EEH_OPT_FREEZE_PE: if (!eeh_ops || !eeh_ops->set_option) { ret = -ENOENT; break; diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index ddbcfab7efdf..d4cc26618809 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -114,9 +114,9 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", + pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n", (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); cnt++; n = rb_next(n); } @@ -159,8 +159,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, piar->flags = flags; #ifdef DEBUG - pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name(dev)); + pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n", + &alo, &ahi, pci_name(dev)); #endif rb_link_node(&piar->rb_node, parent, p); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 7815095fe3d8..d6b2ca70d14d 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -44,14 +44,13 @@ /** * eeh_dev_init - Create EEH device according to OF node * @pdn: PCI device node - * @data: PHB * * It will create EEH device according to the given OF node. The function * might be called by PCI emunation, DR, PHB hotplug. */ -void *eeh_dev_init(struct pci_dn *pdn, void *data) +struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) { - struct pci_controller *phb = data; + struct pci_controller *phb = pdn->phb; struct eeh_dev *edev; /* Allocate EEH device */ @@ -69,7 +68,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) INIT_LIST_HEAD(&edev->list); INIT_LIST_HEAD(&edev->rmv_list); - return NULL; + return edev; } /** @@ -81,16 +80,8 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) */ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { - struct pci_dn *root = phb->pci_data; - /* EEH PE for PHB */ eeh_phb_pe_create(phb); - - /* EEH device for PHB */ - eeh_dev_init(root, phb); - - /* EEH devices for children OF nodes */ - traverse_pci_dn(root, eeh_dev_init, phb); } /** @@ -106,8 +97,6 @@ static int __init eeh_dev_phb_init(void) list_for_each_entry_safe(phb, tmp, &hose_list, list_node) eeh_dev_phb_init_dynamic(phb); - pr_info("EEH: devices created\n"); - return 0; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..a62be72da274 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -139,7 +139,7 @@ static void eeh_enable_irq(struct pci_dev *dev) * into it. * * That's just wrong.The warning in the core code is - * there to tell people to fix their assymetries in + * there to tell people to fix their asymmetries in * their own code, not by abusing the core information * to avoid it. * @@ -642,13 +642,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); @@ -992,9 +993,17 @@ static void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + bus = eeh_pe_bus_get(phb_pe); + if (!bus) { + pr_err("%s: Cannot find PCI bus for " + "PHB#%d-PE#%x\n", + __func__, + pe->phb->global_number, + pe->addr); + break; + } pci_hp_remove_devices(bus); } pci_unlock_rescan_remove(); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f0520da85759..de7d091c4c31 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -581,6 +581,7 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) { eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +EXPORT_SYMBOL_GPL(eeh_pe_state_mark); static void *__eeh_pe_dev_mode_mark(void *data, void *flag) { diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2405631e91a2..3841d749a430 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -33,6 +33,7 @@ #include <asm/unistd.h> #include <asm/ftrace.h> #include <asm/ptrace.h> +#include <asm/export.h> /* * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. @@ -175,6 +176,12 @@ transfer_to_handler: addi r12,r12,-1 stw r12,4(r11) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + CURRENT_THREAD_INFO(r9, r1) + tophys(r9, r9) + ACCOUNT_CPU_USER_ENTRY(r9, r11, r12) +#endif + b 3f 2: /* if from kernel, check interrupted DOZE/NAP mode and @@ -398,6 +405,13 @@ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + andi. r4,r8,MSR_PR + beq 3f + CURRENT_THREAD_INFO(r4, r1) + ACCOUNT_CPU_USER_EXIT(r4, r5, r7) +3: +#endif lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -641,7 +655,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) #endif /* CONFIG_SMP */ tophys(r0,r4) - CLR_TOP32(r0) mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ lwz r1,KSP(r4) /* Load new stack pointer */ @@ -769,6 +782,10 @@ restore_user: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + CURRENT_THREAD_INFO(r9, r1) + ACCOUNT_CPU_USER_EXIT(r9, r10, r11) +#endif b restore @@ -1342,6 +1359,7 @@ _GLOBAL(_mcount) MCOUNT_RESTORE_FRAME bctr #endif +EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_stub) blr diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 73e461a3dfbb..6432d4bf08c8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -38,6 +38,7 @@ #include <asm/context_tracking.h> #include <asm/tm.h> #include <asm/ppc-opcode.h> +#include <asm/export.h> /* * System calls. @@ -72,7 +73,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r0,GPR0(r1) std r10,GPR1(r1) beq 2f /* if from kernel mode */ - ACCOUNT_CPU_USER_ENTRY(r10, r11) + ACCOUNT_CPU_USER_ENTRY(r13, r10, r11) 2: std r2,GPR2(r1) std r3,GPR3(r1) mfcr r2 @@ -139,7 +140,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - ld r11,PACAKMSR(r13) + li r11,MSR_RI ori r11,r11,MSR_EE mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -195,7 +196,6 @@ system_call: /* label this so stack traces look sane */ #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* * For performance reasons we clear RI the same time that we * clear EE. We only need to clear RI just before we restore r13 @@ -203,8 +203,7 @@ system_call: /* label this so stack traces look sane */ * We have to be careful to restore RI if we branch anywhere from * here (eg syscall_exit_work). */ - li r9,MSR_RI - andc r11,r10,r9 + li r11,0 mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -221,13 +220,12 @@ system_call: /* label this so stack traces look sane */ #endif 2: addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI mtmsrd r10,1 /* Restore RI */ #endif bl restore_math #ifdef CONFIG_PPC_BOOK3S - ld r10,PACAKMSR(r13) - li r9,MSR_RI - andc r11,r10,r9 /* Re-clear RI */ + li r11,0 mtmsrd r11,1 #endif ld r8,_MSR(r1) @@ -246,7 +244,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r4,_LINK(r1) beq- 1f - ACCOUNT_CPU_USER_EXIT(r11, r12) + ACCOUNT_CPU_USER_EXIT(r13, r11, r12) BEGIN_FTR_SECTION HMT_MEDIUM_LOW @@ -308,6 +306,7 @@ syscall_enosys: syscall_exit_work: #ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI mtmsrd r10,1 /* Restore RI */ #endif /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. @@ -354,7 +353,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - ld r10,PACAKMSR(r13) + li r10,MSR_RI ori r10,r10,MSR_EE mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -368,13 +367,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) tabort_syscall: /* Firstly we need to enable TM in the kernel */ mfmsr r10 - li r13, 1 - rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG + li r9, 1 + rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r10, 0 /* tabort, this dooms the transaction, nothing else */ - li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R13) + li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R9) /* * Return directly to userspace. We have corrupted user register state, @@ -382,8 +381,8 @@ tabort_syscall: * resume after the tbegin of the aborted transaction with the * checkpointed register state. */ - li r13, MSR_RI - andc r10, r10, r13 + li r9, MSR_RI + andc r10, r10, r9 mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 @@ -453,7 +452,7 @@ _GLOBAL(ret_from_kernel_thread) REST_NVGPRS(r1) mtlr r14 mr r3,r15 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif blrl @@ -532,7 +531,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) #ifdef CONFIG_PPC_STD_MMU_64 BEGIN_MMU_FTR_SECTION b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) BEGIN_FTR_SECTION clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ @@ -619,7 +618,7 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + li r10,MSR_RI mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ @@ -751,7 +750,7 @@ resume_kernel: #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + li r10,MSR_RI mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ #endif /* CONFIG_PREEMPT */ @@ -841,8 +840,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * userspace and we take an exception after restoring r13, * we end up corrupting the userspace r13 value. */ - ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */ - andc r4,r4,r0 /* r0 contains MSR_RI here */ + li r4,0 mtmsrd r4,1 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -859,7 +857,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) BEGIN_FTR_SECTION mtspr SPRN_PPR,r2 /* Restore PPR */ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ACCOUNT_CPU_USER_EXIT(r2, r4) + ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) 1: mtspr SPRN_SRR1,r3 @@ -1180,6 +1178,7 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) +EXPORT_SYMBOL(_mcount) mflr r12 mtctr r12 mtlr r0 @@ -1416,6 +1415,7 @@ livepatch_handler: #else _GLOBAL_TOC(_mcount) +EXPORT_SYMBOL(_mcount) /* Taken from output of objdump from lib64/glibc */ mflr r3 ld r11, 0(r1) diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 9f1ebf7338f1..52ca2471ee1a 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -16,6 +16,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> +#include <asm/export.h> #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ @@ -53,3 +54,4 @@ epapr_hypercall_start: nop nop blr +EXPORT_SYMBOL(epapr_hypercall_start) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 488e6314f993..38a1f96430e1 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -386,7 +386,7 @@ exc_##n##_common: \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ + ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ @@ -453,7 +453,7 @@ exc_##n##_bad_stack: \ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \ b bad_stack_book3e; /* bad stack error */ -/* WARNING: If you change the layout of this stub, make sure you chcek +/* WARNING: If you change the layout of this stub, make sure you check * the debug exception handler which handles single stepping * into exceptions from userspace, and the MM code in * arch/powerpc/mm/tlb_nohash.c which patches the branch here @@ -1059,7 +1059,7 @@ fast_exception_return: andi. r6,r10,MSR_PR REST_2GPRS(6, r1) beq 1f - ACCOUNT_CPU_USER_EXIT(r10, r11) + ACCOUNT_CPU_USER_EXIT(r13, r10, r11) ld r0,GPR13(r1) 1: stdcx. r0,0,r1 /* to clear the reservation */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..f129408c6022 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -16,72 +16,71 @@ #include <asm/exception-64s.h> #include <asm/ptrace.h> #include <asm/cpuidle.h> +#include <asm/head-64.h> /* + * There are a few constraints to be concerned with. + * - Real mode exceptions code/data must be located at their physical location. + * - Virtual mode exceptions must be mapped at their 0xc000... location. + * - Fixed location code must not call directly beyond the __end_interrupts + * area when built with CONFIG_RELOCATABLE. LOAD_HANDLER / bctr sequence + * must be used. + * - LOAD_HANDLER targets must be within first 64K of physical 0 / + * virtual 0xc00... + * - Conditional branch targets must be within +/-32K of caller. + * + * "Virtual exceptions" run with relocation on (MSR_IR=1, MSR_DR=1), and + * therefore don't have to run in physically located code or rfid to + * virtual mode kernel code. However on relocatable kernels they do have + * to branch to KERNELBASE offset because the rest of the kernel (outside + * the exception vectors) may be located elsewhere. + * + * Virtual exceptions correspond with physical, except their entry points + * are offset by 0xc000000000000000 and also tend to get an added 0x4000 + * offset applied. Virtual exceptions are enabled with the Alternate + * Interrupt Location (AIL) bit set in the LPCR. However this does not + * guarantee they will be delivered virtually. Some conditions (see the ISA) + * cause exceptions to be delivered in real mode. + * + * It's impossible to receive interrupts below 0x300 via AIL. + * + * KVM: None of the virtual exceptions are from the guest. Anything that + * escalated to HV=1 from HV=0 is delivered via real mode handlers. + * + * * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code - * 0x0100 - 0x17ff : pSeries Interrupt prologs - * 0x1800 - 0x4000 : interrupt support common interrupt prologs - * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1 - * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1 + * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors + * 0x1900 - 0x3fff : Real mode trampolines + * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors + * 0x5900 - 0x6fff : Relon mode trampolines * 0x7000 - 0x7fff : FWNMI data area - * 0x8000 - 0x8fff : Initial (CPU0) segment table - * 0x9000 - : Early init and support code + * 0x8000 - .... : Common interrupt handlers, remaining early + * setup code, rest of kernel. + * + * We could reclaim 0x4000-0x42ff for real mode trampolines if the space + * is necessary. Until then it's more consistent to explicitly put VIRT_NONE + * vectors there. */ - /* Syscall routine is used twice, in reloc-off and reloc-on paths */ -#define SYSCALL_PSERIES_1 \ -BEGIN_FTR_SECTION \ - cmpdi r0,0x1ebe ; \ - beq- 1f ; \ -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ - mr r9,r13 ; \ - GET_PACA(r13) ; \ - mfspr r11,SPRN_SRR0 ; \ -0: - -#define SYSCALL_PSERIES_2_RFID \ - mfspr r12,SPRN_SRR1 ; \ - ld r10,PACAKBASE(r13) ; \ - LOAD_HANDLER(r10, system_call_entry) ; \ - mtspr SPRN_SRR0,r10 ; \ - ld r10,PACAKMSR(r13) ; \ - mtspr SPRN_SRR1,r10 ; \ - rfid ; \ - b . ; /* prevent speculative execution */ - -#define SYSCALL_PSERIES_3 \ - /* Fast LE/BE switch system call */ \ -1: mfspr r12,SPRN_SRR1 ; \ - xori r12,r12,MSR_LE ; \ - mtspr SPRN_SRR1,r12 ; \ - rfid ; /* return to userspace */ \ - b . ; /* prevent speculative execution */ - -#if defined(CONFIG_RELOCATABLE) - /* - * We can't branch directly so we do it via the CTR which - * is volatile across system calls. - */ -#define SYSCALL_PSERIES_2_DIRECT \ - mflr r10 ; \ - ld r12,PACAKBASE(r13) ; \ - LOAD_HANDLER(r12, system_call_entry) ; \ - mtctr r12 ; \ - mfspr r12,SPRN_SRR1 ; \ - /* Re-use of r13... No spare regs to do this */ \ - li r13,MSR_RI ; \ - mtmsrd r13,1 ; \ - GET_PACA(r13) ; /* get r13 back */ \ - bctr ; +OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) +OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) +OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) +OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + * pseries and powernv need to keep the whole page from + * 0x7000 to 0x8000 free for use by the firmware + */ +ZERO_FIXED_SECTION(fwnmi_page, 0x7000, 0x8000) +OPEN_TEXT_SECTION(0x8000) #else - /* We can branch directly */ -#define SYSCALL_PSERIES_2_DIRECT \ - mfspr r12,SPRN_SRR1 ; \ - li r10,MSR_RI ; \ - mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_common ; +OPEN_TEXT_SECTION(0x7000) #endif +USE_FIXED_SECTION(real_vectors) + /* * This is the start of the interrupt handlers for pSeries * This code runs with relocation off. @@ -90,12 +89,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ * Therefore any relative branches in this section must only * branch to labels in this section. */ - . = 0x100 .globl __start_interrupts __start_interrupts: - .globl system_reset_pSeries; -system_reset_pSeries: +/* No virt vectors corresponding with 0x0..0x100 */ +EXC_VIRT_NONE(0x4000, 0x4100) + +EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -107,25 +107,9 @@ BEGIN_FTR_SECTION beq 9f cmpwi cr3,r13,2 - - /* - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. - */ GET_PACA(r13) - clrldi r5,r13,63 - clrrdi r13,r13,1 - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 - - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,8f /* Either sleep or Winkle */ - - /* Waking up from nap should not cause hypervisor state loss */ - bgt cr3,. + bl pnv_restore_hyp_resource - /* Waking up from nap */ li r0,PNV_THREAD_RUNNING stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ @@ -143,297 +127,53 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 - beq cr3,2f - b power7_wakeup_noloss -2: b power7_wakeup_loss - - /* Fast Sleep wakeup on PowerNV */ -8: GET_PACA(r13) - b power7_wakeup_tb_loss + blt cr3,2f + b pnv_wakeup_loss +2: b pnv_wakeup_noloss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) +EXC_COMMON(system_reset_common, 0x100, system_reset_exception) + +#ifdef CONFIG_PPC_PSERIES +/* + * Vectors for the FWNMI option. Share common code. + */ +TRAMP_REAL_BEGIN(system_reset_fwnmi) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) +#endif /* CONFIG_PPC_PSERIES */ + - . = 0x200 -machine_check_pSeries_1: +EXC_REAL_BEGIN(machine_check, 0x200, 0x300) /* This is moved out of line as it can be patched by FW, but * some code path might still want to branch into the original * vector */ SET_SCRATCH0(r13) /* save r13 */ -#ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + /* + * Running native on arch 2.06 or later, we may wakeup from winkle + * inside machine check. If yes, then last bit of HSPGR0 would be set + * to 1. Hence clear it unconditionally. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - beq 9f - - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal. let's just stay stuck here */ - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - bgt cr1,. -9: - OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ + GET_PACA(r13) + clrrdi r13,r13,1 + SET_PACA(r13) EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_powernv_early FTR_SECTION_ELSE b machine_check_pSeries_0 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) - - . = 0x300 - .globl data_access_pSeries -data_access_pSeries: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, - KVMTEST, 0x300) - - . = 0x380 - .globl data_access_slb_pSeries -data_access_slb_pSeries: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_DAR - mfspr r12,SPRN_SRR1 -#ifndef CONFIG_RELOCATABLE - b slb_miss_realmode -#else - /* - * We can't just use a direct branch to slb_miss_realmode - * because the distance from here to there depends on where - * the kernel ends up being put. - */ - mfctr r11 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, slb_miss_realmode) - mtctr r10 - bctr -#endif - - STD_EXCEPTION_PSERIES(0x400, instruction_access) - - . = 0x480 - .globl instruction_access_slb_pSeries -instruction_access_slb_pSeries: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - mfspr r12,SPRN_SRR1 -#ifndef CONFIG_RELOCATABLE - b slb_miss_realmode -#else - mfctr r11 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, slb_miss_realmode) - mtctr r10 - bctr -#endif - - /* We open code these as we can't have a ". = x" (even with - * x = "." within a feature section - */ - . = 0x500; - .globl hardware_interrupt_pSeries; - .globl hardware_interrupt_hv; -hardware_interrupt_pSeries: -hardware_interrupt_hv: - BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, - EXC_HV, SOFTEN_TEST_HV) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) - FTR_SECTION_ELSE - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_PR) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - - STD_EXCEPTION_PSERIES(0x600, alignment) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) - - STD_EXCEPTION_PSERIES(0x700, program_check) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) - - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) - - . = 0x900 - .globl decrementer_pSeries -decrementer_pSeries: - _MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR) - - STD_EXCEPTION_HV(0x980, 0x982, hdecrementer) - - MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) - - STD_EXCEPTION_PSERIES(0xb00, trap_0b) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) - - . = 0xc00 - .globl system_call_pSeries -system_call_pSeries: - /* - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems - * that support it) before changing to HMT_MEDIUM. That allows the KVM - * code to save that value into the guest state (it is the guest's PPR - * value). Otherwise just change to HMT_MEDIUM as userspace has - * already saved the PPR. - */ -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9,PACA_EXGEN+EX_R9(r13) - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); - HMT_MEDIUM; - std r10,PACA_EXGEN+EX_R10(r13) - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); - mfcr r9 - KVMTEST(0xc00) - GET_SCRATCH0(r13) -#else - HMT_MEDIUM; -#endif - SYSCALL_PSERIES_1 - SYSCALL_PSERIES_2_RFID - SYSCALL_PSERIES_3 - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) - - STD_EXCEPTION_PSERIES(0xd00, single_step) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) - - /* At 0xe??? we have a bunch of hypervisor exceptions, we branch - * out of line to handle them - */ - . = 0xe00 -hv_data_storage_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_data_storage_hv - - . = 0xe20 -hv_instr_storage_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_instr_storage_hv - - . = 0xe40 -emulation_assist_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b emulation_assist_hv - - . = 0xe60 -hv_exception_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_early - - . = 0xe80 -hv_doorbell_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_doorbell_hv - - /* We need to deal with the Altivec unavailable exception - * here which is at 0xf20, thus in the middle of the - * prolog code of the PerformanceMonitor one. A little - * trickery is thus necessary - */ - . = 0xf00 -performance_monitor_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b performance_monitor_pSeries - - . = 0xf20 -altivec_unavailable_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b altivec_unavailable_pSeries - - . = 0xf40 -vsx_unavailable_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b vsx_unavailable_pSeries - - . = 0xf60 -facility_unavailable_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_pSeries - - . = 0xf80 -hv_facility_unavailable_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_hv - -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) -#endif /* CONFIG_CBE_RAS */ - - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) - - . = 0x1500 - .global denorm_exception_hv -denorm_exception_hv: - mtspr SPRN_SPRG_HSCRATCH0,r13 - EXCEPTION_PROLOG_0(PACA_EXGEN) - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) - -#ifdef CONFIG_PPC_DENORMALISATION - mfspr r10,SPRN_HSRR1 - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ - addi r11,r11,-4 /* HSRR0 is next instruction */ - bne+ denorm_assist -#endif - - KVMTEST(0x1500) - EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500) - -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) -#endif /* CONFIG_CBE_RAS */ - - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) - -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) -#else - . = 0x1800 -#endif /* CONFIG_CBE_RAS */ - - -/*** Out of line interrupts support ***/ - - .align 7 - /* moved from 0x200 */ -machine_check_powernv_early: +EXC_REAL_END(machine_check, 0x200, 0x300) +EXC_VIRT_NONE(0x4200, 0x4300) +TRAMP_REAL_BEGIN(machine_check_powernv_early) BEGIN_FTR_SECTION EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) /* @@ -486,7 +226,6 @@ BEGIN_FTR_SECTION mfmsr r11 /* get MSR value */ ori r11,r11,MSR_ME /* turn on ME bit */ ori r11,r11,MSR_RI /* turn on RI bit */ - ld r12,PACAKBASE(r13) /* get high part of &label */ LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 @@ -499,7 +238,6 @@ BEGIN_FTR_SECTION */ addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */ ld r11,PACAKMSR(r13) - ld r12,PACAKBASE(r13) LOAD_HANDLER(r12, unrecover_mce) li r10,MSR_ME andc r11,r11,r10 /* Turn off MSR_ME */ @@ -507,292 +245,289 @@ BEGIN_FTR_SECTION b . /* prevent speculative execution */ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) -machine_check_pSeries: +TRAMP_REAL_BEGIN(machine_check_pSeries) .globl machine_check_fwnmi machine_check_fwnmi: SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) machine_check_pSeries_0: - EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) - EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) - KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) - KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) - KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) - -#ifdef CONFIG_PPC_DENORMALISATION -denorm_assist: -BEGIN_FTR_SECTION -/* - * To denormalise we need to move a copy of the register to itself. - * For POWER6 do that here for all FP regs. - */ - mfmsr r10 - ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1) - xori r10,r10,(MSR_FE0|MSR_FE1) - mtmsrd r10 - sync + EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) + /* + * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the + * difference that MSR_RI is not enabled, because PACA_EXMC is being + * used, so nested machine check corrupts it. machine_check_common + * enables MSR_RI. + */ + ld r10,PACAKMSR(r13) + xori r10,r10,MSR_RI + mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r12, machine_check_common) + mtspr SPRN_SRR0,r12 + mfspr r12,SPRN_SRR1 + mtspr SPRN_SRR1,r10 + rfid + b . /* prevent speculative execution */ -#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 -#define FMR4(n) FMR2(n) ; FMR2(n+2) -#define FMR8(n) FMR4(n) ; FMR4(n+4) -#define FMR16(n) FMR8(n) ; FMR8(n+8) -#define FMR32(n) FMR16(n) ; FMR16(n+16) - FMR32(0) +TRAMP_KVM_SKIP(PACA_EXMC, 0x200) -FTR_SECTION_ELSE -/* - * To denormalise we need to move a copy of the register to itself. - * For POWER7 do that here for the first 32 VSX registers only. - */ - mfmsr r10 - oris r10,r10,MSR_VSX@h - mtmsrd r10 - sync +EXC_COMMON_BEGIN(machine_check_common) + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + mfspr r10,SPRN_DAR + std r10,PACA_EXMC+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXMC+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + FINISH_NAP + RECONCILE_IRQ_STATE(r10, r11) + ld r3,PACA_EXMC+EX_DAR(r13) + lwz r4,PACA_EXMC+EX_DSISR(r13) + /* Enable MSR_RI when finished with PACA_EXMC */ + li r10,MSR_RI + mtmsrd r10,1 + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + b ret_from_except -#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) -#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) -#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) -#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) -#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) - XVCPSGNDP32(0) +#define MACHINE_CHECK_HANDLER_WINDUP \ + /* Clear MSR_RI before setting SRR0 and SRR1. */\ + li r0,MSR_RI; \ + mfmsr r9; /* get MSR value */ \ + andc r9,r9,r0; \ + mtmsrd r9,1; /* Clear MSR_RI */ \ + /* Move original SRR0 and SRR1 into the respective regs */ \ + ld r9,_MSR(r1); \ + mtspr SPRN_SRR1,r9; \ + ld r3,_NIP(r1); \ + mtspr SPRN_SRR0,r3; \ + ld r9,_CTR(r1); \ + mtctr r9; \ + ld r9,_XER(r1); \ + mtxer r9; \ + ld r9,_LINK(r1); \ + mtlr r9; \ + REST_GPR(0, r1); \ + REST_8GPRS(2, r1); \ + REST_GPR(10, r1); \ + ld r11,_CCR(r1); \ + mtcr r11; \ + /* Decrement paca->in_mce. */ \ + lhz r12,PACA_IN_MCE(r13); \ + subi r12,r12,1; \ + sth r12,PACA_IN_MCE(r13); \ + REST_GPR(11, r1); \ + REST_2GPRS(12, r1); \ + /* restore original r1. */ \ + ld r1,GPR1(r1) -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) + /* + * Handle machine check early in real mode. We come here with + * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. + */ +EXC_COMMON_BEGIN(machine_check_handle_early) + std r0,GPR0(r1) /* Save r0 */ + EXCEPTION_PROLOG_COMMON_3(0x200) + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_early + std r3,RESULT(r1) /* Save result */ + ld r12,_MSR(r1) +#ifdef CONFIG_PPC_P7_NAP + /* + * Check if thread was in power saving mode. We come here when any + * of the following is true: + * a. thread wasn't in power saving mode + * b. thread was in power saving mode with no state loss, + * supervisor state loss or hypervisor state loss. + * + * Go back to nap/sleep/winkle mode again if (b) is true. + */ + rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ + beq 4f /* No, it wasn;t */ + /* Thread was in power saving mode. Go back to nap again. */ + cmpwi r11,2 + blt 3f + /* Supervisor/Hypervisor state loss */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) +3: bl machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + GET_PACA(r13) + ld r1,PACAR1(r13) + /* + * Check what idle state this CPU was in and go back to same mode + * again. + */ + lbz r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi r3,PNV_THREAD_NAP + bgt 10f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +10: + cmpwi r3,PNV_THREAD_SLEEP + bgt 2f + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + /* No return */ -BEGIN_FTR_SECTION - b denorm_done -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) -/* - * To denormalise we need to move a copy of the register to itself. - * For POWER8 we need to do that for all 64 VSX registers - */ - XVCPSGNDP32(32) -denorm_done: - mtspr SPRN_HSRR0,r11 - mtcrf 0x80,r9 - ld r9,PACA_EXGEN+EX_R9(r13) - RESTORE_PPR_PACA(PACA_EXGEN, r10) -BEGIN_FTR_SECTION - ld r10,PACA_EXGEN+EX_CFAR(r13) - mtspr SPRN_CFAR,r10 -END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - ld r12,PACA_EXGEN+EX_R12(r13) - ld r13,PACA_EXGEN+EX_R13(r13) - HRFID - b . +2: + /* + * Go back to winkle. Please note that this thread was woken up in + * machine check from winkle and have not restored the per-subcore + * state. Hence before going back to winkle, set last bit of HSPGR0 + * to 1. This will make sure that if this thread gets woken up + * again at reset vector 0x100 then it will get chance to restore + * the subcore state. + */ + ori r13,r13,1 + SET_PACA(r13) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + /* No return */ +4: #endif - - .align 7 - /* moved from 0xe00 */ - STD_EXCEPTION_HV_OOL(0xe02, h_data_storage) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) - STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) - STD_EXCEPTION_HV_OOL(0xe42, emulation_assist) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) - MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) - - MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) - - /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) - STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) - STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) - STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf60) - STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) - -/* - * An interrupt came in while soft-disabled. We set paca->irq_happened, then: - * - If it was a decrementer interrupt, we bump the dec to max and and return. - * - If it was a doorbell we return immediately since doorbells are edge - * triggered and won't automatically refire. - * - If it was a HMI we return immediately since we handled it in realmode - * and it won't refire. - * - else we hard disable and return. - * This is called with r10 containing the value to OR to the paca field. - */ -#define MASKED_INTERRUPT(_H) \ -masked_##_H##interrupt: \ - std r11,PACA_EXGEN+EX_R11(r13); \ - lbz r11,PACAIRQHAPPENED(r13); \ - or r11,r11,r10; \ - stb r11,PACAIRQHAPPENED(r13); \ - cmpwi r10,PACA_IRQ_DEC; \ - bne 1f; \ - lis r10,0x7fff; \ - ori r10,r10,0xffff; \ - mtspr SPRN_DEC,r10; \ - b 2f; \ -1: cmpwi r10,PACA_IRQ_DBELL; \ - beq 2f; \ - cmpwi r10,PACA_IRQ_HMI; \ - beq 2f; \ - mfspr r10,SPRN_##_H##SRR1; \ - rldicl r10,r10,48,1; /* clear MSR_EE */ \ - rotldi r10,r10,16; \ - mtspr SPRN_##_H##SRR1,r10; \ -2: mtcrf 0x80,r9; \ - ld r9,PACA_EXGEN+EX_R9(r13); \ - ld r10,PACA_EXGEN+EX_R10(r13); \ - ld r11,PACA_EXGEN+EX_R11(r13); \ - GET_SCRATCH0(r13); \ - ##_H##rfid; \ - b . - - MASKED_INTERRUPT() - MASKED_INTERRUPT(H) - -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate - * which kind of interrupt. MSR:EE is already off. We generate a - * stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about, so we don't bother storing them. + /* + * Check if we are coming from hypervisor userspace. If yes then we + * continue in host kernel in V mode to deliver the MC event. */ - mfmsr r12 - mflr r11 - mfcr r9 - ori r12,r12,MSR_EE - cmpwi r3,0x900 - beq decrementer_common - cmpwi r3,0x500 - beq hardware_interrupt_common -BEGIN_FTR_SECTION - cmpwi r3,0xe80 - beq h_doorbell_common -FTR_SECTION_ELSE - cmpwi r3,0xa00 - beq doorbell_super_common -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) - blr - -#ifdef CONFIG_PPC_PSERIES -/* - * Vectors for the FWNMI option. Share common code. - */ - .globl system_reset_fwnmi - .align 7 -system_reset_fwnmi: - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) - -#endif /* CONFIG_PPC_PSERIES */ + rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ + beq 5f + andi. r11,r12,MSR_PR /* See if coming from user. */ + bne 9f /* continue in V mode if we are. */ +5: #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -kvmppc_skip_interrupt: /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. + * We are coming from kernel context. Check if we are coming from + * guest. if yes, then we can continue. We will fall through + * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. */ - mfspr r13, SPRN_SRR0 - addi r13, r13, 4 - mtspr SPRN_SRR0, r13 - GET_SCRATCH0(r13) + lbz r11,HSTATE_IN_GUEST(r13) + cmpwi r11,0 /* Check if coming from guest */ + bne 9f /* continue if we are. */ +#endif + /* + * At this point we are not sure about what context we come from. + * Queue up the MCE event and return from the interrupt. + * But before that, check if this is an un-recoverable exception. + * If yes, then stay on emergency stack and panic. + */ + andi. r11,r12,MSR_RI + bne 2f +1: mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r10,unrecover_mce) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + /* + * We are going down. But there are chances that we might get hit by + * another MCE during panic path and we may run into unstable state + * with no way out. Hence, turn ME bit off while going down, so that + * when another MCE is hit during panic path, system will checkstop + * and hypervisor will get restarted cleanly by SP. + */ + li r3,MSR_ME + andc r10,r10,r3 /* Turn off MSR_ME */ + mtspr SPRN_SRR1,r10 rfid b . - -kvmppc_skip_Hinterrupt: +2: /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. + * Check if we have successfully handled/recovered from error, if not + * then stay on emergency stack and panic. */ - mfspr r13, SPRN_HSRR0 - addi r13, r13, 4 - mtspr SPRN_HSRR0, r13 - GET_SCRATCH0(r13) - hrfid - b . -#endif + ld r3,RESULT(r1) /* Load result */ + cmpdi r3,0 /* see if we handled MCE successfully */ -/* - * Ensure that any handlers that get invoked from the exception prologs - * above are below the first 64KB (0x10000) of the kernel image because - * the prologs assemble the addresses of these handlers using the - * LOAD_HANDLER macro, which uses an ori instruction. - */ + beq 1b /* if !handled then panic */ + /* + * Return from MC interrupt. + * Queue up the MCE event so that we can log it later, while + * returning from kernel or opal call. + */ + bl machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + rfid +9: + /* Deliver the machine check to host kernel in V mode. */ + MACHINE_CHECK_HANDLER_WINDUP + b machine_check_pSeries -/*** Common interrupt handlers ***/ +EXC_COMMON_BEGIN(unrecover_mce) + /* Invoke machine_check_exception to print MCE event and panic. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + /* + * We will not reach here. Even if we did, there is no way out. Call + * unrecoverable_exception and die. + */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b - STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception) - STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) - STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt) - STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt) -#ifdef CONFIG_PPC_DOORBELL - STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception) -#else - STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception) -#endif - STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception) - STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception) - STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception) - STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt) - STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception) -#ifdef CONFIG_PPC_DOORBELL - STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception) -#else - STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception) -#endif - STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) - STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) - STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception) -#ifdef CONFIG_ALTIVEC - STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception) -#else - STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception) -#endif -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) - STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) - STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) -#endif /* CONFIG_CBE_RAS */ +EXC_REAL(data_access, 0x300, 0x380) +EXC_VIRT(data_access, 0x4300, 0x4380, 0x300) +TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) +EXC_COMMON_BEGIN(data_access_common) /* - * Relocation-on interrupts: A subset of the interrupts can be delivered - * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering - * it. Addresses are the same as the original interrupt addresses, but - * offset by 0xc000000000004000. - * It's impossible to receive interrupts below 0x300 via this mechanism. - * KVM: None of these traps are from the guest ; anything that escalated - * to HV=1 from HV=0 is delivered via real mode handlers. + * Here r13 points to the paca, r9 contains the saved CR, + * SRR0 and SRR1 are saved in r11 and r12, + * r9 - r13 are saved in paca->exgen. */ + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + RECONCILE_IRQ_STATE(r10, r11) + ld r12,_MSR(r1) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + li r5,0x300 + std r3,_DAR(r1) + std r4,_DSISR(r1) +BEGIN_MMU_FTR_SECTION + b do_hash_page /* Try to handle as hpte fault */ +MMU_FTR_SECTION_ELSE + b handle_page_fault +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + +EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR + mfspr r12,SPRN_SRR1 + crset 4*cr6+eq +#ifndef CONFIG_RELOCATABLE + b slb_miss_realmode +#else /* - * This uses the standard macro, since the original 0x300 vector - * only has extra guff for STAB-based processors -- which never - * come here. + * We can't just use a direct branch to slb_miss_realmode + * because the distance from here to there depends on where + * the kernel ends up being put. */ - STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access) - . = 0x4380 - .globl data_access_slb_relon_pSeries -data_access_slb_relon_pSeries: + mfctr r11 + LOAD_HANDLER(r10, slb_miss_realmode) + mtctr r10 + bctr +#endif +EXC_REAL_END(data_access_slb, 0x380, 0x400) + +EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR mfspr r12,SPRN_SRR1 + crset 4*cr6+eq #ifndef CONFIG_RELOCATABLE b slb_miss_realmode #else @@ -802,211 +537,198 @@ data_access_slb_relon_pSeries: * the kernel ends up being put. */ mfctr r11 - ld r10,PACAKBASE(r13) LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif +EXC_VIRT_END(data_access_slb, 0x4380, 0x4400) +TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) + + +EXC_REAL(instruction_access, 0x400, 0x480) +EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400) +TRAMP_KVM(PACA_EXGEN, 0x400) + +EXC_COMMON_BEGIN(instruction_access_common) + EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + RECONCILE_IRQ_STATE(r10, r11) + ld r12,_MSR(r1) + ld r3,_NIP(r1) + andis. r4,r12,0x5820 + li r5,0x400 + std r3,_DAR(r1) + std r4,_DSISR(r1) +BEGIN_MMU_FTR_SECTION + b do_hash_page /* Try to handle as hpte fault */ +MMU_FTR_SECTION_ELSE + b handle_page_fault +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + - STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access) - . = 0x4480 - .globl instruction_access_slb_relon_pSeries -instruction_access_slb_relon_pSeries: +EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ mfspr r12,SPRN_SRR1 + crclr 4*cr6+eq #ifndef CONFIG_RELOCATABLE b slb_miss_realmode #else mfctr r11 - ld r10,PACAKBASE(r13) LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif +EXC_REAL_END(instruction_access_slb, 0x480, 0x500) - . = 0x4500 - .globl hardware_interrupt_relon_pSeries; - .globl hardware_interrupt_relon_hv; -hardware_interrupt_relon_pSeries: -hardware_interrupt_relon_hv: - BEGIN_FTR_SECTION - _MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV) - FTR_SECTION_ELSE - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) - STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment) - STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check) - STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable) - MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer) - STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer) - MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super) - STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b) - - . = 0x4c00 - .globl system_call_relon_pSeries -system_call_relon_pSeries: - HMT_MEDIUM - SYSCALL_PSERIES_1 - SYSCALL_PSERIES_2_DIRECT - SYSCALL_PSERIES_3 - - STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) +EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + mfspr r12,SPRN_SRR1 + crclr 4*cr6+eq +#ifndef CONFIG_RELOCATABLE + b slb_miss_realmode +#else + mfctr r11 + LOAD_HANDLER(r10, slb_miss_realmode) + mtctr r10 + bctr +#endif +EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500) +TRAMP_KVM(PACA_EXSLB, 0x480) - . = 0x4e00 - b . /* Can't happen, see v2.07 Book III-S section 6.5 */ - . = 0x4e20 - b . /* Can't happen, see v2.07 Book III-S section 6.5 */ +/* This handler is used by both 0x380 and 0x480 slb miss interrupts */ +EXC_COMMON_BEGIN(slb_miss_realmode) + /* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss + * We assume we aren't going to take any exceptions during this + * procedure. + */ + mflr r10 +#ifdef CONFIG_RELOCATABLE + mtctr r11 +#endif - . = 0x4e40 -emulation_assist_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b emulation_assist_relon_hv + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + std r3,PACA_EXSLB+EX_DAR(r13) - . = 0x4e60 - b . /* Can't happen, see v2.07 Book III-S section 6.5 */ + crset 4*cr0+eq +#ifdef CONFIG_PPC_STD_MMU_64 +BEGIN_MMU_FTR_SECTION + bl slb_allocate_realmode +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) +#endif - . = 0x4e80 -h_doorbell_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_doorbell_relon_hv + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + mtlr r10 - . = 0x4f00 -performance_monitor_relon_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b performance_monitor_relon_pSeries + beq 8f /* if bad address, make full stack frame */ - . = 0x4f20 -altivec_unavailable_relon_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b altivec_unavailable_relon_pSeries - - . = 0x4f40 -vsx_unavailable_relon_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b vsx_unavailable_relon_pSeries + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- 2f - . = 0x4f60 -facility_unavailable_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_relon_pSeries + /* All done -- return from exception. */ - . = 0x4f80 -hv_facility_unavailable_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hv_facility_unavailable_relon_hv +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop - STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) -#ifdef CONFIG_PPC_DENORMALISATION - . = 0x5500 - b denorm_exception_hv -#endif - STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ - .align 7 -system_call_entry: - b system_call_common +2: mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . -ppc64_runlatch_on_trampoline: - b __ppc64_runlatch_on +8: mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r10,bad_addr_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . -/* - * Here r13 points to the paca, r9 contains the saved CR, - * SRR0 and SRR1 are saved in r11 and r12, - * r9 - r13 are saved in paca->exgen. - */ - .align 7 - .globl data_access_common -data_access_common: - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) +EXC_COMMON_BEGIN(unrecov_slb) + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) - ld r12,_MSR(r1) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - li r5,0x300 - std r3,_DAR(r1) - std r4,_DSISR(r1) -BEGIN_MMU_FTR_SECTION - b do_hash_page /* Try to handle as hpte fault */ -MMU_FTR_SECTION_ELSE - b handle_page_fault -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) + bl save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b - .align 7 - .globl h_data_storage_common -h_data_storage_common: - mfspr r10,SPRN_HDAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_HDSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) - bl save_nvgprs +EXC_COMMON_BEGIN(bad_addr_slb) + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) - addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception - b ret_from_except + ld r3, PACA_EXSLB+EX_DAR(r13) + std r3, _DAR(r1) + beq cr6, 2f + li r10, 0x480 /* fix trap number for I-SLB miss */ + std r10, _TRAP(r1) +2: bl save_nvgprs + addi r3, r1, STACK_FRAME_OVERHEAD + bl slb_miss_bad_addr + b ret_from_except - .align 7 - .globl instruction_access_common -instruction_access_common: - EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) - RECONCILE_IRQ_STATE(r10, r11) - ld r12,_MSR(r1) - ld r3,_NIP(r1) - andis. r4,r12,0x5820 - li r5,0x400 - std r3,_DAR(r1) - std r4,_DSISR(r1) -BEGIN_MMU_FTR_SECTION - b do_hash_page /* Try to handle as hpte fault */ -MMU_FTR_SECTION_ELSE - b handle_page_fault -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) +EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600) + .globl hardware_interrupt_hv; +hardware_interrupt_hv: + BEGIN_FTR_SECTION + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_HV, SOFTEN_TEST_HV) +do_kvm_H0x500: + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_STD, SOFTEN_TEST_PR) +do_kvm_0x500: + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +EXC_REAL_END(hardware_interrupt, 0x500, 0x600) - STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception) +EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600) + .globl hardware_interrupt_relon_hv; +hardware_interrupt_relon_hv: + BEGIN_FTR_SECTION + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) + FTR_SECTION_ELSE + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) +EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) - /* - * Machine check is different because we use a different - * save area: PACA_EXMC instead of PACA_EXGEN. - */ - .align 7 - .globl machine_check_common -machine_check_common: +EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) - FINISH_NAP - RECONCILE_IRQ_STATE(r10, r11) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - std r3,_DAR(r1) - std r4,_DSISR(r1) - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception - b ret_from_except - .align 7 - .globl alignment_common -alignment_common: +EXC_REAL(alignment, 0x600, 0x700) +EXC_VIRT(alignment, 0x4600, 0x4700, 0x600) +TRAMP_KVM(PACA_EXGEN, 0x600) +EXC_COMMON_BEGIN(alignment_common) mfspr r10,SPRN_DAR std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,SPRN_DSISR @@ -1022,9 +744,11 @@ alignment_common: bl alignment_exception b ret_from_except - .align 7 - .globl program_check_common -program_check_common: + +EXC_REAL(program_check, 0x700, 0x800) +EXC_VIRT(program_check, 0x4700, 0x4800, 0x700) +TRAMP_KVM(PACA_EXGEN, 0x700) +EXC_COMMON_BEGIN(program_check_common) EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) @@ -1032,9 +756,11 @@ program_check_common: bl program_check_exception b ret_from_except - .align 7 - .globl fp_unavailable_common -fp_unavailable_common: + +EXC_REAL(fp_unavailable, 0x800, 0x900) +EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800) +TRAMP_KVM(PACA_EXGEN, 0x800) +EXC_COMMON_BEGIN(fp_unavailable_common) EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ bl save_nvgprs @@ -1062,9 +788,250 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) bl fp_unavailable_tm b ret_from_except #endif - .align 7 - .globl altivec_unavailable_common -altivec_unavailable_common: + + +EXC_REAL_MASKABLE(decrementer, 0x900, 0x980) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900) +TRAMP_KVM(PACA_EXGEN, 0x900) +EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) + + +EXC_REAL_HV(hdecrementer, 0x980, 0xa00) +EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980) +TRAMP_KVM_HV(PACA_EXGEN, 0x980) +EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) + + +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00) +TRAMP_KVM(PACA_EXGEN, 0xa00) +#ifdef CONFIG_PPC_DOORBELL +EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) +#else +EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception) +#endif + + +EXC_REAL(trap_0b, 0xb00, 0xc00) +EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) +TRAMP_KVM(PACA_EXGEN, 0xb00) +EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) + + +#define LOAD_SYSCALL_HANDLER(reg) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(system_call_common))@l; + +/* Syscall routine is used twice, in reloc-off and reloc-on paths */ +#define SYSCALL_PSERIES_1 \ +BEGIN_FTR_SECTION \ + cmpdi r0,0x1ebe ; \ + beq- 1f ; \ +END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ + mr r9,r13 ; \ + GET_PACA(r13) ; \ + mfspr r11,SPRN_SRR0 ; \ +0: + +#define SYSCALL_PSERIES_2_RFID \ + mfspr r12,SPRN_SRR1 ; \ + LOAD_SYSCALL_HANDLER(r10) ; \ + mtspr SPRN_SRR0,r10 ; \ + ld r10,PACAKMSR(r13) ; \ + mtspr SPRN_SRR1,r10 ; \ + rfid ; \ + b . ; /* prevent speculative execution */ + +#define SYSCALL_PSERIES_3 \ + /* Fast LE/BE switch system call */ \ +1: mfspr r12,SPRN_SRR1 ; \ + xori r12,r12,MSR_LE ; \ + mtspr SPRN_SRR1,r12 ; \ + rfid ; /* return to userspace */ \ + b . ; /* prevent speculative execution */ + +#if defined(CONFIG_RELOCATABLE) + /* + * We can't branch directly so we do it via the CTR which + * is volatile across system calls. + */ +#define SYSCALL_PSERIES_2_DIRECT \ + LOAD_SYSCALL_HANDLER(r12) ; \ + mtctr r12 ; \ + mfspr r12,SPRN_SRR1 ; \ + li r10,MSR_RI ; \ + mtmsrd r10,1 ; \ + bctr ; +#else + /* We can branch directly */ +#define SYSCALL_PSERIES_2_DIRECT \ + mfspr r12,SPRN_SRR1 ; \ + li r10,MSR_RI ; \ + mtmsrd r10,1 ; /* Set RI (EE=0) */ \ + b system_call_common ; +#endif + +EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. + */ +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9,PACA_EXGEN+EX_R9(r13) + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); + HMT_MEDIUM; + std r10,PACA_EXGEN+EX_R10(r13) + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); + mfcr r9 + KVMTEST_PR(0xc00) + GET_SCRATCH0(r13) +#else + HMT_MEDIUM; +#endif + SYSCALL_PSERIES_1 + SYSCALL_PSERIES_2_RFID + SYSCALL_PSERIES_3 +EXC_REAL_END(system_call, 0xc00, 0xd00) + +EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) + HMT_MEDIUM + SYSCALL_PSERIES_1 + SYSCALL_PSERIES_2_DIRECT + SYSCALL_PSERIES_3 +EXC_VIRT_END(system_call, 0x4c00, 0x4d00) + +TRAMP_KVM(PACA_EXGEN, 0xc00) + + +EXC_REAL(single_step, 0xd00, 0xe00) +EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00) +TRAMP_KVM(PACA_EXGEN, 0xd00) +EXC_COMMON(single_step_common, 0xd00, single_step_exception) + +EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) +EXC_VIRT_NONE(0x4e00, 0x4e20) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) +EXC_COMMON_BEGIN(h_data_storage_common) + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl save_nvgprs + RECONCILE_IRQ_STATE(r10, r11) + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_except + + +EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) +EXC_VIRT_NONE(0x4e20, 0x4e40) +TRAMP_KVM_HV(PACA_EXGEN, 0xe20) +EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) + + +EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60) +EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40) +TRAMP_KVM_HV(PACA_EXGEN, 0xe40) +EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) + + +/* + * hmi_exception trampoline is a special case. It jumps to hmi_exception_early + * first, and then eventaully from there to the trampoline to get into virtual + * mode. + */ +__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early) +__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +EXC_VIRT_NONE(0x4e60, 0x4e80) +TRAMP_KVM_HV(PACA_EXGEN, 0xe60) +TRAMP_REAL_BEGIN(hmi_exception_early) + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + std r9,_CCR(r1) /* save CR in stackframe */ + mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ + std r11,_NIP(r1) /* save HSRR0 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save SRR1 */ + std r12,_MSR(r1) /* save SRR1 in stackframe */ + std r10,0(r1) /* make stack chain pointer */ + std r0,GPR0(r1) /* save r0 in stackframe */ + std r10,GPR1(r1) /* save r1 in stackframe */ + EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) + EXCEPTION_PROLOG_COMMON_3(0xe60) + addi r3,r1,STACK_FRAME_OVERHEAD + bl hmi_exception_realmode + /* Windup the stack. */ + /* Move original HSRR0 and HSRR1 into the respective regs */ + ld r9,_MSR(r1) + mtspr SPRN_HSRR1,r9 + ld r3,_NIP(r1) + mtspr SPRN_HSRR0,r3 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + mtcr r11 + REST_GPR(11, r1) + REST_2GPRS(12, r1) + /* restore original r1. */ + ld r1,GPR1(r1) + + /* + * Go to virtual mode and pull the HMI event information from + * firmware. + */ + .globl hmi_exception_after_realmode +hmi_exception_after_realmode: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b tramp_real_hmi_exception + +EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) + + +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80) +TRAMP_KVM_HV(PACA_EXGEN, 0xe80) +#ifdef CONFIG_PPC_DOORBELL +EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) +#else +EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) +#endif + + +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0) +TRAMP_KVM_HV(PACA_EXGEN, 0xea0) +EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) + + +EXC_REAL_NONE(0xec0, 0xf00) +EXC_VIRT_NONE(0x4ec0, 0x4f00) + + +EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20) +EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00) +TRAMP_KVM(PACA_EXGEN, 0xf00) +EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) + + +EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40) +EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20) +TRAMP_KVM(PACA_EXGEN, 0xf20) +EXC_COMMON_BEGIN(altivec_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION @@ -1097,9 +1064,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl altivec_unavailable_exception b ret_from_except - .align 7 - .globl vsx_unavailable_common -vsx_unavailable_common: + +EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60) +EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40) +TRAMP_KVM(PACA_EXGEN, 0xf40) +EXC_COMMON_BEGIN(vsx_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) #ifdef CONFIG_VSX BEGIN_FTR_SECTION @@ -1131,315 +1100,284 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except - STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) - STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) - /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) - MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) +EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80) +EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60) +TRAMP_KVM(PACA_EXGEN, 0xf60) +EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable) - /* - * The __end_interrupts marker must be past the out-of-line (OOL) - * handlers, so that they are copied to real address 0x100 when running - * a relocatable kernel. This ensures they can be reached from the short - * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch - * directly, without using LOAD_HANDLER(). - */ - .align 7 - .globl __end_interrupts -__end_interrupts: +EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0) +EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80) +TRAMP_KVM_HV(PACA_EXGEN, 0xf80) +EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) + +EXC_REAL_NONE(0xfa0, 0x1200) +EXC_VIRT_NONE(0x4fa0, 0x5200) + +#ifdef CONFIG_CBE_RAS +EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300) +EXC_VIRT_NONE(0x5200, 0x5300) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200) +EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) +#else /* CONFIG_CBE_RAS */ +EXC_REAL_NONE(0x1200, 0x1300) +EXC_VIRT_NONE(0x5200, 0x5300) +#endif + + +EXC_REAL(instruction_breakpoint, 0x1300, 0x1400) +EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300) +TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300) +EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) + +EXC_REAL_NONE(0x1400, 0x1500) +EXC_VIRT_NONE(0x5400, 0x5500) + +EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) + mtspr SPRN_SPRG_HSCRATCH0,r13 + EXCEPTION_PROLOG_0(PACA_EXGEN) + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) + +#ifdef CONFIG_PPC_DENORMALISATION + mfspr r10,SPRN_HSRR1 + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + addi r11,r11,-4 /* HSRR0 is next instruction */ + bne+ denorm_assist +#endif + + KVMTEST_PR(0x1500) + EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) +EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600) + +#ifdef CONFIG_PPC_DENORMALISATION +EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600) + b exc_real_0x1500_denorm_exception_hv +EXC_VIRT_END(denorm_exception, 0x5500, 0x5600) +#else +EXC_VIRT_NONE(0x5500, 0x5600) +#endif + +TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) + +#ifdef CONFIG_PPC_DENORMALISATION +TRAMP_REAL_BEGIN(denorm_assist) +BEGIN_FTR_SECTION /* - * Data area reserved for FWNMI option. - * This address (0x7000) is fixed by the RPA. + * To denormalise we need to move a copy of the register to itself. + * For POWER6 do that here for all FP regs. */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: + mfmsr r10 + ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1) + xori r10,r10,(MSR_FE0|MSR_FE1) + mtmsrd r10 + sync - /* pseries and powernv need to keep the whole page from - * 0x7000 to 0x8000 free for use by the firmware - */ - . = 0x8000 -#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ +#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 +#define FMR4(n) FMR2(n) ; FMR2(n+2) +#define FMR8(n) FMR4(n) ; FMR4(n+4) +#define FMR16(n) FMR8(n) ; FMR8(n+8) +#define FMR32(n) FMR16(n) ; FMR16(n+16) + FMR32(0) - .globl hmi_exception_early -hmi_exception_early: - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) - mr r10,r1 /* Save r1 */ - ld r1,PACAEMERGSP(r13) /* Use emergency stack */ - subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - std r9,_CCR(r1) /* save CR in stackframe */ - mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - std r11,_NIP(r1) /* save HSRR0 in stackframe */ - mfspr r12,SPRN_HSRR1 /* Save SRR1 */ - std r12,_MSR(r1) /* save SRR1 in stackframe */ - std r10,0(r1) /* make stack chain pointer */ - std r0,GPR0(r1) /* save r0 in stackframe */ - std r10,GPR1(r1) /* save r1 in stackframe */ - EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) - EXCEPTION_PROLOG_COMMON_3(0xe60) - addi r3,r1,STACK_FRAME_OVERHEAD - bl hmi_exception_realmode - /* Windup the stack. */ - /* Move original HSRR0 and HSRR1 into the respective regs */ - ld r9,_MSR(r1) - mtspr SPRN_HSRR1,r9 - ld r3,_NIP(r1) - mtspr SPRN_HSRR0,r3 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlr r9 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - mtcr r11 - REST_GPR(11, r1) - REST_2GPRS(12, r1) - /* restore original r1. */ - ld r1,GPR1(r1) +FTR_SECTION_ELSE +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER7 do that here for the first 32 VSX registers only. + */ + mfmsr r10 + oris r10,r10,MSR_VSX@h + mtmsrd r10 + sync - /* - * Go to virtual mode and pull the HMI event information from - * firmware. - */ - .globl hmi_exception_after_realmode -hmi_exception_after_realmode: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_hv +#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) +#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) +#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) +#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) +#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) + XVCPSGNDP32(0) +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) -#define MACHINE_CHECK_HANDLER_WINDUP \ - /* Clear MSR_RI before setting SRR0 and SRR1. */\ - li r0,MSR_RI; \ - mfmsr r9; /* get MSR value */ \ - andc r9,r9,r0; \ - mtmsrd r9,1; /* Clear MSR_RI */ \ - /* Move original SRR0 and SRR1 into the respective regs */ \ - ld r9,_MSR(r1); \ - mtspr SPRN_SRR1,r9; \ - ld r3,_NIP(r1); \ - mtspr SPRN_SRR0,r3; \ - ld r9,_CTR(r1); \ - mtctr r9; \ - ld r9,_XER(r1); \ - mtxer r9; \ - ld r9,_LINK(r1); \ - mtlr r9; \ - REST_GPR(0, r1); \ - REST_8GPRS(2, r1); \ - REST_GPR(10, r1); \ - ld r11,_CCR(r1); \ - mtcr r11; \ - /* Decrement paca->in_mce. */ \ - lhz r12,PACA_IN_MCE(r13); \ - subi r12,r12,1; \ - sth r12,PACA_IN_MCE(r13); \ - REST_GPR(11, r1); \ - REST_2GPRS(12, r1); \ - /* restore original r1. */ \ - ld r1,GPR1(r1) +BEGIN_FTR_SECTION + b denorm_done +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER8 we need to do that for all 64 VSX registers + */ + XVCPSGNDP32(32) +denorm_done: + mtspr SPRN_HSRR0,r11 + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + RESTORE_PPR_PACA(PACA_EXGEN, r10) +BEGIN_FTR_SECTION + ld r10,PACA_EXGEN+EX_CFAR(r13) + mtspr SPRN_CFAR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + HRFID + b . +#endif - /* - * Handle machine check early in real mode. We come here with - * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. - */ - .align 7 - .globl machine_check_handle_early -machine_check_handle_early: - std r0,GPR0(r1) /* Save r0 */ - EXCEPTION_PROLOG_COMMON_3(0x200) - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_early - std r3,RESULT(r1) /* Save result */ - ld r12,_MSR(r1) -#ifdef CONFIG_PPC_P7_NAP - /* - * Check if thread was in power saving mode. We come here when any - * of the following is true: - * a. thread wasn't in power saving mode - * b. thread was in power saving mode with no state loss or - * supervisor state loss - * - * Go back to nap again if (b) is true. - */ - rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ - beq 4f /* No, it wasn;t */ - /* Thread was in power saving mode. Go back to nap again. */ - cmpwi r11,2 - bne 3f - /* Supervisor state loss */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) -3: bl machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - GET_PACA(r13) - ld r1,PACAR1(r13) - li r3,PNV_THREAD_NAP - b power7_enter_nap_mode -4: +EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception) + + +#ifdef CONFIG_CBE_RAS +EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700) +EXC_VIRT_NONE(0x5600, 0x5700) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600) +EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) +#else /* CONFIG_CBE_RAS */ +EXC_REAL_NONE(0x1600, 0x1700) +EXC_VIRT_NONE(0x5600, 0x5700) #endif - /* - * Check if we are coming from hypervisor userspace. If yes then we - * continue in host kernel in V mode to deliver the MC event. - */ - rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ - beq 5f - andi. r11,r12,MSR_PR /* See if coming from user. */ - bne 9f /* continue in V mode if we are. */ -5: -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - /* - * We are coming from kernel context. Check if we are coming from - * guest. if yes, then we can continue. We will fall through - * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. - */ - lbz r11,HSTATE_IN_GUEST(r13) - cmpwi r11,0 /* Check if coming from guest */ - bne 9f /* continue if we are. */ + +EXC_REAL(altivec_assist, 0x1700, 0x1800) +EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700) +TRAMP_KVM(PACA_EXGEN, 0x1700) +#ifdef CONFIG_ALTIVEC +EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) +#else +EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception) #endif - /* - * At this point we are not sure about what context we come from. - * Queue up the MCE event and return from the interrupt. - * But before that, check if this is an un-recoverable exception. - * If yes, then stay on emergency stack and panic. - */ - andi. r11,r12,MSR_RI - bne 2f -1: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecover_mce) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - /* - * We are going down. But there are chances that we might get hit by - * another MCE during panic path and we may run into unstable state - * with no way out. Hence, turn ME bit off while going down, so that - * when another MCE is hit during panic path, system will checkstop - * and hypervisor will get restarted cleanly by SP. - */ - li r3,MSR_ME - andc r10,r10,r3 /* Turn off MSR_ME */ - mtspr SPRN_SRR1,r10 - rfid + + +#ifdef CONFIG_CBE_RAS +EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900) +EXC_VIRT_NONE(0x5800, 0x5900) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800) +EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) +#else /* CONFIG_CBE_RAS */ +EXC_REAL_NONE(0x1800, 0x1900) +EXC_VIRT_NONE(0x5800, 0x5900) +#endif + + +/* + * An interrupt came in while soft-disabled. We set paca->irq_happened, then: + * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a doorbell we return immediately since doorbells are edge + * triggered and won't automatically refire. + * - If it was a HMI we return immediately since we handled it in realmode + * and it won't refire. + * - else we hard disable and return. + * This is called with r10 containing the value to OR to the paca field. + */ +#define MASKED_INTERRUPT(_H) \ +masked_##_H##interrupt: \ + std r11,PACA_EXGEN+EX_R11(r13); \ + lbz r11,PACAIRQHAPPENED(r13); \ + or r11,r11,r10; \ + stb r11,PACAIRQHAPPENED(r13); \ + cmpwi r10,PACA_IRQ_DEC; \ + bne 1f; \ + lis r10,0x7fff; \ + ori r10,r10,0xffff; \ + mtspr SPRN_DEC,r10; \ + b 2f; \ +1: cmpwi r10,PACA_IRQ_DBELL; \ + beq 2f; \ + cmpwi r10,PACA_IRQ_HMI; \ + beq 2f; \ + mfspr r10,SPRN_##_H##SRR1; \ + rldicl r10,r10,48,1; /* clear MSR_EE */ \ + rotldi r10,r10,16; \ + mtspr SPRN_##_H##SRR1,r10; \ +2: mtcrf 0x80,r9; \ + ld r9,PACA_EXGEN+EX_R9(r13); \ + ld r10,PACA_EXGEN+EX_R10(r13); \ + ld r11,PACA_EXGEN+EX_R11(r13); \ + GET_SCRATCH0(r13); \ + ##_H##rfid; \ b . -2: - /* - * Check if we have successfully handled/recovered from error, if not - * then stay on emergency stack and panic. - */ - ld r3,RESULT(r1) /* Load result */ - cmpdi r3,0 /* see if we handled MCE successfully */ - beq 1b /* if !handled then panic */ +/* + * Real mode exceptions actually use this too, but alternate + * instruction code patches (which end up in the common .text area) + * cannot reach these if they are put there. + */ +USE_FIXED_SECTION(virt_trampolines) + MASKED_INTERRUPT() + MASKED_INTERRUPT(H) + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) /* - * Return from MC interrupt. - * Queue up the MCE event so that we can log it later, while - * returning from kernel or opal call. + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. */ - bl machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP + mfspr r13, SPRN_SRR0 + addi r13, r13, 4 + mtspr SPRN_SRR0, r13 + GET_SCRATCH0(r13) rfid -9: - /* Deliver the machine check to host kernel in V mode. */ - MACHINE_CHECK_HANDLER_WINDUP - b machine_check_pSeries + b . -unrecover_mce: - /* Invoke machine_check_exception to print MCE event and panic. */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception +TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) /* - * We will not reach here. Even if we did, there is no way out. Call - * unrecoverable_exception and die. + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b -/* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -slb_miss_realmode: - mflr r10 -#ifdef CONFIG_RELOCATABLE - mtctr r11 -#endif - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - -#ifdef CONFIG_PPC_STD_MMU_64 -BEGIN_MMU_FTR_SECTION - bl slb_allocate_realmode -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) + mfspr r13, SPRN_HSRR0 + addi r13, r13, 4 + mtspr SPRN_HSRR0, r13 + GET_SCRATCH0(r13) + hrfid + b . #endif - /* All done -- return from exception. */ - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +/* + * Ensure that any handlers that get invoked from the exception prologs + * above are below the first 64KB (0x10000) of the kernel image because + * the prologs assemble the addresses of these handlers using the + * LOAD_HANDLER macro, which uses an ori instruction. + */ - mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- 2f +/*** Common interrupt handlers ***/ -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - RESTORE_PPR_PACA(PACA_EXSLB, r9) - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ + /* + * Relocation-on interrupts: A subset of the interrupts can be delivered + * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering + * it. Addresses are the same as the original interrupt addresses, but + * offset by 0xc000000000004000. + * It's impossible to receive interrupts below 0x300 via this mechanism. + * KVM: None of these traps are from the guest ; anything that escalated + * to HV=1 from HV=0 is delivered via real mode handlers. + */ -2: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecov_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - rfid - b . + /* + * This uses the standard macro, since the original 0x300 vector + * only has extra guff for STAB-based processors -- which never + * come here. + */ -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - RECONCILE_IRQ_STATE(r10, r11) - bl save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b +EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) + b __ppc64_runlatch_on +USE_FIXED_SECTION(virt_trampolines) + /* + * The __end_interrupts marker must be past the out-of-line (OOL) + * handlers, so that they are copied to real address 0x100 when running + * a relocatable kernel. This ensures they can be reached from the short + * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch + * directly, without using LOAD_HANDLER(). + */ + .align 7 + .globl __end_interrupts +__end_interrupts: +DEFINE_FIXED_SYMBOL(__end_interrupts) #ifdef CONFIG_PPC_970_NAP -power4_fixup_nap: +EXC_COMMON_BEGIN(power4_fixup_nap) andc r9,r9,r10 std r9,TI_LOCAL_FLAGS(r11) ld r10,_LINK(r1) /* make idle task do the */ @@ -1447,6 +1385,13 @@ power4_fixup_nap: blr #endif +CLOSE_FIXED_SECTION(real_vectors); +CLOSE_FIXED_SECTION(real_trampolines); +CLOSE_FIXED_SECTION(virt_vectors); +CLOSE_FIXED_SECTION(virt_trampolines); + +USE_TEXT_SECTION() + /* * Hash table stuff */ @@ -1592,3 +1537,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_bad_stack b 1b + +/* + * Called from arch_local_irq_enable when an interrupt needs + * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate + * which kind of interrupt. MSR:EE is already off. We generate a + * stackframe like if a real interrupt had happened. + * + * Note: While MSR:EE is off, we need to make sure that _MSR + * in the generated frame has EE set to 1 or the exception + * handler will not properly re-enable them. + */ +_GLOBAL(__replay_interrupt) + /* We are going to jump to the exception common code which + * will retrieve various register values from the PACA which + * we don't give a damn about, so we don't bother storing them. + */ + mfmsr r12 + mflr r11 + mfcr r9 + ori r12,r12,MSR_EE + cmpwi r3,0x900 + beq decrementer_common + cmpwi r3,0x500 + beq hardware_interrupt_common +BEGIN_FTR_SECTION + cmpwi r3,0xe80 + beq h_doorbell_common + cmpwi r3,0xea0 + beq h_virt_irq_common + cmpwi r3,0xe60 + beq hmi_exception_common +FTR_SECTION_ELSE + cmpwi r3,0xa00 + beq doorbell_super_common +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) + blr diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3cb3b02a13dd..8f0c7c5d93f2 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -333,6 +333,11 @@ int __init fadump_reserve_mem(void) return 1; } +unsigned long __init arch_reserved_kernel_pages(void) +{ + return memblock_reserved_size() / PAGE_SIZE; +} + /* Look for fadump= cmdline option. */ static int __init early_fadump_param(char *p) { @@ -778,7 +783,11 @@ static int fadump_init_elfcore_header(char *bufp) elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); elf->e_shoff = 0; - elf->e_flags = ELF_CORE_EFLAGS; +#if defined(_CALL_ELF) + elf->e_flags = _CALL_ELF; +#else + elf->e_flags = 0; +#endif elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = 0; @@ -1009,8 +1018,7 @@ static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) } while (wait_time); if (rc) { - printk(KERN_ERR "Failed to invalidate firmware-assisted dump " - "rgistration. unexpected error(%d).\n", rc); + pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc); return rc; } fw_dump.dump_active = 0; @@ -1105,7 +1113,9 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj, * Take away the '/proc/vmcore'. We are releasing the dump * memory, hence it will not be valid anymore. */ +#ifdef CONFIG_PROC_VMCORE vmcore_cleanup(); +#endif fadump_invalidate_release_mem(); } else diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 15da2b5df85e..6c509f39bbde 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -24,6 +24,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/export.h> #ifdef CONFIG_VSX #define __REST_32FPVSRS(n,c,base) \ @@ -50,32 +51,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* void do_load_up_transact_fpu(struct thread_struct *thread) - * - * This is similar to load_up_fpu but for the transactional version of the FP - * register set. It doesn't mess with the task MSR or valid flags. - * Furthermore, we don't do lazy FP with TM currently. - */ -_GLOBAL(do_load_up_transact_fpu) - mfmsr r6 - ori r5,r6,MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r5,r5,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - SYNC - MTMSRD(r5) - - addi r7,r3,THREAD_TRANSACT_FPSTATE - lfd fr0,FPSTATE_FPSCR(r7) - MTFSF_L(fr0) - REST_32FPVSRS(0, R4, R7) - - blr -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - /* * Load state from memory into FP registers including FPSCR. * Assumes the caller has enabled FP in the MSR. @@ -85,6 +60,7 @@ _GLOBAL(load_fp_state) MTFSF_L(fr0) REST_32FPVSRS(0, R4, R3) blr +EXPORT_SYMBOL(load_fp_state) /* * Store FP state into memory, including FPSCR @@ -95,6 +71,7 @@ _GLOBAL(store_fp_state) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) blr +EXPORT_SYMBOL(store_fp_state) /* * This task wants to use the FPU now. diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1123a4d8d8dd..a95639b8d4ac 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -144,6 +144,21 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } +#ifdef CC_USING_MPROFILE_KERNEL + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + + if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { + pr_err("Unexpected instruction %08x around bl _mcount\n", op); + return -EINVAL; + } +#else /* * Our original call site looks like: * @@ -170,24 +185,10 @@ __ftrace_make_nop(struct module *mod, } if (op != PPC_INST_LD_TOC) { - unsigned int inst; - - if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ - if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { - pr_err("Unexpected instructions around bl _mcount\n" - "when enabling dynamic ftrace!\t" - "(%08x,bl,%08x)\n", inst, op); - return -EINVAL; - } - - /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + return -EINVAL; } +#endif /* CC_USING_MPROFILE_KERNEL */ if (patch_instruction((unsigned int *)ip, pop)) { pr_err("Patching NOP failed.\n"); @@ -592,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) if (!ftrace_graph_entry(&trace)) goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, + NULL) == -EBUSY) goto out; parent = return_hooker; @@ -608,7 +610,7 @@ unsigned long __init arch_syscall_addr(int nr) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ -#if defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) +#ifdef PPC64_ELF_ABI_v1 char *arch_ftrace_match_adjust(char *str, const char *search) { if (str[0] == '.' && search[0] != '.') @@ -616,4 +618,4 @@ char *arch_ftrace_match_adjust(char *str, const char *search) else return str; } -#endif /* defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) */ +#endif /* PPC64_ELF_ABI_v1 */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index dc0488b6f6e1..9d963547d243 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -34,6 +34,7 @@ #include <asm/ptrace.h> #include <asm/bug.h> #include <asm/kvm_book3s_asm.h> +#include <asm/export.h> /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ @@ -266,7 +267,6 @@ __secondary_hold_acknowledge: #define EXCEPTION_PROLOG_2 \ - CLR_TOP32(r11); \ stw r10,_CCR(r11); /* save registers */ \ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ @@ -739,6 +739,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) .globl mol_trampoline .set mol_trampoline, i0x2f00 + EXPORT_SYMBOL(mol_trampoline) . = 0x3000 @@ -862,7 +863,6 @@ __secondary_start: /* ptr to phys current thread */ tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ - CLR_TOP32(r4) mtspr SPRN_SPRG_THREAD,r4 li r3,0 mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ @@ -949,7 +949,6 @@ start_here: /* ptr to phys current thread */ tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ - CLR_TOP32(r4) mtspr SPRN_SPRG_THREAD,r4 li r3,0 mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ @@ -1048,6 +1047,7 @@ _ENTRY(switch_mmu_context) 4: trap EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 blr +EXPORT_SYMBOL(switch_mmu_context) /* * An undocumented "feature" of 604e requires that the v bit @@ -1275,6 +1275,7 @@ sdata: .globl empty_zero_page empty_zero_page: .space 4096 +EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: @@ -1288,6 +1289,7 @@ intercept_table: .long 0, 0, 0, 0, 0, 0, 0, 0 .long 0, 0, 0, 0, 0, 0, 0, 0 .long 0, 0, 0, 0, 0, 0, 0, 0 +EXPORT_SYMBOL(intercept_table) /* Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7d7d8635227a..41374a468d1c 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -41,6 +41,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/export.h> /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet @@ -971,6 +972,7 @@ sdata: .globl empty_zero_page empty_zero_page: .space 4096 +EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 9cdf5c71e426..37e4a7cf0065 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -39,6 +39,7 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/synch.h> +#include <asm/export.h> #include "head_booke.h" @@ -1254,6 +1255,7 @@ sdata: .globl empty_zero_page empty_zero_page: .space PAGE_SIZE +EXPORT_SYMBOL(empty_zero_page) /* * To support >32-bit physical addresses, we use an 8KB pgdir. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2d14774af6b4..04c546e20cc0 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -28,6 +28,7 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm/ppc_asm.h> +#include <asm/head-64.h> #include <asm/asm-offsets.h> #include <asm/bug.h> #include <asm/cputable.h> @@ -42,6 +43,7 @@ #include <asm/hw_irq.h> #include <asm/cputhreads.h> #include <asm/ppc-opcode.h> +#include <asm/export.h> /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -65,9 +67,14 @@ * 2. The kernel is entered at __start */ - .text - .globl _stext -_stext: +OPEN_FIXED_SECTION(first_256B, 0x0, 0x100) +USE_FIXED_SECTION(first_256B) + /* + * Offsets are relative from the start of fixed section, and + * first_256B starts at 0. Offsets are a bit easier to use here + * than the fixed section entry macros. + */ + . = 0x0 _GLOBAL(__start) /* NOP this out unconditionally */ BEGIN_FTR_SECTION @@ -104,6 +111,7 @@ __secondary_hold_acknowledge: . = 0x5c .globl __run_at_load __run_at_load: +DEFINE_FIXED_SYMBOL(__run_at_load) .long 0x72756e30 /* "run0" -- relocate to 0 by default */ #endif @@ -133,7 +141,7 @@ __secondary_hold: /* Tell the master cpu we're here */ /* Relocation is off & we are located at an address less */ /* than 0x100, so only need to grab low order offset. */ - std r24,__secondary_hold_acknowledge-_stext(0) + std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) sync li r26,0 @@ -141,7 +149,7 @@ __secondary_hold: tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. */ -100: ld r12,__secondary_hold_spinloop-_stext(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) cmpdi 0,r12,0 beq 100b @@ -166,12 +174,13 @@ __secondary_hold: #else BUG_OPCODE #endif +CLOSE_FIXED_SECTION(first_256B) /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" exception_marker: .tc ID_72656773_68657265[TC],0x7265677368657265 - .text + .previous /* * On server, we include the exception vectors code here as it @@ -180,8 +189,12 @@ exception_marker: */ #ifdef CONFIG_PPC_BOOK3S #include "exceptions-64s.S" +#else +OPEN_TEXT_SECTION(0x100) #endif +USE_TEXT_SECTION() + #ifdef CONFIG_PPC_BOOK3E /* * The booting_thread_hwid holds the thread id we want to boot in cpu @@ -401,7 +414,7 @@ generic_secondary_common_init: ld r12,CPU_SPEC_RESTORE(r23) cmpdi 0,r12,0 beq 3f -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 ld r12,0(r12) #endif mtctr r12 @@ -558,7 +571,7 @@ __after_prom_start: #if defined(CONFIG_PPC_BOOK3E) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif - lwz r7,__run_at_load-_stext(r26) + lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) #if defined(CONFIG_PPC_BOOK3E) tophys(r26,r26) #endif @@ -601,7 +614,7 @@ __after_prom_start: #if defined(CONFIG_PPC_BOOK3E) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif - lwz r7,__run_at_load-_stext(r26) + lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) cmplwi cr0,r7,1 bne 3f @@ -611,28 +624,35 @@ __after_prom_start: sub r5,r5,r11 #else /* just copy interrupts */ - LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext) + LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts)) #endif b 5f 3: #endif - lis r5,(copy_to_here - _stext)@ha - addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */ + /* # bytes of memory to copy */ + lis r5,(ABS_ADDR(copy_to_here))@ha + addi r5,r5,(ABS_ADDR(copy_to_here))@l bl copy_and_flush /* copy the first n bytes */ /* this includes the code being */ /* executed here. */ - addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */ - addi r12,r8,(4f - _stext)@l /* that we just made */ + /* Jump to the copy of this code that we just made */ + addis r8,r3,(ABS_ADDR(4f))@ha + addi r12,r8,(ABS_ADDR(4f))@l mtctr r12 bctr .balign 8 -p_end: .llong _end - _stext +p_end: .llong _end - copy_to_here -4: /* Now copy the rest of the kernel up to _end */ - addis r5,r26,(p_end - _stext)@ha - ld r5,(p_end - _stext)@l(r5) /* get _end */ +4: + /* + * Now copy the rest of the kernel up to _end, add + * _end - copy_to_here to the copy limit and run again. + */ + addis r8,r26,(ABS_ADDR(p_end))@ha + ld r8,(ABS_ADDR(p_end))@l(r8) + add r5,r5,r8 5: bl copy_and_flush /* copy the rest */ 9: b start_here_multiplatform @@ -941,7 +961,7 @@ start_here_multiplatform: mtspr SPRN_SRR1,r4 RFI b . /* prevent speculative execution */ - + /* This is where all platforms converge execution */ start_here_common: @@ -951,9 +971,6 @@ start_here_common: /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) - /* Do more system initializations in virtual mode */ - bl setup_system - /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ @@ -986,3 +1003,4 @@ swapper_pg_dir: .globl empty_zero_page empty_zero_page: .space PAGE_SIZE +EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 80c69472314e..fb133a163263 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -30,6 +30,8 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/fixmap.h> +#include <asm/export.h> /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 @@ -150,7 +152,6 @@ turn_on_mmu: #define EXCEPTION_PROLOG_2 \ - CLR_TOP32(r11); \ stw r10,_CCR(r11); /* save registers */ \ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ @@ -226,7 +227,7 @@ i##n: \ ret_from_except) /* System reset */ - EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) /* Machine check */ . = 0x200 @@ -321,7 +322,7 @@ SystemCall: #endif InstructionTLBMiss: -#ifdef CONFIG_8xx_CPU6 +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -329,23 +330,20 @@ InstructionTLBMiss: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) #if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) - mfcr r10 - IS_KERNEL(r11, r11) + mfcr r3 + IS_KERNEL(r11, r10) +#endif mfspr r11, SPRN_M_TW /* Get level 1 table */ +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r10 - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ -#else - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ + mtcr r3 #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -377,7 +375,7 @@ InstructionTLBMiss: MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#ifdef CONFIG_8xx_CPU6 +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 @@ -393,18 +391,28 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r10, SPRN_MD_EPN - IS_KERNEL(r11, r10) + rlwinm r10, r10, 16, 0xfff8 + cmpli cr0, r10, PAGE_OFFSET@h mfspr r11, SPRN_M_TW /* Get level 1 table */ - BRANCH_UNLESS_KERNEL(3f) + blt+ 3f +#ifndef CONFIG_PIN_TLB_IMMR + cmpli cr0, r10, VIRT_IMMR_BASE@h +#endif +_ENTRY(DTLBMiss_cmp) + cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha +#ifndef CONFIG_PIN_TLB_IMMR +_ENTRY(DTLBMiss_jmp) + beq- DTLBMissIMMR +#endif + blt cr7, DTLBMissLinear 3: + mtcr r3 + mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - mtcr r11 - bt- 28,DTLBMiss8M /* bit 28 = Large page (8M) */ - mtcr r3 /* We have a pte table, so load fetch the pte from the table. */ @@ -457,28 +465,6 @@ DataStoreTLBMiss: EXCEPTION_EPILOG_0 rfi -DTLBMiss8M: - mtcr r3 - ori r11, r11, MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) -#ifdef CONFIG_PPC_16K_PAGES - /* - * In 16k pages mode, each PGD entry defines a 64M block. - * Here we select the 8M page within the block. - */ - rlwimi r11, r10, 0, 0x03800000 -#endif - rlwinm r10, r11, 0, 0xff800000 - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ - _PAGE_PRESENT - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ - - li r11, RPN_PATTERN - mfspr r3, SPRN_SPRG_SCRATCH2 - mtspr SPRN_DAR, r11 /* Tag DAR */ - EXCEPTION_EPILOG_0 - rfi - /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -540,6 +526,43 @@ DARFixed:/* Return from dcbx instruction bug workaround */ . = 0x2000 +/* + * Bottom part of DataStoreTLBMiss handlers for IMMR area and linear RAM. + * not enough space in the DataStoreTLBMiss area. + */ +DTLBMissIMMR: + mtcr r3 + /* Set 512k byte guarded page and mark it valid */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r10, r11) + mfspr r10, SPRN_IMMR /* Get current IMMR */ + rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + _PAGE_PRESENT | _PAGE_NO_CACHE + MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ + + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 + rfi + +DTLBMissLinear: + mtcr r3 + /* Set 8M byte page and mark it valid */ + li r11, MD_PS8MEG | MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + rlwinm r10, r10, 16, 0x0f800000 /* 8xx supports max 256Mb RAM */ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + _PAGE_PRESENT + MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ + + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 + rfi + /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. * DAR is set to the calculated address. @@ -553,12 +576,16 @@ FixupDAR:/* Entry point for dcbx workaround. */ IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ BRANCH_UNLESS_KERNEL(3f) + rlwinm r11, r10, 16, 0xfff8 +_ENTRY(FixupDAR_cmp) + cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h + /* create physical page address from effective address */ + tophys(r11, r10) + blt- cr7, 201f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - mtcr r11 - bt 28,200f /* bit 28 = Large page (8M) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -584,10 +611,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ - /* concat physical page address(r11) and page offset(r10) */ -200: rlwimi r11, r10, 0, 32 - (PAGE_SHIFT << 1), 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -763,10 +786,18 @@ start_here: * virtual to physical. Also, set the cache mode since that is defined * by TLB entries and perform any additional mapping (like of the IMMR). * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel, - * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by + * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. */ initial_mmu: + li r8, 0 + mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ + lis r10, MD_RESETVAL@h +#ifndef CONFIG_8xx_COPYBACK + oris r10, r10, MD_WTDEF@h +#endif + mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ + tlbia /* Invalidate all TLB entries */ /* Always pin the first 8 MB ITLB to prevent ITLB misses while mucking around with SRR0/SRR1 in asm @@ -777,34 +808,20 @@ initial_mmu: mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ #ifdef CONFIG_PIN_TLB - lis r10, (MD_RSV4I | MD_RESETVAL)@h - ori r10, r10, 0x1c00 - mr r8, r10 -#else - lis r10, MD_RESETVAL@h -#endif -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif + oris r10, r10, MD_RSV4I@h mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ +#endif - /* Now map the lower 8 Meg into the TLBs. For this quick hack, - * we can load the instruction and data TLB registers with the - * same values. - */ + /* Now map the lower 8 Meg into the ITLB. */ lis r8, KERNELBASE@h /* Create vaddr for TLB */ ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 - mtspr SPRN_MD_EPN, r8 li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 - li r8, MI_PS8MEG /* Set 8M byte page, APG 0 */ - ori r8, r8, MI_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ - mtspr SPRN_MD_RPN, r8 + lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 @@ -812,51 +829,25 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map another 8 MByte at the IMMR to get the processor + /* Map a 512k page for the IMMR to get the processor * internal registers (among other things). */ -#ifdef CONFIG_PIN_TLB - addi r10, r10, 0x0100 +#ifdef CONFIG_PIN_TLB_IMMR + ori r10, r10, 0x1c00 mtspr SPRN_MD_CTR, r10 -#endif + mfspr r9, 638 /* Get current IMMR */ - andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */ + andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - mr r8, r9 /* Create vaddr for TLB */ + lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 - li r8, MD_PS8MEG /* Set 8M byte page */ + li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ ori r8, r8, MD_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ mtspr SPRN_MD_RPN, r8 - -#ifdef CONFIG_PIN_TLB - /* Map two more 8M kernel data pages. - */ - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 - - lis r8, KERNELBASE@h /* Create vaddr for TLB */ - addis r8, r8, 0x0080 /* Add 8M */ - ori r8, r8, MI_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r9, MI_PS8MEG /* Set 8M byte page */ - ori r9, r9, MI_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r9 - li r11, MI_BOOTINIT /* Create RPN for address 0 */ - addis r11, r11, 0x0080 /* Add 8M */ - mtspr SPRN_MD_RPN, r11 - - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 - - addis r8, r8, 0x0080 /* Add 8M */ - mtspr SPRN_MD_EPN, r8 - mtspr SPRN_MD_TWC, r9 - addis r11, r11, 0x0080 /* Add 8M */ - mtspr SPRN_MD_RPN, r11 #endif /* Since the cache is enabled according to the information we @@ -894,6 +885,7 @@ sdata: .align PAGE_SHIFT empty_zero_page: .space PAGE_SIZE +EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 3bfa3150911f..bf4c6021515f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -42,6 +42,7 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/ptrace.h> +#include <asm/export.h> #include "head_booke.h" /* As with the other PowerPC ports, it is expected that when code @@ -1223,6 +1224,7 @@ sdata: .globl empty_zero_page empty_zero_page: .space 4096 +EXPORT_SYMBOL(empty_zero_page) .globl swapper_pg_dir swapper_pg_dir: .space PGD_TABLE_SIZE diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index aec9a1b1d25b..9781c69eae57 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -206,7 +206,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_handler(struct die_args *args) +int hw_breakpoint_handler(struct die_args *args) { int rc = NOTIFY_STOP; struct perf_event *bp; @@ -290,11 +290,12 @@ out: rcu_read_unlock(); return rc; } +NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. */ -static int __kprobes single_step_dabr_instruction(struct die_args *args) +static int single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; @@ -329,11 +330,12 @@ static int __kprobes single_step_dabr_instruction(struct die_args *args) return NOTIFY_STOP; } +NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_exceptions_notify( +int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) { int ret = NOTIFY_DONE; @@ -349,6 +351,7 @@ int __kprobes hw_breakpoint_exceptions_notify( return ret; } +NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify); /* * Release the user breakpoints used by ptrace diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a89f4f7a66bd..6ca9a2ffaac7 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -65,7 +65,7 @@ static void *ibmebus_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *mem; @@ -78,7 +78,7 @@ static void *ibmebus_alloc_coherent(struct device *dev, static void ibmebus_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { kfree(vaddr); } @@ -88,7 +88,7 @@ static dma_addr_t ibmebus_map_page(struct device *dev, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return (dma_addr_t)(page_address(page) + offset); } @@ -97,7 +97,7 @@ static void ibmebus_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return; } @@ -105,7 +105,7 @@ static void ibmebus_unmap_page(struct device *dev, static int ibmebus_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -121,7 +121,7 @@ static int ibmebus_map_sg(struct device *dev, static void ibmebus_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return; } @@ -227,7 +227,7 @@ int ibmebus_request_irq(u32 ist, irq_handler_t handler, { unsigned int irq = irq_create_mapping(NULL, ist); - if (irq == NO_IRQ) + if (!irq) return -EINVAL; return request_irq(irq, handler, irq_flags, devname, dev_id); diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_book3s.S index 470ceebd2d23..bd739fed26e3 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,5 +1,6 @@ /* - * This file contains the power_save function for Power7 CPUs. + * This file contains idle entry/exit functions for POWER7, + * POWER8 and POWER9 CPUs. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,6 +21,7 @@ #include <asm/opal.h> #include <asm/cpuidle.h> #include <asm/book3s/64/mmu-hash.h> +#include <asm/mmu.h> #undef DEBUG @@ -36,22 +38,54 @@ #define _AMOR GPR9 #define _WORT GPR10 #define _WORC GPR11 +#define _PTCR GPR12 -/* Idle state entry routines */ - -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ - bne 1b; \ - IDLE_INST; \ - b . +#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK .text /* + * Used by threads before entering deep idle states. Saves SPRs + * in interrupt stack frame + */ +save_sprs_to_stack: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_PTCR + std r3,_PTCR(r1) + /* + * Note - SDR1 is dropped in Power ISA v3. Hence not restoring + * SDR1 here + */ +FTR_SECTION_ELSE + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + + blr + +/* * Used by threads when the lock bit of core_idle_state is set. * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state @@ -69,13 +103,16 @@ core_idle_lock_held: /* * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 + * - Requested STOP state in POWER9 * * To check IRQ_HAPPENED in r4 * 0 - don't check * 1 - check + * + * Address to 'rfid' to in r5 */ -_GLOBAL(power7_powersave_common) +_GLOBAL(pnv_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and @@ -126,28 +163,28 @@ _GLOBAL(power7_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, * because as soon as we do that, another thread can switch * the MMU context to the guest. */ - LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + LOAD_REG_IMMEDIATE(r7, MSR_IDLE) li r6, MSR_RI andc r6, r9, r6 - LOAD_REG_ADDR(r7, power7_enter_nap_mode) mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r5 + mtspr SPRN_SRR0, r5 + mtspr SPRN_SRR1, r7 rfid - .globl power7_enter_nap_mode -power7_enter_nap_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're napping */ - li r4,KVM_HWTHREAD_IN_NAP - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif + .globl pnv_enter_arch207_idle_mode +pnv_enter_arch207_idle_mode: stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -196,8 +233,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -206,30 +242,45 @@ fastsleep_workaround_at_entry: b common_enter enter_winkle: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) + bl save_sprs_to_stack + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) +/* + * r3 - requested stop state + */ +power_enter_stop: +/* + * Check if the requested state is a deep idle state. + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + cmpd r3,r4 + bge 2f + IDLE_STATE_ENTER_SEQ(PPC_STOP) +2: +/* + * Entering deep idle state. + * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to + * stack and enter stop + */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + +lwarx_loop_stop: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ + + stwcx. r15,0,r14 + bne- lwarx_loop_stop + isync + + bl save_sprs_to_stack + + IDLE_STATE_ENTER_SEQ(PPC_STOP) + _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ LOAD_REG_ADDRBASE(r3,powersave_nap) @@ -242,19 +293,22 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 li r3,PNV_THREAD_NAP - b power7_powersave_common + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,PNV_THREAD_SLEEP li r4,1 - b power7_powersave_common + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common /* No return */ _GLOBAL(power7_winkle) - li r3,3 + li r3,PNV_THREAD_WINKLE li r4,1 - b power7_powersave_common + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common /* No return */ #define CHECK_HMI_INTERRUPT \ @@ -270,25 +324,109 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ - bl opal_call_realmode; \ + li r3,0; /* NULL argument */ \ + bl hmi_exception_realmode; \ + nop; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; -_GLOBAL(power7_wakeup_tb_loss) +/* + * r3 - requested stop state + */ +_GLOBAL(power9_idle_stop) + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) + or r4,r4,r3 + mtspr SPRN_PSSCR, r4 + li r4, 1 + LOAD_REG_ADDR(r5,power_enter_stop) + b pnv_powersave_common + /* No return */ +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(pnv_restore_hyp_resource) +BEGIN_FTR_SECTION + ld r2,PACATOC(r13); + /* + * POWER ISA 3. Use PSSCR to determine if we + * are waking up from deep idle state + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + + mfspr r5,SPRN_PSSCR + /* + * 0-3 bits correspond to Power-Saving Level Status + * which indicates the idle state we are waking up from + */ + rldicl r5,r5,4,60 + cmpd cr4,r5,r4 + bge cr4,pnv_wakeup_tb_loss + /* + * Waking up without hypervisor state loss. Return to + * reset vector + */ + blr + +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* + * POWER ISA 2.07 or less. + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. + */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + + /* Now that we are sure r13 is corrected, load TOC */ ld r2,PACATOC(r13); + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. + + blr /* Return back to System Reset vector from where + pnv_restore_hyp_resource was invoked */ + +/* + * Called if waking up from idle state which can cause either partial or + * complete hyp state loss. + * In POWER8, called if waking up from fastsleep or winkle + * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state + * + * r13 - PACA + * cr3 - gt if waking up with partial/complete hypervisor state loss + * cr4 - gt or eq if waking up from complete hypervisor state loss. + */ +_GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) /* * Before entering any idle state, the NVGPRs are saved in the stack * and they are restored before switching to the process context. Hence * until they are restored, they are free to be used. * - * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode - * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the - * wakeup reason if we branch to kvm_start_guest. + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. */ - + mflr r17 mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -310,44 +448,45 @@ lwarx_loop2: bnel core_idle_lock_held cmpwi cr2,r15,0 - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi cr1,r4,0 /* Check if first in subcore */ /* * At this stage - * cr1 - 0b0100 if first thread to wakeup in subcore - * cr2 - 0b0100 if first thread to wakeup in core - * cr3- 0b0010 if waking up from sleep or winkle - * cr4 - 0b0100 if waking up from winkle + * cr2 - eq if first thread to wakeup in core + * cr3- gt if waking up with partial/complete hypervisor state loss + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ - or r15,r15,r7 /* Set thread bit */ - - beq cr1,first_thread_in_subcore - - /* Not first thread in subcore to wake up */ - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - b common_exit - -first_thread_in_subcore: - /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 bne- lwarx_loop2 isync +BEGIN_FTR_SECTION + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi r4,0 /* Check if first in subcore */ + + or r15,r15,r7 /* Set thread bit */ + beq first_thread_in_subcore +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + + or r15,r15,r7 /* Set thread bit */ + beq cr2,first_thread_in_core + + /* Not first thread in core or subcore to wake up */ + b clear_lock + +first_thread_in_subcore: /* * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore */ - bne cr4,subcore_state_restored + blt cr4,subcore_state_restored /* Restore per-subcore state */ ld r4,_SDR1(r1) mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) mtspr SPRN_RPR,r4 ld r4,_AMOR(r1) @@ -363,32 +502,44 @@ subcore_state_restored: first_thread_in_core: /* - * First thread in the core waking up from fastsleep. It needs to + * First thread in the core waking up from any state which can cause + * partial or complete hypervisor state loss. It needs to * call the fastsleep workaround code if the platform requires it. * Call it unconditionally here. The below branch instruction will - * be patched out when the idle states are discovered if platform - * does not require workaround. + * be patched out if the platform does not have fastsleep or does not + * require the workaround. Patching will be performed during the + * discovery of idle-states. */ .global pnv_fastsleep_workaround_at_exit pnv_fastsleep_workaround_at_exit: b fastsleep_workaround_at_exit timebase_resync: - /* Do timebase resync if we are waking up from sleep. Use cr3 value - * set in exceptions-64s.S */ + /* + * Use cr3 which indicates that we are waking up with atleast partial + * hypervisor state loss to determine if TIMEBASE RESYNC is needed. + */ ble cr3,clear_lock /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - bl opal_call_realmode; - /* TODO: Check r3 for failure */ - + bl opal_rm_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ - bne cr4,clear_lock + blt cr4,clear_lock + + /* + * First thread in the core to wake up and its waking up with + * complete hypervisor state loss. Restore per core hypervisor + * state. + */ +BEGIN_FTR_SECTION + ld r4,_PTCR(r1) + mtspr SPRN_PTCR,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* Restore per core state */ ld r4,_TSCR(r1) mtspr SPRN_TSCR,r4 ld r4,_WORC(r1) @@ -406,13 +557,13 @@ common_exit: * If waking up from sleep, hypervisor state is not lost. Hence * skip hypervisor state restore. */ - bne cr4,hypervisor_state_restored + blt cr4,hypervisor_state_restored /* Waking up from winkle */ - /* Restore per thread state */ - bl __restore_cpu_power8 - +BEGIN_MMU_FTR_SECTION + b no_segments +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) /* Restore SLB from PACA */ ld r8,PACA_SLBSHADOWPTR(r13) @@ -426,6 +577,9 @@ common_exit: slbmte r6,r5 1: addi r8,r8,16 .endr +no_segments: + + /* Restore per thread state */ ld r4,_SPURR(r1) mtspr SPRN_SPURR,r4 @@ -436,48 +590,34 @@ common_exit: ld r4,_WORT(r1) mtspr SPRN_WORT,r4 -hypervisor_state_restored: + /* Call cur_cpu_spec->cpu_restore() */ + LOAD_REG_ADDR(r4, cur_cpu_spec) + ld r4,0(r4) + ld r12,CPU_SPEC_RESTORE(r4) +#ifdef PPC64_ELF_ABI_v1 + ld r12,0(r12) +#endif + mtctr r12 + bctrl - li r5,PNV_THREAD_RUNNING - stb r5,PACA_THREAD_IDLE_STATE(r13) +hypervisor_state_restored: mtspr SPRN_SRR1,r16 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 6f - b kvm_start_guest -6: -#endif - - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r3,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r3 - mfspr r3,SPRN_SRR1 /* Return SRR1 */ - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid + mtlr r17 + blr /* Return back to System Reset vector from where + pnv_restore_hyp_resource was invoked */ fastsleep_workaround_at_exit: li r3,1 li r4,0 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state b timebase_resync /* * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_loss) +_GLOBAL(pnv_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -497,10 +637,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_noloss) +_GLOBAL(pnv_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 - bne power7_wakeup_loss + bne pnv_wakeup_loss BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 12e48d56f771..3963f0b68d52 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -38,6 +38,18 @@ EXPORT_SYMBOL(ioread16); EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifdef __powerpc64__ +u64 ioread64(void __iomem *addr) +{ + return readq(addr); +} +u64 ioread64be(void __iomem *addr) +{ + return readq_be(addr); +} +EXPORT_SYMBOL(ioread64); +EXPORT_SYMBOL(ioread64be); +#endif /* __powerpc64__ */ void iowrite8(u8 val, void __iomem *addr) { @@ -64,6 +76,18 @@ EXPORT_SYMBOL(iowrite16); EXPORT_SYMBOL(iowrite16be); EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); +#ifdef __powerpc64__ +void iowrite64(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +void iowrite64be(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +EXPORT_SYMBOL(iowrite64); +EXPORT_SYMBOL(iowrite64be); +#endif /* __powerpc64__ */ /* * These are the "repeat read/write" functions. Note the diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a8e3490b54e3..5f202a566ec5 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -307,7 +307,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *page, unsigned int npages, enum dma_data_direction direction, unsigned long mask, unsigned int align_order, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long entry; dma_addr_t ret = DMA_ERROR_CODE; @@ -431,7 +431,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dma_next = 0, dma_addr; struct scatterlist *s, *outs, *segstart; @@ -479,7 +479,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { - if (printk_ratelimit()) + if (!(attrs & DMA_ATTR_NO_WARN) && + printk_ratelimit()) dev_info(dev, "iommu_alloc failed, tbl %p " "vaddr %lx npages %lu\n", tbl, vaddr, npages); @@ -574,7 +575,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; @@ -753,7 +754,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, struct page *page, unsigned long offset, size_t size, unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dma_handle = DMA_ERROR_CODE; void *vaddr; @@ -776,7 +777,8 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, mask >> tbl->it_page_shift, align, attrs); if (dma_handle == DMA_ERROR_CODE) { - if (printk_ratelimit()) { + if (!(attrs & DMA_ATTR_NO_WARN) && + printk_ratelimit()) { dev_info(dev, "iommu_alloc failed, tbl %p " "vaddr %p npages %d\n", tbl, vaddr, npages); @@ -790,7 +792,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned int npages; @@ -845,7 +847,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, nio_pages = size >> tbl->it_page_shift; io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> tbl->it_page_shift, io_order, NULL); + mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3cb46a3b1de7..3c05c311e35e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,6 +67,7 @@ #include <asm/smp.h> #include <asm/debug.h> #include <asm/livepatch.h> +#include <asm/asm-prototypes.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -75,6 +76,7 @@ #endif #define CREATE_TRACE_POINTS #include <asm/trace.h> +#include <asm/cpu_has_feature.h> DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); @@ -155,6 +157,15 @@ notrace unsigned int __check_irq_replay(void) } /* + * Check if an hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first. + */ + local_paca->irq_happened &= ~PACA_IRQ_HMI; + if (happened & PACA_IRQ_HMI) + return 0xe60; + + /* * We may have missed a decrementer interrupt. We check the * decrementer itself rather than the paca irq_happened field * in case we also had a rollover while hard disabled @@ -189,11 +200,6 @@ notrace unsigned int __check_irq_replay(void) } #endif /* CONFIG_PPC_BOOK3E */ - /* Check if an hypervisor Maintenance interrupt happened */ - local_paca->irq_happened &= ~PACA_IRQ_HMI; - if (happened & PACA_IRQ_HMI) - return 0xe60; - /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -250,7 +256,7 @@ notrace void arch_local_irq_restore(unsigned long en) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAG */ +#endif /* CONFIG_TRACE_IRQFLAGS */ set_soft_enabled(0); @@ -342,6 +348,21 @@ bool prep_irq_for_idle(void) return true; } +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) @@ -498,7 +519,7 @@ void __do_irq(struct pt_regs *regs) may_hard_irq_enable(); /* And finally process it */ - if (unlikely(irq == NO_IRQ)) + if (unlikely(!irq)) __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7c053f281406..e785cc9e1ecd 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -29,7 +29,7 @@ #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/preempt.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> #include <asm/code-patching.h> @@ -278,12 +278,11 @@ no_kprobe: * - When the probed function returns, this probe * causes the handlers to fire */ -static void __used kretprobe_trampoline_holder(void) -{ - asm volatile(".global kretprobe_trampoline\n" - "kretprobe_trampoline:\n" - "nop\n"); -} +asm(".global kretprobe_trampoline\n" + ".type kretprobe_trampoline, @function\n" + "kretprobe_trampoline:\n" + "nop\n" + ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); /* * Called when the probe at kretprobe trampoline is hit @@ -506,13 +505,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->nip = arch_deref_entry_point(jp->entry); -#ifdef CONFIG_PPC64 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 regs->gpr[12] = (unsigned long)jp->entry; -#else +#elif defined(PPC64_ELF_ABI_v1) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif -#endif return 1; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 7b750c4ed5c7..bc525ea0dc09 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -193,10 +193,10 @@ static int __init add_legacy_soc_port(struct device_node *np, */ if (tsi && !strcmp(tsi->type, "tsi-bridge")) return add_legacy_port(np, -1, UPIO_TSI, addr, addr, - NO_IRQ, legacy_port_flags, 0); + 0, legacy_port_flags, 0); else return add_legacy_port(np, -1, UPIO_MEM, addr, addr, - NO_IRQ, legacy_port_flags, 0); + 0, legacy_port_flags, 0); } static int __init add_legacy_isa_port(struct device_node *np, @@ -242,7 +242,7 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Add port, irq will be dealt with later */ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), - taddr, NO_IRQ, legacy_port_flags, 0); + taddr, 0, legacy_port_flags, 0); } @@ -314,7 +314,7 @@ static int __init add_legacy_pci_port(struct device_node *np, /* Add port, irq will be dealt with later. We passed a translated * IO port value. It will be fixed up later along with the irq */ - return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, + return add_legacy_port(np, index, iotype, base, addr, 0, legacy_port_flags, np != pci_dev); } #endif @@ -462,14 +462,14 @@ static void __init fixup_port_irq(int index, DBG("fixup_port_irq(%d)\n", index); virq = irq_of_parse_and_map(np, 0); - if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) { + if (!virq && legacy_serial_infos[index].irq_check_parent) { np = of_get_parent(np); if (np == NULL) return; virq = irq_of_parse_and_map(np, 0); of_node_put(np); } - if (virq == NO_IRQ) + if (!virq) return; port->irq = virq; @@ -543,7 +543,7 @@ static int __init serial_dev_init(void) struct plat_serial8250_port *port = &legacy_serial_ports[i]; struct device_node *np = legacy_serial_infos[i].np; - if (port->irq == NO_IRQ) + if (!port->irq) fixup_port_irq(i, np, port); if (port->iotype == UPIO_PORT) fixup_port_pio(i, np, port); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index b8c202d63ecb..a205fa3d9bf3 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -23,28 +23,15 @@ #include <asm/current.h> #include <asm/machdep.h> #include <asm/cacheflush.h> +#include <asm/firmware.h> #include <asm/paca.h> #include <asm/mmu.h> #include <asm/sections.h> /* _end */ #include <asm/prom.h> #include <asm/smp.h> #include <asm/hw_breakpoint.h> +#include <asm/asm-prototypes.h> -#ifdef CONFIG_PPC_BOOK3E -int default_machine_kexec_prepare(struct kimage *image) -{ - int i; - /* - * Since we use the kernel fault handlers and paging code to - * handle the virtual mode, we must make sure no destination - * overlaps kernel static data or bss. - */ - for (i = 0; i < image->nr_segments; i++) - if (image->segment[i].mem < __pa(_end)) - return -ETXTBSY; - return 0; -} -#else int default_machine_kexec_prepare(struct kimage *image) { int i; @@ -54,9 +41,6 @@ int default_machine_kexec_prepare(struct kimage *image) const unsigned long *basep; const unsigned int *sizep; - if (!ppc_md.hpte_clear_all) - return -ENOENT; - /* * Since we use the kernel fault handlers and paging code to * handle the virtual mode, we must make sure no destination @@ -66,31 +50,6 @@ int default_machine_kexec_prepare(struct kimage *image) if (image->segment[i].mem < __pa(_end)) return -ETXTBSY; - /* - * For non-LPAR, we absolutely can not overwrite the mmu hash - * table, since we are still using the bolted entries in it to - * do the copy. Check that here. - * - * It is safe if the end is below the start of the blocked - * region (end <= low), or if the beginning is after the - * end of the blocked region (begin >= high). Use the - * boolean identity !(a || b) === (!a && !b). - */ -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) { - low = __pa(htab_address); - high = low + htab_size_bytes; - - for (i = 0; i < image->nr_segments; i++) { - begin = image->segment[i].mem; - end = begin + image->segment[i].memsz; - - if ((begin < high) && (end > low)) - return -ETXTBSY; - } - } -#endif /* CONFIG_PPC_STD_MMU_64 */ - /* We also should not overwrite the tce tables */ for_each_node_by_type(node, "pci") { basep = of_get_property(node, "linux,tce-base", NULL); @@ -112,7 +71,6 @@ int default_machine_kexec_prepare(struct kimage *image) return 0; } -#endif /* !CONFIG_PPC_BOOK3E */ static void copy_segments(unsigned long ind) { @@ -331,11 +289,14 @@ struct paca_struct kexec_paca; /* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, void *image, void *control, - void (*clear_all)(void)) __noreturn; + void (*clear_all)(void), + bool copy_with_mmu_off) __noreturn; /* too late to fail here */ void default_machine_kexec(struct kimage *image) { + bool copy_with_mmu_off; + /* prepare control code if any */ /* @@ -373,13 +334,29 @@ void default_machine_kexec(struct kimage *image) /* XXX: If anyone does 'dynamic lppacas' this will also need to be * switched to a static version! */ + /* + * On Book3S, the copy must happen with the MMU off if we are either + * using Radix page tables or we are not in an LPAR since we can + * overwrite the page tables while copying. + * + * In an LPAR, we keep the MMU on otherwise we can't access beyond + * the RMA. On BookE there is no real MMU off mode, so we have to + * keep it enabled as well (but then we have bolted TLB entries). + */ +#ifdef CONFIG_PPC_BOOK3E + copy_with_mmu_off = false; +#else + copy_with_mmu_off = radix_enabled() || + !(firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)); +#endif /* Some things are best done in assembly. Finding globals with * a toc is easier in C, so pass in what we can. */ kexec_sequence(&kexec_stack, image->start, image, - page_address(image->control_code_page), - ppc_md.hpte_clear_all); + page_address(image->control_code_page), + mmu_cleanup_all, copy_with_mmu_off); /* NOTREACHED */ } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ef267fd9dd22..5e7ece0fda9f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->in_use = 1; mce->initiator = MCE_INITIATOR_CPU; - if (handled) + /* Mark it recovered if we have handled it and MSR(RI=1). */ + if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 0d432194c018..384357cb8bc0 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -18,6 +18,7 @@ #include <asm/unistd.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> +#include <asm/export.h> .text @@ -118,3 +119,4 @@ _GLOBAL(longjmp) _GLOBAL(current_stack_pointer) PPC_LL r3,0(r1) blr +EXPORT_SYMBOL(current_stack_pointer) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 285ca8c6cc2e..93cf7a5846a6 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -33,6 +33,7 @@ #include <asm/kexec.h> #include <asm/bug.h> #include <asm/ptrace.h> +#include <asm/export.h> .text @@ -104,20 +105,6 @@ _GLOBAL(mulhdu) blr /* - * sub_reloc_offset(x) returns x - reloc_offset(). - */ -_GLOBAL(sub_reloc_offset) - mflr r0 - bl 1f -1: mflr r5 - lis r4,1b@ha - addi r4,r4,1b@l - subf r5,r4,r5 - subf r3,r5,r3 - mtlr r0 - blr - -/* * reloc_got2 runs through the .got2 section adding an offset * to each entry. */ @@ -333,6 +320,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) #endif /* CONFIG_4xx */ isync blr +EXPORT_SYMBOL(flush_instruction_cache) #endif /* CONFIG_PPC_8xx */ /* @@ -342,7 +330,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) * * flush_icache_range(unsigned long start, unsigned long stop) */ -_KPROBE(flush_icache_range) +_GLOBAL(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr /* for 601, do nothing */ @@ -372,6 +360,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) sync /* additional sync needed on g4 */ isync blr +_ASM_NOKPROBE_SYMBOL(flush_icache_range) +EXPORT_SYMBOL(flush_icache_range) + /* * Flush a particular page from the data cache to RAM. * Note: this is necessary because the instruction cache does *not* @@ -509,6 +500,7 @@ _GLOBAL(copy_page) li r0,MAX_COPY_PREFETCH li r11,4 b 2b +EXPORT_SYMBOL(copy_page) /* * Extended precision shifts. @@ -536,6 +528,7 @@ _GLOBAL(__ashrdi3) sraw r3,r3,r5 # MSW = MSW >> count or r4,r4,r7 # LSW |= t2 blr +EXPORT_SYMBOL(__ashrdi3) _GLOBAL(__ashldi3) subfic r6,r5,32 @@ -547,6 +540,7 @@ _GLOBAL(__ashldi3) slw r4,r4,r5 # LSW = LSW << count or r3,r3,r7 # MSW |= t2 blr +EXPORT_SYMBOL(__ashldi3) _GLOBAL(__lshrdi3) subfic r6,r5,32 @@ -558,6 +552,7 @@ _GLOBAL(__lshrdi3) srw r3,r3,r5 # MSW = MSW >> count or r4,r4,r7 # LSW |= t2 blr +EXPORT_SYMBOL(__lshrdi3) /* * 64-bit comparison: __cmpdi2(s64 a, s64 b) @@ -573,6 +568,7 @@ _GLOBAL(__cmpdi2) bltlr li r3,2 blr +EXPORT_SYMBOL(__cmpdi2) /* * 64-bit comparison: __ucmpdi2(u64 a, u64 b) * Returns 0 if a < b, 1 if a == b, 2 if a > b. @@ -587,6 +583,7 @@ _GLOBAL(__ucmpdi2) bltlr li r3,2 blr +EXPORT_SYMBOL(__ucmpdi2) _GLOBAL(__bswapdi2) rotlwi r9,r4,8 @@ -598,6 +595,7 @@ _GLOBAL(__bswapdi2) mr r3,r9 mr r4,r10 blr +EXPORT_SYMBOL(__bswapdi2) #ifdef CONFIG_SMP _GLOBAL(start_secondary_resume) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index f28754c497e5..4f178671f230 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,6 +27,7 @@ #include <asm/kexec.h> #include <asm/ptrace.h> #include <asm/mmu.h> +#include <asm/export.h> .text @@ -66,7 +67,7 @@ PPC64_CACHES: * flush all bytes from start through stop-1 inclusive */ -_KPROBE(flush_icache_range) +_GLOBAL(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr @@ -109,7 +110,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) bdnz 2b isync blr - .previous .text +_ASM_NOKPROBE_SYMBOL(flush_icache_range) +EXPORT_SYMBOL(flush_icache_range) + /* * Like above, but only do the D-cache. * @@ -139,6 +142,7 @@ _GLOBAL(flush_dcache_range) bdnz 0b sync blr +EXPORT_SYMBOL(flush_dcache_range) /* * Like above, but works on non-mapped physical addresses. @@ -242,6 +246,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) blr _GLOBAL(__bswapdi2) +EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 rlwinm r7,r3,8,0xffffffff rlwimi r7,r3,24,0,7 @@ -591,7 +596,8 @@ real_mode: /* assume normal blr return */ #endif /* - * kexec_sequence(newstack, start, image, control, clear_all()) + * kexec_sequence(newstack, start, image, control, clear_all(), + copy_with_mmu_off) * * does the grungy work with stack switching and real mode switches * also does simple calls to other code @@ -627,7 +633,7 @@ _GLOBAL(kexec_sequence) mr r29,r5 /* image (virt) */ mr r28,r6 /* control, unused */ mr r27,r7 /* clear_all() fn desc */ - mr r26,r8 /* spare */ + mr r26,r8 /* copy_with_mmu_off */ lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ /* disable interrupts, we are overwriting kernel data next */ @@ -639,15 +645,24 @@ _GLOBAL(kexec_sequence) mtmsrd r3,1 #endif + /* We need to turn the MMU off unless we are in hash mode + * under a hypervisor + */ + cmpdi r26,0 + beq 1f + bl real_mode +1: /* copy dest pages, flush whole dest image */ mr r3,r29 bl kexec_copy_flush /* (image) */ - /* turn off mmu */ + /* turn off mmu now if not done earlier */ + cmpdi r26,0 + bne 1f bl real_mode /* copy 0x100 bytes starting at start to 0 */ - li r3,0 +1: li r3,0 mr r4,r30 /* start, aka phys mem offset */ li r5,0x100 li r6,0 @@ -659,16 +674,17 @@ _GLOBAL(kexec_sequence) li r6,1 stw r6,kexec_flag-1b(5) -#ifndef CONFIG_PPC_BOOK3E + cmpdi r27,0 + beq 1f + /* clear out hardware hash page table and tlb */ -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 ld r12,0(r27) /* deref function descriptor */ #else mr r12,r27 #endif mtctr r12 - bctrl /* ppc_md.hpte_clear_all(void); */ -#endif /* !CONFIG_PPC_BOOK3E */ + bctrl /* mmu_hash_ops.hpte_clear_all(void); */ /* * kexec image calling is: @@ -695,7 +711,7 @@ _GLOBAL(kexec_sequence) * are the boot cpu ????? * other device tree differences (prop sizes, va vs pa, etc)... */ - mr r3,r25 # my phys cpu +1: mr r3,r25 # my phys cpu mr r4,r30 # start, aka phys mem offset mtlr 4 li r5,0 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index d1f1b35bf0c7..30b89d5cbb03 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -27,7 +27,7 @@ #include <linux/sort.h> #include <asm/setup.h> -LIST_HEAD(module_bug_list); +static LIST_HEAD(module_bug_list); static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 9ce9a25f58b5..183368e008cf 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -41,7 +41,7 @@ this, and makes other things simpler. Anton? --RR. */ -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -132,7 +132,7 @@ static u32 ppc64_stub_insns[] = { /* Save current r2 value in magic place on the stack. */ 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */ 0xe98b0020, /* ld r12,32(r11) */ -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ 0xe84b0028, /* ld r2,40(r11) */ #endif @@ -494,9 +494,10 @@ static bool is_early_mcount_callsite(u32 *instruction) restore r2. */ static int restore_r2(u32 *instruction, struct module *me) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; + if (*instruction != PPC_INST_NOP) { - if (is_early_mcount_callsite(instruction - 1)) - return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 856f9a7944cd..34d2c595de23 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -444,7 +444,8 @@ static int nvram_pstore_write(enum pstore_type_id type, */ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, - bool *compressed, struct pstore_info *psi) + bool *compressed, ssize_t *ecc_notice_size, + struct pstore_info *psi) { struct oops_log_info *oops_hdr; unsigned int err_type, id_no, size = 0; @@ -541,10 +542,11 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_nsec = 0; } *buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); + kfree(buff); if (*buf == NULL) return -ENOMEM; - kfree(buff); + *ecc_notice_size = 0; if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) *compressed = true; else @@ -849,7 +851,7 @@ static long dev_nvram_ioctl(struct file *file, unsigned int cmd, } } -const struct file_operations nvram_fops = { +static const struct file_operations nvram_fops = { .owner = THIS_MODULE, .llseek = dev_nvram_llseek, .read = dev_nvram_read, @@ -954,7 +956,7 @@ int __init nvram_remove_partition(const char *name, int sig, /* Make partition a free partition */ part->header.signature = NVRAM_SIG_FREE; - strncpy(part->header.name, "wwwwwwwwwwww", 12); + memset(part->header.name, 'w', 12); part->header.checksum = nvram_checksum(&part->header); rc = nvram_write_header(part); if (rc <= 0) { @@ -972,8 +974,8 @@ int __init nvram_remove_partition(const char *name, int sig, } if (prev) { prev->header.length += part->header.length; - prev->header.checksum = nvram_checksum(&part->header); - rc = nvram_write_header(part); + prev->header.checksum = nvram_checksum(&prev->header); + rc = nvram_write_header(prev); if (rc <= 0) { printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); return rc; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 93dae296b6be..fa20060ff7a5 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -184,7 +184,7 @@ void setup_paca(struct paca_struct *new_paca) * if we do a GET_PACA() before the feature fixups have been * applied */ - if (cpu_has_feature(CPU_FTR_HVMODE)) + if (early_cpu_has_feature(CPU_FTR_HVMODE)) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0f7a60f1e9f6..74bec5498972 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -41,14 +41,22 @@ #include <asm/ppc-pci.h> #include <asm/eeh.h> +/* hose_spinlock protects accesses to the the phb_bitmap. */ static DEFINE_SPINLOCK(hose_spinlock); LIST_HEAD(hose_list); -/* XXX kill that some day ... */ -static int global_phb_number; /* Global phb counter */ +/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */ +#define MAX_PHBS 0x10000 + +/* + * For dynamic PHB numbering: used/free PHBs tracking bitmap. + * Accesses to this bitmap should be protected by hose_spinlock. + */ +static DECLARE_BITMAP(phb_bitmap, MAX_PHBS); /* ISA Memory physical address */ resource_size_t isa_mem_base; +EXPORT_SYMBOL(isa_mem_base); static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; @@ -64,6 +72,45 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); +/* + * This function should run under locking protection, specifically + * hose_spinlock. + */ +static int get_phb_number(struct device_node *dn) +{ + int ret, phb_id = -1; + u32 prop_32; + u64 prop; + + /* + * Try fixed PHB numbering first, by checking archs and reading + * the respective device-tree properties. Firstly, try powernv by + * reading "ibm,opal-phbid", only present in OPAL environment. + */ + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) { + ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); + prop = prop_32; + } + + if (!ret) + phb_id = (int)(prop & (MAX_PHBS - 1)); + + /* We need to be sure to not use the same PHB number twice. */ + if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) + return phb_id; + + /* + * If not pseries nor powernv, or if fixed PHB numbering tried to add + * the same PHB number twice, then fallback to dynamic PHB numbering. + */ + phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); + BUG_ON(phb_id >= MAX_PHBS); + set_bit(phb_id, phb_bitmap); + + return phb_id; +} + struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -72,7 +119,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) if (phb == NULL) return NULL; spin_lock(&hose_spinlock); - phb->global_number = global_phb_number++; + phb->global_number = get_phb_number(dev); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); phb->dn = dev; @@ -94,6 +141,11 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller); void pcibios_free_controller(struct pci_controller *phb) { spin_lock(&hose_spinlock); + + /* Clear bit of phb_bitmap to allow reuse of this PHB number. */ + if (phb->global_number < MAX_PHBS) + clear_bit(phb->global_number, phb_bitmap); + list_del(&phb->list_node); spin_unlock(&hose_spinlock); @@ -103,6 +155,42 @@ void pcibios_free_controller(struct pci_controller *phb) EXPORT_SYMBOL_GPL(pcibios_free_controller); /* + * This function is used to call pcibios_free_controller() + * in a deferred manner: a callback from the PCI subsystem. + * + * _*DO NOT*_ call pcibios_free_controller() explicitly if + * this is used (or it may access an invalid *phb pointer). + * + * The callback occurs when all references to the root bus + * are dropped (e.g., child buses/devices and their users). + * + * It's called as .release_fn() of 'struct pci_host_bridge' + * which is associated with the 'struct pci_controller.bus' + * (root bus) - it expects .release_data to hold a pointer + * to 'struct pci_controller'. + * + * In order to use it, register .release_fn()/release_data + * like this: + * + * pci_set_host_bridge_release(bridge, + * pcibios_free_controller_deferred + * (void *) phb); + * + * e.g. in the pcibios_root_bridge_prepare() callback from + * pci_create_root_bus(). + */ +void pcibios_free_controller_deferred(struct pci_host_bridge *bridge) +{ + struct pci_controller *phb = (struct pci_controller *) + bridge->release_data; + + pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic); + + pcibios_free_controller(phb); +} +EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred); + +/* * The function is used to return the minimal alignment * for memory or I/O windows of the associated P2P bridge. * By default, 4KiB alignment for I/O windows and 1MiB for @@ -124,6 +212,14 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + + if (hose->controller_ops.setup_bridge) + hose->controller_ops.setup_bridge(bus, type); +} + void pcibios_reset_secondary_bus(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); @@ -265,7 +361,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) line, pin); virq = irq_create_mapping(NULL, line); - if (virq != NO_IRQ) + if (virq) irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", @@ -274,7 +370,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) virq = irq_create_of_mapping(&oirq); } - if(virq == NO_IRQ) { + + if (!virq) { pr_debug(" Failed to map !\n"); return -1; } @@ -356,36 +453,6 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, } /* - * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci - * device mapping. - */ -static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, - pgprot_t protection, - enum pci_mmap_state mmap_state, - int write_combine) -{ - - /* Write combine is always 0 on non-memory space mappings. On - * memory space, if the user didn't pass 1, we check for a - * "prefetchable" resource. This is a bit hackish, but we use - * this to workaround the inability of /sysfs to provide a write - * combine bit - */ - if (mmap_state != pci_mmap_mem) - write_combine = 0; - else if (write_combine == 0) { - if (rp->flags & IORESOURCE_PREFETCH) - write_combine = 1; - } - - /* XXX would be nice to have a way to ask for write-through */ - if (write_combine) - return pgprot_noncached_wc(protection); - else - return pgprot_noncached(protection); -} - -/* * This one is used by /dev/mem and fbdev who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine @@ -458,9 +525,10 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, - vma->vm_page_prot, - mmap_state, write_combine); + if (write_combine) + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot); @@ -610,39 +678,25 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, resource_size_t *start, resource_size_t *end) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - resource_size_t offset = 0; + struct pci_bus_region region; - if (hose == NULL) + if (rsrc->flags & IORESOURCE_IO) { + pcibios_resource_to_bus(dev->bus, ®ion, + (struct resource *) rsrc); + *start = region.start; + *end = region.end; return; + } - if (rsrc->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - - /* We pass a fully fixed up address to userland for MMIO instead of - * a BAR value because X is lame and expects to be able to use that - * to pass to /dev/mem ! - * - * That means that we'll have potentially 64 bits values where some - * userland apps only expect 32 (like X itself since it thinks only - * Sparc has 64 bits MMIO) but if we don't do that, we break it on - * 32 bits CHRPs :-( + /* We pass a CPU physical address to userland for MMIO instead of a + * BAR value because X is lame and expects to be able to use that + * to pass to /dev/mem! * - * Hopefully, the sysfs insterface is immune to that gunk. Once X - * has been fixed (and the fix spread enough), we can re-enable the - * 2 lines below and pass down a BAR value to userland. In that case - * we'll also have to re-enable the matching code in - * __pci_mmap_make_offset(). - * - * BenH. + * That means we may have 64-bit values where some apps only expect + * 32 (like X itself since it thinks only Sparc has 64-bit MMIO). */ -#if 0 - else if (rsrc->flags & IORESOURCE_MEM) - offset = hose->pci_mem_offset; -#endif - - *start = rsrc->start - offset; - *end = rsrc->end - offset; + *start = rsrc->start; + *end = rsrc->end; } /** @@ -1362,8 +1416,10 @@ void __init pcibios_resource_survey(void) /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + } /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the @@ -1436,8 +1492,12 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); - if (!pci_has_flag(PCI_PROBE_ONLY)) - pci_assign_unassigned_bus_resources(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (bus->self) + pci_assign_unassigned_bridge_resources(bus->self); + else + pci_assign_unassigned_bus_resources(bus); + } /* Fixup EEH */ eeh_add_device_tree_late(bus); @@ -1485,9 +1545,9 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, res = &hose->io_resource; if (!res->flags) { - pr_info("PCI: I/O resource not set for host" - " bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + pr_debug("PCI: I/O resource not set for host" + " bridge %s (domain %d)\n", + hose->dn->full_name, hose->global_number); } else { offset = pcibios_io_space_offset(hose); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1f7930037cb7..678f87a63645 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -32,6 +32,8 @@ unsigned long isa_io_base = 0; unsigned long pci_dram_offset = 0; int pcibios_assign_bus_offset = 1; +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(pci_dram_offset); void pcibios_make_OF_bus_map(void); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..ed5e9ff61a68 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ @@ -82,7 +81,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) /* If this is not a PHB, we only flush the hash table over * the area mapped by this bridge. We don't play with the PTE - * mappings since we might have to deal with sub-page alignemnts + * mappings since we might have to deal with sub-page alignments * so flushing the hash table is the only sane way to make sure * that no hash entries are covering that removed bridge area * while still allowing other busses overlapping those pages diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index ecdccce78719..592693437070 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -31,6 +31,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> #include <asm/firmware.h> +#include <asm/eeh.h> /* * The function is used to find the firmware data of one @@ -181,7 +182,6 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; - struct eeh_dev *edev; int i; /* Only support IOV for now */ @@ -199,6 +199,8 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + struct eeh_dev *edev __maybe_unused; + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); @@ -208,11 +210,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH /* Create the EEH device for the VF */ - eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); - edev = pdn_to_eeh_dev(pdn); + edev = eeh_dev_init(pdn); BUG_ON(!edev); edev->physfn = pdev; +#endif /* CONFIG_EEH */ } #endif /* CONFIG_PCI_IOV */ @@ -224,7 +227,6 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; - struct eeh_dev *edev; int i; /* @@ -260,18 +262,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) * a batch mode. */ for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + struct eeh_dev *edev __maybe_unused; + list_for_each_entry_safe(pdn, tmp, &parent->child_list, list) { if (pdn->busno != pci_iov_virtfn_bus(pdev, i) || pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; +#ifdef CONFIG_EEH /* Release EEH device for the VF */ edev = pdn_to_eeh_dev(pdn); if (edev) { pdn->edev = NULL; kfree(edev); } +#endif /* CONFIG_EEH */ if (!list_empty(&pdn->list)) list_del(&pdn->list); @@ -289,8 +295,11 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, const __be32 *regs; struct device_node *parent; struct pci_dn *pdn; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif - pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; dn->data = pdn; @@ -319,6 +328,15 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, /* Extended config space */ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); + /* Create EEH device */ +#ifdef CONFIG_EEH + edev = eeh_dev_init(pdn); + if (!edev) { + kfree(pdn); + return NULL; + } +#endif + /* Attach to parent node */ INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -504,15 +522,19 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) * pci device found underneath. This routine runs once, * early in the boot sequence. */ -void __init pci_devs_phb_init(void) +static int __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ list_for_each_entry_safe(phb, tmp, &hose_list, list_node) pci_devs_phb_init_dynamic(phb); + + return 0; } +core_initcall(pci_devs_phb_init); + static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 526ac6750e4d..ea3d98115b88 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -178,7 +178,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->hdr_type = PCI_HEADER_TYPE_NORMAL; dev->rom_base_reg = PCI_ROM_ADDRESS; /* Maybe do a default OF mapping here */ - dev->irq = NO_IRQ; + dev->irq = 0; } of_pci_parse_addrs(node, dev); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c deleted file mode 100644 index 9f01e28ecef3..000000000000 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ /dev/null @@ -1,37 +0,0 @@ -#include <linux/ftrace.h> -#include <linux/mm.h> - -#include <asm/processor.h> -#include <asm/switch_to.h> -#include <asm/cacheflush.h> -#include <asm/epapr_hcalls.h> - -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(flush_dcache_range); -#endif -EXPORT_SYMBOL(flush_icache_range); - -EXPORT_SYMBOL(empty_zero_page); - -long long __bswapdi2(long long); -EXPORT_SYMBOL(__bswapdi2); - -#ifdef CONFIG_FUNCTION_TRACER -EXPORT_SYMBOL(_mcount); -#endif - -#ifdef CONFIG_PPC_FPU -EXPORT_SYMBOL(load_fp_state); -EXPORT_SYMBOL(store_fp_state); -#endif - -#ifdef CONFIG_ALTIVEC -EXPORT_SYMBOL(load_vr_state); -EXPORT_SYMBOL(store_vr_state); -#endif - -#ifdef CONFIG_EPAPR_PARAVIRT -EXPORT_SYMBOL(epapr_hypercall_start); -#endif - -EXPORT_SYMBOL(current_stack_pointer); diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c deleted file mode 100644 index 2bfaafe5be99..000000000000 --- a/arch/powerpc/kernel/ppc_ksyms_32.c +++ /dev/null @@ -1,60 +0,0 @@ -#include <linux/export.h> -#include <linux/smp.h> - -#include <asm/page.h> -#include <asm/dma.h> -#include <asm/io.h> -#include <asm/hw_irq.h> -#include <asm/time.h> -#include <asm/mmu_context.h> -#include <asm/pgtable.h> -#include <asm/dcr.h> - -EXPORT_SYMBOL(ISA_DMA_THRESHOLD); -EXPORT_SYMBOL(DMA_MODE_READ); -EXPORT_SYMBOL(DMA_MODE_WRITE); - -#if defined(CONFIG_PCI) -EXPORT_SYMBOL(isa_io_base); -EXPORT_SYMBOL(isa_mem_base); -EXPORT_SYMBOL(pci_dram_offset); -#endif - -#ifdef CONFIG_SMP -EXPORT_SYMBOL(smp_hw_index); -#endif - -long long __ashrdi3(long long, int); -long long __ashldi3(long long, int); -long long __lshrdi3(long long, int); -int __ucmpdi2(unsigned long long, unsigned long long); -int __cmpdi2(long long, long long); -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__lshrdi3); -EXPORT_SYMBOL(__ucmpdi2); -EXPORT_SYMBOL(__cmpdi2); - -EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(tb_ticks_per_jiffy); - -EXPORT_SYMBOL(switch_mmu_context); - -#ifdef CONFIG_PPC_STD_MMU_32 -extern long mol_trampoline; -EXPORT_SYMBOL(mol_trampoline); /* For MOL */ -EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -#ifdef CONFIG_SMP -extern int mmu_hash_lock; -EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -#endif /* CONFIG_SMP */ -extern long *intercept_table; -EXPORT_SYMBOL(intercept_table); -#endif /* CONFIG_PPC_STD_MMU_32 */ - -#ifdef CONFIG_PPC_DCR_NATIVE -EXPORT_SYMBOL(__mtdcr); -EXPORT_SYMBOL(__mfdcr); -#endif - -EXPORT_SYMBOL(flush_instruction_cache); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..9e7c10fe205f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -58,6 +58,8 @@ #include <asm/code-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> +#include <asm/cpu_has_feature.h> +#include <asm/asm-prototypes.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -87,7 +89,13 @@ static void check_if_tm_restore_required(struct task_struct *tsk) set_thread_flag(TIF_RESTORE_TM); } } + +static inline bool msr_tm_active(unsigned long msr) +{ + return MSR_TM_ACTIVE(msr); +} #else +static inline bool msr_tm_active(unsigned long msr) { return false; } static inline void check_if_tm_restore_required(struct task_struct *tsk) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -103,7 +111,7 @@ static int __init enable_strict_msr_control(char *str) } early_param("ppc_strict_facility_enable", enable_strict_msr_control); -void msr_check_and_set(unsigned long bits) +unsigned long msr_check_and_set(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -117,6 +125,8 @@ void msr_check_and_set(unsigned long bits) if (oldmsr != newmsr) mtmsr_isync(newmsr); + + return newmsr; } void __msr_check_and_clear(unsigned long bits) @@ -139,12 +149,16 @@ EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU void __giveup_fpu(struct task_struct *tsk) { + unsigned long msr; + save_fpu(tsk); - tsk->thread.regs->msr &= ~MSR_FP; + msr = tsk->thread.regs->msr; + msr &= ~MSR_FP; #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) - tsk->thread.regs->msr &= ~MSR_VSX; + msr &= ~MSR_VSX; #endif + tsk->thread.regs->msr = msr; } void giveup_fpu(struct task_struct *tsk) @@ -191,19 +205,30 @@ EXPORT_SYMBOL_GPL(flush_fp_to_thread); void enable_kernel_fp(void) { + unsigned long cpumsr; + WARN_ON(preemptible()); - msr_check_and_set(MSR_FP); + cpumsr = msr_check_and_set(MSR_FP); if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { check_if_tm_restore_required(current); + /* + * If a thread has already been reclaimed then the + * checkpointed registers are on the CPU but have definitely + * been saved by the reclaim code. Don't need to and *cannot* + * giveup as this would save to the 'live' structure not the + * checkpointed structure. + */ + if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + return; __giveup_fpu(current); } } EXPORT_SYMBOL(enable_kernel_fp); static int restore_fp(struct task_struct *tsk) { - if (tsk->thread.load_fp) { + if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) { load_fp_state(¤t->thread.fp_state); current->thread.load_fp++; return 1; @@ -219,12 +244,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; } static void __giveup_altivec(struct task_struct *tsk) { + unsigned long msr; + save_altivec(tsk); - tsk->thread.regs->msr &= ~MSR_VEC; + msr = tsk->thread.regs->msr; + msr &= ~MSR_VEC; #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) - tsk->thread.regs->msr &= ~MSR_VSX; + msr &= ~MSR_VSX; #endif + tsk->thread.regs->msr = msr; } void giveup_altivec(struct task_struct *tsk) @@ -239,12 +268,23 @@ EXPORT_SYMBOL(giveup_altivec); void enable_kernel_altivec(void) { + unsigned long cpumsr; + WARN_ON(preemptible()); - msr_check_and_set(MSR_VEC); + cpumsr = msr_check_and_set(MSR_VEC); if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { check_if_tm_restore_required(current); + /* + * If a thread has already been reclaimed then the + * checkpointed registers are on the CPU but have definitely + * been saved by the reclaim code. Don't need to and *cannot* + * giveup as this would save to the 'live' structure not the + * checkpointed structure. + */ + if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + return; __giveup_altivec(current); } } @@ -269,7 +309,8 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); static int restore_altivec(struct task_struct *tsk) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) { + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) { load_vr_state(&tsk->thread.vr_state); tsk->thread.used_vr = 1; tsk->thread.load_vec++; @@ -312,12 +353,23 @@ static void save_vsx(struct task_struct *tsk) void enable_kernel_vsx(void) { + unsigned long cpumsr; + WARN_ON(preemptible()); - msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); + cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { check_if_tm_restore_required(current); + /* + * If a thread has already been reclaimed then the + * checkpointed registers are on the CPU but have definitely + * been saved by the reclaim code. Don't need to and *cannot* + * giveup as this would save to the 'live' structure not the + * checkpointed structure. + */ + if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + return; if (current->thread.regs->msr & MSR_FP) __giveup_fpu(current); if (current->thread.regs->msr & MSR_VEC) @@ -429,6 +481,7 @@ void giveup_all(struct task_struct *tsk) return; msr_check_and_set(msr_all_available); + check_if_tm_restore_required(tsk); #ifdef CONFIG_PPC_FPU if (usermsr & MSR_FP) @@ -455,7 +508,8 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - if (!current->thread.load_fp && !loadvec(current->thread)) + if (!msr_tm_active(regs->msr) && + !current->thread.load_fp && !loadvec(current->thread)) return; msr = regs->msr; @@ -758,29 +812,15 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + +static inline bool tm_enabled(struct task_struct *tsk) +{ + return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM); +} + static void tm_reclaim_thread(struct thread_struct *thr, struct thread_info *ti, uint8_t cause) { - unsigned long msr_diff = 0; - - /* - * If FP/VSX registers have been already saved to the - * thread_struct, move them to the transact_fp array. - * We clear the TIF_RESTORE_TM bit since after the reclaim - * the thread will no longer be transactional. - */ - if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) { - msr_diff = thr->ckpt_regs.msr & ~thr->regs->msr; - if (msr_diff & MSR_FP) - memcpy(&thr->transact_fp, &thr->fp_state, - sizeof(struct thread_fp_state)); - if (msr_diff & MSR_VEC) - memcpy(&thr->transact_vr, &thr->vr_state, - sizeof(struct thread_vr_state)); - clear_ti_thread_flag(ti, TIF_RESTORE_TM); - msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; - } - /* * Use the current MSR TM suspended bit to track if we have * checkpointed state outstanding. @@ -794,20 +834,14 @@ static void tm_reclaim_thread(struct thread_struct *thr, * this state. * We do this using the current MSR, rather tracking it in * some specific thread_struct bit, as it has the additional - * benifit of checking for a potential TM bad thing exception. + * benefit of checking for a potential TM bad thing exception. */ if (!MSR_TM_SUSPENDED(mfmsr())) return; - tm_reclaim(thr, thr->regs->msr, cause); + giveup_all(container_of(thr, struct task_struct, thread)); - /* Having done the reclaim, we now have the checkpointed - * FP/VSX values in the registers. These might be valid - * even if we have previously called enable_kernel_fp() or - * flush_fp_to_thread(), so update thr->regs->msr to - * indicate their current validity. - */ - thr->regs->msr |= msr_diff; + tm_reclaim(thr, thr->ckpt_regs.msr, cause); } void tm_reclaim_current(uint8_t cause) @@ -823,8 +857,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk) * * In switching we need to maintain a 2nd register state as * oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the - * checkpointed (tbegin) state in ckpt_regs and saves the transactional - * (current) FPRs into oldtask->thread.transact_fpr[]. + * checkpointed (tbegin) state in ckpt_regs, ckfp_state and + * ckvr_state * * We also context switch (save) TFHAR/TEXASR/TFIAR in here. */ @@ -836,14 +870,6 @@ static inline void tm_reclaim_task(struct task_struct *tsk) if (!MSR_TM_ACTIVE(thr->regs->msr)) goto out_and_saveregs; - /* Stash the original thread MSR, as giveup_fpu et al will - * modify it. We hold onto it to see whether the task used - * FP & vector regs. If the TIF_RESTORE_TM flag is set, - * ckpt_regs.msr is already set. - */ - if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM)) - thr->ckpt_regs.msr = thr->regs->msr; - TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", tsk->pid, thr->regs->nip, @@ -872,6 +898,9 @@ void tm_recheckpoint(struct thread_struct *thread, { unsigned long flags; + if (!(thread->regs->msr & MSR_TM)) + return; + /* We really can't be interrupted here as the TEXASR registers can't * change and later in the trecheckpoint code, we have a userspace R1. * So let's hard disable over this region. @@ -901,10 +930,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) * If the task was using FP, we non-lazily reload both the original and * the speculative FP register states. This is because the kernel * doesn't see if/when a TM rollback occurs, so if we take an FP - * unavoidable later, we are unable to determine which set of FP regs + * unavailable later, we are unable to determine which set of FP regs * need to be restored. */ - if (!new->thread.regs) + if (!tm_enabled(new)) return; if (!MSR_TM_ACTIVE(new->thread.regs->msr)){ @@ -917,35 +946,35 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) "(new->msr 0x%lx, new->origmsr 0x%lx)\n", new->pid, new->thread.regs->msr, msr); - /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&new->thread, msr); - /* This loads the speculative FP/VEC state, if used */ - if (msr & MSR_FP) { - do_load_up_transact_fpu(&new->thread); - new->thread.regs->msr |= - (MSR_FP | new->thread.fpexc_mode); - } -#ifdef CONFIG_ALTIVEC - if (msr & MSR_VEC) { - do_load_up_transact_altivec(&new->thread); - new->thread.regs->msr |= MSR_VEC; - } -#endif - /* We may as well turn on VSX too since all the state is restored now */ - if (msr & MSR_VSX) - new->thread.regs->msr |= MSR_VSX; + /* + * The checkpointed state has been restored but the live state has + * not, ensure all the math functionality is turned off to trigger + * restore_math() to reload. + */ + new->thread.regs->msr &= ~(MSR_FP | MSR_VEC | MSR_VSX); TM_DEBUG("*** tm_recheckpoint of pid %d complete " "(kernel msr 0x%lx)\n", new->pid, mfmsr()); } -static inline void __switch_to_tm(struct task_struct *prev) +static inline void __switch_to_tm(struct task_struct *prev, + struct task_struct *new) { if (cpu_has_feature(CPU_FTR_TM)) { - tm_enable(); - tm_reclaim_task(prev); + if (tm_enabled(prev) || tm_enabled(new)) + tm_enable(); + + if (tm_enabled(prev)) { + prev->thread.load_tm++; + tm_reclaim_task(prev); + if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && prev->thread.load_tm == 0) + prev->thread.regs->msr &= ~MSR_TM; + } + + tm_recheckpoint_new_task(new); } } @@ -967,6 +996,12 @@ void restore_tm_state(struct pt_regs *regs) { unsigned long msr_diff; + /* + * This is the only moment we should clear TIF_RESTORE_TM as + * it is here that ckpt_regs.msr and pt_regs.msr become the same + * again, anything else could lead to an incorrect ckpt_msr being + * saved and therefore incorrect signal contexts. + */ clear_thread_flag(TIF_RESTORE_TM); if (!MSR_TM_ACTIVE(regs->msr)) return; @@ -974,6 +1009,13 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; + /* Ensure that restore_math() will restore */ + if (msr_diff & MSR_FP) + current->thread.load_fp = 1; +#ifdef CONFIG_ALIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) + current->thread.load_vec = 1; +#endif restore_math(regs); regs->msr |= msr_diff; @@ -981,7 +1023,7 @@ void restore_tm_state(struct pt_regs *regs) #else #define tm_recheckpoint_new_task(new) -#define __switch_to_tm(prev) +#define __switch_to_tm(prev, new) #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ static inline void save_sprs(struct thread_struct *t) @@ -1009,6 +1051,14 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Conditionally save Load Monitor registers, if enabled */ + if (t->fscr & FSCR_LM) { + t->lmrr = mfspr(SPRN_LMRR); + t->lmser = mfspr(SPRN_LMSER); + } + } #endif } @@ -1023,18 +1073,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; - u64 fscr = old_thread->fscr & ~FSCR_DSCR; - - if (new_thread->dscr_inherit) { + if (new_thread->dscr_inherit) dscr = new_thread->dscr; - fscr |= FSCR_DSCR; - } if (old_thread->dscr != dscr) mtspr(SPRN_DSCR, dscr); - - if (old_thread->fscr != fscr) - mtspr(SPRN_FSCR, fscr); } if (cpu_has_feature(CPU_FTR_ARCH_207S)) { @@ -1045,9 +1088,22 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->ebbrr != new_thread->ebbrr) mtspr(SPRN_EBBRR, new_thread->ebbrr); + if (old_thread->fscr != new_thread->fscr) + mtspr(SPRN_FSCR, new_thread->fscr); + if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Conditionally restore Load Monitor registers, if enabled */ + if (new_thread->fscr & FSCR_LM) { + if (old_thread->lmrr != new_thread->lmrr) + mtspr(SPRN_LMRR, new_thread->lmrr); + if (old_thread->lmser != new_thread->lmser) + mtspr(SPRN_LMSER, new_thread->lmser); + } + } #endif } @@ -1108,11 +1164,11 @@ struct task_struct *__switch_to(struct task_struct *prev, */ save_sprs(&prev->thread); - __switch_to_tm(prev); - /* Save FPU, Altivec, VSX and SPE state */ giveup_all(prev); + __switch_to_tm(prev, new); + /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out @@ -1120,8 +1176,6 @@ struct task_struct *__switch_to(struct task_struct *prev, */ hard_irq_disable(); - tm_recheckpoint_new_task(new); - /* * Call restore_sprs() before calling _switch(). If we move it after * _switch() then we miss out on calling it for new tasks. The reason @@ -1356,9 +1410,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * transitions the CPU out of TM mode. Hence we need to call * tm_recheckpoint_new_task() (on the same task) to restore the * checkpointed state back and the TM mode. + * + * Can't pass dst because it isn't ready. Doesn't matter, passing + * dst is only important for __switch_to() */ - __switch_to_tm(src); - tm_recheckpoint_new_task(src); + __switch_to_tm(src, src); *dst = *src; @@ -1505,6 +1561,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; @@ -1590,8 +1656,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.used_spe = 0; #endif /* CONFIG_SPE */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (cpu_has_feature(CPU_FTR_TM)) - regs->msr |= MSR_TM; current->thread.tm_tfhar = 0; current->thread.tm_texasr = 0; current->thread.tm_tfiar = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 946e34ffeae9..b0245bed6f54 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,8 @@ #include <asm/opal.h> #include <asm/fadump.h> #include <asm/debug.h> +#include <asm/epapr_hcalls.h> +#include <asm/firmware.h> #include <mm/mmu_decl.h> @@ -168,7 +170,7 @@ static struct ibm_pa_feature { */ {CPU_FTR_TM_COMP, 0, 0, PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, - {0, MMU_FTR_RADIX, 0, 0, 40, 0, 0}, + {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -735,10 +737,22 @@ void __init early_init_devtree(void *params) spinning_secondaries = boot_cpu_count - 1; #endif + mmu_early_init_devtree(); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); #endif + epapr_paravirt_early_init(); + + /* Now try to figure out if we are running on LPAR and so on */ + pseries_probe_fw_features(); + +#ifdef CONFIG_PPC_PS3 + /* Identify PS3 firmware */ + if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; +#endif DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index da5192590c44..88ac964f4858 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -42,6 +42,7 @@ #include <asm/sections.h> #include <asm/machdep.h> #include <asm/opal.h> +#include <asm/asm-prototypes.h> #include <linux/linux_logo.h> @@ -656,6 +657,7 @@ unsigned char ibm_architecture_vec[] = { W(0xffff0000), W(0x003e0000), /* POWER6 */ W(0xffff0000), W(0x003f0000), /* POWER7 */ W(0xffff0000), W(0x004b0000), /* POWER8E */ + W(0xffff0000), W(0x004c0000), /* POWER8NVL */ W(0xffff0000), W(0x004d0000), /* POWER8 */ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ @@ -694,7 +696,7 @@ unsigned char ibm_architecture_vec[] = { OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ /* option vector 5: PAPR/OF options */ - VECTOR_LENGTH(18), /* length */ + VECTOR_LENGTH(21), /* length */ 0, /* don't ignore, don't halt */ OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | @@ -718,15 +720,18 @@ unsigned char ibm_architecture_vec[] = { * must match by the macro below. Update the definition if * the structure layout changes. */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 125 +#define IBM_ARCH_VEC_NRCORES_OFFSET 133 W(NR_CPUS), /* number of cores supported */ 0, 0, 0, 0, OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), - OV5_FEAT(OV5_SUB_PROCESSORS), + OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ + 0, /* Byte 18 */ + 0, /* Byte 19 */ + 0, /* Byte 20 */ + OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ /* option vector 6: IBM PAPR hints */ VECTOR_LENGTH(3), /* length */ @@ -2639,6 +2644,86 @@ static void __init fixup_device_tree_efika(void) #define fixup_device_tree_efika() #endif +#ifdef CONFIG_PPC_PASEMI_NEMO +/* + * CFE supplied on Nemo is broken in several ways, biggest + * problem is that it reassigns ISA interrupts to unused mpic ints. + * Add an interrupt-controller property for the io-bridge to use + * and correct the ints so we can attach them to an irq_domain + */ +static void __init fixup_device_tree_pasemi(void) +{ + u32 interrupts[2], parent, rval, val = 0; + char *name, *pci_name; + phandle iob, node; + + /* Find the root pci node */ + name = "/pxp@0,e0000000"; + iob = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(iob)) + return; + + /* check if interrupt-controller node set yet */ + if (prom_getproplen(iob, "interrupt-controller") !=PROM_ERROR) + return; + + prom_printf("adding interrupt-controller property for SB600...\n"); + + prom_setprop(iob, name, "interrupt-controller", &val, 0); + + pci_name = "/pxp@0,e0000000/pci@11"; + node = call_prom("finddevice", 1, 1, ADDR(pci_name)); + parent = ADDR(iob); + + for( ; prom_next_node(&node); ) { + /* scan each node for one with an interrupt */ + if (!PHANDLE_VALID(node)) + continue; + + rval = prom_getproplen(node, "interrupts"); + if (rval == 0 || rval == PROM_ERROR) + continue; + + prom_getprop(node, "interrupts", &interrupts, sizeof(interrupts)); + if ((interrupts[0] < 212) || (interrupts[0] > 222)) + continue; + + /* found a node, update both interrupts and interrupt-parent */ + if ((interrupts[0] >= 212) && (interrupts[0] <= 215)) + interrupts[0] -= 203; + if ((interrupts[0] >= 216) && (interrupts[0] <= 220)) + interrupts[0] -= 213; + if (interrupts[0] == 221) + interrupts[0] = 14; + if (interrupts[0] == 222) + interrupts[0] = 8; + + prom_setprop(node, pci_name, "interrupts", interrupts, + sizeof(interrupts)); + prom_setprop(node, pci_name, "interrupt-parent", &parent, + sizeof(parent)); + } + + /* + * The io-bridge has device_type set to 'io-bridge' change it to 'isa' + * so that generic isa-bridge code can add the SB600 and its on-board + * peripherals. + */ + name = "/pxp@0,e0000000/io-bridge@0"; + iob = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(iob)) + return; + + /* device_type is already set, just change it. */ + + prom_printf("Changing device_type of SB600 node...\n"); + + prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); +} +#else /* !CONFIG_PPC_PASEMI_NEMO */ +static inline void fixup_device_tree_pasemi(void) { } +#endif + static void __init fixup_device_tree(void) { fixup_device_tree_maple(); @@ -2646,6 +2731,7 @@ static void __init fixup_device_tree(void) fixup_device_tree_chrp(); fixup_device_tree_pmac(); fixup_device_tree_efika(); + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) @@ -2939,7 +3025,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { - prom_printf("Booting Linux via __start() ...\n"); + prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); prom_debug("->dt_header_start=0x%x\n", hdr); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 30a03c03fe73..b1ec62f2cc31 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -38,6 +38,8 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> +#include <asm/tm.h> +#include <asm/asm-prototypes.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -64,6 +66,10 @@ struct pt_regs_offset { {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} #define REG_OFFSET_END {.name = NULL, .offset = 0} +#define TVSO(f) (offsetof(struct thread_vr_state, f)) +#define TFSO(f) (offsetof(struct thread_fp_state, f)) +#define TSO(f) (offsetof(struct thread_struct, f)) + static const struct pt_regs_offset regoffset_table[] = { GPR_OFFSET_NAME(0), GPR_OFFSET_NAME(1), @@ -114,6 +120,24 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_END, }; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state. + */ + + if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(TM_CAUSE_SIGNAL); + +} +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif + /** * regs_query_register_offset() - query register offset from its name * @name: the name of a register @@ -181,6 +205,26 @@ static int set_user_msr(struct task_struct *task, unsigned long msr) return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static unsigned long get_user_ckpt_msr(struct task_struct *task) +{ + return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; +} + +static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; + task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.ckpt_regs.trap = trap & 0xfff0; + return 0; +} +#endif + #ifdef CONFIG_PPC64 static int get_user_dscr(struct task_struct *task, unsigned long *data) { @@ -358,6 +402,18 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return ret; } +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -365,25 +421,38 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_VSX u64 buf[33]; int i; -#endif + flush_fp_to_thread(target); -#ifdef CONFIG_VSX /* copy to local buffer then write that out */ for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_FPR(i); buf[32] = target->thread.fp_state.fpscr; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - #else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32][0])); + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); #endif } +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) @@ -391,21 +460,23 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_VSX u64 buf[33]; int i; -#endif + flush_fp_to_thread(target); -#ifdef CONFIG_VSX /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) return i; + for (i = 0; i < 32 ; i++) target->thread.TS_FPR(i) = buf[i]; target->thread.fp_state.fpscr = buf[32]; return 0; #else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32][0])); + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); @@ -433,6 +504,20 @@ static int vr_active(struct task_struct *target, return target->thread.used_vr ? regset->n : 0; } +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ static int vr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -456,7 +541,9 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, u32 word; } vrsave; memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); } @@ -464,6 +551,20 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, return ret; } +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ static int vr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) @@ -487,7 +588,9 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, u32 word; } vrsave; memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); if (!ret) @@ -512,6 +615,18 @@ static int vsr_active(struct task_struct *target, return target->thread.used_vsr ? regset->n : 0; } +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ static int vsr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -519,16 +634,32 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset, u64 buf[32]; int ret, i; + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); flush_vsx_to_thread(target); for (i = 0; i < 32 ; i++) buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); return ret; } +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ static int vsr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) @@ -536,13 +667,16 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, u64 buf[32]; int ret,i; + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); flush_vsx_to_thread(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); - for (i = 0; i < 32 ; i++) - target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return ret; } @@ -614,8 +748,1031 @@ static int evr_set(struct task_struct *target, const struct user_regset *regset, } #endif /* CONFIG_SPE */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/** + * tm_cgpr_active - get active number of registers in CGPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed GPR category. + */ +static int tm_cgpr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cgpr_get - get CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds all the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function gets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +static int tm_cgpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_ckpt_msr(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct pt_regs), -1); + + return ret; +} /* + * tm_cgpr_set - set the CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function sets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +static int tm_cgpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +/** + * tm_cfpr_active - get active number of registers in CFPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed FPR category. + */ +static int tm_cfpr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cfpr_get - get CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed FPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +static int tm_cfpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/** + * tm_cfpr_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * FPR register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +static int tm_cfpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_CKFPR(i) = buf[i]; + target->thread.ckfp_state.fpscr = buf[32]; + return 0; +} + +/** + * tm_cvmx_active - get active number of registers in CVMX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in checkpointed VMX category. + */ +static int tm_cvmx_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cvmx_get - get CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +static int tm_cvmx_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckvr_state, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + } + + return ret; +} + +/** + * tm_cvmx_set - set CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +static int tm_cvmx_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckvr_state, 0, + 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + if (!ret) + target->thread.ckvrsave = vrsave.word; + } + + return ret; +} + +/** + * tm_cvsx_active - get active number of registers in CVSX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed VSX category. + */ +static int tm_cvsx_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/** + * tm_cvsx_get - get CVSX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VSX registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed VSX registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +static int tm_cvsx_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/** + * tm_cvsx_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VSX registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * VSX register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +static int tm_cvsx_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} + +/** + * tm_spr_active - get active number of registers in TM SPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks the active number of available + * regisers in the transactional memory SPR category. + */ +static int tm_spr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + return regset->n; +} + +/** + * tm_spr_get - get the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. + * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +static int tm_spr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +/** + * tm_spr_set - set the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. + * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +static int tm_spr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +static int tm_tar_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +static int tm_tar_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +static int tm_tar_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +static int tm_ppr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + + +static int tm_ppr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +static int tm_ppr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +static int tm_dscr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +static int tm_dscr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +static int tm_dscr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +#ifdef CONFIG_PPC64 +static int ppr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ppr, 0, sizeof(u64)); + return ret; +} + +static int ppr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ppr, 0, sizeof(u64)); + return ret; +} + +static int dscr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); + return ret; +} +static int dscr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); + return ret; +} +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +static int tar_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); + return ret; +} +static int tar_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); + return ret; +} + +static int ebb_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return regset->n; + + return 0; +} + +static int ebb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (!target->thread.used_ebb) + return -ENODATA; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbrr, 0, 3 * sizeof(unsigned long)); +} + +static int ebb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbrr, 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbhr, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.bescr, + 2 * sizeof(unsigned long), 3 * sizeof(unsigned long)); + + return ret; +} +static int pmu_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return regset->n; +} + +static int pmu_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.siar, 0, + 5 * sizeof(unsigned long)); +} + +static int pmu_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.siar, 0, + sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sdar, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sier, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr2, 3 * sizeof(unsigned long), + 4 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr0, 4 * sizeof(unsigned long), + 5 * sizeof(unsigned long)); + return ret; +} +#endif +/* * These are our native regset flavors. */ enum powerpc_regset { @@ -630,6 +1787,25 @@ enum powerpc_regset { #ifdef CONFIG_SPE REGSET_SPE, #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + REGSET_TM_CGPR, /* TM checkpointed GPR registers */ + REGSET_TM_CFPR, /* TM checkpointed FPR registers */ + REGSET_TM_CVMX, /* TM checkpointed VMX registers */ + REGSET_TM_CVSX, /* TM checkpointed VSX registers */ + REGSET_TM_SPR, /* TM specific SPR registers */ + REGSET_TM_CTAR, /* TM checkpointed TAR register */ + REGSET_TM_CPPR, /* TM checkpointed PPR register */ + REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ +#endif +#ifdef CONFIG_PPC64 + REGSET_PPR, /* PPR register */ + REGSET_DSCR, /* DSCR register */ +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + REGSET_TAR, /* TAR register */ + REGSET_EBB, /* EBB registers */ + REGSET_PMR, /* Performance Monitor Registers */ +#endif }; static const struct user_regset native_regsets[] = { @@ -664,6 +1840,77 @@ static const struct user_regset native_regsets[] = { .active = evr_active, .get = evr_get, .set = evr_set }, #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, + [REGSET_PMR] = { + .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + .size = sizeof(u64), .align = sizeof(u64), + .active = pmu_active, .get = pmu_get, .set = pmu_set + }, +#endif }; static const struct user_regset_view user_ppc_native_view = { @@ -674,25 +1921,15 @@ static const struct user_regset_view user_ppc_native_view = { #ifdef CONFIG_PPC64 #include <linux/compat.h> -static int gpr32_get(struct task_struct *target, +static int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + void *kbuf, void __user *ubuf, + unsigned long *regs) { - const unsigned long *regs = &target->thread.regs->gpr[0]; compat_ulong_t *k = kbuf; compat_ulong_t __user *u = ubuf; compat_ulong_t reg; - int i; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } pos /= sizeof(reg); count /= sizeof(reg); @@ -731,21 +1968,16 @@ static int gpr32_get(struct task_struct *target, PT_REGS_COUNT * sizeof(reg), -1); } -static int gpr32_set(struct task_struct *target, +static int gpr32_set_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + const void *kbuf, const void __user *ubuf, + unsigned long *regs) { - unsigned long *regs = &target->thread.regs->gpr[0]; const compat_ulong_t *k = kbuf; const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - pos /= sizeof(reg); count /= sizeof(reg); @@ -804,6 +2036,61 @@ static int gpr32_set(struct task_struct *target, (PT_TRAP + 1) * sizeof(reg), -1); } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static int tm_cgpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} + +static int tm_cgpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int i; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* + * We have a partial register set. + * Fill 14-31 with bogus values. + */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + /* * These are the regset flavors matching the CONFIG_PPC32 native set. */ @@ -832,6 +2119,73 @@ static const struct user_regset compat_regsets[] = { .active = evr_active, .get = evr_get, .set = evr_set }, #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, + .get = tm_cgpr32_get, .set = tm_cgpr32_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, +#endif }; static const struct user_regset_view user_ppc_compat_view = { @@ -1783,12 +3137,12 @@ static int do_seccomp(struct pt_regs *regs) * have already loaded -ENOSYS into r3, or seccomp has put * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). */ - if (__secure_computing()) + if (__secure_computing(NULL)) return -1; /* * The syscall was allowed by seccomp, restore the register - * state to what ptrace and audit expect. + * state to what audit expects. * Note that we use orig_gpr3, which means a seccomp tracer can * modify the first syscall parameter (in orig_gpr3) and also * allow the syscall to proceed. @@ -1822,22 +3176,25 @@ static inline int do_seccomp(struct pt_regs *regs) { return 0; } */ long do_syscall_trace_enter(struct pt_regs *regs) { - bool abort = false; - user_exit(); + /* + * The tracer may decide to abort the syscall, if so tracehook + * will return !0. Note that the tracer may also just change + * regs->gpr[0] to an invalid syscall number, that is handled + * below on the exit path. + */ + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(regs)) + goto skip; + + /* Run seccomp after ptrace; allow it to set gpr[3]. */ if (do_seccomp(regs)) return -1; - if (test_thread_flag(TIF_SYSCALL_TRACE)) { - /* - * The tracer may decide to abort the syscall, if so tracehook - * will return !0. Note that the tracer may also just change - * regs->gpr[0] to an invalid syscall number, that is handled - * below on the exit path. - */ - abort = tracehook_report_syscall_entry(regs) != 0; - } + /* Avoid trace and audit when syscall is invalid. */ + if (regs->gpr[0] >= NR_syscalls) + goto skip; if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gpr[0]); @@ -1854,17 +3211,16 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gpr[5] & 0xffffffff, regs->gpr[6] & 0xffffffff); - if (abort || regs->gpr[0] >= NR_syscalls) { - /* - * If we are aborting explicitly, or if the syscall number is - * now invalid, set the return value to -ENOSYS. - */ - regs->gpr[3] = -ENOSYS; - return -1; - } - /* Return the possibly modified but valid syscall number */ return regs->gpr[0]; + +skip: + /* + * If we are aborting explicitly, or if the syscall number is + * now invalid, set the return value to -ENOSYS. + */ + regs->gpr[3] = -ENOSYS; + return -1; } void do_syscall_trace_leave(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index fb2fb3ea85e5..c82eed97bd22 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -698,7 +698,7 @@ static void check_location(struct seq_file *m, const char *c) /* * Format: * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] - * the '.' may be an abbrevation + * the '.' may be an abbreviation */ static void check_location_string(struct seq_file *m, const char *c) { diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 28736ff27fea..6a3e5de544ce 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -685,7 +685,7 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } -void rtas_restart(char *cmd) +void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); @@ -704,7 +704,7 @@ void rtas_power_off(void) for (;;); } -void rtas_halt(void) +void __noreturn rtas_halt(void) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); @@ -1070,7 +1070,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) nret = be32_to_cpu(args.nret); token = be32_to_cpu(args.token); - if (nargs > ARRAY_SIZE(args.args) + if (nargs >= ARRAY_SIZE(args.args) || nret > ARRAY_SIZE(args.args) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; @@ -1174,7 +1174,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index c638e2487a9c..a26a02006576 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -483,7 +483,7 @@ static void rtas_event_scan(struct work_struct *w) } #ifdef CONFIG_PPC64 -static void retreive_nvram_error_log(void) +static void retrieve_nvram_error_log(void) { unsigned int err_type ; int rc ; @@ -501,7 +501,7 @@ static void retreive_nvram_error_log(void) } } #else /* CONFIG_PPC64 */ -static void retreive_nvram_error_log(void) +static void retrieve_nvram_error_log(void) { } #endif /* CONFIG_PPC64 */ @@ -513,7 +513,7 @@ static void start_event_scan(void) (30000 / rtas_event_scan_rate)); /* Retrieve errors from nvram if any */ - retreive_nvram_error_log(); + retrieve_nvram_error_log(); schedule_delayed_work_on(cpumask_first(cpu_online_mask), &event_scan_work, event_scan_delay); @@ -526,10 +526,8 @@ void rtas_cancel_event_scan(void) } EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); -static int __init rtas_init(void) +static int __init rtas_event_scan_init(void) { - struct proc_dir_entry *entry; - if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -562,13 +560,27 @@ static int __init rtas_init(void) return -ENOMEM; } + start_event_scan(); + + return 0; +} +arch_initcall(rtas_event_scan_init); + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!machine_is(pseries) && !machine_is(chrp)) + return 0; + + if (!rtas_log_buf) + return -ENODEV; + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, &proc_rtas_log_operations); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); - start_event_scan(); - return 0; } __initcall(rtas_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 8ca79b7503d8..270ee30abdcf 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -35,6 +35,7 @@ #include <linux/percpu.h> #include <linux/memblock.h> #include <linux/of_platform.h> +#include <linux/hugetlb.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/prom.h> @@ -61,6 +62,13 @@ #include <asm/cputhreads.h> #include <mm/mmu_decl.h> #include <asm/fadump.h> +#include <asm/udbg.h> +#include <asm/hugetlb.h> +#include <asm/livepatch.h> +#include <asm/mmu_context.h> +#include <asm/cpu_has_feature.h> + +#include "setup.h" #ifdef DEBUG #include <asm/udbg.h> @@ -123,15 +131,26 @@ void machine_shutdown(void) ppc_md.machine_shutdown(); } +static void machine_hang(void) +{ + pr_emerg("System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) + ; +} + void machine_restart(char *cmd) { machine_shutdown(); if (ppc_md.restart) ppc_md.restart(cmd); + smp_send_stop(); - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; + + do_kernel_restart(cmd); + mdelay(1000); + + machine_hang(); } void machine_power_off(void) @@ -139,10 +158,9 @@ void machine_power_off(void) machine_shutdown(); if (pm_power_off) pm_power_off(); + smp_send_stop(); - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; + machine_hang(); } /* Used by the G5 thermal driver */ EXPORT_SYMBOL_GPL(machine_power_off); @@ -155,10 +173,9 @@ void machine_halt(void) machine_shutdown(); if (ppc_md.halt) ppc_md.halt(); + smp_send_stop(); - printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); - while (1) ; + machine_hang(); } @@ -494,7 +511,7 @@ void __init smp_setup_cpu_maps(void) * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && + if (firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; const __be32 *ireg; @@ -575,6 +592,7 @@ void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; + unsigned int i; /* * Iterate all ppc_md structures until we find the proper @@ -582,6 +600,17 @@ void probe_machine(void) */ DBG("Probing machine type ...\n"); + /* + * Check ppc_md is empty, if not we have a bug, ie, we setup an + * entry before probe_machine() which will be overwritten + */ + for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) { + if (((void **)&ppc_md)[i]) { + printk(KERN_ERR "Entry %d in ppc_md non empty before" + " machine probe !\n", i); + } + } + for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { @@ -676,6 +705,8 @@ static struct notifier_block ppc_panic_block = { void __init setup_panic(void) { + if (!ppc_md.panic) + return; atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); } @@ -744,3 +775,169 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) pdev->dev.dma_mask = &pdev->archdata.dma_mask; set_dma_ops(&pdev->dev, &dma_direct_ops); } + +static __init void print_system_info(void) +{ + pr_info("-----------------------------------------------------\n"); +#ifdef CONFIG_PPC_STD_MMU_64 + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + pr_info("Hash_size = 0x%lx\n", Hash_size); +#endif + pr_info("phys_mem_size = 0x%llx\n", + (unsigned long long)memblock_phys_mem_size()); + + pr_info("dcache_bsize = 0x%x\n", dcache_bsize); + pr_info("icache_bsize = 0x%x\n", icache_bsize); + if (ucache_bsize != 0) + pr_info("ucache_bsize = 0x%x\n", ucache_bsize); + + pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", + (unsigned long)CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", + (unsigned long)CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", + cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); +#ifdef CONFIG_PPC64 + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); +#endif + +#ifdef CONFIG_PPC_STD_MMU_64 + if (htab_address) + pr_info("htab_address = 0x%p\n", htab_address); + if (htab_hash_mask) + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + if (Hash) + pr_info("Hash = 0x%p\n", Hash); + if (Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +#endif + + if (PHYSICAL_START > 0) + pr_info("physical_start = 0x%llx\n", + (unsigned long long)PHYSICAL_START); + pr_info("-----------------------------------------------------\n"); +} + +/* + * Called into from start_kernel this initializes memblock, which is used + * to manage page allocation until mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + *cmdline_p = boot_command_line; + + /* Set a half-reasonable default so udelay does something sensible */ + loops_per_jiffy = 500000000 / HZ; + + /* Unflatten the device-tree passed by prom_init or kexec */ + unflatten_device_tree(); + + /* + * Initialize cache line/block info from device-tree (on ppc64) or + * just cputable (on ppc32). + */ + initialize_cache_info(); + + /* Initialize RTAS if available. */ + rtas_initialize(); + + /* Check if we have an initrd provided via the device-tree. */ + check_for_initrd(); + + /* Probe the machine type, establish ppc_md. */ + probe_machine(); + + /* Setup panic notifier if requested by the platform. */ + setup_panic(); + + /* + * Configure ppc_md.power_save (ppc32 only, 64-bit machines do + * it from their respective probe() function. + */ + setup_power_save(); + + /* Discover standard serial ports. */ + find_legacy_serial_ports(); + + /* Register early console with the printk subsystem. */ + register_early_udbg_console(); + + /* Setup the various CPU maps based on the device-tree. */ + smp_setup_cpu_maps(); + + /* Initialize xmon. */ + xmon_setup(); + + /* Check the SMT related command line arguments (ppc64). */ + check_smt_enabled(); + + /* On BookE, setup per-core TLB data structures. */ + setup_tlb_core_data(); + + /* + * Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids. + * + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them. + */ +#ifdef CONFIG_SMP + smp_release_cpus(); +#endif + + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); + + /* Reserve large chunks of memory for use by CMA for KVM. */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + + klp_init_thread_info(&init_thread_info); + + init_mm.start_code = (unsigned long)_stext; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; +#ifdef CONFIG_PPC_64K_PAGES + init_mm.context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&init_mm.context); +#endif + irqstack_early_init(); + exc_lvl_early_init(); + emergency_stack_init(); + + initmem_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + if (ppc_md.setup_arch) + ppc_md.setup_arch(); + + paging_init(); + + /* Initialize the MMU context management stuff. */ + mmu_context_init(); + +#ifdef CONFIG_PPC64 + /* Interrupt code needs to be 64K-aligned. */ + if ((unsigned long)_stext & 0xffff) + panic("Kernelbase not 64K-aligned (0x%lx)!\n", + (unsigned long)_stext); +#endif +} diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h new file mode 100644 index 000000000000..cfba134b3024 --- /dev/null +++ b/arch/powerpc/kernel/setup.h @@ -0,0 +1,58 @@ +/* + * Prototypes for functions that are shared between setup_(32|64|common).c + * + * Copyright 2016 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ARCH_POWERPC_KERNEL_SETUP_H +#define __ARCH_POWERPC_KERNEL_SETUP_H + +void initialize_cache_info(void); +void irqstack_early_init(void); + +#ifdef CONFIG_PPC32 +void setup_power_save(void); +#else +static inline void setup_power_save(void) { }; +#endif + +#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) +void check_smt_enabled(void); +#else +static inline void check_smt_enabled(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +void setup_tlb_core_data(void); +#else +static inline void setup_tlb_core_data(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +void exc_lvl_early_init(void); +#else +static inline void exc_lvl_early_init(void) { }; +#endif + +#ifdef CONFIG_PPC64 +void emergency_stack_init(void); +#else +static inline void emergency_stack_init(void) { }; +#endif + +/* + * Having this in kvm_ppc.h makes include dependencies too + * tricky to solve for setup-common.c so have it here. + */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvm_cma_reserve(void); +#else +static inline void kvm_cma_reserve(void) { }; +#endif + +#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d544fa311757..5fe79182f0fa 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -16,6 +16,7 @@ #include <linux/cpu.h> #include <linux/console.h> #include <linux/memblock.h> +#include <linux/export.h> #include <asm/io.h> #include <asm/prom.h> @@ -36,9 +37,8 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/mmu_context.h> -#include <asm/epapr_hcalls.h> #include <asm/code-patching.h> +#include <asm/cpu_has_feature.h> #define DBG(fmt...) @@ -48,11 +48,16 @@ int boot_cpuid_phys; EXPORT_SYMBOL_GPL(boot_cpuid_phys); int smp_hw_index[NR_CPUS]; +EXPORT_SYMBOL(smp_hw_index); unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); + /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. @@ -62,9 +67,7 @@ int icache_bsize; int ucache_bsize; /* - * We're called here very early in the boot. We determine the machine - * type and call the appropriate low-level setup functions. - * -- Cort <cort@fsmlabs.com> + * We're called here very early in the boot. * * Note that the kernel may be running at an address which is different * from the address that it was linked at, so we must use RELOC/PTRRELOC @@ -73,7 +76,6 @@ int ucache_bsize; notrace unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); - struct cpu_spec *spec; /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ @@ -84,36 +86,29 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * Identify the CPU type and fix up code sections * that depend on which cpu we have. */ - spec = identify_cpu(offset, mfspr(SPRN_PVR)); - - do_feature_fixups(spec->cpu_features, - PTRRELOC(&__start___ftr_fixup), - PTRRELOC(&__stop___ftr_fixup)); - - do_feature_fixups(spec->mmu_features, - PTRRELOC(&__start___mmu_ftr_fixup), - PTRRELOC(&__stop___mmu_ftr_fixup)); - - do_lwsync_fixups(spec->cpu_features, - PTRRELOC(&__start___lwsync_fixup), - PTRRELOC(&__stop___lwsync_fixup)); + identify_cpu(offset, mfspr(SPRN_PVR)); - do_final_fixups(); + apply_feature_fixups(); return KERNELBASE + offset; } /* - * Find out what kind of machine we're on and save any data we need - * from the early boot process (devtree is copied on pmac by prom_init()). - * This is called very early on the boot process, after a minimal - * MMU environment has been set up but before MMU_init is called. + * This is run before start_kernel(), the kernel has been relocated + * and we are running with enough of the MMU enabled to have our + * proper kernel virtual addresses + * + * We do the initial parsing of the flat device-tree and prepares + * for the MMU to be fully initialized. */ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ notrace void __init machine_init(u64 dt_ptr) { + /* Configure static keys first, now that we're relocated. */ + setup_feature_keys(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); @@ -123,27 +118,9 @@ notrace void __init machine_init(u64 dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - early_init_mmu(); - probe_machine(); - setup_kdump_trampoline(); - -#ifdef CONFIG_6xx - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = ppc6xx_idle; -#endif - -#ifdef CONFIG_E500 - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = e500_idle; -#endif - if (ppc_md.progress) - ppc_md.progress("id mach(): done", 0x200); } /* Checks "l2cr=xxxx" command-line option */ @@ -221,7 +198,7 @@ int __init ppc_init(void) arch_initcall(ppc_init); -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { unsigned int i; @@ -236,7 +213,7 @@ static void __init irqstack_early_init(void) } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; @@ -259,33 +236,25 @@ static void __init exc_lvl_early_init(void) #endif } } -#else -#define exc_lvl_early_init() #endif -/* Warning, IO base is not yet inited */ -void __init setup_arch(char **cmdline_p) +void __init setup_power_save(void) { - *cmdline_p = boot_command_line; - - /* so udelay does something sensible, assume <= 1000 bogomips */ - loops_per_jiffy = 500000000 / HZ; - - unflatten_device_tree(); - check_for_initrd(); - - if (ppc_md.init_early) - ppc_md.init_early(); - - find_legacy_serial_ports(); - - smp_setup_cpu_maps(); - - /* Register early console */ - register_early_udbg_console(); +#ifdef CONFIG_6xx + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; +#endif - xmon_setup(); +#ifdef CONFIG_E500 + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = e500_idle; +#endif +} +__init void initialize_cache_info(void) +{ /* * Set cache line size based on type of cpu as a default. * Systems with OF can look in the properties on the cpu node(s) @@ -296,32 +265,4 @@ void __init setup_arch(char **cmdline_p) ucache_bsize = 0; if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ucache_bsize = icache_bsize = dcache_bsize; - - if (ppc_md.panic) - setup_panic(); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; - - exc_lvl_early_init(); - - irqstack_early_init(); - - initmem_init(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 96d4a2b23d0f..7ac8e6eaab5b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,7 +35,6 @@ #include <linux/pci.h> #include <linux/lockdep.h> #include <linux/memblock.h> -#include <linux/hugetlb.h> #include <linux/memory.h> #include <linux/nmi.h> @@ -64,12 +63,10 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/mmu_context.h> #include <asm/code-patching.h> -#include <asm/kvm_ppc.h> -#include <asm/hugetlb.h> -#include <asm/epapr_hcalls.h> #include <asm/livepatch.h> +#include <asm/opal.h> +#include <asm/cputhreads.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -100,7 +97,7 @@ int icache_bsize; int ucache_bsize; #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) -static void setup_tlb_core_data(void) +void __init setup_tlb_core_data(void) { int cpu; @@ -133,10 +130,6 @@ static void setup_tlb_core_data(void) } } } -#else -static void setup_tlb_core_data(void) -{ -} #endif #ifdef CONFIG_SMP @@ -144,7 +137,7 @@ static void setup_tlb_core_data(void) static char *smt_enabled_cmdline; /* Look for ibm,smt-enabled OF option */ -static void check_smt_enabled(void) +void __init check_smt_enabled(void) { struct device_node *dn; const char *smt_option; @@ -193,12 +186,10 @@ static int __init early_smt_enabled(char *p) } early_param("smt-enabled", early_smt_enabled); -#else -#define check_smt_enabled() #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void fixup_boot_paca(void) +static void __init fixup_boot_paca(void) { /* The boot cpu is started */ get_paca()->cpu_start = 1; @@ -206,23 +197,50 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } -static void cpu_ready_for_interrupts(void) +static void __init configure_exceptions(void) { - /* Set IR and DR in PACA MSR */ - get_paca()->kernel_msr = MSR_KERNEL; - /* - * Enable AIL if supported, and we are in hypervisor mode. If we are - * not in hypervisor mode, we enable relocation-on interrupts later - * in pSeries_setup_arch() using the H_SET_MODE hcall. + * Setup the trampolines from the lowmem exception vectors + * to the kdump kernel when not using a relocatable kernel. */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + setup_kdump_trampoline(); + + /* Under a PAPR hypervisor, we need hypercalls */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* Enable AIL if possible */ + pseries_enable_reloc_on_exc(); + + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. + * + * We don't call this for big endian as our calling convention + * makes us always enter in BE, and the call may fail under + * some circumstances with kdump. + */ +#ifdef __LITTLE_ENDIAN__ + pseries_little_endian_exceptions(); +#endif + } else { + /* Set endian mode using OPAL */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + opal_configure_cores(); + + /* Enable AIL if supported, and we are in hypervisor mode */ + if (early_cpu_has_feature(CPU_FTR_HVMODE) && + early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } } } +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -270,18 +288,19 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - /* Now we know the logical id of our boot cpu, setup the paca. */ setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); - /* Probe the machine type */ - probe_machine(); - - setup_kdump_trampoline(); + /* + * Configure exception handlers. This include setting up trampolines + * if needed, setting exception endian mode, etc... + */ + configure_exceptions(); - DBG("Found, Initializing memory management...\n"); + /* Apply all the dynamic patching */ + apply_feature_fixups(); + setup_feature_keys(); /* Initialize the hash table or TLB handling */ early_init_mmu(); @@ -293,16 +312,6 @@ void __init early_setup(unsigned long dt_ptr) */ cpu_ready_for_interrupts(); - /* Reserve large chunks of memory for use by CMA for KVM */ - kvm_cma_reserve(); - - /* - * Reserve any gigantic pages requested on the command line. - * memblock needs to have been initialized by the time this is - * called since this will reserve memory. - */ - reserve_hugetlb_gpages(); - DBG(" <- early_setup()\n"); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX @@ -321,7 +330,7 @@ void __init early_setup(unsigned long dt_ptr) #ifdef CONFIG_SMP void early_setup_secondary(void) { - /* Mark interrupts enabled in PACA */ + /* Mark interrupts disabled in PACA */ get_paca()->soft_enabled = 0; /* Initialize the hash table or TLB handling */ @@ -391,7 +400,7 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ -static void __init initialize_cache_info(void) +void __init initialize_cache_info(void) { struct device_node *np; unsigned long num_cpus = 0; @@ -456,127 +465,11 @@ static void __init initialize_cache_info(void) } } - DBG(" <- initialize_cache_info()\n"); -} - - -/* - * Do some initial setup of the system. The parameters are those which - * were passed in from the bootloader. - */ -void __init setup_system(void) -{ - DBG(" -> setup_system()\n"); - - /* Apply the CPUs-specific and firmware specific fixups to kernel - * text (nop out sections not relevant to this CPU or this firmware) - */ - do_feature_fixups(cur_cpu_spec->cpu_features, - &__start___ftr_fixup, &__stop___ftr_fixup); - do_feature_fixups(cur_cpu_spec->mmu_features, - &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup); - do_feature_fixups(powerpc_firmware_features, - &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); - do_lwsync_fixups(cur_cpu_spec->cpu_features, - &__start___lwsync_fixup, &__stop___lwsync_fixup); - do_final_fixups(); - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree. - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* - * Do some platform specific early initializations, that includes - * setting up the hash table pointers. It also sets up some interrupt-mapping - * related options that will be used by finish_device_tree() - */ - if (ppc_md.init_early) - ppc_md.init_early(); - - /* - * We can discover serial ports now since the above did setup the - * hash table management for us, thus ioremap works. We do that early - * so that further code can be debugged - */ - find_legacy_serial_ports(); - - /* - * Register early console - */ - register_early_udbg_console(); - - /* - * Initialize xmon - */ - xmon_setup(); - - smp_setup_cpu_maps(); - check_smt_enabled(); - setup_tlb_core_data(); - - /* - * Freescale Book3e parts spin in a loop provided by firmware, - * so smp_release_cpus() does nothing for them - */ -#if defined(CONFIG_SMP) - /* Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - pr_info("Starting Linux %s %s\n", init_utsname()->machine, - init_utsname()->version); - - pr_info("-----------------------------------------------------\n"); - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); - - if (ppc64_caches.dline_size != 0x80) - pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); - if (ppc64_caches.iline_size != 0x80) - pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - - pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, - cur_cpu_spec->cpu_user_features2); - pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif - - if (PHYSICAL_START > 0) - pr_info("physical_start = 0x%llx\n", - (unsigned long long)PHYSICAL_START); - pr_info("-----------------------------------------------------\n"); + /* For use by binfmt_elf */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; - DBG(" <- setup_system()\n"); + DBG(" <- initialize_cache_info()\n"); } /* This returns the limit below which memory accesses to the linear @@ -584,7 +477,7 @@ void __init setup_system(void) * used to allocate interrupt or emergency stacks for which our * exception entry path doesn't deal with being interrupted. */ -static u64 safe_stack_limit(void) +static __init u64 safe_stack_limit(void) { #ifdef CONFIG_PPC_BOOK3E /* Freescale BookE bolts the entire linear mapping */ @@ -600,7 +493,7 @@ static u64 safe_stack_limit(void) #endif } -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { u64 limit = safe_stack_limit(); unsigned int i; @@ -620,7 +513,7 @@ static void __init irqstack_early_init(void) } #ifdef CONFIG_PPC_BOOK3E -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i; unsigned long sp; @@ -642,8 +535,6 @@ static void __init exc_lvl_early_init(void) if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) patch_exception(0x040, exc_debug_debug_book3e); } -#else -#define exc_lvl_early_init() #endif /* @@ -651,7 +542,7 @@ static void __init exc_lvl_early_init(void) * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. */ -static void __init emergency_stack_init(void) +void __init emergency_stack_init(void) { u64 limit; unsigned int i; @@ -682,61 +573,6 @@ static void __init emergency_stack_init(void) } } -/* - * Called into from start_kernel this initializes memblock, which is used - * to manage page allocation until mem_init is called. - */ -void __init setup_arch(char **cmdline_p) -{ - *cmdline_p = boot_command_line; - - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. - */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; - - if (ppc_md.panic) - setup_panic(); - - klp_init_thread_info(&init_thread_info); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; -#ifdef CONFIG_PPC_64K_PAGES - init_mm.context.pte_frag = NULL; -#endif -#ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); -#endif - irqstack_early_init(); - exc_lvl_early_init(); - emergency_stack_init(); - - initmem_init(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); - - /* Interrupt code needs to be 64K-aligned */ - if ((unsigned long)_stext & 0xffff) - panic("Kernelbase not 64K-aligned (0x%lx)!\n", - (unsigned long)_stext); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cb64d6feb45a..bbe77aed198d 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -99,22 +99,24 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, } } -static void do_signal(struct pt_regs *regs) +static void do_signal(struct task_struct *tsk) { sigset_t *oldset = sigmask_to_save(); struct ksignal ksig; int ret; int is32 = is_32bit_task(); + BUG_ON(tsk != current); + get_signal(&ksig); /* Is there any syscall restart business here ? */ - check_syscall_restart(regs, &ksig.ka, ksig.sig > 0); + check_syscall_restart(tsk->thread.regs, &ksig.ka, ksig.sig > 0); if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); - regs->trap = 0; + tsk->thread.regs->trap = 0; return; /* no signals delivered */ } @@ -124,23 +126,22 @@ static void do_signal(struct pt_regs *regs) * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.hw_brk.address && - current->thread.hw_brk.type) - __set_breakpoint(¤t->thread.hw_brk); + if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type) + __set_breakpoint(&tsk->thread.hw_brk); #endif /* Re-enable the breakpoints for the signal stack */ - thread_change_pc(current, regs); + thread_change_pc(tsk, tsk->thread.regs); if (is32) { if (ksig.ka.sa.sa_flags & SA_SIGINFO) - ret = handle_rt_signal32(&ksig, oldset, regs); + ret = handle_rt_signal32(&ksig, oldset, tsk); else - ret = handle_signal32(&ksig, oldset, regs); + ret = handle_signal32(&ksig, oldset, tsk); } else { - ret = handle_rt_signal64(&ksig, oldset, regs); + ret = handle_rt_signal64(&ksig, oldset, tsk); } - regs->trap = 0; + tsk->thread.regs->trap = 0; signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } @@ -151,8 +152,10 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); + if (thread_info_flags & _TIF_SIGPENDING) { + BUG_ON(regs != current->thread.regs); + do_signal(current); + } if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); @@ -162,7 +165,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } -unsigned long get_tm_stackpointer(struct pt_regs *regs) +unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be * careful with the stack. It's possible that the stack has moved back @@ -187,11 +190,13 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + BUG_ON(tsk != current); + + if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { tm_reclaim_current(TM_CAUSE_SIGNAL); - if (MSR_TM_TRANSACTIONAL(regs->msr)) - return current->thread.ckpt_regs.gpr[1]; + if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) + return tsk->thread.ckpt_regs.gpr[1]; } #endif - return regs->gpr[1]; + return tsk->thread.regs->gpr[1]; } diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index be305c858e51..7c59d88b9d86 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -16,39 +16,41 @@ extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32); extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs); + struct task_struct *tsk); extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs); + struct task_struct *tsk); extern unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); -extern unsigned long copy_transact_fpr_to_user(void __user *to, +extern unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); extern unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); -extern unsigned long copy_transact_fpr_from_user(struct task_struct *task, +extern unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); +extern unsigned long get_tm_stackpointer(struct task_struct *tsk); + #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); -extern unsigned long copy_transact_vsx_to_user(void __user *to, +extern unsigned long copy_ckvsx_to_user(void __user *to, struct task_struct *task); extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); -extern unsigned long copy_transact_vsx_from_user(struct task_struct *task, +extern unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from); #endif #ifdef CONFIG_PPC64 extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs); + struct task_struct *tsk); #else /* CONFIG_PPC64 */ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs) + struct task_struct *tsk) { return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index b6aa378aff63..27aa913ac91d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -44,6 +44,7 @@ #include <asm/vdso.h> #include <asm/switch_to.h> #include <asm/tm.h> +#include <asm/asm-prototypes.h> #ifdef CONFIG_PPC64 #include "ppc32.h" #include <asm/unistd.h> @@ -315,7 +316,7 @@ unsigned long copy_vsx_from_user(struct task_struct *task, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -unsigned long copy_transact_fpr_to_user(void __user *to, +unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) { u64 buf[ELF_NFPREG]; @@ -323,12 +324,12 @@ unsigned long copy_transact_fpr_to_user(void __user *to, /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < (ELF_NFPREG - 1) ; i++) - buf[i] = task->thread.TS_TRANS_FPR(i); - buf[i] = task->thread.transact_fp.fpscr; + buf[i] = task->thread.TS_CKFPR(i); + buf[i] = task->thread.ckfp_state.fpscr; return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); } -unsigned long copy_transact_fpr_from_user(struct task_struct *task, +unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from) { u64 buf[ELF_NFPREG]; @@ -337,13 +338,13 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task, if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) return 1; for (i = 0; i < (ELF_NFPREG - 1) ; i++) - task->thread.TS_TRANS_FPR(i) = buf[i]; - task->thread.transact_fp.fpscr = buf[i]; + task->thread.TS_CKFPR(i) = buf[i]; + task->thread.ckfp_state.fpscr = buf[i]; return 0; } -unsigned long copy_transact_vsx_to_user(void __user *to, +unsigned long copy_ckvsx_to_user(void __user *to, struct task_struct *task) { u64 buf[ELF_NVSRHALFREG]; @@ -351,11 +352,11 @@ unsigned long copy_transact_vsx_to_user(void __user *to, /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < ELF_NVSRHALFREG; i++) - buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET]; + buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); } -unsigned long copy_transact_vsx_from_user(struct task_struct *task, +unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from) { u64 buf[ELF_NVSRHALFREG]; @@ -364,7 +365,7 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task, if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) return 1; for (i = 0; i < ELF_NVSRHALFREG ; i++) - task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -384,17 +385,17 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_transact_fpr_to_user(void __user *to, +inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) { - return __copy_to_user(to, task->thread.transact_fp.fpr, + return __copy_to_user(to, task->thread.ckfp_state.fpr, ELF_NFPREG * sizeof(double)); } -inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, +inline unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from) { - return __copy_from_user(task->thread.transact_fp.fpr, from, + return __copy_from_user(task->thread.ckfp_state.fpr, from, ELF_NFPREG * sizeof(double)); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -525,9 +526,6 @@ static int save_tm_user_regs(struct pt_regs *regs, */ regs->msr &= ~MSR_TS_MASK; - /* Make sure floating point registers are stored in regs */ - flush_fp_to_thread(current); - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -545,18 +543,17 @@ static int save_tm_user_regs(struct pt_regs *regs, #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, + if (__copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, ELF_NVRREG * sizeof(vector128))) return 1; if (msr & MSR_VEC) { if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.transact_vr, + ¤t->thread.vr_state, ELF_NVRREG * sizeof(vector128))) return 1; } else { if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.vr_state, + ¤t->thread.ckvr_state, ELF_NVRREG * sizeof(vector128))) return 1; } @@ -573,28 +570,28 @@ static int save_tm_user_regs(struct pt_regs *regs, * most significant bits of that same vector. --BenH */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.vrsave, + current->thread.ckvrsave = mfspr(SPRN_VRSAVE); + if (__put_user(current->thread.ckvrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; if (msr & MSR_VEC) { - if (__put_user(current->thread.transact_vrsave, + if (__put_user(current->thread.vrsave, (u32 __user *)&tm_frame->mc_vregs[32])) return 1; } else { - if (__put_user(current->thread.vrsave, + if (__put_user(current->thread.ckvrsave, (u32 __user *)&tm_frame->mc_vregs[32])) return 1; } #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_to_user(&frame->mc_fregs, current)) + if (copy_ckfpr_to_user(&frame->mc_fregs, current)) return 1; if (msr & MSR_FP) { - if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current)) + if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) return 1; } else { - if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) + if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current)) return 1; } @@ -606,15 +603,14 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - flush_vsx_to_thread(current); - if (copy_vsx_to_user(&frame->mc_vsregs, current)) + if (copy_ckvsx_to_user(&frame->mc_vsregs, current)) return 1; if (msr & MSR_VSX) { - if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs, + if (copy_vsx_to_user(&tm_frame->mc_vsregs, current)) return 1; } else { - if (copy_vsx_to_user(&tm_frame->mc_vsregs, current)) + if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current)) return 1; } @@ -698,6 +694,7 @@ static long restore_user_regs(struct pt_regs *regs, if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; + current->thread.used_vr = true; } else if (current->thread.used_vr) memset(¤t->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); @@ -724,6 +721,7 @@ static long restore_user_regs(struct pt_regs *regs, */ if (copy_vsx_from_user(current, &sr->mc_vsregs)) return 1; + current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; @@ -743,6 +741,7 @@ static long restore_user_regs(struct pt_regs *regs, if (__copy_from_user(current->thread.evr, &sr->mc_vregs, ELF_NEVRREG * sizeof(u32))) return 1; + current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); @@ -793,33 +792,34 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, + if (__copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, sizeof(sr->mc_vregs)) || - __copy_from_user(¤t->thread.transact_vr, + __copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; + current->thread.used_vr = true; } else if (current->thread.used_vr) { memset(¤t->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); - memset(¤t->thread.transact_vr, 0, + memset(¤t->thread.ckvr_state, 0, ELF_NVRREG * sizeof(vector128)); } /* Always get VRSAVE back */ - if (__get_user(current->thread.vrsave, + if (__get_user(current->thread.ckvrsave, (u32 __user *)&sr->mc_vregs[32]) || - __get_user(current->thread.transact_vrsave, + __get_user(current->thread.vrsave, (u32 __user *)&tm_sr->mc_vregs[32])) return 1; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mtspr(SPRN_VRSAVE, current->thread.vrsave); + mtspr(SPRN_VRSAVE, current->thread.ckvrsave); #endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); if (copy_fpr_from_user(current, &sr->mc_fregs) || - copy_transact_fpr_from_user(current, &tm_sr->mc_fregs)) + copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) return 1; #ifdef CONFIG_VSX @@ -829,13 +829,14 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &sr->mc_vsregs) || - copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs)) + if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) || + copy_ckvsx_from_user(current, &sr->mc_vsregs)) return 1; + current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) { current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; - current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } #endif /* CONFIG_VSX */ @@ -848,6 +849,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, if (__copy_from_user(current->thread.evr, &sr->mc_vregs, ELF_NEVRREG * sizeof(u32))) return 1; + current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); @@ -877,13 +879,14 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_recheckpoint(¤t->thread, msr); /* This loads the speculative FP/VEC state, if used */ + msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { - do_load_up_transact_fpu(¤t->thread); + load_fp_state(¤t->thread.fp_state); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - do_load_up_transact_altivec(¤t->thread); + load_vr_state(¤t->thread.vr_state); regs->msr |= MSR_VEC; } #endif @@ -971,7 +974,7 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) * (one which gets siginfo). */ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs) + struct task_struct *tsk) { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; @@ -980,10 +983,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, unsigned long newsp = 0; int sigret; unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + + BUG_ON(tsk != current); /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; @@ -1000,9 +1006,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; addr = frame; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; - tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp; + tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; } else { sigret = __NR_rt_sigreturn; tramp = (unsigned long) frame->tramp; @@ -1029,7 +1035,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } regs->link = tramp; - current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); @@ -1054,7 +1060,7 @@ badframe: printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_rt_signal32: " "%p nip %08lx lr %08lx\n", - current->comm, current->pid, + tsk->comm, tsk->pid, addr, regs->nip, regs->link); return 1; @@ -1226,7 +1232,21 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) goto bad; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); + if (__get_user(tmp, &rt_sf->uc.uc_link)) goto bad; uc_transact = (struct ucontext __user *)(uintptr_t)tmp; @@ -1396,7 +1416,8 @@ int sys_debug_setcontext(struct ucontext __user *ctx, /* * OK, we're invoking a handler */ -int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs) +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct task_struct *tsk) { struct sigcontext __user *sc; struct sigframe __user *frame; @@ -1404,9 +1425,12 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs unsigned long newsp = 0; int sigret; unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + + BUG_ON(tsk != current); /* Set up Signal Frame */ - frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1); + frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -1425,9 +1449,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs || __put_user(ksig->sig, &sc->signal)) goto badframe; - if (vdso32_sigtramp && current->mm->context.vdso_base) { + if (vdso32_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; - tramp = current->mm->context.vdso_base + vdso32_sigtramp; + tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; } else { sigret = __NR_sigreturn; tramp = (unsigned long) frame->mctx.tramp; @@ -1449,7 +1473,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs regs->link = tramp; - current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; @@ -1469,7 +1493,7 @@ badframe: printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_signal32: " "%p nip %08lx lr %08lx\n", - current->comm, current->pid, + tsk->comm, tsk->pid, frame, regs->nip, regs->link); return 1; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 25520794aa37..96698fdf93b4 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -35,6 +35,7 @@ #include <asm/vdso.h> #include <asm/switch_to.h> #include <asm/tm.h> +#include <asm/asm-prototypes.h> #include "signal.h" @@ -90,9 +91,9 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc) * Set up the sigcontext for the signal frame. */ -static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, - int signr, sigset_t *set, unsigned long handler, - int ctx_has_vsx_region) +static long setup_sigcontext(struct sigcontext __user *sc, + struct task_struct *tsk, int signr, sigset_t *set, + unsigned long handler, int ctx_has_vsx_region) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -104,18 +105,22 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, */ #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); + unsigned long vrsave; #endif + struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; long err = 0; + BUG_ON(tsk != current); + #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); /* save altivec registers */ - if (current->thread.used_vr) { - flush_altivec_to_thread(current); + if (tsk->thread.used_vr) { + flush_altivec_to_thread(tsk); /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, ¤t->thread.vr_state, + err |= __copy_to_user(v_regs, &tsk->thread.vr_state, 33 * sizeof(vector128)); /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) * contains valid data. @@ -125,15 +130,19 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + vrsave = 0; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + vrsave = mfspr(SPRN_VRSAVE); + tsk->thread.vrsave = vrsave; + } + + err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ - flush_fp_to_thread(current); + flush_fp_to_thread(tsk); /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, current); + err |= copy_fpr_to_user(&sc->fp_regs, tsk); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -146,10 +155,10 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * then out to userspace. Update v_regs to point after the * VMX data. */ - if (current->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(current); + if (tsk->thread.used_vsr && ctx_has_vsx_region) { + flush_vsx_to_thread(tsk); v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, current); + err |= copy_vsx_to_user(v_regs, tsk); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. */ @@ -182,7 +191,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, */ static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, - struct pt_regs *regs, + struct task_struct *tsk, int signr, sigset_t *set, unsigned long handler) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the @@ -197,9 +206,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif - unsigned long msr = regs->msr; + struct pt_regs *regs = tsk->thread.regs; + unsigned long msr = tsk->thread.ckpt_regs.msr; long err = 0; + BUG_ON(tsk != current); + BUG_ON(!MSR_TM_ACTIVE(regs->msr)); /* Remove TM bits from thread's MSR. The MSR in the sigcontext @@ -209,28 +221,25 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ regs->msr &= ~MSR_TS_MASK; - flush_fp_to_thread(current); - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); /* save altivec registers */ - if (current->thread.used_vr) { - flush_altivec_to_thread(current); + if (tsk->thread.used_vr) { /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, ¤t->thread.vr_state, + err |= __copy_to_user(v_regs, &tsk->thread.ckvr_state, 33 * sizeof(vector128)); /* If VEC was enabled there are transactional VRs valid too, * else they're a copy of the checkpointed VRs. */ if (msr & MSR_VEC) err |= __copy_to_user(tm_v_regs, - ¤t->thread.transact_vr, + &tsk->thread.vr_state, 33 * sizeof(vector128)); else err |= __copy_to_user(tm_v_regs, - ¤t->thread.vr_state, + &tsk->thread.ckvr_state, 33 * sizeof(vector128)); /* set MSR_VEC in the MSR value in the frame to indicate @@ -242,13 +251,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * use altivec. */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + tsk->thread.ckvrsave = mfspr(SPRN_VRSAVE); + err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]); if (msr & MSR_VEC) - err |= __put_user(current->thread.transact_vrsave, + err |= __put_user(tsk->thread.vrsave, (u32 __user *)&tm_v_regs[33]); else - err |= __put_user(current->thread.vrsave, + err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&tm_v_regs[33]); #else /* CONFIG_ALTIVEC */ @@ -257,11 +266,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, #endif /* CONFIG_ALTIVEC */ /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, current); + err |= copy_ckfpr_to_user(&sc->fp_regs, tsk); if (msr & MSR_FP) - err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current); + err |= copy_fpr_to_user(&tm_sc->fp_regs, tsk); else - err |= copy_fpr_to_user(&tm_sc->fp_regs, current); + err |= copy_ckfpr_to_user(&tm_sc->fp_regs, tsk); #ifdef CONFIG_VSX /* @@ -269,17 +278,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * then out to userspace. Update v_regs to point after the * VMX data. */ - if (current->thread.used_vsr) { - flush_vsx_to_thread(current); + if (tsk->thread.used_vsr) { v_regs += ELF_NVRREG; tm_v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, current); + err |= copy_ckvsx_to_user(v_regs, tsk); if (msr & MSR_VSX) - err |= copy_transact_vsx_to_user(tm_v_regs, current); + err |= copy_vsx_to_user(tm_v_regs, tsk); else - err |= copy_vsx_to_user(tm_v_regs, current); + err |= copy_ckvsx_to_user(tm_v_regs, tsk); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. @@ -293,7 +301,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->gp_regs, - ¤t->thread.ckpt_regs, GP_REGS_SIZE); + &tsk->thread.ckpt_regs, GP_REGS_SIZE); err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]); err |= __put_user(msr, &sc->gp_regs[PT_MSR]); err |= __put_user(signr, &sc->signal); @@ -309,7 +317,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * Restore the sigcontext from the signal frame. */ -static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, +static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, struct sigcontext __user *sc) { #ifdef CONFIG_ALTIVEC @@ -318,10 +326,13 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, unsigned long err = 0; unsigned long save_r13 = 0; unsigned long msr; + struct pt_regs *regs = tsk->thread.regs; #ifdef CONFIG_VSX int i; #endif + BUG_ON(tsk != current); + /* If this is not a signal return, we preserve the TLS in r13 */ if (!sig) save_r13 = regs->gpr[13]; @@ -351,7 +362,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, /* * Force reload of FP/VEC. - * This has to be done before copying stuff into current->thread.fpr/vr + * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); @@ -363,21 +374,23 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ - if (v_regs != NULL && (msr & MSR_VEC) != 0) - err |= __copy_from_user(¤t->thread.vr_state, v_regs, + if (v_regs != NULL && (msr & MSR_VEC) != 0) { + err |= __copy_from_user(&tsk->thread.vr_state, v_regs, 33 * sizeof(vector128)); - else if (current->thread.used_vr) - memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); + tsk->thread.used_vr = true; + } else if (tsk->thread.used_vr) { + memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); + } /* Always get VRSAVE back */ if (v_regs != NULL) - err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); else - current->thread.vrsave = 0; + tsk->thread.vrsave = 0; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mtspr(SPRN_VRSAVE, current->thread.vrsave); + mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(current, &sc->fp_regs); + err |= copy_fpr_from_user(tsk, &sc->fp_regs); #ifdef CONFIG_VSX /* * Get additional VSX data. Update v_regs to point after the @@ -385,11 +398,13 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, * buffer for formatting, then into the taskstruct. */ v_regs += ELF_NVRREG; - if ((msr & MSR_VSX) != 0) - err |= copy_vsx_from_user(current, v_regs); - else + if ((msr & MSR_VSX) != 0) { + err |= copy_vsx_from_user(tsk, v_regs); + tsk->thread.used_vsr = true; + } else { for (i = 0; i < 32 ; i++) - current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + } #endif return err; } @@ -399,7 +414,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, * Restore the two sigcontexts from the frame of a transactional processes. */ -static long restore_tm_sigcontexts(struct pt_regs *regs, +static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc, struct sigcontext __user *tm_sc) { @@ -408,12 +423,16 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, #endif unsigned long err = 0; unsigned long msr; + struct pt_regs *regs = tsk->thread.regs; #ifdef CONFIG_VSX int i; #endif + + BUG_ON(tsk != current); + /* copy the GPRs */ err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr)); - err |= __copy_from_user(¤t->thread.ckpt_regs, sc->gp_regs, + err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs, sizeof(regs->gpr)); /* @@ -425,7 +444,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, * we don't need to re-copy them here. */ err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]); - err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]); + err |= __get_user(tsk->thread.tm_tfhar, &sc->gp_regs[PT_NIP]); /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); @@ -444,13 +463,13 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]); err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]); err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]); - err |= __get_user(current->thread.ckpt_regs.ctr, + err |= __get_user(tsk->thread.ckpt_regs.ctr, &sc->gp_regs[PT_CTR]); - err |= __get_user(current->thread.ckpt_regs.link, + err |= __get_user(tsk->thread.ckpt_regs.link, &sc->gp_regs[PT_LNK]); - err |= __get_user(current->thread.ckpt_regs.xer, + err |= __get_user(tsk->thread.ckpt_regs.xer, &sc->gp_regs[PT_XER]); - err |= __get_user(current->thread.ckpt_regs.ccr, + err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); /* These regs are not checkpointed; they can go in 'regs'. */ @@ -461,7 +480,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* * Force reload of FP/VEC. - * This has to be done before copying stuff into current->thread.fpr/vr + * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); @@ -478,32 +497,33 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) { - err |= __copy_from_user(¤t->thread.vr_state, v_regs, + err |= __copy_from_user(&tsk->thread.ckvr_state, v_regs, 33 * sizeof(vector128)); - err |= __copy_from_user(¤t->thread.transact_vr, tm_v_regs, + err |= __copy_from_user(&tsk->thread.vr_state, tm_v_regs, 33 * sizeof(vector128)); + current->thread.used_vr = true; } - else if (current->thread.used_vr) { - memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); - memset(¤t->thread.transact_vr, 0, 33 * sizeof(vector128)); + else if (tsk->thread.used_vr) { + memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); + memset(&tsk->thread.ckvr_state, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ if (v_regs != NULL && tm_v_regs != NULL) { - err |= __get_user(current->thread.vrsave, + err |= __get_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]); - err |= __get_user(current->thread.transact_vrsave, + err |= __get_user(tsk->thread.vrsave, (u32 __user *)&tm_v_regs[33]); } else { - current->thread.vrsave = 0; - current->thread.transact_vrsave = 0; + tsk->thread.vrsave = 0; + tsk->thread.ckvrsave = 0; } if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mtspr(SPRN_VRSAVE, current->thread.vrsave); + mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(current, &sc->fp_regs); - err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs); + err |= copy_fpr_from_user(tsk, &tm_sc->fp_regs); + err |= copy_ckfpr_from_user(tsk, &sc->fp_regs); #ifdef CONFIG_VSX /* * Get additional VSX data. Update v_regs to point after the @@ -513,32 +533,31 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, if (v_regs && ((msr & MSR_VSX) != 0)) { v_regs += ELF_NVRREG; tm_v_regs += ELF_NVRREG; - err |= copy_vsx_from_user(current, v_regs); - err |= copy_transact_vsx_from_user(current, tm_v_regs); + err |= copy_vsx_from_user(tsk, tm_v_regs); + err |= copy_ckvsx_from_user(tsk, v_regs); + tsk->thread.used_vsr = true; } else { for (i = 0; i < 32 ; i++) { - current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; - current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0; + tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + tsk->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } } #endif tm_enable(); /* Make sure the transaction is marked as failed */ - current->thread.tm_texasr |= TEXASR_FS; + tsk->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ - tm_recheckpoint(¤t->thread, msr); + tm_recheckpoint(&tsk->thread, msr); - /* This loads the speculative FP/VEC state, if used */ + msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { - do_load_up_transact_fpu(¤t->thread); - regs->msr |= (MSR_FP | current->thread.fpexc_mode); + load_fp_state(&tsk->thread.fp_state); + regs->msr |= (MSR_FP | tsk->thread.fpexc_mode); } -#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - do_load_up_transact_altivec(¤t->thread); + load_vr_state(&tsk->thread.vr_state); regs->msr |= MSR_VEC; } -#endif return err; } @@ -589,6 +608,8 @@ int sys_swapcontext(struct ucontext __user *old_ctx, unsigned long new_msr = 0; int ctx_has_vsx_region = 0; + BUG_ON(regs != current->thread.regs); + if (new_ctx && get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR])) return -EFAULT; @@ -611,7 +632,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx, if (old_ctx != NULL) { if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0, + || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0, ctx_has_vsx_region) || __copy_to_user(&old_ctx->uc_sigmask, ¤t->blocked, sizeof(sigset_t))) @@ -639,7 +660,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx, if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) do_exit(SIGSEGV); set_current_blocked(&set); - if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext)) + if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) do_exit(SIGSEGV); /* This returns like rt_sigreturn */ @@ -662,6 +683,8 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long msr; #endif + BUG_ON(current->thread.regs != regs); + /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -671,7 +694,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; if (MSR_TM_ACTIVE(msr)) { @@ -679,14 +716,14 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) goto badframe; - if (restore_tm_sigcontexts(regs, &uc->uc_mcontext, + if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; } else /* Fall through, for non-TM restore */ #endif - if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) + if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) goto badframe; if (restore_altstack(&uc->uc_stack)) @@ -705,13 +742,17 @@ badframe: return 0; } -int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) +int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, + struct task_struct *tsk) { struct rt_sigframe __user *frame; unsigned long newsp = 0; long err = 0; + struct pt_regs *regs = tsk->thread.regs; + + BUG_ON(tsk != current); - frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 0); + frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; @@ -732,14 +773,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, - regs, ksig->sig, - NULL, + tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler); } else #endif { err |= __put_user(0, &frame->uc.uc_link); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, ksig->sig, + err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, 1); } @@ -748,11 +788,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs goto badframe; /* Make sure signal handler doesn't get spurious FP exceptions */ - current->thread.fp_state.fpscr = 0; + tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { - regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; + if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { + regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) @@ -802,7 +842,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs badframe: if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "setup_rt_frame", + tsk->comm, tsk->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); return 1; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 55c924b65f71..9c6f3fd58059 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -31,6 +31,7 @@ #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/topology.h> +#include <linux/profile.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -53,6 +54,8 @@ #include <asm/vdso.h> #include <asm/debug.h> #include <asm/kexec.h> +#include <asm/asm-prototypes.h> +#include <asm/cpu_has_feature.h> #ifdef DEBUG #include <asm/udbg.h> @@ -593,6 +596,7 @@ out: of_node_put(np); return id; } +EXPORT_SYMBOL_GPL(cpu_to_core_id); /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) @@ -826,7 +830,7 @@ int __cpu_disable(void) /* Update sibling maps */ base = cpu_first_thread_sibling(cpu); - for (i = 0; i < threads_per_core; i++) { + for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) { cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 5fa92706444b..644cce3d8dce 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -40,6 +40,7 @@ #include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> +#include <asm/asm-prototypes.h> static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 692873bff334..c4f1d1f7bae0 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_PPC64 /* Time in microseconds we delay before sleeping in the idle loop */ -DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 3ed9a5a21d77..bc3f7d0d7b79 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -56,6 +56,7 @@ #include <linux/irq_work.h> #include <linux/clk-provider.h> #include <linux/suspend.h> +#include <linux/rtc.h> #include <asm/trace.h> #include <asm/io.h> @@ -72,6 +73,7 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/cputime.h> +#include <asm/asm-prototypes.h> /* powerpc clocksource/clockevent code */ @@ -96,7 +98,8 @@ static struct clocksource clocksource_timebase = { .read = timebase_read, }; -#define DECREMENTER_MAX 0x7fffffff +#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF +u64 decrementer_max = DECREMENTER_DEFAULT_MAX; static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev); @@ -166,7 +169,15 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +#ifdef CONFIG_PPC_SPLPAR void (*dtl_consumer)(struct dtl_entry *, u64); +#endif + +#ifdef CONFIG_PPC64 +#define get_accounting(tsk) (&get_paca()->accounting) +#else +#define get_accounting(tsk) (&task_thread_info(tsk)->accounting) +#endif static void calc_cputime_factors(void) { @@ -186,7 +197,7 @@ static void calc_cputime_factors(void) * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. */ -static u64 read_spurr(u64 tb) +static unsigned long read_spurr(unsigned long tb) { if (cpu_has_feature(CPU_FTR_SPURR)) return mfspr(SPRN_SPURR); @@ -249,8 +260,8 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - u8 save_soft_enabled = local_paca->soft_enabled; + struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before * soft/hard_enabled are sync'ed to the expected state @@ -260,10 +271,10 @@ void accumulate_stolen_time(void) */ local_paca->soft_enabled = 0; - sst = scan_dispatch_log(local_paca->starttime_user); - ust = scan_dispatch_log(local_paca->starttime); - local_paca->system_time -= sst; - local_paca->user_time -= ust; + sst = scan_dispatch_log(acct->starttime_user); + ust = scan_dispatch_log(acct->starttime); + acct->system_time -= sst; + acct->user_time -= ust; local_paca->stolen_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; @@ -275,7 +286,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb) if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { stolen = scan_dispatch_log(stop_tb); - get_paca()->system_time -= stolen; + get_paca()->accounting.system_time -= stolen; } stolen += get_paca()->stolen_time; @@ -295,27 +306,29 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. */ -static u64 vtime_delta(struct task_struct *tsk, - u64 *sys_scaled, u64 *stolen) +static unsigned long vtime_delta(struct task_struct *tsk, + unsigned long *sys_scaled, + unsigned long *stolen) { - u64 now, nowscaled, deltascaled; - u64 udelta, delta, user_scaled; + unsigned long now, nowscaled, deltascaled; + unsigned long udelta, delta, user_scaled; + struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - get_paca()->system_time += now - get_paca()->starttime; - get_paca()->starttime = now; - deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startspurr = nowscaled; + acct->system_time += now - acct->starttime; + acct->starttime = now; + deltascaled = nowscaled - acct->startspurr; + acct->startspurr = nowscaled; *stolen = calculate_stolen_time(now); - delta = get_paca()->system_time; - get_paca()->system_time = 0; - udelta = get_paca()->user_time - get_paca()->utime_sspurr; - get_paca()->utime_sspurr = get_paca()->user_time; + delta = acct->system_time; + acct->system_time = 0; + udelta = acct->user_time - acct->utime_sspurr; + acct->utime_sspurr = acct->user_time; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -337,14 +350,14 @@ static u64 vtime_delta(struct task_struct *tsk, *sys_scaled = deltascaled; } } - get_paca()->user_time_scaled += user_scaled; + acct->user_time_scaled += user_scaled; return delta; } void vtime_account_system(struct task_struct *tsk) { - u64 delta, sys_scaled, stolen; + unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); account_system_time(tsk, 0, delta, sys_scaled); @@ -355,7 +368,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - u64 delta, sys_scaled, stolen; + unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); account_idle_time(delta + stolen); @@ -373,15 +386,32 @@ void vtime_account_idle(struct task_struct *tsk) void vtime_account_user(struct task_struct *tsk) { cputime_t utime, utimescaled; + struct cpu_accounting_data *acct = get_accounting(tsk); - utime = get_paca()->user_time; - utimescaled = get_paca()->user_time_scaled; - get_paca()->user_time = 0; - get_paca()->user_time_scaled = 0; - get_paca()->utime_sspurr = 0; + utime = acct->user_time; + utimescaled = acct->user_time_scaled; + acct->user_time = 0; + acct->user_time_scaled = 0; + acct->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } +#ifdef CONFIG_PPC32 +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void arch_vtime_task_switch(struct task_struct *prev) +{ + struct cpu_accounting_data *acct = get_accounting(current); + + acct->starttime = get_accounting(prev)->starttime; + acct->system_time = 0; + acct->user_time = 0; +} +#endif /* CONFIG_PPC32 */ + #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif @@ -504,8 +534,8 @@ static void __timer_interrupt(void) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); + if (now <= decrementer_max) + set_dec(now); /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); @@ -535,7 +565,7 @@ void timer_interrupt(struct pt_regs * regs) /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. */ - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); /* Some implementations of hotplug will get timer interrupts while * offline, just ignore these and we also need to set @@ -566,6 +596,7 @@ void timer_interrupt(struct pt_regs * regs) irq_exit(); set_irq_regs(old_regs); } +EXPORT_SYMBOL(timer_interrupt); /* * Hypervisor decrementer interrupts shouldn't occur but are sometimes @@ -583,9 +614,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. */ - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); local_irq_disable(); - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); } static void generic_suspend_enable_irqs(void) @@ -866,7 +897,7 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(DECREMENTER_MAX, dev); + decrementer_set_next_event(decrementer_max, dev); return 0; } @@ -892,6 +923,49 @@ static void register_decrementer_clockevent(int cpu) clockevents_register_device(dec); } +static void enable_large_decrementer(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + if (decrementer_max <= DECREMENTER_DEFAULT_MAX) + return; + + /* + * If we're running as the hypervisor we need to enable the LD manually + * otherwise firmware should have done it for us. + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD); +} + +static void __init set_decrementer_max(void) +{ + struct device_node *cpu; + u32 bits = 32; + + /* Prior to ISAv3 the decrementer is always 32 bit */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + cpu = of_find_node_by_type(NULL, "cpu"); + + if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) { + if (bits > 64 || bits < 32) { + pr_warn("time_init: firmware supplied invalid ibm,dec-bits"); + bits = 32; + } + + /* calculate the signed maximum given this many bits */ + decrementer_max = (1ul << (bits - 1)) - 1; + } + + of_node_put(cpu); + + pr_info("time_init: %u bit decrementer (max: %llx)\n", + bits, decrementer_max); +} + static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); @@ -899,7 +973,7 @@ static void __init init_decrementer_clockevent(void) clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); decrementer_clockevent.max_delta_ns = - clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); + clockevent_delta2ns(decrementer_max, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = clockevent_delta2ns(2, &decrementer_clockevent); @@ -908,6 +982,9 @@ static void __init init_decrementer_clockevent(void) void secondary_cpu_time_init(void) { + /* Enable and test the large decrementer for this cpu */ + enable_large_decrementer(); + /* Start the decrementer on CPUs that have manual control * such as BookE */ @@ -973,6 +1050,10 @@ void __init time_init(void) vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + /* initialise and enable the large decrementer (if we have one) */ + set_decrementer_max(); + enable_large_decrementer(); + /* Start the decrementer on CPUs that have manual control * such as BookE */ @@ -1081,6 +1162,29 @@ void calibrate_delay(void) loops_per_jiffy = tb_ticks_per_jiffy; } +#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC) +static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) +{ + ppc_md.get_rtc_time(tm); + return rtc_valid_tm(tm); +} + +static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) +{ + if (!ppc_md.set_rtc_time) + return -EOPNOTSUPP; + + if (ppc_md.set_rtc_time(tm) < 0) + return -EOPNOTSUPP; + + return 0; +} + +static const struct rtc_class_ops rtc_generic_ops = { + .read_time = rtc_generic_get_time, + .set_time = rtc_generic_set_time, +}; + static int __init rtc_init(void) { struct platform_device *pdev; @@ -1088,9 +1192,12 @@ static int __init rtc_init(void) if (!ppc_md.get_rtc_time) return -ENODEV; - pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + pdev = platform_device_register_data(NULL, "rtc-generic", -1, + &rtc_generic_ops, + sizeof(rtc_generic_ops)); return PTR_ERR_OR_ZERO(pdev); } device_initcall(rtc_init); +#endif diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..3a2d04134da9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -108,19 +108,14 @@ _GLOBAL(tm_reclaim) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ std r3, STK_PARAM(R3)(r1) + std r4, STK_PARAM(R4)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -132,43 +127,6 @@ _GLOBAL(tm_reclaim) mtmsrd r15 std r14, TM_FRAME_L0(r1) - /* Stash the stack pointer away for use after reclaim */ - std r1, PACAR1(r13) - - /* ******************** FPR/VR/VSRs ************ - * Before reclaiming, capture the current/transactional FPR/VR - * versions /if used/. - * - * (If VSX used, FP and VMX are implied. Or, we don't need to look - * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) - * - * We're passed the thread's MSR as parameter 2. - * - * We enabled VEC/FP/VSX in the msr above, so we can execute these - * instructions! - */ - andis. r0, r4, MSR_VEC@h - beq dont_backup_vec - - addi r7, r3, THREAD_TRANSACT_VRSTATE - SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ - mfvscr v0 - li r6, VRSTATE_VSCR - stvx v0, r7, r6 -dont_backup_vec: - mfspr r0, SPRN_VRSAVE - std r0, THREAD_TRANSACT_VRSAVE(r3) - - andi. r0, r4, MSR_FP - beq dont_backup_fp - - addi r7, r3, THREAD_TRANSACT_FPSTATE - SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ - - mffs fr0 - stfd fr0,FPSTATE_FPSCR(r7) - -dont_backup_fp: /* Do sanity check on MSR to make sure we are suspended */ li r7, (MSR_TS_S)@higher srdi r6, r14, 32 @@ -176,7 +134,20 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Stash the stack pointer away for use after reclaim */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +168,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -264,6 +240,43 @@ dont_backup_fp: * MSR. */ + + /* ******************** FPR/VR/VSRs ************ + * After reclaiming, capture the checkpointed FPRs/VRs /if used/. + * + * (If VSX used, FP and VMX are implied. Or, we don't need to look + * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) + * + * We're passed the thread's MSR as the second parameter + * + * We enabled VEC/FP/VSX in the msr above, so we can execute these + * instructions! + */ + ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */ + mr r3, r12 + andis. r0, r4, MSR_VEC@h + beq dont_backup_vec + + addi r7, r3, THREAD_CKVRSTATE + SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ + mfvscr v0 + li r6, VRSTATE_VSCR + stvx v0, r7, r6 +dont_backup_vec: + mfspr r0, SPRN_VRSAVE + std r0, THREAD_CKVRSAVE(r3) + + andi. r0, r4, MSR_FP + beq dont_backup_fp + + addi r7, r3, THREAD_CKFPSTATE + SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ + + mffs fr0 + stfd fr0,FPSTATE_FPSCR(r7) + +dont_backup_fp: + /* TM regs, incl TEXASR -- these live in thread_struct. Note they've * been updated by the treclaim, to explain to userland the failure * cause (aborted). @@ -279,6 +292,7 @@ dont_backup_fp: /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ + li r15, 0 rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1 mtmsrd r14 @@ -329,8 +343,6 @@ _GLOBAL(__tm_recheckpoint) */ subi r7, r7, STACK_FRAME_OVERHEAD - SET_SCRATCH0(r1) - mfmsr r6 /* R4 = original MSR to indicate whether thread used FP/Vector etc. */ @@ -349,28 +361,29 @@ _GLOBAL(__tm_recheckpoint) mtmsr r5 #ifdef CONFIG_ALTIVEC - /* FP and VEC registers: These are recheckpointed from thread.fpr[] - * and thread.vr[] respectively. The thread.transact_fpr[] version - * is more modern, and will be loaded subsequently by any FPUnavailable - * trap. + /* + * FP and VEC registers: These are recheckpointed from + * thread.ckfp_state and thread.ckvr_state respectively. The + * thread.fp_state[] version holds the 'live' (transactional) + * and will be loaded subsequently by any FPUnavailable trap. */ andis. r0, r4, MSR_VEC@h beq dont_restore_vec - addi r8, r3, THREAD_VRSTATE + addi r8, r3, THREAD_CKVRSTATE li r5, VRSTATE_VSCR lvx v0, r8, r5 mtvscr v0 REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */ dont_restore_vec: - ld r5, THREAD_VRSAVE(r3) + ld r5, THREAD_CKVRSAVE(r3) mtspr SPRN_VRSAVE, r5 #endif andi. r0, r4, MSR_FP beq dont_restore_fp - addi r8, r3, THREAD_FPSTATE + addi r8, r3, THREAD_CKFPSTATE lfd fr0, FPSTATE_FPSCR(r8) MTFSF_L(fr0) REST_32FPRS_VSRS(0, R4, R8) @@ -397,11 +410,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +447,34 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + SET_SCRATCH0(r1) + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9229ba63c370..023a462725b5 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -25,7 +25,8 @@ #include <linux/user.h> #include <linux/interrupt.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/extable.h> +#include <linux/module.h> /* print_modules */ #include <linux/prctl.h> #include <linux/delay.h> #include <linux/kprobes.h> @@ -60,6 +61,8 @@ #include <asm/switch_to.h> #include <asm/tm.h> #include <asm/debug.h> +#include <asm/asm-prototypes.h> +#include <asm/hmi.h> #include <sysdev/fsl_pci.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) @@ -114,7 +117,7 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; -static unsigned __kprobes long oops_begin(struct pt_regs *regs) +static unsigned long oops_begin(struct pt_regs *regs) { int cpu; unsigned long flags; @@ -141,8 +144,9 @@ static unsigned __kprobes long oops_begin(struct pt_regs *regs) pmac_backlight_unblank(); return flags; } +NOKPROBE_SYMBOL(oops_begin); -static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, +static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { bust_spinlocks(0); @@ -193,8 +197,9 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, panic("Fatal exception"); do_exit(signr); } +NOKPROBE_SYMBOL(oops_end); -static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); #ifdef CONFIG_PREEMPT @@ -218,6 +223,7 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err) return 0; } +NOKPROBE_SYMBOL(__die); void die(const char *str, struct pt_regs *regs, long err) { @@ -267,7 +273,6 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) force_sig_info(signr, &info, current); } -#ifdef CONFIG_PPC64 void system_reset_exception(struct pt_regs *regs) { /* See if any machine dependent calls */ @@ -285,6 +290,7 @@ void system_reset_exception(struct pt_regs *regs) /* What should we do here? We could issue a shutdown or hard reset. */ } +#ifdef CONFIG_PPC64 /* * This function is called in real mode. Strictly no printk's please. * @@ -307,9 +313,13 @@ long hmi_exception_realmode(struct pt_regs *regs) { __this_cpu_inc(irq_stat.hmi_exceptions); + wait_for_subcore_guest_exit(); + if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); + wait_for_tb_resync(); + return 0; } @@ -342,12 +352,11 @@ static inline int check_io_access(struct pt_regs *regs) * For the debug message, we look at the preceding * load or store. */ - if (*nip == 0x60000000) /* nop */ + if (*nip == PPC_INST_NOP) nip -= 2; - else if (*nip == 0x4c00012c) /* isync */ + else if (*nip == PPC_INST_ISYNC) --nip; - if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { - /* sync or twi */ + if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) { unsigned int rb; --nip; @@ -658,6 +667,31 @@ int machine_check_e200(struct pt_regs *regs) return 0; } +#elif defined(CONFIG_PPC_8xx) +int machine_check_8xx(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + + pr_err("Machine check in kernel mode.\n"); + pr_err("Caused by (from SRR1=%lx): ", reason); + if (reason & 0x40000000) + pr_err("Fetch error at address %lx\n", regs->nip); + else + pr_err("Data access error at address %lx\n", regs->dar); + +#ifdef CONFIG_PCI + /* the qspan pci read routines can cause machine checks -- Cort + * + * yuck !!! that totally needs to go away ! There are better ways + * to deal with that than having a wart in the mcheck handler. + * -- BenH + */ + bad_page_fault(regs, regs->dar, SIGBUS); + return 1; +#else + return 0; +#endif +} #else int machine_check_generic(struct pt_regs *regs) { @@ -717,17 +751,6 @@ void machine_check_exception(struct pt_regs *regs) if (recover > 0) goto bail; -#if defined(CONFIG_8xx) && defined(CONFIG_PCI) - /* the qspan pci read routines can cause machine checks -- Cort - * - * yuck !!! that totally needs to go away ! There are better ways - * to deal with that than having a wart in the mcheck handler. - * -- BenH - */ - bad_page_fault(regs, regs->dar, SIGBUS); - goto bail; -#endif - if (debugger_fault_handler(regs)) goto bail; @@ -795,7 +818,7 @@ void RunModeException(struct pt_regs *regs) _exception(SIGTRAP, regs, 0, 0); } -void __kprobes single_step_exception(struct pt_regs *regs) +void single_step_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); @@ -812,6 +835,7 @@ void __kprobes single_step_exception(struct pt_regs *regs) bail: exception_exit(prev_state); } +NOKPROBE_SYMBOL(single_step_exception); /* * After we have successfully emulated an instruction, we have to @@ -1133,7 +1157,7 @@ static int emulate_math(struct pt_regs *regs) static inline int emulate_math(struct pt_regs *regs) { return -1; } #endif -void __kprobes program_check_exception(struct pt_regs *regs) +void program_check_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); @@ -1253,16 +1277,18 @@ sigill: bail: exception_exit(prev_state); } +NOKPROBE_SYMBOL(program_check_exception); /* * This occurs when running in hypervisor mode on POWER6 or later * and an illegal instruction is encountered. */ -void __kprobes emulation_assist_interrupt(struct pt_regs *regs) +void emulation_assist_interrupt(struct pt_regs *regs) { regs->msr |= REASON_ILLEGAL; program_check_exception(regs); } +NOKPROBE_SYMBOL(emulation_assist_interrupt); void alignment_exception(struct pt_regs *regs) { @@ -1303,6 +1329,18 @@ bail: exception_exit(prev_state); } +void slb_miss_bad_addr(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); + else + bad_page_fault(regs, regs->dar, SIGSEGV); + + exception_exit(prev_state); +} + void StackOverflow(struct pt_regs *regs) { printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", @@ -1365,6 +1403,22 @@ void vsx_unavailable_exception(struct pt_regs *regs) } #ifdef CONFIG_PPC64 +static void tm_unavailable(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (user_mode(regs)) { + current->thread.load_tm++; + regs->msr |= MSR_TM; + tm_enable(); + tm_restore_sprs(¤t->thread); + return; + } +#endif + pr_emerg("Unrecoverable TM Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable TM Unavailable Exception", regs, SIGABRT); +} + void facility_unavailable_exception(struct pt_regs *regs) { static char *facility_strings[] = { @@ -1376,6 +1430,7 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", + [FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1418,7 +1473,8 @@ void facility_unavailable_exception(struct pt_regs *regs) rd = (instword >> 21) & 0x1f; current->thread.dscr = regs->gpr[rd]; current->thread.dscr_inherit = 1; - mtspr(SPRN_FSCR, value | FSCR_DSCR); + current->thread.fscr |= FSCR_DSCR; + mtspr(SPRN_FSCR, current->thread.fscr); } /* Read from DSCR (mfspr RT, 0x03) */ @@ -1432,6 +1488,35 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; + } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * This process has touched LM, so turn it on forever + * for this process + */ + current->thread.fscr |= FSCR_LM; + mtspr(SPRN_FSCR, current->thread.fscr); + return; + } + + if (status == FSCR_TM_LG) { + /* + * If we're here then the hardware is TM aware because it + * generated an exception with FSRM_TM set. + * + * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware + * told us not to do TM, or the kernel is not built with TM + * support. + * + * If both of those things are true, then userspace can spam the + * console by triggering the printk() below just by continually + * doing tbegin (or any TM instruction). So in that case just + * send the process a SIGILL immediately. + */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto out; + + tm_unavailable(regs); + return; } if ((status < ARRAY_SIZE(facility_strings)) && @@ -1446,6 +1531,7 @@ void facility_unavailable_exception(struct pt_regs *regs) "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); +out: if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; @@ -1487,7 +1573,8 @@ void fp_unavailable_tm(struct pt_regs *regs) /* If VMX is in use, get the transactional values back */ if (regs->msr & MSR_VEC) { - do_load_up_transact_altivec(¤t->thread); + msr_check_and_set(MSR_VEC); + load_vr_state(¤t->thread.vr_state); /* At this point all the VSX state is loaded, so enable it */ regs->msr |= MSR_VSX; } @@ -1508,7 +1595,8 @@ void altivec_unavailable_tm(struct pt_regs *regs) current->thread.used_vr = 1; if (regs->msr & MSR_FP) { - do_load_up_transact_fpu(¤t->thread); + msr_check_and_set(MSR_FP); + load_fp_state(¤t->thread.fp_state); regs->msr |= MSR_VSX; } } @@ -1547,10 +1635,12 @@ void vsx_unavailable_tm(struct pt_regs *regs) */ tm_recheckpoint(¤t->thread, regs->msr & ~orig_msr); + msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC)); + if (orig_msr & MSR_FP) - do_load_up_transact_fpu(¤t->thread); + load_fp_state(¤t->thread.fp_state); if (orig_msr & MSR_VEC) - do_load_up_transact_altivec(¤t->thread); + load_vr_state(¤t->thread.vr_state); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -1639,7 +1729,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) +void DebugException(struct pt_regs *regs, unsigned long debug_status) { current->thread.debug.dbsr = debug_status; @@ -1700,6 +1790,7 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) } else handle_debug(regs, debug_status); } +NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #if !defined(CONFIG_TAU_INT) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6767605ea8da..4111d30badfa 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,6 +22,7 @@ #include <linux/security.h> #include <linux/memblock.h> +#include <asm/cpu_has_feature.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index cbabd143acae..78a7449bf489 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index c710802b8fb6..31107bf5a61f 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -31,15 +31,9 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -# assembly rules for the .S files -$(obj-vdso64): %.o: %.S - $(call if_changed_dep,vdso64as) - # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ -quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 184a6ba7f283..abf17feffe40 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -59,7 +59,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) bl V_LOCAL_FUNC(__get_datapage) mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 - cmpli cr0,r4,0 + cmpldi cr0,r4,0 crclr cr0*4+so beqlr li r0,NR_syscalls diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index a76b4af37ef2..382021324883 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -145,7 +145,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) bne cr0,99f li r3,0 - cmpli cr0,r4,0 + cmpldi cr0,r4,0 crclr cr0*4+so beqlr lis r5,CLOCK_REALTIME_RES@h diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 1c2e7a343bf5..0c123f3406cd 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -6,31 +6,7 @@ #include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* void do_load_up_transact_altivec(struct thread_struct *thread) - * - * This is similar to load_up_altivec but for the transactional version of the - * vector regs. It doesn't mess with the task MSR or valid flags. - * Furthermore, VEC laziness is not supported with TM currently. - */ -_GLOBAL(do_load_up_transact_altivec) - mfmsr r6 - oris r5,r6,MSR_VEC@h - MTMSRD(r5) - isync - - li r4,1 - stw r4,THREAD_USED_VR(r3) - - li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR - lvx v0,r10,r3 - mtvscr v0 - addi r10,r3,THREAD_TRANSACT_VRSTATE - REST_32VRS(0,r4,r10) - - blr -#endif +#include <asm/export.h> /* * Load state from memory into VMX registers including VSCR. @@ -42,6 +18,7 @@ _GLOBAL(load_vr_state) mtvscr v0 REST_32VRS(0,r4,r3) blr +EXPORT_SYMBOL(load_vr_state) /* * Store VMX state into memory, including VSCR. @@ -53,6 +30,7 @@ _GLOBAL(store_vr_state) li r4, VRSTATE_VSCR stvx v0, r4, r3 blr +EXPORT_SYMBOL(store_vr_state) /* * Disable VMX for the task which had it previously, @@ -70,10 +48,11 @@ _GLOBAL(load_up_altivec) MTMSRD(r5) /* enable use of AltiVec now */ isync - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's + /* + * While userspace in general ignores VRSAVE, glibc uses it as a boolean + * to optimise userspace context save/restore. Whenever we take an + * altivec unavailable exception we must set VRSAVE to something non + * zero. Set it to all 1s. See also the programming note in the ISA. */ mfspr r4,SPRN_VRSAVE cmpwi 0,r4,0 diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 8d7358f3a273..b3813ddb2fb4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -482,7 +482,7 @@ static void vio_cmo_balance(struct work_struct *work) static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); void *ret; @@ -503,7 +503,7 @@ static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); @@ -515,7 +515,7 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; @@ -539,7 +539,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; @@ -552,7 +552,7 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; @@ -588,7 +588,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, static void vio_dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2dd91f79de05..8295f51c1a5f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -44,18 +44,68 @@ SECTIONS * Text, read only data and other permanent read-only sections */ - /* Text and gots */ + _text = .; + _stext = .; + + /* + * Head text. + * This needs to be in its own output section to avoid ld placing + * branch trampoline stubs randomly throughout the fixed sections, + * which it will do (even if the branch comes from another section) + * in order to optimize stub generation. + */ + .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { +#ifdef CONFIG_PPC64 + KEEP(*(.head.text.first_256B)); +#ifdef CONFIG_PPC_BOOK3E +# define END_FIXED 0x100 +#else + KEEP(*(.head.text.real_vectors)); + *(.head.text.real_trampolines); + KEEP(*(.head.text.virt_vectors)); + *(.head.text.virt_trampolines); +# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) + KEEP(*(.head.data.fwnmi_page)); +# define END_FIXED 0x8000 +# else +# define END_FIXED 0x7000 +# endif +#endif + ASSERT((. == END_FIXED), "vmlinux.lds.S: fixed section overflow error"); +#else /* !CONFIG_PPC64 */ + HEAD_TEXT +#endif + } :kernel + + /* + * If the build dies here, it's likely code in head_64.S is referencing + * labels it can't reach, and the linker inserting stubs without the + * assembler's knowledge. To debug, remove the above assert and + * rebuild. Look for branch stubs in the fixed section region. + * + * Linker stub generation could be allowed in "trampoline" + * sections if absolutely necessary, but this would require + * some rework of the fixed sections. Before resorting to this, + * consider references that have sufficient addressing range, + * (e.g., hand coded trampolines) so the linker does not have + * to add stubs. + * + * Linker stubs at the top of the main text section are currently not + * detected, and will result in a crash at boot due to offsets being + * wrong. + */ .text : AT(ADDR(.text) - LOAD_OFFSET) { ALIGN_FUNCTION(); - HEAD_TEXT - _text = .; /* careful! __ftr_alt_* sections need to be close to .text */ *(.text .fixup __ftr_alt_* .ref.text) SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + MEM_KEEP(init.text) + MEM_KEEP(exit.text) #ifdef CONFIG_PPC32 *(.got1) @@ -165,7 +215,7 @@ SECTIONS . = ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_RELOCATABLE_PPC32 +#ifdef CONFIG_PPC32 __dynamic_symtab = .; #endif *(.dynsym) |