diff options
Diffstat (limited to 'arch/powerpc/kernel')
39 files changed, 2090 insertions, 1296 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 811f441a125f..b9db46ae545b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) -# do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # timers used by tracing CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif @@ -97,6 +95,7 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -118,10 +117,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o -obj-$(CONFIG_TRACING) += trace_clock.o +obj-y += trace/ ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) obj-y += iomap.o @@ -142,14 +138,14 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o # Disable GCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n -GCOV_PROFILE_ftrace.o := n -UBSAN_SANITIZE_ftrace.o := n GCOV_PROFILE_machine_kexec_64.o := n UBSAN_SANITIZE_machine_kexec_64.o := n GCOV_PROFILE_machine_kexec_32.o := n UBSAN_SANITIZE_machine_kexec_32.o := n GCOV_PROFILE_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n +GCOV_PROFILE_kprobes-ftrace.o := n +UBSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_vdso.o := n extra-$(CONFIG_PPC_FPU) += fpu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4367e7df51a1..439c257dec4a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -185,6 +185,7 @@ int main(void) #ifdef CONFIG_PPC_MM_SLICES OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); + DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit)); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ #endif @@ -219,6 +220,7 @@ int main(void) OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXSLB, paca_struct, exslb); + OFFSET(PACA_EXNMI, paca_struct, exnmi); OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); @@ -232,7 +234,9 @@ int main(void) OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); + OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); + OFFSET(PACA_IN_NMI, paca_struct, in_nmi); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); @@ -399,8 +403,8 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef MAX_PGD_TABLE_SIZE - DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE); +#ifdef CONFIG_PPC_BOOK3S_64 + DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE))); #else DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); #endif @@ -727,6 +731,7 @@ int main(void) OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); + OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 7fe8c79e6937..10cb2896b2ae 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -29,7 +29,8 @@ _GLOBAL(__setup_cpu_power7) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - bl __init_LPCR + li r4,(LPCR_LPES1 >> LPCR_LPES_SH) + bl __init_LPCR_ISA206 bl __init_tlb_power7 mtlr r11 blr @@ -42,7 +43,8 @@ _GLOBAL(__restore_cpu_power7) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - bl __init_LPCR + li r4,(LPCR_LPES1 >> LPCR_LPES_SH) + bl __init_LPCR_ISA206 bl __init_tlb_power7 mtlr r11 blr @@ -59,7 +61,8 @@ _GLOBAL(__setup_cpu_power8) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA206 bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV @@ -80,7 +83,8 @@ _GLOBAL(__restore_cpu_power8) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA206 bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV @@ -99,11 +103,12 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) andc r3, r3, r4 - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA300 bl __init_HFSCR bl __init_tlb_power9 bl __init_PMU_HV @@ -122,11 +127,12 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) andc r3, r3, r4 - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA300 bl __init_HFSCR bl __init_tlb_power9 bl __init_PMU_HV @@ -144,9 +150,9 @@ __init_hvmode_206: std r5,CPU_SPEC_FEATURES(r4) blr -__init_LPCR: +__init_LPCR_ISA206: /* Setup a sane LPCR: - * Called with initial LPCR in R3 + * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 * * LPES = 0b01 (HSRR0/1 used for 0x500) * PECE = 0b111 @@ -157,16 +163,18 @@ __init_LPCR: * * Other bits untouched for now */ - li r5,1 - rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 + li r5,0x10 + rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 + + /* POWER9 has no VRMASD */ +__init_LPCR_ISA300: + rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) li r5,4 rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 clrrdi r3,r3,1 /* clear HDICE */ li r5,4 rldimi r3,r5, LPCR_VC_SH, 0 - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 mtspr SPRN_LPCR,r3 isync blr diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 2128f3a96c32..b6fe883b1016 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -20,18 +20,60 @@ #include <asm/kvm_ppc.h> #ifdef CONFIG_SMP -void doorbell_setup_this_cpu(void) + +/* + * Doorbells must only be used if CPU_FTR_DBELL is available. + * msgsnd is used in HV, and msgsndp is used in !HV. + * + * These should be used by platform code that is aware of restrictions. + * Other arch code should use ->cause_ipi. + * + * doorbell_global_ipi() sends a dbell to any target CPU. + * Must be used only by architectures that address msgsnd target + * by PIR/get_hard_smp_processor_id. + */ +void doorbell_global_ipi(int cpu) { - unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK; + u32 tag = get_hard_smp_processor_id(cpu); - smp_muxed_ipi_set_data(smp_processor_id(), tag); + kvmppc_set_host_ipi(cpu, 1); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); } -void doorbell_cause_ipi(int cpu, unsigned long data) +/* + * doorbell_core_ipi() sends a dbell to a target CPU in the same core. + * Must be used only by architectures that address msgsnd target + * by TIR/cpu_thread_in_core. + */ +void doorbell_core_ipi(int cpu) { + u32 tag = cpu_thread_in_core(cpu); + + kvmppc_set_host_ipi(cpu, 1); /* Order previous accesses vs. msgsnd, which is treated as a store */ - mb(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data); + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * Attempt to cause a core doorbell if destination is on the same core. + * Returns 1 on success, 0 on failure. + */ +int doorbell_try_core_ipi(int cpu) +{ + int this_cpu = get_cpu(); + int ret = 0; + + if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { + doorbell_core_ipi(cpu); + ret = 1; + } + + put_cpu(); + + return ret; } void doorbell_exception(struct pt_regs *regs) @@ -40,12 +82,14 @@ void doorbell_exception(struct pt_regs *regs) irq_enter(); + ppc_msgsync(); + may_hard_irq_enable(); kvmppc_set_host_ipi(smp_processor_id(), 0); __this_cpu_inc(irq_stat.doorbell_irqs); - smp_ipi_demux(); + smp_ipi_demux_relaxed(); /* already performed the barrier */ irq_exit(); set_irq_regs(old_regs); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9de7f79e702b..63992b2d8e15 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -22,7 +22,6 @@ */ #include <linux/delay.h> -#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> @@ -37,7 +36,7 @@ #include <linux/of.h> #include <linux/atomic.h> -#include <asm/debug.h> +#include <asm/debugfs.h> #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b94887165a10..c405c79e50cd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -724,7 +724,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ #define MAX_WAIT_FOR_RECOVERY 300 -static void eeh_handle_normal_event(struct eeh_pe *pe) +/** + * eeh_handle_normal_event - Handle EEH events on a specific PE + * @pe: EEH PE + * + * Attempts to recover the given PE. If recovery fails or the PE has failed + * too many times, remove the PE. + * + * Returns true if @pe should no longer be used, else false. + */ +static bool eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; struct eeh_dev *edev, *tmp; @@ -736,13 +745,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (!frozen_bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return; + return false; } eeh_pe_update_time_stamp(pe); pe->freeze_count++; - if (pe->freeze_count > eeh_max_freezes) - goto excess_failures; + if (pe->freeze_count > eeh_max_freezes) { + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" + "last hour and has been permanently disabled.\n", + pe->phb->global_number, pe->addr, + pe->freeze_count); + goto hard_fail; + } pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); @@ -870,27 +884,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return; + return false; -excess_failures: +hard_fail: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" - "last hour and has been permanently disabled.\n" - "Please try reseating or replacing it.\n", - pe->phb->global_number, pe->addr, - pe->freeze_count); - goto perm_error; - -hard_fail: pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" "Please try reseating or replacing it\n", pe->phb->global_number, pe->addr); -perm_error: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ @@ -915,10 +920,21 @@ perm_error: pci_lock_rescan_remove(); pci_hp_remove_devices(frozen_bus); pci_unlock_rescan_remove(); + + /* The passed PE should no longer be used */ + return true; } } + return false; } +/** + * eeh_handle_special_event - Handle EEH events without a specific failing PE + * + * Called when an EEH event is detected but can't be narrowed down to a + * specific PE. Iterates through possible failures and handles them as + * necessary. + */ static void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; @@ -982,7 +998,14 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - eeh_handle_normal_event(pe); + /* + * eeh_handle_normal_event() can make the PE stale if it + * determines that the PE cannot possibly be recovered. + * Don't modify the PE state if that's the case. + */ + if (eeh_handle_normal_event(pe)) + continue; + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index a38600949f3a..8587059ad848 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -31,7 +31,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> -#include <asm/ftrace.h> #include <asm/ptrace.h> #include <asm/export.h> @@ -1315,109 +1314,3 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -_GLOBAL(mcount) -_GLOBAL(_mcount) - /* - * It is required that _mcount on PPC32 must preserve the - * link register. But we have r0 to play with. We use r0 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. - */ - mflr r0 - mtctr r0 - lwz r0, 4(r1) - mtlr r0 - bctr - -_GLOBAL(ftrace_caller) - MCOUNT_SAVE_FRAME - /* r3 ends up with link register */ - subi r3, r3, MCOUNT_INSN_SIZE -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ - bctr -#else -_GLOBAL(mcount) -_GLOBAL(_mcount) - - MCOUNT_SAVE_FRAME - - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5, ftrace_trace_function) - lwz r5,0(r5) - - mtctr r5 - bctrl - nop - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - MCOUNT_RESTORE_FRAME - bctr -#endif -EXPORT_SYMBOL(_mcount) - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - /* load r4 with local address */ - lwz r4, 44(r1) - subi r4, r4, MCOUNT_INSN_SIZE - - /* Grab the LR out of the caller stack frame */ - lwz r3,52(r1) - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - stw r3,52(r1) - - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ - bctr - -_GLOBAL(return_to_handler) - /* need to save return values */ - stwu r1, -32(r1) - stw r3, 20(r1) - stw r4, 16(r1) - stw r31, 12(r1) - mr r31, r1 - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - lwz r3, 20(r1) - lwz r4, 16(r1) - lwz r31,12(r1) - lwz r1, 0(r1) - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 767ef6d68c9e..bfbad08a1207 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -20,7 +20,6 @@ #include <linux/errno.h> #include <linux/err.h> -#include <linux/magic.h> #include <asm/unistd.h> #include <asm/processor.h> #include <asm/page.h> @@ -33,7 +32,6 @@ #include <asm/bug.h> #include <asm/ptrace.h> #include <asm/irqflags.h> -#include <asm/ftrace.h> #include <asm/hw_irq.h> #include <asm/context_tracking.h> #include <asm/tm.h> @@ -1173,381 +1171,3 @@ _GLOBAL(enter_prom) ld r0,16(r1) mtlr r0 blr - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -_GLOBAL(mcount) -_GLOBAL(_mcount) -EXPORT_SYMBOL(_mcount) - mflr r12 - mtctr r12 - mtlr r0 - bctr - -#ifndef CC_USING_MPROFILE_KERNEL -_GLOBAL_TOC(ftrace_caller) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - subi r3, r3, MCOUNT_INSN_SIZE -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - -#else /* CC_USING_MPROFILE_KERNEL */ -/* - * - * ftrace_caller() is the function that replaces _mcount() when ftrace is - * active. - * - * We arrive here after a function A calls function B, and we are the trace - * function for B. When we enter r1 points to A's stack frame, B has not yet - * had a chance to allocate one yet. - * - * Additionally r2 may point either to the TOC for A, or B, depending on - * whether B did a TOC setup sequence before calling us. - * - * On entry the LR points back to the _mcount() call site, and r0 holds the - * saved LR as it was on entry to B, ie. the original return address at the - * call site in A. - * - * Our job is to save the register state into a struct pt_regs (on the stack) - * and then arrange for the ftrace function to be called. - */ -_GLOBAL(ftrace_caller) - /* Save the original return address in A's stack frame */ - std r0,LRSAVE(r1) - - /* Create our stack frame + pt_regs */ - stdu r1,-SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) - - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip & pt_regs->link */ - std r7, _NIP(r1) - std r7, _LINK(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2,PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3,r2,function_trace_op@toc@ha - addi r3,r3,function_trace_op@toc@l - ld r5,0(r3) - -#ifdef CONFIG_LIVEPATCH - mr r14,r7 /* remember old NIP */ -#endif - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - std r8, _MSR(r1) - std r9, _CTR(r1) - std r10, _XER(r1) - std r11, _CCR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 -#ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ -#endif - - /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - - /* Restore original LR for return to B */ - ld r0, LRSAVE(r1) - mtlr r0 - -#ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - stdu r1, -112(r1) -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) - addi r1, r1, 112 -#endif - - ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ - mtlr r0 - bctr /* jump after _mcount site */ -#endif /* CC_USING_MPROFILE_KERNEL */ - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_LIVEPATCH - /* - * This function runs in the mcount context, between two functions. As - * such it can only clobber registers which are volatile and used in - * function linkage. - * - * We get here when a function A, calls another function B, but B has - * been live patched with a new function C. - * - * On entry: - * - we have no stack frame and can not allocate one - * - LR points back to the original caller (in A) - * - CTR holds the new NIP in C - * - r0 & r12 are free - * - * r0 can't be used as the base register for a DS-form load or store, so - * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. - */ -livepatch_handler: - CURRENT_THREAD_INFO(r12, r1) - - /* Save stack pointer into r0 */ - mr r0, r1 - - /* Allocate 3 x 8 bytes */ - ld r1, TI_livepatch_sp(r12) - addi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Save toc & real LR on livepatch stack */ - std r2, -24(r1) - mflr r12 - std r12, -16(r1) - - /* Store stack end marker */ - lis r12, STACK_END_MAGIC@h - ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r1) - - /* Restore real stack pointer */ - mr r1, r0 - - /* Put ctr in r12 for global entry and branch there */ - mfctr r12 - bctrl - - /* - * Now we are returning from the patched function to the original - * caller A. We are free to use r0 and r12, and we can use r2 until we - * restore it. - */ - - CURRENT_THREAD_INFO(r12, r1) - - /* Save stack pointer into r0 */ - mr r0, r1 - - ld r1, TI_livepatch_sp(r12) - - /* Check stack marker hasn't been trashed */ - lis r2, STACK_END_MAGIC@h - ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r1) -1: tdne r12, r2 - EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 - - /* Restore LR & toc from livepatch stack */ - ld r12, -16(r1) - mtlr r12 - ld r2, -24(r1) - - /* Pop livepatch stack frame */ - CURRENT_THREAD_INFO(r12, r0) - subi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Restore real stack pointer */ - mr r1, r0 - - /* Return to original caller of live patched function */ - blr -#endif - - -#else -_GLOBAL_TOC(_mcount) -EXPORT_SYMBOL(_mcount) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5,ftrace_trace_function) - ld r5,0(r5) - ld r5,0(r5) - mtctr r5 - bctrl - nop - - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 -_GLOBAL(ftrace_stub) - blr - -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifndef CC_USING_MPROFILE_KERNEL -_GLOBAL(ftrace_graph_caller) - /* load r4 with local address */ - ld r4, 128(r1) - subi r4, r4, MCOUNT_INSN_SIZE - - /* Grab the LR out of the caller stack frame */ - ld r11, 112(r1) - ld r3, 16(r11) - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - ld r11, 112(r1) - std r3, 16(r11) - - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - blr - -#else /* CC_USING_MPROFILE_KERNEL */ -_GLOBAL(ftrace_graph_caller) - /* with -mprofile-kernel, parameter regs are still alive at _mcount */ - std r10, 104(r1) - std r9, 96(r1) - std r8, 88(r1) - std r7, 80(r1) - std r6, 72(r1) - std r5, 64(r1) - std r4, 56(r1) - std r3, 48(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - mfctr r4 /* ftrace_caller has moved local addr here */ - std r4, 40(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR to this. - */ - mtlr r3 - - ld r0, 40(r1) - mtctr r0 - ld r10, 104(r1) - ld r9, 96(r1) - ld r8, 88(r1) - ld r7, 80(r1) - ld r6, 72(r1) - ld r5, 64(r1) - ld r4, 56(r1) - ld r3, 48(r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - addi r1, r1, 112 - mflr r0 - std r0, LRSAVE(r1) - bctr -#endif /* CC_USING_MPROFILE_KERNEL */ - -_GLOBAL(return_to_handler) - /* need to save return values */ - std r4, -32(r1) - std r3, -24(r1) - /* save TOC */ - std r2, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdu r1, -112(r1) - - /* - * We might be called from a module. - * Switch to our TOC to run inside the core kernel. - */ - ld r2, PACATOC(r13) - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - ld r1, 0(r1) - ld r4, -32(r1) - ld r3, -24(r1) - ld r2, -16(r1) - ld r31, -8(r1) - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6353019966e6..a9312b52fe6f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,9 +116,11 @@ EXC_VIRT_NONE(0x4000, 0x100) EXC_REAL_BEGIN(system_reset, 0x100, 0x100) SET_SCRATCH0(r13) - GET_PACA(r13) - clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ - EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, + /* + * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is + * being used, so a nested NMI exception would corrupt it. + */ + EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x100) @@ -126,34 +128,37 @@ EXC_VIRT_NONE(0x4100, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) -BEGIN_FTR_SECTION - GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - bl pnv_restore_hyp_resource + b pnv_powersave_wakeup +#endif - li r0,PNV_THREAD_RUNNING - stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ +EXC_COMMON_BEGIN(system_reset_common) + /* + * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able + * to recover, but nested NMI will notice in_nmi and not recover + * because of the use of the NMI stack. in_nmi reentrancy is tested in + * system_reset_exception. + */ + lhz r10,PACA_IN_NMI(r13) + addi r10,r10,1 + sth r10,PACA_IN_NMI(r13) + li r10,MSR_RI + mtmsrd r10,1 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - BRANCH_TO_KVM(r10, kvm_start_guest) -1: -#endif + mr r10,r1 + ld r1,PACA_NMI_EMERG_SP(r13) + subi r1,r1,INT_FRAME_SIZE + EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, + system_reset, system_reset_exception, + ADD_NVGPRS;ADD_RECONCILE) - /* Return SRR1 from power7_nap() */ - mfspr r3,SPRN_SRR1 - blt cr3,2f - b pnv_wakeup_loss -2: b pnv_wakeup_noloss -#endif + /* + * The stack is no longer in use, decrement in_nmi. + */ + lhz r10,PACA_IN_NMI(r13) + subi r10,r10,1 + sth r10,PACA_IN_NMI(r13) -EXC_COMMON(system_reset_common, 0x100, system_reset_exception) + b ret_from_except #ifdef CONFIG_PPC_PSERIES /* @@ -161,8 +166,9 @@ EXC_COMMON(system_reset_common, 0x100, system_reset_exception) */ TRAMP_REAL_BEGIN(system_reset_fwnmi) SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) + /* See comment at system_reset exception */ + EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, + EXC_STD, NOTEST, 0x100) #endif /* CONFIG_PPC_PSERIES */ @@ -172,14 +178,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100) * vector */ SET_SCRATCH0(r13) /* save r13 */ - /* - * Running native on arch 2.06 or later, we may wakeup from winkle - * inside machine check. If yes, then last bit of HSPRG0 would be set - * to 1. Hence clear it unconditionally. - */ - GET_PACA(r13) - clrrdi r13,r13,1 - SET_PACA(r13) EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_powernv_early @@ -212,6 +210,12 @@ BEGIN_FTR_SECTION * NOTE: We are here with MSR_ME=0 (off), which means we risk a * checkstop if we get another machine check exception before we do * rfid with MSR_ME=1. + * + * This interrupt can wake directly from idle. If that is the case, + * the machine check is handled then the idle wakeup code is called + * to restore state. In that case, the POWER9 DD1 idle PACA workaround + * is not applied in the early machine check code, which will cause + * bugs. */ mr r11,r1 /* Save r1 */ lhz r10,PACA_IN_MCE(r13) @@ -268,20 +272,11 @@ machine_check_fwnmi: machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) /* - * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the - * difference that MSR_RI is not enabled, because PACA_EXMC is being - * used, so nested machine check corrupts it. machine_check_common - * enables MSR_RI. + * MSR_RI is not enabled, because PACA_EXMC is being used, so a + * nested machine check corrupts it. machine_check_common enables + * MSR_RI. */ - ld r10,PACAKMSR(r13) - xori r10,r10,MSR_RI - mfspr r11,SPRN_SRR0 - LOAD_HANDLER(r12, machine_check_common) - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 - mtspr SPRN_SRR1,r10 - rfid - b . /* prevent speculative execution */ + EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD) TRAMP_KVM_SKIP(PACA_EXMC, 0x200) @@ -340,6 +335,37 @@ EXC_COMMON_BEGIN(machine_check_common) /* restore original r1. */ \ ld r1,GPR1(r1) +#ifdef CONFIG_PPC_P7_NAP +/* + * This is an idle wakeup. Low level machine check has already been + * done. Queue the event then call the idle code to do the wake up. + */ +EXC_COMMON_BEGIN(machine_check_idle_common) + bl machine_check_queue_event + + /* + * We have not used any non-volatile GPRs here, and as a rule + * most exception code including machine check does not. + * Therefore PACA_NAPSTATELOST does not need to be set. Idle + * wakeup will restore volatile registers. + * + * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * + * Then decrement MCE nesting after finishing with the stack. + */ + ld r3,_MSR(r1) + + lhz r11,PACA_IN_MCE(r13) + subi r11,r11,1 + sth r11,PACA_IN_MCE(r13) + + /* Turn off the RI bit because SRR1 is used by idle wakeup code. */ + /* Recoverability could be improved by reducing the use of SRR1. */ + li r11,0 + mtmsrd r11,1 + + b pnv_powersave_wakeup_mce +#endif /* * Handle machine check early in real mode. We come here with * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. @@ -352,6 +378,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) + #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -362,48 +389,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early) * * Go back to nap/sleep/winkle mode again if (b) is true. */ - rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ - beq 4f /* No, it wasn;t */ - /* Thread was in power saving mode. Go back to nap again. */ - cmpwi r11,2 - blt 3f - /* Supervisor/Hypervisor state loss */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) -3: bl machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - GET_PACA(r13) - ld r1,PACAR1(r13) - /* - * Check what idle state this CPU was in and go back to same mode - * again. - */ - lbz r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi r3,PNV_THREAD_NAP - bgt 10f - IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) - /* No return */ -10: - cmpwi r3,PNV_THREAD_SLEEP - bgt 2f - IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) - /* No return */ - -2: - /* - * Go back to winkle. Please note that this thread was woken up in - * machine check from winkle and have not restored the per-subcore - * state. Hence before going back to winkle, set last bit of HSPRG0 - * to 1. This will make sure that if this thread gets woken up - * again at reset vector 0x100 then it will get chance to restore - * the subcore state. - */ - ori r13,r13,1 - SET_PACA(r13) - IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) - /* No return */ + BEGIN_FTR_SECTION + rlwinm. r11,r12,47-31,30,31 + beq- 4f + BRANCH_TO_COMMON(r10, machine_check_idle_common) 4: + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif + /* * Check if we are coming from hypervisor userspace. If yes then we * continue in host kernel in V mode to deliver the MC event. @@ -968,17 +961,12 @@ EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) - mr r10,r1 /* Save r1 */ - ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - std r9,_CCR(r1) /* save CR in stackframe */ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - std r11,_NIP(r1) /* save HSRR0 in stackframe */ - mfspr r12,SPRN_HSRR1 /* Save SRR1 */ - std r12,_MSR(r1) /* save SRR1 in stackframe */ - std r10,0(r1) /* make stack chain pointer */ - std r0,GPR0(r1) /* save r0 in stackframe */ - std r10,GPR1(r1) /* save r1 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ + EXCEPTION_PROLOG_COMMON_1() EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8ff0dd4e77a7..243dbef7e926 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -30,17 +30,16 @@ #include <linux/string.h> #include <linux/memblock.h> #include <linux/delay.h> -#include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/crash_dump.h> #include <linux/kobject.h> #include <linux/sysfs.h> +#include <asm/debugfs.h> #include <asm/page.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/fadump.h> -#include <asm/debug.h> #include <asm/setup.h> static struct fw_dump fw_dump; @@ -319,15 +318,34 @@ int __init fadump_reserve_mem(void) pr_debug("fadumphdr_addr = %p\n", (void *) fw_dump.fadumphdr_addr); } else { - /* Reserve the memory at the top of memory. */ size = get_fadump_area_size(); - base = memory_boundary - size; - memblock_reserve(base, size); - printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " - "for firmware-assisted dump\n", - (unsigned long)(size >> 20), - (unsigned long)(base >> 20)); + + /* + * Reserve memory at an offset closer to bottom of the RAM to + * minimize the impact of memory hot-remove operation. We can't + * use memblock_find_in_range() here since it doesn't allocate + * from bottom to top. + */ + for (base = fw_dump.boot_memory_size; + base <= (memory_boundary - size); + base += size) { + if (memblock_is_region_memory(base, size) && + !memblock_is_region_reserved(base, size)) + break; + } + if ((base > (memory_boundary - size)) || + memblock_reserve(base, size)) { + pr_err("Failed to reserve memory\n"); + return 0; + } + + pr_info("Reserved %ldMB of memory at %ldMB for firmware-" + "assisted dump (System RAM: %ldMB)\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20), + (unsigned long)(memblock_phys_mem_size() >> 20)); } + fw_dump.reserve_dump_area_start = base; fw_dump.reserve_dump_area_size = size; return 1; diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 1607be7c0ef2..e22734278458 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -735,11 +735,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) - - .globl mol_trampoline - .set mol_trampoline, i0x2f00 - EXPORT_SYMBOL(mol_trampoline) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) . = 0x3000 @@ -1278,16 +1274,6 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE - .globl intercept_table -intercept_table: - .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 - .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 - .long 0, 0, 0, i0x1300, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 -EXPORT_SYMBOL(intercept_table) - /* Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1dc5eae2ced3..0ddc602b33a4 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -949,7 +949,8 @@ start_here_multiplatform: LOAD_REG_ADDR(r3,init_thread_union) /* set up a stack pointer */ - addi r1,r3,THREAD_SIZE + LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) + add r1,r3,r1 li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 6fd08219248d..07d4e0ad60db 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -20,6 +20,7 @@ #include <asm/kvm_book3s_asm.h> #include <asm/opal.h> #include <asm/cpuidle.h> +#include <asm/exception-64s.h> #include <asm/book3s/64/mmu-hash.h> #include <asm/mmu.h> @@ -94,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) core_idle_lock_held: HMT_LOW 3: lwz r15,0(r14) - andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h bne 3b HMT_MEDIUM lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_held + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bne- core_idle_lock_held blr /* @@ -113,7 +114,7 @@ core_idle_lock_held: * * Address to 'rfid' to in r5 */ -_GLOBAL(pnv_powersave_common) +pnv_powersave_common: /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and @@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode: /* The following store to HSTATE_HWTHREAD_STATE(r13) */ /* MUST occur in real mode, i.e. with the MMU off, */ /* and the MMU must stay off until we clear this flag */ - /* and test HSTATE_HWTHREAD_REQ(r13) in the system */ - /* reset interrupt vector in exceptions-64s.S. */ + /* and test HSTATE_HWTHREAD_REQ(r13) in */ + /* pnv_powersave_wakeup in this file. */ /* The reason is that another thread can switch the */ /* MMU to a guest context whenever this flag is set */ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ @@ -209,15 +210,20 @@ pnv_enter_arch207_idle_mode: /* Sleep or winkle */ lbz r7,PACA_THREAD_MASK(r13) ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + li r5,0 + beq cr3,3f + lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h +3: lwarx_loop1: lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bnel core_idle_lock_held + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bnel- core_idle_lock_held + add r15,r15,r5 /* Add if winkle */ andc r15,r15,r7 /* Clear thread bit */ - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS /* * If cr0 = 0, then current thread is the last thread of the core entering @@ -240,7 +246,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h stwcx. r15,0,r14 bne- lwarx_loop1 isync @@ -250,10 +256,10 @@ fastsleep_workaround_at_entry: li r4,1 bl opal_config_cpu_idle_state - /* Clear Lock bit */ - li r0,0 + /* Unlock */ + xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h lwsync - stw r0,0(r14) + stw r15,0(r14) b common_enter enter_winkle: @@ -301,8 +307,8 @@ power_enter_stop: lwarx_loop_stop: lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bnel core_idle_lock_held + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bnel- core_idle_lock_held andc r15,r15,r7 /* Clear thread bit */ stwcx. r15,0,r14 @@ -375,17 +381,113 @@ _GLOBAL(power9_idle_stop) li r4,1 b pnv_powersave_common /* No return */ + /* - * Called from reset vector. Check whether we have woken up with - * hypervisor state loss. If yes, restore hypervisor state and return - * back to reset vector. + * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, + * HSPRG0 will be set to the HSPRG0 value of one of the + * threads in this core. Thus the value we have in r13 + * may not be this thread's paca pointer. + * + * Fortunately, the TIR remains invariant. Since this thread's + * paca pointer is recorded in all its sibling's paca, we can + * correctly recover this thread's paca pointer if we + * know the index of this thread in the core. + * + * This index can be obtained from the TIR. * - * r13 - Contents of HSPRG0 + * i.e, thread's position in the core = TIR. + * If this value is i, then this thread's paca is + * paca->thread_sibling_pacas[i]. + */ +power9_dd1_recover_paca: + mfspr r4, SPRN_TIR + /* + * Since each entry in thread_sibling_pacas is 8 bytes + * we need to left-shift by 3 bits. Thus r4 = i * 8 + */ + sldi r4, r4, 3 + /* Get &paca->thread_sibling_pacas[0] in r5 */ + ld r5, PACA_SIBLING_PACA_PTRS(r13) + /* Load paca->thread_sibling_pacas[i] into r13 */ + ldx r13, r4, r5 + SET_PACA(r13) + /* + * Indicate that we have lost NVGPR state + * which needs to be restored from the stack. + */ + li r3, 1 + stb r0,PACA_NAPSTATELOST(r13) + blr + +/* + * Called from machine check handler for powersave wakeups. + * Low level machine check processing has already been done. Now just + * go through the wake up path to get everything in order. + * + * r3 - The original SRR1 value. + * Original SRR[01] have been clobbered. + * MSR_RI is clear. + */ +.global pnv_powersave_wakeup_mce +pnv_powersave_wakeup_mce: + /* Set cr3 for pnv_powersave_wakeup */ + rlwinm r11,r3,47-31,30,31 + cmpwi cr3,r11,2 + + /* + * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake + * reason into SRR1, which allows reuse of the system reset wakeup + * code without being mistaken for another type of wakeup. + */ + oris r3,r3,SRR1_WAKEMCE_RESVD@h + mtspr SPRN_SRR1,r3 + + b pnv_powersave_wakeup + +/* + * Called from reset vector for powersave wakeups. * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ -_GLOBAL(pnv_restore_hyp_resource) +.global pnv_powersave_wakeup +pnv_powersave_wakeup: + ld r2, PACATOC(r13) + BEGIN_FTR_SECTION - ld r2,PACATOC(r13); +BEGIN_FTR_SECTION_NESTED(70) + bl power9_dd1_recover_paca +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70) + bl pnv_restore_hyp_resource_arch300 +FTR_SECTION_ELSE + bl pnv_restore_hyp_resource_arch207 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + + li r0,PNV_THREAD_RUNNING + stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 1f + b kvm_start_guest +1: +#endif + + /* Return SRR1 from power7_nap() */ + mfspr r3,SPRN_SRR1 + blt cr3,pnv_wakeup_noloss + b pnv_wakeup_loss + +/* + * Check whether we have woken up with hypervisor state loss. + * If yes, restore hypervisor state and return back to link. + * + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +pnv_restore_hyp_resource_arch300: /* * POWER ISA 3. Use PSSCR to determine if we * are waking up from deep idle state @@ -400,31 +502,19 @@ BEGIN_FTR_SECTION */ rldicl r5,r5,4,60 cmpd cr4,r5,r4 - bge cr4,pnv_wakeup_tb_loss - /* - * Waking up without hypervisor state loss. Return to - * reset vector - */ - blr + bge cr4,pnv_wakeup_tb_loss /* returns to caller */ -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + blr /* Waking up without hypervisor state loss. */ +/* Same calling convention as arch300 */ +pnv_restore_hyp_resource_arch207: /* * POWER ISA 2.07 or less. - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. + * Check if we slept with sleep or winkle. */ - clrldi r5,r13,63 - clrrdi r13,r13,1 - - /* Now that we are sure r13 is corrected, load TOC */ - ld r2,PACATOC(r13); - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 - - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ + lbz r4,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r4,PNV_THREAD_NAP + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ /* * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking @@ -433,8 +523,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) */ bgt cr3,. - blr /* Return back to System Reset vector from where - pnv_restore_hyp_resource was invoked */ + blr /* Waking up without hypervisor state loss */ /* * Called if waking up from idle state which can cause either partial or @@ -444,9 +533,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss + * + * If ISA300: * cr4 - gt or eq if waking up from complete hypervisor state loss. + * + * If ISA207: + * r4 - PACA_THREAD_IDLE_STATE */ -_GLOBAL(pnv_wakeup_tb_loss) +pnv_wakeup_tb_loss: ld r1,PACAR1(r13) /* * Before entering any idle state, the NVGPRs are saved in the stack. @@ -473,18 +567,19 @@ _GLOBAL(pnv_wakeup_tb_loss) * is required to return back to reset vector after hypervisor state * restore is complete. */ + mr r18,r4 mflr r17 mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - lbz r7,PACA_THREAD_MASK(r13) ld r14,PACA_CORE_IDLE_STATE_PTR(r13) -lwarx_loop2: - lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + lbz r7,PACA_THREAD_MASK(r13) + /* + * Take the core lock to synchronize against other threads. + * * Lock bit is set in one of the 2 cases- * a. In the sleep/winkle enter path, the last thread is executing * fastsleep workaround code. @@ -492,23 +587,93 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bnel core_idle_lock_held +1: + lwarx r15,0,r14 + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bnel- core_idle_lock_held + oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h + stwcx. r15,0,r14 + bne- 1b + isync - cmpwi cr2,r15,0 + andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS + cmpwi cr2,r9,0 /* * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss + * ISA300: * cr4 - gt or eq if waking up from complete hypervisor state loss. */ - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - BEGIN_FTR_SECTION + /* + * Were we in winkle? + * If yes, check if all threads were in winkle, decrement our + * winkle count, set all thread winkle bits if all were in winkle. + * Check if our thread has a winkle bit set, and set cr4 accordingly + * (to match ISA300, above). Pseudo-code for core idle state + * transitions for ISA207 is as follows (everything happens atomically + * due to store conditional and/or lock bit): + * + * nap_idle() { } + * nap_wake() { } + * + * sleep_idle() + * { + * core_idle_state &= ~thread_in_core + * } + * + * sleep_wake() + * { + * bool first_in_core, first_in_subcore; + * + * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; + * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; + * + * core_idle_state |= thread_in_core; + * } + * + * winkle_idle() + * { + * core_idle_state &= ~thread_in_core; + * core_idle_state += 1 << WINKLE_COUNT_SHIFT; + * } + * + * winkle_wake() + * { + * bool first_in_core, first_in_subcore, winkle_state_lost; + * + * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; + * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; + * + * core_idle_state |= thread_in_core; + * + * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT)) + * core_idle_state |= THREAD_WINKLE_BITS; + * core_idle_state -= 1 << WINKLE_COUNT_SHIFT; + * + * winkle_state_lost = core_idle_state & + * (thread_in_core << WINKLE_THREAD_SHIFT); + * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT); + * } + * + */ + cmpwi r18,PNV_THREAD_WINKLE + bne 2f + andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h + subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h + beq 2f + ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ +2: + /* Shift thread bit to winkle mask, then test if this thread is set, + * and remove it from the winkle bits */ + slwi r8,r7,8 + and r8,r8,r15 + andc r15,r15,r8 + cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) and r4,r4,r15 cmpwi r4,0 /* Check if first in subcore */ @@ -593,7 +758,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_WORC,r4 clear_lock: - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h lwsync stw r15,0(r14) @@ -651,8 +816,7 @@ hypervisor_state_restored: mtspr SPRN_SRR1,r16 mtlr r17 - blr /* Return back to System Reset vector from where - pnv_restore_hyp_resource was invoked */ + blr /* return to pnv_powersave_wakeup */ fastsleep_workaround_at_exit: li r3,1 @@ -664,7 +828,8 @@ fastsleep_workaround_at_exit: * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(pnv_wakeup_loss) +.global pnv_wakeup_loss +pnv_wakeup_loss: ld r1,PACAR1(r13) BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -684,7 +849,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(pnv_wakeup_noloss) +pnv_wakeup_noloss: lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 bne pnv_wakeup_loss diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5f202a566ec5..5a3231fedf08 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) return tbl; } -void iommu_free_table(struct iommu_table *tbl, const char *node_name) +static void iommu_table_free(struct kref *kref) { unsigned long bitmap_sz; unsigned int order; + struct iommu_table *tbl; - if (!tbl) - return; + tbl = container_of(kref, struct iommu_table, it_kref); + + if (tbl->it_ops->free) + tbl->it_ops->free(tbl); if (!tbl->it_map) { kfree(tbl); @@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) - pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); + pr_warn("%s: Unexpected TCEs\n", __func__); /* calculate bitmap size in bytes */ bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); @@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) kfree(tbl); } +struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl) +{ + if (kref_get_unless_zero(&tbl->it_kref)) + return tbl; + + return NULL; +} +EXPORT_SYMBOL_GPL(iommu_tce_table_get); + +int iommu_tce_table_put(struct iommu_table *tbl) +{ + if (WARN_ON(!tbl)) + return 0; + + return kref_put(&tbl->it_kref, iommu_table_free); +} +EXPORT_SYMBOL_GPL(iommu_tce_table_put); + /* Creates TCEs for a user provided buffer. The user buffer must be * contiguous real kernel storage (not vmalloc). The address passed here * comprises a page address and offset into that page. The dma_addr_t @@ -1004,6 +1025,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); +#ifdef CONFIG_PPC_BOOK3S_64 +long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret; + + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) { + struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); + + if (likely(pg)) { + SetPageDirty(pg); + } else { + tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + ret = -EFAULT; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); +#endif + int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a018f5cae899..5c291df30fe3 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -65,7 +65,6 @@ #include <asm/machdep.h> #include <asm/udbg.h> #include <asm/smp.h> -#include <asm/debug.h> #include <asm/livepatch.h> #include <asm/asm-prototypes.h> @@ -442,46 +441,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -#ifdef CONFIG_HOTPLUG_CPU -void migrate_irqs(void) -{ - struct irq_desc *desc; - unsigned int irq; - static int warned; - cpumask_var_t mask; - const struct cpumask *map = cpu_online_mask; - - alloc_cpumask_var(&mask, GFP_KERNEL); - - for_each_irq_desc(irq, desc) { - struct irq_data *data; - struct irq_chip *chip; - - data = irq_desc_get_irq_data(desc); - if (irqd_is_per_cpu(data)) - continue; - - chip = irq_data_get_irq_chip(data); - - cpumask_and(mask, irq_data_get_affinity_mask(data), map); - if (cpumask_any(mask) >= nr_cpu_ids) { - pr_warn("Breaking affinity for irq %i\n", irq); - cpumask_copy(mask, map); - } - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, mask, true); - else if (desc->action && !(warned++)) - pr_err("Cannot set affinity for irq %i\n", irq); - } - - free_cpumask_var(mask); - - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c new file mode 100644 index 000000000000..6c089d9757c9 --- /dev/null +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -0,0 +1,104 @@ +/* + * Dynamic Ftrace based Kprobes Optimization + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Hitachi Ltd., 2012 + * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> + * IBM Corporation + */ +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/hardirq.h> +#include <linux/preempt.h> +#include <linux/ftrace.h> + +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, unsigned long orig_nip) +{ + /* + * Emulate singlestep (and also recover regs->nip) + * as if there is a nop + */ + regs->nip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + if (orig_nip) + regs->nip = orig_nip; + return 1; +} + +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if (kprobe_ftrace(p)) + return __skip_singlestep(p, regs, kcb, 0); + else + return 0; +} +NOKPROBE_SYMBOL(skip_singlestep); + +/* Ftrace callback handler for kprobes */ +void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + hard_irq_disable(); + + p = get_kprobe((kprobe_opcode_t *)nip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + unsigned long orig_nip = regs->nip; + + /* + * On powerpc, NIP is *before* this instruction for the + * pre handler + */ + regs->nip -= MCOUNT_INSN_SIZE; + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + __skip_singlestep(p, regs, kcb, orig_nip); + /* + * If pre_handler returns !0, it sets regs->nip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fce05a38851c..160ae0fa7d0d 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,6 +35,7 @@ #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> +#include <asm/sections.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -42,7 +43,86 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; -int __kprobes arch_prepare_kprobe(struct kprobe *p) +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)_stext && + addr < (unsigned long)__head_end); +} + +kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) +{ + kprobe_opcode_t *addr; + +#ifdef PPC64_ELF_ABI_v2 + /* PPC64 ABIv2 needs local entry point */ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + if (addr && !offset) { +#ifdef CONFIG_KPROBES_ON_FTRACE + unsigned long faddr; + /* + * Per livepatch.h, ftrace location is always within the first + * 16 bytes of a function on powerpc with -mprofile-kernel. + */ + faddr = ftrace_location_range((unsigned long)addr, + (unsigned long)addr + 16); + if (faddr) + addr = (kprobe_opcode_t *)faddr; + else +#endif + addr = (kprobe_opcode_t *)ppc_function_entry(addr); + } +#elif defined(PPC64_ELF_ABI_v1) + /* + * 64bit powerpc ABIv1 uses function descriptors: + * - Check for the dot variant of the symbol first. + * - If that fails, try looking up the symbol provided. + * + * This ensures we always get to the actual symbol and not + * the descriptor. + * + * Also handle <module:symbol> format. + */ + char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; + const char *modsym; + bool dot_appended = false; + if ((modsym = strchr(name, ':')) != NULL) { + modsym++; + if (*modsym != '\0' && *modsym != '.') { + /* Convert to <module:.symbol> */ + strncpy(dot_name, name, modsym - name); + dot_name[modsym - name] = '.'; + dot_name[modsym - name + 1] = '\0'; + strncat(dot_name, modsym, + sizeof(dot_name) - (modsym - name) - 2); + dot_appended = true; + } else { + dot_name[0] = '\0'; + strncat(dot_name, name, sizeof(dot_name) - 1); + } + } else if (name[0] != '.') { + dot_name[0] = '.'; + dot_name[1] = '\0'; + strncat(dot_name, name, KSYM_NAME_LEN - 2); + dot_appended = true; + } else { + dot_name[0] = '\0'; + strncat(dot_name, name, KSYM_NAME_LEN - 1); + } + addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); + if (!addr && dot_appended) { + /* Let's try the original non-dot symbol lookup */ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + } +#else + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); +#endif + + return addr; +} + +int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; kprobe_opcode_t insn = *p->addr; @@ -74,30 +154,34 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) p->ainsn.boostable = 0; return ret; } +NOKPROBE_SYMBOL(arch_prepare_kprobe); -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +NOKPROBE_SYMBOL(arch_arm_kprobe); -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +NOKPROBE_SYMBOL(arch_disarm_kprobe); -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, 0); p->ainsn.insn = NULL; } } +NOKPROBE_SYMBOL(arch_remove_kprobe); -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { enable_single_step(regs); @@ -110,37 +194,80 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)p->ainsn.insn; } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) +bool arch_function_offset_within_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; /* Replace the return addr with trampoline addr */ regs->link = (unsigned long)kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -int __kprobes kprobe_handler(struct pt_regs *regs) +int try_to_emulate(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + unsigned int insn = *p->ainsn.insn; + + /* regs->nip is also adjusted if emulate_step returns 1 */ + ret = emulate_step(regs, insn); + if (ret > 0) { + /* + * Once this instruction has been boosted + * successfully, set the boostable flag + */ + if (unlikely(p->ainsn.boostable == 0)) + p->ainsn.boostable = 1; + } else if (ret < 0) { + /* + * We don't allow kprobes on mtmsr(d)/rfi(d), etc. + * So, we should never get here... but, its still + * good to catch them, just in case... + */ + printk("Can't step on instruction %x\n", insn); + BUG(); + } else if (ret == 0) + /* This instruction can't be boosted */ + p->ainsn.boostable = -1; + + return ret; +} +NOKPROBE_SYMBOL(try_to_emulate); + +int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -177,10 +304,17 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - kcb->kprobe_saved_msr = regs->msr; kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; + if (p->ainsn.boostable >= 0) { + ret = try_to_emulate(p, regs); + + if (ret > 0) { + restore_previous_kprobe(kcb); + return 1; + } + } return 1; } else { if (*addr != BREAKPOINT_INSTRUCTION) { @@ -197,7 +331,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs) } p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; + if (!skip_singlestep(p, regs, kcb)) + goto ss_probe; + ret = 1; } } goto no_kprobe; @@ -235,18 +371,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs) ss_probe: if (p->ainsn.boostable >= 0) { - unsigned int insn = *p->ainsn.insn; + ret = try_to_emulate(p, regs); - /* regs->nip is also adjusted if emulate_step returns 1 */ - ret = emulate_step(regs, insn); if (ret > 0) { - /* - * Once this instruction has been boosted - * successfully, set the boostable flag - */ - if (unlikely(p->ainsn.boostable == 0)) - p->ainsn.boostable = 1; - if (p->post_handler) p->post_handler(p, regs, 0); @@ -254,17 +381,7 @@ ss_probe: reset_current_kprobe(); preempt_enable_no_resched(); return 1; - } else if (ret < 0) { - /* - * We don't allow kprobes on mtmsr(d)/rfi(d), etc. - * So, we should never get here... but, its still - * good to catch them, just in case... - */ - printk("Can't step on instruction %x\n", insn); - BUG(); - } else if (ret == 0) - /* This instruction can't be boosted */ - p->ainsn.boostable = -1; + } } prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; @@ -274,6 +391,7 @@ no_kprobe: preempt_enable_no_resched(); return ret; } +NOKPROBE_SYMBOL(kprobe_handler); /* * Function return probe trampoline: @@ -291,8 +409,7 @@ asm(".global kretprobe_trampoline\n" /* * Called when the probe at kretprobe trampoline is hit */ -static int __kprobes trampoline_probe_handler(struct kprobe *p, - struct pt_regs *regs) +static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -361,6 +478,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, */ return 1; } +NOKPROBE_SYMBOL(trampoline_probe_handler); /* * Called after single-stepping. p->addr is the address of the @@ -370,7 +488,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -int __kprobes kprobe_post_handler(struct pt_regs *regs) +int kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -410,8 +528,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_post_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -474,13 +593,15 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) } return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); unsigned long arch_deref_entry_point(void *entry) { return ppc_global_function_entry(entry); } +NOKPROBE_SYMBOL(arch_deref_entry_point); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -497,17 +618,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __used __kprobes jprobe_return(void) +void __used jprobe_return(void) { asm volatile("trap" ::: "memory"); } +NOKPROBE_SYMBOL(jprobe_return); -static void __used __kprobes jprobe_return_end(void) +static void __used jprobe_return_end(void) { -}; +} +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -520,6 +644,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) preempt_enable_no_resched(); return 1; } +NOKPROBE_SYMBOL(longjmp_break_handler); static struct kprobe trampoline_p = { .addr = (kprobe_opcode_t *) &kretprobe_trampoline, @@ -531,10 +656,11 @@ int __init arch_init_kprobes(void) return register_kprobe(&trampoline_p); } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) return 1; return 0; } +NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a1475e6aef3a..5f9eada3519b 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -221,6 +221,8 @@ static void machine_check_process_queued_event(struct irq_work *work) { int index; + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* * For now just print it to console. * TODO: log this error event to FSP or nvram. @@ -228,12 +230,13 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - this_cpu_ptr(&mce_event_queue[index])); + this_cpu_ptr(&mce_event_queue[index]), false); __this_cpu_dec(mce_queue_count); } } -void machine_check_print_event_info(struct machine_check_event *evt) +void machine_check_print_event_info(struct machine_check_event *evt, + bool user_mode) { const char *level, *sevstr, *subtype; static const char *mc_ue_types[] = { @@ -310,7 +313,16 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s%s Machine check interrupt [%s]\n", level, sevstr, evt->disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "[Not recovered"); + "Recovered" : "Not recovered"); + + if (user_mode) { + printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, + evt->srr0, current->pid, current->comm); + } else { + printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, + (void *)evt->srr0); + } + printk("%s Initiator: %s\n", level, evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 763d6f58caa8..f913139bb0c2 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -72,10 +72,14 @@ void __flush_tlb_power8(unsigned int action) void __flush_tlb_power9(unsigned int action) { + unsigned int num_sets; + if (radix_enabled()) - flush_tlb_206(POWER9_TLB_SETS_RADIX, action); + num_sets = POWER9_TLB_SETS_RADIX; + else + num_sets = POWER9_TLB_SETS_HASH; - flush_tlb_206(POWER9_TLB_SETS_HASH, action); + flush_tlb_206(num_sets, action); } @@ -147,159 +151,365 @@ static int mce_flush(int what) return 0; } -static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) -{ - if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) - dsisr &= ~slb; - if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) - dsisr &= ~erat; - if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) - dsisr &= ~tlb; - /* Any other errors we don't understand? */ - if (dsisr) - return 0; - return 1; -} - -static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +struct mce_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + unsigned int error_type; + unsigned int error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct mce_ierror_table mce_p7_ierror_table[] = { +{ 0x00000000001c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p8_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008000000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008040000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p9_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008000000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008040000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x0000000008180000, false, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +struct mce_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + unsigned int error_type; + unsigned int error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct mce_derror_table mce_p7_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000040, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p8_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00002000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00001000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000200, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p9_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00002000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00001000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static int mce_handle_ierror(struct pt_regs *regs, + const struct mce_ierror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) { - long handled = 1; + uint64_t srr1 = regs->msr; + int handled = 0; + int i; + + *addr = 0; + + for (i = 0; table[i].srr1_mask; i++) { + if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) + continue; + + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } - /* - * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. - * reset the error bits whenever we handle them so that at the end - * we can check whether we handled all of them or not. - * */ -#ifdef CONFIG_PPC_STD_MMU_64 - if (dsisr & slb_error_bits) { - flush_and_reload_slb(); - /* reset error bits */ - dsisr &= ~(slb_error_bits); - } - if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - /* reset error bits */ - dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; + /* now fill in mce_error_info */ + mce_err->error_type = table[i].error_type; + switch (table[i].error_type) { + case MCE_ERROR_TYPE_UE: + mce_err->u.ue_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_SLB: + mce_err->u.slb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_ERAT: + mce_err->u.erat_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_TLB: + mce_err->u.tlb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_USER: + mce_err->u.user_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_RA: + mce_err->u.ra_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_LINK: + mce_err->u.link_error_type = table[i].error_subtype; + break; + } + mce_err->severity = table[i].severity; + mce_err->initiator = table[i].initiator; + if (table[i].nip_valid) + *addr = regs->nip; + return handled; } -#endif - /* Any other errors we don't understand? */ - if (dsisr & 0xffffffffUL) - handled = 0; - return handled; -} + mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; -static long mce_handle_derror_p7(uint64_t dsisr) -{ - return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS); + return 0; } -static long mce_handle_common_ierror(uint64_t srr1) +static int mce_handle_derror(struct pt_regs *regs, + const struct mce_derror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) { - long handled = 0; - - switch (P7_SRR1_MC_IFETCH(srr1)) { - case 0: - break; -#ifdef CONFIG_PPC_STD_MMU_64 - case P7_SRR1_MC_IFETCH_SLB_PARITY: - case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: - /* flush and reload SLBs for SLB errors. */ - flush_and_reload_slb(); - handled = 1; - break; - case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - handled = 1; + uint64_t dsisr = regs->dsisr; + int handled = 0; + int found = 0; + int i; + + *addr = 0; + + for (i = 0; table[i].dsisr_value; i++) { + if (!(dsisr & table[i].dsisr_value)) + continue; + + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; } - break; -#endif - default: - break; - } - return handled; -} - -static long mce_handle_ierror_p7(uint64_t srr1) -{ - long handled = 0; - - handled = mce_handle_common_ierror(srr1); + /* + * Attempt to handle multiple conditions, but only return + * one. Ensure uncorrectable errors are first in the table + * to match. + */ + if (found) + continue; + + /* now fill in mce_error_info */ + mce_err->error_type = table[i].error_type; + switch (table[i].error_type) { + case MCE_ERROR_TYPE_UE: + mce_err->u.ue_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_SLB: + mce_err->u.slb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_ERAT: + mce_err->u.erat_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_TLB: + mce_err->u.tlb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_USER: + mce_err->u.user_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_RA: + mce_err->u.ra_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_LINK: + mce_err->u.link_error_type = table[i].error_subtype; + break; + } + mce_err->severity = table[i].severity; + mce_err->initiator = table[i].initiator; + if (table[i].dar_valid) + *addr = regs->dar; -#ifdef CONFIG_PPC_STD_MMU_64 - if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { - flush_and_reload_slb(); - handled = 1; + found = 1; } -#endif - return handled; -} -static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1) -{ - switch (P7_SRR1_MC_IFETCH(srr1)) { - case P7_SRR1_MC_IFETCH_SLB_PARITY: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - break; - case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - break; - case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - break; - case P7_SRR1_MC_IFETCH_UE: - case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; - break; - case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = - MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - } -} + if (found) + return handled; -static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1) -{ - mce_get_common_ierror(mce_err, srr1); - if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; - } -} + mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; -static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr) -{ - if (dsisr & P7_DSISR_MC_UE) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; - } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = - MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; - } + return 0; } static long mce_handle_ue_error(struct pt_regs *regs) @@ -320,292 +530,42 @@ static long mce_handle_ue_error(struct pt_regs *regs) return handled; } -long __machine_check_early_realmode_p7(struct pt_regs *regs) +static long mce_handle_error(struct pt_regs *regs, + const struct mce_derror_table dtable[], + const struct mce_ierror_table itable[]) { - uint64_t srr1, nip, addr; - long handled = 1; - struct mce_error_info mce_error_info = { 0 }; - - mce_error_info.severity = MCE_SEV_ERROR_SYNC; - mce_error_info.initiator = MCE_INITIATOR_CPU; - - srr1 = regs->msr; - nip = regs->nip; + struct mce_error_info mce_err = { 0 }; + uint64_t addr; + uint64_t srr1 = regs->msr; + long handled; - /* - * Handle memory errors depending whether this was a load/store or - * ifetch exception. Also, populate the mce error_type and - * type-specific error_type from either SRR1 or DSISR, depending - * whether this was a load/store or ifetch exception - */ - if (P7_SRR1_MC_LOADSTORE(srr1)) { - handled = mce_handle_derror_p7(regs->dsisr); - mce_get_derror_p7(&mce_error_info, regs->dsisr); - addr = regs->dar; - } else { - handled = mce_handle_ierror_p7(srr1); - mce_get_ierror_p7(&mce_error_info, srr1); - addr = regs->nip; - } + if (SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror(regs, dtable, &mce_err, &addr); + else + handled = mce_handle_ierror(regs, itable, &mce_err, &addr); - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) handled = mce_handle_ue_error(regs); - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; -} - -static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) -{ - mce_get_common_ierror(mce_err, srr1); - if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } -} - -static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) -{ - mce_get_derror_p7(mce_err, dsisr); - if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } -} - -static long mce_handle_ierror_p8(uint64_t srr1) -{ - long handled = 0; + save_mce_event(regs, handled, &mce_err, regs->nip, addr); - handled = mce_handle_common_ierror(srr1); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { - flush_and_reload_slb(); - handled = 1; - } -#endif return handled; } -static long mce_handle_derror_p8(uint64_t dsisr) -{ - return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); -} - -long __machine_check_early_realmode_p8(struct pt_regs *regs) -{ - uint64_t srr1, nip, addr; - long handled = 1; - struct mce_error_info mce_error_info = { 0 }; - - mce_error_info.severity = MCE_SEV_ERROR_SYNC; - mce_error_info.initiator = MCE_INITIATOR_CPU; - - srr1 = regs->msr; - nip = regs->nip; - - if (P7_SRR1_MC_LOADSTORE(srr1)) { - handled = mce_handle_derror_p8(regs->dsisr); - mce_get_derror_p8(&mce_error_info, regs->dsisr); - addr = regs->dar; - } else { - handled = mce_handle_ierror_p8(srr1); - mce_get_ierror_p8(&mce_error_info, srr1); - addr = regs->nip; - } - - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); - - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; -} - -static int mce_handle_derror_p9(struct pt_regs *regs) -{ - uint64_t dsisr = regs->dsisr; - - return mce_handle_flush_derrors(dsisr, - P9_DSISR_MC_SLB_PARITY_MFSLB | - P9_DSISR_MC_SLB_MULTIHIT_MFSLB, - - P9_DSISR_MC_TLB_MULTIHIT_MFTLB, - - P9_DSISR_MC_ERAT_MULTIHIT); -} - -static int mce_handle_ierror_p9(struct pt_regs *regs) +long __machine_check_early_realmode_p7(struct pt_regs *regs) { - uint64_t srr1 = regs->msr; + /* P7 DD1 leaves top bits of DSISR undefined */ + regs->dsisr &= 0x0000ffff; - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_SLB_PARITY: - case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: - return mce_flush(MCE_FLUSH_SLB); - case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: - return mce_flush(MCE_FLUSH_TLB); - case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: - return mce_flush(MCE_FLUSH_ERAT); - default: - return 0; - } + return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); } -static void mce_get_derror_p9(struct pt_regs *regs, - struct mce_error_info *mce_err, uint64_t *addr) -{ - uint64_t dsisr = regs->dsisr; - - mce_err->severity = MCE_SEV_ERROR_SYNC; - mce_err->initiator = MCE_INITIATOR_CPU; - - if (dsisr & P9_DSISR_MC_USER_TLBIE) - *addr = regs->nip; - else - *addr = regs->dar; - - if (dsisr & P9_DSISR_MC_UE) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; - } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; - } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { - mce_err->error_type = MCE_ERROR_TYPE_USER; - mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; - } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_RA_LOAD) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; - } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; - } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; - } -} - -static void mce_get_ierror_p9(struct pt_regs *regs, - struct mce_error_info *mce_err, uint64_t *addr) +long __machine_check_early_realmode_p8(struct pt_regs *regs) { - uint64_t srr1 = regs->msr; - - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: - case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: - mce_err->severity = MCE_SEV_FATAL; - break; - default: - mce_err->severity = MCE_SEV_ERROR_SYNC; - break; - } - - mce_err->initiator = MCE_INITIATOR_CPU; - - *addr = regs->nip; - - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_UE: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; - break; - case P9_SRR1_MC_IFETCH_SLB_PARITY: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - break; - case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_RA: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; - break; - case P9_SRR1_MC_IFETCH_RA_TABLEWALK: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; - break; - case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; - break; - default: - break; - } + return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { - uint64_t nip, addr; - long handled; - struct mce_error_info mce_error_info = { 0 }; - - nip = regs->nip; - - if (P9_SRR1_MC_LOADSTORE(regs->msr)) { - handled = mce_handle_derror_p9(regs); - mce_get_derror_p9(regs, &mce_error_info, &addr); - } else { - handled = mce_handle_ierror_p9(regs); - mce_get_ierror_p9(regs, &mce_error_info, &addr); - } - - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); - - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; + return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 2282bf4e63cd..ec60ed0d4aad 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -243,10 +243,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. branch to optimized_callback() and emulate_step() */ - kprobe_lookup_name("optimized_callback", op_callback_addr); - kprobe_lookup_name("emulate_step", emulate_step_addr); + op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { - WARN(1, "kprobe_lookup_name() failed\n"); + WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index dfc479df9634..8d63627e067f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -245,3 +245,24 @@ void __init free_unused_pacas(void) free_lppacas(); } + +void copy_mm_to_paca(struct mm_struct *mm) +{ +#ifdef CONFIG_PPC_BOOK3S + mm_context_t *context = &mm->context; + + get_paca()->mm_ctx_id = context->id; +#ifdef CONFIG_PPC_MM_SLICES + VM_BUG_ON(!mm->context.addr_limit); + get_paca()->addr_limit = mm->context.addr_limit; + get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_high_slices_psize, + &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); +#else /* CONFIG_PPC_MM_SLICES */ + get_paca()->mm_ctx_user_psize = context->user_psize; + get_paca()->mm_ctx_sllp = context->sllp; +#endif +#else /* CONFIG_PPC_BOOK3S */ + return; +#endif +} diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f5d399e46193..d2f0afeae5a0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -55,7 +55,6 @@ #include <asm/kexec.h> #include <asm/opal.h> #include <asm/fadump.h> -#include <asm/debug.h> #include <asm/epapr_hcalls.h> #include <asm/firmware.h> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1c1b44ec7642..dd8a04f3053a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -815,7 +815,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .virt_base = cpu_to_be32(0xffffffff), .virt_size = cpu_to_be32(0xffffffff), .load_base = cpu_to_be32(0xffffffff), - .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_rma = cpu_to_be32(512), /* 512MB min RMA */ .min_load = cpu_to_be32(0xffffffff), /* full client load */ .min_rma_percent = 0, /* min RMA percentage of total RAM */ .max_pft_size = 48, /* max log_2(hash table size) */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4697da895133..5c10b5925ac2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -31,11 +31,11 @@ #include <linux/unistd.h> #include <linux/serial.h> #include <linux/serial_8250.h> -#include <linux/debugfs.h> #include <linux/percpu.h> #include <linux/memblock.h> #include <linux/of_platform.h> #include <linux/hugetlb.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/prom.h> @@ -920,6 +920,15 @@ void __init setup_arch(char **cmdline_p) init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; + +#ifdef CONFIG_PPC_MM_SLICES +#ifdef CONFIG_PPC64 + init_mm.context.addr_limit = TASK_SIZE_128TB; +#else +#error "context.addr_limit not initialized." +#endif +#endif + #ifdef CONFIG_PPC_64K_PAGES init_mm.context.pte_frag = NULL; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f997154dfc41..0d4dcaeaafcb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -230,8 +230,8 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { unsigned long lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } @@ -637,6 +637,11 @@ void __init emergency_stack_init(void) paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for NMI exception handling. */ + ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + klp_init_thread_info(ti); + paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; + /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); klp_init_thread_info(ti); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d68ed1f004a3..df2a41647d8e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/kvm_ppc.h> +#include <asm/dbell.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/prom.h> @@ -86,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS]; int smt_enabled_at_boot = 1; -static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; - /* * Returns 1 if the specified cpu should be brought up during boot. * Used to inhibit booting threads if they've been disabled or @@ -158,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +#ifdef CONFIG_NMI_IPI +static irqreturn_t nmi_ipi_action(int irq, void *data) { - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - return IRQ_HANDLED; - } - -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); -#endif /* CONFIG_DEBUGGER */ - + smp_handle_nmi_ipi(get_irq_regs()); return IRQ_HANDLED; } +#endif static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, - [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, +#ifdef CONFIG_NMI_IPI + [PPC_MSG_NMI_IPI] = nmi_ipi_action, +#endif }; +/* + * The NMI IPI is a fallback and not truly non-maskable. It is simpler + * than going through the call function infrastructure, and strongly + * serialized, so it is more appropriate for debugging. + */ const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", - [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", + [PPC_MSG_NMI_IPI] = "nmi ipi", }; /* optional function to request ipi, for controllers with >= 4 ipis */ @@ -191,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg) { int err; - if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { + if (msg < 0 || msg > PPC_MSG_NMI_IPI) return -EINVAL; - } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) - if (msg == PPC_MSG_DEBUGGER_BREAK) { +#ifndef CONFIG_NMI_IPI + if (msg == PPC_MSG_NMI_IPI) return 1; - } #endif + err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, smp_ipi_name[msg], NULL); @@ -211,17 +210,9 @@ int smp_request_message_ipi(int virq, int msg) #ifdef CONFIG_PPC_SMP_MUXED_IPI struct cpu_messages { long messages; /* current messages */ - unsigned long data; /* data for cause ipi */ }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); -void smp_muxed_ipi_set_data(int cpu, unsigned long data) -{ - struct cpu_messages *info = &per_cpu(ipi_message, cpu); - - info->data = data; -} - void smp_muxed_ipi_set_message(int cpu, int msg) { struct cpu_messages *info = &per_cpu(ipi_message, cpu); @@ -236,14 +227,13 @@ void smp_muxed_ipi_set_message(int cpu, int msg) void smp_muxed_ipi_message_pass(int cpu, int msg) { - struct cpu_messages *info = &per_cpu(ipi_message, cpu); - smp_muxed_ipi_set_message(cpu, msg); + /* * cause_ipi functions are required to include a full barrier * before doing whatever causes the IPI. */ - smp_ops->cause_ipi(cpu, info->data); + smp_ops->cause_ipi(cpu); } #ifdef __BIG_ENDIAN__ @@ -254,11 +244,18 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = this_cpu_ptr(&ipi_message); - unsigned long all; - mb(); /* order any irq clear */ + return smp_ipi_demux_relaxed(); +} + +/* sync-free variant. Callers should ensure synchronization */ +irqreturn_t smp_ipi_demux_relaxed(void) +{ + struct cpu_messages *info; + unsigned long all; + + info = this_cpu_ptr(&ipi_message); do { all = xchg(&info->messages, 0); #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) @@ -278,8 +275,10 @@ irqreturn_t smp_ipi_demux(void) scheduler_ipi(); if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) tick_broadcast_ipi_handler(); - if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) - debug_ipi_action(0, NULL); +#ifdef CONFIG_NMI_IPI + if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) + nmi_ipi_action(0, NULL); +#endif } while (info->messages); return IRQ_HANDLED; @@ -316,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } +#ifdef CONFIG_NMI_IPI + +/* + * "NMI IPI" system. + * + * NMI IPIs may not be recoverable, so should not be used as ongoing part of + * a running system. They can be used for crash, debug, halt/reboot, etc. + * + * NMI IPIs are globally single threaded. No more than one in progress at + * any time. + * + * The IPI call waits with interrupts disabled until all targets enter the + * NMI handler, then the call returns. + * + * No new NMI can be initiated until targets exit the handler. + * + * The IPI call may time out without all targets entering the NMI handler. + * In that case, there is some logic to recover (and ignore subsequent + * NMI interrupts that may eventually be raised), but the platform interrupt + * handler may not be able to distinguish this from other exception causes, + * which may cause a crash. + */ + +static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0); +static struct cpumask nmi_ipi_pending_mask; +static int nmi_ipi_busy_count = 0; +static void (*nmi_ipi_function)(struct pt_regs *) = NULL; + +static void nmi_ipi_lock_start(unsigned long *flags) +{ + raw_local_irq_save(*flags); + hard_irq_disable(); + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + raw_local_irq_restore(*flags); + cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); + } +} + +static void nmi_ipi_lock(void) +{ + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + cpu_relax(); +} + +static void nmi_ipi_unlock(void) +{ + smp_mb(); + WARN_ON(atomic_read(&__nmi_ipi_lock) != 1); + atomic_set(&__nmi_ipi_lock, 0); +} + +static void nmi_ipi_unlock_end(unsigned long *flags) +{ + nmi_ipi_unlock(); + raw_local_irq_restore(*flags); +} + +/* + * Platform NMI handler calls this to ack + */ +int smp_handle_nmi_ipi(struct pt_regs *regs) +{ + void (*fn)(struct pt_regs *); + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 0; + + /* + * Unexpected NMIs are possible here because the interrupt may not + * be able to distinguish NMI IPIs from other types of NMIs, or + * because the caller may have timed out. + */ + nmi_ipi_lock_start(&flags); + if (!nmi_ipi_busy_count) + goto out; + if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask)) + goto out; + + fn = nmi_ipi_function; + if (!fn) + goto out; + + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + ret = 1; + + fn(regs); + + nmi_ipi_lock(); + nmi_ipi_busy_count--; +out: + nmi_ipi_unlock_end(&flags); + + return ret; +} + +static void do_smp_send_nmi_ipi(int cpu) +{ + if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) + return; + + if (cpu >= 0) { + do_message_pass(cpu, PPC_MSG_NMI_IPI); + } else { + int c; + + for_each_online_cpu(c) { + if (c == raw_smp_processor_id()) + continue; + do_message_pass(c, PPC_MSG_NMI_IPI); + } + } +} + +/* + * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. + * - fn is the target callback function. + * - delay_us > 0 is the delay before giving up waiting for targets to + * enter the handler, == 0 specifies indefinite delay. + */ +static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 1; + + BUG_ON(cpu == me); + BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS); + + if (unlikely(!smp_ops)) + return 0; + + /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */ + nmi_ipi_lock_start(&flags); + while (nmi_ipi_busy_count) { + nmi_ipi_unlock_end(&flags); + cpu_relax(); + nmi_ipi_lock_start(&flags); + } + + nmi_ipi_function = fn; + + if (cpu < 0) { + /* ALL_OTHERS */ + cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask); + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + } else { + /* cpumask starts clear */ + cpumask_set_cpu(cpu, &nmi_ipi_pending_mask); + } + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + do_smp_send_nmi_ipi(cpu); + + while (!cpumask_empty(&nmi_ipi_pending_mask)) { + udelay(1); + if (delay_us) { + delay_us--; + if (!delay_us) + break; + } + } + + nmi_ipi_lock(); + if (!cpumask_empty(&nmi_ipi_pending_mask)) { + /* Could not gather all CPUs */ + ret = 0; + cpumask_clear(&nmi_ipi_pending_mask); + } + nmi_ipi_busy_count--; + nmi_ipi_unlock_end(&flags); + + return ret; +} +#endif /* CONFIG_NMI_IPI */ + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { @@ -326,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) -void smp_send_debugger_break(void) +#ifdef CONFIG_DEBUGGER +void debugger_ipi_callback(struct pt_regs *regs) { - int cpu; - int me = raw_smp_processor_id(); - - if (unlikely(!smp_ops)) - return; + debugger_ipi(regs); +} - for_each_online_cpu(cpu) - if (cpu != me) - do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +void smp_send_debugger_break(void) +{ + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); } #endif #ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { - crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback) { - mb(); - smp_send_debugger_break(); - } + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); } #endif @@ -439,7 +612,21 @@ int generic_cpu_disable(void) #ifdef CONFIG_PPC64 vdso_data->processorCount--; #endif - migrate_irqs(); + /* Update affinity of all IRQs previously aimed at this CPU */ + irq_migrate_all_off_this_cpu(); + + /* + * Depending on the details of the interrupt controller, it's possible + * that one of the interrupts we just migrated away from this CPU is + * actually already pending on this CPU. If we leave it in that state + * the interrupt will never be EOI'ed, and will never fire again. So + * temporarily enable interrupts here, to allow any pending interrupt to + * be received (and EOI'ed), before we take this CPU offline. + */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); + return 0; } @@ -521,6 +708,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) cpu_idle_thread_init(cpu, tidle); + /* + * The platform might need to allocate resources prior to bringing + * up the CPU + */ + if (smp_ops->prepare_cpu) { + rc = smp_ops->prepare_cpu(cpu); + if (rc) + return rc; + } + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 66711958493c..d534ed901538 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - save_context_stack(trace, tsk->thread.ksp, tsk, 0); + unsigned long sp; + + if (tsk == current) + sp = current_stack_pointer(); + else + sp = tsk->thread.ksp; + + save_context_stack(trace, sp, tsk, 0); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index 6ae9bd5086a4..0050b2d2ff7a 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -10,6 +10,7 @@ */ #include <linux/sched.h> +#include <linux/suspend.h> #include <asm/current.h> #include <asm/mmu_context.h> #include <asm/switch_to.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index de04c9fbb5cd..a877bf8269fe 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,11 +42,11 @@ #include <asm/unistd.h> #include <asm/asm-prototypes.h> -static inline unsigned long do_mmap2(unsigned long addr, size_t len, +static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) { - unsigned long ret = -EINVAL; + long ret = -EINVAL; if (!arch_validate_prot(prot)) goto out; @@ -62,16 +62,16 @@ out: return ret; } -unsigned long sys_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) { return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12); } -unsigned long sys_mmap(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t offset) +SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, offset) { return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c1fb255a60d6..4437c70c7c2b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -710,6 +710,10 @@ static int register_cpu_online(unsigned int cpu) struct device_attribute *attrs, *pmc_attrs; int i, nattrs; + /* For cpus present at boot a reference was already grabbed in register_cpu() */ + if (!s->of_node) + s->of_node = of_get_cpu_node(cpu, NULL); + #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SMT)) device_create_file(s, &dev_attr_smt_snooze_delay); @@ -785,9 +789,9 @@ static int register_cpu_online(unsigned int cpu) return 0; } +#ifdef CONFIG_HOTPLUG_CPU static int unregister_cpu_online(unsigned int cpu) { -#ifdef CONFIG_HOTPLUG_CPU struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; struct device_attribute *attrs, *pmc_attrs; @@ -864,9 +868,13 @@ static int unregister_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_offline(cpu); -#endif /* CONFIG_HOTPLUG_CPU */ + of_node_put(s->of_node); + s->of_node = NULL; return 0; } +#else /* !CONFIG_HOTPLUG_CPU */ +#define unregister_cpu_online NULL +#endif #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ssize_t arch_cpu_probe(const char *buf, size_t count) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile new file mode 100644 index 000000000000..729dffc5f7bc --- /dev/null +++ b/arch/powerpc/kernel/trace/Makefile @@ -0,0 +1,29 @@ +# +# Makefile for the powerpc trace subsystem +# + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +ifdef CONFIG_FUNCTION_TRACER +# do not trace tracer code +CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +endif + +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o +ifdef CONFIG_MPROFILE_KERNEL +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o +else +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o +endif +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_TRACING) += trace_clock.o + +obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) + +# Disable GCOV & sanitizers in odd or sensitive code +GCOV_PROFILE_ftrace.o := n +UBSAN_SANITIZE_ftrace.o := n diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5c9f50c1aa99..32509de6ce4c 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/list.h> +#include <asm/asm-prototypes.h> #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/ftrace.h> diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S new file mode 100644 index 000000000000..afef2c076282 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -0,0 +1,118 @@ +/* + * Split from entry_32.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/export.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + /* + * It is required that _mcount on PPC32 must preserve the + * link register. But we have r0 to play with. We use r0 + * to push the return address back to the caller of mcount + * into the ctr register, restore the link register and + * then jump back using the ctr register. + */ + mflr r0 + mtctr r0 + lwz r0, 4(r1) + mtlr r0 + bctr + +_GLOBAL(ftrace_caller) + MCOUNT_SAVE_FRAME + /* r3 ends up with link register */ + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + MCOUNT_RESTORE_FRAME + /* old link register ends up in ctr reg */ + bctr +#else +_GLOBAL(mcount) +_GLOBAL(_mcount) + + MCOUNT_SAVE_FRAME + + subi r3, r3, MCOUNT_INSN_SIZE + LOAD_REG_ADDR(r5, ftrace_trace_function) + lwz r5,0(r5) + + mtctr r5 + bctrl + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + b ftrace_graph_caller +#endif + MCOUNT_RESTORE_FRAME + bctr +#endif +EXPORT_SYMBOL(_mcount) + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + /* load r4 with local address */ + lwz r4, 44(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* Grab the LR out of the caller stack frame */ + lwz r3,52(r1) + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + stw r3,52(r1) + + MCOUNT_RESTORE_FRAME + /* old link register ends up in ctr reg */ + bctr + +_GLOBAL(return_to_handler) + /* need to save return values */ + stwu r1, -32(r1) + stw r3, 20(r1) + stw r4, 16(r1) + stw r31, 12(r1) + mr r31, r1 + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + + lwz r3, 20(r1) + lwz r4, 16(r1) + lwz r31,12(r1) + lwz r1, 0(r1) + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S new file mode 100644 index 000000000000..e5ccea19821e --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64.S @@ -0,0 +1,85 @@ +/* + * Split from entry_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/export.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) +EXPORT_SYMBOL(_mcount) + mflr r12 + mtctr r12 + mtlr r0 + bctr + +#else /* CONFIG_DYNAMIC_FTRACE */ +_GLOBAL_TOC(_mcount) +EXPORT_SYMBOL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + + subi r3, r3, MCOUNT_INSN_SIZE + LOAD_REG_ADDR(r5,ftrace_trace_function) + ld r5,0(r5) + ld r5,0(r5) + mtctr r5 + bctrl + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + b ftrace_graph_caller +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(return_to_handler) + /* need to save return values */ + std r4, -32(r1) + std r3, -24(r1) + /* save TOC */ + std r2, -16(r1) + std r31, -8(r1) + mr r31, r1 + stdu r1, -112(r1) + + /* + * We might be called from a module. + * Switch to our TOC to run inside the core kernel. + */ + ld r2, PACATOC(r13) + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + + ld r1, 0(r1) + ld r4, -32(r1) + ld r3, -24(r1) + ld r2, -16(r1) + ld r31, -8(r1) + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S new file mode 100644 index 000000000000..7c933a99f5d5 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -0,0 +1,272 @@ +/* + * Split from ftrace_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/export.h> +#include <asm/thread_info.h> +#include <asm/bug.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * + * ftrace_caller() is the function that replaces _mcount() when ftrace is + * active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(0,r1) + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip */ + std r7, _NIP(r1) + /* Save the read LR in pt_regs->link */ + std r0, _LINK(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2,PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3,r2,function_trace_op@toc@ha + addi r3,r3,function_trace_op@toc@l + ld r5,0(r3) + +#ifdef CONFIG_LIVEPATCH + mr r14,r7 /* remember old NIP */ +#endif + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + std r8, _MSR(r1) + std r9, _CTR(r1) + std r10, _XER(r1) + std r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + ld r3, _NIP(r1) + mtctr r3 +#ifdef CONFIG_LIVEPATCH + cmpd r14,r3 /* has NIP been altered? */ +#endif + + /* Restore gprs */ + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + /* Restore possibly modified LR */ + ld r0, _LINK(r1) + mtlr r0 + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + +#ifdef CONFIG_LIVEPATCH + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + + bctr /* jump after _mcount site */ + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_LIVEPATCH + /* + * This function runs in the mcount context, between two functions. As + * such it can only clobber registers which are volatile and used in + * function linkage. + * + * We get here when a function A, calls another function B, but B has + * been live patched with a new function C. + * + * On entry: + * - we have no stack frame and can not allocate one + * - LR points back to the original caller (in A) + * - CTR holds the new NIP in C + * - r0 & r12 are free + * + * r0 can't be used as the base register for a DS-form load or store, so + * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. + */ +livepatch_handler: + CURRENT_THREAD_INFO(r12, r1) + + /* Save stack pointer into r0 */ + mr r0, r1 + + /* Allocate 3 x 8 bytes */ + ld r1, TI_livepatch_sp(r12) + addi r1, r1, 24 + std r1, TI_livepatch_sp(r12) + + /* Save toc & real LR on livepatch stack */ + std r2, -24(r1) + mflr r12 + std r12, -16(r1) + + /* Store stack end marker */ + lis r12, STACK_END_MAGIC@h + ori r12, r12, STACK_END_MAGIC@l + std r12, -8(r1) + + /* Restore real stack pointer */ + mr r1, r0 + + /* Put ctr in r12 for global entry and branch there */ + mfctr r12 + bctrl + + /* + * Now we are returning from the patched function to the original + * caller A. We are free to use r0 and r12, and we can use r2 until we + * restore it. + */ + + CURRENT_THREAD_INFO(r12, r1) + + /* Save stack pointer into r0 */ + mr r0, r1 + + ld r1, TI_livepatch_sp(r12) + + /* Check stack marker hasn't been trashed */ + lis r2, STACK_END_MAGIC@h + ori r2, r2, STACK_END_MAGIC@l + ld r12, -8(r1) +1: tdne r12, r2 + EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 + + /* Restore LR & toc from livepatch stack */ + ld r12, -16(r1) + mtlr r12 + ld r2, -24(r1) + + /* Pop livepatch stack frame */ + CURRENT_THREAD_INFO(r12, r0) + subi r1, r1, 24 + std r1, TI_livepatch_sp(r12) + + /* Restore real stack pointer */ + mr r1, r0 + + /* Return to original caller of live patched function */ + blr +#endif /* CONFIG_LIVEPATCH */ + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + stdu r1, -112(r1) + /* with -mprofile-kernel, parameter regs are still alive at _mcount */ + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std r7, 80(r1) + std r6, 72(r1) + std r5, 64(r1) + std r4, 56(r1) + std r3, 48(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + mfctr r4 /* ftrace_caller has moved local addr here */ + std r4, 40(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ + subi r4, r4, MCOUNT_INSN_SIZE + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR to this. + */ + mtlr r3 + + ld r0, 40(r1) + mtctr r0 + ld r10, 104(r1) + ld r9, 96(r1) + ld r8, 88(r1) + ld r7, 80(r1) + ld r6, 72(r1) + ld r5, 64(r1) + ld r4, 56(r1) + ld r3, 48(r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + addi r1, r1, 112 + mflr r0 + std r0, LRSAVE(r1) + bctr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S new file mode 100644 index 000000000000..f095358da96e --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -0,0 +1,68 @@ +/* + * Split from ftrace_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/export.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL_TOC(ftrace_caller) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + +_GLOBAL(ftrace_stub) + blr +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + /* load r4 with local address */ + ld r4, 128(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* Grab the LR out of the caller stack frame */ + ld r11, 112(r1) + ld r3, 16(r11) + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + ld r11, 112(r1) + std r3, 16(r11) + + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c index 49170690946d..49170690946d 100644 --- a/arch/powerpc/kernel/trace_clock.c +++ b/arch/powerpc/kernel/trace/trace_clock.c diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ff365f9de27a..d4e545d27ef9 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -35,13 +35,13 @@ #include <linux/backlight.h> #include <linux/bug.h> #include <linux/kdebug.h> -#include <linux/debugfs.h> #include <linux/ratelimit.h> #include <linux/context_tracking.h> #include <asm/emulated_ops.h> #include <asm/pgtable.h> #include <linux/uaccess.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -279,18 +279,35 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) void system_reset_exception(struct pt_regs *regs) { + /* + * Avoid crashes in case of nested NMI exceptions. Recoverability + * is determined by RI and in_nmi + */ + bool nested = in_nmi(); + if (!nested) + nmi_enter(); + /* See if any machine dependent calls */ if (ppc_md.system_reset_exception) { if (ppc_md.system_reset_exception(regs)) - return; + goto out; } die("System Reset", regs, SIGABRT); +out: +#ifdef CONFIG_PPC_BOOK3S_64 + BUG_ON(get_paca()->in_nmi == 0); + if (get_paca()->in_nmi > 1) + panic("Unrecoverable nested System Reset"); +#endif /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); + if (!nested) + nmi_exit(); + /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -306,8 +323,6 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); - add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; @@ -741,6 +756,8 @@ void machine_check_exception(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. However there is existing code @@ -1440,6 +1457,8 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", + [FSCR_MSGP_LG] = "MSGP", + [FSCR_SCV_LG] = "SCV", }; char *facility = "unknown"; u64 value; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 1c24c894c908..2f793be3d2b1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -77,6 +77,8 @@ SECTIONS #endif } :kernel + __head_end = .; + /* * If the build dies here, it's likely code in head_64.S is referencing * labels it can't reach, and the linker inserting stubs without the |