summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile12
-rw-r--r--arch/powerpc/kernel/asm-offsets.c9
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S36
-rw-r--r--arch/powerpc/kernel/dbell.c58
-rw-r--r--arch/powerpc/kernel/eeh.c3
-rw-r--r--arch/powerpc/kernel/eeh_driver.c55
-rw-r--r--arch/powerpc/kernel/entry_32.S107
-rw-r--r--arch/powerpc/kernel/entry_64.S380
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S186
-rw-r--r--arch/powerpc/kernel/fadump.c36
-rw-r--r--arch/powerpc/kernel/head_32.S16
-rw-r--r--arch/powerpc/kernel/head_64.S3
-rw-r--r--arch/powerpc/kernel/idle_book3s.S285
-rw-r--r--arch/powerpc/kernel/iommu.c54
-rw-r--r--arch/powerpc/kernel/irq.c41
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c104
-rw-r--r--arch/powerpc/kernel/kprobes.c214
-rw-r--r--arch/powerpc/kernel/mce.c18
-rw-r--r--arch/powerpc/kernel/mce_power.c780
-rw-r--r--arch/powerpc/kernel/optprobes.c6
-rw-r--r--arch/powerpc/kernel/paca.c21
-rw-r--r--arch/powerpc/kernel/prom.c1
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c11
-rw-r--r--arch/powerpc/kernel/setup_64.c9
-rw-r--r--arch/powerpc/kernel/smp.c299
-rw-r--r--arch/powerpc/kernel/stacktrace.c9
-rw-r--r--arch/powerpc/kernel/swsusp.c1
-rw-r--r--arch/powerpc/kernel/syscalls.c16
-rw-r--r--arch/powerpc/kernel/sysfs.c12
-rw-r--r--arch/powerpc/kernel/trace/Makefile29
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c (renamed from arch/powerpc/kernel/ftrace.c)1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S118
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64.S85
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S272
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.S68
-rw-r--r--arch/powerpc/kernel/trace/trace_clock.c (renamed from arch/powerpc/kernel/trace_clock.c)0
-rw-r--r--arch/powerpc/kernel/traps.c27
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
39 files changed, 2090 insertions, 1296 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 811f441a125f..b9db46ae545b 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
# timers used by tracing
CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
endif
@@ -97,6 +95,7 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -118,10 +117,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
-obj-$(CONFIG_TRACING) += trace_clock.o
+obj-y += trace/
ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
obj-y += iomap.o
@@ -142,14 +138,14 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
# Disable GCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_ftrace.o := n
-UBSAN_SANITIZE_ftrace.o := n
GCOV_PROFILE_machine_kexec_64.o := n
UBSAN_SANITIZE_machine_kexec_64.o := n
GCOV_PROFILE_machine_kexec_32.o := n
UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
+GCOV_PROFILE_kprobes-ftrace.o := n
+UBSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_vdso.o := n
extra-$(CONFIG_PPC_FPU) += fpu.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..439c257dec4a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,6 +185,7 @@ int main(void)
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+ DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
#endif
@@ -219,6 +220,7 @@ int main(void)
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXSLB, paca_struct, exslb);
+ OFFSET(PACA_EXNMI, paca_struct, exnmi);
OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
@@ -232,7 +234,9 @@ int main(void)
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
+ OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
+ OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
@@ -399,8 +403,8 @@ int main(void)
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
-#ifdef MAX_PGD_TABLE_SIZE
- DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE);
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
#else
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
#endif
@@ -727,6 +731,7 @@ int main(void)
OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
+ OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 7fe8c79e6937..10cb2896b2ae 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,8 @@ _GLOBAL(__setup_cpu_power7)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- bl __init_LPCR
+ li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
+ bl __init_LPCR_ISA206
bl __init_tlb_power7
mtlr r11
blr
@@ -42,7 +43,8 @@ _GLOBAL(__restore_cpu_power7)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- bl __init_LPCR
+ li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
+ bl __init_LPCR_ISA206
bl __init_tlb_power7
mtlr r11
blr
@@ -59,7 +61,8 @@ _GLOBAL(__setup_cpu_power8)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA206
bl __init_HFSCR
bl __init_tlb_power8
bl __init_PMU_HV
@@ -80,7 +83,8 @@ _GLOBAL(__restore_cpu_power8)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA206
bl __init_HFSCR
bl __init_tlb_power8
bl __init_PMU_HV
@@ -99,11 +103,12 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+ LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
andc r3, r3, r4
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA300
bl __init_HFSCR
bl __init_tlb_power9
bl __init_PMU_HV
@@ -122,11 +127,12 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+ LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
andc r3, r3, r4
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA300
bl __init_HFSCR
bl __init_tlb_power9
bl __init_PMU_HV
@@ -144,9 +150,9 @@ __init_hvmode_206:
std r5,CPU_SPEC_FEATURES(r4)
blr
-__init_LPCR:
+__init_LPCR_ISA206:
/* Setup a sane LPCR:
- * Called with initial LPCR in R3
+ * Called with initial LPCR in R3 and desired LPES 2-bit value in R4
*
* LPES = 0b01 (HSRR0/1 used for 0x500)
* PECE = 0b111
@@ -157,16 +163,18 @@ __init_LPCR:
*
* Other bits untouched for now
*/
- li r5,1
- rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
+ li r5,0x10
+ rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
+
+ /* POWER9 has no VRMASD */
+__init_LPCR_ISA300:
+ rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
li r5,4
rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
clrrdi r3,r3,1 /* clear HDICE */
li r5,4
rldimi r3,r5, LPCR_VC_SH, 0
- li r5,0x10
- rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
mtspr SPRN_LPCR,r3
isync
blr
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 2128f3a96c32..b6fe883b1016 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -20,18 +20,60 @@
#include <asm/kvm_ppc.h>
#ifdef CONFIG_SMP
-void doorbell_setup_this_cpu(void)
+
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+void doorbell_global_ipi(int cpu)
{
- unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK;
+ u32 tag = get_hard_smp_processor_id(cpu);
- smp_muxed_ipi_set_data(smp_processor_id(), tag);
+ kvmppc_set_host_ipi(cpu, 1);
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
}
-void doorbell_cause_ipi(int cpu, unsigned long data)
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+void doorbell_core_ipi(int cpu)
{
+ u32 tag = cpu_thread_in_core(cpu);
+
+ kvmppc_set_host_ipi(cpu, 1);
/* Order previous accesses vs. msgsnd, which is treated as a store */
- mb();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data);
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * Attempt to cause a core doorbell if destination is on the same core.
+ * Returns 1 on success, 0 on failure.
+ */
+int doorbell_try_core_ipi(int cpu)
+{
+ int this_cpu = get_cpu();
+ int ret = 0;
+
+ if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
+ doorbell_core_ipi(cpu);
+ ret = 1;
+ }
+
+ put_cpu();
+
+ return ret;
}
void doorbell_exception(struct pt_regs *regs)
@@ -40,12 +82,14 @@ void doorbell_exception(struct pt_regs *regs)
irq_enter();
+ ppc_msgsync();
+
may_hard_irq_enable();
kvmppc_set_host_ipi(smp_processor_id(), 0);
__this_cpu_inc(irq_stat.doorbell_irqs);
- smp_ipi_demux();
+ smp_ipi_demux_relaxed(); /* already performed the barrier */
irq_exit();
set_irq_regs(old_regs);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9de7f79e702b..63992b2d8e15 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -22,7 +22,6 @@
*/
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -37,7 +36,7 @@
#include <linux/of.h>
#include <linux/atomic.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b94887165a10..c405c79e50cd 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -724,7 +724,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
-static void eeh_handle_normal_event(struct eeh_pe *pe)
+/**
+ * eeh_handle_normal_event - Handle EEH events on a specific PE
+ * @pe: EEH PE
+ *
+ * Attempts to recover the given PE. If recovery fails or the PE has failed
+ * too many times, remove the PE.
+ *
+ * Returns true if @pe should no longer be used, else false.
+ */
+static bool eeh_handle_normal_event(struct eeh_pe *pe)
{
struct pci_bus *frozen_bus;
struct eeh_dev *edev, *tmp;
@@ -736,13 +745,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
if (!frozen_bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
- return;
+ return false;
}
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
- if (pe->freeze_count > eeh_max_freezes)
- goto excess_failures;
+ if (pe->freeze_count > eeh_max_freezes) {
+ pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
+ "last hour and has been permanently disabled.\n",
+ pe->phb->global_number, pe->addr,
+ pe->freeze_count);
+ goto hard_fail;
+ }
pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
pe->freeze_count);
@@ -870,27 +884,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
- return;
+ return false;
-excess_failures:
+hard_fail:
/*
* About 90% of all real-life EEH failures in the field
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
- pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
- "last hour and has been permanently disabled.\n"
- "Please try reseating or replacing it.\n",
- pe->phb->global_number, pe->addr,
- pe->freeze_count);
- goto perm_error;
-
-hard_fail:
pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
"Please try reseating or replacing it\n",
pe->phb->global_number, pe->addr);
-perm_error:
eeh_slot_error_detail(pe, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
@@ -915,10 +920,21 @@ perm_error:
pci_lock_rescan_remove();
pci_hp_remove_devices(frozen_bus);
pci_unlock_rescan_remove();
+
+ /* The passed PE should no longer be used */
+ return true;
}
}
+ return false;
}
+/**
+ * eeh_handle_special_event - Handle EEH events without a specific failing PE
+ *
+ * Called when an EEH event is detected but can't be narrowed down to a
+ * specific PE. Iterates through possible failures and handles them as
+ * necessary.
+ */
static void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
@@ -982,7 +998,14 @@ static void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
- eeh_handle_normal_event(pe);
+ /*
+ * eeh_handle_normal_event() can make the PE stale if it
+ * determines that the PE cannot possibly be recovered.
+ * Don't modify the PE state if that's the case.
+ */
+ if (eeh_handle_normal_event(pe))
+ continue;
+
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
} else {
pci_lock_rescan_remove();
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a38600949f3a..8587059ad848 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -31,7 +31,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
-#include <asm/ftrace.h>
#include <asm/ptrace.h>
#include <asm/export.h>
@@ -1315,109 +1314,3 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
- /*
- * It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
- * to push the return address back to the caller of mcount
- * into the ctr register, restore the link register and
- * then jump back using the ctr register.
- */
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
- mtlr r0
- bctr
-
-_GLOBAL(ftrace_caller)
- MCOUNT_SAVE_FRAME
- /* r3 ends up with link register */
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-#else
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-
- MCOUNT_SAVE_FRAME
-
- subi r3, r3, MCOUNT_INSN_SIZE
- LOAD_REG_ADDR(r5, ftrace_trace_function)
- lwz r5,0(r5)
-
- mtctr r5
- bctrl
- nop
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- b ftrace_graph_caller
-#endif
- MCOUNT_RESTORE_FRAME
- bctr
-#endif
-EXPORT_SYMBOL(_mcount)
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
- /* load r4 with local address */
- lwz r4, 44(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* Grab the LR out of the caller stack frame */
- lwz r3,52(r1)
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR in the callers stack frame to this.
- */
- stw r3,52(r1)
-
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- stwu r1, -32(r1)
- stw r3, 20(r1)
- stw r4, 16(r1)
- stw r31, 12(r1)
- mr r31, r1
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- lwz r3, 20(r1)
- lwz r4, 16(r1)
- lwz r31,12(r1)
- lwz r1, 0(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 767ef6d68c9e..bfbad08a1207 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -20,7 +20,6 @@
#include <linux/errno.h>
#include <linux/err.h>
-#include <linux/magic.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -33,7 +32,6 @@
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
-#include <asm/ftrace.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/tm.h>
@@ -1173,381 +1171,3 @@ _GLOBAL(enter_prom)
ld r0,16(r1)
mtlr r0
blr
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-EXPORT_SYMBOL(_mcount)
- mflr r12
- mtctr r12
- mtlr r0
- bctr
-
-#ifndef CC_USING_MPROFILE_KERNEL
-_GLOBAL_TOC(ftrace_caller)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-
-#else /* CC_USING_MPROFILE_KERNEL */
-/*
- *
- * ftrace_caller() is the function that replaces _mcount() when ftrace is
- * active.
- *
- * We arrive here after a function A calls function B, and we are the trace
- * function for B. When we enter r1 points to A's stack frame, B has not yet
- * had a chance to allocate one yet.
- *
- * Additionally r2 may point either to the TOC for A, or B, depending on
- * whether B did a TOC setup sequence before calling us.
- *
- * On entry the LR points back to the _mcount() call site, and r0 holds the
- * saved LR as it was on entry to B, ie. the original return address at the
- * call site in A.
- *
- * Our job is to save the register state into a struct pt_regs (on the stack)
- * and then arrange for the ftrace function to be called.
- */
-_GLOBAL(ftrace_caller)
- /* Save the original return address in A's stack frame */
- std r0,LRSAVE(r1)
-
- /* Create our stack frame + pt_regs */
- stdu r1,-SWITCH_FRAME_SIZE(r1)
-
- /* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
-
- /* Load special regs for save below */
- mfmsr r8
- mfctr r9
- mfxer r10
- mfcr r11
-
- /* Get the _mcount() call site out of LR */
- mflr r7
- /* Save it as pt_regs->nip & pt_regs->link */
- std r7, _NIP(r1)
- std r7, _LINK(r1)
-
- /* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2,PACATOC(r13) /* get kernel TOC in r2 */
-
- addis r3,r2,function_trace_op@toc@ha
- addi r3,r3,function_trace_op@toc@l
- ld r5,0(r3)
-
-#ifdef CONFIG_LIVEPATCH
- mr r14,r7 /* remember old NIP */
-#endif
- /* Calculate ip from nip-4 into r3 for call below */
- subi r3, r7, MCOUNT_INSN_SIZE
-
- /* Put the original return address in r4 as parent_ip */
- mr r4, r0
-
- /* Save special regs */
- std r8, _MSR(r1)
- std r9, _CTR(r1)
- std r10, _XER(r1)
- std r11, _CCR(r1)
-
- /* Load &pt_regs in r6 for call below */
- addi r6, r1 ,STACK_FRAME_OVERHEAD
-
- /* ftrace_call(r3, r4, r5, r6) */
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
-#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
-#endif
-
- /* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
-
- /* Restore callee's TOC */
- ld r2, 24(r1)
-
- /* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
-
- /* Restore original LR for return to B */
- ld r0, LRSAVE(r1)
- mtlr r0
-
-#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
- bne- livepatch_handler
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- stdu r1, -112(r1)
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
- addi r1, r1, 112
-#endif
-
- ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */
- mtlr r0
- bctr /* jump after _mcount site */
-#endif /* CC_USING_MPROFILE_KERNEL */
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_LIVEPATCH
- /*
- * This function runs in the mcount context, between two functions. As
- * such it can only clobber registers which are volatile and used in
- * function linkage.
- *
- * We get here when a function A, calls another function B, but B has
- * been live patched with a new function C.
- *
- * On entry:
- * - we have no stack frame and can not allocate one
- * - LR points back to the original caller (in A)
- * - CTR holds the new NIP in C
- * - r0 & r12 are free
- *
- * r0 can't be used as the base register for a DS-form load or store, so
- * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
- */
-livepatch_handler:
- CURRENT_THREAD_INFO(r12, r1)
-
- /* Save stack pointer into r0 */
- mr r0, r1
-
- /* Allocate 3 x 8 bytes */
- ld r1, TI_livepatch_sp(r12)
- addi r1, r1, 24
- std r1, TI_livepatch_sp(r12)
-
- /* Save toc & real LR on livepatch stack */
- std r2, -24(r1)
- mflr r12
- std r12, -16(r1)
-
- /* Store stack end marker */
- lis r12, STACK_END_MAGIC@h
- ori r12, r12, STACK_END_MAGIC@l
- std r12, -8(r1)
-
- /* Restore real stack pointer */
- mr r1, r0
-
- /* Put ctr in r12 for global entry and branch there */
- mfctr r12
- bctrl
-
- /*
- * Now we are returning from the patched function to the original
- * caller A. We are free to use r0 and r12, and we can use r2 until we
- * restore it.
- */
-
- CURRENT_THREAD_INFO(r12, r1)
-
- /* Save stack pointer into r0 */
- mr r0, r1
-
- ld r1, TI_livepatch_sp(r12)
-
- /* Check stack marker hasn't been trashed */
- lis r2, STACK_END_MAGIC@h
- ori r2, r2, STACK_END_MAGIC@l
- ld r12, -8(r1)
-1: tdne r12, r2
- EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
-
- /* Restore LR & toc from livepatch stack */
- ld r12, -16(r1)
- mtlr r12
- ld r2, -24(r1)
-
- /* Pop livepatch stack frame */
- CURRENT_THREAD_INFO(r12, r0)
- subi r1, r1, 24
- std r1, TI_livepatch_sp(r12)
-
- /* Restore real stack pointer */
- mr r1, r0
-
- /* Return to original caller of live patched function */
- blr
-#endif
-
-
-#else
-_GLOBAL_TOC(_mcount)
-EXPORT_SYMBOL(_mcount)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
-
- subi r3, r3, MCOUNT_INSN_SIZE
- LOAD_REG_ADDR(r5,ftrace_trace_function)
- ld r5,0(r5)
- ld r5,0(r5)
- mtctr r5
- bctrl
- nop
-
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- b ftrace_graph_caller
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-_GLOBAL(ftrace_stub)
- blr
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#ifndef CC_USING_MPROFILE_KERNEL
-_GLOBAL(ftrace_graph_caller)
- /* load r4 with local address */
- ld r4, 128(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* Grab the LR out of the caller stack frame */
- ld r11, 112(r1)
- ld r3, 16(r11)
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR in the callers stack frame to this.
- */
- ld r11, 112(r1)
- std r3, 16(r11)
-
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
- blr
-
-#else /* CC_USING_MPROFILE_KERNEL */
-_GLOBAL(ftrace_graph_caller)
- /* with -mprofile-kernel, parameter regs are still alive at _mcount */
- std r10, 104(r1)
- std r9, 96(r1)
- std r8, 88(r1)
- std r7, 80(r1)
- std r6, 72(r1)
- std r5, 64(r1)
- std r4, 56(r1)
- std r3, 48(r1)
-
- /* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2, PACATOC(r13) /* get kernel TOC in r2 */
-
- mfctr r4 /* ftrace_caller has moved local addr here */
- std r4, 40(r1)
- mflr r3 /* ftrace_caller has restored LR from stack */
- subi r4, r4, MCOUNT_INSN_SIZE
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR to this.
- */
- mtlr r3
-
- ld r0, 40(r1)
- mtctr r0
- ld r10, 104(r1)
- ld r9, 96(r1)
- ld r8, 88(r1)
- ld r7, 80(r1)
- ld r6, 72(r1)
- ld r5, 64(r1)
- ld r4, 56(r1)
- ld r3, 48(r1)
-
- /* Restore callee's TOC */
- ld r2, 24(r1)
-
- addi r1, r1, 112
- mflr r0
- std r0, LRSAVE(r1)
- bctr
-#endif /* CC_USING_MPROFILE_KERNEL */
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- std r4, -32(r1)
- std r3, -24(r1)
- /* save TOC */
- std r2, -16(r1)
- std r31, -8(r1)
- mr r31, r1
- stdu r1, -112(r1)
-
- /*
- * We might be called from a module.
- * Switch to our TOC to run inside the core kernel.
- */
- ld r2, PACATOC(r13)
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- ld r1, 0(r1)
- ld r4, -32(r1)
- ld r3, -24(r1)
- ld r2, -16(r1)
- ld r31, -8(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6353019966e6..a9312b52fe6f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,9 +116,11 @@ EXC_VIRT_NONE(0x4000, 0x100)
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
SET_SCRATCH0(r13)
- GET_PACA(r13)
- clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
- EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+ /*
+ * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+ * being used, so a nested NMI exception would corrupt it.
+ */
+ EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
IDLETEST, 0x100)
EXC_REAL_END(system_reset, 0x100, 0x100)
@@ -126,34 +128,37 @@ EXC_VIRT_NONE(0x4100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
-BEGIN_FTR_SECTION
- GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- bl pnv_restore_hyp_resource
+ b pnv_powersave_wakeup
+#endif
- li r0,PNV_THREAD_RUNNING
- stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
+EXC_COMMON_BEGIN(system_reset_common)
+ /*
+ * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
+ * to recover, but nested NMI will notice in_nmi and not recover
+ * because of the use of the NMI stack. in_nmi reentrancy is tested in
+ * system_reset_exception.
+ */
+ lhz r10,PACA_IN_NMI(r13)
+ addi r10,r10,1
+ sth r10,PACA_IN_NMI(r13)
+ li r10,MSR_RI
+ mtmsrd r10,1
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- BRANCH_TO_KVM(r10, kvm_start_guest)
-1:
-#endif
+ mr r10,r1
+ ld r1,PACA_NMI_EMERG_SP(r13)
+ subi r1,r1,INT_FRAME_SIZE
+ EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
+ system_reset, system_reset_exception,
+ ADD_NVGPRS;ADD_RECONCILE)
- /* Return SRR1 from power7_nap() */
- mfspr r3,SPRN_SRR1
- blt cr3,2f
- b pnv_wakeup_loss
-2: b pnv_wakeup_noloss
-#endif
+ /*
+ * The stack is no longer in use, decrement in_nmi.
+ */
+ lhz r10,PACA_IN_NMI(r13)
+ subi r10,r10,1
+ sth r10,PACA_IN_NMI(r13)
-EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
+ b ret_from_except
#ifdef CONFIG_PPC_PSERIES
/*
@@ -161,8 +166,9 @@ EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
- NOTEST, 0x100)
+ /* See comment at system_reset exception */
+ EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common,
+ EXC_STD, NOTEST, 0x100)
#endif /* CONFIG_PPC_PSERIES */
@@ -172,14 +178,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
* vector
*/
SET_SCRATCH0(r13) /* save r13 */
- /*
- * Running native on arch 2.06 or later, we may wakeup from winkle
- * inside machine check. If yes, then last bit of HSPRG0 would be set
- * to 1. Hence clear it unconditionally.
- */
- GET_PACA(r13)
- clrrdi r13,r13,1
- SET_PACA(r13)
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_powernv_early
@@ -212,6 +210,12 @@ BEGIN_FTR_SECTION
* NOTE: We are here with MSR_ME=0 (off), which means we risk a
* checkstop if we get another machine check exception before we do
* rfid with MSR_ME=1.
+ *
+ * This interrupt can wake directly from idle. If that is the case,
+ * the machine check is handled then the idle wakeup code is called
+ * to restore state. In that case, the POWER9 DD1 idle PACA workaround
+ * is not applied in the early machine check code, which will cause
+ * bugs.
*/
mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
@@ -268,20 +272,11 @@ machine_check_fwnmi:
machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
- * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the
- * difference that MSR_RI is not enabled, because PACA_EXMC is being
- * used, so nested machine check corrupts it. machine_check_common
- * enables MSR_RI.
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check corrupts it. machine_check_common enables
+ * MSR_RI.
*/
- ld r10,PACAKMSR(r13)
- xori r10,r10,MSR_RI
- mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r12, machine_check_common)
- mtspr SPRN_SRR0,r12
- mfspr r12,SPRN_SRR1
- mtspr SPRN_SRR1,r10
- rfid
- b . /* prevent speculative execution */
+ EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD)
TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
@@ -340,6 +335,37 @@ EXC_COMMON_BEGIN(machine_check_common)
/* restore original r1. */ \
ld r1,GPR1(r1)
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * This is an idle wakeup. Low level machine check has already been
+ * done. Queue the event then call the idle code to do the wake up.
+ */
+EXC_COMMON_BEGIN(machine_check_idle_common)
+ bl machine_check_queue_event
+
+ /*
+ * We have not used any non-volatile GPRs here, and as a rule
+ * most exception code including machine check does not.
+ * Therefore PACA_NAPSTATELOST does not need to be set. Idle
+ * wakeup will restore volatile registers.
+ *
+ * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce.
+ *
+ * Then decrement MCE nesting after finishing with the stack.
+ */
+ ld r3,_MSR(r1)
+
+ lhz r11,PACA_IN_MCE(r13)
+ subi r11,r11,1
+ sth r11,PACA_IN_MCE(r13)
+
+ /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
+ /* Recoverability could be improved by reducing the use of SRR1. */
+ li r11,0
+ mtmsrd r11,1
+
+ b pnv_powersave_wakeup_mce
+#endif
/*
* Handle machine check early in real mode. We come here with
* ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -352,6 +378,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
bl machine_check_early
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
+
#ifdef CONFIG_PPC_P7_NAP
/*
* Check if thread was in power saving mode. We come here when any
@@ -362,48 +389,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
*
* Go back to nap/sleep/winkle mode again if (b) is true.
*/
- rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
- beq 4f /* No, it wasn;t */
- /* Thread was in power saving mode. Go back to nap again. */
- cmpwi r11,2
- blt 3f
- /* Supervisor/Hypervisor state loss */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-3: bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- GET_PACA(r13)
- ld r1,PACAR1(r13)
- /*
- * Check what idle state this CPU was in and go back to same mode
- * again.
- */
- lbz r3,PACA_THREAD_IDLE_STATE(r13)
- cmpwi r3,PNV_THREAD_NAP
- bgt 10f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
- /* No return */
-10:
- cmpwi r3,PNV_THREAD_SLEEP
- bgt 2f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
- /* No return */
-
-2:
- /*
- * Go back to winkle. Please note that this thread was woken up in
- * machine check from winkle and have not restored the per-subcore
- * state. Hence before going back to winkle, set last bit of HSPRG0
- * to 1. This will make sure that if this thread gets woken up
- * again at reset vector 0x100 then it will get chance to restore
- * the subcore state.
- */
- ori r13,r13,1
- SET_PACA(r13)
- IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
- /* No return */
+ BEGIN_FTR_SECTION
+ rlwinm. r11,r12,47-31,30,31
+ beq- 4f
+ BRANCH_TO_COMMON(r10, machine_check_idle_common)
4:
+ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
+
/*
* Check if we are coming from hypervisor userspace. If yes then we
* continue in host kernel in V mode to deliver the MC event.
@@ -968,17 +961,12 @@ EXC_VIRT_NONE(0x4e60, 0x20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
- mr r10,r1 /* Save r1 */
- ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- std r9,_CCR(r1) /* save CR in stackframe */
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- std r11,_NIP(r1) /* save HSRR0 in stackframe */
- mfspr r12,SPRN_HSRR1 /* Save SRR1 */
- std r12,_MSR(r1) /* save SRR1 in stackframe */
- std r10,0(r1) /* make stack chain pointer */
- std r0,GPR0(r1) /* save r0 in stackframe */
- std r10,GPR1(r1) /* save r1 in stackframe */
+ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
+ EXCEPTION_PROLOG_COMMON_1()
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ff0dd4e77a7..243dbef7e926 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -30,17 +30,16 @@
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
@@ -319,15 +318,34 @@ int __init fadump_reserve_mem(void)
pr_debug("fadumphdr_addr = %p\n",
(void *) fw_dump.fadumphdr_addr);
} else {
- /* Reserve the memory at the top of memory. */
size = get_fadump_area_size();
- base = memory_boundary - size;
- memblock_reserve(base, size);
- printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
- "for firmware-assisted dump\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20));
+
+ /*
+ * Reserve memory at an offset closer to bottom of the RAM to
+ * minimize the impact of memory hot-remove operation. We can't
+ * use memblock_find_in_range() here since it doesn't allocate
+ * from bottom to top.
+ */
+ for (base = fw_dump.boot_memory_size;
+ base <= (memory_boundary - size);
+ base += size) {
+ if (memblock_is_region_memory(base, size) &&
+ !memblock_is_region_reserved(base, size))
+ break;
+ }
+ if ((base > (memory_boundary - size)) ||
+ memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory\n");
+ return 0;
+ }
+
+ pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
+ "assisted dump (System RAM: %ldMB)\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20),
+ (unsigned long)(memblock_phys_mem_size() >> 20));
}
+
fw_dump.reserve_dump_area_start = base;
fw_dump.reserve_dump_area_size = size;
return 1;
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 1607be7c0ef2..e22734278458 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -735,11 +735,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE)
-
- .globl mol_trampoline
- .set mol_trampoline, i0x2f00
- EXPORT_SYMBOL(mol_trampoline)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
. = 0x3000
@@ -1278,16 +1274,6 @@ EXPORT_SYMBOL(empty_zero_page)
swapper_pg_dir:
.space PGD_TABLE_SIZE
- .globl intercept_table
-intercept_table:
- .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
- .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
- .long 0, 0, 0, i0x1300, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
-EXPORT_SYMBOL(intercept_table)
-
/* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
*/
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1dc5eae2ced3..0ddc602b33a4 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -949,7 +949,8 @@ start_here_multiplatform:
LOAD_REG_ADDR(r3,init_thread_union)
/* set up a stack pointer */
- addi r1,r3,THREAD_SIZE
+ LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
+ add r1,r3,r1
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 6fd08219248d..07d4e0ad60db 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -20,6 +20,7 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/opal.h>
#include <asm/cpuidle.h>
+#include <asm/exception-64s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/mmu.h>
@@ -94,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
core_idle_lock_held:
HMT_LOW
3: lwz r15,0(r14)
- andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
bne 3b
HMT_MEDIUM
lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
- bne core_idle_lock_held
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bne- core_idle_lock_held
blr
/*
@@ -113,7 +114,7 @@ core_idle_lock_held:
*
* Address to 'rfid' to in r5
*/
-_GLOBAL(pnv_powersave_common)
+pnv_powersave_common:
/* Use r3 to pass state nap/sleep/winkle */
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
@@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode:
/* The following store to HSTATE_HWTHREAD_STATE(r13) */
/* MUST occur in real mode, i.e. with the MMU off, */
/* and the MMU must stay off until we clear this flag */
- /* and test HSTATE_HWTHREAD_REQ(r13) in the system */
- /* reset interrupt vector in exceptions-64s.S. */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
+ /* pnv_powersave_wakeup in this file. */
/* The reason is that another thread can switch the */
/* MMU to a guest context whenever this flag is set */
/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
@@ -209,15 +210,20 @@ pnv_enter_arch207_idle_mode:
/* Sleep or winkle */
lbz r7,PACA_THREAD_MASK(r13)
ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+ li r5,0
+ beq cr3,3f
+ lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
+3:
lwarx_loop1:
lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
- bnel core_idle_lock_held
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bnel- core_idle_lock_held
+ add r15,r15,r5 /* Add if winkle */
andc r15,r15,r7 /* Clear thread bit */
- andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+ andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
/*
* If cr0 = 0, then current thread is the last thread of the core entering
@@ -240,7 +246,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */
IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
fastsleep_workaround_at_entry:
- ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
stwcx. r15,0,r14
bne- lwarx_loop1
isync
@@ -250,10 +256,10 @@ fastsleep_workaround_at_entry:
li r4,1
bl opal_config_cpu_idle_state
- /* Clear Lock bit */
- li r0,0
+ /* Unlock */
+ xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
lwsync
- stw r0,0(r14)
+ stw r15,0(r14)
b common_enter
enter_winkle:
@@ -301,8 +307,8 @@ power_enter_stop:
lwarx_loop_stop:
lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
- bnel core_idle_lock_held
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bnel- core_idle_lock_held
andc r15,r15,r7 /* Clear thread bit */
stwcx. r15,0,r14
@@ -375,17 +381,113 @@ _GLOBAL(power9_idle_stop)
li r4,1
b pnv_powersave_common
/* No return */
+
/*
- * Called from reset vector. Check whether we have woken up with
- * hypervisor state loss. If yes, restore hypervisor state and return
- * back to reset vector.
+ * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
+ * HSPRG0 will be set to the HSPRG0 value of one of the
+ * threads in this core. Thus the value we have in r13
+ * may not be this thread's paca pointer.
+ *
+ * Fortunately, the TIR remains invariant. Since this thread's
+ * paca pointer is recorded in all its sibling's paca, we can
+ * correctly recover this thread's paca pointer if we
+ * know the index of this thread in the core.
+ *
+ * This index can be obtained from the TIR.
*
- * r13 - Contents of HSPRG0
+ * i.e, thread's position in the core = TIR.
+ * If this value is i, then this thread's paca is
+ * paca->thread_sibling_pacas[i].
+ */
+power9_dd1_recover_paca:
+ mfspr r4, SPRN_TIR
+ /*
+ * Since each entry in thread_sibling_pacas is 8 bytes
+ * we need to left-shift by 3 bits. Thus r4 = i * 8
+ */
+ sldi r4, r4, 3
+ /* Get &paca->thread_sibling_pacas[0] in r5 */
+ ld r5, PACA_SIBLING_PACA_PTRS(r13)
+ /* Load paca->thread_sibling_pacas[i] into r13 */
+ ldx r13, r4, r5
+ SET_PACA(r13)
+ /*
+ * Indicate that we have lost NVGPR state
+ * which needs to be restored from the stack.
+ */
+ li r3, 1
+ stb r0,PACA_NAPSTATELOST(r13)
+ blr
+
+/*
+ * Called from machine check handler for powersave wakeups.
+ * Low level machine check processing has already been done. Now just
+ * go through the wake up path to get everything in order.
+ *
+ * r3 - The original SRR1 value.
+ * Original SRR[01] have been clobbered.
+ * MSR_RI is clear.
+ */
+.global pnv_powersave_wakeup_mce
+pnv_powersave_wakeup_mce:
+ /* Set cr3 for pnv_powersave_wakeup */
+ rlwinm r11,r3,47-31,30,31
+ cmpwi cr3,r11,2
+
+ /*
+ * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
+ * reason into SRR1, which allows reuse of the system reset wakeup
+ * code without being mistaken for another type of wakeup.
+ */
+ oris r3,r3,SRR1_WAKEMCE_RESVD@h
+ mtspr SPRN_SRR1,r3
+
+ b pnv_powersave_wakeup
+
+/*
+ * Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
*/
-_GLOBAL(pnv_restore_hyp_resource)
+.global pnv_powersave_wakeup
+pnv_powersave_wakeup:
+ ld r2, PACATOC(r13)
+
BEGIN_FTR_SECTION
- ld r2,PACATOC(r13);
+BEGIN_FTR_SECTION_NESTED(70)
+ bl power9_dd1_recover_paca
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
+ bl pnv_restore_hyp_resource_arch300
+FTR_SECTION_ELSE
+ bl pnv_restore_hyp_resource_arch207
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+ li r0,PNV_THREAD_RUNNING
+ stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
+ b kvm_start_guest
+1:
+#endif
+
+ /* Return SRR1 from power7_nap() */
+ mfspr r3,SPRN_SRR1
+ blt cr3,pnv_wakeup_noloss
+ b pnv_wakeup_loss
+
+/*
+ * Check whether we have woken up with hypervisor state loss.
+ * If yes, restore hypervisor state and return back to link.
+ *
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
+pnv_restore_hyp_resource_arch300:
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
@@ -400,31 +502,19 @@ BEGIN_FTR_SECTION
*/
rldicl r5,r5,4,60
cmpd cr4,r5,r4
- bge cr4,pnv_wakeup_tb_loss
- /*
- * Waking up without hypervisor state loss. Return to
- * reset vector
- */
- blr
+ bge cr4,pnv_wakeup_tb_loss /* returns to caller */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ blr /* Waking up without hypervisor state loss. */
+/* Same calling convention as arch300 */
+pnv_restore_hyp_resource_arch207:
/*
* POWER ISA 2.07 or less.
- * Check if last bit of HSPGR0 is set. This indicates whether we are
- * waking up from winkle.
+ * Check if we slept with sleep or winkle.
*/
- clrldi r5,r13,63
- clrrdi r13,r13,1
-
- /* Now that we are sure r13 is corrected, load TOC */
- ld r2,PACATOC(r13);
- cmpwi cr4,r5,1
- mtspr SPRN_HSPRG0,r13
-
- lbz r0,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r0,PNV_THREAD_NAP
- bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
+ lbz r4,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr2,r4,PNV_THREAD_NAP
+ bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
/*
* We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
@@ -433,8 +523,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*/
bgt cr3,.
- blr /* Return back to System Reset vector from where
- pnv_restore_hyp_resource was invoked */
+ blr /* Waking up without hypervisor state loss */
/*
* Called if waking up from idle state which can cause either partial or
@@ -444,9 +533,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*
* r13 - PACA
* cr3 - gt if waking up with partial/complete hypervisor state loss
+ *
+ * If ISA300:
* cr4 - gt or eq if waking up from complete hypervisor state loss.
+ *
+ * If ISA207:
+ * r4 - PACA_THREAD_IDLE_STATE
*/
-_GLOBAL(pnv_wakeup_tb_loss)
+pnv_wakeup_tb_loss:
ld r1,PACAR1(r13)
/*
* Before entering any idle state, the NVGPRs are saved in the stack.
@@ -473,18 +567,19 @@ _GLOBAL(pnv_wakeup_tb_loss)
* is required to return back to reset vector after hypervisor state
* restore is complete.
*/
+ mr r18,r4
mflr r17
mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- lbz r7,PACA_THREAD_MASK(r13)
ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
-lwarx_loop2:
- lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ lbz r7,PACA_THREAD_MASK(r13)
+
/*
+ * Take the core lock to synchronize against other threads.
+ *
* Lock bit is set in one of the 2 cases-
* a. In the sleep/winkle enter path, the last thread is executing
* fastsleep workaround code.
@@ -492,23 +587,93 @@ lwarx_loop2:
* workaround undo code or resyncing timebase or restoring context
* In either case loop until the lock bit is cleared.
*/
- bnel core_idle_lock_held
+1:
+ lwarx r15,0,r14
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bnel- core_idle_lock_held
+ oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ stwcx. r15,0,r14
+ bne- 1b
+ isync
- cmpwi cr2,r15,0
+ andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
+ cmpwi cr2,r9,0
/*
* At this stage
* cr2 - eq if first thread to wakeup in core
* cr3- gt if waking up with partial/complete hypervisor state loss
+ * ISA300:
* cr4 - gt or eq if waking up from complete hypervisor state loss.
*/
- ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
- stwcx. r15,0,r14
- bne- lwarx_loop2
- isync
-
BEGIN_FTR_SECTION
+ /*
+ * Were we in winkle?
+ * If yes, check if all threads were in winkle, decrement our
+ * winkle count, set all thread winkle bits if all were in winkle.
+ * Check if our thread has a winkle bit set, and set cr4 accordingly
+ * (to match ISA300, above). Pseudo-code for core idle state
+ * transitions for ISA207 is as follows (everything happens atomically
+ * due to store conditional and/or lock bit):
+ *
+ * nap_idle() { }
+ * nap_wake() { }
+ *
+ * sleep_idle()
+ * {
+ * core_idle_state &= ~thread_in_core
+ * }
+ *
+ * sleep_wake()
+ * {
+ * bool first_in_core, first_in_subcore;
+ *
+ * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+ * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+ *
+ * core_idle_state |= thread_in_core;
+ * }
+ *
+ * winkle_idle()
+ * {
+ * core_idle_state &= ~thread_in_core;
+ * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
+ * }
+ *
+ * winkle_wake()
+ * {
+ * bool first_in_core, first_in_subcore, winkle_state_lost;
+ *
+ * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+ * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+ *
+ * core_idle_state |= thread_in_core;
+ *
+ * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
+ * core_idle_state |= THREAD_WINKLE_BITS;
+ * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
+ *
+ * winkle_state_lost = core_idle_state &
+ * (thread_in_core << WINKLE_THREAD_SHIFT);
+ * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
+ * }
+ *
+ */
+ cmpwi r18,PNV_THREAD_WINKLE
+ bne 2f
+ andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
+ subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
+ beq 2f
+ ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
+2:
+ /* Shift thread bit to winkle mask, then test if this thread is set,
+ * and remove it from the winkle bits */
+ slwi r8,r7,8
+ and r8,r8,r15
+ andc r15,r15,r8
+ cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
+
lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
and r4,r4,r15
cmpwi r4,0 /* Check if first in subcore */
@@ -593,7 +758,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_WORC,r4
clear_lock:
- andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+ xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
lwsync
stw r15,0(r14)
@@ -651,8 +816,7 @@ hypervisor_state_restored:
mtspr SPRN_SRR1,r16
mtlr r17
- blr /* Return back to System Reset vector from where
- pnv_restore_hyp_resource was invoked */
+ blr /* return to pnv_powersave_wakeup */
fastsleep_workaround_at_exit:
li r3,1
@@ -664,7 +828,8 @@ fastsleep_workaround_at_exit:
* R3 here contains the value that will be returned to the caller
* of power7_nap.
*/
-_GLOBAL(pnv_wakeup_loss)
+.global pnv_wakeup_loss
+pnv_wakeup_loss:
ld r1,PACAR1(r13)
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
@@ -684,7 +849,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* R3 here contains the value that will be returned to the caller
* of power7_nap.
*/
-_GLOBAL(pnv_wakeup_noloss)
+pnv_wakeup_noloss:
lbz r0,PACA_NAPSTATELOST(r13)
cmpwi r0,0
bne pnv_wakeup_loss
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a566ec5..5a3231fedf08 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
return tbl;
}
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+static void iommu_table_free(struct kref *kref)
{
unsigned long bitmap_sz;
unsigned int order;
+ struct iommu_table *tbl;
- if (!tbl)
- return;
+ tbl = container_of(kref, struct iommu_table, it_kref);
+
+ if (tbl->it_ops->free)
+ tbl->it_ops->free(tbl);
if (!tbl->it_map) {
kfree(tbl);
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
- pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+ pr_warn("%s: Unexpected TCEs\n", __func__);
/* calculate bitmap size in bytes */
bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
kfree(tbl);
}
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+ if (kref_get_unless_zero(&tbl->it_kref))
+ return tbl;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+ if (WARN_ON(!tbl))
+ return 0;
+
+ return kref_put(&tbl->it_kref, iommu_table_free);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address passed here
* comprises a page address and offset into that page. The dma_addr_t
@@ -1004,6 +1025,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+#ifdef CONFIG_PPC_BOOK3S_64
+long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret;
+
+ ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL))) {
+ struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+
+ if (likely(pg)) {
+ SetPageDirty(pg);
+ } else {
+ tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+ ret = -EFAULT;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+#endif
+
int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a018f5cae899..5c291df30fe3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -65,7 +65,6 @@
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/smp.h>
-#include <asm/debug.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
@@ -442,46 +441,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
return sum;
}
-#ifdef CONFIG_HOTPLUG_CPU
-void migrate_irqs(void)
-{
- struct irq_desc *desc;
- unsigned int irq;
- static int warned;
- cpumask_var_t mask;
- const struct cpumask *map = cpu_online_mask;
-
- alloc_cpumask_var(&mask, GFP_KERNEL);
-
- for_each_irq_desc(irq, desc) {
- struct irq_data *data;
- struct irq_chip *chip;
-
- data = irq_desc_get_irq_data(desc);
- if (irqd_is_per_cpu(data))
- continue;
-
- chip = irq_data_get_irq_chip(data);
-
- cpumask_and(mask, irq_data_get_affinity_mask(data), map);
- if (cpumask_any(mask) >= nr_cpu_ids) {
- pr_warn("Breaking affinity for irq %i\n", irq);
- cpumask_copy(mask, map);
- }
- if (chip->irq_set_affinity)
- chip->irq_set_affinity(data, mask, true);
- else if (desc->action && !(warned++))
- pr_err("Cannot set affinity for irq %i\n", irq);
- }
-
- free_cpumask_var(mask);
-
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-}
-#endif
-
static inline void check_stack_overflow(void)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
new file mode 100644
index 000000000000..6c089d9757c9
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -0,0 +1,104 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+static nokprobe_inline
+int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, unsigned long orig_nip)
+{
+ /*
+ * Emulate singlestep (and also recover regs->nip)
+ * as if there is a nop
+ */
+ regs->nip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ if (orig_nip)
+ regs->nip = orig_nip;
+ return 1;
+}
+
+int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if (kprobe_ftrace(p))
+ return __skip_singlestep(p, regs, kcb, 0);
+ else
+ return 0;
+}
+NOKPROBE_SYMBOL(skip_singlestep);
+
+/* Ftrace callback handler for kprobes */
+void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ p = get_kprobe((kprobe_opcode_t *)nip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ unsigned long orig_nip = regs->nip;
+
+ /*
+ * On powerpc, NIP is *before* this instruction for the
+ * pre handler
+ */
+ regs->nip -= MCOUNT_INSN_SIZE;
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ __skip_singlestep(p, regs, kcb, orig_nip);
+ /*
+ * If pre_handler returns !0, it sets regs->nip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fce05a38851c..160ae0fa7d0d 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -35,6 +35,7 @@
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
+#include <asm/sections.h>
#include <linux/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -42,7 +43,86 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ return (addr >= (unsigned long)__kprobes_text_start &&
+ addr < (unsigned long)__kprobes_text_end) ||
+ (addr >= (unsigned long)_stext &&
+ addr < (unsigned long)__head_end);
+}
+
+kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
+{
+ kprobe_opcode_t *addr;
+
+#ifdef PPC64_ELF_ABI_v2
+ /* PPC64 ABIv2 needs local entry point */
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+ if (addr && !offset) {
+#ifdef CONFIG_KPROBES_ON_FTRACE
+ unsigned long faddr;
+ /*
+ * Per livepatch.h, ftrace location is always within the first
+ * 16 bytes of a function on powerpc with -mprofile-kernel.
+ */
+ faddr = ftrace_location_range((unsigned long)addr,
+ (unsigned long)addr + 16);
+ if (faddr)
+ addr = (kprobe_opcode_t *)faddr;
+ else
+#endif
+ addr = (kprobe_opcode_t *)ppc_function_entry(addr);
+ }
+#elif defined(PPC64_ELF_ABI_v1)
+ /*
+ * 64bit powerpc ABIv1 uses function descriptors:
+ * - Check for the dot variant of the symbol first.
+ * - If that fails, try looking up the symbol provided.
+ *
+ * This ensures we always get to the actual symbol and not
+ * the descriptor.
+ *
+ * Also handle <module:symbol> format.
+ */
+ char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
+ const char *modsym;
+ bool dot_appended = false;
+ if ((modsym = strchr(name, ':')) != NULL) {
+ modsym++;
+ if (*modsym != '\0' && *modsym != '.') {
+ /* Convert to <module:.symbol> */
+ strncpy(dot_name, name, modsym - name);
+ dot_name[modsym - name] = '.';
+ dot_name[modsym - name + 1] = '\0';
+ strncat(dot_name, modsym,
+ sizeof(dot_name) - (modsym - name) - 2);
+ dot_appended = true;
+ } else {
+ dot_name[0] = '\0';
+ strncat(dot_name, name, sizeof(dot_name) - 1);
+ }
+ } else if (name[0] != '.') {
+ dot_name[0] = '.';
+ dot_name[1] = '\0';
+ strncat(dot_name, name, KSYM_NAME_LEN - 2);
+ dot_appended = true;
+ } else {
+ dot_name[0] = '\0';
+ strncat(dot_name, name, KSYM_NAME_LEN - 1);
+ }
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
+ if (!addr && dot_appended) {
+ /* Let's try the original non-dot symbol lookup */
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+ }
+#else
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+#endif
+
+ return addr;
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
kprobe_opcode_t insn = *p->addr;
@@ -74,30 +154,34 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
p->ainsn.boostable = 0;
return ret;
}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
{
*p->addr = BREAKPOINT_INSTRUCTION;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
{
*p->addr = p->opcode;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
{
if (p->ainsn.insn) {
free_insn_slot(p->ainsn.insn, 0);
p->ainsn.insn = NULL;
}
}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
enable_single_step(regs);
@@ -110,37 +194,80 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->nip = (unsigned long)p->ainsn.insn;
}
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
kcb->prev_kprobe.kp = kprobe_running();
kcb->prev_kprobe.status = kcb->kprobe_status;
kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
}
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
}
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
__this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_msr = regs->msr;
}
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+bool arch_function_offset_within_entry(unsigned long offset)
+{
+#ifdef PPC64_ELF_ABI_v2
+#ifdef CONFIG_KPROBES_ON_FTRACE
+ return offset <= 16;
+#else
+ return offset <= 8;
+#endif
+#else
+ return !offset;
+#endif
+}
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *)regs->link;
/* Replace the return addr with trampoline addr */
regs->link = (unsigned long)kretprobe_trampoline;
}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-int __kprobes kprobe_handler(struct pt_regs *regs)
+int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
+{
+ int ret;
+ unsigned int insn = *p->ainsn.insn;
+
+ /* regs->nip is also adjusted if emulate_step returns 1 */
+ ret = emulate_step(regs, insn);
+ if (ret > 0) {
+ /*
+ * Once this instruction has been boosted
+ * successfully, set the boostable flag
+ */
+ if (unlikely(p->ainsn.boostable == 0))
+ p->ainsn.boostable = 1;
+ } else if (ret < 0) {
+ /*
+ * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
+ * So, we should never get here... but, its still
+ * good to catch them, just in case...
+ */
+ printk("Can't step on instruction %x\n", insn);
+ BUG();
+ } else if (ret == 0)
+ /* This instruction can't be boosted */
+ p->ainsn.boostable = -1;
+
+ return ret;
+}
+NOKPROBE_SYMBOL(try_to_emulate);
+
+int kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
@@ -177,10 +304,17 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
*/
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
- kcb->kprobe_saved_msr = regs->msr;
kprobes_inc_nmissed_count(p);
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_REENTER;
+ if (p->ainsn.boostable >= 0) {
+ ret = try_to_emulate(p, regs);
+
+ if (ret > 0) {
+ restore_previous_kprobe(kcb);
+ return 1;
+ }
+ }
return 1;
} else {
if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -197,7 +331,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
}
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
+ if (!skip_singlestep(p, regs, kcb))
+ goto ss_probe;
+ ret = 1;
}
}
goto no_kprobe;
@@ -235,18 +371,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
ss_probe:
if (p->ainsn.boostable >= 0) {
- unsigned int insn = *p->ainsn.insn;
+ ret = try_to_emulate(p, regs);
- /* regs->nip is also adjusted if emulate_step returns 1 */
- ret = emulate_step(regs, insn);
if (ret > 0) {
- /*
- * Once this instruction has been boosted
- * successfully, set the boostable flag
- */
- if (unlikely(p->ainsn.boostable == 0))
- p->ainsn.boostable = 1;
-
if (p->post_handler)
p->post_handler(p, regs, 0);
@@ -254,17 +381,7 @@ ss_probe:
reset_current_kprobe();
preempt_enable_no_resched();
return 1;
- } else if (ret < 0) {
- /*
- * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
- * So, we should never get here... but, its still
- * good to catch them, just in case...
- */
- printk("Can't step on instruction %x\n", insn);
- BUG();
- } else if (ret == 0)
- /* This instruction can't be boosted */
- p->ainsn.boostable = -1;
+ }
}
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
@@ -274,6 +391,7 @@ no_kprobe:
preempt_enable_no_resched();
return ret;
}
+NOKPROBE_SYMBOL(kprobe_handler);
/*
* Function return probe trampoline:
@@ -291,8 +409,7 @@ asm(".global kretprobe_trampoline\n"
/*
* Called when the probe at kretprobe trampoline is hit
*/
-static int __kprobes trampoline_probe_handler(struct kprobe *p,
- struct pt_regs *regs)
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
@@ -361,6 +478,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
*/
return 1;
}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
/*
* Called after single-stepping. p->addr is the address of the
@@ -370,7 +488,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-int __kprobes kprobe_post_handler(struct pt_regs *regs)
+int kprobe_post_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -410,8 +528,9 @@ out:
return 1;
}
+NOKPROBE_SYMBOL(kprobe_post_handler);
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -474,13 +593,15 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
}
return 0;
}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
unsigned long arch_deref_entry_point(void *entry)
{
return ppc_global_function_entry(entry);
}
+NOKPROBE_SYMBOL(arch_deref_entry_point);
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -497,17 +618,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
return 1;
}
+NOKPROBE_SYMBOL(setjmp_pre_handler);
-void __used __kprobes jprobe_return(void)
+void __used jprobe_return(void)
{
asm volatile("trap" ::: "memory");
}
+NOKPROBE_SYMBOL(jprobe_return);
-static void __used __kprobes jprobe_return_end(void)
+static void __used jprobe_return_end(void)
{
-};
+}
+NOKPROBE_SYMBOL(jprobe_return_end);
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -520,6 +644,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
preempt_enable_no_resched();
return 1;
}
+NOKPROBE_SYMBOL(longjmp_break_handler);
static struct kprobe trampoline_p = {
.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
@@ -531,10 +656,11 @@ int __init arch_init_kprobes(void)
return register_kprobe(&trampoline_p);
}
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
{
if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
return 1;
return 0;
}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a1475e6aef3a..5f9eada3519b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -221,6 +221,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
{
int index;
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
/*
* For now just print it to console.
* TODO: log this error event to FSP or nvram.
@@ -228,12 +230,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]));
+ this_cpu_ptr(&mce_event_queue[index]), false);
__this_cpu_dec(mce_queue_count);
}
}
-void machine_check_print_event_info(struct machine_check_event *evt)
+void machine_check_print_event_info(struct machine_check_event *evt,
+ bool user_mode)
{
const char *level, *sevstr, *subtype;
static const char *mc_ue_types[] = {
@@ -310,7 +313,16 @@ void machine_check_print_event_info(struct machine_check_event *evt)
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "[Not recovered");
+ "Recovered" : "Not recovered");
+
+ if (user_mode) {
+ printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
+ evt->srr0, current->pid, current->comm);
+ } else {
+ printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
+ (void *)evt->srr0);
+ }
+
printk("%s Initiator: %s\n", level,
evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 763d6f58caa8..f913139bb0c2 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -72,10 +72,14 @@ void __flush_tlb_power8(unsigned int action)
void __flush_tlb_power9(unsigned int action)
{
+ unsigned int num_sets;
+
if (radix_enabled())
- flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
+ num_sets = POWER9_TLB_SETS_RADIX;
+ else
+ num_sets = POWER9_TLB_SETS_HASH;
- flush_tlb_206(POWER9_TLB_SETS_HASH, action);
+ flush_tlb_206(num_sets, action);
}
@@ -147,159 +151,365 @@ static int mce_flush(int what)
return 0;
}
-static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
-{
- if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
- dsisr &= ~slb;
- if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
- dsisr &= ~erat;
- if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
- dsisr &= ~tlb;
- /* Any other errors we don't understand? */
- if (dsisr)
- return 0;
- return 1;
-}
-
-static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+struct mce_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ unsigned int error_type;
+ unsigned int error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct mce_ierror_table mce_p7_ierror_table[] = {
+{ 0x00000000001c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p8_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p9_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x0000000008180000, false,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+struct mce_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ unsigned int error_type;
+ unsigned int error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct mce_derror_table mce_p7_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static const struct mce_derror_table mce_p8_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000200, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static int mce_handle_ierror(struct pt_regs *regs,
+ const struct mce_ierror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr)
{
- long handled = 1;
+ uint64_t srr1 = regs->msr;
+ int handled = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].srr1_mask; i++) {
+ if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
+ continue;
+
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+ handled = mce_flush(MCE_FLUSH_SLB);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ handled = mce_flush(MCE_FLUSH_ERAT);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ handled = mce_flush(MCE_FLUSH_TLB);
+ break;
+ }
- /*
- * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
- * reset the error bits whenever we handle them so that at the end
- * we can check whether we handled all of them or not.
- * */
-#ifdef CONFIG_PPC_STD_MMU_64
- if (dsisr & slb_error_bits) {
- flush_and_reload_slb();
- /* reset error bits */
- dsisr &= ~(slb_error_bits);
- }
- if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- /* reset error bits */
- dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].nip_valid)
+ *addr = regs->nip;
+ return handled;
}
-#endif
- /* Any other errors we don't understand? */
- if (dsisr & 0xffffffffUL)
- handled = 0;
- return handled;
-}
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
-static long mce_handle_derror_p7(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+ return 0;
}
-static long mce_handle_common_ierror(uint64_t srr1)
+static int mce_handle_derror(struct pt_regs *regs,
+ const struct mce_derror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr)
{
- long handled = 0;
-
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case 0:
- break;
-#ifdef CONFIG_PPC_STD_MMU_64
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- /* flush and reload SLBs for SLB errors. */
- flush_and_reload_slb();
- handled = 1;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- handled = 1;
+ uint64_t dsisr = regs->dsisr;
+ int handled = 0;
+ int found = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].dsisr_value; i++) {
+ if (!(dsisr & table[i].dsisr_value))
+ continue;
+
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+ if (mce_flush(MCE_FLUSH_SLB))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (mce_flush(MCE_FLUSH_ERAT))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (mce_flush(MCE_FLUSH_TLB))
+ handled = 1;
+ break;
}
- break;
-#endif
- default:
- break;
- }
- return handled;
-}
-
-static long mce_handle_ierror_p7(uint64_t srr1)
-{
- long handled = 0;
-
- handled = mce_handle_common_ierror(srr1);
+ /*
+ * Attempt to handle multiple conditions, but only return
+ * one. Ensure uncorrectable errors are first in the table
+ * to match.
+ */
+ if (found)
+ continue;
+
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].dar_valid)
+ *addr = regs->dar;
-#ifdef CONFIG_PPC_STD_MMU_64
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- flush_and_reload_slb();
- handled = 1;
+ found = 1;
}
-#endif
- return handled;
-}
-static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
-{
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_UE:
- case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- }
-}
+ if (found)
+ return handled;
-static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
- }
-}
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
-static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
-{
- if (dsisr & P7_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
- }
+ return 0;
}
static long mce_handle_ue_error(struct pt_regs *regs)
@@ -320,292 +530,42 @@ static long mce_handle_ue_error(struct pt_regs *regs)
return handled;
}
-long __machine_check_early_realmode_p7(struct pt_regs *regs)
+static long mce_handle_error(struct pt_regs *regs,
+ const struct mce_derror_table dtable[],
+ const struct mce_ierror_table itable[])
{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- mce_error_info.severity = MCE_SEV_ERROR_SYNC;
- mce_error_info.initiator = MCE_INITIATOR_CPU;
-
- srr1 = regs->msr;
- nip = regs->nip;
+ struct mce_error_info mce_err = { 0 };
+ uint64_t addr;
+ uint64_t srr1 = regs->msr;
+ long handled;
- /*
- * Handle memory errors depending whether this was a load/store or
- * ifetch exception. Also, populate the mce error_type and
- * type-specific error_type from either SRR1 or DSISR, depending
- * whether this was a load/store or ifetch exception
- */
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p7(regs->dsisr);
- mce_get_derror_p7(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p7(srr1);
- mce_get_ierror_p7(&mce_error_info, srr1);
- addr = regs->nip;
- }
+ if (SRR1_MC_LOADSTORE(srr1))
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ else
+ handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
-}
-
-static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
-}
-
-static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
-{
- mce_get_derror_p7(mce_err, dsisr);
- if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
-}
-
-static long mce_handle_ierror_p8(uint64_t srr1)
-{
- long handled = 0;
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr);
- handled = mce_handle_common_ierror(srr1);
-
-#ifdef CONFIG_PPC_STD_MMU_64
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- flush_and_reload_slb();
- handled = 1;
- }
-#endif
return handled;
}
-static long mce_handle_derror_p8(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
-}
-
-long __machine_check_early_realmode_p8(struct pt_regs *regs)
-{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- mce_error_info.severity = MCE_SEV_ERROR_SYNC;
- mce_error_info.initiator = MCE_INITIATOR_CPU;
-
- srr1 = regs->msr;
- nip = regs->nip;
-
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p8(regs->dsisr);
- mce_get_derror_p8(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p8(srr1);
- mce_get_ierror_p8(&mce_error_info, srr1);
- addr = regs->nip;
- }
-
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
-
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
-}
-
-static int mce_handle_derror_p9(struct pt_regs *regs)
-{
- uint64_t dsisr = regs->dsisr;
-
- return mce_handle_flush_derrors(dsisr,
- P9_DSISR_MC_SLB_PARITY_MFSLB |
- P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
-
- P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
-
- P9_DSISR_MC_ERAT_MULTIHIT);
-}
-
-static int mce_handle_ierror_p9(struct pt_regs *regs)
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
- uint64_t srr1 = regs->msr;
+ /* P7 DD1 leaves top bits of DSISR undefined */
+ regs->dsisr &= 0x0000ffff;
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_SLB_PARITY:
- case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
- return mce_flush(MCE_FLUSH_SLB);
- case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
- return mce_flush(MCE_FLUSH_TLB);
- case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
- return mce_flush(MCE_FLUSH_ERAT);
- default:
- return 0;
- }
+ return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
}
-static void mce_get_derror_p9(struct pt_regs *regs,
- struct mce_error_info *mce_err, uint64_t *addr)
-{
- uint64_t dsisr = regs->dsisr;
-
- mce_err->severity = MCE_SEV_ERROR_SYNC;
- mce_err->initiator = MCE_INITIATOR_CPU;
-
- if (dsisr & P9_DSISR_MC_USER_TLBIE)
- *addr = regs->nip;
- else
- *addr = regs->dar;
-
- if (dsisr & P9_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
- } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
- } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
- mce_err->error_type = MCE_ERROR_TYPE_USER;
- mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
- } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
- } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
- } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
- }
-}
-
-static void mce_get_ierror_p9(struct pt_regs *regs,
- struct mce_error_info *mce_err, uint64_t *addr)
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- uint64_t srr1 = regs->msr;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
- case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
- mce_err->severity = MCE_SEV_FATAL;
- break;
- default:
- mce_err->severity = MCE_SEV_ERROR_SYNC;
- break;
- }
-
- mce_err->initiator = MCE_INITIATOR_CPU;
-
- *addr = regs->nip;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_UE:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_RA:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
- break;
- case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
- break;
- default:
- break;
- }
+ return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
}
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
- uint64_t nip, addr;
- long handled;
- struct mce_error_info mce_error_info = { 0 };
-
- nip = regs->nip;
-
- if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
- handled = mce_handle_derror_p9(regs);
- mce_get_derror_p9(regs, &mce_error_info, &addr);
- } else {
- handled = mce_handle_ierror_p9(regs);
- mce_get_ierror_p9(regs, &mce_error_info, &addr);
- }
-
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
-
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
+ return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
}
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 2282bf4e63cd..ec60ed0d4aad 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -243,10 +243,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 2. branch to optimized_callback() and emulate_step()
*/
- kprobe_lookup_name("optimized_callback", op_callback_addr);
- kprobe_lookup_name("emulate_step", emulate_step_addr);
+ op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
+ emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
if (!op_callback_addr || !emulate_step_addr) {
- WARN(1, "kprobe_lookup_name() failed\n");
+ WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
goto error;
}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index dfc479df9634..8d63627e067f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -245,3 +245,24 @@ void __init free_unused_pacas(void)
free_lppacas();
}
+
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_BOOK3S
+ mm_context_t *context = &mm->context;
+
+ get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+ VM_BUG_ON(!mm->context.addr_limit);
+ get_paca()->addr_limit = mm->context.addr_limit;
+ get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_high_slices_psize,
+ &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+#else /* CONFIG_PPC_MM_SLICES */
+ get_paca()->mm_ctx_user_psize = context->user_psize;
+ get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+#else /* CONFIG_PPC_BOOK3S */
+ return;
+#endif
+}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f5d399e46193..d2f0afeae5a0 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,7 +55,6 @@
#include <asm/kexec.h>
#include <asm/opal.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
#include <asm/epapr_hcalls.h>
#include <asm/firmware.h>
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1c1b44ec7642..dd8a04f3053a 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -815,7 +815,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.virt_base = cpu_to_be32(0xffffffff),
.virt_size = cpu_to_be32(0xffffffff),
.load_base = cpu_to_be32(0xffffffff),
- .min_rma = cpu_to_be32(256), /* 256MB min RMA */
+ .min_rma = cpu_to_be32(512), /* 512MB min RMA */
.min_load = cpu_to_be32(0xffffffff), /* full client load */
.min_rma_percent = 0, /* min RMA percentage of total RAM */
.max_pft_size = 48, /* max log_2(hash table size) */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4697da895133..5c10b5925ac2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -31,11 +31,11 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/debugfs.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/of_platform.h>
#include <linux/hugetlb.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/prom.h>
@@ -920,6 +920,15 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
+
+#ifdef CONFIG_PPC_MM_SLICES
+#ifdef CONFIG_PPC64
+ init_mm.context.addr_limit = TASK_SIZE_128TB;
+#else
+#error "context.addr_limit not initialized."
+#endif
+#endif
+
#ifdef CONFIG_PPC_64K_PAGES
init_mm.context.pte_frag = NULL;
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f997154dfc41..0d4dcaeaafcb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -230,8 +230,8 @@ static void cpu_ready_for_interrupts(void)
* If we are not in hypervisor mode the job is done once for
* the whole partition in configure_exceptions().
*/
- if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
- early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
}
@@ -637,6 +637,11 @@ void __init emergency_stack_init(void)
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
+ /* emergency stack for NMI exception handling. */
+ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ klp_init_thread_info(ti);
+ paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d68ed1f004a3..df2a41647d8e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -39,6 +39,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
@@ -86,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS];
int smt_enabled_at_boot = 1;
-static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
-
/*
* Returns 1 if the specified cpu should be brought up during boot.
* Used to inhibit booting threads if they've been disabled or
@@ -158,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t debug_ipi_action(int irq, void *data)
+#ifdef CONFIG_NMI_IPI
+static irqreturn_t nmi_ipi_action(int irq, void *data)
{
- if (crash_ipi_function_ptr) {
- crash_ipi_function_ptr(get_irq_regs());
- return IRQ_HANDLED;
- }
-
-#ifdef CONFIG_DEBUGGER
- debugger_ipi(get_irq_regs());
-#endif /* CONFIG_DEBUGGER */
-
+ smp_handle_nmi_ipi(get_irq_regs());
return IRQ_HANDLED;
}
+#endif
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
- [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+#ifdef CONFIG_NMI_IPI
+ [PPC_MSG_NMI_IPI] = nmi_ipi_action,
+#endif
};
+/*
+ * The NMI IPI is a fallback and not truly non-maskable. It is simpler
+ * than going through the call function infrastructure, and strongly
+ * serialized, so it is more appropriate for debugging.
+ */
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
- [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+ [PPC_MSG_NMI_IPI] = "nmi ipi",
};
/* optional function to request ipi, for controllers with >= 4 ipis */
@@ -191,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg)
{
int err;
- if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ if (msg < 0 || msg > PPC_MSG_NMI_IPI)
return -EINVAL;
- }
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE)
- if (msg == PPC_MSG_DEBUGGER_BREAK) {
+#ifndef CONFIG_NMI_IPI
+ if (msg == PPC_MSG_NMI_IPI)
return 1;
- }
#endif
+
err = request_irq(virq, smp_ipi_action[msg],
IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
smp_ipi_name[msg], NULL);
@@ -211,17 +210,9 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
long messages; /* current messages */
- unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
-void smp_muxed_ipi_set_data(int cpu, unsigned long data)
-{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
- info->data = data;
-}
-
void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
@@ -236,14 +227,13 @@ void smp_muxed_ipi_set_message(int cpu, int msg)
void smp_muxed_ipi_message_pass(int cpu, int msg)
{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
smp_muxed_ipi_set_message(cpu, msg);
+
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
*/
- smp_ops->cause_ipi(cpu, info->data);
+ smp_ops->cause_ipi(cpu);
}
#ifdef __BIG_ENDIAN__
@@ -254,11 +244,18 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
irqreturn_t smp_ipi_demux(void)
{
- struct cpu_messages *info = this_cpu_ptr(&ipi_message);
- unsigned long all;
-
mb(); /* order any irq clear */
+ return smp_ipi_demux_relaxed();
+}
+
+/* sync-free variant. Callers should ensure synchronization */
+irqreturn_t smp_ipi_demux_relaxed(void)
+{
+ struct cpu_messages *info;
+ unsigned long all;
+
+ info = this_cpu_ptr(&ipi_message);
do {
all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
@@ -278,8 +275,10 @@ irqreturn_t smp_ipi_demux(void)
scheduler_ipi();
if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
tick_broadcast_ipi_handler();
- if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
- debug_ipi_action(0, NULL);
+#ifdef CONFIG_NMI_IPI
+ if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
+ nmi_ipi_action(0, NULL);
+#endif
} while (info->messages);
return IRQ_HANDLED;
@@ -316,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_NMI_IPI
+
+/*
+ * "NMI IPI" system.
+ *
+ * NMI IPIs may not be recoverable, so should not be used as ongoing part of
+ * a running system. They can be used for crash, debug, halt/reboot, etc.
+ *
+ * NMI IPIs are globally single threaded. No more than one in progress at
+ * any time.
+ *
+ * The IPI call waits with interrupts disabled until all targets enter the
+ * NMI handler, then the call returns.
+ *
+ * No new NMI can be initiated until targets exit the handler.
+ *
+ * The IPI call may time out without all targets entering the NMI handler.
+ * In that case, there is some logic to recover (and ignore subsequent
+ * NMI interrupts that may eventually be raised), but the platform interrupt
+ * handler may not be able to distinguish this from other exception causes,
+ * which may cause a crash.
+ */
+
+static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
+static struct cpumask nmi_ipi_pending_mask;
+static int nmi_ipi_busy_count = 0;
+static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
+
+static void nmi_ipi_lock_start(unsigned long *flags)
+{
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ raw_local_irq_restore(*flags);
+ cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
+}
+
+static void nmi_ipi_lock(void)
+{
+ while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ cpu_relax();
+}
+
+static void nmi_ipi_unlock(void)
+{
+ smp_mb();
+ WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
+ atomic_set(&__nmi_ipi_lock, 0);
+}
+
+static void nmi_ipi_unlock_end(unsigned long *flags)
+{
+ nmi_ipi_unlock();
+ raw_local_irq_restore(*flags);
+}
+
+/*
+ * Platform NMI handler calls this to ack
+ */
+int smp_handle_nmi_ipi(struct pt_regs *regs)
+{
+ void (*fn)(struct pt_regs *);
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 0;
+
+ /*
+ * Unexpected NMIs are possible here because the interrupt may not
+ * be able to distinguish NMI IPIs from other types of NMIs, or
+ * because the caller may have timed out.
+ */
+ nmi_ipi_lock_start(&flags);
+ if (!nmi_ipi_busy_count)
+ goto out;
+ if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
+ goto out;
+
+ fn = nmi_ipi_function;
+ if (!fn)
+ goto out;
+
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ nmi_ipi_busy_count++;
+ nmi_ipi_unlock();
+
+ ret = 1;
+
+ fn(regs);
+
+ nmi_ipi_lock();
+ nmi_ipi_busy_count--;
+out:
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+
+static void do_smp_send_nmi_ipi(int cpu)
+{
+ if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
+ return;
+
+ if (cpu >= 0) {
+ do_message_pass(cpu, PPC_MSG_NMI_IPI);
+ } else {
+ int c;
+
+ for_each_online_cpu(c) {
+ if (c == raw_smp_processor_id())
+ continue;
+ do_message_pass(c, PPC_MSG_NMI_IPI);
+ }
+ }
+}
+
+/*
+ * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
+ * - fn is the target callback function.
+ * - delay_us > 0 is the delay before giving up waiting for targets to
+ * enter the handler, == 0 specifies indefinite delay.
+ */
+static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 1;
+
+ BUG_ON(cpu == me);
+ BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
+
+ if (unlikely(!smp_ops))
+ return 0;
+
+ /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
+ nmi_ipi_lock_start(&flags);
+ while (nmi_ipi_busy_count) {
+ nmi_ipi_unlock_end(&flags);
+ cpu_relax();
+ nmi_ipi_lock_start(&flags);
+ }
+
+ nmi_ipi_function = fn;
+
+ if (cpu < 0) {
+ /* ALL_OTHERS */
+ cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ } else {
+ /* cpumask starts clear */
+ cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
+ }
+ nmi_ipi_busy_count++;
+ nmi_ipi_unlock();
+
+ do_smp_send_nmi_ipi(cpu);
+
+ while (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ udelay(1);
+ if (delay_us) {
+ delay_us--;
+ if (!delay_us)
+ break;
+ }
+ }
+
+ nmi_ipi_lock();
+ if (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ /* Could not gather all CPUs */
+ ret = 0;
+ cpumask_clear(&nmi_ipi_pending_mask);
+ }
+ nmi_ipi_busy_count--;
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+#endif /* CONFIG_NMI_IPI */
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
@@ -326,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
-void smp_send_debugger_break(void)
+#ifdef CONFIG_DEBUGGER
+void debugger_ipi_callback(struct pt_regs *regs)
{
- int cpu;
- int me = raw_smp_processor_id();
-
- if (unlikely(!smp_ops))
- return;
+ debugger_ipi(regs);
+}
- for_each_online_cpu(cpu)
- if (cpu != me)
- do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+void smp_send_debugger_break(void)
+{
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif
#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
- crash_ipi_function_ptr = crash_ipi_callback;
- if (crash_ipi_callback) {
- mb();
- smp_send_debugger_break();
- }
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
}
#endif
@@ -439,7 +612,21 @@ int generic_cpu_disable(void)
#ifdef CONFIG_PPC64
vdso_data->processorCount--;
#endif
- migrate_irqs();
+ /* Update affinity of all IRQs previously aimed at this CPU */
+ irq_migrate_all_off_this_cpu();
+
+ /*
+ * Depending on the details of the interrupt controller, it's possible
+ * that one of the interrupts we just migrated away from this CPU is
+ * actually already pending on this CPU. If we leave it in that state
+ * the interrupt will never be EOI'ed, and will never fire again. So
+ * temporarily enable interrupts here, to allow any pending interrupt to
+ * be received (and EOI'ed), before we take this CPU offline.
+ */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
return 0;
}
@@ -521,6 +708,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
cpu_idle_thread_init(cpu, tidle);
+ /*
+ * The platform might need to allocate resources prior to bringing
+ * up the CPU
+ */
+ if (smp_ops->prepare_cpu) {
+ rc = smp_ops->prepare_cpu(cpu);
+ if (rc)
+ return rc;
+ }
+
/* Make sure callin-map entry is 0 (can be leftover a CPU
* hotplug
*/
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 66711958493c..d534ed901538 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+ unsigned long sp;
+
+ if (tsk == current)
+ sp = current_stack_pointer();
+ else
+ sp = tsk->thread.ksp;
+
+ save_context_stack(trace, sp, tsk, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 6ae9bd5086a4..0050b2d2ff7a 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -10,6 +10,7 @@
*/
#include <linux/sched.h>
+#include <linux/suspend.h>
#include <asm/current.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index de04c9fbb5cd..a877bf8269fe 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -42,11 +42,11 @@
#include <asm/unistd.h>
#include <asm/asm-prototypes.h>
-static inline unsigned long do_mmap2(unsigned long addr, size_t len,
+static inline long do_mmap2(unsigned long addr, size_t len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long off, int shift)
{
- unsigned long ret = -EINVAL;
+ long ret = -EINVAL;
if (!arch_validate_prot(prot))
goto out;
@@ -62,16 +62,16 @@ out:
return ret;
}
-unsigned long sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
{
return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
}
-unsigned long sys_mmap(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, off_t offset)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, off_t, offset)
{
return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
}
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c1fb255a60d6..4437c70c7c2b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -710,6 +710,10 @@ static int register_cpu_online(unsigned int cpu)
struct device_attribute *attrs, *pmc_attrs;
int i, nattrs;
+ /* For cpus present at boot a reference was already grabbed in register_cpu() */
+ if (!s->of_node)
+ s->of_node = of_get_cpu_node(cpu, NULL);
+
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SMT))
device_create_file(s, &dev_attr_smt_snooze_delay);
@@ -785,9 +789,9 @@ static int register_cpu_online(unsigned int cpu)
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
static int unregister_cpu_online(unsigned int cpu)
{
-#ifdef CONFIG_HOTPLUG_CPU
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
struct device_attribute *attrs, *pmc_attrs;
@@ -864,9 +868,13 @@ static int unregister_cpu_online(unsigned int cpu)
}
#endif
cacheinfo_cpu_offline(cpu);
-#endif /* CONFIG_HOTPLUG_CPU */
+ of_node_put(s->of_node);
+ s->of_node = NULL;
return 0;
}
+#else /* !CONFIG_HOTPLUG_CPU */
+#define unregister_cpu_online NULL
+#endif
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ssize_t arch_cpu_probe(const char *buf, size_t count)
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
new file mode 100644
index 000000000000..729dffc5f7bc
--- /dev/null
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -0,0 +1,29 @@
+#
+# Makefile for the powerpc trace subsystem
+#
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+ifdef CONFIG_FUNCTION_TRACER
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+endif
+
+obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o
+ifdef CONFIG_MPROFILE_KERNEL
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o
+else
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o
+endif
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
+
+# Disable GCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_ftrace.o := n
+UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 5c9f50c1aa99..32509de6ce4c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/list.h>
+#include <asm/asm-prototypes.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/ftrace.h>
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
new file mode 100644
index 000000000000..afef2c076282
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -0,0 +1,118 @@
+/*
+ * Split from entry_32.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ /*
+ * It is required that _mcount on PPC32 must preserve the
+ * link register. But we have r0 to play with. We use r0
+ * to push the return address back to the caller of mcount
+ * into the ctr register, restore the link register and
+ * then jump back using the ctr register.
+ */
+ mflr r0
+ mtctr r0
+ lwz r0, 4(r1)
+ mtlr r0
+ bctr
+
+_GLOBAL(ftrace_caller)
+ MCOUNT_SAVE_FRAME
+ /* r3 ends up with link register */
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+
+ MCOUNT_SAVE_FRAME
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5, ftrace_trace_function)
+ lwz r5,0(r5)
+
+ mtctr r5
+ bctrl
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ MCOUNT_RESTORE_FRAME
+ bctr
+#endif
+EXPORT_SYMBOL(_mcount)
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ lwz r4, 44(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* Grab the LR out of the caller stack frame */
+ lwz r3,52(r1)
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR in the callers stack frame to this.
+ */
+ stw r3,52(r1)
+
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ stwu r1, -32(r1)
+ stw r3, 20(r1)
+ stw r4, 16(r1)
+ stw r31, 12(r1)
+ mr r31, r1
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ lwz r3, 20(r1)
+ lwz r4, 16(r1)
+ lwz r31,12(r1)
+ lwz r1, 0(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
new file mode 100644
index 000000000000..e5ccea19821e
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -0,0 +1,85 @@
+/*
+ * Split from entry_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+_GLOBAL_TOC(_mcount)
+EXPORT_SYMBOL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5,ftrace_trace_function)
+ ld r5,0(r5)
+ ld r5,0(r5)
+ mtctr r5
+ bctrl
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ ld r2, PACATOC(r13)
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
new file mode 100644
index 000000000000..7c933a99f5d5
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -0,0 +1,272 @@
+/*
+ * Split from ftrace_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+#include <asm/thread_info.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ *
+ * ftrace_caller() is the function that replaces _mcount() when ftrace is
+ * active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter r1 points to A's stack frame, B has not yet
+ * had a chance to allocate one yet.
+ *
+ * Additionally r2 may point either to the TOC for A, or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+_GLOBAL(ftrace_caller)
+ /* Save the original return address in A's stack frame */
+ std r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stdu r1,-SWITCH_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ SAVE_8GPRS(0,r1)
+ SAVE_8GPRS(8,r1)
+ SAVE_8GPRS(16,r1)
+ SAVE_8GPRS(24,r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip */
+ std r7, _NIP(r1)
+ /* Save the read LR in pt_regs->link */
+ std r0, _LINK(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2,PACATOC(r13) /* get kernel TOC in r2 */
+
+ addis r3,r2,function_trace_op@toc@ha
+ addi r3,r3,function_trace_op@toc@l
+ ld r5,0(r3)
+
+#ifdef CONFIG_LIVEPATCH
+ mr r14,r7 /* remember old NIP */
+#endif
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ std r8, _MSR(r1)
+ std r9, _CTR(r1)
+ std r10, _XER(r1)
+ std r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1 ,STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ ld r3, _NIP(r1)
+ mtctr r3
+#ifdef CONFIG_LIVEPATCH
+ cmpd r14,r3 /* has NIP been altered? */
+#endif
+
+ /* Restore gprs */
+ REST_8GPRS(0,r1)
+ REST_8GPRS(8,r1)
+ REST_8GPRS(16,r1)
+ REST_8GPRS(24,r1)
+
+ /* Restore possibly modified LR */
+ ld r0, _LINK(r1)
+ mtlr r0
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ /* Pop our stack frame */
+ addi r1, r1, SWITCH_FRAME_SIZE
+
+#ifdef CONFIG_LIVEPATCH
+ /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ bne- livepatch_handler
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+
+ bctr /* jump after _mcount site */
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_LIVEPATCH
+ /*
+ * This function runs in the mcount context, between two functions. As
+ * such it can only clobber registers which are volatile and used in
+ * function linkage.
+ *
+ * We get here when a function A, calls another function B, but B has
+ * been live patched with a new function C.
+ *
+ * On entry:
+ * - we have no stack frame and can not allocate one
+ * - LR points back to the original caller (in A)
+ * - CTR holds the new NIP in C
+ * - r0 & r12 are free
+ *
+ * r0 can't be used as the base register for a DS-form load or store, so
+ * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
+ */
+livepatch_handler:
+ CURRENT_THREAD_INFO(r12, r1)
+
+ /* Save stack pointer into r0 */
+ mr r0, r1
+
+ /* Allocate 3 x 8 bytes */
+ ld r1, TI_livepatch_sp(r12)
+ addi r1, r1, 24
+ std r1, TI_livepatch_sp(r12)
+
+ /* Save toc & real LR on livepatch stack */
+ std r2, -24(r1)
+ mflr r12
+ std r12, -16(r1)
+
+ /* Store stack end marker */
+ lis r12, STACK_END_MAGIC@h
+ ori r12, r12, STACK_END_MAGIC@l
+ std r12, -8(r1)
+
+ /* Restore real stack pointer */
+ mr r1, r0
+
+ /* Put ctr in r12 for global entry and branch there */
+ mfctr r12
+ bctrl
+
+ /*
+ * Now we are returning from the patched function to the original
+ * caller A. We are free to use r0 and r12, and we can use r2 until we
+ * restore it.
+ */
+
+ CURRENT_THREAD_INFO(r12, r1)
+
+ /* Save stack pointer into r0 */
+ mr r0, r1
+
+ ld r1, TI_livepatch_sp(r12)
+
+ /* Check stack marker hasn't been trashed */
+ lis r2, STACK_END_MAGIC@h
+ ori r2, r2, STACK_END_MAGIC@l
+ ld r12, -8(r1)
+1: tdne r12, r2
+ EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
+
+ /* Restore LR & toc from livepatch stack */
+ ld r12, -16(r1)
+ mtlr r12
+ ld r2, -24(r1)
+
+ /* Pop livepatch stack frame */
+ CURRENT_THREAD_INFO(r12, r0)
+ subi r1, r1, 24
+ std r1, TI_livepatch_sp(r12)
+
+ /* Restore real stack pointer */
+ mr r1, r0
+
+ /* Return to original caller of live patched function */
+ blr
+#endif /* CONFIG_LIVEPATCH */
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ stdu r1, -112(r1)
+ /* with -mprofile-kernel, parameter regs are still alive at _mcount */
+ std r10, 104(r1)
+ std r9, 96(r1)
+ std r8, 88(r1)
+ std r7, 80(r1)
+ std r6, 72(r1)
+ std r5, 64(r1)
+ std r4, 56(r1)
+ std r3, 48(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ std r4, 40(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR to this.
+ */
+ mtlr r3
+
+ ld r0, 40(r1)
+ mtctr r0
+ ld r10, 104(r1)
+ ld r9, 96(r1)
+ ld r8, 88(r1)
+ ld r7, 80(r1)
+ ld r6, 72(r1)
+ ld r5, 64(r1)
+ ld r4, 56(r1)
+ ld r3, 48(r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ addi r1, r1, 112
+ mflr r0
+ std r0, LRSAVE(r1)
+ bctr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
new file mode 100644
index 000000000000..f095358da96e
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -0,0 +1,68 @@
+/*
+ * Split from ftrace_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL_TOC(ftrace_caller)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+
+_GLOBAL(ftrace_stub)
+ blr
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ ld r4, 128(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* Grab the LR out of the caller stack frame */
+ ld r11, 112(r1)
+ ld r3, 16(r11)
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR in the callers stack frame to this.
+ */
+ ld r11, 112(r1)
+ std r3, 16(r11)
+
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c
index 49170690946d..49170690946d 100644
--- a/arch/powerpc/kernel/trace_clock.c
+++ b/arch/powerpc/kernel/trace/trace_clock.c
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index ff365f9de27a..d4e545d27ef9 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -35,13 +35,13 @@
#include <linux/backlight.h>
#include <linux/bug.h>
#include <linux/kdebug.h>
-#include <linux/debugfs.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -279,18 +279,35 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
void system_reset_exception(struct pt_regs *regs)
{
+ /*
+ * Avoid crashes in case of nested NMI exceptions. Recoverability
+ * is determined by RI and in_nmi
+ */
+ bool nested = in_nmi();
+ if (!nested)
+ nmi_enter();
+
/* See if any machine dependent calls */
if (ppc_md.system_reset_exception) {
if (ppc_md.system_reset_exception(regs))
- return;
+ goto out;
}
die("System Reset", regs, SIGABRT);
+out:
+#ifdef CONFIG_PPC_BOOK3S_64
+ BUG_ON(get_paca()->in_nmi == 0);
+ if (get_paca()->in_nmi > 1)
+ panic("Unrecoverable nested System Reset");
+#endif
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
panic("Unrecoverable System Reset");
+ if (!nested)
+ nmi_exit();
+
/* What should we do here? We could issue a shutdown or hard reset. */
}
@@ -306,8 +323,6 @@ long machine_check_early(struct pt_regs *regs)
__this_cpu_inc(irq_stat.mce_exceptions);
- add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
-
if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
handled = cur_cpu_spec->machine_check_early(regs);
return handled;
@@ -741,6 +756,8 @@ void machine_check_exception(struct pt_regs *regs)
__this_cpu_inc(irq_stat.mce_exceptions);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
/* See if any machine dependent calls. In theory, we would want
* to call the CPU first, and call the ppc_md. one if the CPU
* one returns a positive number. However there is existing code
@@ -1440,6 +1457,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
[FSCR_TM_LG] = "TM",
[FSCR_EBB_LG] = "EBB",
[FSCR_TAR_LG] = "TAR",
+ [FSCR_MSGP_LG] = "MSGP",
+ [FSCR_SCV_LG] = "SCV",
};
char *facility = "unknown";
u64 value;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 1c24c894c908..2f793be3d2b1 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -77,6 +77,8 @@ SECTIONS
#endif
} :kernel
+ __head_end = .;
+
/*
* If the build dies here, it's likely code in head_64.S is referencing
* labels it can't reach, and the linker inserting stubs without the
OpenPOWER on IntegriCloud