summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/powernv/smp.c
diff options
context:
space:
mode:
authorBalbir Singh <bsingharora@gmail.com>2017-12-15 19:14:55 +1100
committerMichael Ellerman <mpe@ellerman.id.au>2018-01-16 23:47:11 +1100
commit4145f358644b970fcff293c09fdcc7939e8527d2 (patch)
tree4f0bb58e3b329b005ad956c7429baca2c077b868 /arch/powerpc/platforms/powernv/smp.c
parent04b9c96eae72d862726f2f4bfcec2078240c33c5 (diff)
downloadtalos-obmc-linux-4145f358644b970fcff293c09fdcc7939e8527d2.tar.gz
talos-obmc-linux-4145f358644b970fcff293c09fdcc7939e8527d2.zip
powernv/kdump: Fix cases where the kdump kernel can get HMI's
Certain HMI's such as malfunction error propagate through all threads/core on the system. If a thread was offline prior to us crashing the system and jumping to the kdump kernel, bad things happen when it wakes up due to an HMI in the kdump kernel. There are several possible ways to solve this problem 1. Put the offline cores in a state such that they are not woken up for machine check and HMI errors. This does not work, since we might need to wake up offline threads to handle TB errors 2. Ignore HMI errors, setup HMEER to mask HMI errors, but this still leads the window open for any MCEs and masking them for the duration of the dump might be a concern 3. Wake up offline CPUs, as in send them to crash_ipi_callback (not wake them up as in mark them online as seen by the hotplug). kexec does a wake_online_cpus() call, this patch does something similar, but instead sends an IPI and forces them to crash_ipi_callback() This patch takes approach #3. Care is taken to enable this only for powenv platforms via crash_wake_offline (a global value set at setup time). The crash code sends out IPI's to all CPU's which then move to crash_ipi_callback and kexec_smp_wait(). Signed-off-by: Balbir Singh <bsingharora@gmail.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/platforms/powernv/smp.c')
-rw-r--r--arch/powerpc/platforms/powernv/smp.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ba030669eca1..9664c8461f03 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -37,6 +37,8 @@
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
#include "powernv.h"
@@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void)
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ } else if ((srr1 & wmask) == SRR1_WAKERESET) {
+ irq_set_pending_from_srr1(srr1);
+ /* Does not return */
}
+
smp_mb();
+ /*
+ * For kdump kernels, we process the ipi and jump to
+ * crash_ipi_callback
+ */
+ if (kdump_in_progress()) {
+ /*
+ * If we got to this point, we've not used
+ * NMI's, otherwise we would have gone
+ * via the SRR1_WAKERESET path. We are
+ * using regular IPI's for waking up offline
+ * threads.
+ */
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ crash_ipi_callback(&regs);
+ /* Does not return */
+ }
+
if (cpu_core_split_required())
continue;
@@ -371,5 +396,8 @@ void __init pnv_smp_init(void)
#ifdef CONFIG_HOTPLUG_CPU
ppc_md.cpu_die = pnv_smp_cpu_kill_self;
+#ifdef CONFIG_KEXEC_CORE
+ crash_wake_offline = 1;
+#endif
#endif
}
OpenPOWER on IntegriCloud