summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck/mce.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c228
1 files changed, 104 insertions, 124 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8c50754c09c1..953b3ce92dcc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -42,6 +42,7 @@
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/jump_label.h>
+#include <linux/set_memory.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
@@ -50,7 +51,6 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/reboot.h>
-#include <asm/set_memory.h>
#include "mce-internal.h"
@@ -108,10 +108,6 @@ static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn);
-#endif
-
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -123,8 +119,8 @@ void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
- /* We hope get_seconds stays lockless */
- m->time = get_seconds();
+ /* need the internal __ version to avoid deadlocks */
+ m->time = __ktime_get_real_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
m->cpuid = cpuid_eax(1);
m->socketid = cpu_data(m->extcpu).phys_proc_id;
@@ -602,7 +598,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
if (!memory_failure(pfn, 0))
- mce_unmap_kpfn(pfn);
+ set_mce_nospec(pfn);
}
return NOTIFY_OK;
@@ -1072,37 +1068,104 @@ static int do_memory_failure(struct mce *m)
if (ret)
pr_err("Memory error not recovered");
else
- mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
+ set_mce_nospec(m->addr >> PAGE_SHIFT);
return ret;
}
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn)
+
+/*
+ * Cases where we avoid rendezvous handler timeout:
+ * 1) If this CPU is offline.
+ *
+ * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
+ * skip those CPUs which remain looping in the 1st kernel - see
+ * crash_nmi_callback().
+ *
+ * Note: there still is a small window between kexec-ing and the new,
+ * kdump kernel establishing a new #MC handler where a broadcasted MCE
+ * might not get handled properly.
+ */
+static bool __mc_check_crashing_cpu(int cpu)
{
- unsigned long decoy_addr;
+ if (cpu_is_offline(cpu) ||
+ (crashing_cpu != -1 && crashing_cpu != cpu)) {
+ u64 mcgstatus;
- /*
- * Unmap this page from the kernel 1:1 mappings to make sure
- * we don't log more errors because of speculative access to
- * the page.
- * We would like to just call:
- * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
- * but doing that would radically increase the odds of a
- * speculative access to the poison page because we'd have
- * the virtual address of the kernel 1:1 mapping sitting
- * around in registers.
- * Instead we get tricky. We create a non-canonical address
- * that looks just like the one we want, but has bit 63 flipped.
- * This relies on set_memory_np() not checking whether we passed
- * a legal address.
- */
+ mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (mcgstatus & MCG_STATUS_RIPV) {
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __mc_scan_banks(struct mce *m, struct mce *final,
+ unsigned long *toclear, unsigned long *valid_banks,
+ int no_way_out, int *worst)
+{
+ struct mca_config *cfg = &mca_cfg;
+ int severity, i;
+
+ for (i = 0; i < cfg->banks; i++) {
+ __clear_bit(i, toclear);
+ if (!test_bit(i, valid_banks))
+ continue;
+
+ if (!mce_banks[i].ctl)
+ continue;
+
+ m->misc = 0;
+ m->addr = 0;
+ m->bank = i;
+
+ m->status = mce_rdmsrl(msr_ops.status(i));
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+
+ /*
+ * Corrected or non-signaled errors are handled by
+ * machine_check_poll(). Leave them alone, unless this panics.
+ */
+ if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ !no_way_out)
+ continue;
+
+ /* Set taint even when machine check was not enabled. */
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+ severity = mce_severity(m, cfg->tolerant, NULL, true);
+
+ /*
+ * When machine check was for corrected/deferred handler don't
+ * touch, unless we're panicking.
+ */
+ if ((severity == MCE_KEEP_SEVERITY ||
+ severity == MCE_UCNA_SEVERITY) && !no_way_out)
+ continue;
- decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+ __set_bit(i, toclear);
+
+ /* Machine check event was not enabled. Clear, but ignore. */
+ if (severity == MCE_NO_SEVERITY)
+ continue;
- if (set_memory_np(decoy_addr, 1))
- pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+ mce_read_aux(m, i);
+
+ /* assuming valid severity level != 0 */
+ m->severity = severity;
+
+ mce_log(m);
+
+ if (severity > *worst) {
+ *final = *m;
+ *worst = severity;
+ }
+ }
+
+ /* mce_clear_state will clear *final, save locally for use later */
+ *m = *final;
}
-#endif
/*
* The actual machine check handler. This only handles real
@@ -1118,68 +1181,45 @@ static void mce_unmap_kpfn(unsigned long pfn)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
+ DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
+ DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg;
+ int cpu = smp_processor_id();
+ char *msg = "Unknown";
struct mce m, *final;
- int i;
int worst = 0;
- int severity;
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
int order = -1;
+
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
+
/*
* If kill_it gets set, there might be a way to recover from this
* error.
*/
int kill_it = 0;
- DECLARE_BITMAP(toclear, MAX_NR_BANKS);
- DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
- char *msg = "Unknown";
/*
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
* on Intel.
*/
int lmce = 1;
- int cpu = smp_processor_id();
- /*
- * Cases where we avoid rendezvous handler timeout:
- * 1) If this CPU is offline.
- *
- * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
- * skip those CPUs which remain looping in the 1st kernel - see
- * crash_nmi_callback().
- *
- * Note: there still is a small window between kexec-ing and the new,
- * kdump kernel establishing a new #MC handler where a broadcasted MCE
- * might not get handled properly.
- */
- if (cpu_is_offline(cpu) ||
- (crashing_cpu != -1 && crashing_cpu != cpu)) {
- u64 mcgstatus;
-
- mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
- if (mcgstatus & MCG_STATUS_RIPV) {
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
- return;
- }
- }
+ if (__mc_check_crashing_cpu(cpu))
+ return;
ist_enter(regs);
this_cpu_inc(mce_exception_count);
- if (!cfg->banks)
- goto out;
-
mce_gather_info(&m, regs);
m.tsc = rdtsc();
@@ -1220,67 +1260,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
}
- for (i = 0; i < cfg->banks; i++) {
- __clear_bit(i, toclear);
- if (!test_bit(i, valid_banks))
- continue;
- if (!mce_banks[i].ctl)
- continue;
-
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
-
- m.status = mce_rdmsrl(msr_ops.status(i));
- if ((m.status & MCI_STATUS_VAL) == 0)
- continue;
-
- /*
- * Non uncorrected or non signaled errors are handled by
- * machine_check_poll. Leave them alone, unless this panics.
- */
- if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
- !no_way_out)
- continue;
-
- /*
- * Set taint even when machine check was not enabled.
- */
- add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
-
- severity = mce_severity(&m, cfg->tolerant, NULL, true);
-
- /*
- * When machine check was for corrected/deferred handler don't
- * touch, unless we're panicing.
- */
- if ((severity == MCE_KEEP_SEVERITY ||
- severity == MCE_UCNA_SEVERITY) && !no_way_out)
- continue;
- __set_bit(i, toclear);
- if (severity == MCE_NO_SEVERITY) {
- /*
- * Machine check event was not enabled. Clear, but
- * ignore.
- */
- continue;
- }
-
- mce_read_aux(&m, i);
-
- /* assuming valid severity level != 0 */
- m.severity = severity;
-
- mce_log(&m);
-
- if (severity > worst) {
- *final = m;
- worst = severity;
- }
- }
-
- /* mce_clear_state will clear *final, save locally for use later */
- m = *final;
+ __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1319,7 +1299,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (worst > 0)
mce_report_event(regs);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-out:
+
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
OpenPOWER on IntegriCloud