summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck/mce.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c284
1 files changed, 134 insertions, 150 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7fdf453d895..af44ebeb593f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
#include <linux/export.h>
#include <linux/jump_label.h>
+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
@@ -53,6 +54,8 @@
static DEFINE_MUTEX(mce_chrdev_read_mutex);
+static int mce_chrdev_open_count; /* #times opened */
+
#define mce_log_get_idx_check(p) \
({ \
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
@@ -120,14 +123,13 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
*/
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
- m->tsc = rdtsc();
/* We hope get_seconds stays lockless */
m->time = get_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
@@ -135,6 +137,9 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
+
+ if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
+ rdmsrl(MSR_PPIN, m->ppin);
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -207,19 +212,23 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
static struct notifier_block mce_srao_nb;
+static atomic_t num_notifiers;
+
void mce_register_decode_chain(struct notifier_block *nb)
{
- /* Ensure SRAO notifier has the highest priority in the decode chain. */
- if (nb != &mce_srao_nb && nb->priority == INT_MAX)
- nb->priority -= 1;
+ atomic_inc(&num_notifiers);
+
+ WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);
- atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
+ blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
void mce_unregister_decode_chain(struct notifier_block *nb)
{
- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+ atomic_dec(&num_notifiers);
+
+ blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
@@ -270,17 +279,17 @@ struct mca_msr_regs msr_ops = {
.misc = misc_reg
};
-static void print_mce(struct mce *m)
+static void __print_mce(struct mce *m)
{
- int ret = 0;
-
- pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
- m->extcpu, m->mcgstatus, m->bank, m->status);
+ pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
+ m->extcpu,
+ (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
+ m->mcgstatus, m->bank, m->status);
if (m->ip) {
pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
+ m->cs, m->ip);
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
@@ -308,15 +317,11 @@ static void print_mce(struct mce *m)
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
cpu_data(m->extcpu).microcode);
+}
- /*
- * Print out human-readable details about the MCE error,
- * (if the CPU has an implementation for that)
- */
- ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
- if (ret == NOTIFY_STOP)
- return;
-
+static void print_mce(struct mce *m)
+{
+ __print_mce(m);
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
@@ -499,7 +504,7 @@ int mce_available(struct cpuinfo_x86 *c)
static void mce_schedule_work(void)
{
- if (!mce_gen_pool_empty() && keventd_up())
+ if (!mce_gen_pool_empty())
schedule_work(&mce_work);
}
@@ -566,7 +571,37 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
}
static struct notifier_block mce_srao_nb = {
.notifier_call = srao_decode_notifier,
- .priority = INT_MAX,
+ .priority = MCE_PRIO_SRAO,
+};
+
+static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *m = (struct mce *)data;
+
+ if (!m)
+ return NOTIFY_DONE;
+
+ /*
+ * Run the default notifier if we have only the SRAO
+ * notifier and us registered.
+ */
+ if (atomic_read(&num_notifiers) > 2)
+ return NOTIFY_DONE;
+
+ /* Don't print when mcelog is running */
+ if (mce_chrdev_open_count > 0)
+ return NOTIFY_DONE;
+
+ __print_mce(m);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block mce_default_nb = {
+ .notifier_call = mce_default_notifier,
+ /* lowest prio, we want it to run last. */
+ .priority = MCE_PRIO_LOWEST,
};
/*
@@ -667,6 +702,9 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL);
+ if (flags & MCP_TIMESTAMP)
+ m.tsc = rdtsc();
+
for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -674,14 +712,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.misc = 0;
m.addr = 0;
m.bank = i;
- m.tsc = 0;
barrier();
m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
-
/*
* Uncorrected or signalled events are handled by the exception
* handler when it is enabled, so don't process those here.
@@ -696,9 +732,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_read_aux(&m, i);
- if (!(flags & MCP_TIMESTAMP))
- m.tsc = 0;
-
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
@@ -1109,6 +1142,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
goto out;
mce_gather_info(&m, regs);
+ m.tsc = rdtsc();
final = this_cpu_ptr(&mces_seen);
*final = m;
@@ -1275,41 +1309,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
#endif
/*
- * Action optional processing happens here (picking up
- * from the list of faulting pages that do_machine_check()
- * placed into the genpool).
- */
-static void mce_process_work(struct work_struct *dummy)
-{
- mce_gen_pool_process();
-}
-
-#ifdef CONFIG_X86_MCE_INTEL
-/***
- * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occurred.
- * @status: Event status information
- *
- * This function should be called by the thermal interrupt after the
- * event has been processed and the decision was made to log the event
- * further.
- *
- * The status parameter will be saved to the 'status' field of 'struct mce'
- * and historically has been the register value of the
- * MSR_IA32_THERMAL_STATUS (Intel) msr.
- */
-void mce_log_therm_throt_event(__u64 status)
-{
- struct mce m;
-
- mce_setup(&m);
- m.bank = MCE_THERMAL_BANK;
- m.status = status;
- mce_log(&m);
-}
-#endif /* CONFIG_X86_MCE_INTEL */
-
-/*
* Periodic polling timer for "silent" machine check errors. If the
* poller finds an MCE, poll 2x faster. When the poller finds no more
* errors, poll 2x slower (up to check_interval seconds).
@@ -1326,20 +1325,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
-static void __restart_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct timer_list *t, unsigned long interval)
{
unsigned long when = jiffies + interval;
unsigned long flags;
local_irq_save(flags);
- if (timer_pending(t)) {
- if (time_before(when, t->expires))
- mod_timer(t, when);
- } else {
- t->expires = round_jiffies(when);
- add_timer_on(t, smp_processor_id());
- }
+ if (!timer_pending(t) || time_before(when, t->expires))
+ mod_timer(t, round_jiffies(when));
local_irq_restore(flags);
}
@@ -1355,7 +1349,7 @@ static void mce_timer_fn(unsigned long data)
iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) {
- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+ machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
if (mce_intel_cmci_poll()) {
iv = mce_adjust_timer(iv);
@@ -1374,7 +1368,7 @@ static void mce_timer_fn(unsigned long data)
done:
__this_cpu_write(mce_next_interval, iv);
- __restart_timer(t, iv);
+ __start_timer(t, iv);
}
/*
@@ -1385,7 +1379,7 @@ void mce_timer_kick(unsigned long interval)
struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned long iv = __this_cpu_read(mce_next_interval);
- __restart_timer(t, interval);
+ __start_timer(t, interval);
if (interval < iv)
__this_cpu_write(mce_next_interval, interval);
@@ -1732,17 +1726,23 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
}
}
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
if (mca_cfg.ignore_ce || !iv)
return;
- per_cpu(mce_next_interval, cpu) = iv;
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
+}
+
+static void __mcheck_cpu_setup_timer(void)
+{
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
+ unsigned int cpu = smp_processor_id();
- t->expires = round_jiffies(jiffies + iv);
- add_timer_on(t, cpu);
+ setup_pinned_timer(t, mce_timer_fn, cpu);
}
static void __mcheck_cpu_init_timer(void)
@@ -1751,7 +1751,7 @@ static void __mcheck_cpu_init_timer(void)
unsigned int cpu = smp_processor_id();
setup_pinned_timer(t, mce_timer_fn, cpu);
- mce_start_timer(cpu, t);
+ mce_start_timer(t);
}
/* Handle unconfigured int18 (should never happen) */
@@ -1796,7 +1796,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
- __mcheck_cpu_init_timer();
+ __mcheck_cpu_setup_timer();
}
/*
@@ -1823,7 +1823,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c)
*/
static DEFINE_SPINLOCK(mce_chrdev_state_lock);
-static int mce_chrdev_open_count; /* #times opened */
static int mce_chrdev_open_exclu; /* already open exclusive? */
static int mce_chrdev_open(struct inode *inode, struct file *file)
@@ -2138,9 +2137,10 @@ int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb);
+ mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity();
- INIT_WORK(&mce_work, mce_process_work);
+ INIT_WORK(&mce_work, mce_gen_pool_process);
init_irq_work(&mce_irq_work, mce_irq_work_cb);
return 0;
@@ -2255,8 +2255,6 @@ static struct bus_type mce_subsys = {
DEFINE_PER_CPU(struct device *, mce_device);
-void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
-
static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
{
return container_of(attr, struct mce_bank, attr);
@@ -2409,6 +2407,10 @@ static int mce_device_create(unsigned int cpu)
if (!mce_available(&boot_cpu_data))
return -EIO;
+ dev = per_cpu(mce_device, cpu);
+ if (dev)
+ return 0;
+
dev = kzalloc(sizeof *dev, GFP_KERNEL);
if (!dev)
return -ENOMEM;
@@ -2468,28 +2470,25 @@ static void mce_device_remove(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void mce_disable_cpu(void)
{
- unsigned long action = *(unsigned long *)h;
-
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
- if (!(action & CPU_TASKS_FROZEN))
+ if (!cpuhp_tasks_frozen)
cmci_clear();
vendor_disable_error_reporting();
}
-static void mce_reenable_cpu(void *h)
+static void mce_reenable_cpu(void)
{
- unsigned long action = *(unsigned long *)h;
int i;
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
- if (!(action & CPU_TASKS_FROZEN))
+ if (!cpuhp_tasks_frozen)
cmci_reenable();
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
@@ -2499,45 +2498,43 @@ static void mce_reenable_cpu(void *h)
}
}
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int mce_cpu_dead(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
- struct timer_list *t = &per_cpu(mce_timer, cpu);
+ mce_intel_hcpu_update(cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- mce_device_create(cpu);
- if (threshold_cpu_callback)
- threshold_cpu_callback(action, cpu);
- break;
- case CPU_DEAD:
- if (threshold_cpu_callback)
- threshold_cpu_callback(action, cpu);
- mce_device_remove(cpu);
- mce_intel_hcpu_update(cpu);
+ /* intentionally ignoring frozen here */
+ if (!cpuhp_tasks_frozen)
+ cmci_rediscover();
+ return 0;
+}
- /* intentionally ignoring frozen here */
- if (!(action & CPU_TASKS_FROZEN))
- cmci_rediscover();
- break;
- case CPU_DOWN_PREPARE:
- smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
- del_timer_sync(t);
- break;
- case CPU_DOWN_FAILED:
- smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
- mce_start_timer(cpu, t);
- break;
- }
+static int mce_cpu_online(unsigned int cpu)
+{
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
+ int ret;
- return NOTIFY_OK;
+ mce_device_create(cpu);
+
+ ret = mce_threshold_create_device(cpu);
+ if (ret) {
+ mce_device_remove(cpu);
+ return ret;
+ }
+ mce_reenable_cpu();
+ mce_start_timer(t);
+ return 0;
}
-static struct notifier_block mce_cpu_notifier = {
- .notifier_call = mce_cpu_callback,
-};
+static int mce_cpu_pre_down(unsigned int cpu)
+{
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
+
+ mce_disable_cpu();
+ del_timer_sync(t);
+ mce_threshold_remove_device(cpu);
+ mce_device_remove(cpu);
+ return 0;
+}
static __init void mce_init_banks(void)
{
@@ -2559,8 +2556,8 @@ static __init void mce_init_banks(void)
static __init int mcheck_init_device(void)
{
+ enum cpuhp_state hp_online;
int err;
- int i = 0;
if (!mce_available(&boot_cpu_data)) {
err = -EIO;
@@ -2578,23 +2575,16 @@ static __init int mcheck_init_device(void)
if (err)
goto err_out_mem;
- cpu_notifier_register_begin();
- for_each_online_cpu(i) {
- err = mce_device_create(i);
- if (err) {
- /*
- * Register notifier anyway (and do not unreg it) so
- * that we don't leave undeleted timers, see notifier
- * callback above.
- */
- __register_hotcpu_notifier(&mce_cpu_notifier);
- cpu_notifier_register_done();
- goto err_device_create;
- }
- }
+ err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL,
+ mce_cpu_dead);
+ if (err)
+ goto err_out_mem;
- __register_hotcpu_notifier(&mce_cpu_notifier);
- cpu_notifier_register_done();
+ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
+ mce_cpu_online, mce_cpu_pre_down);
+ if (err < 0)
+ goto err_out_online;
+ hp_online = err;
register_syscore_ops(&mce_syscore_ops);
@@ -2607,16 +2597,10 @@ static __init int mcheck_init_device(void)
err_register:
unregister_syscore_ops(&mce_syscore_ops);
+ cpuhp_remove_state(hp_online);
-err_device_create:
- /*
- * We didn't keep track of which devices were created above, but
- * even if we had, the set of online cpus might have changed.
- * Play safe and remove for every possible cpu, since
- * mce_device_remove() will do the right thing.
- */
- for_each_possible_cpu(i)
- mce_device_remove(i);
+err_out_online:
+ cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
err_out_mem:
free_cpumask_var(mce_device_initialized);
OpenPOWER on IntegriCloud