From 9b052ea4ced0fa1ad30a2eafe86984a16297e6f1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:07 +0100 Subject: x86/ras/therm_throt: Do not log a fake MCE for thermal events We log a fake bank 128 MCE to note that we're handling a CPU thermal event. However, this confuses people into thinking that their hardware generates MCEs. Hijacking MCA for logging thermal events is a gross misuse anyway and it shouldn't have been done in the first place. And besides we have other means for dealing with thermal events which are much more suitable. So let's kill the MCE logging part. Signed-off-by: Borislav Petkov Acked-by: Ashok Raj Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170105213846.GA12024@gmail.com Link: http://lkml.kernel.org/r/20170123183514.13356-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel/cpu/mcheck/therm_throt.c') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..85469f84c921 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -6,7 +6,7 @@ * * Maintains a counter in /sys that keeps track of the number of thermal * events, such that the user knows how bad the thermal problem might be - * (since the logging to syslog and mcelog is rate limited). + * (since the logging to syslog is rate limited). * * Author: Dmitriy Zavin (dmitriyz@google.com) * @@ -141,13 +141,8 @@ static struct attribute_group thermal_attr_group = { * IRQ has been acknowledged. * * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. - * 1 : Event should be logged further, and a message has been - * printed to the syslog. */ -static int therm_throt_process(bool new_event, int event, int level) +static void therm_throt_process(bool new_event, int event, int level) { struct _thermal_state *state; unsigned int this_cpu = smp_processor_id(); @@ -162,16 +157,16 @@ static int therm_throt_process(bool new_event, int event, int level) else if (event == POWER_LIMIT_EVENT) state = &pstate->core_power_limit; else - return 0; + return; } else if (level == PACKAGE_LEVEL) { if (event == THERMAL_THROTTLING_EVENT) state = &pstate->package_throttle; else if (event == POWER_LIMIT_EVENT) state = &pstate->package_power_limit; else - return 0; + return; } else - return 0; + return; old_event = state->new_event; state->new_event = new_event; @@ -181,7 +176,7 @@ static int therm_throt_process(bool new_event, int event, int level) if (time_before64(now, state->next_check) && state->count != state->last_count) - return 0; + return; state->next_check = now + CHECK_INTERVAL; state->last_count = state->count; @@ -193,16 +188,14 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - return 1; + return; } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); - return 1; + return; } - - return 0; } static int thresh_event_valid(int level, int event) @@ -365,10 +358,9 @@ static void intel_thermal_interrupt(void) /* Check for violation of core thermal thresholds*/ notify_thresholds(msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(msr_val); + therm_throt_process(msr_val & THERM_STATUS_PROCHOT, + THERMAL_THROTTLING_EVENT, + CORE_LEVEL); if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, -- cgit v1.2.1