From cf870c70a194443f8fc654ddc9d6cfd02c58003b Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 10 Jul 2013 14:57:01 +0530 Subject: mce: acpi/apei: Soft-offline a page on firmware GHES notification If the firmware indicates in GHES error data entry that the error threshold has exceeded for a corrected error event, then we try to soft-offline the page. This could be called in interrupt context, so we queue this up similar to how we handle memory failure scenarios. Signed-off-by: Naveen N. Rao Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/acpi/apei/ghes.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'drivers/acpi') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fcd7d91cec34..a8f362acc8ec 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } +static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) +{ +#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + unsigned long pfn; + int sec_sev = ghes_severity(gdata->error_severity); + struct cper_sec_mem_err *mem_err; + mem_err = (struct cper_sec_mem_err *)(gdata + 1); + + if (sec_sev == GHES_SEV_CORRECTED && + (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && + (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) { + pfn = mem_err->physical_addr >> PAGE_SHIFT; + if (pfn_valid(pfn)) + memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); + else if (printk_ratelimit()) + pr_warn(FW_WARN GHES_PFX + "Invalid address in generic error data: %#llx\n", + mem_err->physical_addr); + } + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + pfn = mem_err->physical_addr >> PAGE_SHIFT; + memory_failure_queue(pfn, 0, 0); + } +#endif +} + static void ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes, apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); #endif -#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { - unsigned long pfn; - pfn = mem_err->physical_addr >> PAGE_SHIFT; - memory_failure_queue(pfn, 0, 0); - } -#endif + ghes_handle_memory_failure(gdata, sev); } #ifdef CONFIG_ACPI_APEI_PCIEAER else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, -- cgit v1.2.1