diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 11:07:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 11:07:04 -0700 |
commit | b20c99eb668f10b855a9fd87e0a2f5db3fb3637d (patch) | |
tree | 87cb380f2006a1c5ee2c612fead142d261c64c4e /drivers/acpi/apei | |
parent | bb8c4701704d81ef98657dc51adb99aa5a0c5ac9 (diff) | |
parent | ead6fa95b7e9d38b4526503403ba1c029b03dd72 (diff) | |
download | blackbird-op-linux-b20c99eb668f10b855a9fd87e0a2f5db3fb3637d.tar.gz blackbird-op-linux-b20c99eb668f10b855a9fd87e0a2f5db3fb3637d.zip |
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
"[ The reason for drivers/ updates is that Boris asked for the
drivers/edac/ changes to go via x86/ras in this cycle ]
Main changes:
- AMD CPUs:
. Add ECC event decoding support for new F15h models
. Various erratum fixes
. Fix single-channel on dual-channel-controllers bug.
- Intel CPUs:
. UC uncorrectable memory error parsing fix
. Add support for CMC (Corrected Machine Check) 'FF' (Firmware
First) flag in the APEI HEST
- Various cleanups and fixes"
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
amd64_edac: Fix incorrect wraparounds
amd64_edac: Correct erratum 505 range
cpc925_edac: Use proper array termination
x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
amd64_edac: Get rid of boot_cpu_data accesses
amd64_edac: Add ECC decoding support for newer F15h models
x86, amd_nb: Clarify F15h, model 30h GART and L3 support
pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
x38_edac: Make a local function static
i3200_edac: Make a local function static
x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
APEI/ERST: Fix error message formatting
amd64_edac: Fix single-channel setups
EDAC: Replace strict_strtol() with kstrtol()
mce: acpi/apei: Soft-offline a page on firmware GHES notification
mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/erst.c | 51 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 38 | ||||
-rw-r--r-- | drivers/acpi/apei/hest.c | 39 |
3 files changed, 91 insertions, 37 deletions
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 822b1ed3b00f..26311f23c824 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -39,7 +39,8 @@ #include "apei-internal.h" -#define ERST_PFX "ERST: " +#undef pr_fmt +#define pr_fmt(fmt) "ERST: " fmt /* ERST command status */ #define ERST_STATUS_SUCCESS 0x0 @@ -109,8 +110,7 @@ static inline int erst_errno(int command_status) static int erst_timedout(u64 *t, u64 spin_unit) { if ((s64)*t < spin_unit) { - pr_warning(FW_WARN ERST_PFX - "Firmware does not respond in time\n"); + pr_warn(FW_WARN "Firmware does not respond in time.\n"); return 1; } *t -= spin_unit; @@ -186,8 +186,8 @@ static int erst_exec_stall(struct apei_exec_context *ctx, if (ctx->value > FIRMWARE_MAX_STALL) { if (!in_nmi()) - pr_warning(FW_WARN ERST_PFX - "Too long stall time for stall instruction: %llx.\n", + pr_warn(FW_WARN + "Too long stall time for stall instruction: 0x%llx.\n", ctx->value); stall_time = FIRMWARE_MAX_STALL; } else @@ -206,8 +206,8 @@ static int erst_exec_stall_while_true(struct apei_exec_context *ctx, if (ctx->var1 > FIRMWARE_MAX_STALL) { if (!in_nmi()) - pr_warning(FW_WARN ERST_PFX - "Too long stall time for stall while true instruction: %llx.\n", + pr_warn(FW_WARN + "Too long stall time for stall while true instruction: 0x%llx.\n", ctx->var1); stall_time = FIRMWARE_MAX_STALL; } else @@ -271,8 +271,7 @@ static int erst_exec_move_data(struct apei_exec_context *ctx, /* ioremap does not work in interrupt context */ if (in_interrupt()) { - pr_warning(ERST_PFX - "MOVE_DATA can not be used in interrupt context"); + pr_warn("MOVE_DATA can not be used in interrupt context.\n"); return -EBUSY; } @@ -524,8 +523,7 @@ retry: ERST_RECORD_ID_CACHE_SIZE_MAX); if (new_size <= erst_record_id_cache.size) { if (printk_ratelimit()) - pr_warning(FW_WARN ERST_PFX - "too many record ID!\n"); + pr_warn(FW_WARN "too many record IDs!\n"); return 0; } alloc_size = new_size * sizeof(entries[0]); @@ -761,8 +759,7 @@ static int __erst_clear_from_storage(u64 record_id) static void pr_unimpl_nvram(void) { if (printk_ratelimit()) - pr_warning(ERST_PFX - "NVRAM ERST Log Address Range is not implemented yet\n"); + pr_warn("NVRAM ERST Log Address Range not implemented yet.\n"); } static int __erst_write_to_nvram(const struct cper_record_header *record) @@ -1133,7 +1130,7 @@ static int __init erst_init(void) goto err; if (erst_disable) { - pr_info(ERST_PFX + pr_info( "Error Record Serialization Table (ERST) support is disabled.\n"); goto err; } @@ -1144,14 +1141,14 @@ static int __init erst_init(void) goto err; else if (ACPI_FAILURE(status)) { const char *msg = acpi_format_exception(status); - pr_err(ERST_PFX "Failed to get table, %s\n", msg); + pr_err("Failed to get table, %s\n", msg); rc = -EINVAL; goto err; } rc = erst_check_table(erst_tab); if (rc) { - pr_err(FW_BUG ERST_PFX "ERST table is invalid\n"); + pr_err(FW_BUG "ERST table is invalid.\n"); goto err; } @@ -1169,21 +1166,19 @@ static int __init erst_init(void) rc = erst_get_erange(&erst_erange); if (rc) { if (rc == -ENODEV) - pr_info(ERST_PFX + pr_info( "The corresponding hardware device or firmware implementation " "is not available.\n"); else - pr_err(ERST_PFX - "Failed to get Error Log Address Range.\n"); + pr_err("Failed to get Error Log Address Range.\n"); goto err_unmap_reg; } r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST"); if (!r) { - pr_err(ERST_PFX - "Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n", - (unsigned long long)erst_erange.base, - (unsigned long long)erst_erange.base + erst_erange.size); + pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n", + (unsigned long long)erst_erange.base, + (unsigned long long)erst_erange.base + erst_erange.size - 1); rc = -EIO; goto err_unmap_reg; } @@ -1193,7 +1188,7 @@ static int __init erst_init(void) if (!erst_erange.vaddr) goto err_release_erange; - pr_info(ERST_PFX + pr_info( "Error Record Serialization Table (ERST) support is initialized.\n"); buf = kmalloc(erst_erange.size, GFP_KERNEL); @@ -1205,15 +1200,15 @@ static int __init erst_init(void) rc = pstore_register(&erst_info); if (rc) { if (rc != -EPERM) - pr_info(ERST_PFX - "Could not register with persistent store\n"); + pr_info( + "Could not register with persistent store.\n"); erst_info.buf = NULL; erst_info.bufsize = 0; kfree(buf); } } else - pr_err(ERST_PFX - "Failed to allocate %lld bytes for persistent store error log\n", + pr_err( + "Failed to allocate %lld bytes for persistent store error log.\n", erst_erange.size); return 0; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ec9b57d428a1..8ec37bbdd699 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } +static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) +{ +#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + unsigned long pfn; + int sec_sev = ghes_severity(gdata->error_severity); + struct cper_sec_mem_err *mem_err; + mem_err = (struct cper_sec_mem_err *)(gdata + 1); + + if (sec_sev == GHES_SEV_CORRECTED && + (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && + (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) { + pfn = mem_err->physical_addr >> PAGE_SHIFT; + if (pfn_valid(pfn)) + memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); + else if (printk_ratelimit()) + pr_warn(FW_WARN GHES_PFX + "Invalid address in generic error data: %#llx\n", + mem_err->physical_addr); + } + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + pfn = mem_err->physical_addr >> PAGE_SHIFT; + memory_failure_queue(pfn, 0, 0); + } +#endif +} + static void ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes, apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); #endif -#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { - unsigned long pfn; - pfn = mem_err->physical_addr >> PAGE_SHIFT; - memory_failure_queue(pfn, 0, 0); - } -#endif + ghes_handle_memory_failure(gdata, sev); } #ifdef CONFIG_ACPI_APEI_PCIEAER else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index f5ef5d54e4ac..f5e37f32c71f 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -36,6 +36,7 @@ #include <linux/io.h> #include <linux/platform_device.h> #include <acpi/apei.h> +#include <asm/mce.h> #include "apei-internal.h" @@ -121,6 +122,41 @@ int apei_hest_parse(apei_hest_func_t func, void *data) } EXPORT_SYMBOL_GPL(apei_hest_parse); +/* + * Check if firmware advertises firmware first mode. We need FF bit to be set + * along with a set of MC banks which work in FF mode. + */ +static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data) +{ +#ifdef CONFIG_X86_MCE + int i; + struct acpi_hest_ia_corrected *cmc; + struct acpi_hest_ia_error_bank *mc_bank; + + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) + return 0; + + cmc = (struct acpi_hest_ia_corrected *)hest_hdr; + if (!cmc->enabled) + return 0; + + /* + * We expect HEST to provide a list of MC banks that report errors + * in firmware first mode. Otherwise, return non-zero value to + * indicate that we are done parsing HEST. + */ + if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks) + return 1; + + pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n"); + + mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1); + for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) + mce_disable_bank(mc_bank->bank_number); +#endif + return 1; +} + struct ghes_arr { struct platform_device **ghes_devs; unsigned int count; @@ -227,6 +263,9 @@ void __init acpi_hest_init(void) goto err; } + if (!acpi_disable_cmcff) + apei_hest_parse(hest_parse_cmc, NULL); + if (!ghes_disable) { rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); if (rc) |