diff options
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/Kconfig | 20 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-base.c | 35 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-internal.h | 15 | ||||
-rw-r--r-- | drivers/acpi/apei/cper.c | 18 | ||||
-rw-r--r-- | drivers/acpi/apei/einj.c | 51 | ||||
-rw-r--r-- | drivers/acpi/apei/erst-dbg.c | 30 | ||||
-rw-r--r-- | drivers/acpi/apei/erst.c | 437 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 453 | ||||
-rw-r--r-- | drivers/acpi/apei/hest.c | 29 |
9 files changed, 956 insertions, 132 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index fca34ccfd294..f0c1ce95a0ec 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -1,5 +1,7 @@ config ACPI_APEI bool "ACPI Platform Error Interface (APEI)" + select MISC_FILESYSTEMS + select PSTORE depends on X86 help APEI allows to report errors (for example from the chipset) @@ -8,9 +10,11 @@ config ACPI_APEI error injection. config ACPI_APEI_GHES - tristate "APEI Generic Hardware Error Source" + bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select IRQ_WORK + select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It @@ -21,6 +25,20 @@ config ACPI_APEI_GHES by firmware to produce more valuable hardware error information for Linux. +config ACPI_APEI_PCIEAER + bool "APEI PCIe AER logging/recovering support" + depends on ACPI_APEI && PCIEAER + help + PCIe AER errors may be reported via APEI firmware first mode. + Turn on this option to enable the corresponding support. + +config ACPI_APEI_MEMORY_FAILURE + bool "APEI memory error recovering support" + depends on ACPI_APEI && MEMORY_FAILURE + help + Memory errors may be reported via APEI firmware first mode. + Turn on this option to enable the memory recovering support. + config ACPI_APEI_EINJ tristate "APEI Error INJection (EINJ)" depends on ACPI_APEI && DEBUG_FS diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 4a904a4bf05f..61540360d5ce 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop); * Interpret the specified action. Go through whole action table, * execute all instructions belong to the action. */ -int apei_exec_run(struct apei_exec_context *ctx, u8 action) +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, + bool optional) { - int rc; + int rc = -ENOENT; u32 i, ip; struct acpi_whea_header *entry; apei_exec_ins_func_t run; @@ -198,9 +199,9 @@ rewind: goto rewind; } - return 0; + return !optional && rc < 0 ? rc : 0; } -EXPORT_SYMBOL_GPL(apei_exec_run); +EXPORT_SYMBOL_GPL(__apei_exec_run); typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, struct acpi_whea_header *entry, @@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void) return dapei; } EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); + +int apei_osc_setup(void) +{ + static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; + acpi_handle handle; + u32 capbuf[3]; + struct acpi_osc_context context = { + .uuid_str = whea_uuid_str, + .rev = 1, + .cap.length = sizeof(capbuf), + .cap.pointer = capbuf, + }; + + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = 1; + capbuf[OSC_CONTROL_TYPE] = 0; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) + || ACPI_FAILURE(acpi_run_osc(handle, &context))) + return -EIO; + else { + kfree(context.ret.pointer); + return 0; + } +} +EXPORT_SYMBOL_GPL(apei_osc_setup); diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index ef0581f2094d..f57050e7a5e7 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) return ctx->value; } -int apei_exec_run(struct apei_exec_context *ctx, u8 action); +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional); + +static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 0); +} + +/* It is optional whether the firmware provides the action */ +static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 1); +} /* Common instruction implementation */ @@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus); int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); int apei_estatus_check(const struct acpi_hest_generic_status *estatus); + +int apei_osc_setup(void); #endif diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index 31464a006d76..5d4189464d63 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c @@ -29,6 +29,7 @@ #include <linux/time.h> #include <linux/cper.h> #include <linux/acpi.h> +#include <linux/aer.h> /* * CPER record ID need to be unique even after reboot, because record @@ -70,8 +71,8 @@ static const char *cper_severity_str(unsigned int severity) * If the output length is longer than 80, multiple line will be * printed, with @pfx is printed at the beginning of each line. */ -static void cper_print_bits(const char *pfx, unsigned int bits, - const char *strs[], unsigned int strs_size) +void cper_print_bits(const char *pfx, unsigned int bits, + const char *strs[], unsigned int strs_size) { int i, len = 0; const char *str; @@ -81,6 +82,8 @@ static void cper_print_bits(const char *pfx, unsigned int bits, if (!(bits & (1U << i))) continue; str = strs[i]; + if (!str) + continue; if (len && len + strlen(str) + 2 > 80) { printk("%s\n", buf); len = 0; @@ -243,7 +246,8 @@ static const char *cper_pcie_port_type_strs[] = { "root complex event collector", }; -static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie) +static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, + const struct acpi_hest_generic_data *gdata) { if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, @@ -276,6 +280,12 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie) printk( "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", pfx, pcie->bridge.secondary_status, pcie->bridge.control); +#ifdef CONFIG_ACPI_APEI_PCIEAER + if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) { + struct aer_capability_regs *aer_regs = (void *)pcie->aer_info; + cper_print_aer(pfx, gdata->error_severity, aer_regs); + } +#endif } static const char *apei_estatus_section_flag_strs[] = { @@ -322,7 +332,7 @@ static void apei_estatus_print_section( struct cper_sec_pcie *pcie = (void *)(gdata + 1); printk("%s""section_type: PCIe error\n", pfx); if (gdata->error_data_length >= sizeof(*pcie)) - cper_print_pcie(pfx, pcie); + cper_print_pcie(pfx, pcie, gdata); else goto err_section_too_small; } else diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 096aebfe7f32..589b96c38704 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -46,7 +46,8 @@ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as * most will ignore the parameter and make their own choice of address - * for error injection. + * for error injection. This extension is used only if + * param_extension module parameter is specified. */ struct einj_parameter { u64 type; @@ -65,6 +66,9 @@ struct einj_parameter { ((struct acpi_whea_header *)((char *)(tab) + \ sizeof(struct acpi_table_einj))) +static bool param_extension; +module_param(param_extension, bool, 0); + static struct acpi_table_einj *einj_tab; static struct apei_resources einj_resources; @@ -101,6 +105,14 @@ static DEFINE_MUTEX(einj_mutex); static struct einj_parameter *einj_param; +#ifndef writeq +static inline void writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val, addr); + writel(val >> 32, addr+4); +} +#endif + static void einj_exec_ctx_init(struct apei_exec_context *ctx) { apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type), @@ -277,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) einj_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION); if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); @@ -315,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) rc = __einj_error_trigger(trigger_paddr); if (rc) return rc; - rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); return rc; } @@ -481,14 +493,6 @@ static int __init einj_init(void) einj_debug_dir, NULL, &error_type_fops); if (!fentry) goto err_cleanup; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_cleanup; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_cleanup; fentry = debugfs_create_file("error_inject", S_IWUSR, einj_debug_dir, NULL, &error_inject_fops); if (!fentry) @@ -505,12 +509,23 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; + if (param_extension) { + param_paddr = einj_get_parameter_address(); + if (param_paddr) { + einj_param = ioremap(param_paddr, sizeof(*einj_param)); + rc = -ENOMEM; + if (!einj_param) + goto err_unmap; + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_unmap; + } else + pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); } pr_info(EINJ_PFX "Error INJection is initialized.\n"); @@ -518,6 +533,8 @@ static int __init einj_init(void) return 0; err_unmap: + if (einj_param) + iounmap(einj_param); apei_exec_post_unmap_gars(&ctx); err_release: apei_resources_release(&einj_resources); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index de73caf3cebc..903549df809b 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -33,7 +33,7 @@ #define ERST_DBG_PFX "ERST DBG: " -#define ERST_DBG_RECORD_LEN_MAX 4096 +#define ERST_DBG_RECORD_LEN_MAX 0x4000 static void *erst_dbg_buf; static unsigned int erst_dbg_buf_len; @@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex); static int erst_dbg_open(struct inode *inode, struct file *file) { + int rc, *pos; + if (erst_disable) return -ENODEV; + pos = (int *)&file->private_data; + + rc = erst_get_record_id_begin(pos); + if (rc) + return rc; + return nonseekable_open(inode, file); } +static int erst_dbg_release(struct inode *inode, struct file *file) +{ + erst_get_record_id_end(); + + return 0; +} + static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int rc; @@ -79,18 +94,20 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg) static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { - int rc; + int rc, *pos; ssize_t len = 0; u64 id; - if (*off != 0) + if (*off) return -EINVAL; if (mutex_lock_interruptible(&erst_dbg_mutex) != 0) return -EINTR; + pos = (int *)&filp->private_data; + retry_next: - rc = erst_get_next_record_id(&id); + rc = erst_get_record_id_next(pos, &id); if (rc) goto out; /* no more record */ @@ -181,6 +198,7 @@ out: static const struct file_operations erst_dbg_ops = { .owner = THIS_MODULE, .open = erst_dbg_open, + .release = erst_dbg_release, .read = erst_dbg_read, .write = erst_dbg_write, .unlocked_ioctl = erst_dbg_ioctl, @@ -195,6 +213,10 @@ static struct miscdevice erst_dbg_dev = { static __init int erst_dbg_init(void) { + if (erst_disable) { + pr_info(ERST_DBG_PFX "ERST support is disabled.\n"); + return -ENODEV; + } return misc_register(&erst_dbg_dev); } diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index cf6db6b7662a..631b9477b99c 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -34,6 +34,7 @@ #include <linux/cper.h> #include <linux/nmi.h> #include <linux/hardirq.h> +#include <linux/pstore.h> #include <acpi/apei.h> #include "apei-internal.h" @@ -429,6 +430,22 @@ ssize_t erst_get_record_count(void) } EXPORT_SYMBOL_GPL(erst_get_record_count); +#define ERST_RECORD_ID_CACHE_SIZE_MIN 16 +#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024 + +struct erst_record_id_cache { + struct mutex lock; + u64 *entries; + int len; + int size; + int refcount; +}; + +static struct erst_record_id_cache erst_record_id_cache = { + .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock), + .refcount = 0, +}; + static int __erst_get_next_record_id(u64 *record_id) { struct apei_exec_context ctx; @@ -443,26 +460,179 @@ static int __erst_get_next_record_id(u64 *record_id) return 0; } +int erst_get_record_id_begin(int *pos) +{ + int rc; + + if (erst_disable) + return -ENODEV; + + rc = mutex_lock_interruptible(&erst_record_id_cache.lock); + if (rc) + return rc; + erst_record_id_cache.refcount++; + mutex_unlock(&erst_record_id_cache.lock); + + *pos = 0; + + return 0; +} +EXPORT_SYMBOL_GPL(erst_get_record_id_begin); + +/* erst_record_id_cache.lock must be held by caller */ +static int __erst_record_id_cache_add_one(void) +{ + u64 id, prev_id, first_id; + int i, rc; + u64 *entries; + unsigned long flags; + + id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID; +retry: + raw_spin_lock_irqsave(&erst_lock, flags); + rc = __erst_get_next_record_id(&id); + raw_spin_unlock_irqrestore(&erst_lock, flags); + if (rc == -ENOENT) + return 0; + if (rc) + return rc; + if (id == APEI_ERST_INVALID_RECORD_ID) + return 0; + /* can not skip current ID, or loop back to first ID */ + if (id == prev_id || id == first_id) + return 0; + if (first_id == APEI_ERST_INVALID_RECORD_ID) + first_id = id; + prev_id = id; + + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == id) + break; + } + /* record id already in cache, try next */ + if (i < erst_record_id_cache.len) + goto retry; + if (erst_record_id_cache.len >= erst_record_id_cache.size) { + int new_size, alloc_size; + u64 *new_entries; + + new_size = erst_record_id_cache.size * 2; + new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN, + ERST_RECORD_ID_CACHE_SIZE_MAX); + if (new_size <= erst_record_id_cache.size) { + if (printk_ratelimit()) + pr_warning(FW_WARN ERST_PFX + "too many record ID!\n"); + return 0; + } + alloc_size = new_size * sizeof(entries[0]); + if (alloc_size < PAGE_SIZE) + new_entries = kmalloc(alloc_size, GFP_KERNEL); + else + new_entries = vmalloc(alloc_size); + if (!new_entries) + return -ENOMEM; + memcpy(new_entries, entries, + erst_record_id_cache.len * sizeof(entries[0])); + if (erst_record_id_cache.size < PAGE_SIZE) + kfree(entries); + else + vfree(entries); + erst_record_id_cache.entries = entries = new_entries; + erst_record_id_cache.size = new_size; + } + entries[i] = id; + erst_record_id_cache.len++; + + return 1; +} + /* * Get the record ID of an existing error record on the persistent * storage. If there is no error record on the persistent storage, the * returned record_id is APEI_ERST_INVALID_RECORD_ID. */ -int erst_get_next_record_id(u64 *record_id) +int erst_get_record_id_next(int *pos, u64 *record_id) { - int rc; - unsigned long flags; + int rc = 0; + u64 *entries; if (erst_disable) return -ENODEV; - raw_spin_lock_irqsave(&erst_lock, flags); - rc = __erst_get_next_record_id(record_id); - raw_spin_unlock_irqrestore(&erst_lock, flags); + /* must be enclosed by erst_get_record_id_begin/end */ + BUG_ON(!erst_record_id_cache.refcount); + BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len); + + mutex_lock(&erst_record_id_cache.lock); + entries = erst_record_id_cache.entries; + for (; *pos < erst_record_id_cache.len; (*pos)++) + if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID) + break; + /* found next record id in cache */ + if (*pos < erst_record_id_cache.len) { + *record_id = entries[*pos]; + (*pos)++; + goto out_unlock; + } + + /* Try to add one more record ID to cache */ + rc = __erst_record_id_cache_add_one(); + if (rc < 0) + goto out_unlock; + /* successfully add one new ID */ + if (rc == 1) { + *record_id = erst_record_id_cache.entries[*pos]; + (*pos)++; + rc = 0; + } else { + *pos = -1; + *record_id = APEI_ERST_INVALID_RECORD_ID; + } +out_unlock: + mutex_unlock(&erst_record_id_cache.lock); return rc; } -EXPORT_SYMBOL_GPL(erst_get_next_record_id); +EXPORT_SYMBOL_GPL(erst_get_record_id_next); + +/* erst_record_id_cache.lock must be held by caller */ +static void __erst_record_id_cache_compact(void) +{ + int i, wpos = 0; + u64 *entries; + + if (erst_record_id_cache.refcount) + return; + + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == APEI_ERST_INVALID_RECORD_ID) + continue; + if (wpos != i) + memcpy(&entries[wpos], &entries[i], sizeof(entries[i])); + wpos++; + } + erst_record_id_cache.len = wpos; +} + +void erst_get_record_id_end(void) +{ + /* + * erst_disable != 0 should be detected by invoker via the + * return value of erst_get_record_id_begin/next, so this + * function should not be called for erst_disable != 0. + */ + BUG_ON(erst_disable); + + mutex_lock(&erst_record_id_cache.lock); + erst_record_id_cache.refcount--; + BUG_ON(erst_record_id_cache.refcount < 0); + __erst_record_id_cache_compact(); + mutex_unlock(&erst_record_id_cache.lock); +} +EXPORT_SYMBOL_GPL(erst_get_record_id_end); static int __erst_write_to_storage(u64 offset) { @@ -472,7 +642,7 @@ static int __erst_write_to_storage(u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -496,7 +666,7 @@ static int __erst_write_to_storage(u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -511,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -539,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -554,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR); if (rc) return rc; apei_exec_ctx_set_input(&ctx, record_id); @@ -578,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -703,56 +873,34 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record, } EXPORT_SYMBOL_GPL(erst_read); -/* - * If return value > buflen, the buffer size is not big enough, - * else if return value = 0, there is no more record to read, - * else if return value < 0, something goes wrong, - * else everything is OK, and return value is record length - */ -ssize_t erst_read_next(struct cper_record_header *record, size_t buflen) -{ - int rc; - ssize_t len; - unsigned long flags; - u64 record_id; - - if (erst_disable) - return -ENODEV; - - raw_spin_lock_irqsave(&erst_lock, flags); - rc = __erst_get_next_record_id(&record_id); - if (rc) { - raw_spin_unlock_irqrestore(&erst_lock, flags); - return rc; - } - /* no more record */ - if (record_id == APEI_ERST_INVALID_RECORD_ID) { - raw_spin_unlock_irqrestore(&erst_lock, flags); - return 0; - } - - len = __erst_read(record_id, record, buflen); - raw_spin_unlock_irqrestore(&erst_lock, flags); - - return len; -} -EXPORT_SYMBOL_GPL(erst_read_next); - int erst_clear(u64 record_id) { - int rc; + int rc, i; unsigned long flags; + u64 *entries; if (erst_disable) return -ENODEV; + rc = mutex_lock_interruptible(&erst_record_id_cache.lock); + if (rc) + return rc; raw_spin_lock_irqsave(&erst_lock, flags); if (erst_erange.attr & ERST_RANGE_NVRAM) rc = __erst_clear_from_nvram(record_id); else rc = __erst_clear_from_storage(record_id); raw_spin_unlock_irqrestore(&erst_lock, flags); - + if (rc) + goto out; + entries = erst_record_id_cache.entries; + for (i = 0; i < erst_record_id_cache.len; i++) { + if (entries[i] == record_id) + entries[i] = APEI_ERST_INVALID_RECORD_ID; + } + __erst_record_id_cache_compact(); +out: + mutex_unlock(&erst_record_id_cache.lock); return rc; } EXPORT_SYMBOL_GPL(erst_clear); @@ -781,6 +929,182 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) return 0; } +static int erst_open_pstore(struct pstore_info *psi); +static int erst_close_pstore(struct pstore_info *psi); +static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, + struct timespec *time, char **buf, + struct pstore_info *psi); +static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part, + size_t size, struct pstore_info *psi); +static int erst_clearer(enum pstore_type_id type, u64 id, + struct pstore_info *psi); + +static struct pstore_info erst_info = { + .owner = THIS_MODULE, + .name = "erst", + .open = erst_open_pstore, + .close = erst_close_pstore, + .read = erst_reader, + .write = erst_writer, + .erase = erst_clearer +}; + +#define CPER_CREATOR_PSTORE \ + UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) +#define CPER_SECTION_TYPE_DMESG \ + UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \ + 0x94, 0x19, 0xeb, 0x12) +#define CPER_SECTION_TYPE_MCE \ + UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) + +struct cper_pstore_record { + struct cper_record_header hdr; + struct cper_section_descriptor sec_hdr; + char data[]; +} __packed; + +static int reader_pos; + +static int erst_open_pstore(struct pstore_info *psi) +{ + int rc; + + if (erst_disable) + return -ENODEV; + + rc = erst_get_record_id_begin(&reader_pos); + + return rc; +} + +static int erst_close_pstore(struct pstore_info *psi) +{ + erst_get_record_id_end(); + + return 0; +} + +static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, + struct timespec *time, char **buf, + struct pstore_info *psi) +{ + int rc; + ssize_t len = 0; + u64 record_id; + struct cper_pstore_record *rcd; + size_t rcd_len = sizeof(*rcd) + erst_info.bufsize; + + if (erst_disable) + return -ENODEV; + + rcd = kmalloc(rcd_len, GFP_KERNEL); + if (!rcd) { + rc = -ENOMEM; + goto out; + } +skip: + rc = erst_get_record_id_next(&reader_pos, &record_id); + if (rc) + goto out; + + /* no more record */ + if (record_id == APEI_ERST_INVALID_RECORD_ID) { + rc = -EINVAL; + goto out; + } + + len = erst_read(record_id, &rcd->hdr, rcd_len); + /* The record may be cleared by others, try read next record */ + if (len == -ENOENT) + goto skip; + else if (len < sizeof(*rcd)) { + rc = -EIO; + goto out; + } + if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0) + goto skip; + + *buf = kmalloc(len, GFP_KERNEL); + if (*buf == NULL) { + rc = -ENOMEM; + goto out; + } + memcpy(*buf, rcd->data, len - sizeof(*rcd)); + *id = record_id; + if (uuid_le_cmp(rcd->sec_hdr.section_type, + CPER_SECTION_TYPE_DMESG) == 0) + *type = PSTORE_TYPE_DMESG; + else if (uuid_le_cmp(rcd->sec_hdr.section_type, + CPER_SECTION_TYPE_MCE) == 0) + *type = PSTORE_TYPE_MCE; + else + *type = PSTORE_TYPE_UNKNOWN; + + if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP) + time->tv_sec = rcd->hdr.timestamp; + else + time->tv_sec = 0; + time->tv_nsec = 0; + +out: + kfree(rcd); + return (rc < 0) ? rc : (len - sizeof(*rcd)); +} + +static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part, + size_t size, struct pstore_info *psi) +{ + struct cper_pstore_record *rcd = (struct cper_pstore_record *) + (erst_info.buf - sizeof(*rcd)); + int ret; + + memset(rcd, 0, sizeof(*rcd)); + memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + rcd->hdr.revision = CPER_RECORD_REV; + rcd->hdr.signature_end = CPER_SIG_END; + rcd->hdr.section_count = 1; + rcd->hdr.error_severity = CPER_SEV_FATAL; + /* timestamp valid. platform_id, partition_id are invalid */ + rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP; + rcd->hdr.timestamp = get_seconds(); + rcd->hdr.record_length = sizeof(*rcd) + size; + rcd->hdr.creator_id = CPER_CREATOR_PSTORE; + rcd->hdr.notification_type = CPER_NOTIFY_MCE; + rcd->hdr.record_id = cper_next_record_id(); + rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; + + rcd->sec_hdr.section_offset = sizeof(*rcd); + rcd->sec_hdr.section_length = size; + rcd->sec_hdr.revision = CPER_SEC_REV; + /* fru_id and fru_text is invalid */ + rcd->sec_hdr.validation_bits = 0; + rcd->sec_hdr.flags = CPER_SEC_PRIMARY; + switch (type) { + case PSTORE_TYPE_DMESG: + rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG; + break; + case PSTORE_TYPE_MCE: + rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE; + break; + default: + return -EINVAL; + } + rcd->sec_hdr.section_severity = CPER_SEV_FATAL; + + ret = erst_write(&rcd->hdr); + *id = rcd->hdr.record_id; + + return ret; +} + +static int erst_clearer(enum pstore_type_id type, u64 id, + struct pstore_info *psi) +{ + return erst_clear(id); +} + static int __init erst_init(void) { int rc = 0; @@ -788,6 +1112,7 @@ static int __init erst_init(void) struct apei_exec_context ctx; struct apei_resources erst_resources; struct resource *r; + char *buf; if (acpi_disabled) goto err; @@ -854,6 +1179,18 @@ static int __init erst_init(void) if (!erst_erange.vaddr) goto err_release_erange; + buf = kmalloc(erst_erange.size, GFP_KERNEL); + spin_lock_init(&erst_info.buf_lock); + if (buf) { + erst_info.buf = buf + sizeof(struct cper_pstore_record); + erst_info.bufsize = erst_erange.size - + sizeof(struct cper_pstore_record); + if (pstore_register(&erst_info)) { + pr_info(ERST_PFX "Could not register with persistent store\n"); + kfree(buf); + } + } + pr_info(ERST_PFX "Error Record Serialization Table (ERST) support is initialized.\n"); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d1d484d4a06a..b8e08cb67a18 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -12,7 +12,7 @@ * For more information about Generic Hardware Error Source, please * refer to ACPI Specification version 4.0, section 17.3.2.6 * - * Copyright 2010 Intel Corp. + * Copyright 2010,2011 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> * * This program is free software; you can redistribute it and/or @@ -42,17 +42,45 @@ #include <linux/mutex.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> +#include <linux/irq_work.h> +#include <linux/llist.h> +#include <linux/genalloc.h> #include <acpi/apei.h> #include <acpi/atomicio.h> #include <acpi/hed.h> #include <asm/mce.h> #include <asm/tlbflush.h> +#include <asm/nmi.h> #include "apei-internal.h" #define GHES_PFX "GHES: " #define GHES_ESTATUS_MAX_SIZE 65536 +#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 + +#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 + +/* This is just an estimation for memory pool allocation */ +#define GHES_ESTATUS_CACHE_AVG_SIZE 512 + +#define GHES_ESTATUS_CACHES_SIZE 4 + +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL +/* Prevent too many caches are allocated because of RCU */ +#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) + +#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_cache) + (estatus_len)) +#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_cache *)(estatus_cache) + 1)) + +#define GHES_ESTATUS_NODE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_node) + (estatus_len)) +#define GHES_ESTATUS_FROM_NODE(estatus_node) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_node *)(estatus_node) + 1)) /* * One struct ghes is created for each generic hardware error source. @@ -77,6 +105,22 @@ struct ghes { }; }; +struct ghes_estatus_node { + struct llist_node llnode; + struct acpi_hest_generic *generic; +}; + +struct ghes_estatus_cache { + u32 estatus_len; + atomic_t count; + struct acpi_hest_generic *generic; + unsigned long long time_in; + struct rcu_head rcu; +}; + +int ghes_disable; +module_param_named(disable, ghes_disable, bool, 0); + static int ghes_panic_timeout __read_mostly = 30; /* @@ -121,6 +165,22 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +/* + * printk is not safe in NMI context. So in NMI handler, we allocate + * required memory from lock-less memory allocator + * (ghes_estatus_pool), save estatus into it, put them into lock-less + * list (ghes_estatus_llist), then delay printk into IRQ context via + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record + * required pool size by all NMI error source. + */ +static struct gen_pool *ghes_estatus_pool; +static unsigned long ghes_estatus_pool_size_request; +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + +struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static atomic_t ghes_estatus_cache_alloced; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -180,6 +240,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) __flush_tlb_one(vaddr); } +static int ghes_estatus_pool_init(void) +{ + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); + if (!ghes_estatus_pool) + return -ENOMEM; + return 0; +} + +static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, + struct gen_pool_chunk *chunk, + void *data) +{ + free_page(chunk->start_addr); +} + +static void ghes_estatus_pool_exit(void) +{ + gen_pool_for_each_chunk(ghes_estatus_pool, + ghes_estatus_pool_free_chunk_page, NULL); + gen_pool_destroy(ghes_estatus_pool); +} + +static int ghes_estatus_pool_expand(unsigned long len) +{ + unsigned long i, pages, size, addr; + int ret; + + ghes_estatus_pool_size_request += PAGE_ALIGN(len); + size = gen_pool_size(ghes_estatus_pool); + if (size >= ghes_estatus_pool_size_request) + return 0; + pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; + for (i = 0; i < pages; i++) { + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return -ENOMEM; + ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); + if (ret) + return ret; + } + + return 0; +} + +static void ghes_estatus_pool_shrink(unsigned long len) +{ + ghes_estatus_pool_size_request -= PAGE_ALIGN(len); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -241,7 +350,7 @@ static inline int ghes_severity(int severity) case CPER_SEV_FATAL: return GHES_SEV_PANIC; default: - /* Unkown, go panic */ + /* Unknown, go panic */ return GHES_SEV_PANIC; } } @@ -341,43 +450,196 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(struct ghes *ghes) +static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) { - int sev, processed = 0; + int sev, sec_sev; struct acpi_hest_generic_data *gdata; - sev = ghes_severity(ghes->estatus->error_severity); - apei_estatus_for_each_section(ghes->estatus, gdata) { -#ifdef CONFIG_X86_MCE + sev = ghes_severity(estatus->error_severity); + apei_estatus_for_each_section(estatus, gdata) { + sec_sev = ghes_severity(gdata->error_severity); if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { - apei_mce_report_mem_error( - sev == GHES_SEV_CORRECTED, - (struct cper_sec_mem_err *)(gdata+1)); - processed = 1; - } + struct cper_sec_mem_err *mem_err; + mem_err = (struct cper_sec_mem_err *)(gdata+1); +#ifdef CONFIG_X86_MCE + apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, + mem_err); #endif +#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + unsigned long pfn; + pfn = mem_err->physical_addr >> PAGE_SHIFT; + memory_failure_queue(pfn, 0, 0); + } +#endif + } } } -static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void __ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { - /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); - if (pfx == NULL) { - if (ghes_severity(ghes->estatus->error_severity) <= + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) pfx = KERN_WARNING HW_ERR; else pfx = KERN_ERR HW_ERR; } - if (__ratelimit(&ratelimit)) { - printk( - "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, ghes->generic->header.source_id); - apei_estatus_print(pfx, ghes->estatus); + printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", + pfx, generic->header.source_id); + apei_estatus_print(pfx, estatus); +} + +static int ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) +{ + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); + struct ratelimit_state *ratelimit; + + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) + ratelimit = &ratelimit_corrected; + else + ratelimit = &ratelimit_uncorrected; + if (__ratelimit(ratelimit)) { + __ghes_print_estatus(pfx, generic, estatus); + return 1; } + return 0; +} + +/* + * GHES error status reporting throttle, to report more kinds of + * errors, instead of just most frequently occurred errors. + */ +static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) +{ + u32 len; + int i, cached = 0; + unsigned long long now; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + len = apei_estatus_len(estatus); + rcu_read_lock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) + continue; + if (len != cache->estatus_len) + continue; + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + if (memcmp(estatus, cache_estatus, len)) + continue; + atomic_inc(&cache->count); + now = sched_clock(); + if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) + cached = 1; + break; + } + rcu_read_unlock(); + return cached; +} + +static struct ghes_estatus_cache *ghes_estatus_cache_alloc( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int alloced; + u32 len, cache_len; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); + if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + len = apei_estatus_len(estatus); + cache_len = GHES_ESTATUS_CACHE_LEN(len); + cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); + if (!cache) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + memcpy(cache_estatus, estatus, len); + cache->estatus_len = len; + atomic_set(&cache->count, 0); + cache->generic = generic; + cache->time_in = sched_clock(); + return cache; +} + +static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) +{ + u32 len; + + len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); + len = GHES_ESTATUS_CACHE_LEN(len); + gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); + atomic_dec(&ghes_estatus_cache_alloced); +} + +static void ghes_estatus_cache_rcu_free(struct rcu_head *head) +{ + struct ghes_estatus_cache *cache; + + cache = container_of(head, struct ghes_estatus_cache, rcu); + ghes_estatus_cache_free(cache); +} + +static void ghes_estatus_cache_add( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int i, slot = -1, count; + unsigned long long now, duration, period, max_period = 0; + struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; + + new_cache = ghes_estatus_cache_alloc(generic, estatus); + if (new_cache == NULL) + return; + rcu_read_lock(); + now = sched_clock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) { + slot = i; + slot_cache = NULL; + break; + } + duration = now - cache->time_in; + if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { + slot = i; + slot_cache = cache; + break; + } + count = atomic_read(&cache->count); + period = duration; + do_div(period, (count + 1)); + if (period > max_period) { + max_period = period; + slot = i; + slot_cache = cache; + } + } + /* new_cache must be put into array after its contents are written */ + smp_wmb(); + if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, + slot_cache, new_cache) == slot_cache) { + if (slot_cache) + call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); + } else + ghes_estatus_cache_free(new_cache); + rcu_read_unlock(); } static int ghes_proc(struct ghes *ghes) @@ -387,9 +649,11 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - ghes_print_estatus(NULL, ghes); - ghes_do_proc(ghes); - + if (!ghes_estatus_cached(ghes->estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) + ghes_estatus_cache_add(ghes->generic, ghes->estatus); + } + ghes_do_proc(ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -447,15 +711,50 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } -static int ghes_notify_nmi(struct notifier_block *this, - unsigned long cmd, void *data) +static void ghes_proc_in_irq(struct irq_work *irq_work) +{ + struct llist_node *llnode, *next, *tail = NULL; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + u32 len, node_len; + + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_del_all(&ghes_estatus_llist); + while (llnode) { + next = llnode->next; + llnode->next = tail; + tail = llnode; + llnode = next; + } + llnode = tail; + while (llnode) { + next = llnode->next; + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + len = apei_estatus_len(estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + ghes_do_proc(estatus); + if (!ghes_estatus_cached(estatus)) { + generic = estatus_node->generic; + if (ghes_print_estatus(NULL, generic, estatus)) + ghes_estatus_cache_add(generic, estatus); + } + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + llnode = next; + } +} + +static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes, *ghes_global = NULL; int sev, sev_global = -1; - int ret = NOTIFY_DONE; - - if (cmd != DIE_NMI) - return ret; + int ret = NMI_DONE; raw_spin_lock(&ghes_nmi_lock); list_for_each_entry_rcu(ghes, &ghes_nmi, list) { @@ -468,15 +767,16 @@ static int ghes_notify_nmi(struct notifier_block *this, sev_global = sev; ghes_global = ghes; } - ret = NOTIFY_STOP; + ret = NMI_HANDLED; } - if (ret == NOTIFY_DONE) + if (ret == NMI_DONE) goto out; if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); + __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, + ghes_global->estatus); /* reboot to log the error! */ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; @@ -484,12 +784,34 @@ static int ghes_notify_nmi(struct notifier_block *this, } list_for_each_entry_rcu(ghes, &ghes_nmi, list) { +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + u32 len, node_len; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; +#endif if (!(ghes->flags & GHES_TO_CLEAR)) continue; - /* Do not print estatus because printk is not NMI safe */ - ghes_do_proc(ghes); +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + if (ghes_estatus_cached(ghes->estatus)) + goto next; + /* Save estatus for further processing in IRQ context */ + len = apei_estatus_len(ghes->estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, + node_len); + if (estatus_node) { + estatus_node->generic = ghes->generic; + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + memcpy(estatus, ghes->estatus, len); + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + } +next: +#endif ghes_clear_estatus(ghes); } +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + irq_work_queue(&ghes_proc_irq_work); +#endif out: raw_spin_unlock(&ghes_nmi_lock); @@ -500,14 +822,26 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; -static struct notifier_block ghes_notifier_nmi = { - .notifier_call = ghes_notify_nmi, -}; +static unsigned long ghes_esource_prealloc_size( + const struct acpi_hest_generic *generic) +{ + unsigned long block_length, prealloc_records, prealloc_size; + + block_length = min_t(unsigned long, generic->error_block_length, + GHES_ESTATUS_MAX_SIZE); + prealloc_records = max_t(unsigned long, + generic->records_to_preallocate, 1); + prealloc_size = min_t(unsigned long, block_length * prealloc_records, + GHES_ESOURCE_PREALLOC_MAX_SIZE); + + return prealloc_size; +} static int __devinit ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; + unsigned long len; int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -573,9 +907,12 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) - register_die_notifier(&ghes_notifier_nmi); + register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, + "ghes"); list_add_rcu(&ghes->list, &ghes_nmi); mutex_unlock(&ghes_list_mutex); break; @@ -597,6 +934,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; + unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -620,13 +958,15 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) - unregister_die_notifier(&ghes_notifier_nmi); + unregister_nmi_handler(NMI_LOCAL, "ghes"); mutex_unlock(&ghes_list_mutex); /* * To synchronize with NMI handler, ghes can only be * freed after NMI handler finishes. */ synchronize_rcu(); + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_shrink(len); break; default: BUG(); @@ -662,15 +1002,43 @@ static int __init ghes_init(void) return -EINVAL; } + if (ghes_disable) { + pr_info(GHES_PFX "GHES is not enabled!\n"); + return -EINVAL; + } + + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); + rc = ghes_ioremap_init(); if (rc) goto err; - rc = platform_driver_register(&ghes_platform_driver); + rc = ghes_estatus_pool_init(); if (rc) goto err_ioremap_exit; + rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * + GHES_ESTATUS_CACHE_ALLOCED_MAX); + if (rc) + goto err_pool_exit; + + rc = platform_driver_register(&ghes_platform_driver); + if (rc) + goto err_pool_exit; + + rc = apei_osc_setup(); + if (rc == 0 && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); + else if (rc == 0 && !osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); + else if (rc && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); + else + pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); + return 0; +err_pool_exit: + ghes_estatus_pool_exit(); err_ioremap_exit: ghes_ioremap_exit(); err: @@ -680,6 +1048,7 @@ err: static void __exit ghes_exit(void) { platform_driver_unregister(&ghes_platform_driver); + ghes_estatus_pool_exit(); ghes_ioremap_exit(); } diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index abda3786a5d7..05fee06f4d6e 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -139,13 +139,23 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) { struct platform_device *ghes_dev; struct ghes_arr *ghes_arr = data; - int rc; + int rc, i; if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) return 0; if (!((struct acpi_hest_generic *)hest_hdr)->enabled) return 0; + for (i = 0; i < ghes_arr->count; i++) { + struct acpi_hest_header *hdr; + ghes_dev = ghes_arr->ghes_devs[i]; + hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data; + if (hdr->source_id == hest_hdr->source_id) { + pr_warning(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n", + hdr->source_id); + return -EIO; + } + } ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id); if (!ghes_dev) return -ENOMEM; @@ -221,16 +231,17 @@ void __init acpi_hest_init(void) goto err; } - rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); - if (rc) - goto err; - - rc = hest_ghes_dev_register(ghes_count); - if (!rc) { - pr_info(HEST_PFX "Table parsing has been initialized.\n"); - return; + if (!ghes_disable) { + rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); + if (rc) + goto err; + rc = hest_ghes_dev_register(ghes_count); + if (rc) + goto err; } + pr_info(HEST_PFX "Table parsing has been initialized.\n"); + return; err: hest_disable = 1; } |