summaryrefslogtreecommitdiffstats
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig20
-rw-r--r--drivers/acpi/apei/apei-base.c35
-rw-r--r--drivers/acpi/apei/apei-internal.h15
-rw-r--r--drivers/acpi/apei/cper.c18
-rw-r--r--drivers/acpi/apei/einj.c51
-rw-r--r--drivers/acpi/apei/erst-dbg.c30
-rw-r--r--drivers/acpi/apei/erst.c437
-rw-r--r--drivers/acpi/apei/ghes.c453
-rw-r--r--drivers/acpi/apei/hest.c29
9 files changed, 956 insertions, 132 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index fca34ccfd294..f0c1ce95a0ec 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -1,5 +1,7 @@
config ACPI_APEI
bool "ACPI Platform Error Interface (APEI)"
+ select MISC_FILESYSTEMS
+ select PSTORE
depends on X86
help
APEI allows to report errors (for example from the chipset)
@@ -8,9 +10,11 @@ config ACPI_APEI
error injection.
config ACPI_APEI_GHES
- tristate "APEI Generic Hardware Error Source"
+ bool "APEI Generic Hardware Error Source"
depends on ACPI_APEI && X86
select ACPI_HED
+ select IRQ_WORK
+ select GENERIC_ALLOCATOR
help
Generic Hardware Error Source provides a way to report
platform hardware errors (such as that from chipset). It
@@ -21,6 +25,20 @@ config ACPI_APEI_GHES
by firmware to produce more valuable hardware error
information for Linux.
+config ACPI_APEI_PCIEAER
+ bool "APEI PCIe AER logging/recovering support"
+ depends on ACPI_APEI && PCIEAER
+ help
+ PCIe AER errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the corresponding support.
+
+config ACPI_APEI_MEMORY_FAILURE
+ bool "APEI memory error recovering support"
+ depends on ACPI_APEI && MEMORY_FAILURE
+ help
+ Memory errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the memory recovering support.
+
config ACPI_APEI_EINJ
tristate "APEI Error INJection (EINJ)"
depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 4a904a4bf05f..61540360d5ce 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
* Interpret the specified action. Go through whole action table,
* execute all instructions belong to the action.
*/
-int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+ bool optional)
{
- int rc;
+ int rc = -ENOENT;
u32 i, ip;
struct acpi_whea_header *entry;
apei_exec_ins_func_t run;
@@ -198,9 +199,9 @@ rewind:
goto rewind;
}
- return 0;
+ return !optional && rc < 0 ? rc : 0;
}
-EXPORT_SYMBOL_GPL(apei_exec_run);
+EXPORT_SYMBOL_GPL(__apei_exec_run);
typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
struct acpi_whea_header *entry,
@@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
return dapei;
}
EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int apei_osc_setup(void)
+{
+ static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+ acpi_handle handle;
+ u32 capbuf[3];
+ struct acpi_osc_context context = {
+ .uuid_str = whea_uuid_str,
+ .rev = 1,
+ .cap.length = sizeof(capbuf),
+ .cap.pointer = capbuf,
+ };
+
+ capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+ capbuf[OSC_SUPPORT_TYPE] = 1;
+ capbuf[OSC_CONTROL_TYPE] = 0;
+
+ if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+ || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+ return -EIO;
+ else {
+ kfree(context.ret.pointer);
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index ef0581f2094d..f57050e7a5e7 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
return ctx->value;
}
-int apei_exec_run(struct apei_exec_context *ctx, u8 action);
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 1);
+}
/* Common instruction implementation */
@@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
const struct acpi_hest_generic_status *estatus);
int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
#endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index 31464a006d76..5d4189464d63 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
#include <linux/time.h>
#include <linux/cper.h>
#include <linux/acpi.h>
+#include <linux/aer.h>
/*
* CPER record ID need to be unique even after reboot, because record
@@ -70,8 +71,8 @@ static const char *cper_severity_str(unsigned int severity)
* If the output length is longer than 80, multiple line will be
* printed, with @pfx is printed at the beginning of each line.
*/
-static void cper_print_bits(const char *pfx, unsigned int bits,
- const char *strs[], unsigned int strs_size)
+void cper_print_bits(const char *pfx, unsigned int bits,
+ const char *strs[], unsigned int strs_size)
{
int i, len = 0;
const char *str;
@@ -81,6 +82,8 @@ static void cper_print_bits(const char *pfx, unsigned int bits,
if (!(bits & (1U << i)))
continue;
str = strs[i];
+ if (!str)
+ continue;
if (len && len + strlen(str) + 2 > 80) {
printk("%s\n", buf);
len = 0;
@@ -243,7 +246,8 @@ static const char *cper_pcie_port_type_strs[] = {
"root complex event collector",
};
-static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
+static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
+ const struct acpi_hest_generic_data *gdata)
{
if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
@@ -276,6 +280,12 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
printk(
"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
+ cper_print_aer(pfx, gdata->error_severity, aer_regs);
+ }
+#endif
}
static const char *apei_estatus_section_flag_strs[] = {
@@ -322,7 +332,7 @@ static void apei_estatus_print_section(
struct cper_sec_pcie *pcie = (void *)(gdata + 1);
printk("%s""section_type: PCIe error\n", pfx);
if (gdata->error_data_length >= sizeof(*pcie))
- cper_print_pcie(pfx, pcie);
+ cper_print_pcie(pfx, pcie, gdata);
else
goto err_section_too_small;
} else
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 096aebfe7f32..589b96c38704 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -46,7 +46,8 @@
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
* most will ignore the parameter and make their own choice of address
- * for error injection.
+ * for error injection. This extension is used only if
+ * param_extension module parameter is specified.
*/
struct einj_parameter {
u64 type;
@@ -65,6 +66,9 @@ struct einj_parameter {
((struct acpi_whea_header *)((char *)(tab) + \
sizeof(struct acpi_table_einj)))
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
static struct acpi_table_einj *einj_tab;
static struct apei_resources einj_resources;
@@ -101,6 +105,14 @@ static DEFINE_MUTEX(einj_mutex);
static struct einj_parameter *einj_param;
+#ifndef writeq
+static inline void writeq(__u64 val, volatile void __iomem *addr)
+{
+ writel(val, addr);
+ writel(val >> 32, addr+4);
+}
+#endif
+
static void einj_exec_ctx_init(struct apei_exec_context *ctx)
{
apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
@@ -277,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
einj_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, type);
@@ -315,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
rc = __einj_error_trigger(trigger_paddr);
if (rc)
return rc;
- rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
return rc;
}
@@ -481,14 +493,6 @@ static int __init einj_init(void)
einj_debug_dir, NULL, &error_type_fops);
if (!fentry)
goto err_cleanup;
- fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
- einj_debug_dir, &error_param1);
- if (!fentry)
- goto err_cleanup;
- fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
- einj_debug_dir, &error_param2);
- if (!fentry)
- goto err_cleanup;
fentry = debugfs_create_file("error_inject", S_IWUSR,
einj_debug_dir, NULL, &error_inject_fops);
if (!fentry)
@@ -505,12 +509,23 @@ static int __init einj_init(void)
rc = apei_exec_pre_map_gars(&ctx);
if (rc)
goto err_release;
- param_paddr = einj_get_parameter_address();
- if (param_paddr) {
- einj_param = ioremap(param_paddr, sizeof(*einj_param));
- rc = -ENOMEM;
- if (!einj_param)
- goto err_unmap;
+ if (param_extension) {
+ param_paddr = einj_get_parameter_address();
+ if (param_paddr) {
+ einj_param = ioremap(param_paddr, sizeof(*einj_param));
+ rc = -ENOMEM;
+ if (!einj_param)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param1);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param2);
+ if (!fentry)
+ goto err_unmap;
+ } else
+ pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
}
pr_info(EINJ_PFX "Error INJection is initialized.\n");
@@ -518,6 +533,8 @@ static int __init einj_init(void)
return 0;
err_unmap:
+ if (einj_param)
+ iounmap(einj_param);
apei_exec_post_unmap_gars(&ctx);
err_release:
apei_resources_release(&einj_resources);
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index de73caf3cebc..903549df809b 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -33,7 +33,7 @@
#define ERST_DBG_PFX "ERST DBG: "
-#define ERST_DBG_RECORD_LEN_MAX 4096
+#define ERST_DBG_RECORD_LEN_MAX 0x4000
static void *erst_dbg_buf;
static unsigned int erst_dbg_buf_len;
@@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex);
static int erst_dbg_open(struct inode *inode, struct file *file)
{
+ int rc, *pos;
+
if (erst_disable)
return -ENODEV;
+ pos = (int *)&file->private_data;
+
+ rc = erst_get_record_id_begin(pos);
+ if (rc)
+ return rc;
+
return nonseekable_open(inode, file);
}
+static int erst_dbg_release(struct inode *inode, struct file *file)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
int rc;
@@ -79,18 +94,20 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
size_t usize, loff_t *off)
{
- int rc;
+ int rc, *pos;
ssize_t len = 0;
u64 id;
- if (*off != 0)
+ if (*off)
return -EINVAL;
if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
return -EINTR;
+ pos = (int *)&filp->private_data;
+
retry_next:
- rc = erst_get_next_record_id(&id);
+ rc = erst_get_record_id_next(pos, &id);
if (rc)
goto out;
/* no more record */
@@ -181,6 +198,7 @@ out:
static const struct file_operations erst_dbg_ops = {
.owner = THIS_MODULE,
.open = erst_dbg_open,
+ .release = erst_dbg_release,
.read = erst_dbg_read,
.write = erst_dbg_write,
.unlocked_ioctl = erst_dbg_ioctl,
@@ -195,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
static __init int erst_dbg_init(void)
{
+ if (erst_disable) {
+ pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+ return -ENODEV;
+ }
return misc_register(&erst_dbg_dev);
}
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index cf6db6b7662a..631b9477b99c 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -34,6 +34,7 @@
#include <linux/cper.h>
#include <linux/nmi.h>
#include <linux/hardirq.h>
+#include <linux/pstore.h>
#include <acpi/apei.h>
#include "apei-internal.h"
@@ -429,6 +430,22 @@ ssize_t erst_get_record_count(void)
}
EXPORT_SYMBOL_GPL(erst_get_record_count);
+#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
+#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
+
+struct erst_record_id_cache {
+ struct mutex lock;
+ u64 *entries;
+ int len;
+ int size;
+ int refcount;
+};
+
+static struct erst_record_id_cache erst_record_id_cache = {
+ .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
+ .refcount = 0,
+};
+
static int __erst_get_next_record_id(u64 *record_id)
{
struct apei_exec_context ctx;
@@ -443,26 +460,179 @@ static int __erst_get_next_record_id(u64 *record_id)
return 0;
}
+int erst_get_record_id_begin(int *pos)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ erst_record_id_cache.refcount++;
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ *pos = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
+
+/* erst_record_id_cache.lock must be held by caller */
+static int __erst_record_id_cache_add_one(void)
+{
+ u64 id, prev_id, first_id;
+ int i, rc;
+ u64 *entries;
+ unsigned long flags;
+
+ id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
+retry:
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ rc = __erst_get_next_record_id(&id);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc == -ENOENT)
+ return 0;
+ if (rc)
+ return rc;
+ if (id == APEI_ERST_INVALID_RECORD_ID)
+ return 0;
+ /* can not skip current ID, or loop back to first ID */
+ if (id == prev_id || id == first_id)
+ return 0;
+ if (first_id == APEI_ERST_INVALID_RECORD_ID)
+ first_id = id;
+ prev_id = id;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == id)
+ break;
+ }
+ /* record id already in cache, try next */
+ if (i < erst_record_id_cache.len)
+ goto retry;
+ if (erst_record_id_cache.len >= erst_record_id_cache.size) {
+ int new_size, alloc_size;
+ u64 *new_entries;
+
+ new_size = erst_record_id_cache.size * 2;
+ new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
+ ERST_RECORD_ID_CACHE_SIZE_MAX);
+ if (new_size <= erst_record_id_cache.size) {
+ if (printk_ratelimit())
+ pr_warning(FW_WARN ERST_PFX
+ "too many record ID!\n");
+ return 0;
+ }
+ alloc_size = new_size * sizeof(entries[0]);
+ if (alloc_size < PAGE_SIZE)
+ new_entries = kmalloc(alloc_size, GFP_KERNEL);
+ else
+ new_entries = vmalloc(alloc_size);
+ if (!new_entries)
+ return -ENOMEM;
+ memcpy(new_entries, entries,
+ erst_record_id_cache.len * sizeof(entries[0]));
+ if (erst_record_id_cache.size < PAGE_SIZE)
+ kfree(entries);
+ else
+ vfree(entries);
+ erst_record_id_cache.entries = entries = new_entries;
+ erst_record_id_cache.size = new_size;
+ }
+ entries[i] = id;
+ erst_record_id_cache.len++;
+
+ return 1;
+}
+
/*
* Get the record ID of an existing error record on the persistent
* storage. If there is no error record on the persistent storage, the
* returned record_id is APEI_ERST_INVALID_RECORD_ID.
*/
-int erst_get_next_record_id(u64 *record_id)
+int erst_get_record_id_next(int *pos, u64 *record_id)
{
- int rc;
- unsigned long flags;
+ int rc = 0;
+ u64 *entries;
if (erst_disable)
return -ENODEV;
- raw_spin_lock_irqsave(&erst_lock, flags);
- rc = __erst_get_next_record_id(record_id);
- raw_spin_unlock_irqrestore(&erst_lock, flags);
+ /* must be enclosed by erst_get_record_id_begin/end */
+ BUG_ON(!erst_record_id_cache.refcount);
+ BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ entries = erst_record_id_cache.entries;
+ for (; *pos < erst_record_id_cache.len; (*pos)++)
+ if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
+ break;
+ /* found next record id in cache */
+ if (*pos < erst_record_id_cache.len) {
+ *record_id = entries[*pos];
+ (*pos)++;
+ goto out_unlock;
+ }
+
+ /* Try to add one more record ID to cache */
+ rc = __erst_record_id_cache_add_one();
+ if (rc < 0)
+ goto out_unlock;
+ /* successfully add one new ID */
+ if (rc == 1) {
+ *record_id = erst_record_id_cache.entries[*pos];
+ (*pos)++;
+ rc = 0;
+ } else {
+ *pos = -1;
+ *record_id = APEI_ERST_INVALID_RECORD_ID;
+ }
+out_unlock:
+ mutex_unlock(&erst_record_id_cache.lock);
return rc;
}
-EXPORT_SYMBOL_GPL(erst_get_next_record_id);
+EXPORT_SYMBOL_GPL(erst_get_record_id_next);
+
+/* erst_record_id_cache.lock must be held by caller */
+static void __erst_record_id_cache_compact(void)
+{
+ int i, wpos = 0;
+ u64 *entries;
+
+ if (erst_record_id_cache.refcount)
+ return;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
+ continue;
+ if (wpos != i)
+ memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
+ wpos++;
+ }
+ erst_record_id_cache.len = wpos;
+}
+
+void erst_get_record_id_end(void)
+{
+ /*
+ * erst_disable != 0 should be detected by invoker via the
+ * return value of erst_get_record_id_begin/next, so this
+ * function should not be called for erst_disable != 0.
+ */
+ BUG_ON(erst_disable);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ erst_record_id_cache.refcount--;
+ BUG_ON(erst_record_id_cache.refcount < 0);
+ __erst_record_id_cache_compact();
+ mutex_unlock(&erst_record_id_cache.lock);
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_end);
static int __erst_write_to_storage(u64 offset)
{
@@ -472,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
int rc;
erst_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, offset);
@@ -496,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_END);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
@@ -511,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
int rc;
erst_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, offset);
@@ -539,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_END);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
@@ -554,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
int rc;
erst_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, record_id);
@@ -578,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_END);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
@@ -703,56 +873,34 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
}
EXPORT_SYMBOL_GPL(erst_read);
-/*
- * If return value > buflen, the buffer size is not big enough,
- * else if return value = 0, there is no more record to read,
- * else if return value < 0, something goes wrong,
- * else everything is OK, and return value is record length
- */
-ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
-{
- int rc;
- ssize_t len;
- unsigned long flags;
- u64 record_id;
-
- if (erst_disable)
- return -ENODEV;
-
- raw_spin_lock_irqsave(&erst_lock, flags);
- rc = __erst_get_next_record_id(&record_id);
- if (rc) {
- raw_spin_unlock_irqrestore(&erst_lock, flags);
- return rc;
- }
- /* no more record */
- if (record_id == APEI_ERST_INVALID_RECORD_ID) {
- raw_spin_unlock_irqrestore(&erst_lock, flags);
- return 0;
- }
-
- len = __erst_read(record_id, record, buflen);
- raw_spin_unlock_irqrestore(&erst_lock, flags);
-
- return len;
-}
-EXPORT_SYMBOL_GPL(erst_read_next);
-
int erst_clear(u64 record_id)
{
- int rc;
+ int rc, i;
unsigned long flags;
+ u64 *entries;
if (erst_disable)
return -ENODEV;
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
raw_spin_lock_irqsave(&erst_lock, flags);
if (erst_erange.attr & ERST_RANGE_NVRAM)
rc = __erst_clear_from_nvram(record_id);
else
rc = __erst_clear_from_storage(record_id);
raw_spin_unlock_irqrestore(&erst_lock, flags);
-
+ if (rc)
+ goto out;
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == record_id)
+ entries[i] = APEI_ERST_INVALID_RECORD_ID;
+ }
+ __erst_record_id_cache_compact();
+out:
+ mutex_unlock(&erst_record_id_cache.lock);
return rc;
}
EXPORT_SYMBOL_GPL(erst_clear);
@@ -781,6 +929,182 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
return 0;
}
+static int erst_open_pstore(struct pstore_info *psi);
+static int erst_close_pstore(struct pstore_info *psi);
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
+ struct timespec *time, char **buf,
+ struct pstore_info *psi);
+static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
+ size_t size, struct pstore_info *psi);
+static int erst_clearer(enum pstore_type_id type, u64 id,
+ struct pstore_info *psi);
+
+static struct pstore_info erst_info = {
+ .owner = THIS_MODULE,
+ .name = "erst",
+ .open = erst_open_pstore,
+ .close = erst_close_pstore,
+ .read = erst_reader,
+ .write = erst_writer,
+ .erase = erst_clearer
+};
+
+#define CPER_CREATOR_PSTORE \
+ UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_DMESG \
+ UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \
+ 0x94, 0x19, 0xeb, 0x12)
+#define CPER_SECTION_TYPE_MCE \
+ UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+struct cper_pstore_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ char data[];
+} __packed;
+
+static int reader_pos;
+
+static int erst_open_pstore(struct pstore_info *psi)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = erst_get_record_id_begin(&reader_pos);
+
+ return rc;
+}
+
+static int erst_close_pstore(struct pstore_info *psi)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
+ struct timespec *time, char **buf,
+ struct pstore_info *psi)
+{
+ int rc;
+ ssize_t len = 0;
+ u64 record_id;
+ struct cper_pstore_record *rcd;
+ size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rcd = kmalloc(rcd_len, GFP_KERNEL);
+ if (!rcd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+skip:
+ rc = erst_get_record_id_next(&reader_pos, &record_id);
+ if (rc)
+ goto out;
+
+ /* no more record */
+ if (record_id == APEI_ERST_INVALID_RECORD_ID) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ len = erst_read(record_id, &rcd->hdr, rcd_len);
+ /* The record may be cleared by others, try read next record */
+ if (len == -ENOENT)
+ goto skip;
+ else if (len < sizeof(*rcd)) {
+ rc = -EIO;
+ goto out;
+ }
+ if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
+ goto skip;
+
+ *buf = kmalloc(len, GFP_KERNEL);
+ if (*buf == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ memcpy(*buf, rcd->data, len - sizeof(*rcd));
+ *id = record_id;
+ if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_DMESG) == 0)
+ *type = PSTORE_TYPE_DMESG;
+ else if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_MCE) == 0)
+ *type = PSTORE_TYPE_MCE;
+ else
+ *type = PSTORE_TYPE_UNKNOWN;
+
+ if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
+ time->tv_sec = rcd->hdr.timestamp;
+ else
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+
+out:
+ kfree(rcd);
+ return (rc < 0) ? rc : (len - sizeof(*rcd));
+}
+
+static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part,
+ size_t size, struct pstore_info *psi)
+{
+ struct cper_pstore_record *rcd = (struct cper_pstore_record *)
+ (erst_info.buf - sizeof(*rcd));
+ int ret;
+
+ memset(rcd, 0, sizeof(*rcd));
+ memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd->hdr.revision = CPER_RECORD_REV;
+ rcd->hdr.signature_end = CPER_SIG_END;
+ rcd->hdr.section_count = 1;
+ rcd->hdr.error_severity = CPER_SEV_FATAL;
+ /* timestamp valid. platform_id, partition_id are invalid */
+ rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
+ rcd->hdr.timestamp = get_seconds();
+ rcd->hdr.record_length = sizeof(*rcd) + size;
+ rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
+ rcd->hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd->hdr.record_id = cper_next_record_id();
+ rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd->sec_hdr.section_offset = sizeof(*rcd);
+ rcd->sec_hdr.section_length = size;
+ rcd->sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd->sec_hdr.validation_bits = 0;
+ rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
+ switch (type) {
+ case PSTORE_TYPE_DMESG:
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
+ break;
+ case PSTORE_TYPE_MCE:
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
+
+ ret = erst_write(&rcd->hdr);
+ *id = rcd->hdr.record_id;
+
+ return ret;
+}
+
+static int erst_clearer(enum pstore_type_id type, u64 id,
+ struct pstore_info *psi)
+{
+ return erst_clear(id);
+}
+
static int __init erst_init(void)
{
int rc = 0;
@@ -788,6 +1112,7 @@ static int __init erst_init(void)
struct apei_exec_context ctx;
struct apei_resources erst_resources;
struct resource *r;
+ char *buf;
if (acpi_disabled)
goto err;
@@ -854,6 +1179,18 @@ static int __init erst_init(void)
if (!erst_erange.vaddr)
goto err_release_erange;
+ buf = kmalloc(erst_erange.size, GFP_KERNEL);
+ spin_lock_init(&erst_info.buf_lock);
+ if (buf) {
+ erst_info.buf = buf + sizeof(struct cper_pstore_record);
+ erst_info.bufsize = erst_erange.size -
+ sizeof(struct cper_pstore_record);
+ if (pstore_register(&erst_info)) {
+ pr_info(ERST_PFX "Could not register with persistent store\n");
+ kfree(buf);
+ }
+ }
+
pr_info(ERST_PFX
"Error Record Serialization Table (ERST) support is initialized.\n");
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index d1d484d4a06a..b8e08cb67a18 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
* For more information about Generic Hardware Error Source, please
* refer to ACPI Specification version 4.0, section 17.3.2.6
*
- * Copyright 2010 Intel Corp.
+ * Copyright 2010,2011 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
@@ -42,17 +42,45 @@
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
#include <acpi/apei.h>
#include <acpi/atomicio.h>
#include <acpi/hed.h>
#include <asm/mce.h>
#include <asm/tlbflush.h>
+#include <asm/nmi.h>
#include "apei-internal.h"
#define GHES_PFX "GHES: "
#define GHES_ESTATUS_MAX_SIZE 65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE 512
+
+#define GHES_ESTATUS_CACHES_SIZE 4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node) \
+ ((struct acpi_hest_generic_status *) \
+ ((struct ghes_estatus_node *)(estatus_node) + 1))
/*
* One struct ghes is created for each generic hardware error source.
@@ -77,6 +105,22 @@ struct ghes {
};
};
+struct ghes_estatus_node {
+ struct llist_node llnode;
+ struct acpi_hest_generic *generic;
+};
+
+struct ghes_estatus_cache {
+ u32 estatus_len;
+ atomic_t count;
+ struct acpi_hest_generic *generic;
+ unsigned long long time_in;
+ struct rcu_head rcu;
+};
+
+int ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
static int ghes_panic_timeout __read_mostly = 30;
/*
@@ -121,6 +165,22 @@ static struct vm_struct *ghes_ioremap_area;
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+/*
+ * printk is not safe in NMI context. So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
static int ghes_ioremap_init(void)
{
ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -180,6 +240,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
__flush_tlb_one(vaddr);
}
+static int ghes_estatus_pool_init(void)
+{
+ ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+ if (!ghes_estatus_pool)
+ return -ENOMEM;
+ return 0;
+}
+
+static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
+ struct gen_pool_chunk *chunk,
+ void *data)
+{
+ free_page(chunk->start_addr);
+}
+
+static void ghes_estatus_pool_exit(void)
+{
+ gen_pool_for_each_chunk(ghes_estatus_pool,
+ ghes_estatus_pool_free_chunk_page, NULL);
+ gen_pool_destroy(ghes_estatus_pool);
+}
+
+static int ghes_estatus_pool_expand(unsigned long len)
+{
+ unsigned long i, pages, size, addr;
+ int ret;
+
+ ghes_estatus_pool_size_request += PAGE_ALIGN(len);
+ size = gen_pool_size(ghes_estatus_pool);
+ if (size >= ghes_estatus_pool_size_request)
+ return 0;
+ pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
+ for (i = 0; i < pages; i++) {
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+ ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+ ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
@@ -241,7 +350,7 @@ static inline int ghes_severity(int severity)
case CPER_SEV_FATAL:
return GHES_SEV_PANIC;
default:
- /* Unkown, go panic */
+ /* Unknown, go panic */
return GHES_SEV_PANIC;
}
}
@@ -341,43 +450,196 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes->flags &= ~GHES_TO_CLEAR;
}
-static void ghes_do_proc(struct ghes *ghes)
+static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
{
- int sev, processed = 0;
+ int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
- sev = ghes_severity(ghes->estatus->error_severity);
- apei_estatus_for_each_section(ghes->estatus, gdata) {
-#ifdef CONFIG_X86_MCE
+ sev = ghes_severity(estatus->error_severity);
+ apei_estatus_for_each_section(estatus, gdata) {
+ sec_sev = ghes_severity(gdata->error_severity);
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
CPER_SEC_PLATFORM_MEM)) {
- apei_mce_report_mem_error(
- sev == GHES_SEV_CORRECTED,
- (struct cper_sec_mem_err *)(gdata+1));
- processed = 1;
- }
+ struct cper_sec_mem_err *mem_err;
+ mem_err = (struct cper_sec_mem_err *)(gdata+1);
+#ifdef CONFIG_X86_MCE
+ apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
+ mem_err);
#endif
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+ if (sev == GHES_SEV_RECOVERABLE &&
+ sec_sev == GHES_SEV_RECOVERABLE &&
+ mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+ unsigned long pfn;
+ pfn = mem_err->physical_addr >> PAGE_SHIFT;
+ memory_failure_queue(pfn, 0, 0);
+ }
+#endif
+ }
}
}
-static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
+static void __ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
{
- /* Not more than 2 messages every 5 seconds */
- static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
-
if (pfx == NULL) {
- if (ghes_severity(ghes->estatus->error_severity) <=
+ if (ghes_severity(estatus->error_severity) <=
GHES_SEV_CORRECTED)
pfx = KERN_WARNING HW_ERR;
else
pfx = KERN_ERR HW_ERR;
}
- if (__ratelimit(&ratelimit)) {
- printk(
- "%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
- pfx, ghes->generic->header.source_id);
- apei_estatus_print(pfx, ghes->estatus);
+ printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+ pfx, generic->header.source_id);
+ apei_estatus_print(pfx, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_hest_generic_status *estatus)
+{
+ /* Not more than 2 messages every 5 seconds */
+ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+ static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+ struct ratelimit_state *ratelimit;
+
+ if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+ ratelimit = &ratelimit_corrected;
+ else
+ ratelimit = &ratelimit_uncorrected;
+ if (__ratelimit(ratelimit)) {
+ __ghes_print_estatus(pfx, generic, estatus);
+ return 1;
}
+ return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+ u32 len;
+ int i, cached = 0;
+ unsigned long long now;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ len = apei_estatus_len(estatus);
+ rcu_read_lock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL)
+ continue;
+ if (len != cache->estatus_len)
+ continue;
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ if (memcmp(estatus, cache_estatus, len))
+ continue;
+ atomic_inc(&cache->count);
+ now = sched_clock();
+ if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+ cached = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int alloced;
+ u32 len, cache_len;
+ struct ghes_estatus_cache *cache;
+ struct acpi_hest_generic_status *cache_estatus;
+
+ alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+ if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ len = apei_estatus_len(estatus);
+ cache_len = GHES_ESTATUS_CACHE_LEN(len);
+ cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+ if (!cache) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ memcpy(cache_estatus, estatus, len);
+ cache->estatus_len = len;
+ atomic_set(&cache->count, 0);
+ cache->generic = generic;
+ cache->time_in = sched_clock();
+ return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+ u32 len;
+
+ len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+ len = GHES_ESTATUS_CACHE_LEN(len);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+ atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+ struct ghes_estatus_cache *cache;
+
+ cache = container_of(head, struct ghes_estatus_cache, rcu);
+ ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+ struct acpi_hest_generic *generic,
+ struct acpi_hest_generic_status *estatus)
+{
+ int i, slot = -1, count;
+ unsigned long long now, duration, period, max_period = 0;
+ struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+ new_cache = ghes_estatus_cache_alloc(generic, estatus);
+ if (new_cache == NULL)
+ return;
+ rcu_read_lock();
+ now = sched_clock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL) {
+ slot = i;
+ slot_cache = NULL;
+ break;
+ }
+ duration = now - cache->time_in;
+ if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+ slot = i;
+ slot_cache = cache;
+ break;
+ }
+ count = atomic_read(&cache->count);
+ period = duration;
+ do_div(period, (count + 1));
+ if (period > max_period) {
+ max_period = period;
+ slot = i;
+ slot_cache = cache;
+ }
+ }
+ /* new_cache must be put into array after its contents are written */
+ smp_wmb();
+ if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+ slot_cache, new_cache) == slot_cache) {
+ if (slot_cache)
+ call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+ } else
+ ghes_estatus_cache_free(new_cache);
+ rcu_read_unlock();
}
static int ghes_proc(struct ghes *ghes)
@@ -387,9 +649,11 @@ static int ghes_proc(struct ghes *ghes)
rc = ghes_read_estatus(ghes, 0);
if (rc)
goto out;
- ghes_print_estatus(NULL, ghes);
- ghes_do_proc(ghes);
-
+ if (!ghes_estatus_cached(ghes->estatus)) {
+ if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
+ ghes_estatus_cache_add(ghes->generic, ghes->estatus);
+ }
+ ghes_do_proc(ghes->estatus);
out:
ghes_clear_estatus(ghes);
return 0;
@@ -447,15 +711,50 @@ static int ghes_notify_sci(struct notifier_block *this,
return ret;
}
-static int ghes_notify_nmi(struct notifier_block *this,
- unsigned long cmd, void *data)
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+ struct llist_node *llnode, *next, *tail = NULL;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_status *estatus;
+ u32 len, node_len;
+
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_del_all(&ghes_estatus_llist);
+ while (llnode) {
+ next = llnode->next;
+ llnode->next = tail;
+ tail = llnode;
+ llnode = next;
+ }
+ llnode = tail;
+ while (llnode) {
+ next = llnode->next;
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = apei_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ ghes_do_proc(estatus);
+ if (!ghes_estatus_cached(estatus)) {
+ generic = estatus_node->generic;
+ if (ghes_print_estatus(NULL, generic, estatus))
+ ghes_estatus_cache_add(generic, estatus);
+ }
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+ node_len);
+ llnode = next;
+ }
+}
+
+static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
struct ghes *ghes, *ghes_global = NULL;
int sev, sev_global = -1;
- int ret = NOTIFY_DONE;
-
- if (cmd != DIE_NMI)
- return ret;
+ int ret = NMI_DONE;
raw_spin_lock(&ghes_nmi_lock);
list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
@@ -468,15 +767,16 @@ static int ghes_notify_nmi(struct notifier_block *this,
sev_global = sev;
ghes_global = ghes;
}
- ret = NOTIFY_STOP;
+ ret = NMI_HANDLED;
}
- if (ret == NOTIFY_DONE)
+ if (ret == NMI_DONE)
goto out;
if (sev_global >= GHES_SEV_PANIC) {
oops_begin();
- ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
+ __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
+ ghes_global->estatus);
/* reboot to log the error! */
if (panic_timeout == 0)
panic_timeout = ghes_panic_timeout;
@@ -484,12 +784,34 @@ static int ghes_notify_nmi(struct notifier_block *this,
}
list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ u32 len, node_len;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic_status *estatus;
+#endif
if (!(ghes->flags & GHES_TO_CLEAR))
continue;
- /* Do not print estatus because printk is not NMI safe */
- ghes_do_proc(ghes);
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ if (ghes_estatus_cached(ghes->estatus))
+ goto next;
+ /* Save estatus for further processing in IRQ context */
+ len = apei_estatus_len(ghes->estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
+ node_len);
+ if (estatus_node) {
+ estatus_node->generic = ghes->generic;
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ memcpy(estatus, ghes->estatus, len);
+ llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+ }
+next:
+#endif
ghes_clear_estatus(ghes);
}
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ irq_work_queue(&ghes_proc_irq_work);
+#endif
out:
raw_spin_unlock(&ghes_nmi_lock);
@@ -500,14 +822,26 @@ static struct notifier_block ghes_notifier_sci = {
.notifier_call = ghes_notify_sci,
};
-static struct notifier_block ghes_notifier_nmi = {
- .notifier_call = ghes_notify_nmi,
-};
+static unsigned long ghes_esource_prealloc_size(
+ const struct acpi_hest_generic *generic)
+{
+ unsigned long block_length, prealloc_records, prealloc_size;
+
+ block_length = min_t(unsigned long, generic->error_block_length,
+ GHES_ESTATUS_MAX_SIZE);
+ prealloc_records = max_t(unsigned long,
+ generic->records_to_preallocate, 1);
+ prealloc_size = min_t(unsigned long, block_length * prealloc_records,
+ GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+ return prealloc_size;
+}
static int __devinit ghes_probe(struct platform_device *ghes_dev)
{
struct acpi_hest_generic *generic;
struct ghes *ghes = NULL;
+ unsigned long len;
int rc = -EINVAL;
generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -573,9 +907,12 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
mutex_unlock(&ghes_list_mutex);
break;
case ACPI_HEST_NOTIFY_NMI:
+ len = ghes_esource_prealloc_size(generic);
+ ghes_estatus_pool_expand(len);
mutex_lock(&ghes_list_mutex);
if (list_empty(&ghes_nmi))
- register_die_notifier(&ghes_notifier_nmi);
+ register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
+ "ghes");
list_add_rcu(&ghes->list, &ghes_nmi);
mutex_unlock(&ghes_list_mutex);
break;
@@ -597,6 +934,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
{
struct ghes *ghes;
struct acpi_hest_generic *generic;
+ unsigned long len;
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
@@ -620,13 +958,15 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
mutex_lock(&ghes_list_mutex);
list_del_rcu(&ghes->list);
if (list_empty(&ghes_nmi))
- unregister_die_notifier(&ghes_notifier_nmi);
+ unregister_nmi_handler(NMI_LOCAL, "ghes");
mutex_unlock(&ghes_list_mutex);
/*
* To synchronize with NMI handler, ghes can only be
* freed after NMI handler finishes.
*/
synchronize_rcu();
+ len = ghes_esource_prealloc_size(generic);
+ ghes_estatus_pool_shrink(len);
break;
default:
BUG();
@@ -662,15 +1002,43 @@ static int __init ghes_init(void)
return -EINVAL;
}
+ if (ghes_disable) {
+ pr_info(GHES_PFX "GHES is not enabled!\n");
+ return -EINVAL;
+ }
+
+ init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+
rc = ghes_ioremap_init();
if (rc)
goto err;
- rc = platform_driver_register(&ghes_platform_driver);
+ rc = ghes_estatus_pool_init();
if (rc)
goto err_ioremap_exit;
+ rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+ GHES_ESTATUS_CACHE_ALLOCED_MAX);
+ if (rc)
+ goto err_pool_exit;
+
+ rc = platform_driver_register(&ghes_platform_driver);
+ if (rc)
+ goto err_pool_exit;
+
+ rc = apei_osc_setup();
+ if (rc == 0 && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+ else if (rc == 0 && !osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+ else if (rc && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+ else
+ pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+
return 0;
+err_pool_exit:
+ ghes_estatus_pool_exit();
err_ioremap_exit:
ghes_ioremap_exit();
err:
@@ -680,6 +1048,7 @@ err:
static void __exit ghes_exit(void)
{
platform_driver_unregister(&ghes_platform_driver);
+ ghes_estatus_pool_exit();
ghes_ioremap_exit();
}
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index abda3786a5d7..05fee06f4d6e 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -139,13 +139,23 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
{
struct platform_device *ghes_dev;
struct ghes_arr *ghes_arr = data;
- int rc;
+ int rc, i;
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
return 0;
if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
return 0;
+ for (i = 0; i < ghes_arr->count; i++) {
+ struct acpi_hest_header *hdr;
+ ghes_dev = ghes_arr->ghes_devs[i];
+ hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
+ if (hdr->source_id == hest_hdr->source_id) {
+ pr_warning(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+ hdr->source_id);
+ return -EIO;
+ }
+ }
ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
if (!ghes_dev)
return -ENOMEM;
@@ -221,16 +231,17 @@ void __init acpi_hest_init(void)
goto err;
}
- rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
- if (rc)
- goto err;
-
- rc = hest_ghes_dev_register(ghes_count);
- if (!rc) {
- pr_info(HEST_PFX "Table parsing has been initialized.\n");
- return;
+ if (!ghes_disable) {
+ rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+ if (rc)
+ goto err;
+ rc = hest_ghes_dev_register(ghes_count);
+ if (rc)
+ goto err;
}
+ pr_info(HEST_PFX "Table parsing has been initialized.\n");
+ return;
err:
hest_disable = 1;
}
OpenPOWER on IntegriCloud