diff options
-rw-r--r-- | drivers/Kconfig | 2 | ||||
-rw-r--r-- | drivers/Makefile | 1 | ||||
-rw-r--r-- | drivers/acpi/Kconfig | 4 | ||||
-rw-r--r-- | drivers/acpi/acpi_extlog.c | 46 | ||||
-rw-r--r-- | drivers/edac/Kconfig | 1 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 3 | ||||
-rw-r--r-- | drivers/firmware/efi/cper.c | 192 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/Kconfig | 1 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_errprint.c | 4 | ||||
-rw-r--r-- | drivers/ras/Kconfig | 2 | ||||
-rw-r--r-- | drivers/ras/Makefile | 1 | ||||
-rw-r--r-- | drivers/ras/debugfs.c | 56 | ||||
-rw-r--r-- | drivers/ras/ras.c | 29 | ||||
-rw-r--r-- | include/linux/cper.h | 32 | ||||
-rw-r--r-- | include/linux/ras.h | 14 | ||||
-rw-r--r-- | include/ras/ras_event.h | 128 | ||||
-rw-r--r-- | include/trace/events/ras.h | 77 | ||||
-rw-r--r-- | include/trace/ftrace.h | 33 | ||||
-rw-r--r-- | include/trace/syscall.h | 15 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/tracepoint.c | 26 | ||||
-rw-r--r-- | samples/trace_events/trace-events-sample.h | 3 |
22 files changed, 506 insertions, 166 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index 0e87a34b6472..4e6e66c3c8d6 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig" source "drivers/mcb/Kconfig" +source "drivers/ras/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index f98b50d8251d..65c32b1cea3d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -158,3 +158,4 @@ obj-$(CONFIG_NTB) += ntb/ obj-$(CONFIG_FMC) += fmc/ obj-$(CONFIG_POWERCAP) += powercap/ obj-$(CONFIG_MCB) += mcb/ +obj-$(CONFIG_RAS) += ras/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a34a22841002..206942b8d105 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,6 +370,7 @@ config ACPI_EXTLOG tristate "Extended Error Log support" depends on X86_MCE && X86_LOCAL_APIC select UEFI_CPER + select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require @@ -384,6 +385,7 @@ config ACPI_EXTLOG Enhanced MCA Logging allows firmware to provide additional error information to system software, synchronous with MCE or CMCI. This - driver adds support for that functionality. + driver adds support for that functionality with corresponding + tracepoint which carries that information to userspace. endif # ACPI diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 185334114d71..0ad6f389d922 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -12,10 +12,12 @@ #include <linux/cper.h> #include <linux/ratelimit.h> #include <linux/edac.h> +#include <linux/ras.h> #include <asm/cpu.h> #include <asm/mce.h> #include "apei/apei-internal.h" +#include <ras/ras_event.h> #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ @@ -137,8 +139,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; int bank = mce->bank; int cpu = mce->extcpu; - struct acpi_generic_status *estatus; - int rc; + struct acpi_generic_status *estatus, *tmp; + struct acpi_generic_data *gdata; + const uuid_le *fru_id = &NULL_UUID_LE; + char *fru_text = ""; + uuid_le *sec_type; + static u32 err_seq; estatus = extlog_elog_entry_check(cpu, bank); if (estatus == NULL) @@ -148,8 +154,29 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, /* clear record status to enable BIOS to update it again */ estatus->block_status = 0; - rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); + tmp = (struct acpi_generic_status *)elog_buf; + + if (!ras_userspace_consumers()) { + print_extlog_rcd(NULL, tmp, cpu); + goto out; + } + + /* log event via trace */ + err_seq++; + gdata = (struct acpi_generic_data *)(tmp + 1); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (uuid_le *)gdata->fru_id; + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + sec_type = (uuid_le *)gdata->section_type; + if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem = (void *)(gdata + 1); + if (gdata->error_data_length >= sizeof(*mem)) + trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, + (u8)gdata->error_severity); + } +out: return NOTIFY_STOP; } @@ -196,19 +223,16 @@ static int __init extlog_init(void) u64 cap; int rc; + rdmsrl(MSR_IA32_MCG_CAP, cap); + + if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) + return -ENODEV; + if (get_edac_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } - rc = -ENODEV; - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (!(cap & MCG_ELOG_P)) - return rc; - - if (!extlog_get_l1addr()) - return rc; - rc = -EINVAL; /* get L1 header to fetch necessary information */ l1_hdr_size = sizeof(struct extlog_l1_head); diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 878f09005fad..d3c0465ba456 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -72,6 +72,7 @@ config EDAC_MCE_INJ config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" + select RAS help Some systems are able to detect and correct errors in main memory. EDAC can report statistics on memory error diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 2c694b5297cc..9f134823fa75 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -33,9 +33,6 @@ #include <asm/edac.h> #include "edac_core.h" #include "edac_module.h" - -#define CREATE_TRACE_POINTS -#define TRACE_INCLUDE_PATH ../../include/ras #include <ras/ras_event.h> /* lock to memory controller's control array */ diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 1491dd4f08f9..437e6fd47311 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -34,6 +34,9 @@ #include <linux/aer.h> #define INDENT_SP " " + +static char rcd_decode_str[CPER_REC_LEN]; + /* * CPER record ID need to be unique even after reboot, because record * ID is used as index for ERST storage, while CPER records from @@ -50,18 +53,19 @@ u64 cper_next_record_id(void) } EXPORT_SYMBOL_GPL(cper_next_record_id); -static const char *cper_severity_strs[] = { +static const char * const severity_strs[] = { "recoverable", "fatal", "corrected", "info", }; -static const char *cper_severity_str(unsigned int severity) +const char *cper_severity_str(unsigned int severity) { - return severity < ARRAY_SIZE(cper_severity_strs) ? - cper_severity_strs[severity] : "unknown"; + return severity < ARRAY_SIZE(severity_strs) ? + severity_strs[severity] : "unknown"; } +EXPORT_SYMBOL_GPL(cper_severity_str); /* * cper_print_bits - print strings for set bits @@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits, printk("%s\n", buf); } -static const char * const cper_proc_type_strs[] = { +static const char * const proc_type_strs[] = { "IA32/X64", "IA64", }; -static const char * const cper_proc_isa_strs[] = { +static const char * const proc_isa_strs[] = { "IA32", "IA64", "X64", }; -static const char * const cper_proc_error_type_strs[] = { +static const char * const proc_error_type_strs[] = { "cache error", "TLB error", "bus error", "micro-architectural error", }; -static const char * const cper_proc_op_strs[] = { +static const char * const proc_op_strs[] = { "unknown or generic", "data read", "data write", "instruction execution", }; -static const char * const cper_proc_flag_strs[] = { +static const char * const proc_flag_strs[] = { "restartable", "precise IP", "overflow", @@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx, { if (proc->validation_bits & CPER_PROC_VALID_TYPE) printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, - proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? - cper_proc_type_strs[proc->proc_type] : "unknown"); + proc->proc_type < ARRAY_SIZE(proc_type_strs) ? + proc_type_strs[proc->proc_type] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ISA) printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, - proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? - cper_proc_isa_strs[proc->proc_isa] : "unknown"); + proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? + proc_isa_strs[proc->proc_isa] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); cper_print_bits(pfx, proc->proc_error_type, - cper_proc_error_type_strs, - ARRAY_SIZE(cper_proc_error_type_strs)); + proc_error_type_strs, + ARRAY_SIZE(proc_error_type_strs)); } if (proc->validation_bits & CPER_PROC_VALID_OPERATION) printk("%s""operation: %d, %s\n", pfx, proc->operation, - proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? - cper_proc_op_strs[proc->operation] : "unknown"); + proc->operation < ARRAY_SIZE(proc_op_strs) ? + proc_op_strs[proc->operation] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { printk("%s""flags: 0x%02x\n", pfx, proc->flags); - cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, - ARRAY_SIZE(cper_proc_flag_strs)); + cper_print_bits(pfx, proc->flags, proc_flag_strs, + ARRAY_SIZE(proc_flag_strs)); } if (proc->validation_bits & CPER_PROC_VALID_LEVEL) printk("%s""level: %d\n", pfx, proc->level); @@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } -static const char *cper_mem_err_type_strs[] = { +static const char * const mem_err_type_strs[] = { "unknown", "no error", "single-bit ECC", @@ -196,58 +200,136 @@ static const char *cper_mem_err_type_strs[] = { "physical memory map-out event", }; -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +const char *cper_mem_err_type_str(unsigned int etype) { - if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) - printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); - if (mem->validation_bits & CPER_MEM_VALID_PA) - printk("%s""physical_address: 0x%016llx\n", - pfx, mem->physical_addr); - if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) - printk("%s""physical_address_mask: 0x%016llx\n", - pfx, mem->physical_addr_mask); + return etype < ARRAY_SIZE(mem_err_type_strs) ? + mem_err_type_strs[etype] : "unknown"; +} +EXPORT_SYMBOL_GPL(cper_mem_err_type_str); + +static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) +{ + u32 len, n; + + if (!msg) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; if (mem->validation_bits & CPER_MEM_VALID_NODE) - pr_debug("node: %d\n", mem->node); + n += scnprintf(msg + n, len - n, "node: %d ", mem->node); if (mem->validation_bits & CPER_MEM_VALID_CARD) - pr_debug("card: %d\n", mem->card); + n += scnprintf(msg + n, len - n, "card: %d ", mem->card); if (mem->validation_bits & CPER_MEM_VALID_MODULE) - pr_debug("module: %d\n", mem->module); + n += scnprintf(msg + n, len - n, "module: %d ", mem->module); if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) - pr_debug("rank: %d\n", mem->rank); + n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); if (mem->validation_bits & CPER_MEM_VALID_BANK) - pr_debug("bank: %d\n", mem->bank); + n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); if (mem->validation_bits & CPER_MEM_VALID_DEVICE) - pr_debug("device: %d\n", mem->device); + n += scnprintf(msg + n, len - n, "device: %d ", mem->device); if (mem->validation_bits & CPER_MEM_VALID_ROW) - pr_debug("row: %d\n", mem->row); + n += scnprintf(msg + n, len - n, "row: %d ", mem->row); if (mem->validation_bits & CPER_MEM_VALID_COLUMN) - pr_debug("column: %d\n", mem->column); + n += scnprintf(msg + n, len - n, "column: %d ", mem->column); if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) - pr_debug("bit_position: %d\n", mem->bit_pos); + n += scnprintf(msg + n, len - n, "bit_position: %d ", + mem->bit_pos); if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) - pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); + n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", + mem->requestor_id); if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) - pr_debug("responder_id: 0x%016llx\n", mem->responder_id); + n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", + mem->responder_id); if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) - pr_debug("target_id: 0x%016llx\n", mem->target_id); + scnprintf(msg + n, len - n, "target_id: 0x%016llx ", + mem->target_id); + + msg[n] = '\0'; + return n; +} + +static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) +{ + u32 len, n; + const char *bank = NULL, *device = NULL; + + if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); + if (bank && device) + n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); + else + n = snprintf(msg, len, + "DIMM location: not present. DMI handle: 0x%.4x ", + mem->mem_dev_handle); + + msg[n] = '\0'; + return n; +} + +void cper_mem_err_pack(const struct cper_sec_mem_err *mem, + struct cper_mem_err_compact *cmem) +{ + cmem->validation_bits = mem->validation_bits; + cmem->node = mem->node; + cmem->card = mem->card; + cmem->module = mem->module; + cmem->bank = mem->bank; + cmem->device = mem->device; + cmem->row = mem->row; + cmem->column = mem->column; + cmem->bit_pos = mem->bit_pos; + cmem->requestor_id = mem->requestor_id; + cmem->responder_id = mem->responder_id; + cmem->target_id = mem->target_id; + cmem->rank = mem->rank; + cmem->mem_array_handle = mem->mem_array_handle; + cmem->mem_dev_handle = mem->mem_dev_handle; +} + +const char *cper_mem_err_unpack(struct trace_seq *p, + struct cper_mem_err_compact *cmem) +{ + const char *ret = p->buffer + p->len; + + if (cper_mem_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + if (cper_dimm_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + trace_seq_putc(p, '\0'); + + return ret; +} + +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +{ + struct cper_mem_err_compact cmem; + + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) + printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); + if (mem->validation_bits & CPER_MEM_VALID_PA) + printk("%s""physical_address: 0x%016llx\n", + pfx, mem->physical_addr); + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + printk("%s""physical_address_mask: 0x%016llx\n", + pfx, mem->physical_addr_mask); + cper_mem_err_pack(mem, &cmem); + if (cper_mem_err_location(&cmem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? - cper_mem_err_type_strs[etype] : "unknown"); - } - if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { - const char *bank = NULL, *device = NULL; - dmi_memdev_name(mem->mem_dev_handle, &bank, &device); - if (bank != NULL && device != NULL) - printk("%s""DIMM location: %s %s", pfx, bank, device); - else - printk("%s""DIMM DMI handle: 0x%.4x", - pfx, mem->mem_dev_handle); + cper_mem_err_type_str(etype)); } + if (cper_dimm_err_location(&cmem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); } -static const char *cper_pcie_port_type_strs[] = { +static const char * const pcie_port_type_strs[] = { "PCIe end point", "legacy PCI end point", "unknown", @@ -266,8 +348,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, { if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, - pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? - cper_pcie_port_type_strs[pcie->port_type] : "unknown"); + pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? + pcie_port_type_strs[pcie->port_type] : "unknown"); if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) printk("%s""version: %d.%d\n", pfx, pcie->version.major, pcie->version.minor); diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig index 50e94e02378a..389440228c1d 100644 --- a/drivers/pci/pcie/aer/Kconfig +++ b/drivers/pci/pcie/aer/Kconfig @@ -5,6 +5,7 @@ config PCIEAER boolean "Root Port Advanced Error Reporting support" depends on PCIEPORTBUS + select RAS default y help This enables PCI Express Root Port Advanced Error Reporting diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 36ed31b52198..35d06e177917 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -22,9 +22,7 @@ #include <linux/cper.h> #include "aerdrv.h" - -#define CREATE_TRACE_POINTS -#include <trace/events/ras.h> +#include <ras/ras_event.h> #define AER_AGENT_RECEIVER 0 #define AER_AGENT_REQUESTER 1 diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig new file mode 100644 index 000000000000..f9da613052c2 --- /dev/null +++ b/drivers/ras/Kconfig @@ -0,0 +1,2 @@ +config RAS + bool diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile new file mode 100644 index 000000000000..d7f73341ced3 --- /dev/null +++ b/drivers/ras/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RAS) += ras.o debugfs.o diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c new file mode 100644 index 000000000000..0322acf67ea5 --- /dev/null +++ b/drivers/ras/debugfs.c @@ -0,0 +1,56 @@ +#include <linux/debugfs.h> + +static struct dentry *ras_debugfs_dir; + +static atomic_t trace_count = ATOMIC_INIT(0); + +int ras_userspace_consumers(void) +{ + return atomic_read(&trace_count); +} +EXPORT_SYMBOL_GPL(ras_userspace_consumers); + +static int trace_show(struct seq_file *m, void *v) +{ + return atomic_read(&trace_count); +} + +static int trace_open(struct inode *inode, struct file *file) +{ + atomic_inc(&trace_count); + return single_open(file, trace_show, NULL); +} + +static int trace_release(struct inode *inode, struct file *file) +{ + atomic_dec(&trace_count); + return single_release(inode, file); +} + +static const struct file_operations trace_fops = { + .open = trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = trace_release, +}; + +int __init ras_add_daemon_trace(void) +{ + struct dentry *fentry; + + if (!ras_debugfs_dir) + return -ENOENT; + + fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir, + NULL, &trace_fops); + if (!fentry) + return -ENODEV; + + return 0; + +} + +void __init ras_debugfs_init(void) +{ + ras_debugfs_dir = debugfs_create_dir("ras", NULL); +} diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c new file mode 100644 index 000000000000..b67dd362b7b6 --- /dev/null +++ b/drivers/ras/ras.c @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Authors: + * Chen, Gong <gong.chen@linux.intel.com> + */ + +#include <linux/init.h> +#include <linux/ras.h> + +#define CREATE_TRACE_POINTS +#define TRACE_INCLUDE_PATH ../../include/ras +#include <ras/ras_event.h> + +static int __init ras_init(void) +{ + int rc = 0; + + ras_debugfs_init(); + rc = ras_add_daemon_trace(); + + return rc; +} +subsys_initcall(ras_init); + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); +#endif +EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/cper.h b/include/linux/cper.h index 2fc0ec3d89cc..76abba4b238e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -22,6 +22,7 @@ #define LINUX_CPER_H #include <linux/uuid.h> +#include <linux/trace_seq.h> /* CPER record signature and the size */ #define CPER_SIG_RECORD "CPER" @@ -36,6 +37,13 @@ #define CPER_RECORD_REV 0x0100 /* + * CPER record length contains the CPER fields which are relevant for further + * handling of a memory error in userspace (we don't carry all the fields + * defined in the UEFI spec because some of them don't make any sense.) + * Currently, a length of 256 should be more than enough. + */ +#define CPER_REC_LEN 256 +/* * Severity difinition for error_severity in struct cper_record_header * and section_severity in struct cper_section_descriptor */ @@ -356,6 +364,24 @@ struct cper_sec_mem_err { __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; +struct cper_mem_err_compact { + __u64 validation_bits; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u16 rank; + __u16 mem_array_handle; + __u16 mem_dev_handle; +}; + struct cper_sec_pcie { __u64 validation_bits; __u32 port_type; @@ -395,7 +421,13 @@ struct cper_sec_pcie { #pragma pack() u64 cper_next_record_id(void); +const char *cper_severity_str(unsigned int); +const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); +void cper_mem_err_pack(const struct cper_sec_mem_err *, + struct cper_mem_err_compact *); +const char *cper_mem_err_unpack(struct trace_seq *, + struct cper_mem_err_compact *); #endif diff --git a/include/linux/ras.h b/include/linux/ras.h new file mode 100644 index 000000000000..2aceeafd6fe5 --- /dev/null +++ b/include/linux/ras.h @@ -0,0 +1,14 @@ +#ifndef __RAS_H__ +#define __RAS_H__ + +#ifdef CONFIG_DEBUG_FS +int ras_userspace_consumers(void); +void ras_debugfs_init(void); +int ras_add_daemon_trace(void); +#else +static inline int ras_userspace_consumers(void) { return 0; } +static inline void ras_debugfs_init(void) { return; } +static inline int ras_add_daemon_trace(void) { return 0; } +#endif + +#endif diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 21cdb0b7b0fb..47da53c27ffa 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -8,6 +8,71 @@ #include <linux/tracepoint.h> #include <linux/edac.h> #include <linux/ktime.h> +#include <linux/aer.h> +#include <linux/cper.h> + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. + */ + +/* memory trace event */ + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +TRACE_EVENT(extlog_mem_event, + TP_PROTO(struct cper_sec_mem_err *mem, + u32 err_seq, + const uuid_le *fru_id, + const char *fru_text, + u8 sev), + + TP_ARGS(mem, err_seq, fru_id, fru_text, sev), + + TP_STRUCT__entry( + __field(u32, err_seq) + __field(u8, etype) + __field(u8, sev) + __field(u64, pa) + __field(u8, pa_mask_lsb) + __field_struct(uuid_le, fru_id) + __string(fru_text, fru_text) + __field_struct(struct cper_mem_err_compact, data) + ), + + TP_fast_assign( + __entry->err_seq = err_seq; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + __entry->etype = mem->error_type; + else + __entry->etype = ~0; + __entry->sev = sev; + if (mem->validation_bits & CPER_MEM_VALID_PA) + __entry->pa = mem->physical_addr; + else + __entry->pa = ~0ull; + + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); + else + __entry->pa_mask_lsb = ~0; + __entry->fru_id = *fru_id; + __assign_str(fru_text, fru_text); + cper_mem_err_pack(mem, &__entry->data); + ), + + TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", + __entry->err_seq, + cper_severity_str(__entry->sev), + cper_mem_err_type_str(__entry->etype), + __entry->pa, + __entry->pa_mask_lsb, + cper_mem_err_unpack(p, &__entry->data), + &__entry->fru_id, + __get_str(fru_text)) +); +#endif /* * Hardware Events Report @@ -94,6 +159,69 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). + * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == AER_CORRECTABLE ? "Corrected" : + __entry->severity == AER_FATAL ? + "Fatal" : "Uncorrected, non-fatal", + __entry->severity == AER_CORRECTABLE ? + __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */ diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h deleted file mode 100644 index 1c875ad1ee5f..000000000000 --- a/include/trace/events/ras.h +++ /dev/null @@ -1,77 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ras - -#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_AER_H - -#include <linux/tracepoint.h> -#include <linux/aer.h> - - -/* - * PCIe AER Trace event - * - * These events are generated when hardware detects a corrected or - * uncorrected event on a PCIe device. The event report has - * the following structure: - * - * char * dev_name - The name of the slot where the device resides - * ([domain:]bus:device.function). - * u32 status - Either the correctable or uncorrectable register - * indicating what error or errors have been seen - * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED - */ - -#define aer_correctable_errors \ - {BIT(0), "Receiver Error"}, \ - {BIT(6), "Bad TLP"}, \ - {BIT(7), "Bad DLLP"}, \ - {BIT(8), "RELAY_NUM Rollover"}, \ - {BIT(12), "Replay Timer Timeout"}, \ - {BIT(13), "Advisory Non-Fatal"} - -#define aer_uncorrectable_errors \ - {BIT(4), "Data Link Protocol"}, \ - {BIT(12), "Poisoned TLP"}, \ - {BIT(13), "Flow Control Protocol"}, \ - {BIT(14), "Completion Timeout"}, \ - {BIT(15), "Completer Abort"}, \ - {BIT(16), "Unexpected Completion"}, \ - {BIT(17), "Receiver Overflow"}, \ - {BIT(18), "Malformed TLP"}, \ - {BIT(19), "ECRC"}, \ - {BIT(20), "Unsupported Request"} - -TRACE_EVENT(aer_event, - TP_PROTO(const char *dev_name, - const u32 status, - const u8 severity), - - TP_ARGS(dev_name, status, severity), - - TP_STRUCT__entry( - __string( dev_name, dev_name ) - __field( u32, status ) - __field( u8, severity ) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name); - __entry->status = status; - __entry->severity = severity; - ), - - TP_printk("%s PCIe Bus Error: severity=%s, %s\n", - __get_str(dev_name), - __entry->severity == AER_CORRECTABLE ? "Corrected" : - __entry->severity == AER_FATAL ? - "Fatal" : "Uncorrected, non-fatal", - __entry->severity == AER_CORRECTABLE ? - __print_flags(__entry->status, "|", aer_correctable_errors) : - __print_flags(__entry->status, "|", aer_uncorrectable_errors)) -); - -#endif /* _TRACE_AER_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0fd06fef9fac..26b4f2e13275 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -44,6 +44,12 @@ #undef __field_ext #define __field_ext(type, item, filter_type) type item; +#undef __field_struct +#define __field_struct(type, item) type item; + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; @@ -122,6 +128,12 @@ #undef __field_ext #define __field_ext(type, item, filter_type) +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ if (ret) \ return ret; +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), \ + 0, filter_type); \ + if (ret) \ + return ret; + #undef __field #define __field(type, item) __field_ext(type, item, FILTER_OTHER) +#undef __field_struct +#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ do { \ @@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __field_ext #define __field_ext(type, item, filter_type) +#undef __field_struct +#define __field_struct(type, item) + +#undef __field_struct_ext +#define __field_struct_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) @@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call( \ #undef __field #define __field(type, item) +#undef __field_struct +#define __field_struct(type, item) + #undef __array #define __array(type, item, len) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index fed853f3d7aa..9674145e2f6a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -4,6 +4,7 @@ #include <linux/tracepoint.h> #include <linux/unistd.h> #include <linux/ftrace_event.h> +#include <linux/thread_info.h> #include <asm/ptrace.h> @@ -32,4 +33,18 @@ struct syscall_metadata { struct ftrace_event_call *exit_event; }; +#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS) +static inline void syscall_tracepoint_update(struct task_struct *p) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); + else + clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); +} +#else +static inline void syscall_tracepoint_update(struct task_struct *p) +{ +} +#endif + #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index d2799d1fc952..6a13c46cd87d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); + syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 33cbd8c203f8..3490407dc7b7 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -492,33 +492,29 @@ static int sys_tracepoint_refcount; void syscall_regfunc(void) { - unsigned long flags; - struct task_struct *g, *t; + struct task_struct *p, *t; if (!sys_tracepoint_refcount) { - read_lock_irqsave(&tasklist_lock, flags); - do_each_thread(g, t) { - /* Skip kernel threads. */ - if (t->mm) - set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); - } while_each_thread(g, t); - read_unlock_irqrestore(&tasklist_lock, flags); + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); + } + read_unlock(&tasklist_lock); } sys_tracepoint_refcount++; } void syscall_unregfunc(void) { - unsigned long flags; - struct task_struct *g, *t; + struct task_struct *p, *t; sys_tracepoint_refcount--; if (!sys_tracepoint_refcount) { - read_lock_irqsave(&tasklist_lock, flags); - do_each_thread(g, t) { + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); - } while_each_thread(g, t); - read_unlock_irqrestore(&tasklist_lock, flags); + } + read_unlock(&tasklist_lock); } } #endif diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 6af373236d73..4b0113f73ee9 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -56,7 +56,8 @@ * struct: This defines the way the data will be stored in the ring buffer. * There are currently two types of elements. __field and __array. * a __field is broken up into (type, name). Where type can be any - * type but an array. + * primitive type (integer, long or pointer). __field_struct() can + * be any static complex data value (struct, union, but not an array). * For an array. there are three fields. (type, name, size). The * type of elements in the array, the name of the field and the size * of the array. |