Diffstat (limited to 'drivers/edac')
36 files changed, 2029 insertions, 864 deletions
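For reference, one of the altera_edac.c changes below stops feeding a pointer to a 32-bit status word straight into for_each_set_bit(): on a 64-bit kernel that macro dereferences a full unsigned long, so the fixed code first copies the u32 into a real unsigned long. A minimal, compilable userspace model of the corrected pattern (not part of the patch; the bit-clearing loop stands in for the kernel macro):

#include <stdio.h>
#include <stdint.h>

static void dispatch_set_bits(uint32_t irq_status)
{
    unsigned long bits = irq_status;    /* widen safely, as the fix does */

    while (bits) {
        int bit = __builtin_ctzl(bits); /* index of lowest set bit */

        printf("would dispatch the IRQ mapped to bit %d\n", bit);
        bits &= bits - 1;               /* clear that bit */
    }
}

int main(void)
{
    dispatch_set_bits(0x80000005);      /* bits 0, 2 and 31 */
    return 0;
}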
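The amd64_edac.c hunks below also rework how the scrub rate is programmed, but the lookup itself is unchanged: scrubrates[] is sorted by descending bandwidth and the set path takes the first entry that does not exceed the request. A compilable sketch of that selection with made-up table entries (the real table and register bit patterns live in amd64_edac.c):

#include <stdio.h>
#include <stdint.h>

struct scrubrate {
    uint32_t bandwidth; /* bytes/s, descending; 0 terminates */
    uint32_t scrubval;  /* register bit pattern (illustrative only) */
};

static const struct scrubrate scrubrates[] = {
    { 1600000, 0x04 },
    {  800000, 0x05 },
    {  400000, 0x06 },
    {       0, 0x00 },  /* scrubbing off */
};

static uint32_t pick_scrubval(uint32_t new_bw)
{
    int i;

    /* first entry whose bandwidth does not exceed the request */
    for (i = 0; scrubrates[i].bandwidth != 0; i++)
        if (scrubrates[i].bandwidth <= new_bw)
            break;

    return scrubrates[i].scrubval;
}

int main(void)
{
    printf("scrubval=0x%02x\n", pick_scrubval(900000)); /* -> 0x05 */
    return 0;
}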
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 200c04ce5b0e..b3c99bb5fe77 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -462,10 +462,17 @@ config EDAC_ALTERA_SDMMC
 config EDAC_SIFIVE
 	bool "Sifive platform EDAC driver"
-	depends on EDAC=y && RISCV
+	depends on EDAC=y && SIFIVE_L2
 	help
 	  Support for error detection and correction on the
 	  SiFive SoCs.
 
+config EDAC_ARMADA_XP
+	bool "Marvell Armada XP DDR and L2 Cache ECC"
+	depends on MACH_MVEBU_V7
+	help
+	  Support for error correction and detection on the Marvell Armada XP
+	  DDR RAM and L2 cache controllers.
+
 config EDAC_SYNOPSYS
 	tristate "Synopsys DDR Memory Controller"
 	depends on ARCH_ZYNQ || ARCH_ZYNQMP
@@ -484,8 +491,7 @@ config EDAC_TI
 	tristate "Texas Instruments DDR3 ECC Controller"
 	depends on ARCH_KEYSTONE || SOC_DRA7XX
 	help
-	  Support for error detection and correction on the
-	  TI SoCs.
+	  Support for error detection and correction on the TI SoCs.
 
 config EDAC_QCOM
 	tristate "QCOM EDAC Controller"
@@ -510,4 +516,11 @@ config EDAC_ASPEED
 	  First, ECC must be configured in the bootloader. Then, this driver
 	  will expose error counters via the EDAC kernel framework.
 
+config EDAC_BLUEFIELD
+	tristate "Mellanox BlueField Memory ECC"
+	depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST)
+	help
+	  Support for error detection and correction on the
+	  Mellanox BlueField SoCs.
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 165ca65e1a3a..d77200c9680b 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -80,8 +80,10 @@ obj-$(CONFIG_EDAC_THUNDERX)	+= thunderx_edac.o
 obj-$(CONFIG_EDAC_ALTERA)	+= altera_edac.o
 obj-$(CONFIG_EDAC_SIFIVE)	+= sifive_edac.o
+obj-$(CONFIG_EDAC_ARMADA_XP)	+= armada_xp_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)	+= synopsys_edac.o
 obj-$(CONFIG_EDAC_XGENE)	+= xgene_edac.o
 obj-$(CONFIG_EDAC_TI)		+= ti_edac.o
 obj-$(CONFIG_EDAC_QCOM)		+= qcom_edac.o
 obj-$(CONFIG_EDAC_ASPEED)	+= aspeed_edac.o
+obj-$(CONFIG_EDAC_BLUEFIELD)	+= bluefield_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index c2e693e34d43..e91cf1147a4e 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -14,6 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/kernel.h>
+#include <linux/mfd/altera-sysmgr.h>
 #include <linux/mfd/syscon.h>
 #include <linux/notifier.h>
 #include <linux/of_address.h>
@@ -222,7 +223,6 @@ static unsigned long get_total_mem(void)
 static const struct of_device_id altr_sdram_ctrl_of_match[] = {
 	{ .compatible = "altr,sdram-edac", .data = &c5_data},
 	{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
-	{ .compatible = "altr,sdram-edac-s10", .data = &a10_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -276,7 +276,6 @@ release:
 	return ret;
 }
 
-static int socfpga_is_a10(void);
 static int altr_sdram_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *id;
@@ -400,7 +399,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
 		goto err;
 
 	/* Only the Arria10 has separate IRQs */
-	if (socfpga_is_a10()) {
+	if (of_machine_is_compatible("altr,socfpga-arria10")) {
 		/* Arria10 specific initialization */
 		res = a10_init(mc_vbase);
 		if (res < 0)
@@ -503,68 +502,6 @@ module_platform_driver(altr_sdram_edac_driver);
 
 #endif	/* CONFIG_EDAC_ALTERA_SDRAM */
 
-/**************** Stratix 10 EDAC Memory Controller Functions ************/
-
-/**
- * s10_protected_reg_write
- * Write to a protected SMC register.
- * @context: Not used.
- * @reg: Address of register - * @value: Value to write - * Return: INTEL_SIP_SMC_STATUS_OK (0) on success - * INTEL_SIP_SMC_REG_ERROR on error - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported - */ -static int s10_protected_reg_write(void *context, unsigned int reg, - unsigned int val) -{ - struct arm_smccc_res result; - unsigned long offset = (unsigned long)context; - - arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0, - 0, 0, 0, &result); - - return (int)result.a0; -} - -/** - * s10_protected_reg_read - * Read the status of a protected SMC register - * @context: Not used. - * @reg: Address of register - * @value: Value read. - * Return: INTEL_SIP_SMC_STATUS_OK (0) on success - * INTEL_SIP_SMC_REG_ERROR on error - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported - */ -static int s10_protected_reg_read(void *context, unsigned int reg, - unsigned int *val) -{ - struct arm_smccc_res result; - unsigned long offset = (unsigned long)context; - - arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0, - 0, 0, 0, &result); - - *val = (unsigned int)result.a1; - - return (int)result.a0; -} - -static const struct regmap_config s10_sdram_regmap_cfg = { - .name = "s10_ddr", - .reg_bits = 32, - .reg_stride = 4, - .val_bits = 32, - .max_register = 0xffd12228, - .reg_read = s10_protected_reg_read, - .reg_write = s10_protected_reg_write, - .use_single_read = true, - .use_single_write = true, -}; - -/************** </Stratix10 EDAC Memory Controller Functions> ***********/ - /************************* EDAC Parent Probe *************************/ static const struct of_device_id altr_edac_device_of_match[]; @@ -1009,16 +946,6 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) return ret; } -static int socfpga_is_a10(void) -{ - return of_machine_is_compatible("altr,socfpga-arria10"); -} - -static int socfpga_is_s10(void) -{ - return of_machine_is_compatible("altr,socfpga-stratix10"); -} - static __init int __maybe_unused altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, u32 ecc_ctrl_en_mask, bool dual_port) @@ -1034,34 +961,10 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, /* Get the ECC Manager - parent of the device EDACs */ np_eccmgr = of_get_parent(np); - if (socfpga_is_a10()) { - ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, - "altr,sysmgr-syscon"); - } else { - struct device_node *sysmgr_np; - struct resource res; - uintptr_t base; - - sysmgr_np = of_parse_phandle(np_eccmgr, - "altr,sysmgr-syscon", 0); - if (!sysmgr_np) { - edac_printk(KERN_ERR, EDAC_DEVICE, - "Unable to find altr,sysmgr-syscon\n"); - return -ENODEV; - } - - if (of_address_to_resource(sysmgr_np, 0, &res)) { - of_node_put(sysmgr_np); - return -ENOMEM; - } - - /* Need physical address for SMCC call */ - base = res.start; + ecc_mgr_map = + altr_sysmgr_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); - ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, - &s10_sdram_regmap_cfg); - of_node_put(sysmgr_np); - } of_node_put(np_eccmgr); if (IS_ERR(ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -1126,9 +1029,6 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) int irq; struct device_node *child, *np; - if (!socfpga_is_a10() && !socfpga_is_s10()) - return -ENODEV; - np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ecc-manager"); if (!np) { @@ -1170,6 +1070,24 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) return 0; } +/*********************** SDRAM 
EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_SDRAM + +static const struct edac_device_prv_data s10_sdramecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_S10_ECC_SERRPENA, + .ue_clear_mask = ALTR_S10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_S10_ECC_EN, + .ecc_en_ofst = ALTR_S10_ECC_CTRL_SDRAM_OFST, + .ce_set_mask = ALTR_S10_ECC_TSERRA, + .ue_set_mask = ALTR_S10_ECC_TDERRA, + .set_err_ofst = ALTR_S10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; +#endif /* CONFIG_EDAC_ALTERA_SDRAM */ + /*********************** OCRAM EDAC Device Functions *********************/ #ifdef CONFIG_EDAC_ALTERA_OCRAM @@ -1759,6 +1677,9 @@ static const struct of_device_id altr_edac_a10_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_SDMMC { .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data }, #endif +#ifdef CONFIG_EDAC_ALTERA_SDRAM + { .compatible = "altr,sdram-edac-s10", .data = &s10_sdramecc_data }, +#endif {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match); @@ -1866,6 +1787,7 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc) struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); int irq = irq_desc_get_irq(desc); + unsigned long bits; dberr = (irq == edac->db_irq) ? 1 : 0; sm_offset = dberr ? A10_SYSMGR_ECC_INTSTAT_DERR_OFST : @@ -1875,7 +1797,8 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc) regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); - for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { + bits = irq_status; + for_each_set_bit(bit, &bits, 32) { irq = irq_linear_revmap(edac->domain, dberr * 32 + bit); if (irq) generic_handle_irq(irq); @@ -1889,6 +1812,10 @@ static int validate_parent_available(struct device_node *np) struct device_node *parent; int ret = 0; + /* SDRAM must be present for Linux (implied parent) */ + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + return 0; + /* Ensure parent device is enabled if parent node exists */ parent = of_parse_phandle(np, "altr,ecc-parent", 0); if (parent && !of_device_is_available(parent)) @@ -1898,6 +1825,22 @@ static int validate_parent_available(struct device_node *np) return ret; } +static int get_s10_sdram_edac_resource(struct device_node *np, + struct resource *res) +{ + struct device_node *parent; + int ret; + + parent = of_parse_phandle(np, "altr,sdr-syscon", 0); + if (!parent) + return -ENODEV; + + ret = of_address_to_resource(parent, 0, res); + of_node_put(parent); + + return ret; +} + static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, struct device_node *np) { @@ -1925,7 +1868,11 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) return -ENOMEM; - rc = of_address_to_resource(np, 0, &res); + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + rc = get_s10_sdram_edac_resource(np, &res); + else + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { edac_printk(KERN_ERR, EDAC_DEVICE, "%s: no resource address\n", ecc_name); @@ -2132,33 +2079,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev) platform_set_drvdata(pdev, edac); INIT_LIST_HEAD(&edac->a10_ecc_devices); - if (socfpga_is_a10()) { - edac->ecc_mgr_map = - syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "altr,sysmgr-syscon"); - } else { - struct device_node *sysmgr_np; - struct resource res; - uintptr_t base; - 
- sysmgr_np = of_parse_phandle(pdev->dev.of_node, - "altr,sysmgr-syscon", 0); - if (!sysmgr_np) { - edac_printk(KERN_ERR, EDAC_DEVICE, - "Unable to find altr,sysmgr-syscon\n"); - return -ENODEV; - } - - if (of_address_to_resource(sysmgr_np, 0, &res)) - return -ENOMEM; - - /* Need physical address for SMCC call */ - base = res.start; - - edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, - (void *)base, - &s10_sdram_regmap_cfg); - } + edac->ecc_mgr_map = + altr_sysmgr_regmap_lookup_by_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon"); if (IS_ERR(edac->ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -2224,20 +2147,11 @@ static int altr_edac_a10_probe(struct platform_device *pdev) if (!of_device_is_available(child)) continue; - if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc") || - of_device_is_compatible(child, "altr,socfpga-a10-ocram-ecc") || - of_device_is_compatible(child, "altr,socfpga-eth-mac-ecc") || - of_device_is_compatible(child, "altr,socfpga-nand-ecc") || - of_device_is_compatible(child, "altr,socfpga-dma-ecc") || - of_device_is_compatible(child, "altr,socfpga-usb-ecc") || - of_device_is_compatible(child, "altr,socfpga-qspi-ecc") || - of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc")) - + if (of_match_node(altr_edac_a10_device_of_match, child)) altr_edac_a10_device_add(edac, child); #ifdef CONFIG_EDAC_ALTERA_SDRAM - else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || - (of_device_is_compatible(child, "altr,sdram-edac-s10"))) + else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 55654cc4bcdf..3727e72c8c2e 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -289,6 +289,29 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 /************* Stratix10 Defines **************/ +#define ALTR_S10_ECC_CTRL_SDRAM_OFST 0x00 +#define ALTR_S10_ECC_EN BIT(0) + +#define ALTR_S10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_S10_ECC_ERRINTENS_OFST 0x14 +#define ALTR_S10_ECC_ERRINTENR_OFST 0x18 +#define ALTR_S10_ECC_SERRINTEN BIT(0) + +#define ALTR_S10_ECC_INTMODE_OFST 0x1C +#define ALTR_S10_ECC_INTMODE BIT(0) + +#define ALTR_S10_ECC_INTSTAT_OFST 0x20 +#define ALTR_S10_ECC_SERRPENA BIT(0) +#define ALTR_S10_ECC_DERRPENA BIT(8) +#define ALTR_S10_ECC_ERRPENA_MASK (ALTR_S10_ECC_SERRPENA | \ + ALTR_S10_ECC_DERRPENA) + +#define ALTR_S10_ECC_INTTEST_OFST 0x24 +#define ALTR_S10_ECC_TSERRA BIT(0) +#define ALTR_S10_ECC_TDERRA BIT(8) +#define ALTR_S10_ECC_TSERRB BIT(16) +#define ALTR_S10_ECC_TDERRB BIT(24) + #define ALTR_S10_DERR_ADDRA_OFST 0x2C /* Stratix10 ECC Manager Defines */ @@ -300,7 +323,7 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) -#define S10_DBE_IRQ_MASK 0x3FE +#define S10_DBE_IRQ_MASK 0x3FFFE /* Define ECC Block Offsets for peripherals */ #define ECC_BLK_ADDRESS_OFST 0x40 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 873437be86d9..9fbad908a854 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -16,12 +16,11 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; +static struct amd64_family_type *fam_type; + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; -/* Number of Unified Memory Controllers */ -static u8 num_umcs; - /* * Valid scrub rates for the K8 hardware memory scrubber. 
We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -215,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->fam == 0x17 || pvt->fam == 0x18) { + if (pvt->umc) { __f17h_set_scrubval(pvt, scrubval); } else if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); @@ -257,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) int i, retval = -EINVAL; u32 scrubval = 0; - switch (pvt->fam) { - case 0x15: - /* Erratum #505 */ - if (pvt->model < 0x10) - f15h_select_dct(pvt, 0); - - if (pvt->model == 0x60) - amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); - break; - - case 0x17: - case 0x18: + if (pvt->umc) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); if (scrubval & BIT(0)) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); @@ -277,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci) } else { scrubval = 0; } - break; + } else if (pvt->fam == 0x15) { + /* Erratum #505 */ + if (pvt->model < 0x10) + f15h_select_dct(pvt, 0); - default: + if (pvt->model == 0x60) + amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval); + } else { amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); - break; } scrubval = scrubval & 0x001F; @@ -454,7 +446,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < num_umcs; i++) + for (i = 0; i < fam_type->max_mcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. Return the @@ -788,51 +780,45 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) (dclr & BIT(15)) ? "yes" : "no"); } -/* - * The Address Mask should be a contiguous set of bits in the non-interleaved - * case. So to check for CS interleaving, find the most- and least-significant - * bits of the mask, generate a contiguous bitmask, and compare the two. - */ -static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs) +#define CS_EVEN_PRIMARY BIT(0) +#define CS_ODD_PRIMARY BIT(1) +#define CS_EVEN_SECONDARY BIT(2) +#define CS_ODD_SECONDARY BIT(3) + +#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) +#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) + +static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { - u32 mask = pvt->csels[ctrl].csmasks[cs >> 1]; - u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1; - u32 test_mask = GENMASK(msb, lsb); + int cs_mode = 0; + + if (csrow_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_PRIMARY; + + if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_PRIMARY; - edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask); + /* Asymmetric dual-rank DIMM support. */ + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_SECONDARY; - return mask ^ test_mask; + return cs_mode; } static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { - int dimm, size0, size1, cs0, cs1; + int dimm, size0, size1, cs0, cs1, cs_mode; edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); - for (dimm = 0; dimm < 4; dimm++) { - size0 = 0; + for (dimm = 0; dimm < 2; dimm++) { cs0 = dimm * 2; - - if (csrow_enabled(cs0, ctrl, pvt)) - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0); - - size1 = 0; cs1 = dimm * 2 + 1; - if (csrow_enabled(cs1, ctrl, pvt)) { - /* - * CS interleaving is only supported if both CSes have - * the same amount of memory. 
Because they are - * interleaved, it will look like both CSes have the - * full amount of memory. Save the size for both as - * half the amount we found on CS0, if interleaved. - */ - if (f17_cs_interleaved(pvt, ctrl, cs1)) - size1 = size0 = (size0 >> 1); - else - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); - } + cs_mode = f17_get_cs_mode(dimm, ctrl, pvt); + + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, @@ -942,89 +928,119 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; + } else if (pvt->fam >= 0x17) { + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = 2; + } + } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void read_umc_base_mask(struct amd64_pvt *pvt) +{ + u32 umc_base_reg, umc_base_reg_sec; + u32 umc_mask_reg, umc_mask_reg_sec; + u32 base_reg, base_reg_sec; + u32 mask_reg, mask_reg_sec; + u32 *base, *base_sec; + u32 *mask, *mask_sec; + int cs, umc; + + for_each_umc(umc) { + umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; + umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; + + for_each_chip_select(cs, umc, pvt) { + base = &pvt->csels[umc].csbases[cs]; + base_sec = &pvt->csels[umc].csbases_sec[cs]; + + base_reg = umc_base_reg + (cs * 4); + base_reg_sec = umc_base_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + + if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec)) + edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base_sec, base_reg_sec); + } + + umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + + for_each_chip_select_mask(cs, umc, pvt) { + mask = &pvt->csels[umc].csmasks[cs]; + mask_sec = &pvt->csels[umc].csmasks_sec[cs]; + + mask_reg = umc_mask_reg + (cs * 4); + mask_reg_sec = umc_mask_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec)) + edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask_sec, mask_reg_sec); + } + } +} + /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ static void read_dct_base_mask(struct amd64_pvt *pvt) { - int base_reg0, base_reg1, mask_reg0, mask_reg1, cs; + int cs; prep_chip_selects(pvt); - if (pvt->umc) { - base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR; - base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR; - mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK; - mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK; - } else { - base_reg0 = DCSB0; - base_reg1 = DCSB1; - mask_reg0 = DCSM0; - mask_reg1 = DCSM1; - } + if (pvt->umc) + return read_umc_base_mask(pvt); for_each_chip_select(cs, 0, pvt) { - int reg0 = base_reg0 + (cs * 4); - int reg1 = base_reg1 + (cs * 4); + int reg0 = DCSB0 + (cs * 4); + int reg1 = DCSB1 + (cs * 4); u32 *base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n", - cs, *base0, reg0); - - if (!amd_smn_read(pvt->mc_node_id, reg1, 
base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n", - cs, *base1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, (pvt->fam == 0x10) ? reg1 + : reg0); } for_each_chip_select_mask(cs, 0, pvt) { - int reg0 = mask_reg0 + (cs * 4); - int reg1 = mask_reg1 + (cs * 4); + int reg0 = DCSM0 + (cs * 4); + int reg1 = DCSM1 + (cs * 4); u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n", - cs, *mask0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (!amd_smn_read(pvt->mc_node_id, reg1, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n", - cs, *mask1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); - - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); } } @@ -1032,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; + if (pvt->umc) { + if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) + pvt->dram_type = MEM_LRDDR4; + else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) + pvt->dram_type = MEM_RDDR4; + else + pvt->dram_type = MEM_DDR4; + return; + } + switch (pvt->fam) { case 0xf: if (pvt->ext_model >= K8_REV_F) @@ -1077,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt) case 0x16: goto ddr3; - case 0x17: - case 0x18: - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; - else - pvt->dram_type = MEM_DDR4; - return; - default: WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); pvt->dram_type = MEM_EMPTY; @@ -1556,18 +1572,58 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } -static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc, +static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { - u32 base_addr = pvt->csels[umc].csbases[csrow_nr]; + u32 addr_mask_orig, addr_mask_deinterleaved; + u32 msb, weight, num_zero_bits; + int dimm, size = 0; + + /* No Chip Selects are enabled. */ + if (!cs_mode) + return size; + + /* Requested size of an even CS but none are enabled. */ + if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) + return size; - /* Each mask is used for every two base addresses. 
*/ - u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1]; + /* Requested size of an odd CS but none are enabled. */ + if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) + return size; - /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1; + /* + * There is one mask per DIMM, and two Chip Selects per DIMM. + * CS0 and CS1 -> DIMM0 + * CS2 and CS3 -> DIMM1 + */ + dimm = csrow_nr >> 1; - edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask); + /* Asymmetric dual-rank DIMM support. */ + if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) + addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + else + addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + + /* + * The number of zero bits in the mask is equal to the number of bits + * in a full mask minus the number of bits in the current mask. + * + * The MSB is the number of bits in the full mask because BIT[0] is + * always 0. + */ + msb = fls(addr_mask_orig) - 1; + weight = hweight_long(addr_mask_orig); + num_zero_bits = msb - weight; + + /* Take the number of zero bits off from the top of the mask. */ + addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); + edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ + size = (addr_mask_deinterleaved >> 2) + 1; /* Return size in MBs. */ return size >> 10; @@ -2160,6 +2216,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, + .max_mcs = 2, .ops = { .early_channel_count = k8_early_channel_count, .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, @@ -2170,6 +2227,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F10h", .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2180,6 +2238,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h", .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2190,6 +2249,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h_M30h", .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2200,6 +2260,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h_M60h", .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2210,6 +2271,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2220,6 +2282,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F16h_M30h", .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, + .max_mcs = 2, .ops = { 
.early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2230,27 +2293,50 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h", .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, [F17_M10H_CPUS] = { .ctl_name = "F17h_M10h", .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, [F17_M30H_CPUS] = { .ctl_name = "F17h_M30h", .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, + .max_mcs = 8, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, + [F17_M70H_CPUS] = { + .ctl_name = "F17h_M70h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, + .max_mcs = 2, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, + [F19_CPUS] = { + .ctl_name = "F19h", + .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6, + .max_mcs = 8, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, }; @@ -2537,13 +2623,6 @@ static void decode_umc_error(int node_id, struct mce *m) err.channel = find_umc_channel(m); - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { - err.err_code = ERR_NORM_ADDR; - goto log_error; - } - - error_address_to_page_and_offset(sys_addr, &err); - if (!(m->status & MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; goto log_error; @@ -2560,6 +2639,13 @@ static void decode_umc_error(int node_id, struct mce *m) err.csrow = m->synd & 0x7; + if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + err.err_code = ERR_NORM_ADDR; + goto log_error; + } + + error_address_to_page_and_offset(sys_addr, &err); + log_error: __log_ecc_error(mci, &err, ecc_type); } @@ -2765,8 +2851,6 @@ skip: edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); - - dump_misc_regs(pvt); } /* @@ -2809,10 +2893,12 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) int csrow_nr = csrow_nr_orig; u32 cs_mode, nr_pages; - if (!pvt->umc) + if (!pvt->umc) { csrow_nr >>= 1; - - cs_mode = DBAM_DIMM(csrow_nr, dbam); + cs_mode = DBAM_DIMM(csrow_nr, dbam); + } else { + cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt); + } nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); nr_pages <<= 20 - PAGE_SHIFT; @@ -2824,6 +2910,50 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) return nr_pages; } +static int init_csrows_df(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + enum edac_type edac_mode = EDAC_NONE; + enum dev_type dev_type = DEV_UNKNOWN; + struct dimm_info *dimm; + int empty = 1; + u8 umc, cs; + + if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { + edac_mode = EDAC_S16ECD16ED; + dev_type = DEV_X16; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) { + edac_mode = EDAC_S8ECD8ED; + dev_type = DEV_X8; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) { + edac_mode = EDAC_S4ECD4ED; + dev_type = DEV_X4; + } else 
if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) { + edac_mode = EDAC_SECDED; + } + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + if (!csrow_enabled(cs, umc, pvt)) + continue; + + empty = 0; + dimm = mci->csrows[cs]->channels[umc]->dimm; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, cs); + + dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); + dimm->mtype = pvt->dram_type; + dimm->edac_mode = edac_mode; + dimm->dtype = dev_type; + dimm->grain = 64; + } + } + + return empty; +} + /* * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. @@ -2838,15 +2968,16 @@ static int init_csrows(struct mem_ctl_info *mci) int nr_pages = 0; u32 val; - if (!pvt->umc) { - amd64_read_pci_cfg(pvt->F3, NBCFG, &val); + if (pvt->umc) + return init_csrows_df(mci); - pvt->nbcfg = val; + amd64_read_pci_cfg(pvt->F3, NBCFG, &val); - edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", - pvt->mc_node_id, val, - !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); - } + pvt->nbcfg = val; + + edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + pvt->mc_node_id, val, + !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); /* * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. @@ -2883,13 +3014,7 @@ static int init_csrows(struct mem_ctl_info *mci) edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* Determine DIMM ECC mode: */ - if (pvt->umc) { - if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) - edac_mode = EDAC_S4ECD4ED; - else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) - edac_mode = EDAC_SECDED; - - } else if (pvt->nbcfg & NBCFG_ECC_ENABLE) { + if (pvt->nbcfg & NBCFG_ECC_ENABLE) { edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ? EDAC_S4ECD4ED : EDAC_SECDED; @@ -2899,6 +3024,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } @@ -3065,43 +3191,27 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, amd64_warn("Error restoring NB MCGCTL settings!\n"); } -/* - * EDAC requires that the BIOS have ECC enabled before - * taking over the processing of ECC errors. A command line - * option allows to force-enable hardware ECC later in - * enable_ecc_error_reporting(). - */ -static const char *ecc_msg = - "ECC disabled in the BIOS or no ECC capability, module will not load.\n" - " Either enable ECC checking or force module loading by setting " - "'ecc_enable_override'.\n" - " (Note that use of the override may cause unknown side effects.)\n"; - -static bool ecc_enabled(struct pci_dev *F3, u16 nid) +static bool ecc_enabled(struct amd64_pvt *pvt) { + u16 nid = pvt->mc_node_id; bool nb_mce_en = false; u8 ecc_en = 0, i; u32 value; if (boot_cpu_data.x86 >= 0x17) { u8 umc_en_mask = 0, ecc_en_mask = 0; + struct amd64_umc *umc; for_each_umc(i) { - u32 base = get_umc_base(i); + umc = &pvt->umc[i]; /* Only check enabled UMCs. */ - if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value)) - continue; - - if (!(value & UMC_SDP_INIT)) + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) continue; umc_en_mask |= BIT(i); - if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value)) - continue; - - if (value & UMC_ECC_ENABLED) + if (umc->umc_cap_hi & UMC_ECC_ENABLED) ecc_en_mask |= BIT(i); } @@ -3114,7 +3224,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) /* Assume UMC MCA banks are enabled. 
*/ nb_mce_en = true; } else { - amd64_read_pci_cfg(F3, NBCFG, &value); + amd64_read_pci_cfg(pvt->F3, NBCFG, &value); ecc_en = !!(value & NBCFG_ECC_ENABLE); @@ -3127,22 +3237,24 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) amd64_info("Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); - if (!ecc_en || !nb_mce_en) { - amd64_info("%s", ecc_msg); + if (!ecc_en || !nb_mce_en) return false; - } - return true; + else + return true; } static inline void f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { - u8 i, ecc_en = 1, cpk_en = 1; + u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); + + dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6)); + dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7)); } } @@ -3150,13 +3262,19 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) if (ecc_en) { mci->edac_ctl_cap |= EDAC_FLAG_SECDED; - if (cpk_en) + if (!cpk_en) + return; + + if (dev_x4) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; + else if (dev_x16) + mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED; + else + mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED; } } -static void setup_mci_misc_attrs(struct mem_ctl_info *mci, - struct amd64_family_type *fam) +static void setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; @@ -3175,7 +3293,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = fam->ctl_name; + mci->ctl_name = fam_type->ctl_name; mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; @@ -3189,8 +3307,6 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, */ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) { - struct amd64_family_type *fam_type = NULL; - pvt->ext_model = boot_cpu_data.x86_model >> 4; pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; @@ -3241,6 +3357,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M30H_CPUS]; pvt->ops = &family_types[F17_M30H_CPUS].ops; break; + } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { + fam_type = &family_types[F17_M70H_CPUS]; + pvt->ops = &family_types[F17_M70H_CPUS].ops; + break; } /* fall through */ case 0x18: @@ -3251,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) family_types[F17_CPUS].ctl_name = "F18h"; break; + case 0x19: + fam_type = &family_types[F19_CPUS]; + pvt->ops = &family_types[F19_CPUS].ops; + family_types[F19_CPUS].ctl_name = "F19h"; + break; + default: amd64_err("Unsupported family!\n"); return NULL; @@ -3274,51 +3400,15 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -/* Set the number of Unified Memory Controllers in the system. 
*/ -static void compute_num_umcs(void) +static int hw_info_get(struct amd64_pvt *pvt) { - u8 model = boot_cpu_data.x86_model; - - if (boot_cpu_data.x86 < 0x17) - return; - - if (model >= 0x30 && model <= 0x3f) - num_umcs = 8; - else - num_umcs = 2; - - edac_dbg(1, "Number of UMCs: %x", num_umcs); -} - -static int init_one_instance(unsigned int nid) -{ - struct pci_dev *F3 = node_to_amd_nb(nid)->misc; - struct amd64_family_type *fam_type = NULL; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - struct amd64_pvt *pvt = NULL; u16 pci_id1, pci_id2; - int err = 0, ret; - - ret = -ENOMEM; - pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); - if (!pvt) - goto err_ret; - - pvt->mc_node_id = nid; - pvt->F3 = F3; - - ret = -EINVAL; - fam_type = per_family_init(pvt); - if (!fam_type) - goto err_free; + int ret = -EINVAL; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); - if (!pvt->umc) { - ret = -ENOMEM; - goto err_free; - } + pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; pci_id1 = fam_type->f0_id; pci_id2 = fam_type->f6_id; @@ -3327,21 +3417,37 @@ static int init_one_instance(unsigned int nid) pci_id2 = fam_type->f2_id; } - err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); - if (err) - goto err_post_init; + ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); + if (ret) + return ret; read_mc_regs(pvt); + return 0; +} + +static void hw_info_put(struct amd64_pvt *pvt) +{ + if (pvt->F0 || pvt->F1) + free_mc_sibling_devs(pvt); + + kfree(pvt->umc); +} + +static int init_one_instance(struct amd64_pvt *pvt) +{ + struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; + int ret = -EINVAL; + /* * We need to determine how many memory channels there are. Then use * that information for calculating the size of the dynamic instance * tables in the 'mci' structure. */ - ret = -EINVAL; pvt->channel_count = pvt->ops->early_channel_count(pvt); if (pvt->channel_count < 0) - goto err_siblings; + return ret; ret = -ENOMEM; layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; @@ -3353,24 +3459,18 @@ static int init_one_instance(unsigned int nid) * Always allocate two channels since we can have setups with DIMMs on * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. - * - * On Fam17h+, the number of controllers may be greater than two. So set - * the size equal to the maximum number of UMCs. 
*/ - if (pvt->fam >= 0x17) - layers[1].size = num_umcs; - else - layers[1].size = 2; + layers[1].size = fam_type->max_mcs; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); if (!mci) - goto err_siblings; + return ret; mci->pvt_info = pvt; mci->pdev = &pvt->F3->dev; - setup_mci_misc_attrs(mci, fam_type); + setup_mci_misc_attrs(mci); if (init_csrows(mci)) mci->edac_cap = EDAC_FLAG_NONE; @@ -3378,31 +3478,30 @@ static int init_one_instance(unsigned int nid) ret = -ENODEV; if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { edac_dbg(1, "failed edac_mc_add_mc()\n"); - goto err_add_mc; + edac_mc_free(mci); + return ret; } return 0; +} -err_add_mc: - edac_mc_free(mci); - -err_siblings: - free_mc_sibling_devs(pvt); - -err_post_init: - if (pvt->fam >= 0x17) - kfree(pvt->umc); +static bool instance_has_memory(struct amd64_pvt *pvt) +{ + bool cs_enabled = false; + int cs = 0, dct = 0; -err_free: - kfree(pvt); + for (dct = 0; dct < fam_type->max_mcs; dct++) { + for_each_chip_select(cs, dct, pvt) + cs_enabled |= csrow_enabled(cs, dct, pvt); + } -err_ret: - return ret; + return cs_enabled; } static int probe_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + struct amd64_pvt *pvt = NULL; struct ecc_settings *s; int ret; @@ -3413,8 +3512,29 @@ static int probe_one_instance(unsigned int nid) ecc_stngs[nid] = s; - if (!ecc_enabled(F3, nid)) { - ret = 0; + pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); + if (!pvt) + goto err_settings; + + pvt->mc_node_id = nid; + pvt->F3 = F3; + + fam_type = per_family_init(pvt); + if (!fam_type) + goto err_enable; + + ret = hw_info_get(pvt); + if (ret < 0) + goto err_enable; + + ret = 0; + if (!instance_has_memory(pvt)) { + amd64_info("Node %d: No DIMMs detected.\n", nid); + goto err_enable; + } + + if (!ecc_enabled(pvt)) { + ret = -ENODEV; if (!ecc_enable_override) goto err_enable; @@ -3429,7 +3549,7 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - ret = init_one_instance(nid); + ret = init_one_instance(pvt); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); @@ -3439,9 +3559,15 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } + dump_misc_regs(pvt); + return ret; err_enable: + hw_info_put(pvt); + kfree(pvt); + +err_settings: kfree(s); ecc_stngs[nid] = NULL; @@ -3456,9 +3582,6 @@ static void remove_one_instance(unsigned int nid) struct mem_ctl_info *mci; struct amd64_pvt *pvt; - mci = find_mci_by_dev(&F3->dev); - WARN_ON(!mci); - /* Remove from EDAC CORE tracking list */ mci = edac_mc_del_mc(&F3->dev); if (!mci) @@ -3468,14 +3591,13 @@ static void remove_one_instance(unsigned int nid) restore_ecc_error_reporting(s, nid, F3); - free_mc_sibling_devs(pvt); - kfree(ecc_stngs[nid]); ecc_stngs[nid] = NULL; /* Free the EDAC CORE resources */ mci->pvt_info = NULL; + hw_info_put(pvt); kfree(pvt); edac_mc_free(mci); } @@ -3510,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); @@ -3541,8 +3664,6 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; - compute_num_umcs(); - for (i = 0; i < amd_nb_num(); i++) { err = probe_one_instance(i); if (err) { 
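The new f17_addr_mask_to_cs_size() above derives a chip-select size from an address mask that may have interleave holes punched into it. The counting trick (number of set bits versus the position of the most significant bit, with BIT(0) always clear) is easy to check in isolation; here is a compilable userspace model with a made-up mask value, mirroring the driver's arithmetic:

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
    ((~0ULL >> (63 - (h))) & (~0ULL << (l)))

static uint32_t deinterleave(uint32_t addr_mask_orig)
{
    /* mask must be non-zero for __builtin_clz() */
    uint32_t msb = 31 - __builtin_clz(addr_mask_orig);
    uint32_t weight = __builtin_popcount(addr_mask_orig);
    uint32_t num_zero_bits = msb - weight;

    /* take the interleave holes off the top of the mask */
    return (uint32_t)GENMASK_ULL(msb - num_zero_bits, 1);
}

int main(void)
{
    uint32_t mask = 0x0001fefe;          /* bit 8 cleared by interleaving */
    uint32_t deint = deinterleave(mask);
    uint32_t size_kb = (deint >> 2) + 1; /* as in the driver; kB */

    printf("0x%08x -> 0x%08x -> %u MB\n", mask, deint, size_kb >> 10);
    return 0;
}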
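Related: f17h_determine_edac_ctl_cap() above now distinguishes x4, x8 and x16 chipkill from the UMC DIMM configuration bits. A small model of that decision (it returns only the strongest capability, whereas the driver ORs flags into mci->edac_ctl_cap):

#include <stdio.h>

/* dev_x4/dev_x16 correspond to UMCCH_DIMM_CFG bits 6 and 7 */
static const char *edac_mode(int ecc_en, int cpk_en, int dev_x4, int dev_x16)
{
    if (!ecc_en)
        return "NONE";
    if (!cpk_en)
        return "SECDED";
    if (dev_x4)
        return "S4ECD4ED";      /* x4 chipkill */
    if (dev_x16)
        return "S16ECD16ED";    /* x16 chipkill */
    return "S8ECD8ED";          /* x8 chipkill */
}

int main(void)
{
    printf("%s\n", edac_mode(1, 1, 0, 1)); /* -> S16ECD16ED */
    return 0;
}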
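And the cs_mode bookkeeping introduced by f17_get_cs_mode() above is plain bit arithmetic: two chip selects per DIMM, with a secondary mask on the odd chip select covering asymmetric dual-rank DIMMs. A compilable model:

#include <stdio.h>

#define CS_EVEN_PRIMARY   (1 << 0)
#define CS_ODD_PRIMARY    (1 << 1)
#define CS_EVEN_SECONDARY (1 << 2)
#define CS_ODD_SECONDARY  (1 << 3)

static int get_cs_mode(int even_en, int odd_en, int odd_sec_en)
{
    int cs_mode = 0;

    if (even_en)
        cs_mode |= CS_EVEN_PRIMARY;
    if (odd_en)
        cs_mode |= CS_ODD_PRIMARY;
    if (odd_sec_en)             /* asymmetric dual-rank DIMM */
        cs_mode |= CS_ODD_SECONDARY;

    return cs_mode;
}

int main(void)
{
    printf("cs_mode=0x%x\n", get_cs_mode(1, 1, 1)); /* -> 0xb */
    return 0;
}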
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8f66472f7adc..abbf3c274d74 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,6 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 +#define NUM_CONTROLLERS 8 #define ON true #define OFF false @@ -119,6 +120,10 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 +#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 +#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 /* * Function 1 - Address Map @@ -168,7 +173,8 @@ #define DCSM0 0x60 #define DCSM1 0x160 -#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE) #define DRAM_CONTROL 0x78 @@ -258,7 +264,9 @@ /* UMC CH register offsets */ #define UMCCH_BASE_ADDR 0x0 +#define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 +#define UMCCH_ADDR_MASK_SEC 0x28 #define UMCCH_ADDR_CFG 0x30 #define UMCCH_DIMM_CFG 0x80 #define UMCCH_UMC_CFG 0x100 @@ -285,6 +293,8 @@ enum amd_families { F17_CPUS, F17_M10H_CPUS, F17_M30H_CPUS, + F17_M70H_CPUS, + F19_CPUS, NUM_FAMILIES, }; @@ -311,9 +321,11 @@ struct dram_range { /* A DCT chip selects collection */ struct chip_select { u32 csbases[NUM_CHIPSELECTS]; + u32 csbases_sec[NUM_CHIPSELECTS]; u8 b_cnt; u32 csmasks[NUM_CHIPSELECTS]; + u32 csmasks_sec[NUM_CHIPSELECTS]; u8 m_cnt; }; @@ -351,8 +363,8 @@ struct amd64_pvt { u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT */ - struct chip_select csels[2]; + /* one for each DCT/UMC */ + struct chip_select csels[NUM_CONTROLLERS]; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; @@ -470,6 +482,8 @@ struct low_ops { struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; + /* Maximum number of memory controllers per die/node. 
*/
+	u8 max_mcs;
 	struct low_ops ops;
 };
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
new file mode 100644
index 000000000000..7f227bdcbc84
--- /dev/null
+++ b/drivers/edac/armada_xp_edac.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 Pengutronix, Jan Luebbe <kernel@pengutronix.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/edac.h>
+#include <linux/of_platform.h>
+
+#include <asm/hardware/cache-l2x0.h>
+#include <asm/hardware/cache-aurora-l2.h>
+
+#include "edac_mc.h"
+#include "edac_device.h"
+#include "edac_module.h"
+
+/************************ EDAC MC (DDR RAM) ********************************/
+
+#define SDRAM_NUM_CS 4
+
+#define SDRAM_CONFIG_REG 0x0
+#define SDRAM_CONFIG_ECC_MASK BIT(18)
+#define SDRAM_CONFIG_REGISTERED_MASK BIT(17)
+#define SDRAM_CONFIG_BUS_WIDTH_MASK BIT(15)
+
+#define SDRAM_ADDR_CTRL_REG 0x10
+#define SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs) (20+cs)
+#define SDRAM_ADDR_CTRL_SIZE_HIGH_MASK(cs) (0x1 << SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(cs))
+#define SDRAM_ADDR_CTRL_ADDR_SEL_MASK(cs) BIT(16+cs)
+#define SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs) (cs*4+2)
+#define SDRAM_ADDR_CTRL_SIZE_LOW_MASK(cs) (0x3 << SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(cs))
+#define SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs) (cs*4)
+#define SDRAM_ADDR_CTRL_STRUCT_MASK(cs) (0x3 << SDRAM_ADDR_CTRL_STRUCT_OFFSET(cs))
+
+#define SDRAM_ERR_DATA_H_REG 0x40
+#define SDRAM_ERR_DATA_L_REG 0x44
+
+#define SDRAM_ERR_RECV_ECC_REG 0x48
+#define SDRAM_ERR_RECV_ECC_VALUE_MASK 0xff
+
+#define SDRAM_ERR_CALC_ECC_REG 0x4c
+#define SDRAM_ERR_CALC_ECC_ROW_OFFSET 8
+#define SDRAM_ERR_CALC_ECC_ROW_MASK (0xffff << SDRAM_ERR_CALC_ECC_ROW_OFFSET)
+#define SDRAM_ERR_CALC_ECC_VALUE_MASK 0xff
+
+#define SDRAM_ERR_ADDR_REG 0x50
+#define SDRAM_ERR_ADDR_BANK_OFFSET 23
+#define SDRAM_ERR_ADDR_BANK_MASK (0x7 << SDRAM_ERR_ADDR_BANK_OFFSET)
+#define SDRAM_ERR_ADDR_COL_OFFSET 8
+#define SDRAM_ERR_ADDR_COL_MASK (0x7fff << SDRAM_ERR_ADDR_COL_OFFSET)
+#define SDRAM_ERR_ADDR_CS_OFFSET 1
+#define SDRAM_ERR_ADDR_CS_MASK (0x3 << SDRAM_ERR_ADDR_CS_OFFSET)
+#define SDRAM_ERR_ADDR_TYPE_MASK BIT(0)
+
+#define SDRAM_ERR_CTRL_REG 0x54
+#define SDRAM_ERR_CTRL_THR_OFFSET 16
+#define SDRAM_ERR_CTRL_THR_MASK (0xff << SDRAM_ERR_CTRL_THR_OFFSET)
+#define SDRAM_ERR_CTRL_PROP_MASK BIT(9)
+
+#define SDRAM_ERR_SBE_COUNT_REG 0x58
+#define SDRAM_ERR_DBE_COUNT_REG 0x5c
+
+#define SDRAM_ERR_CAUSE_ERR_REG 0xd0
+#define SDRAM_ERR_CAUSE_MSG_REG 0xd8
+#define SDRAM_ERR_CAUSE_DBE_MASK BIT(1)
+#define SDRAM_ERR_CAUSE_SBE_MASK BIT(0)
+
+#define SDRAM_RANK_CTRL_REG 0x1e0
+#define SDRAM_RANK_CTRL_EXIST_MASK(cs) BIT(cs)
+
+struct axp_mc_drvdata {
+	void __iomem *base;
+	/* width in bytes */
+	unsigned int width;
+	/* bank interleaving */
+	bool cs_addr_sel[SDRAM_NUM_CS];
+
+	char msg[128];
+};
+
+/* derived from "DRAM Address Multiplexing" in the ARMADA XP Functional Spec */
+static uint32_t axp_mc_calc_address(struct axp_mc_drvdata *drvdata,
+				    uint8_t cs, uint8_t bank, uint16_t row,
+				    uint16_t col)
+{
+	if (drvdata->width == 8) {
+		/* 64 bit */
+		if (drvdata->cs_addr_sel[cs])
+			/* bank interleaved */
+			return (((row & 0xfff8) << 16) |
+				((bank & 0x7) << 16) |
+				((row & 0x7) << 13) |
+				((col & 0x3ff) << 3));
+		else
+			return (((row & 0xffff) << 16) |
+				((bank & 0x7) << 13) |
+				((col & 0x3ff) << 3));
+	} else if (drvdata->width == 4) {
+		/* 32 bit */
+		if (drvdata->cs_addr_sel[cs])
+			/* bank interleaved */
+			return (((row & 0xfff0) << 15) |
+				((bank & 0x7) << 16) |
+				((row & 0xf) << 12) |
+				((col & 0x3ff) << 2));
+		else
+			return (((row & 0xffff) << 15) |
+				((bank & 0x7) << 12) |
+				((col & 0x3ff) << 2));
+	} else {
+		/* 16 bit */
+		if (drvdata->cs_addr_sel[cs])
+			/* bank interleaved */
+			return (((row & 0xffe0) << 14) |
+				((bank & 0x7) << 16) |
+				((row & 0x1f) << 11) |
+				((col & 0x3ff) << 1));
+		else
+			return (((row & 0xffff) << 14) |
+				((bank & 0x7) << 11) |
+				((col & 0x3ff) << 1));
+	}
+}
+
+static void axp_mc_check(struct mem_ctl_info *mci)
+{
+	struct axp_mc_drvdata *drvdata = mci->pvt_info;
+	uint32_t data_h, data_l, recv_ecc, calc_ecc, addr;
+	uint32_t cnt_sbe, cnt_dbe, cause_err, cause_msg;
+	uint32_t row_val, col_val, bank_val, addr_val;
+	uint8_t syndrome_val, cs_val;
+	char *msg = drvdata->msg;
+
+	data_h = readl(drvdata->base + SDRAM_ERR_DATA_H_REG);
+	data_l = readl(drvdata->base + SDRAM_ERR_DATA_L_REG);
+	recv_ecc = readl(drvdata->base + SDRAM_ERR_RECV_ECC_REG);
+	calc_ecc = readl(drvdata->base + SDRAM_ERR_CALC_ECC_REG);
+	addr = readl(drvdata->base + SDRAM_ERR_ADDR_REG);
+	cnt_sbe = readl(drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+	cnt_dbe = readl(drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+	cause_err = readl(drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+	cause_msg = readl(drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+	/* clear cause registers */
+	writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+	       drvdata->base + SDRAM_ERR_CAUSE_ERR_REG);
+	writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK),
+	       drvdata->base + SDRAM_ERR_CAUSE_MSG_REG);
+
+	/* clear error counter registers */
+	if (cnt_sbe)
+		writel(0, drvdata->base + SDRAM_ERR_SBE_COUNT_REG);
+	if (cnt_dbe)
+		writel(0, drvdata->base + SDRAM_ERR_DBE_COUNT_REG);
+
+	if (!cnt_sbe && !cnt_dbe)
+		return;
+
+	if (!(addr & SDRAM_ERR_ADDR_TYPE_MASK)) {
+		if (cnt_sbe)
+			cnt_sbe--;
+		else
+			dev_warn(mci->pdev, "inconsistent SBE count detected");
+	} else {
+		if (cnt_dbe)
+			cnt_dbe--;
+		else
+			dev_warn(mci->pdev, "inconsistent DBE count detected");
+	}
+
+	/* report earlier errors */
+	if (cnt_sbe)
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				     cnt_sbe, /* error count */
+				     0, 0, 0, /* pfn, offset, syndrome */
+				     -1, -1, -1, /* top, mid, low layer */
+				     mci->ctl_name,
+				     "details unavailable (multiple errors)");
+	if (cnt_dbe)
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				     cnt_dbe, /* error count */
+				     0, 0, 0, /* pfn, offset, syndrome */
+				     -1, -1, -1, /* top, mid, low layer */
+				     mci->ctl_name,
+				     "details unavailable (multiple errors)");
+
+	/* report details for most recent error */
+	cs_val = (addr & SDRAM_ERR_ADDR_CS_MASK) >> SDRAM_ERR_ADDR_CS_OFFSET;
+	bank_val = (addr & SDRAM_ERR_ADDR_BANK_MASK) >> SDRAM_ERR_ADDR_BANK_OFFSET;
+	row_val = (calc_ecc & SDRAM_ERR_CALC_ECC_ROW_MASK) >> SDRAM_ERR_CALC_ECC_ROW_OFFSET;
+	col_val = (addr & SDRAM_ERR_ADDR_COL_MASK) >> SDRAM_ERR_ADDR_COL_OFFSET;
+	syndrome_val = (recv_ecc ^ calc_ecc) & 0xff;
+	addr_val = axp_mc_calc_address(drvdata, cs_val, bank_val, row_val,
+				       col_val);
+	msg += sprintf(msg, "row=0x%04x ", row_val);	/* 11 chars */
+	msg += sprintf(msg, "bank=0x%x ", bank_val);	/*  9 chars */
+	msg += sprintf(msg, "col=0x%04x ", col_val);	/* 11 chars */
+	msg += sprintf(msg, "cs=%d", cs_val);		/*  4 chars */
+
+	if (!(addr & SDRAM_ERR_ADDR_TYPE_MASK)) {
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				     1, /* error count */
+				     addr_val >> PAGE_SHIFT,
+				     addr_val & ~PAGE_MASK,
+				     syndrome_val,
+				     cs_val, -1, -1, /* top, mid, low layer */
+				     mci->ctl_name, drvdata->msg);
+	} else {
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				     1, /* error count */
+				     addr_val >> PAGE_SHIFT,
addr_val & ~PAGE_MASK, + syndrome_val, + cs_val, -1, -1, /* top, mid, low layer */ + mci->ctl_name, drvdata->msg); + } +} + +static void axp_mc_read_config(struct mem_ctl_info *mci) +{ + struct axp_mc_drvdata *drvdata = mci->pvt_info; + uint32_t config, addr_ctrl, rank_ctrl; + unsigned int i, cs_struct, cs_size; + struct dimm_info *dimm; + + config = readl(drvdata->base + SDRAM_CONFIG_REG); + if (config & SDRAM_CONFIG_BUS_WIDTH_MASK) + /* 64 bit */ + drvdata->width = 8; + else + /* 32 bit */ + drvdata->width = 4; + + addr_ctrl = readl(drvdata->base + SDRAM_ADDR_CTRL_REG); + rank_ctrl = readl(drvdata->base + SDRAM_RANK_CTRL_REG); + for (i = 0; i < SDRAM_NUM_CS; i++) { + dimm = mci->dimms[i]; + + if (!(rank_ctrl & SDRAM_RANK_CTRL_EXIST_MASK(i))) + continue; + + drvdata->cs_addr_sel[i] = + !!(addr_ctrl & SDRAM_ADDR_CTRL_ADDR_SEL_MASK(i)); + + cs_struct = (addr_ctrl & SDRAM_ADDR_CTRL_STRUCT_MASK(i)) >> SDRAM_ADDR_CTRL_STRUCT_OFFSET(i); + cs_size = ((addr_ctrl & SDRAM_ADDR_CTRL_SIZE_HIGH_MASK(i)) >> (SDRAM_ADDR_CTRL_SIZE_HIGH_OFFSET(i) - 2) | + ((addr_ctrl & SDRAM_ADDR_CTRL_SIZE_LOW_MASK(i)) >> SDRAM_ADDR_CTRL_SIZE_LOW_OFFSET(i))); + + switch (cs_size) { + case 0: /* 2GBit */ + dimm->nr_pages = 524288; + break; + case 1: /* 256MBit */ + dimm->nr_pages = 65536; + break; + case 2: /* 512MBit */ + dimm->nr_pages = 131072; + break; + case 3: /* 1GBit */ + dimm->nr_pages = 262144; + break; + case 4: /* 4GBit */ + dimm->nr_pages = 1048576; + break; + case 5: /* 8GBit */ + dimm->nr_pages = 2097152; + break; + } + dimm->grain = 8; + dimm->dtype = cs_struct ? DEV_X16 : DEV_X8; + dimm->mtype = (config & SDRAM_CONFIG_REGISTERED_MASK) ? + MEM_RDDR3 : MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + } +} + +static const struct of_device_id axp_mc_of_match[] = { + {.compatible = "marvell,armada-xp-sdram-controller",}, + {}, +}; +MODULE_DEVICE_TABLE(of, axp_mc_of_match); + +static int axp_mc_probe(struct platform_device *pdev) +{ + struct axp_mc_drvdata *drvdata; + struct edac_mc_layer layers[1]; + const struct of_device_id *id; + struct mem_ctl_info *mci; + struct resource *r; + void __iomem *base; + uint32_t config; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "Unable to get mem resource\n"); + return -ENODEV; + } + + base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(base)) { + dev_err(&pdev->dev, "Unable to map regs\n"); + return PTR_ERR(base); + } + + config = readl(base + SDRAM_CONFIG_REG); + if (!(config & SDRAM_CONFIG_ECC_MASK)) { + dev_warn(&pdev->dev, "SDRAM ECC is not enabled"); + return -EINVAL; + } + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = SDRAM_NUM_CS; + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*drvdata)); + if (!mci) + return -ENOMEM; + + drvdata = mci->pvt_info; + drvdata->base = base; + mci->pdev = &pdev->dev; + platform_set_drvdata(pdev, mci); + + id = of_match_device(axp_mc_of_match, &pdev->dev); + mci->edac_check = axp_mc_check; + mci->mtype_cap = MEM_FLAG_DDR3; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = pdev->dev.driver->name; + mci->ctl_name = id ? 
id->compatible : "unknown"; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_NONE; + + axp_mc_read_config(mci); + + /* These SoCs have a reduced width bus */ + if (of_machine_is_compatible("marvell,armada380") || + of_machine_is_compatible("marvell,armadaxp-98dx3236")) + drvdata->width /= 2; + + /* configure SBE threshold */ + /* it seems that SBEs are not captured otherwise */ + writel(1 << SDRAM_ERR_CTRL_THR_OFFSET, drvdata->base + SDRAM_ERR_CTRL_REG); + + /* clear cause registers */ + writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK), drvdata->base + SDRAM_ERR_CAUSE_ERR_REG); + writel(~(SDRAM_ERR_CAUSE_DBE_MASK | SDRAM_ERR_CAUSE_SBE_MASK), drvdata->base + SDRAM_ERR_CAUSE_MSG_REG); + + /* clear counter registers */ + writel(0, drvdata->base + SDRAM_ERR_SBE_COUNT_REG); + writel(0, drvdata->base + SDRAM_ERR_DBE_COUNT_REG); + + if (edac_mc_add_mc(mci)) { + edac_mc_free(mci); + return -EINVAL; + } + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; +} + +static int axp_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver axp_mc_driver = { + .probe = axp_mc_probe, + .remove = axp_mc_remove, + .driver = { + .name = "armada_xp_mc_edac", + .of_match_table = of_match_ptr(axp_mc_of_match), + }, +}; + +/************************ EDAC Device (L2 Cache) ***************************/ + +struct aurora_l2_drvdata { + void __iomem *base; + + char msg[128]; + + /* error injection via debugfs */ + uint32_t inject_addr; + uint32_t inject_mask; + uint8_t inject_ctl; + + struct dentry *debugfs; +}; + +#ifdef CONFIG_EDAC_DEBUG +static void aurora_l2_inject(struct aurora_l2_drvdata *drvdata) +{ + drvdata->inject_addr &= AURORA_ERR_INJECT_CTL_ADDR_MASK; + drvdata->inject_ctl &= AURORA_ERR_INJECT_CTL_EN_MASK; + writel(0, drvdata->base + AURORA_ERR_INJECT_CTL_REG); + writel(drvdata->inject_mask, drvdata->base + AURORA_ERR_INJECT_MASK_REG); + writel(drvdata->inject_addr | drvdata->inject_ctl, drvdata->base + AURORA_ERR_INJECT_CTL_REG); +} +#endif + +static void aurora_l2_check(struct edac_device_ctl_info *dci) +{ + struct aurora_l2_drvdata *drvdata = dci->pvt_info; + uint32_t cnt, src, txn, err, attr_cap, addr_cap, way_cap; + unsigned int cnt_ce, cnt_ue; + char *msg = drvdata->msg; + size_t size = sizeof(drvdata->msg); + size_t len = 0; + + cnt = readl(drvdata->base + AURORA_ERR_CNT_REG); + attr_cap = readl(drvdata->base + AURORA_ERR_ATTR_CAP_REG); + addr_cap = readl(drvdata->base + AURORA_ERR_ADDR_CAP_REG); + way_cap = readl(drvdata->base + AURORA_ERR_WAY_CAP_REG); + + cnt_ce = (cnt & AURORA_ERR_CNT_CE_MASK) >> AURORA_ERR_CNT_CE_OFFSET; + cnt_ue = (cnt & AURORA_ERR_CNT_UE_MASK) >> AURORA_ERR_CNT_UE_OFFSET; + /* clear error counter registers */ + if (cnt_ce || cnt_ue) + writel(AURORA_ERR_CNT_CLR, drvdata->base + AURORA_ERR_CNT_REG); + + if (!(attr_cap & AURORA_ERR_ATTR_CAP_VALID)) + goto clear_remaining; + + src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF; + if (src <= 3) + len += snprintf(msg+len, size-len, "src=CPU%d ", src); + else + len += snprintf(msg+len, size-len, "src=IO "); + + txn = (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF; + switch (txn) { + case 0: + len += snprintf(msg+len, size-len, "txn=Data-Read "); + break; + case 1: + len += snprintf(msg+len, size-len, "txn=Isn-Read "); + break; + case 2: + len += snprintf(msg+len, size-len, "txn=Clean-Flush "); 
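An aside on the message assembly in aurora_l2_check() above: every snprintf() is offered only the space remaining in the fixed msg buffer, with len accumulating the running offset. A minimal user-space sketch of the same idiom (illustrative only, not part of the patch; the field values are made up):

	#include <stdio.h>

	static void build_msg(char *msg, size_t size)
	{
		size_t len = 0;

		/* each append is offered only the remaining space */
		len += snprintf(msg + len, size - len, "src=CPU%d ", 1);
		len += snprintf(msg + len, size - len, "txn=%s ", "Data-Read");
		len += snprintf(msg + len, size - len, "addr=0x%x", 0x1000u);
	}

	int main(void)
	{
		char msg[128];

		build_msg(msg, sizeof(msg));
		puts(msg);	/* prints: src=CPU1 txn=Data-Read addr=0x1000 */
		return 0;
	}

One caveat: snprintf() returns the length the output would have had, so after a truncation len can exceed size and the unsigned size - len wraps around; kernel code that needs to be robust against this tends to use scnprintf(), which returns the number of bytes actually stored.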
+			break;
+		case 3:
+			len += snprintf(msg+len, size-len, "txn=Eviction ");
+			break;
+		case 4:
+			len += snprintf(msg+len, size-len,
+					"txn=Read-Modify-Write ");
+			break;
+	}
+
+	err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
+	switch (err) {
+	case 0:
+		len += snprintf(msg+len, size-len, "err=CorrECC ");
+		break;
+	case 1:
+		len += snprintf(msg+len, size-len, "err=UnCorrECC ");
+		break;
+	case 2:
+		len += snprintf(msg+len, size-len, "err=TagParity ");
+		break;
+	}
+
+	len += snprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
+	len += snprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
+	len += snprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
+
+	/* clear error capture registers */
+	writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
+	if (err) {
+		/* UnCorrECC or TagParity */
+		if (cnt_ue)
+			cnt_ue--;
+		edac_device_handle_ue(dci, 0, 0, drvdata->msg);
+	} else {
+		if (cnt_ce)
+			cnt_ce--;
+		edac_device_handle_ce(dci, 0, 0, drvdata->msg);
+	}
+
+clear_remaining:
+	/* report remaining errors */
+	while (cnt_ue--)
+		edac_device_handle_ue(dci, 0, 0, "details unavailable (multiple errors)");
+	while (cnt_ce--)
+		edac_device_handle_ce(dci, 0, 0, "details unavailable (multiple errors)");
+}
+
+static void aurora_l2_poll(struct edac_device_ctl_info *dci)
+{
+#ifdef CONFIG_EDAC_DEBUG
+	struct aurora_l2_drvdata *drvdata = dci->pvt_info;
+#endif
+
+	aurora_l2_check(dci);
+#ifdef CONFIG_EDAC_DEBUG
+	aurora_l2_inject(drvdata);
+#endif
+}
+
+static const struct of_device_id aurora_l2_of_match[] = {
+	{.compatible = "marvell,aurora-system-cache",},
+	{},
+};
+MODULE_DEVICE_TABLE(of, aurora_l2_of_match);
+
+static int aurora_l2_probe(struct platform_device *pdev)
+{
+	struct aurora_l2_drvdata *drvdata;
+	struct edac_device_ctl_info *dci;
+	const struct of_device_id *id;
+	uint32_t l2x0_aux_ctrl;
+	void __iomem *base;
+	struct resource *r;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "Unable to get mem resource\n");
+		return -ENODEV;
+	}
+
+	base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(base)) {
+		dev_err(&pdev->dev, "Unable to map regs\n");
+		return PTR_ERR(base);
+	}
+
+	l2x0_aux_ctrl = readl(base + L2X0_AUX_CTRL);
+	if (!(l2x0_aux_ctrl & AURORA_ACR_PARITY_EN))
+		dev_warn(&pdev->dev, "tag parity is not enabled");
+	if (!(l2x0_aux_ctrl & AURORA_ACR_ECC_EN))
+		dev_warn(&pdev->dev, "data ECC is not enabled");
+
+	dci = edac_device_alloc_ctl_info(sizeof(*drvdata),
+					 "cpu", 1, "L", 1, 2, NULL, 0, 0);
+	if (!dci)
+		return -ENOMEM;
+
+	drvdata = dci->pvt_info;
+	drvdata->base = base;
+	dci->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dci);
+
+	id = of_match_device(aurora_l2_of_match, &pdev->dev);
+	dci->edac_check = aurora_l2_poll;
+	dci->mod_name = pdev->dev.driver->name;
+	dci->ctl_name = id ? id->compatible : "unknown";
+	dci->dev_name = dev_name(&pdev->dev);
+
+	/* clear registers */
+	writel(AURORA_ERR_CNT_CLR, drvdata->base + AURORA_ERR_CNT_REG);
+	writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
+
+	if (edac_device_add_device(dci)) {
+		edac_device_free_ctl_info(dci);
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_EDAC_DEBUG
+	drvdata->debugfs = edac_debugfs_create_dir(dev_name(&pdev->dev));
+	if (drvdata->debugfs) {
+		edac_debugfs_create_x32("inject_addr", 0644,
+					drvdata->debugfs,
+					&drvdata->inject_addr);
+		edac_debugfs_create_x32("inject_mask", 0644,
+					drvdata->debugfs,
+					&drvdata->inject_mask);
+		edac_debugfs_create_x8("inject_ctl", 0644,
+				       drvdata->debugfs, &drvdata->inject_ctl);
+	}
+#endif
+
+	return 0;
+}
+
+static int aurora_l2_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+#ifdef CONFIG_EDAC_DEBUG
+	struct aurora_l2_drvdata *drvdata = dci->pvt_info;
+
+	edac_debugfs_remove_recursive(drvdata->debugfs);
+#endif
+	edac_device_del_device(&pdev->dev);
+	edac_device_free_ctl_info(dci);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver aurora_l2_driver = {
+	.probe = aurora_l2_probe,
+	.remove = aurora_l2_remove,
+	.driver = {
+		.name = "aurora_l2_edac",
+		.of_match_table = of_match_ptr(aurora_l2_of_match),
+	},
+};
+
+/************************ Driver registration ******************************/
+
+static struct platform_driver * const drivers[] = {
+	&axp_mc_driver,
+	&aurora_l2_driver,
+};
+
+static int __init armada_xp_edac_init(void)
+{
+	int res;
+
+	/* only polling is supported */
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	res = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+	if (res)
+		pr_warn("Armada XP EDAC drivers fail to register\n");
+
+	return 0;
+}
+module_init(armada_xp_edac_init);
+
+static void __exit armada_xp_edac_exit(void)
+{
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_exit(armada_xp_edac_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Pengutronix");
+MODULE_DESCRIPTION("EDAC Drivers for Marvell Armada XP SDRAM and L2 Cache Controller");
diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index 5634437bb39d..b194658b8b5c 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -243,7 +243,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 	if (!np) {
 		dev_err(mci->pdev, "dt: missing /memory node\n");
 		return -ENODEV;
-	};
+	}
 
 	rc = of_address_to_resource(np, 0, &r);
@@ -252,7 +252,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 	if (rc) {
 		dev_err(mci->pdev, "dt: failed requesting resource for /memory node\n");
 		return rc;
-	};
+	}
 
 	dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
 		r.start, resource_size(&r), PAGE_SHIFT);
@@ -281,16 +281,11 @@ static int aspeed_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct edac_mc_layer layers[2];
 	struct mem_ctl_info *mci;
-	struct resource *res;
 	void __iomem *regs;
 	u32 reg04;
 	int rc;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENOENT;
-
-	regs = devm_ioremap_resource(dev, res);
+	regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
new file mode 100644
index 000000000000..e4736eb37bfb
--- /dev/null
+++ b/drivers/edac/bluefield_edac.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
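The aspeed change a few hunks up is part of a tree-wide cleanup: devm_platform_ioremap_resource() collapses the platform_get_resource()/devm_ioremap_resource() pair into a single call, so probe functions no longer need the intermediate struct resource. A sketch of the resulting probe shape (the function name is illustrative, not from the patch):

	#include <linux/err.h>
	#include <linux/io.h>
	#include <linux/platform_device.h>

	/* hypothetical probe skeleton showing the shortened mapping idiom */
	static int example_probe(struct platform_device *pdev)
	{
		void __iomem *regs;

		/* replaces platform_get_resource() + devm_ioremap_resource() */
		regs = devm_platform_ioremap_resource(pdev, 0);
		if (IS_ERR(regs))
			return PTR_ERR(regs);

		/* ... program the device through 'regs' ... */
		return 0;
	}

Because the mapping is devm-managed, the skeleton also needs no unmap path in remove or on probe failure.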
+/* + * Bluefield-specific EDAC driver. + * + * Copyright (c) 2019 Mellanox Technologies. + */ + +#include <linux/acpi.h> +#include <linux/arm-smccc.h> +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +#define DRIVER_NAME "bluefield-edac" + +/* + * Mellanox BlueField EMI (External Memory Interface) register definitions. + */ + +#define MLXBF_ECC_CNT 0x340 +#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) +#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) + +#define MLXBF_ECC_ERR 0x348 +#define MLXBF_ECC_ERR__SECC BIT(0) +#define MLXBF_ECC_ERR__DECC BIT(16) + +#define MLXBF_ECC_LATCH_SEL 0x354 +#define MLXBF_ECC_LATCH_SEL__START BIT(24) + +#define MLXBF_ERR_ADDR_0 0x358 + +#define MLXBF_ERR_ADDR_1 0x37c + +#define MLXBF_SYNDROM 0x35c +#define MLXBF_SYNDROM__DERR BIT(0) +#define MLXBF_SYNDROM__SERR BIT(1) +#define MLXBF_SYNDROM__SYN GENMASK(25, 16) + +#define MLXBF_ADD_INFO 0x364 +#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) + +#define MLXBF_EDAC_MAX_DIMM_PER_MC 2 +#define MLXBF_EDAC_ERROR_GRAIN 8 + +/* + * Request MLNX_SIP_GET_DIMM_INFO + * + * Retrieve information about DIMM on a certain slot. + * + * Call register usage: + * a0: MLNX_SIP_GET_DIMM_INFO + * a1: (Memory controller index) << 16 | (Dimm index in memory controller) + * a2-7: not used. + * + * Return status: + * a0: MLXBF_DIMM_INFO defined below describing the DIMM. + * a1-3: not used. + */ +#define MLNX_SIP_GET_DIMM_INFO 0x82000008 + +/* Format for the SMC response about the memory information */ +#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) +#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) +#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) +#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) +#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) +#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) + +struct bluefield_edac_priv { + int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; + void __iomem *emi_base; + int dimm_per_mc; +}; + +static u64 smc_call1(u64 smc_op, u64 smc_arg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +/* + * Gather the ECC information from the External Memory Interface registers + * and report it to the edac handler. + */ +static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, + int error_cnt, + int is_single_ecc) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 dram_additional_info, err_prank, edea0, edea1; + u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; + enum hw_event_mc_err_type ecc_type; + u64 ecc_dimm_addr; + int ecc_dimm; + + ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : + HW_EVENT_ERR_UNCORRECTED; + + /* + * Tell the External Memory Interface to populate the relevant + * registers with information about the last ECC error occurrence. + */ + ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; + writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); + + /* + * Verify that the ECC reported info in the registers is of the + * same type as the one asked to report. If not, just report the + * error without the detailed information. 
+ */ + dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); + serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); + derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); + syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); + + if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { + edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, + 0, 0, -1, mci->ctl_name, ""); + return; + } + + dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); + err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); + + ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; + + edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); + edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); + + ecc_dimm_addr = ((u64)edea1 << 32) | edea0; + + edac_mc_handle_error(ecc_type, mci, error_cnt, + PFN_DOWN(ecc_dimm_addr), + offset_in_page(ecc_dimm_addr), + syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); +} + +static void bluefield_edac_check(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; + + /* + * The memory controller might not be initialized by the firmware + * when there isn't memory, which may lead to bad register readings. + */ + if (mci->edac_cap == EDAC_FLAG_NONE) + return; + + ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); + single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); + double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); + + if (single_error_count) { + ecc_error |= MLXBF_ECC_ERR__SECC; + + bluefield_gather_report_ecc(mci, single_error_count, 1); + } + + if (double_error_count) { + ecc_error |= MLXBF_ECC_ERR__DECC; + + bluefield_gather_report_ecc(mci, double_error_count, 0); + } + + /* Write to clear reported errors. */ + if (ecc_count) + writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); +} + +/* Initialize the DIMMs information for the given memory controller. 
*/ +static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + int mem_ctrl_idx = mci->mc_idx; + struct dimm_info *dimm; + u64 smc_info, smc_arg; + int is_empty = 1, i; + + for (i = 0; i < priv->dimm_per_mc; i++) { + dimm = mci->dimms[i]; + + smc_arg = mem_ctrl_idx << 16 | i; + smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); + + if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { + dimm->mtype = MEM_EMPTY; + continue; + } + + is_empty = 0; + + dimm->edac_mode = EDAC_SECDED; + + if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) + dimm->mtype = MEM_NVDIMM; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) + dimm->mtype = MEM_LRDDR4; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) + dimm->mtype = MEM_RDDR4; + else + dimm->mtype = MEM_DDR4; + + dimm->nr_pages = + FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * + (SZ_1G / PAGE_SIZE); + dimm->grain = MLXBF_EDAC_ERROR_GRAIN; + + /* Mem controller for BlueField only supports x4, x8 and x16 */ + switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { + case 4: + dimm->dtype = DEV_X4; + break; + case 8: + dimm->dtype = DEV_X8; + break; + case 16: + dimm->dtype = DEV_X16; + break; + default: + dimm->dtype = DEV_UNKNOWN; + } + + priv->dimm_ranks[i] = + FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); + } + + if (is_empty) + mci->edac_cap = EDAC_FLAG_NONE; + else + mci->edac_cap = EDAC_FLAG_SECDED; +} + +static int bluefield_edac_mc_probe(struct platform_device *pdev) +{ + struct bluefield_edac_priv *priv; + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[1]; + struct mem_ctl_info *mci; + struct resource *emi_res; + unsigned int mc_idx, dimm_count; + int rc, ret; + + /* Read the MSS (Memory SubSystem) index from ACPI table. */ + if (device_property_read_u32(dev, "mss_number", &mc_idx)) { + dev_warn(dev, "bf_edac: MSS number unknown\n"); + return -EINVAL; + } + + /* Read the DIMMs per MC from ACPI table. */ + if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { + dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); + return -EINVAL; + } + + if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { + dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); + return -EINVAL; + } + + emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!emi_res) + return -EINVAL; + + layers[0].type = EDAC_MC_LAYER_SLOT; + layers[0].size = dimm_count; + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); + if (!mci) + return -ENOMEM; + + priv = mci->pvt_info; + + priv->dimm_per_mc = dimm_count; + priv->emi_base = devm_ioremap_resource(dev, emi_res); + if (IS_ERR(priv->emi_base)) { + dev_err(dev, "failed to map EMI IO resource\n"); + ret = PTR_ERR(priv->emi_base); + goto err; + } + + mci->pdev = dev; + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + + mci->mod_name = DRIVER_NAME; + mci->ctl_name = "BlueField_Memory_Controller"; + mci->dev_name = dev_name(dev); + mci->edac_check = bluefield_edac_check; + + /* Initialize mci with the actual populated DIMM information. */ + bluefield_edac_init_dimms(mci); + + platform_set_drvdata(pdev, mci); + + /* Register with EDAC core */ + rc = edac_mc_add_mc(mci); + if (rc) { + dev_err(dev, "failed to register with EDAC core\n"); + ret = rc; + goto err; + } + + /* Only POLL mode supported so far. 
*/ + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; + +err: + edac_mc_free(mci); + + return ret; + +} + +static int bluefield_edac_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + return 0; +} + +static const struct acpi_device_id bluefield_mc_acpi_ids[] = { + {"MLNXBF08", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); + +static struct platform_driver bluefield_edac_mc_driver = { + .driver = { + .name = DRIVER_NAME, + .acpi_match_table = bluefield_mc_acpi_ids, + }, + .probe = bluefield_edac_mc_probe, + .remove = bluefield_edac_mc_remove, +}; + +module_platform_driver(bluefield_edac_mc_driver); + +MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 1f943599a8ac..4804332d9946 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -138,3 +138,14 @@ void edac_debugfs_create_x16(const char *name, umode_t mode, debugfs_create_x16(name, mode, parent, value); } EXPORT_SYMBOL_GPL(edac_debugfs_create_x16); + +/* Wrapper for debugfs_create_x32() */ +void edac_debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) +{ + if (!parent) + parent = edac_debugfs; + + debugfs_create_x32(name, mode, parent, value); +} +EXPORT_SYMBOL_GPL(edac_debugfs_create_x32); diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 65cf2b9355c4..8c4d947fb848 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -555,12 +555,16 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info return edac_dev->panic_on_ue; } -void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg) +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg) { struct edac_device_instance *instance; struct edac_device_block *block = NULL; + if (!count) + return; + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { edac_device_printk(edac_dev, KERN_ERR, "INTERNAL ERROR: 'instance' out of range " @@ -582,27 +586,31 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, if (instance->nr_blocks > 0) { block = instance->blocks + block_nr; - block->counters.ce_count++; + block->counters.ce_count += count; } /* Propagate the count up the 'totals' tree */ - instance->counters.ce_count++; - edac_dev->counters.ce_count++; + instance->counters.ce_count += count; + edac_dev->counters.ce_count += count; if (edac_device_get_log_ce(edac_dev)) edac_device_printk(edac_dev, KERN_WARNING, - "CE: %s instance: %s block: %s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + "CE: %s instance: %s block: %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? 
block->name : "N/A", count, msg);
 }
-EXPORT_SYMBOL_GPL(edac_device_handle_ce);
+EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);
 
-void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
-			int inst_nr, int block_nr, const char *msg)
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg)
 {
 	struct edac_device_instance *instance;
 	struct edac_device_block *block = NULL;
 
+	if (!count)
+		return;
+
 	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
 		edac_device_printk(edac_dev, KERN_ERR,
 				"INTERNAL ERROR: 'instance' out of range "
@@ -624,22 +632,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
 
 	if (instance->nr_blocks > 0) {
 		block = instance->blocks + block_nr;
-		block->counters.ue_count++;
+		block->counters.ue_count += count;
 	}
 
 	/* Propagate the count up the 'totals' tree */
-	instance->counters.ue_count++;
-	edac_dev->counters.ue_count++;
+	instance->counters.ue_count += count;
+	edac_dev->counters.ue_count += count;
 
 	if (edac_device_get_log_ue(edac_dev))
 		edac_device_printk(edac_dev, KERN_EMERG,
-			"UE: %s instance: %s block: %s '%s'\n",
-			edac_dev->ctl_name, instance->name,
-			block ? block->name : "N/A", msg);
+			"UE: %s instance: %s block: %s count: %d '%s'\n",
+			edac_dev->ctl_name, instance->name,
+			block ? block->name : "N/A", count, msg);
 
 	if (edac_device_get_panic_on_ue(edac_dev))
-		panic("EDAC %s: UE instance: %s block %s '%s'\n",
-			edac_dev->ctl_name, instance->name,
-			block ? block->name : "N/A", msg);
+		panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
+			edac_dev->ctl_name, instance->name,
+			block ? block->name : "N/A", count, msg);
 }
-EXPORT_SYMBOL_GPL(edac_device_handle_ue);
+EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
index 1aaba74ae411..c4c0e0bdce14 100644
--- a/drivers/edac/edac_device.h
+++ b/drivers/edac/edac_device.h
@@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
 extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
 
 /**
- * edac_device_handle_ue():
- *	perform a common output and handling of an 'edac_dev' UE event
+ * Log correctable errors.
  *
  * @edac_dev: pointer to struct &edac_device_ctl_info
- * @inst_nr: number of the instance where the UE error happened
- * @block_nr: number of the block where the UE error happened
+ * @inst_nr: number of the instance where the CE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the CE error happened
+ * @msg: message to be printed
+ */
+void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg);
+
+/**
+ * Log uncorrectable errors.
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @count: Number of errors to log.
+ * @block_nr: number of the block where the UE error happened
  * @msg: message to be printed
  */
-extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
-				int inst_nr, int block_nr, const char *msg);
+void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
+				 unsigned int count, int inst_nr, int block_nr,
+				 const char *msg);
+
 /**
- * edac_device_handle_ce():
- *	perform a common output and handling of an 'edac_dev' CE event
+ * edac_device_handle_ce(): Log a single correctable error
  *
  * @edac_dev: pointer to struct &edac_device_ctl_info
  * @inst_nr: number of the instance where the CE error happened
  * @block_nr: number of the block where the CE error happened
  * @msg: message to be printed
  */
-extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
-				int inst_nr, int block_nr, const char *msg);
+static inline void
+edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
+		      int block_nr, const char *msg)
+{
+	edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
+
+/**
+ * edac_device_handle_ue(): Log a single uncorrectable error
+ *
+ * @edac_dev: pointer to struct &edac_device_ctl_info
+ * @inst_nr: number of the instance where the UE error happened
+ * @block_nr: number of the block where the UE error happened
+ * @msg: message to be printed
+ */
+static inline void
+edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
+		      int block_nr, const char *msg)
+{
+	edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
+}
 
 /**
  * edac_device_alloc_index: Allocate a unique device index number
@@ -316,5 +349,4 @@ extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
  */
 extern int edac_device_alloc_index(void);
 extern const char *edac_layer_name[];
-
 #endif
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 64922c8fa7e3..69e0d90460e6 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -114,8 +114,8 @@ static const struct kernel_param_ops edac_report_ops = {
 
 module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
 
-unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
-				 unsigned len)
+unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
+				     unsigned int len)
 {
 	struct mem_ctl_info *mci = dimm->mci;
 	int i, n, count = 0;
@@ -145,15 +145,18 @@ static void edac_mc_dump_channel(struct rank_info *chan)
 	edac_dbg(4, "  channel->dimm = %p\n", chan->dimm);
 }
 
-static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
+static void edac_mc_dump_dimm(struct dimm_info *dimm)
 {
 	char location[80];
 
+	if (!dimm->nr_pages)
+		return;
+
 	edac_dimm_info_location(dimm, location, sizeof(location));
 
 	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
 		 dimm->mci->csbased ? "rank" : "dimm",
-		 number, location, dimm->csrow, dimm->cschannel);
+		 dimm->idx, location, dimm->csrow, dimm->cschannel);
 	edac_dbg(4, "  dimm = %p\n", dimm);
 	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
 	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
@@ -236,9 +239,9 @@ EXPORT_SYMBOL_GPL(edac_mem_types);
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
*/ -void *edac_align_ptr(void **p, unsigned size, int n_elems) +void *edac_align_ptr(void **p, unsigned int size, int n_elems) { - unsigned align, r; + unsigned int align, r; void *ptr = *p; *p += size * n_elems; @@ -275,38 +278,37 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems) static void _edac_mc_free(struct mem_ctl_info *mci) { - int i, chn, row; struct csrow_info *csr; - const unsigned int tot_dimms = mci->tot_dimms; - const unsigned int tot_channels = mci->num_cschannel; - const unsigned int tot_csrows = mci->nr_csrows; + int i, chn, row; if (mci->dimms) { - for (i = 0; i < tot_dimms; i++) + for (i = 0; i < mci->tot_dimms; i++) kfree(mci->dimms[i]); kfree(mci->dimms); } + if (mci->csrows) { - for (row = 0; row < tot_csrows; row++) { + for (row = 0; row < mci->nr_csrows; row++) { csr = mci->csrows[row]; - if (csr) { - if (csr->channels) { - for (chn = 0; chn < tot_channels; chn++) - kfree(csr->channels[chn]); - kfree(csr->channels); - } - kfree(csr); + if (!csr) + continue; + + if (csr->channels) { + for (chn = 0; chn < mci->num_cschannel; chn++) + kfree(csr->channels[chn]); + kfree(csr->channels); } + kfree(csr); } kfree(mci->csrows); } kfree(mci); } -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, struct edac_mc_layer *layers, - unsigned sz_pvt) + unsigned int sz_pvt) { struct mem_ctl_info *mci; struct edac_mc_layer *layer; @@ -314,26 +316,29 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, struct rank_info *chan; struct dimm_info *dimm; u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; - unsigned pos[EDAC_MAX_LAYERS]; - unsigned size, tot_dimms = 1, count = 1; - unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; + unsigned int pos[EDAC_MAX_LAYERS]; + unsigned int idx, size, tot_dimms = 1, count = 1; + unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0; void *pvt, *p, *ptr = NULL; - int i, j, row, chn, n, len, off; + int i, j, row, chn, n, len; bool per_rank = false; - BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0); + if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0)) + return NULL; + /* * Calculate the total amount of dimms and csrows/cschannels while * in the old API emulation mode */ - for (i = 0; i < n_layers; i++) { - tot_dimms *= layers[i].size; - if (layers[i].is_virt_csrow) - tot_csrows *= layers[i].size; + for (idx = 0; idx < n_layers; idx++) { + tot_dimms *= layers[idx].size; + + if (layers[idx].is_virt_csrow) + tot_csrows *= layers[idx].size; else - tot_channels *= layers[i].size; + tot_channels *= layers[idx].size; - if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT) + if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT) per_rank = true; } @@ -426,19 +431,15 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, memset(&pos, 0, sizeof(pos)); row = 0; chn = 0; - for (i = 0; i < tot_dimms; i++) { + for (idx = 0; idx < tot_dimms; idx++) { chan = mci->csrows[row]->channels[chn]; - off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]); - if (off < 0 || off >= tot_dimms) { - edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n"); - goto error; - } dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL); if (!dimm) goto error; - mci->dimms[off] = dimm; + mci->dimms[idx] = dimm; dimm->mci = mci; + dimm->idx = idx; /* * Copy DIMM location and initialize it. 
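The loop above makes the layer arithmetic in edac_mc_alloc() explicit: the total DIMM count is the product of all layer sizes, and each layer feeds either the virtual-csrow count or the channel count depending on is_virt_csrow. A stand-alone sketch of that arithmetic (illustrative user-space code, not from the patch; the example geometry is made up):

	#include <stdio.h>

	struct layer { unsigned int size; int is_virt_csrow; };

	int main(void)
	{
		/* example: 4 chip selects x 2 channels, csrow-based */
		struct layer layers[] = { { 4, 1 }, { 2, 0 } };
		unsigned int tot_dimms = 1, tot_csrows = 1, tot_channels = 1;

		for (unsigned int i = 0; i < sizeof(layers) / sizeof(layers[0]); i++) {
			/* every layer multiplies the DIMM count ... */
			tot_dimms *= layers[i].size;
			/* ... and one of the two legacy-API counts */
			if (layers[i].is_virt_csrow)
				tot_csrows *= layers[i].size;
			else
				tot_channels *= layers[i].size;
		}

		/* prints: dimms=8 csrows=4 channels=2 */
		printf("dimms=%u csrows=%u channels=%u\n",
		       tot_dimms, tot_csrows, tot_channels);
		return 0;
	}

The dimm->idx assignment added in the same hunk records exactly this flat product-space index, which is what lets later hunks drop the EDAC_DIMM_OFF() offset computation.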
@@ -504,16 +505,10 @@ void edac_mc_free(struct mem_ctl_info *mci) { edac_dbg(1, "\n"); - /* If we're not yet registered with sysfs free only what was allocated - * in edac_mc_alloc(). - */ - if (!device_is_registered(&mci->dev)) { - _edac_mc_free(mci); - return; - } + if (device_is_registered(&mci->dev)) + edac_unregister_sysfs(mci); - /* the mci instance is freed here, when the sysfs object is dropped */ - edac_unregister_sysfs(mci); + _edac_mc_free(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); @@ -715,6 +710,7 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, edac_mc_dump_mci(mci); if (edac_debug_level >= 4) { + struct dimm_info *dimm; int i; for (i = 0; i < mci->nr_csrows; i++) { @@ -731,9 +727,9 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, if (csrow->channels[j]->dimm->nr_pages) edac_mc_dump_channel(csrow->channels[j]); } - for (i = 0; i < mci->tot_dimms; i++) - if (mci->dimms[i]->nr_pages) - edac_mc_dump_dimm(mci->dimms[i], i); + + mci_for_each_dimm(mci, dimm) + edac_mc_dump_dimm(dimm); } #endif mutex_lock(&mem_ctls_mutex); @@ -1056,6 +1052,21 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, { char detail[80]; int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + u8 grain_bits; + + /* Sanity-check driver-supplied grain value. */ + if (WARN_ON_ONCE(!e->grain)) + e->grain = 1; + + grain_bits = fls_long(e->grain - 1); + + /* Report the error via the trace interface */ + if (IS_ENABLED(CONFIG_RAS)) + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, + e->low_layer, + (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); /* Memory type dependent details about the error */ if (type == HW_EVENT_ERR_CORRECTED) { @@ -1091,11 +1102,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { + struct dimm_info *dimm; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i, n_labels = 0; - u8 grain_bits; struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); @@ -1151,9 +1162,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, p = e->label; *p = '\0'; - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = mci->dimms[i]; - + mci_for_each_dimm(mci, dimm) { if (top_layer >= 0 && top_layer != dimm->location[0]) continue; if (mid_layer >= 0 && mid_layer != dimm->location[1]) @@ -1171,37 +1180,37 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (e->enable_per_layer_report && dimm->nr_pages) { - if (n_labels >= EDAC_MAX_LABELS) { - e->enable_per_layer_report = false; - break; - } - n_labels++; - if (p != e->label) { - strcpy(p, OTHER_LABEL); - p += strlen(OTHER_LABEL); - } - strcpy(p, dimm->label); - p += strlen(p); - *p = '\0'; + if (!e->enable_per_layer_report || !dimm->nr_pages) + continue; - /* - * get csrow/channel of the DIMM, in order to allow - * incrementing the compat API counters - */ - edac_dbg(4, "%s csrows map: (%d,%d)\n", - mci->csbased ? 
"rank" : "dimm", - dimm->csrow, dimm->cschannel); - if (row == -1) - row = dimm->csrow; - else if (row >= 0 && row != dimm->csrow) - row = -2; - - if (chan == -1) - chan = dimm->cschannel; - else if (chan >= 0 && chan != dimm->cschannel) - chan = -2; + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; } + n_labels++; + if (p != e->label) { + strcpy(p, OTHER_LABEL); + p += strlen(OTHER_LABEL); + } + strcpy(p, dimm->label); + p += strlen(p); + + /* + * get csrow/channel of the DIMM, in order to allow + * incrementing the compat API counters + */ + edac_dbg(4, "%s csrows map: (%d,%d)\n", + mci->csbased ? "rank" : "dimm", + dimm->csrow, dimm->cschannel); + if (row == -1) + row = dimm->csrow; + else if (row >= 0 && row != dimm->csrow) + row = -2; + + if (chan == -1) + chan = dimm->cschannel; + else if (chan >= 0 && chan != dimm->cschannel) + chan = -2; } if (!e->enable_per_layer_report) { @@ -1235,16 +1244,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, if (p > e->location) *(p - 1) = '\0'; - /* Report the error via the trace interface */ - grain_bits = fls_long(e->grain) + 1; - - if (IS_ENABLED(CONFIG_RAS)) - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, - e->low_layer, - (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, - grain_bits, e->syndrome, e->other_detail); - edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h index 4165e15995ad..02aac5c61d00 100644 --- a/drivers/edac/edac_mc.h +++ b/drivers/edac/edac_mc.h @@ -122,10 +122,10 @@ do { \ * On success, return a pointer to struct mem_ctl_info pointer; * %NULL otherwise */ -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, struct edac_mc_layer *layers, - unsigned sz_pvt); + unsigned int sz_pvt); /** * edac_get_owner - Return the owner's mod_name of EDAC MC diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4386ea4b9b5a..c70ec0a306d8 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -131,7 +131,7 @@ static const char * const edac_caps[] = { struct dev_ch_attribute { struct device_attribute attr; - int channel; + unsigned int channel; }; #define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ @@ -200,7 +200,7 @@ static ssize_t channel_dimm_label_show(struct device *dev, char *data) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; /* if field has not been initialized, there is nothing to send */ @@ -216,7 +216,7 @@ static ssize_t channel_dimm_label_store(struct device *dev, const char *data, size_t count) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; size_t copy_count = count; @@ -240,7 +240,7 @@ static ssize_t channel_ce_count_show(struct device *dev, struct device_attribute *mattr, char *data) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; return sprintf(data, "%u\n", rank->ce_count); @@ -276,10 +276,7 @@ static const struct attribute_group *csrow_attr_groups[] = { static void csrow_attr_release(struct device *dev) { 
- struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); - kfree(csrow); + /* release device with _edac_mc_free() */ } static const struct device_type csrow_attr_type = { @@ -414,14 +411,16 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, dev_set_name(&csrow->dev, "csrow%d", index); dev_set_drvdata(&csrow->dev, csrow); - edac_dbg(0, "creating (virtual) csrow node %s\n", - dev_name(&csrow->dev)); - err = device_add(&csrow->dev); - if (err) + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); put_device(&csrow->dev); + return err; + } - return err; + edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); + + return 0; } /* Create a CSROW object under specifed edac_mc_device */ @@ -435,12 +434,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) { - edac_dbg(1, - "failure: create csrow objects for csrow %d\n", - i); + if (err < 0) goto error; - } } return 0; @@ -449,8 +444,7 @@ error: csrow = mci->csrows[i]; if (!nr_pages_per_csrow(csrow)) continue; - - device_del(&mci->csrows[i]->dev); + device_unregister(&mci->csrows[i]->dev); } return err; @@ -559,14 +553,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); u32 count; - int off; - - off = EDAC_DIMM_OFF(dimm->mci->layers, - dimm->mci->n_layers, - dimm->location[0], - dimm->location[1], - dimm->location[2]); - count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][off]; + + count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx]; return sprintf(data, "%u\n", count); } @@ -576,14 +564,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev, { struct dimm_info *dimm = to_dimm(dev); u32 count; - int off; - - off = EDAC_DIMM_OFF(dimm->mci->layers, - dimm->mci->n_layers, - dimm->location[0], - dimm->location[1], - dimm->location[2]); - count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][off]; + + count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx]; return sprintf(data, "%u\n", count); } @@ -622,10 +604,7 @@ static const struct attribute_group *dimm_attr_groups[] = { static void dimm_attr_release(struct device *dev) { - struct dimm_info *dimm = container_of(dev, struct dimm_info, dev); - - edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev)); - kfree(dimm); + /* release device with _edac_mc_free() */ } static const struct device_type dimm_attr_type = { @@ -635,8 +614,7 @@ static const struct device_type dimm_attr_type = { /* Create a DIMM object under specifed memory controller device */ static int edac_create_dimm_object(struct mem_ctl_info *mci, - struct dimm_info *dimm, - int index) + struct dimm_info *dimm) { int err; dimm->mci = mci; @@ -646,19 +624,28 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->dev.parent = &mci->dev; if (mci->csbased) - dev_set_name(&dimm->dev, "rank%d", index); + dev_set_name(&dimm->dev, "rank%d", dimm->idx); else - dev_set_name(&dimm->dev, "dimm%d", index); + dev_set_name(&dimm->dev, "dimm%d", dimm->idx); dev_set_drvdata(&dimm->dev, dimm); pm_runtime_forbid(&mci->dev); err = device_add(&dimm->dev); - if (err) + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev)); put_device(&dimm->dev); + return err; + } - edac_dbg(0, "created rank/dimm device %s\n", dev_name(&dimm->dev)); + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + char location[80]; - 
return err; + edac_dimm_info_location(dimm, location, sizeof(location)); + edac_dbg(0, "device %s created at location %s\n", + dev_name(&dimm->dev), location); + } + + return 0; } /* @@ -899,10 +886,7 @@ static const struct attribute_group *mci_attr_groups[] = { static void mci_attr_release(struct device *dev) { - struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); - - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); - kfree(mci); + /* release device with _edac_mc_free() */ } static const struct device_type mci_attr_type = { @@ -921,7 +905,8 @@ static const struct device_type mci_attr_type = { int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups) { - int i, err; + struct dimm_info *dimm; + int err; /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; @@ -933,39 +918,26 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); - edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); put_device(&mci->dev); - goto out; + return err; } + edac_dbg(0, "device %s created\n", dev_name(&mci->dev)); + /* * Create the dimm/rank devices */ - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = mci->dimms[i]; + mci_for_each_dimm(mci, dimm) { /* Only expose populated DIMMs */ if (!dimm->nr_pages) continue; -#ifdef CONFIG_EDAC_DEBUG - edac_dbg(1, "creating dimm%d, located at ", i); - if (edac_debug_level >= 1) { - int lay; - for (lay = 0; lay < mci->n_layers; lay++) - printk(KERN_CONT "%s %d ", - edac_layer_name[mci->layers[lay].type], - dimm->location[lay]); - printk(KERN_CONT "\n"); - } -#endif - err = edac_create_dimm_object(mci, dimm, i); - if (err) { - edac_dbg(1, "failure: create dimm %d obj\n", i); + err = edac_create_dimm_object(mci, dimm); + if (err) goto fail_unregister_dimm; - } } #ifdef CONFIG_EDAC_LEGACY_SYSFS @@ -978,16 +950,12 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, return 0; fail_unregister_dimm: - for (i--; i >= 0; i--) { - struct dimm_info *dimm = mci->dimms[i]; - if (!dimm->nr_pages) - continue; - - device_unregister(&dimm->dev); + mci_for_each_dimm(mci, dimm) { + if (device_is_registered(&dimm->dev)) + device_unregister(&dimm->dev); } device_unregister(&mci->dev); -out: return err; } @@ -996,7 +964,7 @@ out: */ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) { - int i; + struct dimm_info *dimm; edac_dbg(0, "\n"); @@ -1007,18 +975,17 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) edac_delete_csrow_objects(mci); #endif - for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = mci->dimms[i]; + mci_for_each_dimm(mci, dimm) { if (dimm->nr_pages == 0) continue; - edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev)); device_unregister(&dimm->dev); } } void edac_unregister_sysfs(struct mem_ctl_info *mci) { - edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); } @@ -1029,7 +996,7 @@ static void mc_attr_release(struct device *dev) * parent device, used to create the /sys/devices/mc sysfs node. * So, there are no attributes on it. 
*/ - edac_dbg(1, "Releasing device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(dev); } @@ -1044,10 +1011,8 @@ int __init edac_mc_sysfs_init(void) int err; mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); - if (!mci_pdev) { - err = -ENOMEM; - goto out; - } + if (!mci_pdev) + return -ENOMEM; mci_pdev->bus = edac_get_sysfs_subsys(); mci_pdev->type = &mc_attr_type; @@ -1055,17 +1020,15 @@ int __init edac_mc_sysfs_init(void) dev_set_name(mci_pdev, "mc"); err = device_add(mci_pdev); - if (err < 0) - goto out_put_device; + if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev)); + put_device(mci_pdev); + return err; + } edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; - - out_put_device: - put_device(mci_pdev); - out: - return err; } void edac_mc_sysfs_exit(void) diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index b2f59ee76c22..388427d378b1 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -82,6 +82,8 @@ void edac_debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value); void edac_debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); +void edac_debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value); #else static inline void edac_debugfs_init(void) { } static inline void edac_debugfs_exit(void) { } @@ -96,6 +98,8 @@ static inline void edac_debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { } static inline void edac_debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { } +static inline void edac_debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) { } #endif /* diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 7f19f1c672c3..b99080d8a10c 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -21,14 +21,22 @@ struct ghes_edac_pvt { struct mem_ctl_info *mci; /* Buffers for the error handling routine */ - char detail_location[240]; - char other_detail[160]; + char other_detail[400]; char msg[80]; }; -static atomic_t ghes_init = ATOMIC_INIT(0); +static refcount_t ghes_refcount = REFCOUNT_INIT(0); + +/* + * Access to ghes_pvt must be protected by ghes_lock. The spinlock + * also provides the necessary (implicit) memory barrier for the SMP + * case to make the pointer visible on another CPU. + */ static struct ghes_edac_pvt *ghes_pvt; +/* GHES registration mutex */ +static DEFINE_MUTEX(ghes_reg_mutex); + /* * Sync with other, potentially concurrent callers of * ghes_edac_report_mem_error(). 
We don't know what the @@ -68,7 +76,7 @@ struct memdev_dmi_entry { struct ghes_edac_dimm_fill { struct mem_ctl_info *mci; - unsigned count; + unsigned int count; }; static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) @@ -79,15 +87,15 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) (*num_dimm)++; } -static int get_dimm_smbios_index(u16 handle) +static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle) { - struct mem_ctl_info *mci = ghes_pvt->mci; - int i; + struct dimm_info *dimm; - for (i = 0; i < mci->tot_dimms; i++) { - if (mci->dimms[i]->smbios_handle == handle) - return i; + mci_for_each_dimm(mci, dimm) { + if (dimm->smbios_handle == handle) + return dimm->idx; } + return -1; } @@ -98,9 +106,7 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) if (dh->type == DMI_ENTRY_MEM_DEVICE) { struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; - struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, - mci->n_layers, - dimm_fill->count, 0, 0); + struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count, 0, 0); u16 rdr_mask = BIT(7) | BIT(13); if (entry->size == 0xffff) { @@ -198,13 +204,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) enum hw_event_mc_err_type type; struct edac_raw_error_desc *e; struct mem_ctl_info *mci; - struct ghes_edac_pvt *pvt = ghes_pvt; + struct ghes_edac_pvt *pvt; unsigned long flags; char *p; - u8 grain_bits; - - if (!pvt) - return; /* * We can do the locking below because GHES defers error processing @@ -216,12 +218,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) spin_lock_irqsave(&ghes_lock, flags); + pvt = ghes_pvt; + if (!pvt) + goto unlock; + mci = pvt->mci; e = &mci->error_desc; /* Cleans the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = 1; + e->grain = 1; strcpy(e->label, "unknown label"); e->msg = pvt->msg; e->other_detail = pvt->other_detail; @@ -311,13 +318,13 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) /* Error address */ if (mem_err->validation_bits & CPER_MEM_VALID_PA) { - e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; - e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + e->page_frame_number = PHYS_PFN(mem_err->physical_addr); + e->offset_in_page = offset_in_page(mem_err->physical_addr); } /* Error grain */ if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) - e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + e->grain = ~mem_err->physical_addr_mask + 1; /* Memory error location, mapped on e->location */ p = e->location; @@ -348,7 +355,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "DIMM DMI handle: 0x%.4x ", mem_err->mem_dev_handle); - index = get_dimm_smbios_index(mem_err->mem_dev_handle); + index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle); if (index >= 0) { e->top_layer = index; e->enable_per_layer_report = true; @@ -360,6 +367,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; + p += snprintf(p, sizeof(pvt->other_detail), + "APEI location: %s ", e->location); if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { u64 status = mem_err->error_status; @@ -433,16 +442,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) if (p > pvt->other_detail) *(p - 1) = '\0'; - /* Generate the trace event */ - 
grain_bits = fls_long(e->grain);
-	snprintf(pvt->detail_location, sizeof(pvt->detail_location),
-		 "APEI location: %s %s", e->location, e->other_detail);
-	trace_mc_event(type, e->msg, e->label, e->error_count,
-		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
-		       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
-		       grain_bits, e->syndrome, pvt->detail_location);
-	edac_raw_mc_handle_error(type, mci, e);
+
+unlock:
 	spin_unlock_irqrestore(&ghes_lock, flags);
 }
 
@@ -457,10 +459,12 @@ static struct acpi_platform_list plat_list[] = {
 int ghes_edac_register(struct ghes *ghes, struct device *dev)
 {
 	bool fake = false;
-	int rc, num_dimm = 0;
+	int rc = 0, num_dimm = 0;
 	struct mem_ctl_info *mci;
+	struct ghes_edac_pvt *pvt;
 	struct edac_mc_layer layers[1];
 	struct ghes_edac_dimm_fill dimm_fill;
+	unsigned long flags;
 	int idx = -1;
 
 	if (IS_ENABLED(CONFIG_X86)) {
@@ -472,11 +476,14 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 		idx = 0;
 	}
 
+	/* finish another registration/unregistration instance first */
+	mutex_lock(&ghes_reg_mutex);
+
 	/*
	 * We have only one logical memory controller to which all DIMMs belong.
 	 */
-	if (atomic_inc_return(&ghes_init) > 1)
-		return 0;
+	if (refcount_inc_not_zero(&ghes_refcount))
+		goto unlock;
 
 	/* Get the number of DIMMs */
 	dmi_walk(ghes_edac_count_dimms, &num_dimm);
@@ -494,12 +501,13 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
 	if (!mci) {
 		pr_info("Can't allocate memory for EDAC data\n");
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto unlock;
 	}
 
-	ghes_pvt = mci->pvt_info;
-	ghes_pvt->ghes = ghes;
-	ghes_pvt->mci = mci;
+	pvt = mci->pvt_info;
+	pvt->ghes = ghes;
+	pvt->mci = mci;
 
 	mci->pdev = dev;
 	mci->mtype_cap = MEM_FLAG_EMPTY;
@@ -527,8 +535,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 		dimm_fill.mci = mci;
 		dmi_walk(ghes_edac_dmidecode, &dimm_fill);
 	} else {
-		struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-						       mci->n_layers, 0, 0, 0);
+		struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
 
 		dimm->nr_pages = 1;
 		dimm->grain = 128;
@@ -541,19 +548,48 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	if (rc < 0) {
 		pr_info("Can't register at EDAC core\n");
 		edac_mc_free(mci);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto unlock;
 	}
-	return 0;
+
+	spin_lock_irqsave(&ghes_lock, flags);
+	ghes_pvt = pvt;
+	spin_unlock_irqrestore(&ghes_lock, flags);
+
+	/* only set on success */
+	refcount_set(&ghes_refcount, 1);
+
+unlock:
+	mutex_unlock(&ghes_reg_mutex);
+
+	return rc;
 }
 
 void ghes_edac_unregister(struct ghes *ghes)
 {
 	struct mem_ctl_info *mci;
+	unsigned long flags;
 
-	if (!ghes_pvt)
-		return;
+	mutex_lock(&ghes_reg_mutex);
+
+	if (!refcount_dec_and_test(&ghes_refcount))
+		goto unlock;
+
+	/*
+	 * Wait for the irq handler to finish.
+	 */
+	spin_lock_irqsave(&ghes_lock, flags);
+	mci = ghes_pvt ? ghes_pvt->mci : NULL;
+	ghes_pvt = NULL;
+	spin_unlock_irqrestore(&ghes_lock, flags);
+
+	if (!mci)
+		goto unlock;
+
+	mci = edac_mc_del_mc(mci->pdev);
+	if (mci)
+		edac_mc_free(mci);
-	mci = ghes_pvt->mci;
-	edac_mc_del_mc(mci->pdev);
-	edac_mc_free(mci);
+
+unlock:
+	mutex_unlock(&ghes_reg_mutex);
 }
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 83392f2841de..059eccf0582b 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -123,9 +123,9 @@ static int i10nm_get_all_munits(void)
 }
 
 static const struct x86_cpu_id i10nm_cpuids[] = {
-	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_X, 0, 0 },
+	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_D, 0, 0 },
 	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_X, 0, 0 },
-	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_XEON_D, 0, 0 },
+	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ICELAKE_D, 0, 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -154,8 +154,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
 
 		ndimms = 0;
 		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
 			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
 			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index f564a4a8a4ae..5c1eea96230c 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -324,7 +324,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
 	pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar);
 	mchbar &= I3000_MCHBAR_MASK;
-	window = ioremap_nocache(mchbar, I3000_MMR_WINDOW_SIZE);
+	window = ioremap(mchbar, I3000_MMR_WINDOW_SIZE);
 	if (!window) {
 		printk(KERN_ERR "i3000: cannot map mmio space at 0x%lx\n",
 			mchbar);
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 299b441647cd..a8988db6d423 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -280,7 +280,7 @@ static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
 		return NULL;
 	}
 
-	window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
+	window = ioremap(u.mchbar, I3200_MMR_WINDOW_SIZE);
 	if (!window)
 		printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
 			(unsigned long long)u.mchbar);
@@ -392,8 +392,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 		unsigned long nr_pages;
 
 		for (j = 0; j < nr_channels; j++) {
-			struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-							       mci->n_layers, i, j, 0);
+			struct dimm_info *dimm = edac_get_dimm(mci, i, j, 0);
 
 			nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
 			if (nr_pages == 0)
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 078a7351bf05..1a6f69c859ab 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -1275,9 +1275,8 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
 			if (!MTR_DIMMS_PRESENT(mtr))
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-					     channel / MAX_BRANCHES,
-					     channel % MAX_BRANCHES, slot);
+			dimm = edac_get_dimm(mci, channel / MAX_BRANCHES,
+					     channel % MAX_BRANCHES, slot);
 
 			csrow_megs = pvt->dimm_info[slot][channel].megabytes;
 			dimm->grain = 8;
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index b506eef6b146..191aa7c19ded 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -259,11 +259,6 @@ static inline u32 i5100_nrecmemb_ras(u32 a)
 	return a & ((1 << 16) - 1);
 }
 
-static inline u32 i5100_redmemb_ecc_locator(u32 a)
-{
-	return a & ((1 << 18) - 1);
-}
-
 static inline u32 i5100_recmema_merr(u32 a)
 {
 	return i5100_nrecmema_merr(a);
@@ -417,7 +412,8 @@ static const char *i5100_err_msg(unsigned err)
 }
 
 /* convert csrow index into a rank (per channel -- 0..5) */
-static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci,
+					unsigned int csrow)
 {
 	const struct i5100_priv *priv = mci->pvt_info;
 
@@ -425,7 +421,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
 }
 
 /* convert csrow index into a channel (0..1) */
-static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow)
+static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci,
+					unsigned int csrow)
 {
 	const struct i5100_priv *priv = mci->pvt_info;
 
@@ -484,7 +481,6 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
 	u32 dw;
 	u32 dw2;
 	unsigned syndrome = 0;
-	unsigned ecc_loc = 0;
 	unsigned merr;
 	unsigned bank;
 	unsigned rank;
@@ -497,7 +493,6 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
 		pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
 		syndrome = dw2;
 		pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
-		ecc_loc = i5100_redmemb_ecc_locator(dw2);
 	}
 
 	if (i5100_validlog_recmemvalid(dw)) {
@@ -653,11 +648,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor,
 	return ret;
 }
 
-static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow)
+static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow)
 {
 	struct i5100_priv *priv = mci->pvt_info;
-	const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow);
-	const unsigned chan = i5100_csrow_to_chan(mci, csrow);
+	const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow);
+	const unsigned int chan = i5100_csrow_to_chan(mci, csrow);
 	unsigned addr_lines;
 
 	/* dimm present? */
@@ -711,7 +706,6 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
 {
 	struct i5100_priv *priv = mci->pvt_info;
 	u16 w;
-	unsigned long et;
 
 	pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
 	if (i5100_spddata_busy(w))
@@ -722,7 +716,6 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
 				   0, 0));
 
 	/* wait up to 100ms */
-	et = jiffies + HZ / 10;
 	udelay(100);
 	while (1) {
 		pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
@@ -846,21 +839,17 @@ static void i5100_init_interleaving(struct pci_dev *pdev,
 
 static void i5100_init_csrows(struct mem_ctl_info *mci)
 {
-	int i;
 	struct i5100_priv *priv = mci->pvt_info;
+	struct dimm_info *dimm;
 
-	for (i = 0; i < mci->tot_dimms; i++) {
-		struct dimm_info *dimm;
-		const unsigned long npages = i5100_npages(mci, i);
-		const unsigned chan = i5100_csrow_to_chan(mci, i);
-		const unsigned rank = i5100_csrow_to_rank(mci, i);
+	mci_for_each_dimm(mci, dimm) {
+		const unsigned long npages = i5100_npages(mci, dimm->idx);
+		const unsigned int chan = i5100_csrow_to_chan(mci, dimm->idx);
+		const unsigned int rank = i5100_csrow_to_rank(mci, dimm->idx);
 
 		if (!npages)
 			continue;
 
-		dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-				     chan, rank, 0);
-
 		dimm->nr_pages = npages;
 		dimm->grain = 32;
 		dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 6f8bcdb9256a..f131c05ade9f 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -548,8 +548,8 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
 	ras = nrec_ras(info);
 	cas = nrec_cas(info);
 
-	edac_dbg(0, "\t\tDIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
-		 rank, channel, channel + 1, branch >> 1, bank,
+	edac_dbg(0, "\t\t%s DIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
+		 type, rank, channel, channel + 1, branch >> 1, bank,
 		 buf_id, rdwr_str(rdwr), ras, cas);
 
 	/* Only 1 bit will be on */
@@ -1054,8 +1054,6 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
 	u32 actual_tolm;
 	u16 limit;
 	int slot_row;
-	int maxch;
-	int maxdimmperch;
 	int way0, way1;
 
 	pvt = mci->pvt_info;
@@ -1065,9 +1063,6 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
 	pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
 			&pvt->u.ambase_top);
 
-	maxdimmperch = pvt->maxdimmperch;
-	maxch = pvt->maxch;
-
 	edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
 		 (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
@@ -1170,17 +1165,13 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
 {
 	struct i5400_pvt *pvt;
 	struct dimm_info *dimm;
-	int ndimms, channel_count;
-	int max_dimms;
+	int ndimms;
 	int mtr;
 	int size_mb;
 	int channel, slot;
 
 	pvt = mci->pvt_info;
 
-	channel_count = pvt->maxch;
-	max_dimms = pvt->maxdimmperch;
-
 	ndimms = 0;
 
 	/*
@@ -1196,8 +1187,7 @@ static int i5400_init_dimms(struct mem_ctl_info *mci)
 			if (!MTR_DIMMS_PRESENT(mtr))
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-					     channel / 2, channel % 2, slot);
+			dimm = edac_get_dimm(mci, channel / 2, channel % 2, slot);
 
 			size_mb = pvt->dimm_info[slot][channel].megabytes;
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 7bf910d54d11..2e9bbe56cde9 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -580,7 +580,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
  * @ch: Channel number within the branch (0 or 1)
  * @branch: Branch number (0 or 1)
  * @dinfo: Pointer to DIMM info where dimm size is stored
- * @p_csrow: Pointer to the struct csrow_info that corresponds to that element
+ * @dimm: Pointer to the struct dimm_info that corresponds to that element
 */
 static int decode_mtr(struct i7300_pvt *pvt,
 		      int slot, int ch, int branch,
@@ -794,8 +794,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 		for (ch = 0; ch < max_channel; ch++) {
 			int channel = to_channel(ch, branch);
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, branch, ch, slot);
+			dimm = edac_get_dimm(mci, branch, ch, slot);
 
 			dinfo = &pvt->dimm_info[slot][channel];
 
@@ -817,7 +816,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 
 /**
  * decode_mir() - Decodes Memory Interleave Register (MIR) info
- * @int mir_no: number of the MIR register to decode
+ * @mir_no: number of the MIR register to decode
  * @mir: array with the MIR data cached on the driver
 */
 static void decode_mir(int mir_no, u16 mir[MAX_MIR])
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index a71cca6eeb33..b3135b208f9a 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -585,8 +585,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 			if (!DIMM_PRESENT(dimm_dod[j]))
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-					     i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
 			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
 			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 7c6a2d4d2360..6be99e0d850d 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -485,7 +485,7 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
 		goto fail0;
 	}
 	mchbar &= 0xffffc000;	/* bits 31:14 used for 16K window */
-	mch_window = ioremap_nocache(mchbar, 0x1000);
+	mch_window = ioremap(mchbar, 0x1000);
 	if (!mch_window) {
 		edac_dbg(3, "error ioremapping MCHBAR!\n");
 		goto fail0;
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index d26300f9cb07..d68346a8e141 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -357,7 +357,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
 		return NULL;
 	}
 
-	window = ioremap_nocache(u.mchbar, IE31200_MMR_WINDOW_SIZE);
+	window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE);
 	if (!window)
 		ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n",
 			       (unsigned long long)u.mchbar);
@@ -490,9 +490,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 
 			if (dimm_info[j][i].dual_rank) {
 				nr_pages = nr_pages / 2;
-				dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-						     mci->n_layers, (i * 2) + 1,
-						     j, 0);
+				dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0);
 				dimm->nr_pages = nr_pages;
 				edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 				dimm->grain = 8; /* just a guess */
@@ -503,8 +501,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 				dimm->dtype = DEV_UNKNOWN;
 				dimm->edac_mode = EDAC_UNKNOWN;
 			}
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, i * 2, j, 0);
+			dimm = edac_get_dimm(mci, i * 2, j, 0);
 			dimm->nr_pages = nr_pages;
 			edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 			dimm->grain = 8; /* same guess */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea622c6f3a39..ea980c556f2e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -6,7 +6,7 @@
 
 #include "mce_amd.h"
 
-static struct amd_decoder_ops *fam_ops;
+static struct amd_decoder_ops fam_ops;
 
 static u8 xec_mask = 0xf;
 
@@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = {
 	"L2 Fill Data error",
 };
 
+static const char * const smca_ls2_mce_desc[] = {
+	"An ECC error was detected on a data cache read by a probe or victimization",
+	"An ECC error or L2 poison was detected on a data cache read by a load",
+	"An ECC error was detected on a data cache read-modify-write by a store",
+	"An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
+	"An ECC error or poison bit mismatch was detected on a tag read by a load",
+	"An ECC error or poison bit mismatch was detected on a tag read by a store",
+	"An ECC error was detected on an EMEM read by a load",
+	"An ECC error was detected on an EMEM read-modify-write by a store",
+	"A parity error was detected in an L1 TLB entry by any access",
+	"A parity error was detected in an L2 TLB entry by any access",
+	"A parity error was detected in a PWC entry by any access",
+	"A parity error was detected in an STQ entry by any access",
+	"A parity error was detected in an LDQ entry by any access",
+	"A parity error was detected in a MAB entry by any access",
+	"A parity error was detected in an SCB entry state field by any access",
+	"A parity error was detected in an SCB entry address field by any access",
+	"A parity error was detected in an SCB entry data field by any access",
+	"A parity error was detected in a WCB entry by any access",
+	"A poisoned line was detected in an SCB entry by any access",
+	"A SystemReadDataError error was reported on read data returned from L2 for a load",
+	"A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
+	"A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
+	"A hardware assertion error was reported",
+	"A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
+};
+
 static const char * const smca_if_mce_desc[] = {
 	"Op Cache Microtag Probe Port Parity Error",
 	"IC Microtag or Full Tag Multi-hit Error",
@@ -378,6 +405,7 @@ struct smca_mce_desc {
 
 static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
+	[SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
 	[SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
 	[SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
 	[SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
@@ -555,7 +583,7 @@ static void decode_mc0_mce(struct mce *m)
 					    : (xec ? "multimatch" : "parity")));
 			return;
 		}
-	} else if (fam_ops->mc0_mce(ec, xec))
+	} else if (fam_ops.mc0_mce(ec, xec))
 		;
 	else
 		pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
@@ -669,7 +697,7 @@ static void decode_mc1_mce(struct mce *m)
 			pr_cont("Hardware Assert.\n");
 		else
 			goto wrong_mc1_mce;
-	} else if (fam_ops->mc1_mce(ec, xec))
+	} else if (fam_ops.mc1_mce(ec, xec))
 		;
 	else
 		goto wrong_mc1_mce;
@@ -803,7 +831,7 @@ static void decode_mc2_mce(struct mce *m)
 
 	pr_emerg(HW_ERR "MC2 Error: ");
 
-	if (!fam_ops->mc2_mce(ec, xec))
+	if (!fam_ops.mc2_mce(ec, xec))
 		pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
 }
@@ -1102,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 	if (m->tsc)
 		pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
 
-	if (!fam_ops)
+	/* Doesn't matter which member to test. */
+	if (!fam_ops.mc0_mce)
 		goto err_code;
 
 	switch (m->bank) {
@@ -1157,80 +1186,73 @@ static int __init mce_amd_init(void)
 	    c->x86_vendor != X86_VENDOR_HYGON)
 		return -ENODEV;
 
-	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
-	if (!fam_ops)
-		return -ENOMEM;
+	if (boot_cpu_has(X86_FEATURE_SMCA)) {
+		xec_mask = 0x3f;
+		goto out;
+	}
 
 	switch (c->x86) {
 	case 0xf:
-		fam_ops->mc0_mce = k8_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = k8_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x10:
-		fam_ops->mc0_mce = f10h_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = f10h_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x11:
-		fam_ops->mc0_mce = k8_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = k8_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x12:
-		fam_ops->mc0_mce = f12h_mc0_mce;
-		fam_ops->mc1_mce = k8_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = f12h_mc0_mce;
+		fam_ops.mc1_mce = k8_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x14:
-		fam_ops->mc0_mce = cat_mc0_mce;
-		fam_ops->mc1_mce = cat_mc1_mce;
-		fam_ops->mc2_mce = k8_mc2_mce;
+		fam_ops.mc0_mce = cat_mc0_mce;
+		fam_ops.mc1_mce = cat_mc1_mce;
+		fam_ops.mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x15:
 		xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
 
-		fam_ops->mc0_mce = f15h_mc0_mce;
-		fam_ops->mc1_mce = f15h_mc1_mce;
-		fam_ops->mc2_mce = f15h_mc2_mce;
+		fam_ops.mc0_mce = f15h_mc0_mce;
+		fam_ops.mc1_mce = f15h_mc1_mce;
+		fam_ops.mc2_mce = f15h_mc2_mce;
 		break;
 
 	case 0x16:
 		xec_mask = 0x1f;
-		fam_ops->mc0_mce = cat_mc0_mce;
-		fam_ops->mc1_mce = cat_mc1_mce;
-		fam_ops->mc2_mce = f16h_mc2_mce;
+		fam_ops.mc0_mce = cat_mc0_mce;
+		fam_ops.mc1_mce = cat_mc1_mce;
+		fam_ops.mc2_mce = f16h_mc2_mce;
 		break;
 
 	case 0x17:
 	case 0x18:
-		xec_mask = 0x3f;
-		if (!boot_cpu_has(X86_FEATURE_SMCA)) {
-			printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
-			goto err_out;
-		}
-		break;
+		pr_warn("Decoding supported only on Scalable MCA processors.\n");
+		return -EINVAL;
 
 	default:
 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
-		goto err_out;
+		return -EINVAL;
 	}
 
+out:
 	pr_info("MCE: In-kernel MCE decoding enabled.\n");
 
 	mce_register_decode_chain(&amd_mce_dec_nb);
 
 	return 0;
-
-err_out:
-	kfree(fam_ops);
-	fam_ops = NULL;
-	return -EINVAL;
 }
 early_initcall(mce_amd_init);
@@ -1238,7 +1260,6 @@ early_initcall(mce_amd_init);
 static void __exit mce_amd_exit(void)
 {
 	mce_unregister_decode_chain(&amd_mce_dec_nb);
-	kfree(fam_ops);
 }
 
 MODULE_DESCRIPTION("AMD MCE decoder");
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index ca25f8fe57ef..933f7722b893 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -260,11 +260,14 @@ static u64 get_sideband_reg_base_addr(void)
 	}
 }
 
+#define DNV_MCHBAR_SIZE 0x8000
+#define DNV_SB_PORT_SIZE 0x10000
 static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
 {
 	struct pci_dev *pdev;
 	char *base;
 	u64 addr;
+	unsigned long size;
 
 	if (op == 4) {
 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL);
@@ -279,15 +282,17 @@ static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *na
 			addr = get_mem_ctrl_hub_base_addr();
 			if (!addr)
 				return -ENODEV;
+			size = DNV_MCHBAR_SIZE;
 		} else {
 			/* MMIO via sideband register base address */
 			addr = get_sideband_reg_base_addr();
 			if (!addr)
 				return -ENODEV;
 			addr += (port << 16);
+			size = DNV_SB_PORT_SIZE;
 		}
 
-		base = ioremap((resource_size_t)addr, 0x10000);
+		base = ioremap((resource_size_t)addr, size);
 		if (!base)
 			return -ENODEV;
@@ -1226,7 +1231,7 @@ static void apl_get_dimm_config(struct mem_ctl_info *mci)
 		if (!(chan_mask & BIT(i)))
 			continue;
 
-		dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, 0, 0);
+		dimm = edac_get_dimm(mci, i, 0, 0);
 		if (!dimm) {
 			edac_dbg(0, "No allocated DIMM for channel %d\n", i);
 			continue;
@@ -1306,7 +1311,7 @@ static void dnv_get_dimm_config(struct mem_ctl_info *mci)
 			if (!ranks_of_dimm[j])
 				continue;
 
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			if (!dimm) {
 				edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", i, j);
 				continue;
@@ -1533,7 +1538,7 @@ static struct dunit_ops dnv_ops = {
 
 static const struct x86_cpu_id pnd2_cpuids[] = {
 	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops },
-	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X, 0, (kernel_ulong_t)&dnv_ops },
+	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_D, 0, (kernel_ulong_t)&dnv_ops },
 	{ }
 };
 MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids);
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 37746b045e18..4957e8ee1879 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -254,18 +254,20 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
  * FIXME: Implement the error count reads directly
 */
 
-static const u32 correrrcnt[] = {
-	0x104, 0x108, 0x10c, 0x110,
-};
-
 #define RANK_ODD_OV(reg)		GET_BITFIELD(reg, 31, 31)
 #define RANK_ODD_ERR_CNT(reg)		GET_BITFIELD(reg, 16, 30)
 #define RANK_EVEN_OV(reg)		GET_BITFIELD(reg, 15, 15)
 #define RANK_EVEN_ERR_CNT(reg)		GET_BITFIELD(reg, 0, 14)
 
+#if 0 /* Currently unused */
+static const u32 correrrcnt[] = {
+	0x104, 0x108, 0x10c, 0x110,
+};
+
 static const u32 correrrthrsld[] = {
 	0x11c, 0x120, 0x124, 0x128,
 };
+#endif
 
 #define RANK_ODD_ERR_THRSLD(reg)	GET_BITFIELD(reg, 16, 30)
 #define RANK_EVEN_ERR_THRSLD(reg)	GET_BITFIELD(reg, 0, 14)
@@ -1340,7 +1342,7 @@ static void knl_show_mc_route(u32 reg, char *s)
 */
 static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
 {
-	u64 sad_base, sad_size, sad_limit = 0;
+	u64 sad_base, sad_limit = 0;
 	u64 tad_base, tad_size, tad_limit, tad_deadspace, tad_livespace;
 	int sad_rule = 0;
 	int tad_rule = 0;
@@ -1427,7 +1429,6 @@ static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
 		edram_only = KNL_EDRAM_ONLY(dram_rule);
 
 		sad_limit = pvt->info.sad_limit(dram_rule)+1;
-		sad_size = sad_limit - sad_base;
 
 		pci_read_config_dword(pvt->pci_sad0,
 			pvt->info.interleave_list[sad_rule], &interleave_reg);
@@ -1620,7 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
 		}
 		for (j = 0; j < max_dimms_per_channel; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			if (pvt->info.type == KNIGHTS_LANDING) {
 				pci_read_config_dword(pvt->knl.pci_channel[i],
 					knl_mtr_reg, &mtr);
@@ -2952,7 +2953,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	struct mem_ctl_info *new_mci;
 	struct sbridge_pvt *pvt = mci->pvt_info;
 	enum hw_event_mc_err_type tp_event;
-	char *type, *optype, msg[256];
+	char *optype, msg[256];
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -2981,14 +2982,11 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	if (uncorrected_error) {
 		core_err_cnt = 1;
 		if (ripv) {
-			type = "FATAL";
 			tp_event = HW_EVENT_ERR_FATAL;
 		} else {
-			type = "NON_FATAL";
 			tp_event = HW_EVENT_ERR_UNCORRECTED;
 		}
 	} else {
-		type = "CORRECTED";
 		tp_event = HW_EVENT_ERR_CORRECTED;
 	}
@@ -3200,7 +3198,6 @@ static struct notifier_block sbridge_mce_dec = {
 static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
 {
 	struct mem_ctl_info *mci = sbridge_dev->mci;
-	struct sbridge_pvt *pvt;
 
 	if (unlikely(!mci || !mci->pvt_info)) {
 		edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
@@ -3209,8 +3206,6 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
 		return;
 	}
 
-	pvt = mci->pvt_info;
-
 	edac_dbg(0, "MC: mci = %p, dev = %p\n",
 		 mci, &sbridge_dev->pdev[0]->dev);
@@ -3429,7 +3424,7 @@ static const struct x86_cpu_id sbridge_cpuids[] = {
 	INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table),
 	INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table),
 	INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table),
-	INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
+	INTEL_CPU_FAM6(BROADWELL_D, pci_dev_descr_broadwell_table),
 	INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table),
 	INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table),
 	{ }
 };
diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c
index 413cdb4a591d..3a3dcb14ed99 100644
--- a/drivers/edac/sifive_edac.c
+++ b/drivers/edac/sifive_edac.c
@@ -10,7 +10,7 @@
 #include <linux/edac.h>
 #include <linux/platform_device.h>
 #include "edac_module.h"
-#include <asm/sifive_l2_cache.h>
+#include <soc/sifive/sifive_l2_cache.h>
 
 #define DRVNAME "sifive_edac"
 
@@ -54,8 +54,8 @@ static int ecc_register(struct platform_device *pdev)
 	p->dci = edac_device_alloc_ctl_info(0, "sifive_ecc", 1, "sifive_ecc",
 					    1, 1, NULL, 0,
 					    edac_device_alloc_index());
-	if (IS_ERR(p->dci))
-		return PTR_ERR(p->dci);
+	if (!p->dci)
+		return -ENOMEM;
 
 	p->dci->dev = &pdev->dev;
 	p->dci->mod_name = "Sifive ECC Manager";
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 0fcf3785e8f3..83545b4facb7 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -46,7 +46,8 @@ static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
 }
 
 enum munittype {
-	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD,
+	ERRCHAN0, ERRCHAN1, ERRCHAN2,
 };
 
 struct munit {
@@ -68,6 +69,9 @@ static const struct munit skx_all_munits[] = {
 	{ 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
 	{ 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
 	{ 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+	{ 0x2043, { PCI_DEVFN(10, 3), PCI_DEVFN(12, 3) }, 2, 2, ERRCHAN0 },
+	{ 0x2047, { PCI_DEVFN(10, 7), PCI_DEVFN(12, 7) }, 2, 2, ERRCHAN1 },
+	{ 0x204b, { PCI_DEVFN(11, 3), PCI_DEVFN(13, 3) }, 2, 2, ERRCHAN2 },
 	{ 0x208e, { }, 1, 0, SAD },
 	{ }
 };
@@ -104,10 +108,18 @@ static int get_all_munits(const struct munit *m)
 		}
 
 		switch (m->mtype) {
-		case CHAN0: case CHAN1: case CHAN2:
+		case CHAN0:
+		case CHAN1:
+		case CHAN2:
 			pci_dev_get(pdev);
 			d->imc[i].chan[m->mtype].cdev = pdev;
 			break;
+		case ERRCHAN0:
+		case ERRCHAN1:
+		case ERRCHAN2:
+			pci_dev_get(pdev);
+			d->imc[i].chan[m->mtype - ERRCHAN0].edev = pdev;
+			break;
 		case SAD_ALL:
 			pci_dev_get(pdev);
 			d->sad_all = pdev;
@@ -177,8 +189,7 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
 		pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
 		pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
 		for (j = 0; j < SKX_NUM_DIMMS; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
-					     mci->n_layers, i, j, 0);
+			dimm = edac_get_dimm(mci, i, j, 0);
 			pci_read_config_dword(imc->chan[i].cdev,
 					      0x80 + 4 * j, &mtr);
 			if (IS_DIMM_PRESENT(mtr)) {
@@ -216,6 +227,39 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
 #define	SKX_ILV_REMOTE(tgt)	(((tgt) & 8) == 0)
 #define	SKX_ILV_TARGET(tgt)	((tgt) & 7)
 
+static void skx_show_retry_rd_err_log(struct decoded_addr *res,
+				      char *msg, int len)
+{
+	u32 log0, log1, log2, log3, log4;
+	u32 corr0, corr1, corr2, corr3;
+	struct pci_dev *edev;
+	int n;
+
+	edev = res->dev->imc[res->imc].chan[res->channel].edev;
+
+	pci_read_config_dword(edev, 0x154, &log0);
+	pci_read_config_dword(edev, 0x148, &log1);
+	pci_read_config_dword(edev, 0x150, &log2);
+	pci_read_config_dword(edev, 0x15c, &log3);
+	pci_read_config_dword(edev, 0x114, &log4);
+
+	n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x]",
+		     log0, log1, log2, log3, log4);
+
+	pci_read_config_dword(edev, 0x104, &corr0);
+	pci_read_config_dword(edev, 0x108, &corr1);
+	pci_read_config_dword(edev, 0x10c, &corr2);
+	pci_read_config_dword(edev, 0x110, &corr3);
+
+	if (len - n > 0)
+		snprintf(msg + n, len - n,
+			" correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
+			corr0 & 0xffff, corr0 >> 16,
+			corr1 & 0xffff, corr1 >> 16,
+			corr2 & 0xffff, corr2 >> 16,
+			corr3 & 0xffff, corr3 >> 16);
+}
+
 static bool skx_sad_decode(struct decoded_addr *res)
 {
 	struct skx_dev *d = list_first_entry(skx_edac_list, typeof(*d), list);
@@ -659,7 +703,7 @@ static int __init skx_init(void)
 		}
 	}
 
-	skx_set_decode(skx_decode);
+	skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
 
 	if (nvdimm_count && skx_adxl_get() == -ENODEV)
 		skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index d8ff63d91b86..99bbaf629b8d 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -37,6 +37,7 @@ static char *adxl_msg;
 static char skx_msg[MSG_SIZE];
 
 static skx_decode_f skx_decode;
+static skx_show_retry_log_f skx_show_retry_rd_err_log;
 static u64 skx_tolm, skx_tohm;
 static LIST_HEAD(dev_edac_list);
 
@@ -100,6 +101,7 @@ void __exit skx_adxl_put(void)
 
 static bool skx_adxl_decode(struct decoded_addr *res)
 {
+	struct skx_dev *d;
 	int i, len = 0;
 
 	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
@@ -118,6 +120,24 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
 	res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
 
+	if (res->imc > NUM_IMC - 1) {
+		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
+		return false;
+	}
+
+	list_for_each_entry(d, &dev_edac_list, list) {
+		if (d->imc[0].src_id == res->socket) {
+			res->dev = d;
+			break;
+		}
+	}
+
+	if (!res->dev) {
+		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
+			   res->socket, res->imc);
+		return false;
+	}
+
 	for (i = 0; i < adxl_component_count; i++) {
 		if (adxl_values[i] == ~0x0ull)
 			continue;
@@ -131,9 +151,10 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	return true;
 }
 
-void skx_set_decode(skx_decode_f decode)
+void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
 {
 	skx_decode = decode;
+	skx_show_retry_rd_err_log = show_retry_log;
 }
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
@@ -235,7 +256,7 @@ int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
 
 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
 	if (!pdev) {
-		skx_printk(KERN_ERR, "Can't get tolm/tohm\n");
+		edac_dbg(2, "Can't get tolm/tohm\n");
 		return -ENODEV;
 	}
@@ -452,34 +473,17 @@ static void skx_unregister_mci(struct skx_imc *imc)
 	edac_mc_free(mci);
 }
 
-static struct mem_ctl_info *get_mci(int src_id, int lmc)
-{
-	struct skx_dev *d;
-
-	if (lmc > NUM_IMC - 1) {
-		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
-		return NULL;
-	}
-
-	list_for_each_entry(d, &dev_edac_list, list) {
-		if (d->imc[0].src_id == src_id)
-			return d->imc[lmc].mci;
-	}
-
-	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
-	return NULL;
-}
-
 static void skx_mce_output_error(struct mem_ctl_info *mci,
 				 const struct mce *m,
 				 struct decoded_addr *res)
 {
 	enum hw_event_mc_err_type tp_event;
-	char *type, *optype;
+	char *optype;
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
 	bool recoverable;
+	int len;
 	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
 	u32 mscod = GET_BITFIELD(m->status, 16, 31);
 	u32 errcode = GET_BITFIELD(m->status, 0, 15);
@@ -490,14 +494,11 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 	if (uncorrected_error) {
 		core_err_cnt = 1;
 		if (ripv) {
-			type = "FATAL";
 			tp_event = HW_EVENT_ERR_FATAL;
 		} else {
-			type = "NON_FATAL";
 			tp_event = HW_EVENT_ERR_UNCORRECTED;
 		}
 	} else {
-		type = "CORRECTED";
 		tp_event = HW_EVENT_ERR_CORRECTED;
 	}
@@ -539,12 +540,12 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 		}
 	}
 	if (adxl_component_count) {
-		snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
+		len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
			 overflow ? " OVERFLOW" : "",
			 (uncorrected_error && recoverable) ? " recoverable" : "",
			 mscod, errcode, adxl_msg);
 	} else {
-		snprintf(skx_msg, MSG_SIZE,
+		len = snprintf(skx_msg, MSG_SIZE,
			 "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
			 overflow ? " OVERFLOW" : "",
			 (uncorrected_error && recoverable) ? " recoverable" : "",
@@ -553,6 +554,9 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
			 res->bank_group, res->bank_address, res->row, res->column);
 	}
 
+	if (skx_show_retry_rd_err_log)
+		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len);
+
 	edac_dbg(0, "%s\n", skx_msg);
 
 	/* Call the helper to output message */
@@ -583,15 +587,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (adxl_component_count) {
 		if (!skx_adxl_decode(&res))
 			return NOTIFY_DONE;
-
-		mci = get_mci(res.socket, res.imc);
-	} else {
-		if (!skx_decode || !skx_decode(&res))
-			return NOTIFY_DONE;
-
-		mci = res.dev->imc[res.imc].mci;
+	} else if (!skx_decode || !skx_decode(&res)) {
+		return NOTIFY_DONE;
 	}
 
+	mci = res.dev->imc[res.imc].mci;
+
 	if (!mci)
 		return NOTIFY_DONE;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 08cc971a50ea..60d1ea669afd 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -64,6 +64,7 @@ struct skx_dev {
 	u8 src_id, node_id;
 	struct skx_channel {
 		struct pci_dev *cdev;
+		struct pci_dev *edev;
 		struct skx_dimm {
 			u8 close_pg;
 			u8 bank_xor_enable;
@@ -113,10 +114,11 @@ struct decoded_addr {
 
 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
 typedef bool (*skx_decode_f)(struct decoded_addr *res);
+typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len);
 
 int __init skx_adxl_get(void);
 void __exit skx_adxl_put(void);
-void skx_set_decode(skx_decode_f decode);
+void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
 int skx_get_node_id(struct skx_dev *d, u8 *id);
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 6ac26d1b929f..8be3e89a510e 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -135,7 +135,7 @@ static void ti_edac_setup_dimm(struct mem_ctl_info *mci, u32 type)
 	u32 val;
 	u32 memsize;
 
-	dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, 0, 0, 0);
+	dimm = edac_get_dimm(mci, 0, 0, 0);
 
 	val = ti_edac_readl(edac, EMIF_SDRAM_CONFIG);
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
index cc779f3f9e2d..a65e2f78a402 100644
--- a/drivers/edac/x38_edac.c
+++ b/drivers/edac/x38_edac.c
@@ -266,7 +266,7 @@ static void __iomem *x38_map_mchbar(struct pci_dev *pdev)
 		return NULL;
 	}
 
-	window = ioremap_nocache(u.mchbar, X38_MMR_WINDOW_SIZE);
+	window = ioremap(u.mchbar, X38_MMR_WINDOW_SIZE);
 	if (!window)
 		printk(KERN_ERR "x38: cannot map mmio space at 0x%llx\n",
 			(unsigned long long)u.mchbar);