diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 22 | ||||
-rw-r--r-- | drivers/edac/Makefile | 1 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 381 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 201 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 9 | ||||
-rw-r--r-- | drivers/edac/edac_stub.c | 1 | ||||
-rw-r--r-- | drivers/edac/mce_amd_inj.c | 177 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.c | 10 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.h | 1 | ||||
-rw-r--r-- | drivers/edac/xgene_edac.c | 1215 |
10 files changed, 1834 insertions, 184 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index cb59619df23f..8677ead2a8e1 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -2,15 +2,16 @@ # EDAC Kconfig # Copyright (c) 2008 Doug Thompson www.softwarebitmaker.com # Licensed and distributed under the GPL -# + +config EDAC_ATOMIC_SCRUB + bool config EDAC_SUPPORT bool menuconfig EDAC bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM - depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT + depends on HAS_IOMEM && EDAC_SUPPORT help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or @@ -262,10 +263,10 @@ config EDAC_SBRIDGE config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) + depends on EDAC_MM_EDAC && FSL_SOC help Support for error detection and correction on the Freescale - MPC8349, MPC8560, MPC8540, MPC8548 + MPC8349, MPC8560, MPC8540, MPC8548, T4240 config EDAC_MV64X60 tristate "Marvell MV64x60" @@ -377,8 +378,8 @@ config EDAC_OCTEON_PCI Cavium Octeon family of SOCs. config EDAC_ALTERA_MC - tristate "Altera SDRAM Memory Controller EDAC" - depends on EDAC_MM_EDAC && ARCH_SOCFPGA + bool "Altera SDRAM Memory Controller EDAC" + depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA help Support for error detection and correction on the Altera SDRAM memory controller. Note that the @@ -392,4 +393,11 @@ config EDAC_SYNOPSYS Support for error detection and correction on the Synopsys DDR memory controller. +config EDAC_XGENE + tristate "APM X-Gene SoC" + depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST) + help + Support for error detection and correction on the + APM X-Gene family of SOCs. 
+ endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index b255f362b1db..28ef2a519f65 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -68,3 +68,4 @@ obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o +obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3c4929fda9d5..23ef0917483c 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1,5 +1,5 @@ /* - * Copyright Altera Corporation (C) 2014. All rights reserved. + * Copyright Altera Corporation (C) 2014-2015. All rights reserved. * Copyright 2011-2012 Calxeda, Inc. * * This program is free software; you can redistribute it and/or modify it @@ -28,113 +28,92 @@ #include <linux/types.h> #include <linux/uaccess.h> +#include "altera_edac.h" #include "edac_core.h" #include "edac_module.h" #define EDAC_MOD_STR "altera_edac" #define EDAC_VERSION "1" -/* SDRAM Controller CtrlCfg Register */ -#define CTLCFG_OFST 0x00 - -/* SDRAM Controller CtrlCfg Register Bit Masks */ -#define CTLCFG_ECC_EN 0x400 -#define CTLCFG_ECC_CORR_EN 0x800 -#define CTLCFG_GEN_SB_ERR 0x2000 -#define CTLCFG_GEN_DB_ERR 0x4000 - -#define CTLCFG_ECC_AUTO_EN (CTLCFG_ECC_EN | \ - CTLCFG_ECC_CORR_EN) - -/* SDRAM Controller Address Width Register */ -#define DRAMADDRW_OFST 0x2C - -/* SDRAM Controller Address Widths Field Register */ -#define DRAMADDRW_COLBIT_MASK 0x001F -#define DRAMADDRW_COLBIT_SHIFT 0 -#define DRAMADDRW_ROWBIT_MASK 0x03E0 -#define DRAMADDRW_ROWBIT_SHIFT 5 -#define DRAMADDRW_BANKBIT_MASK 0x1C00 -#define DRAMADDRW_BANKBIT_SHIFT 10 -#define DRAMADDRW_CSBIT_MASK 0xE000 -#define DRAMADDRW_CSBIT_SHIFT 13 - -/* SDRAM Controller Interface Data Width Register */ -#define DRAMIFWIDTH_OFST 0x30 - -/* SDRAM Controller Interface Data Width Defines */ -#define DRAMIFWIDTH_16B_ECC 24 -#define DRAMIFWIDTH_32B_ECC 40 - -/* SDRAM Controller DRAM 
Status Register */ -#define DRAMSTS_OFST 0x38 - -/* SDRAM Controller DRAM Status Register Bit Masks */ -#define DRAMSTS_SBEERR 0x04 -#define DRAMSTS_DBEERR 0x08 -#define DRAMSTS_CORR_DROP 0x10 - -/* SDRAM Controller DRAM IRQ Register */ -#define DRAMINTR_OFST 0x3C - -/* SDRAM Controller DRAM IRQ Register Bit Masks */ -#define DRAMINTR_INTREN 0x01 -#define DRAMINTR_SBEMASK 0x02 -#define DRAMINTR_DBEMASK 0x04 -#define DRAMINTR_CORRDROPMASK 0x08 -#define DRAMINTR_INTRCLR 0x10 - -/* SDRAM Controller Single Bit Error Count Register */ -#define SBECOUNT_OFST 0x40 - -/* SDRAM Controller Single Bit Error Count Register Bit Masks */ -#define SBECOUNT_MASK 0x0F - -/* SDRAM Controller Double Bit Error Count Register */ -#define DBECOUNT_OFST 0x44 - -/* SDRAM Controller Double Bit Error Count Register Bit Masks */ -#define DBECOUNT_MASK 0x0F - -/* SDRAM Controller ECC Error Address Register */ -#define ERRADDR_OFST 0x48 - -/* SDRAM Controller ECC Error Address Register Bit Masks */ -#define ERRADDR_MASK 0xFFFFFFFF +static const struct altr_sdram_prv_data c5_data = { + .ecc_ctrl_offset = CV_CTLCFG_OFST, + .ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN, + .ecc_stat_offset = CV_DRAMSTS_OFST, + .ecc_stat_ce_mask = CV_DRAMSTS_SBEERR, + .ecc_stat_ue_mask = CV_DRAMSTS_DBEERR, + .ecc_saddr_offset = CV_ERRADDR_OFST, + .ecc_daddr_offset = CV_ERRADDR_OFST, + .ecc_cecnt_offset = CV_SBECOUNT_OFST, + .ecc_uecnt_offset = CV_DBECOUNT_OFST, + .ecc_irq_en_offset = CV_DRAMINTR_OFST, + .ecc_irq_en_mask = CV_DRAMINTR_INTREN, + .ecc_irq_clr_offset = CV_DRAMINTR_OFST, + .ecc_irq_clr_mask = (CV_DRAMINTR_INTRCLR | CV_DRAMINTR_INTREN), + .ecc_cnt_rst_offset = CV_DRAMINTR_OFST, + .ecc_cnt_rst_mask = CV_DRAMINTR_INTRCLR, +#ifdef CONFIG_EDAC_DEBUG + .ce_ue_trgr_offset = CV_CTLCFG_OFST, + .ce_set_mask = CV_CTLCFG_GEN_SB_ERR, + .ue_set_mask = CV_CTLCFG_GEN_DB_ERR, +#endif +}; -/* Altera SDRAM Memory Controller data */ -struct altr_sdram_mc_data { - struct regmap *mc_vbase; +static const struct altr_sdram_prv_data 
a10_data = { + .ecc_ctrl_offset = A10_ECCCTRL1_OFST, + .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN, + .ecc_stat_offset = A10_INTSTAT_OFST, + .ecc_stat_ce_mask = A10_INTSTAT_SBEERR, + .ecc_stat_ue_mask = A10_INTSTAT_DBEERR, + .ecc_saddr_offset = A10_SERRADDR_OFST, + .ecc_daddr_offset = A10_DERRADDR_OFST, + .ecc_irq_en_offset = A10_ERRINTEN_OFST, + .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK, + .ecc_irq_clr_offset = A10_INTSTAT_OFST, + .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR), + .ecc_cnt_rst_offset = A10_ECCCTRL1_OFST, + .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK, +#ifdef CONFIG_EDAC_DEBUG + .ce_ue_trgr_offset = A10_DIAGINTTEST_OFST, + .ce_set_mask = A10_DIAGINT_TSERRA_MASK, + .ue_set_mask = A10_DIAGINT_TDERRA_MASK, +#endif }; static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) { struct mem_ctl_info *mci = dev_id; struct altr_sdram_mc_data *drvdata = mci->pvt_info; - u32 status, err_count, err_addr; + const struct altr_sdram_prv_data *priv = drvdata->data; + u32 status, err_count = 1, err_addr; - /* Error Address is shared by both SBE & DBE */ - regmap_read(drvdata->mc_vbase, ERRADDR_OFST, &err_addr); + regmap_read(drvdata->mc_vbase, priv->ecc_stat_offset, &status); - regmap_read(drvdata->mc_vbase, DRAMSTS_OFST, &status); - - if (status & DRAMSTS_DBEERR) { - regmap_read(drvdata->mc_vbase, DBECOUNT_OFST, &err_count); + if (status & priv->ecc_stat_ue_mask) { + regmap_read(drvdata->mc_vbase, priv->ecc_daddr_offset, + &err_addr); + if (priv->ecc_uecnt_offset) + regmap_read(drvdata->mc_vbase, priv->ecc_uecnt_offset, + &err_count); panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n", err_count, err_addr); } - if (status & DRAMSTS_SBEERR) { - regmap_read(drvdata->mc_vbase, SBECOUNT_OFST, &err_count); + if (status & priv->ecc_stat_ce_mask) { + regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, + &err_addr); + if (priv->ecc_uecnt_offset) + regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, + &err_count); 
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, 0, 0, 0, -1, mci->ctl_name, ""); - } - - regmap_write(drvdata->mc_vbase, DRAMINTR_OFST, - (DRAMINTR_INTRCLR | DRAMINTR_INTREN)); + /* Clear IRQ to resume */ + regmap_write(drvdata->mc_vbase, priv->ecc_irq_clr_offset, + priv->ecc_irq_clr_mask); - return IRQ_HANDLED; + return IRQ_HANDLED; + } + return IRQ_NONE; } #ifdef CONFIG_EDAC_DEBUG @@ -144,6 +123,7 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file, { struct mem_ctl_info *mci = file->private_data; struct altr_sdram_mc_data *drvdata = mci->pvt_info; + const struct altr_sdram_prv_data *priv = drvdata->data; u32 *ptemp; dma_addr_t dma_handle; u32 reg, read_reg; @@ -156,8 +136,9 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file, return -ENOMEM; } - regmap_read(drvdata->mc_vbase, CTLCFG_OFST, &read_reg); - read_reg &= ~(CTLCFG_GEN_SB_ERR | CTLCFG_GEN_DB_ERR); + regmap_read(drvdata->mc_vbase, priv->ce_ue_trgr_offset, + &read_reg); + read_reg &= ~(priv->ce_set_mask | priv->ue_set_mask); /* Error are injected by writing a word while the SBE or DBE * bit in the CTLCFG register is set. 
Reading the word will @@ -166,20 +147,20 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file, if (count == 3) { edac_printk(KERN_ALERT, EDAC_MC, "Inject Double bit error\n"); - regmap_write(drvdata->mc_vbase, CTLCFG_OFST, - (read_reg | CTLCFG_GEN_DB_ERR)); + regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, + (read_reg | priv->ue_set_mask)); } else { edac_printk(KERN_ALERT, EDAC_MC, "Inject Single bit error\n"); - regmap_write(drvdata->mc_vbase, CTLCFG_OFST, - (read_reg | CTLCFG_GEN_SB_ERR)); + regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, + (read_reg | priv->ce_set_mask)); } ptemp[0] = 0x5A5A5A5A; ptemp[1] = 0xA5A5A5A5; /* Clear the error injection bits */ - regmap_write(drvdata->mc_vbase, CTLCFG_OFST, read_reg); + regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, read_reg); /* Ensure it has been written out */ wmb(); @@ -219,50 +200,106 @@ static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci) {} #endif -/* Get total memory size in bytes */ -static u32 altr_sdram_get_total_mem_size(struct regmap *mc_vbase) +/* Get total memory size from Open Firmware DTB */ +static unsigned long get_total_mem(void) { - u32 size, read_reg, row, bank, col, cs, width; - - if (regmap_read(mc_vbase, DRAMADDRW_OFST, &read_reg) < 0) - return 0; - - if (regmap_read(mc_vbase, DRAMIFWIDTH_OFST, &width) < 0) - return 0; - - col = (read_reg & DRAMADDRW_COLBIT_MASK) >> - DRAMADDRW_COLBIT_SHIFT; - row = (read_reg & DRAMADDRW_ROWBIT_MASK) >> - DRAMADDRW_ROWBIT_SHIFT; - bank = (read_reg & DRAMADDRW_BANKBIT_MASK) >> - DRAMADDRW_BANKBIT_SHIFT; - cs = (read_reg & DRAMADDRW_CSBIT_MASK) >> - DRAMADDRW_CSBIT_SHIFT; - - /* Correct for ECC as its not addressible */ - if (width == DRAMIFWIDTH_32B_ECC) - width = 32; - if (width == DRAMIFWIDTH_16B_ECC) - width = 16; - - /* calculate the SDRAM size base on this info */ - size = 1 << (row + bank + col); - size = size * cs * (width / 8); - return size; + struct device_node *np = NULL; + const unsigned int 
*reg, *reg_end; + int len, sw, aw; + unsigned long start, size, total_mem = 0; + + for_each_node_by_type(np, "memory") { + aw = of_n_addr_cells(np); + sw = of_n_size_cells(np); + reg = (const unsigned int *)of_get_property(np, "reg", &len); + reg_end = reg + (len / sizeof(u32)); + + total_mem = 0; + do { + start = of_read_number(reg, aw); + reg += aw; + size = of_read_number(reg, sw); + reg += sw; + total_mem += size; + } while (reg < reg_end); + } + edac_dbg(0, "total_mem 0x%lx\n", total_mem); + return total_mem; +} + +static const struct of_device_id altr_sdram_ctrl_of_match[] = { + { .compatible = "altr,sdram-edac", .data = (void *)&c5_data}, + { .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data}, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); + +static int a10_init(struct regmap *mc_vbase) +{ + if (regmap_update_bits(mc_vbase, A10_INTMODE_OFST, + A10_INTMODE_SB_INT, A10_INTMODE_SB_INT)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error setting SB IRQ mode\n"); + return -ENODEV; + } + + if (regmap_write(mc_vbase, A10_SERRCNTREG_OFST, 1)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error setting trigger count\n"); + return -ENODEV; + } + + return 0; +} + +static int a10_unmask_irq(struct platform_device *pdev, u32 mask) +{ + void __iomem *sm_base; + int ret = 0; + + if (!request_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32), + dev_name(&pdev->dev))) { + edac_printk(KERN_ERR, EDAC_MC, + "Unable to request mem region\n"); + return -EBUSY; + } + + sm_base = ioremap(A10_SYMAN_INTMASK_CLR, sizeof(u32)); + if (!sm_base) { + edac_printk(KERN_ERR, EDAC_MC, + "Unable to ioremap device\n"); + + ret = -ENOMEM; + goto release; + } + + iowrite32(mask, sm_base); + + iounmap(sm_base); + +release: + release_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32)); + + return ret; } static int altr_sdram_probe(struct platform_device *pdev) { + const struct of_device_id *id; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct altr_sdram_mc_data *drvdata; + const 
struct altr_sdram_prv_data *priv; struct regmap *mc_vbase; struct dimm_info *dimm; - u32 read_reg, mem_size; - int irq; - int res = 0; + u32 read_reg; + int irq, irq2, res = 0; + unsigned long mem_size, irqflags = 0; + + id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev); + if (!id) + return -ENODEV; - /* Validate the SDRAM controller has ECC enabled */ /* Grab the register range from the sdr controller in device tree */ mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "altr,sdr-syscon"); @@ -272,25 +309,46 @@ static int altr_sdram_probe(struct platform_device *pdev) return -ENODEV; } - if (regmap_read(mc_vbase, CTLCFG_OFST, &read_reg) || - ((read_reg & CTLCFG_ECC_AUTO_EN) != CTLCFG_ECC_AUTO_EN)) { + /* Check specific dependencies for the module */ + priv = of_match_node(altr_sdram_ctrl_of_match, + pdev->dev.of_node)->data; + + /* Validate the SDRAM controller has ECC enabled */ + if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) || + ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) { edac_printk(KERN_ERR, EDAC_MC, "No ECC/ECC disabled [0x%08X]\n", read_reg); return -ENODEV; } /* Grab memory size from device tree. 
*/ - mem_size = altr_sdram_get_total_mem_size(mc_vbase); + mem_size = get_total_mem(); if (!mem_size) { + edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n"); + return -ENODEV; + } + + /* Ensure the SDRAM Interrupt is disabled */ + if (regmap_update_bits(mc_vbase, priv->ecc_irq_en_offset, + priv->ecc_irq_en_mask, 0)) { + edac_printk(KERN_ERR, EDAC_MC, + "Error disabling SDRAM ECC IRQ\n"); + return -ENODEV; + } + + /* Toggle to clear the SDRAM Error count */ + if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset, + priv->ecc_cnt_rst_mask, + priv->ecc_cnt_rst_mask)) { edac_printk(KERN_ERR, EDAC_MC, - "Unable to calculate memory size\n"); + "Error clearing SDRAM ECC count\n"); return -ENODEV; } - /* Ensure the SDRAM Interrupt is disabled and cleared */ - if (regmap_write(mc_vbase, DRAMINTR_OFST, DRAMINTR_INTRCLR)) { + if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset, + priv->ecc_cnt_rst_mask, 0)) { edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC IRQ\n"); + "Error clearing SDRAM ECC count\n"); return -ENODEV; } @@ -301,6 +359,9 @@ static int altr_sdram_probe(struct platform_device *pdev) return -ENODEV; } + /* Arria10 has a 2nd IRQ */ + irq2 = platform_get_irq(pdev, 1); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; layers[0].size = 1; layers[0].is_virt_csrow = true; @@ -315,9 +376,12 @@ static int altr_sdram_probe(struct platform_device *pdev) mci->pdev = &pdev->dev; drvdata = mci->pvt_info; drvdata->mc_vbase = mc_vbase; + drvdata->data = priv; platform_set_drvdata(pdev, mci); if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + edac_printk(KERN_ERR, EDAC_MC, + "Unable to get managed device resource\n"); res = -ENOMEM; goto free; } @@ -342,8 +406,32 @@ static int altr_sdram_probe(struct platform_device *pdev) if (res < 0) goto err; + /* Only the Arria10 has separate IRQs */ + if (irq2 > 0) { + /* Arria10 specific initialization */ + res = a10_init(mc_vbase); + if (res < 0) + goto err2; + + res = devm_request_irq(&pdev->dev, 
irq2, + altr_sdram_mc_err_handler, + IRQF_SHARED, dev_name(&pdev->dev), mci); + if (res < 0) { + edac_mc_printk(mci, KERN_ERR, + "Unable to request irq %d\n", irq2); + res = -ENODEV; + goto err2; + } + + res = a10_unmask_irq(pdev, A10_DDR0_IRQ_MASK); + if (res < 0) + goto err2; + + irqflags = IRQF_SHARED; + } + res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler, - 0, dev_name(&pdev->dev), mci); + irqflags, dev_name(&pdev->dev), mci); if (res < 0) { edac_mc_printk(mci, KERN_ERR, "Unable to request irq %d\n", irq); @@ -351,8 +439,9 @@ static int altr_sdram_probe(struct platform_device *pdev) goto err2; } - if (regmap_write(drvdata->mc_vbase, DRAMINTR_OFST, - (DRAMINTR_INTRCLR | DRAMINTR_INTREN))) { + /* Infrastructure ready - enable the IRQ */ + if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset, + priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) { edac_mc_printk(mci, KERN_ERR, "Error enabling SDRAM ECC IRQ\n"); res = -ENODEV; @@ -388,17 +477,31 @@ static int altr_sdram_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id altr_sdram_ctrl_of_match[] = { - { .compatible = "altr,sdram-edac", }, - {}, +/* + * If you want to suspend, need to disable EDAC by removing it + * from the device tree or defconfig. 
+ */ +#ifdef CONFIG_PM +static int altr_sdram_prepare(struct device *dev) +{ + pr_err("Suspend not allowed when EDAC is enabled.\n"); + + return -EPERM; +} + +static const struct dev_pm_ops altr_sdram_pm_ops = { + .prepare = altr_sdram_prepare, }; -MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); +#endif static struct platform_driver altr_sdram_edac_driver = { .probe = altr_sdram_probe, .remove = altr_sdram_remove, .driver = { .name = "altr_sdram_edac", +#ifdef CONFIG_PM + .pm = &altr_sdram_pm_ops, +#endif .of_match_table = altr_sdram_ctrl_of_match, }, }; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h new file mode 100644 index 000000000000..7b64dc7c4eb7 --- /dev/null +++ b/drivers/edac/altera_edac.h @@ -0,0 +1,201 @@ +/* + * + * Copyright (C) 2015 Altera Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef _ALTERA_EDAC_H +#define _ALTERA_EDAC_H + +#include <linux/edac.h> +#include <linux/types.h> + +/* SDRAM Controller CtrlCfg Register */ +#define CV_CTLCFG_OFST 0x00 + +/* SDRAM Controller CtrlCfg Register Bit Masks */ +#define CV_CTLCFG_ECC_EN 0x400 +#define CV_CTLCFG_ECC_CORR_EN 0x800 +#define CV_CTLCFG_GEN_SB_ERR 0x2000 +#define CV_CTLCFG_GEN_DB_ERR 0x4000 + +#define CV_CTLCFG_ECC_AUTO_EN (CV_CTLCFG_ECC_EN | \ + CV_CTLCFG_ECC_CORR_EN) + +/* SDRAM Controller Address Width Register */ +#define CV_DRAMADDRW_OFST 0x2C + +/* SDRAM Controller Address Widths Field Register */ +#define DRAMADDRW_COLBIT_MASK 0x001F +#define DRAMADDRW_COLBIT_SHIFT 0 +#define DRAMADDRW_ROWBIT_MASK 0x03E0 +#define DRAMADDRW_ROWBIT_SHIFT 5 +#define CV_DRAMADDRW_BANKBIT_MASK 0x1C00 +#define CV_DRAMADDRW_BANKBIT_SHIFT 10 +#define CV_DRAMADDRW_CSBIT_MASK 0xE000 +#define CV_DRAMADDRW_CSBIT_SHIFT 13 + +/* SDRAM Controller Interface Data Width Register */ +#define CV_DRAMIFWIDTH_OFST 0x30 + +/* SDRAM Controller Interface Data Width Defines */ +#define CV_DRAMIFWIDTH_16B_ECC 24 +#define CV_DRAMIFWIDTH_32B_ECC 40 + +/* SDRAM Controller DRAM Status Register */ +#define CV_DRAMSTS_OFST 0x38 + +/* SDRAM Controller DRAM Status Register Bit Masks */ +#define CV_DRAMSTS_SBEERR 0x04 +#define CV_DRAMSTS_DBEERR 0x08 +#define CV_DRAMSTS_CORR_DROP 0x10 + +/* SDRAM Controller DRAM IRQ Register */ +#define CV_DRAMINTR_OFST 0x3C + +/* SDRAM Controller DRAM IRQ Register Bit Masks */ +#define CV_DRAMINTR_INTREN 0x01 +#define CV_DRAMINTR_SBEMASK 0x02 +#define CV_DRAMINTR_DBEMASK 0x04 +#define CV_DRAMINTR_CORRDROPMASK 0x08 +#define CV_DRAMINTR_INTRCLR 0x10 + +/* SDRAM Controller Single Bit Error Count Register */ +#define CV_SBECOUNT_OFST 0x40 + +/* SDRAM Controller Double Bit Error Count Register */ +#define CV_DBECOUNT_OFST 0x44 + +/* SDRAM Controller ECC Error Address Register */ +#define CV_ERRADDR_OFST 0x48 + +/*-----------------------------------------*/ + +/* SDRAM Controller EccCtrl Register */ 
+#define A10_ECCCTRL1_OFST 0x00 + +/* SDRAM Controller EccCtrl Register Bit Masks */ +#define A10_ECCCTRL1_ECC_EN 0x001 +#define A10_ECCCTRL1_CNT_RST 0x010 +#define A10_ECCCTRL1_AWB_CNT_RST 0x100 +#define A10_ECC_CNT_RESET_MASK (A10_ECCCTRL1_CNT_RST | \ + A10_ECCCTRL1_AWB_CNT_RST) + +/* SDRAM Controller Address Width Register */ +#define CV_DRAMADDRW 0xFFC2502C +#define A10_DRAMADDRW 0xFFCFA0A8 + +/* SDRAM Controller Address Widths Field Register */ +#define DRAMADDRW_COLBIT_MASK 0x001F +#define DRAMADDRW_COLBIT_SHIFT 0 +#define DRAMADDRW_ROWBIT_MASK 0x03E0 +#define DRAMADDRW_ROWBIT_SHIFT 5 +#define CV_DRAMADDRW_BANKBIT_MASK 0x1C00 +#define CV_DRAMADDRW_BANKBIT_SHIFT 10 +#define CV_DRAMADDRW_CSBIT_MASK 0xE000 +#define CV_DRAMADDRW_CSBIT_SHIFT 13 + +#define A10_DRAMADDRW_BANKBIT_MASK 0x3C00 +#define A10_DRAMADDRW_BANKBIT_SHIFT 10 +#define A10_DRAMADDRW_GRPBIT_MASK 0xC000 +#define A10_DRAMADDRW_GRPBIT_SHIFT 14 +#define A10_DRAMADDRW_CSBIT_MASK 0x70000 +#define A10_DRAMADDRW_CSBIT_SHIFT 16 + +/* SDRAM Controller Interface Data Width Register */ +#define CV_DRAMIFWIDTH 0xFFC25030 +#define A10_DRAMIFWIDTH 0xFFCFB008 + +/* SDRAM Controller Interface Data Width Defines */ +#define CV_DRAMIFWIDTH_16B_ECC 24 +#define CV_DRAMIFWIDTH_32B_ECC 40 + +#define A10_DRAMIFWIDTH_16B 0x0 +#define A10_DRAMIFWIDTH_32B 0x1 +#define A10_DRAMIFWIDTH_64B 0x2 + +/* SDRAM Controller DRAM IRQ Register */ +#define A10_ERRINTEN_OFST 0x10 + +/* SDRAM Controller DRAM IRQ Register Bit Masks */ +#define A10_ERRINTEN_SERRINTEN 0x01 +#define A10_ERRINTEN_DERRINTEN 0x02 +#define A10_ECC_IRQ_EN_MASK (A10_ERRINTEN_SERRINTEN | \ + A10_ERRINTEN_DERRINTEN) + +/* SDRAM Interrupt Mode Register */ +#define A10_INTMODE_OFST 0x1C +#define A10_INTMODE_SB_INT 1 + +/* SDRAM Controller Error Status Register */ +#define A10_INTSTAT_OFST 0x20 + +/* SDRAM Controller Error Status Register Bit Masks */ +#define A10_INTSTAT_SBEERR 0x01 +#define A10_INTSTAT_DBEERR 0x02 + +/* SDRAM Controller ECC Error Address Register */ 
+#define A10_DERRADDR_OFST 0x2C +#define A10_SERRADDR_OFST 0x30 + +/* SDRAM Controller ECC Diagnostic Register */ +#define A10_DIAGINTTEST_OFST 0x24 + +#define A10_DIAGINT_TSERRA_MASK 0x0001 +#define A10_DIAGINT_TDERRA_MASK 0x0100 + +#define A10_SBERR_IRQ 34 +#define A10_DBERR_IRQ 32 + +/* SDRAM Single Bit Error Count Compare Set Register */ +#define A10_SERRCNTREG_OFST 0x3C + +#define A10_SYMAN_INTMASK_CLR 0xFFD06098 +#define A10_INTMASK_CLR_OFST 0x10 +#define A10_DDR0_IRQ_MASK BIT(17) + +struct altr_sdram_prv_data { + int ecc_ctrl_offset; + int ecc_ctl_en_mask; + int ecc_cecnt_offset; + int ecc_uecnt_offset; + int ecc_stat_offset; + int ecc_stat_ce_mask; + int ecc_stat_ue_mask; + int ecc_saddr_offset; + int ecc_daddr_offset; + int ecc_irq_en_offset; + int ecc_irq_en_mask; + int ecc_irq_clr_offset; + int ecc_irq_clr_mask; + int ecc_cnt_rst_offset; + int ecc_cnt_rst_mask; +#ifdef CONFIG_EDAC_DEBUG + struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr; + int ecc_enable_mask; + int ce_set_mask; + int ue_set_mask; + int ce_ue_trgr_offset; +#endif +}; + +/* Altera SDRAM Memory Controller data */ +struct altr_sdram_mc_data { + struct regmap *mc_vbase; + int sb_irq; + int db_irq; + const struct altr_sdram_prv_data *data; +}; + +#endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index af3be1914dbb..943ed8cf71b9 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -30,11 +30,16 @@ #include <linux/bitops.h> #include <asm/uaccess.h> #include <asm/page.h> -#include <asm/edac.h> #include "edac_core.h" #include "edac_module.h" #include <ras/ras_event.h> +#ifdef CONFIG_EDAC_ATOMIC_SCRUB +#include <asm/edac.h> +#else +#define edac_atomic_scrub(va, size) do { } while (0) +#endif + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -874,7 +879,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, virt_addr = kmap_atomic(pg); /* 
Perform architecture specific atomic scrub operation */ - atomic_scrub(virt_addr + offset, size); + edac_atomic_scrub(virt_addr + offset, size); /* Unmap and complete */ kunmap_atomic(virt_addr); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 9d9e18aefaaa..ff07aae5b7fb 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -16,7 +16,6 @@ #include <linux/edac.h> #include <linux/atomic.h> #include <linux/device.h> -#include <asm/edac.h> int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index f7681b553fd5..4c73e4d03d46 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -15,6 +15,8 @@ #include <linux/device.h> #include <linux/module.h> #include <linux/cpu.h> +#include <linux/string.h> +#include <linux/uaccess.h> #include <asm/mce.h> #include "mce_amd.h" @@ -25,6 +27,25 @@ static struct mce i_mce; static struct dentry *dfs_inj; +static u8 n_banks; + +#define MAX_FLAG_OPT_SIZE 3 + +enum injection_type { + SW_INJ = 0, /* SW injection, simply decode the error */ + HW_INJ, /* Trigger a #MC */ + N_INJ_TYPES, +}; + +static const char * const flags_options[] = { + [SW_INJ] = "sw", + [HW_INJ] = "hw", + NULL +}; + +/* Set default injection to SW_INJ */ +static enum injection_type inj_type = SW_INJ; + #define MCE_INJECT_SET(reg) \ static int inj_##reg##_set(void *data, u64 val) \ { \ @@ -79,24 +100,66 @@ static int toggle_hw_mce_inject(unsigned int cpu, bool enable) return err; } -static int flags_get(void *data, u64 *val) +static int __set_inj(const char *buf) { - struct mce *m = (struct mce *)data; + int i; - *val = m->inject_flags; + for (i = 0; i < N_INJ_TYPES; i++) { + if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) { + inj_type = i; + return 0; + } + } + return -EINVAL; +} - return 0; +static ssize_t flags_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char 
buf[MAX_FLAG_OPT_SIZE]; + int n; + + n = sprintf(buf, "%s\n", flags_options[inj_type]); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, n); } -static int flags_set(void *data, u64 val) +static ssize_t flags_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) { - struct mce *m = (struct mce *)data; + char buf[MAX_FLAG_OPT_SIZE], *__buf; + int err; + size_t ret; - m->inject_flags = (u8)val; - return 0; + if (cnt > MAX_FLAG_OPT_SIZE) + cnt = MAX_FLAG_OPT_SIZE; + + ret = cnt; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt - 1] = 0; + + /* strip whitespace */ + __buf = strstrip(buf); + + err = __set_inj(__buf); + if (err) { + pr_err("%s: Invalid flags value: %s\n", __func__, __buf); + return err; + } + + *ppos += ret; + + return ret; } -DEFINE_SIMPLE_ATTRIBUTE(flags_fops, flags_get, flags_set, "%llu\n"); +static const struct file_operations flags_fops = { + .read = flags_read, + .write = flags_write, + .llseek = generic_file_llseek, +}; /* * On which CPU to inject? 
@@ -128,21 +191,24 @@ static void do_inject(void) unsigned int cpu = i_mce.extcpu; u8 b = i_mce.bank; - if (!(i_mce.inject_flags & MCJ_EXCEPTION)) { + if (i_mce.misc) + i_mce.status |= MCI_STATUS_MISCV; + + if (inj_type == SW_INJ) { amd_decode_mce(NULL, 0, &i_mce); return; } - get_online_cpus(); - if (!cpu_online(cpu)) - goto err; - /* prep MCE global settings for the injection */ mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; if (!(i_mce.status & MCI_STATUS_PCC)) mcg_status |= MCG_STATUS_RIPV; + get_online_cpus(); + if (!cpu_online(cpu)) + goto err; + toggle_hw_mce_inject(cpu, true); wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, @@ -174,11 +240,9 @@ static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; - if (val > 5) { - if (boot_cpu_data.x86 != 0x15 || val > 6) { - pr_err("Non-existent MCE bank: %llu\n", val); - return -EINVAL; - } + if (val >= n_banks) { + pr_err("Non-existent MCE bank: %llu\n", val); + return -EINVAL; } m->bank = val; @@ -187,32 +251,81 @@ static int inj_bank_set(void *data, u64 val) return 0; } -static int inj_bank_get(void *data, u64 *val) -{ - struct mce *m = (struct mce *)data; +MCE_INJECT_GET(bank); - *val = m->bank; - return 0; +DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n"); + +static const char readme_msg[] = +"Description of the files and their usages:\n" +"\n" +"Note1: i refers to the bank number below.\n" +"Note2: See respective BKDGs for the exact bit definitions of the files below\n" +"as they mirror the hardware registers.\n" +"\n" +"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n" +"\t attributes of the error which caused the MCE.\n" +"\n" +"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n" +"\t used for error thresholding purposes and its validity is indicated by\n" +"\t MCi_STATUS[MiscV].\n" +"\n" +"addr:\t Error address value to be written to MCi_ADDR. 
Log address information\n" +"\t associated with the error.\n" +"\n" +"cpu:\t The CPU to inject the error on.\n" +"\n" +"bank:\t Specify the bank you want to inject the error into: the number of\n" +"\t banks in a processor varies and is family/model-specific, therefore, the\n" +"\t supplied value is sanity-checked. Setting the bank value also triggers the\n" +"\t injection.\n" +"\n" +"flags:\t Injection type to be performed. Writing to this file will trigger a\n" +"\t real machine check, an APIC interrupt or invoke the error decoder routines\n" +"\t for AMD processors.\n" +"\n" +"\t Allowed error injection types:\n" +"\t - \"sw\": Software error injection. Decode error to a human-readable \n" +"\t format only. Safe to use.\n" +"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n" +"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n" +"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n" +"\t before injecting.\n" +"\n"; + +static ssize_t +inj_readme_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_read_from_buffer(ubuf, cnt, ppos, + readme_msg, strlen(readme_msg)); } -DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n"); +static const struct file_operations readme_fops = { + .read = inj_readme_read, +}; static struct dfs_node { char *name; struct dentry *d; const struct file_operations *fops; + umode_t perm; } dfs_fls[] = { - { .name = "status", .fops = &status_fops }, - { .name = "misc", .fops = &misc_fops }, - { .name = "addr", .fops = &addr_fops }, - { .name = "bank", .fops = &bank_fops }, - { .name = "flags", .fops = &flags_fops }, - { .name = "cpu", .fops = &extcpu_fops }, + { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "bank", .fops = &bank_fops, 
.perm = S_IRUSR | S_IWUSR }, + { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR }, + { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH }, }; static int __init init_mce_inject(void) { int i; + u64 cap; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) @@ -220,7 +333,7 @@ static int __init init_mce_inject(void) for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, - S_IRUSR | S_IWUSR, + dfs_fls[i].perm, dfs_inj, &i_mce, dfs_fls[i].fops); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 68bf234bdfe6..23ef8e9f2c9a 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -811,6 +811,8 @@ static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc, } } +#define make64(high, low) (((u64)(high) << 32) | (low)) + static void mpc85xx_mc_check(struct mem_ctl_info *mci) { struct mpc85xx_mc_pdata *pdata = mci->pvt_info; @@ -818,7 +820,7 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) u32 bus_width; u32 err_detect; u32 syndrome; - u32 err_addr; + u64 err_addr; u32 pfn; int row_index; u32 cap_high; @@ -849,7 +851,9 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) else syndrome &= 0xffff; - err_addr = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS); + err_addr = make64( + in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_EXT_ADDRESS), + in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS)); pfn = err_addr >> PAGE_SHIFT; for (row_index = 0; row_index < mci->nr_csrows; row_index++) { @@ -886,7 +890,7 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) mpc85xx_mc_printk(mci, KERN_ERR, "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", cap_high, cap_low, syndrome); - mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8x\n", err_addr); + mpc85xx_mc_printk(mci, KERN_ERR, 
"Err addr: %#8.8llx\n", err_addr); mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); /* we are out of range */ diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 4498baf9ce05..9352e88d53e5 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -43,6 +43,7 @@ #define MPC85XX_MC_ERR_INT_EN 0x0e48 #define MPC85XX_MC_CAPTURE_ATRIBUTES 0x0e4c #define MPC85XX_MC_CAPTURE_ADDRESS 0x0e50 +#define MPC85XX_MC_CAPTURE_EXT_ADDRESS 0x0e54 #define MPC85XX_MC_ERR_SBE 0x0e58 #define DSC_MEM_EN 0x80000000 diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c new file mode 100644 index 000000000000..14636e4b6a08 --- /dev/null +++ b/drivers/edac/xgene_edac.c @@ -0,0 +1,1215 @@ +/* + * APM X-Gene SoC EDAC (error detection and correction) + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Author: Feng Kan <fkan@apm.com> + * Loc Ho <lho@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/ctype.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/regmap.h> + +#include "edac_core.h" + +#define EDAC_MOD_STR "xgene_edac" + +/* Global error configuration status registers (CSR) */ +#define PCPHPERRINTSTS 0x0000 +#define PCPHPERRINTMSK 0x0004 +#define MCU_CTL_ERR_MASK BIT(12) +#define IOB_PA_ERR_MASK BIT(11) +#define IOB_BA_ERR_MASK BIT(10) +#define IOB_XGIC_ERR_MASK BIT(9) +#define IOB_RB_ERR_MASK BIT(8) +#define L3C_UNCORR_ERR_MASK BIT(5) +#define MCU_UNCORR_ERR_MASK BIT(4) +#define PMD3_MERR_MASK BIT(3) +#define PMD2_MERR_MASK BIT(2) +#define PMD1_MERR_MASK BIT(1) +#define PMD0_MERR_MASK BIT(0) +#define PCPLPERRINTSTS 0x0008 +#define PCPLPERRINTMSK 0x000C +#define CSW_SWITCH_TRACE_ERR_MASK BIT(2) +#define L3C_CORR_ERR_MASK BIT(1) +#define MCU_CORR_ERR_MASK BIT(0) +#define MEMERRINTSTS 0x0010 +#define MEMERRINTMSK 0x0014 + +struct xgene_edac { + struct device *dev; + struct regmap *csw_map; + struct regmap *mcba_map; + struct regmap *mcbb_map; + struct regmap *efuse_map; + void __iomem *pcp_csr; + spinlock_t lock; + struct dentry *dfs; + + struct list_head mcus; + struct list_head pmds; + + struct mutex mc_lock; + int mc_active_mask; + int mc_registered_mask; +}; + +static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val) +{ + *val = readl(edac->pcp_csr + reg); +} + +static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg, + u32 bits_mask) +{ + u32 val; + + spin_lock(&edac->lock); + val = readl(edac->pcp_csr + reg); + val &= ~bits_mask; + writel(val, edac->pcp_csr + reg); + spin_unlock(&edac->lock); +} + +static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg, + u32 bits_mask) +{ + u32 val; + + spin_lock(&edac->lock); + val = readl(edac->pcp_csr + reg); + val |= bits_mask; + writel(val, edac->pcp_csr + reg); + spin_unlock(&edac->lock); +} + +/* Memory 
controller error CSR */ +#define MCU_MAX_RANK 8 +#define MCU_RANK_STRIDE 0x40 + +#define MCUGECR 0x0110 +#define MCU_GECR_DEMANDUCINTREN_MASK BIT(0) +#define MCU_GECR_BACKUCINTREN_MASK BIT(1) +#define MCU_GECR_CINTREN_MASK BIT(2) +#define MUC_GECR_MCUADDRERREN_MASK BIT(9) +#define MCUGESR 0x0114 +#define MCU_GESR_ADDRNOMATCH_ERR_MASK BIT(7) +#define MCU_GESR_ADDRMULTIMATCH_ERR_MASK BIT(6) +#define MCU_GESR_PHYP_ERR_MASK BIT(3) +#define MCUESRR0 0x0314 +#define MCU_ESRR_MULTUCERR_MASK BIT(3) +#define MCU_ESRR_BACKUCERR_MASK BIT(2) +#define MCU_ESRR_DEMANDUCERR_MASK BIT(1) +#define MCU_ESRR_CERR_MASK BIT(0) +#define MCUESRRA0 0x0318 +#define MCUEBLRR0 0x031c +#define MCU_EBLRR_ERRBANK_RD(src) (((src) & 0x00000007) >> 0) +#define MCUERCRR0 0x0320 +#define MCU_ERCRR_ERRROW_RD(src) (((src) & 0xFFFF0000) >> 16) +#define MCU_ERCRR_ERRCOL_RD(src) ((src) & 0x00000FFF) +#define MCUSBECNT0 0x0324 +#define MCU_SBECNT_COUNT(src) ((src) & 0xFFFF) + +#define CSW_CSWCR 0x0000 +#define CSW_CSWCR_DUALMCB_MASK BIT(0) + +#define MCBADDRMR 0x0000 +#define MCBADDRMR_MCU_INTLV_MODE_MASK BIT(3) +#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2) +#define MCBADDRMR_MCB_INTLV_MODE_MASK BIT(1) +#define MCBADDRMR_ADDRESS_MODE_MASK BIT(0) + +struct xgene_edac_mc_ctx { + struct list_head next; + char *name; + struct mem_ctl_info *mci; + struct xgene_edac *edac; + void __iomem *mcu_csr; + u32 mcu_id; +}; + +static ssize_t xgene_edac_mc_err_inject_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct mem_ctl_info *mci = file->private_data; + struct xgene_edac_mc_ctx *ctx = mci->pvt_info; + int i; + + for (i = 0; i < MCU_MAX_RANK; i++) { + writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK | + MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK, + ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE); + } + return count; +} + +static const struct file_operations xgene_edac_mc_debug_inject_fops = { + .open = simple_open, + .write = xgene_edac_mc_err_inject_write, + 
.llseek = generic_file_llseek, +}; + +static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci) +{ + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return; +#ifdef CONFIG_EDAC_DEBUG + if (!mci->debugfs) + return; + debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, + &xgene_edac_mc_debug_inject_fops); +#endif +} + +static void xgene_edac_mc_check(struct mem_ctl_info *mci) +{ + struct xgene_edac_mc_ctx *ctx = mci->pvt_info; + unsigned int pcp_hp_stat; + unsigned int pcp_lp_stat; + u32 reg; + u32 rank; + u32 bank; + u32 count; + u32 col_row; + + xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); + xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat); + if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) || + (MCU_CTL_ERR_MASK & pcp_hp_stat) || + (MCU_CORR_ERR_MASK & pcp_lp_stat))) + return; + + for (rank = 0; rank < MCU_MAX_RANK; rank++) { + reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE); + + /* Detect uncorrectable memory error */ + if (reg & (MCU_ESRR_DEMANDUCERR_MASK | + MCU_ESRR_BACKUCERR_MASK)) { + /* Detected uncorrectable memory error */ + edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene", + "MCU uncorrectable error at rank %d\n", rank); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, ""); + } + + /* Detect correctable memory error */ + if (reg & MCU_ESRR_CERR_MASK) { + bank = readl(ctx->mcu_csr + MCUEBLRR0 + + rank * MCU_RANK_STRIDE); + col_row = readl(ctx->mcu_csr + MCUERCRR0 + + rank * MCU_RANK_STRIDE); + count = readl(ctx->mcu_csr + MCUSBECNT0 + + rank * MCU_RANK_STRIDE); + edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene", + "MCU correctable error at rank %d bank %d column %d row %d count %d\n", + rank, MCU_EBLRR_ERRBANK_RD(bank), + MCU_ERCRR_ERRCOL_RD(col_row), + MCU_ERCRR_ERRROW_RD(col_row), + MCU_SBECNT_COUNT(count)); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, + 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, ""); + } + + /* Clear all error registers */ + writel(0x0, 
ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE); + writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE); + writel(0x0, ctx->mcu_csr + MCUSBECNT0 + + rank * MCU_RANK_STRIDE); + writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE); + } + + /* Detect memory controller error */ + reg = readl(ctx->mcu_csr + MCUGESR); + if (reg) { + if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK) + edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene", + "MCU address miss-match error\n"); + if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK) + edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene", + "MCU address multi-match error\n"); + + writel(reg, ctx->mcu_csr + MCUGESR); + } +} + +static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable) +{ + struct xgene_edac_mc_ctx *ctx = mci->pvt_info; + unsigned int val; + + if (edac_op_state != EDAC_OPSTATE_INT) + return; + + mutex_lock(&ctx->edac->mc_lock); + + /* + * As there is only single bit for enable error and interrupt mask, + * we must only enable top level interrupt after all MCUs are + * registered. Otherwise, if there is an error and the corresponding + * MCU has not registered, the interrupt will never get cleared. To + * determine all MCU have registered, we will keep track of active + * MCUs and registered MCUs. 
+ */ + if (enable) { + /* Set registered MCU bit */ + ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id; + + /* Enable interrupt after all active MCU registered */ + if (ctx->edac->mc_registered_mask == + ctx->edac->mc_active_mask) { + /* Enable memory controller top level interrupt */ + xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, + MCU_UNCORR_ERR_MASK | + MCU_CTL_ERR_MASK); + xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK, + MCU_CORR_ERR_MASK); + } + + /* Enable MCU interrupt and error reporting */ + val = readl(ctx->mcu_csr + MCUGECR); + val |= MCU_GECR_DEMANDUCINTREN_MASK | + MCU_GECR_BACKUCINTREN_MASK | + MCU_GECR_CINTREN_MASK | + MUC_GECR_MCUADDRERREN_MASK; + writel(val, ctx->mcu_csr + MCUGECR); + } else { + /* Disable MCU interrupt */ + val = readl(ctx->mcu_csr + MCUGECR); + val &= ~(MCU_GECR_DEMANDUCINTREN_MASK | + MCU_GECR_BACKUCINTREN_MASK | + MCU_GECR_CINTREN_MASK | + MUC_GECR_MCUADDRERREN_MASK); + writel(val, ctx->mcu_csr + MCUGECR); + + /* Disable memory controller top level interrupt */ + xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, + MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK); + xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK, + MCU_CORR_ERR_MASK); + + /* Clear registered MCU bit */ + ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id); + } + + mutex_unlock(&ctx->edac->mc_lock); +} + +static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx) +{ + unsigned int reg; + u32 mcu_mask; + + if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg)) + return 0; + + if (reg & CSW_CSWCR_DUALMCB_MASK) { + /* + * Dual MCB active - Determine if all 4 active or just MCU0 + * and MCU2 active + */ + if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg)) + return 0; + mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5; + } else { + /* + * Single MCB active - Determine if MCU0/MCU1 or just MCU0 + * active + */ + if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg)) + return 0; + mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 
0x3 : 0x1; + } + + /* Save active MC mask if hasn't set already */ + if (!ctx->edac->mc_active_mask) + ctx->edac->mc_active_mask = mcu_mask; + + return (mcu_mask & (1 << mc_idx)) ? 1 : 0; +} + +static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct xgene_edac_mc_ctx tmp_ctx; + struct xgene_edac_mc_ctx *ctx; + struct resource res; + int rc; + + memset(&tmp_ctx, 0, sizeof(tmp_ctx)); + tmp_ctx.edac = edac; + + if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL)) + return -ENOMEM; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + dev_err(edac->dev, "no MCU resource address\n"); + goto err_group; + } + tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(tmp_ctx.mcu_csr)) { + dev_err(edac->dev, "unable to map MCU resource\n"); + rc = PTR_ERR(tmp_ctx.mcu_csr); + goto err_group; + } + + /* Ignore non-active MCU */ + if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) { + dev_err(edac->dev, "no memory-controller property\n"); + rc = -ENODEV; + goto err_group; + } + if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) { + rc = -ENODEV; + goto err_group; + } + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 4; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 2; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers, + sizeof(*ctx)); + if (!mci) { + rc = -ENOMEM; + goto err_group; + } + + ctx = mci->pvt_info; + *ctx = tmp_ctx; /* Copy over resource value */ + ctx->name = "xgene_edac_mc_err"; + ctx->mci = mci; + mci->pdev = &mci->dev; + mci->ctl_name = ctx->name; + mci->dev_name = ctx->name; + + mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 | + MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = 
EDAC_MOD_STR; + mci->mod_ver = "0.1"; + mci->ctl_page_to_phys = NULL; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = SCRUB_HW_SRC; + + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = xgene_edac_mc_check; + + if (edac_mc_add_mc(mci)) { + dev_err(edac->dev, "edac_mc_add_mc failed\n"); + rc = -EINVAL; + goto err_free; + } + + xgene_edac_mc_create_debugfs_node(mci); + + list_add(&ctx->next, &edac->mcus); + + xgene_edac_mc_irq_ctl(mci, true); + + devres_remove_group(edac->dev, xgene_edac_mc_add); + + dev_info(edac->dev, "X-Gene EDAC MC registered\n"); + return 0; + +err_free: + edac_mc_free(mci); +err_group: + devres_release_group(edac->dev, xgene_edac_mc_add); + return rc; +} + +static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu) +{ + xgene_edac_mc_irq_ctl(mcu->mci, false); + edac_mc_del_mc(&mcu->mci->dev); + edac_mc_free(mcu->mci); + return 0; +} + +/* CPU L1/L2 error CSR */ +#define MAX_CPU_PER_PMD 2 +#define CPU_CSR_STRIDE 0x00100000 +#define CPU_L2C_PAGE 0x000D0000 +#define CPU_MEMERR_L2C_PAGE 0x000E0000 +#define CPU_MEMERR_CPU_PAGE 0x000F0000 + +#define MEMERR_CPU_ICFECR_PAGE_OFFSET 0x0000 +#define MEMERR_CPU_ICFESR_PAGE_OFFSET 0x0004 +#define MEMERR_CPU_ICFESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_CPU_ICFESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16) +#define MEMERR_CPU_ICFESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) +#define MEMERR_CPU_ICFESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) +#define MEMERR_CPU_ICFESR_MULTCERR_MASK BIT(2) +#define MEMERR_CPU_ICFESR_CERR_MASK BIT(0) +#define MEMERR_CPU_LSUESR_PAGE_OFFSET 0x000c +#define MEMERR_CPU_LSUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_CPU_LSUESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16) +#define MEMERR_CPU_LSUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) +#define MEMERR_CPU_LSUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) +#define MEMERR_CPU_LSUESR_MULTCERR_MASK BIT(2) +#define MEMERR_CPU_LSUESR_CERR_MASK BIT(0) +#define 
MEMERR_CPU_LSUECR_PAGE_OFFSET 0x0008 +#define MEMERR_CPU_MMUECR_PAGE_OFFSET 0x0010 +#define MEMERR_CPU_MMUESR_PAGE_OFFSET 0x0014 +#define MEMERR_CPU_MMUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_CPU_MMUESR_ERRINDEX_RD(src) (((src) & 0x007F0000) >> 16) +#define MEMERR_CPU_MMUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) +#define MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK BIT(7) +#define MEMERR_CPU_MMUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) +#define MEMERR_CPU_MMUESR_MULTCERR_MASK BIT(2) +#define MEMERR_CPU_MMUESR_CERR_MASK BIT(0) +#define MEMERR_CPU_ICFESRA_PAGE_OFFSET 0x0804 +#define MEMERR_CPU_LSUESRA_PAGE_OFFSET 0x080c +#define MEMERR_CPU_MMUESRA_PAGE_OFFSET 0x0814 + +#define MEMERR_L2C_L2ECR_PAGE_OFFSET 0x0000 +#define MEMERR_L2C_L2ESR_PAGE_OFFSET 0x0004 +#define MEMERR_L2C_L2ESR_ERRSYN_RD(src) (((src) & 0xFF000000) >> 24) +#define MEMERR_L2C_L2ESR_ERRWAY_RD(src) (((src) & 0x00FC0000) >> 18) +#define MEMERR_L2C_L2ESR_ERRCPU_RD(src) (((src) & 0x00020000) >> 17) +#define MEMERR_L2C_L2ESR_ERRGROUP_RD(src) (((src) & 0x0000E000) >> 13) +#define MEMERR_L2C_L2ESR_ERRACTION_RD(src) (((src) & 0x00001C00) >> 10) +#define MEMERR_L2C_L2ESR_ERRTYPE_RD(src) (((src) & 0x00000300) >> 8) +#define MEMERR_L2C_L2ESR_MULTUCERR_MASK BIT(3) +#define MEMERR_L2C_L2ESR_MULTICERR_MASK BIT(2) +#define MEMERR_L2C_L2ESR_UCERR_MASK BIT(1) +#define MEMERR_L2C_L2ESR_ERR_MASK BIT(0) +#define MEMERR_L2C_L2EALR_PAGE_OFFSET 0x0008 +#define CPUX_L2C_L2RTOCR_PAGE_OFFSET 0x0010 +#define MEMERR_L2C_L2EAHR_PAGE_OFFSET 0x000c +#define CPUX_L2C_L2RTOSR_PAGE_OFFSET 0x0014 +#define MEMERR_L2C_L2RTOSR_MULTERR_MASK BIT(1) +#define MEMERR_L2C_L2RTOSR_ERR_MASK BIT(0) +#define CPUX_L2C_L2RTOALR_PAGE_OFFSET 0x0018 +#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET 0x001c +#define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804 + +/* + * Processor Module Domain (PMD) context - Context for a pair of processors. + * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU has + * its own L1 cache. 
+ */ +struct xgene_edac_pmd_ctx { + struct list_head next; + struct device ddev; + char *name; + struct xgene_edac *edac; + struct edac_device_ctl_info *edac_dev; + void __iomem *pmd_csr; + u32 pmd; + int version; +}; + +static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev, + int cpu_idx) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_f; + u32 val; + + pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE; + + val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET); + if (val) { + dev_err(edac_dev->dev, + "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n", + ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, + MEMERR_CPU_ICFESR_ERRWAY_RD(val), + MEMERR_CPU_ICFESR_ERRINDEX_RD(val), + MEMERR_CPU_ICFESR_ERRINFO_RD(val)); + if (val & MEMERR_CPU_ICFESR_CERR_MASK) + dev_err(edac_dev->dev, + "One or more correctable error\n"); + if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) { + case 1: + dev_err(edac_dev->dev, "L1 TLB multiple hit\n"); + break; + case 2: + dev_err(edac_dev->dev, "Way select multiple hit\n"); + break; + case 3: + dev_err(edac_dev->dev, "Physical tag parity error\n"); + break; + case 4: + case 5: + dev_err(edac_dev->dev, "L1 data parity error\n"); + break; + case 6: + dev_err(edac_dev->dev, "L1 pre-decode parity error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET); + + if (val & (MEMERR_CPU_ICFESR_CERR_MASK | + MEMERR_CPU_ICFESR_MULTCERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, + edac_dev->ctl_name); + } + + val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET); + if (val) { + dev_err(edac_dev->dev, + "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n", + ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, + MEMERR_CPU_LSUESR_ERRWAY_RD(val), + MEMERR_CPU_LSUESR_ERRINDEX_RD(val), + MEMERR_CPU_LSUESR_ERRINFO_RD(val)); + if 
(val & MEMERR_CPU_LSUESR_CERR_MASK) + dev_err(edac_dev->dev, + "One or more correctable error\n"); + if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) { + case 0: + dev_err(edac_dev->dev, "Load tag error\n"); + break; + case 1: + dev_err(edac_dev->dev, "Load data error\n"); + break; + case 2: + dev_err(edac_dev->dev, "WSL multihit error\n"); + break; + case 3: + dev_err(edac_dev->dev, "Store tag error\n"); + break; + case 4: + dev_err(edac_dev->dev, + "DTB multihit from load pipeline error\n"); + break; + case 5: + dev_err(edac_dev->dev, + "DTB multihit from store pipeline error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET); + + if (val & (MEMERR_CPU_LSUESR_CERR_MASK | + MEMERR_CPU_LSUESR_MULTCERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, + edac_dev->ctl_name); + } + + val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET); + if (val) { + dev_err(edac_dev->dev, + "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n", + ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, + MEMERR_CPU_MMUESR_ERRWAY_RD(val), + MEMERR_CPU_MMUESR_ERRINDEX_RD(val), + MEMERR_CPU_MMUESR_ERRINFO_RD(val), + val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? 
"LSU" : + "ICF"); + if (val & MEMERR_CPU_MMUESR_CERR_MASK) + dev_err(edac_dev->dev, + "One or more correctable error\n"); + if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) { + case 0: + dev_err(edac_dev->dev, "Stage 1 UTB hit error\n"); + break; + case 1: + dev_err(edac_dev->dev, "Stage 1 UTB miss error\n"); + break; + case 2: + dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n"); + break; + case 3: + dev_err(edac_dev->dev, + "TMO operation single bank error\n"); + break; + case 4: + dev_err(edac_dev->dev, "Stage 2 UTB error\n"); + break; + case 5: + dev_err(edac_dev->dev, "Stage 2 UTB miss error\n"); + break; + case 6: + dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n"); + break; + case 7: + dev_err(edac_dev->dev, + "TMO operation multiple bank error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET); + + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); + } +} + +static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_d; + void __iomem *pg_e; + u32 val_hi; + u32 val_lo; + u32 val; + + /* Check L2 */ + pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; + val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET); + if (val) { + val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET); + val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET); + dev_err(edac_dev->dev, + "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n", + ctx->pmd, val, val_hi, val_lo); + dev_err(edac_dev->dev, + "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n", + MEMERR_L2C_L2ESR_ERRSYN_RD(val), + MEMERR_L2C_L2ESR_ERRWAY_RD(val), + MEMERR_L2C_L2ESR_ERRCPU_RD(val), + MEMERR_L2C_L2ESR_ERRGROUP_RD(val), + MEMERR_L2C_L2ESR_ERRACTION_RD(val)); + + if (val & MEMERR_L2C_L2ESR_ERR_MASK) + dev_err(edac_dev->dev, + "One or more correctable 
error\n"); + if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK) + dev_err(edac_dev->dev, "Multiple correctable error\n"); + if (val & MEMERR_L2C_L2ESR_UCERR_MASK) + dev_err(edac_dev->dev, + "One or more uncorrectable error\n"); + if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK) + dev_err(edac_dev->dev, + "Multiple uncorrectable error\n"); + + switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) { + case 0: + dev_err(edac_dev->dev, "Outbound SDB parity error\n"); + break; + case 1: + dev_err(edac_dev->dev, "Inbound SDB parity error\n"); + break; + case 2: + dev_err(edac_dev->dev, "Tag ECC error\n"); + break; + case 3: + dev_err(edac_dev->dev, "Data ECC error\n"); + break; + } + + /* Clear any HW errors */ + writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET); + + if (val & (MEMERR_L2C_L2ESR_ERR_MASK | + MEMERR_L2C_L2ESR_MULTICERR_MASK)) + edac_device_handle_ce(edac_dev, 0, 0, + edac_dev->ctl_name); + if (val & (MEMERR_L2C_L2ESR_UCERR_MASK | + MEMERR_L2C_L2ESR_MULTUCERR_MASK)) + edac_device_handle_ue(edac_dev, 0, 0, + edac_dev->ctl_name); + } + + /* Check if any memory request timed out on L2 cache */ + pg_d = ctx->pmd_csr + CPU_L2C_PAGE; + val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET); + if (val) { + val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET); + val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET); + dev_err(edac_dev->dev, + "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n", + ctx->pmd, val, val_hi, val_lo); + writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET); + } +} + +static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + unsigned int pcp_hp_stat; + int i; + + xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); + if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat)) + return; + + /* Check CPU L1 error */ + for (i = 0; i < MAX_CPU_PER_PMD; i++) + xgene_edac_pmd_l1_check(edac_dev, i); + + /* Check CPU L2 error */ + xgene_edac_pmd_l2_check(edac_dev); +} + +static void xgene_edac_pmd_cpu_hw_cfg(struct 
edac_device_ctl_info *edac_dev, + int cpu) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE + + CPU_MEMERR_CPU_PAGE; + + /* + * Enable CPU memory error: + * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA + */ + writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET); + writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET); + writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET); +} + +static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE; + void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; + + /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */ + writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET); + /* Configure L2C HW request time out feature if supported */ + if (ctx->version > 1) + writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET); +} + +static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev, + bool enable) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + int i; + + /* Enable PMD error interrupt */ + if (edac_dev->op_state == OP_RUNNING_INTERRUPT) { + if (enable) + xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, + PMD0_MERR_MASK << ctx->pmd); + else + xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, + PMD0_MERR_MASK << ctx->pmd); + } + + if (enable) { + xgene_edac_pmd_hw_cfg(edac_dev); + + /* Two CPUs per a PMD */ + for (i = 0; i < MAX_CPU_PER_PMD; i++) + xgene_edac_pmd_cpu_hw_cfg(edac_dev, i); + } +} + +static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *cpux_pg_f; + int i; + + for (i = 0; i < MAX_CPU_PER_PMD; i++) { + cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE + + 
CPU_MEMERR_CPU_PAGE; + + writel(MEMERR_CPU_ICFESR_MULTCERR_MASK | + MEMERR_CPU_ICFESR_CERR_MASK, + cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET); + writel(MEMERR_CPU_LSUESR_MULTCERR_MASK | + MEMERR_CPU_LSUESR_CERR_MASK, + cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET); + writel(MEMERR_CPU_MMUESR_MULTCERR_MASK | + MEMERR_CPU_MMUESR_CERR_MASK, + cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET); + } + return count; +} + +static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dev = file->private_data; + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; + + writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK | + MEMERR_L2C_L2ESR_MULTICERR_MASK | + MEMERR_L2C_L2ESR_UCERR_MASK | + MEMERR_L2C_L2ESR_ERR_MASK, + pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET); + return count; +} + +static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = { + { + .open = simple_open, + .write = xgene_edac_pmd_l1_inject_ctrl_write, + .llseek = generic_file_llseek, }, + { + .open = simple_open, + .write = xgene_edac_pmd_l2_inject_ctrl_write, + .llseek = generic_file_llseek, }, + { } +}; + +static void xgene_edac_pmd_create_debugfs_nodes( + struct edac_device_ctl_info *edac_dev) +{ + struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; + struct dentry *edac_debugfs; + char name[30]; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return; + + /* + * Todo: Switch to common EDAC debug file system for edac device + * when available. 
+ */ + if (!ctx->edac->dfs) { + ctx->edac->dfs = debugfs_create_dir(edac_dev->dev->kobj.name, + NULL); + if (!ctx->edac->dfs) + return; + } + sprintf(name, "PMD%d", ctx->pmd); + edac_debugfs = debugfs_create_dir(name, ctx->edac->dfs); + if (!edac_debugfs) + return; + + debugfs_create_file("l1_inject_ctrl", S_IWUSR, edac_debugfs, edac_dev, + &xgene_edac_pmd_debug_inject_fops[0]); + debugfs_create_file("l2_inject_ctrl", S_IWUSR, edac_debugfs, edac_dev, + &xgene_edac_pmd_debug_inject_fops[1]); +} + +static int xgene_edac_pmd_available(u32 efuse, int pmd) +{ + return (efuse & (1 << pmd)) ? 0 : 1; +} + +static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np, + int version) +{ + struct edac_device_ctl_info *edac_dev; + struct xgene_edac_pmd_ctx *ctx; + struct resource res; + char edac_name[10]; + u32 pmd; + int rc; + u32 val; + + if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL)) + return -ENOMEM; + + /* Determine if this PMD is disabled */ + if (of_property_read_u32(np, "pmd-controller", &pmd)) { + dev_err(edac->dev, "no pmd-controller property\n"); + rc = -ENODEV; + goto err_group; + } + rc = regmap_read(edac->efuse_map, 0, &val); + if (rc) + goto err_group; + if (!xgene_edac_pmd_available(val, pmd)) { + rc = -ENODEV; + goto err_group; + } + + sprintf(edac_name, "l2c%d", pmd); + edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), + edac_name, 1, "l2c", 1, 2, NULL, + 0, edac_device_alloc_index()); + if (!edac_dev) { + rc = -ENOMEM; + goto err_group; + } + + ctx = edac_dev->pvt_info; + ctx->name = "xgene_pmd_err"; + ctx->pmd = pmd; + ctx->edac = edac; + ctx->edac_dev = edac_dev; + ctx->ddev = *edac->dev; + ctx->version = version; + edac_dev->dev = &ctx->ddev; + edac_dev->ctl_name = ctx->name; + edac_dev->dev_name = ctx->name; + edac_dev->mod_name = EDAC_MOD_STR; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + dev_err(edac->dev, "no PMD resource address\n"); + goto err_free; + } + ctx->pmd_csr = 
devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(ctx->pmd_csr)) { + dev_err(edac->dev, + "devm_ioremap_resource failed for PMD resource address\n"); + rc = PTR_ERR(ctx->pmd_csr); + goto err_free; + } + + if (edac_op_state == EDAC_OPSTATE_POLL) + edac_dev->edac_check = xgene_edac_pmd_check; + + xgene_edac_pmd_create_debugfs_nodes(edac_dev); + + rc = edac_device_add_device(edac_dev); + if (rc > 0) { + dev_err(edac->dev, "edac_device_add_device failed\n"); + rc = -ENOMEM; + goto err_free; + } + + if (edac_op_state == EDAC_OPSTATE_INT) + edac_dev->op_state = OP_RUNNING_INTERRUPT; + + list_add(&ctx->next, &edac->pmds); + + xgene_edac_pmd_hw_ctl(edac_dev, 1); + + devres_remove_group(edac->dev, xgene_edac_pmd_add); + + dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd); + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); +err_group: + devres_release_group(edac->dev, xgene_edac_pmd_add); + return rc; +} + +static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd) +{ + struct edac_device_ctl_info *edac_dev = pmd->edac_dev; + + xgene_edac_pmd_hw_ctl(edac_dev, 0); + edac_device_del_device(edac_dev->dev); + edac_device_free_ctl_info(edac_dev); + return 0; +} + +static irqreturn_t xgene_edac_isr(int irq, void *dev_id) +{ + struct xgene_edac *ctx = dev_id; + struct xgene_edac_pmd_ctx *pmd; + unsigned int pcp_hp_stat; + unsigned int pcp_lp_stat; + + xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat); + xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat); + if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) || + (MCU_CTL_ERR_MASK & pcp_hp_stat) || + (MCU_CORR_ERR_MASK & pcp_lp_stat)) { + struct xgene_edac_mc_ctx *mcu; + + list_for_each_entry(mcu, &ctx->mcus, next) { + xgene_edac_mc_check(mcu->mci); + } + } + + list_for_each_entry(pmd, &ctx->pmds, next) { + if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat) + xgene_edac_pmd_check(pmd->edac_dev); + } + + return IRQ_HANDLED; +} + +static int xgene_edac_probe(struct platform_device *pdev) +{ + struct 
xgene_edac *edac; + struct device_node *child; + struct resource *res; + int rc; + + edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); + if (!edac) + return -ENOMEM; + + edac->dev = &pdev->dev; + platform_set_drvdata(pdev, edac); + INIT_LIST_HEAD(&edac->mcus); + INIT_LIST_HEAD(&edac->pmds); + spin_lock_init(&edac->lock); + mutex_init(&edac->mc_lock); + + edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-csw"); + if (IS_ERR(edac->csw_map)) { + dev_err(edac->dev, "unable to get syscon regmap csw\n"); + rc = PTR_ERR(edac->csw_map); + goto out_err; + } + + edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-mcba"); + if (IS_ERR(edac->mcba_map)) { + dev_err(edac->dev, "unable to get syscon regmap mcba\n"); + rc = PTR_ERR(edac->mcba_map); + goto out_err; + } + + edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-mcbb"); + if (IS_ERR(edac->mcbb_map)) { + dev_err(edac->dev, "unable to get syscon regmap mcbb\n"); + rc = PTR_ERR(edac->mcbb_map); + goto out_err; + } + edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "regmap-efuse"); + if (IS_ERR(edac->efuse_map)) { + dev_err(edac->dev, "unable to get syscon regmap efuse\n"); + rc = PTR_ERR(edac->efuse_map); + goto out_err; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(edac->pcp_csr)) { + dev_err(&pdev->dev, "no PCP resource address\n"); + rc = PTR_ERR(edac->pcp_csr); + goto out_err; + } + + if (edac_op_state == EDAC_OPSTATE_INT) { + int irq; + int i; + + for (i = 0; i < 3; i++) { + irq = platform_get_irq(pdev, i); + if (irq < 0) { + dev_err(&pdev->dev, "No IRQ resource\n"); + rc = -EINVAL; + goto out_err; + } + rc = devm_request_irq(&pdev->dev, irq, + xgene_edac_isr, IRQF_SHARED, + dev_name(&pdev->dev), edac); + if (rc) { + dev_err(&pdev->dev, + "Could not request IRQ %d\n", irq); + goto out_err; + } + } + } + + 
for_each_child_of_node(pdev->dev.of_node, child) { + if (!of_device_is_available(child)) + continue; + if (of_device_is_compatible(child, "apm,xgene-edac-mc")) + xgene_edac_mc_add(edac, child); + if (of_device_is_compatible(child, "apm,xgene-edac-pmd")) + xgene_edac_pmd_add(edac, child, 1); + if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2")) + xgene_edac_pmd_add(edac, child, 2); + } + + return 0; + +out_err: + return rc; +} + +static int xgene_edac_remove(struct platform_device *pdev) +{ + struct xgene_edac *edac = dev_get_drvdata(&pdev->dev); + struct xgene_edac_mc_ctx *mcu; + struct xgene_edac_mc_ctx *temp_mcu; + struct xgene_edac_pmd_ctx *pmd; + struct xgene_edac_pmd_ctx *temp_pmd; + + list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next) { + xgene_edac_mc_remove(mcu); + } + + list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next) { + xgene_edac_pmd_remove(pmd); + } + return 0; +} + +static const struct of_device_id xgene_edac_of_match[] = { + { .compatible = "apm,xgene-edac" }, + {}, +}; +MODULE_DEVICE_TABLE(of, xgene_edac_of_match); + +static struct platform_driver xgene_edac_driver = { + .probe = xgene_edac_probe, + .remove = xgene_edac_remove, + .driver = { + .name = "xgene-edac", + .owner = THIS_MODULE, + .of_match_table = xgene_edac_of_match, + }, +}; + +static int __init xgene_edac_init(void) +{ + int rc; + + /* Make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_INT; + break; + } + + rc = platform_driver_register(&xgene_edac_driver); + if (rc) { + edac_printk(KERN_ERR, EDAC_MOD_STR, + "EDAC fails to register\n"); + goto reg_failed; + } + + return 0; + +reg_failed: + return rc; +} +module_init(xgene_edac_init); + +static void __exit xgene_edac_exit(void) +{ + platform_driver_unregister(&xgene_edac_driver); +} +module_exit(xgene_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Feng Kan <fkan@apm.com>"); 
+MODULE_DESCRIPTION("APM X-Gene EDAC driver"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, + "EDAC error reporting state: 0=Poll, 2=Interrupt"); |