Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- | drivers/edac/amd64_edac.c | 653
1 file changed, 387 insertions(+), 266 deletions(-)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 873437be86d9..9fbad908a854 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -16,12 +16,11 @@ module_param(ecc_enable_override, int, 0644);
 
 static struct msr __percpu *msrs;
 
+static struct amd64_family_type *fam_type;
+
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
-/* Number of Unified Memory Controllers */
-static u8 num_umcs;
-
 /*
  * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
  * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
@@ -215,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
 
 	scrubval = scrubrates[i].scrubval;
 
-	if (pvt->fam == 0x17 || pvt->fam == 0x18) {
+	if (pvt->umc) {
 		__f17h_set_scrubval(pvt, scrubval);
 	} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
 		f15h_select_dct(pvt, 0);
@@ -257,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
 	int i, retval = -EINVAL;
 	u32 scrubval = 0;
 
-	switch (pvt->fam) {
-	case 0x15:
-		/* Erratum #505 */
-		if (pvt->model < 0x10)
-			f15h_select_dct(pvt, 0);
-
-		if (pvt->model == 0x60)
-			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
-		break;
-
-	case 0x17:
-	case 0x18:
+	if (pvt->umc) {
 		amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
 		if (scrubval & BIT(0)) {
 			amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
@@ -277,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
 		} else {
 			scrubval = 0;
 		}
-		break;
+	} else if (pvt->fam == 0x15) {
+		/* Erratum #505 */
+		if (pvt->model < 0x10)
+			f15h_select_dct(pvt, 0);
 
-	default:
+		if (pvt->model == 0x60)
+			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
+	} else {
 		amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
-		break;
 	}
 
 	scrubval = scrubval & 0x001F;
@@ -454,7 +446,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
 
 #define for_each_umc(i) \
-	for (i = 0; i < num_umcs; i++)
+	for (i = 0; i < fam_type->max_mcs; i++)
 
 /*
  * @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -788,51 +780,45 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
 		 (dclr & BIT(15)) ?  "yes" : "no");
 }
 
-/*
- * The Address Mask should be a contiguous set of bits in the non-interleaved
- * case. So to check for CS interleaving, find the most- and least-significant
- * bits of the mask, generate a contiguous bitmask, and compare the two.
- */
-static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs)
+#define CS_EVEN_PRIMARY		BIT(0)
+#define CS_ODD_PRIMARY		BIT(1)
+#define CS_EVEN_SECONDARY	BIT(2)
+#define CS_ODD_SECONDARY	BIT(3)
+
+#define CS_EVEN			(CS_EVEN_PRIMARY | CS_EVEN_SECONDARY)
+#define CS_ODD			(CS_ODD_PRIMARY | CS_ODD_SECONDARY)
+
+static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
 {
-	u32 mask = pvt->csels[ctrl].csmasks[cs >> 1];
-	u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1;
-	u32 test_mask = GENMASK(msb, lsb);
+	int cs_mode = 0;
+
+	if (csrow_enabled(2 * dimm, ctrl, pvt))
+		cs_mode |= CS_EVEN_PRIMARY;
+
+	if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
+		cs_mode |= CS_ODD_PRIMARY;
 
-	edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask);
+	/* Asymmetric dual-rank DIMM support. */
+	if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
+		cs_mode |= CS_ODD_SECONDARY;
 
-	return mask ^ test_mask;
+	return cs_mode;
 }
 
 static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
 {
-	int dimm, size0, size1, cs0, cs1;
+	int dimm, size0, size1, cs0, cs1, cs_mode;
 
 	edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
 
-	for (dimm = 0; dimm < 4; dimm++) {
-		size0 = 0;
+	for (dimm = 0; dimm < 2; dimm++) {
 		cs0 = dimm * 2;
-
-		if (csrow_enabled(cs0, ctrl, pvt))
-			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);
-
-		size1 = 0;
 		cs1 = dimm * 2 + 1;
 
-		if (csrow_enabled(cs1, ctrl, pvt)) {
-			/*
-			 * CS interleaving is only supported if both CSes have
-			 * the same amount of memory. Because they are
-			 * interleaved, it will look like both CSes have the
-			 * full amount of memory. Save the size for both as
-			 * half the amount we found on CS0, if interleaved.
-			 */
-			if (f17_cs_interleaved(pvt, ctrl, cs1))
-				size1 = size0 = (size0 >> 1);
-			else
-				size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
-		}
+		cs_mode = f17_get_cs_mode(dimm, ctrl, pvt);
+
+		size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0);
+		size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1);
 
 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
 				cs0,	size0,
 				cs1,	size1);
@@ -942,89 +928,119 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
 	} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
+	} else if (pvt->fam >= 0x17) {
+		int umc;
+
+		for_each_umc(umc) {
+			pvt->csels[umc].b_cnt = 4;
+			pvt->csels[umc].m_cnt = 2;
+		}
+
 	} else {
 		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
 		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
 	}
 }
 
+static void read_umc_base_mask(struct amd64_pvt *pvt)
+{
+	u32 umc_base_reg, umc_base_reg_sec;
+	u32 umc_mask_reg, umc_mask_reg_sec;
+	u32 base_reg, base_reg_sec;
+	u32 mask_reg, mask_reg_sec;
+	u32 *base, *base_sec;
+	u32 *mask, *mask_sec;
+	int cs, umc;
+
+	for_each_umc(umc) {
+		umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR;
+		umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC;
+
+		for_each_chip_select(cs, umc, pvt) {
+			base = &pvt->csels[umc].csbases[cs];
+			base_sec = &pvt->csels[umc].csbases_sec[cs];
+
+			base_reg = umc_base_reg + (cs * 4);
+			base_reg_sec = umc_base_reg_sec + (cs * 4);
+
+			if (!amd_smn_read(pvt->mc_node_id, base_reg, base))
+				edac_dbg(0, "  DCSB%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *base, base_reg);
+
+			if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec))
+				edac_dbg(0, "    DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *base_sec, base_reg_sec);
+		}
+
+		umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
+		umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+
+		for_each_chip_select_mask(cs, umc, pvt) {
+			mask = &pvt->csels[umc].csmasks[cs];
+			mask_sec = &pvt->csels[umc].csmasks_sec[cs];
+
+			mask_reg = umc_mask_reg + (cs * 4);
+			mask_reg_sec = umc_mask_reg_sec + (cs * 4);
+
+			if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask))
+				edac_dbg(0, "  DCSM%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *mask, mask_reg);
+
+			if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec))
+				edac_dbg(0, "    DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n",
+					 umc, cs, *mask_sec, mask_reg_sec);
+		}
+	}
+}
+
 /*
  * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
  */
 static void read_dct_base_mask(struct amd64_pvt *pvt)
 {
-	int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;
+	int cs;
 
 	prep_chip_selects(pvt);
 
-	if (pvt->umc) {
-		base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
-		base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
-		mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
-		mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
-	} else {
-		base_reg0 = DCSB0;
-		base_reg1 = DCSB1;
-		mask_reg0 = DCSM0;
-		mask_reg1 = DCSM1;
-	}
+	if (pvt->umc)
+		return read_umc_base_mask(pvt);
 
 	for_each_chip_select(cs, 0, pvt) {
-		int reg0 = base_reg0 + (cs * 4);
-		int reg1 = base_reg1 + (cs * 4);
+		int reg0 = DCSB0 + (cs * 4);
+		int reg1 = DCSB1 + (cs * 4);
 		u32 *base0 = &pvt->csels[0].csbases[cs];
 		u32 *base1 = &pvt->csels[1].csbases[cs];
 
-		if (pvt->umc) {
-			if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
-				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: 0x%x\n",
-					 cs, *base0, reg0);
-
-			if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
-				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: 0x%x\n",
-					 cs, *base1, reg1);
-		} else {
-			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
-				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
-					 cs, *base0, reg0);
+		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
+			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
+				 cs, *base0, reg0);
 
-			if (pvt->fam == 0xf)
-				continue;
+		if (pvt->fam == 0xf)
+			continue;
 
-			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
-				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
-					 cs, *base1, (pvt->fam == 0x10) ? reg1
-								: reg0);
-		}
+		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
+			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
+				 cs, *base1, (pvt->fam == 0x10) ? reg1
+							: reg0);
 	}
 
 	for_each_chip_select_mask(cs, 0, pvt) {
-		int reg0 = mask_reg0 + (cs * 4);
-		int reg1 = mask_reg1 + (cs * 4);
+		int reg0 = DCSM0 + (cs * 4);
+		int reg1 = DCSM1 + (cs * 4);
 		u32 *mask0 = &pvt->csels[0].csmasks[cs];
 		u32 *mask1 = &pvt->csels[1].csmasks[cs];
 
-		if (pvt->umc) {
-			if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
-				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: 0x%x\n",
-					 cs, *mask0, reg0);
+		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
+			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
+				 cs, *mask0, reg0);
 
-			if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
-				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: 0x%x\n",
-					 cs, *mask1, reg1);
-		} else {
-			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
-				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
-					 cs, *mask0, reg0);
-
-			if (pvt->fam == 0xf)
-				continue;
+		if (pvt->fam == 0xf)
+			continue;
 
-			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
-				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
-					 cs, *mask1, (pvt->fam == 0x10) ? reg1
-								: reg0);
-		}
+		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
+			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
+				 cs, *mask1, (pvt->fam == 0x10) ? reg1
+							: reg0);
 	}
 }
 
@@ -1032,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt)
 {
 	u32 dram_ctrl, dcsm;
 
+	if (pvt->umc) {
+		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
+			pvt->dram_type = MEM_LRDDR4;
+		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
+			pvt->dram_type = MEM_RDDR4;
+		else
+			pvt->dram_type = MEM_DDR4;
+		return;
+	}
+
 	switch (pvt->fam) {
 	case 0xf:
 		if (pvt->ext_model >= K8_REV_F)
@@ -1077,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt)
 	case 0x16:
 		goto ddr3;
 
-	case 0x17:
-	case 0x18:
-		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
-			pvt->dram_type = MEM_LRDDR4;
-		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
-			pvt->dram_type = MEM_RDDR4;
-		else
-			pvt->dram_type = MEM_DDR4;
-		return;
-
 	default:
 		WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
 		pvt->dram_type = MEM_EMPTY;
@@ -1556,18 +1572,58 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
 	return ddr3_cs_size(cs_mode, false);
 }
 
-static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 				    unsigned int cs_mode, int csrow_nr)
 {
-	u32 base_addr = pvt->csels[umc].csbases[csrow_nr];
+	u32 addr_mask_orig, addr_mask_deinterleaved;
+	u32 msb, weight, num_zero_bits;
+	int dimm, size = 0;
+
+	/* No Chip Selects are enabled. */
+	if (!cs_mode)
+		return size;
+
+	/* Requested size of an even CS but none are enabled. */
+	if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1))
+		return size;
 
-	/* Each mask is used for every two base addresses. */
-	u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1];
+	/* Requested size of an odd CS but none are enabled. */
+	if (!(cs_mode & CS_ODD) && (csrow_nr & 1))
+		return size;
 
-	/* Register [31:1] = Address [39:9]. Size is in kBs here. */
-	u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1;
+	/*
+	 * There is one mask per DIMM, and two Chip Selects per DIMM.
+	 *	CS0 and CS1 -> DIMM0
+	 *	CS2 and CS3 -> DIMM1
+	 */
+	dimm = csrow_nr >> 1;
 
-	edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask);
+	/* Asymmetric dual-rank DIMM support. */
+	if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
+		addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+	else
+		addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+
+	/*
+	 * The number of zero bits in the mask is equal to the number of bits
+	 * in a full mask minus the number of bits in the current mask.
+	 *
+	 * The MSB is the number of bits in the full mask because BIT[0] is
+	 * always 0.
+	 */
+	msb = fls(addr_mask_orig) - 1;
+	weight = hweight_long(addr_mask_orig);
+	num_zero_bits = msb - weight;
+
+	/* Take the number of zero bits off from the top of the mask. */
+	addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+	edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+	edac_dbg(1, "  Original AddrMask: 0x%x\n", addr_mask_orig);
+	edac_dbg(1, "  Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+	/* Register [31:1] = Address [39:9]. Size is in kBs here. */
+	size = (addr_mask_deinterleaved >> 2) + 1;
 
 	/* Return size in MBs. */
 	return size >> 10;
@@ -2160,6 +2216,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "K8",
 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
 		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= k8_early_channel_count,
 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
@@ -2170,6 +2227,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F10h",
 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
 		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2180,6 +2238,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F15h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2190,6 +2249,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F15h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2200,6 +2260,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F15h_M60h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2210,6 +2271,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F16h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2220,6 +2282,7 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F16h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
 		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2230,27 +2293,50 @@ static struct amd64_family_type family_types[] = {
 		.ctl_name = "F17h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
-			.dbam_to_cs		= f17_base_addr_to_cs_size,
+			.dbam_to_cs		= f17_addr_mask_to_cs_size,
 		}
 	},
 	[F17_M10H_CPUS] = {
 		.ctl_name = "F17h_M10h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
+		.max_mcs = 2,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
-			.dbam_to_cs		= f17_base_addr_to_cs_size,
+			.dbam_to_cs		= f17_addr_mask_to_cs_size,
 		}
 	},
 	[F17_M30H_CPUS] = {
 		.ctl_name = "F17h_M30h",
 		.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
+		.max_mcs = 8,
+		.ops = {
+			.early_channel_count	= f17_early_channel_count,
+			.dbam_to_cs		= f17_addr_mask_to_cs_size,
+		}
+	},
+	[F17_M70H_CPUS] = {
+		.ctl_name = "F17h_M70h",
+		.f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
+		.f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
+		.max_mcs = 2,
+		.ops = {
+			.early_channel_count	= f17_early_channel_count,
+			.dbam_to_cs		= f17_addr_mask_to_cs_size,
+		}
+	},
+	[F19_CPUS] = {
+		.ctl_name = "F19h",
+		.f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
+		.f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
+		.max_mcs = 8,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
-			.dbam_to_cs		= f17_base_addr_to_cs_size,
+			.dbam_to_cs		= f17_addr_mask_to_cs_size,
 		}
 	},
 };
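For reference, the deinterleaving arithmetic introduced in f17_addr_mask_to_cs_size() above can be sanity-checked outside the kernel. The sketch below is a minimal userspace re-implementation, assuming GCC/Clang builtins in place of the kernel's fls(), hweight_long() and GENMASK_ULL(); the sample mask value is invented for illustration, not taken from real hardware.

/*
 * Userspace sanity check of the mask deinterleaving done by
 * f17_addr_mask_to_cs_size(). Kernel helpers are replaced with
 * compiler builtins; the sample mask below is illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t addr_mask_to_cs_size_mb(uint32_t addr_mask_orig)
{
	/* fls(x) - 1: index of the most significant set bit. */
	uint32_t msb = 31 - __builtin_clz(addr_mask_orig);
	/* hweight_long(x): number of set bits. */
	uint32_t weight = __builtin_popcount(addr_mask_orig);
	/* Bits cleared by interleaving; BIT(0) is always 0 and not counted. */
	uint32_t num_zero_bits = msb - weight;
	/* GENMASK(msb - num_zero_bits, 1): squeeze the holes out of the top. */
	uint32_t hi = msb - num_zero_bits;
	uint32_t addr_mask_deinterleaved = (uint32_t)((1ULL << (hi + 1)) - 1) & ~1U;

	/* Register [31:1] = Address [39:9], so (mask >> 2) + 1 is in kB. */
	uint32_t size_kb = (addr_mask_deinterleaved >> 2) + 1;

	return size_kb >> 10; /* kB -> MB, as the driver returns. */
}

int main(void)
{
	/* Bits [26:1] set except bit 9: one interleave hole in the mask. */
	uint32_t mask = 0x07fffffe & ~(1u << 9);

	printf("size: %u MB\n", addr_mask_to_cs_size_mb(mask));
	return 0;
}

Compiled and run, this prints 16384 MB: removing the single interleave hole from bits [26:1] leaves a contiguous [25:1] mask, i.e. a 16 GB chip select.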
@@ -2537,13 +2623,6 @@ static void decode_umc_error(int node_id, struct mce *m)
 
 	err.channel = find_umc_channel(m);
 
-	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
-		err.err_code = ERR_NORM_ADDR;
-		goto log_error;
-	}
-
-	error_address_to_page_and_offset(sys_addr, &err);
-
 	if (!(m->status & MCI_STATUS_SYNDV)) {
 		err.err_code = ERR_SYND;
 		goto log_error;
@@ -2560,6 +2639,13 @@ static void decode_umc_error(int node_id, struct mce *m)
 
 	err.csrow = m->synd & 0x7;
 
+	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+		err.err_code = ERR_NORM_ADDR;
+		goto log_error;
+	}
+
+	error_address_to_page_and_offset(sys_addr, &err);
+
 log_error:
 	__log_ecc_error(mci, &err, ecc_type);
 }
@@ -2765,8 +2851,6 @@ skip:
 	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
 
 	determine_ecc_sym_sz(pvt);
-
-	dump_misc_regs(pvt);
 }
 
 /*
@@ -2809,10 +2893,12 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
 	int csrow_nr = csrow_nr_orig;
 	u32 cs_mode, nr_pages;
 
-	if (!pvt->umc)
+	if (!pvt->umc) {
 		csrow_nr >>= 1;
-
-	cs_mode = DBAM_DIMM(csrow_nr, dbam);
+		cs_mode = DBAM_DIMM(csrow_nr, dbam);
+	} else {
+		cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
+	}
 
 	nr_pages   = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
 	nr_pages <<= 20 - PAGE_SHIFT;
@@ -2824,6 +2910,50 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
 	return nr_pages;
 }
 
+static int init_csrows_df(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	enum edac_type edac_mode = EDAC_NONE;
+	enum dev_type dev_type = DEV_UNKNOWN;
+	struct dimm_info *dimm;
+	int empty = 1;
+	u8 umc, cs;
+
+	if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) {
+		edac_mode = EDAC_S16ECD16ED;
+		dev_type = DEV_X16;
+	} else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) {
+		edac_mode = EDAC_S8ECD8ED;
+		dev_type = DEV_X8;
+	} else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) {
+		edac_mode = EDAC_S4ECD4ED;
+		dev_type = DEV_X4;
+	} else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) {
+		edac_mode = EDAC_SECDED;
+	}
+
+	for_each_umc(umc) {
+		for_each_chip_select(cs, umc, pvt) {
+			if (!csrow_enabled(cs, umc, pvt))
+				continue;
+
+			empty = 0;
+			dimm = mci->csrows[cs]->channels[umc]->dimm;
+
+			edac_dbg(1, "MC node: %d, csrow: %d\n",
+				 pvt->mc_node_id, cs);
+
+			dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
+			dimm->mtype = pvt->dram_type;
+			dimm->edac_mode = edac_mode;
+			dimm->dtype = dev_type;
+			dimm->grain = 64;
+		}
+	}
+
+	return empty;
+}
+
 /*
  * Initialize the array of csrow attribute instances, based on the values
  * from pci config hardware registers.
@@ -2838,15 +2968,16 @@ static int init_csrows(struct mem_ctl_info *mci)
 	int nr_pages = 0;
 	u32 val;
 
-	if (!pvt->umc) {
-		amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
+	if (pvt->umc)
+		return init_csrows_df(mci);
 
-		pvt->nbcfg = val;
+	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
 
-		edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
-			 pvt->mc_node_id, val,
-			 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
-	}
+	pvt->nbcfg = val;
+
+	edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+		 pvt->mc_node_id, val,
+		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
 
 	/*
 	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
 	 */
@@ -2883,13 +3014,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
 
 		/* Determine DIMM ECC mode: */
-		if (pvt->umc) {
-			if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED)
-				edac_mode = EDAC_S4ECD4ED;
-			else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED)
-				edac_mode = EDAC_SECDED;
-
-		} else if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
+		if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
 			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
 					? EDAC_S4ECD4ED
 					: EDAC_SECDED;
@@ -2899,6 +3024,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 			dimm = csrow->channels[j]->dimm;
 			dimm->mtype = pvt->dram_type;
 			dimm->edac_mode = edac_mode;
+			dimm->grain = 64;
 		}
 	}
 
@@ -3065,43 +3191,27 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 		amd64_warn("Error restoring NB MCGCTL settings!\n");
 }
 
-/*
- * EDAC requires that the BIOS have ECC enabled before
- * taking over the processing of ECC errors. A command line
- * option allows to force-enable hardware ECC later in
- * enable_ecc_error_reporting().
- */
-static const char *ecc_msg =
-	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
-	" Either enable ECC checking or force module loading by setting "
-	"'ecc_enable_override'.\n"
-	" (Note that use of the override may cause unknown side effects.)\n";
-
-static bool ecc_enabled(struct pci_dev *F3, u16 nid)
+static bool ecc_enabled(struct amd64_pvt *pvt)
 {
+	u16 nid = pvt->mc_node_id;
 	bool nb_mce_en = false;
 	u8 ecc_en = 0, i;
 	u32 value;
 
 	if (boot_cpu_data.x86 >= 0x17) {
 		u8 umc_en_mask = 0, ecc_en_mask = 0;
+		struct amd64_umc *umc;
 
 		for_each_umc(i) {
-			u32 base = get_umc_base(i);
+			umc = &pvt->umc[i];
 
 			/* Only check enabled UMCs. */
-			if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
-				continue;
-
-			if (!(value & UMC_SDP_INIT))
+			if (!(umc->sdp_ctrl & UMC_SDP_INIT))
 				continue;
 
 			umc_en_mask |= BIT(i);
 
-			if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
-				continue;
-
-			if (value & UMC_ECC_ENABLED)
+			if (umc->umc_cap_hi & UMC_ECC_ENABLED)
 				ecc_en_mask |= BIT(i);
 		}
 
@@ -3114,7 +3224,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 		/* Assume UMC MCA banks are enabled. */
 		nb_mce_en = true;
 	} else {
-		amd64_read_pci_cfg(F3, NBCFG, &value);
+		amd64_read_pci_cfg(pvt->F3, NBCFG, &value);
 
 		ecc_en = !!(value & NBCFG_ECC_ENABLE);
 
@@ -3127,22 +3237,24 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 
 	amd64_info("Node %d: DRAM ECC %s.\n",
 		   nid, (ecc_en ? "enabled" : "disabled"));
 
-	if (!ecc_en || !nb_mce_en) {
-		amd64_info("%s", ecc_msg);
+	if (!ecc_en || !nb_mce_en)
 		return false;
-	}
-	return true;
+	else
+		return true;
 }
 
 static inline void
 f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
 {
-	u8 i, ecc_en = 1, cpk_en = 1;
+	u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;
 
 	for_each_umc(i) {
 		if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
 			ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
 			cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
+
+			dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6));
+			dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7));
 		}
 	}
 
@@ -3150,13 +3262,19 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
 	if (ecc_en) {
 		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
 
-		if (cpk_en)
+		if (!cpk_en)
+			return;
+
+		if (dev_x4)
 			mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+		else if (dev_x16)
+			mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED;
+		else
+			mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED;
 	}
 }
 
-static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
-				 struct amd64_family_type *fam)
+static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
 
@@ -3175,7 +3293,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
 	mci->edac_cap		= determine_edac_cap(pvt);
 	mci->mod_name		= EDAC_MOD_STR;
-	mci->ctl_name		= fam->ctl_name;
+	mci->ctl_name		= fam_type->ctl_name;
 	mci->dev_name		= pci_name(pvt->F3);
 	mci->ctl_page_to_phys	= NULL;
 
@@ -3189,8 +3307,6 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
  */
static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 {
-	struct amd64_family_type *fam_type = NULL;
-
 	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
 	pvt->stepping	= boot_cpu_data.x86_stepping;
 	pvt->model	= boot_cpu_data.x86_model;
@@ -3241,6 +3357,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 			fam_type = &family_types[F17_M30H_CPUS];
 			pvt->ops = &family_types[F17_M30H_CPUS].ops;
 			break;
+		} else if (pvt->model >= 0x70 && pvt->model <= 0x7f) {
+			fam_type = &family_types[F17_M70H_CPUS];
+			pvt->ops = &family_types[F17_M70H_CPUS].ops;
+			break;
 		}
 		/* fall through */
 	case 0x18:
@@ -3251,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 			family_types[F17_CPUS].ctl_name = "F18h";
 		break;
 
+	case 0x19:
+		fam_type	= &family_types[F19_CPUS];
+		pvt->ops	= &family_types[F19_CPUS].ops;
+		family_types[F19_CPUS].ctl_name = "F19h";
+		break;
+
 	default:
 		amd64_err("Unsupported family!\n");
 		return NULL;
@@ -3274,51 +3400,15 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
 	NULL
 };
 
-/* Set the number of Unified Memory Controllers in the system. */
-static void compute_num_umcs(void)
+static int hw_info_get(struct amd64_pvt *pvt)
 {
-	u8 model = boot_cpu_data.x86_model;
-
-	if (boot_cpu_data.x86 < 0x17)
-		return;
-
-	if (model >= 0x30 && model <= 0x3f)
-		num_umcs = 8;
-	else
-		num_umcs = 2;
-
-	edac_dbg(1, "Number of UMCs: %x", num_umcs);
-}
-
-static int init_one_instance(unsigned int nid)
-{
-	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
-	struct amd64_family_type *fam_type = NULL;
-	struct mem_ctl_info *mci = NULL;
-	struct edac_mc_layer layers[2];
-	struct amd64_pvt *pvt = NULL;
 	u16 pci_id1, pci_id2;
-	int err = 0, ret;
-
-	ret = -ENOMEM;
-	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
-	if (!pvt)
-		goto err_ret;
-
-	pvt->mc_node_id	= nid;
-	pvt->F3 = F3;
-
-	ret = -EINVAL;
-	fam_type = per_family_init(pvt);
-	if (!fam_type)
-		goto err_free;
+	int ret = -EINVAL;
 
 	if (pvt->fam >= 0x17) {
-		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
-		if (!pvt->umc) {
-			ret = -ENOMEM;
-			goto err_free;
-		}
+		pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+		if (!pvt->umc)
+			return -ENOMEM;
 
 		pci_id1 = fam_type->f0_id;
 		pci_id2 = fam_type->f6_id;
@@ -3327,21 +3417,37 @@ static int init_one_instance(unsigned int nid)
 		pci_id2 = fam_type->f2_id;
 	}
 
-	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
-	if (err)
-		goto err_post_init;
+	ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
+	if (ret)
+		return ret;
 
 	read_mc_regs(pvt);
 
+	return 0;
+}
+
+static void hw_info_put(struct amd64_pvt *pvt)
+{
+	if (pvt->F0 || pvt->F1)
+		free_mc_sibling_devs(pvt);
+
+	kfree(pvt->umc);
+}
+
+static int init_one_instance(struct amd64_pvt *pvt)
+{
+	struct mem_ctl_info *mci = NULL;
+	struct edac_mc_layer layers[2];
+	int ret = -EINVAL;
+
 	/*
 	 * We need to determine how many memory channels there are. Then use
 	 * that information for calculating the size of the dynamic instance
 	 * tables in the 'mci' structure.
 	 */
-	ret = -EINVAL;
 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
 	if (pvt->channel_count < 0)
-		goto err_siblings;
+		return ret;
 
 	ret = -ENOMEM;
 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
@@ -3353,24 +3459,18 @@ static int init_one_instance(unsigned int nid)
 	 * Always allocate two channels since we can have setups with DIMMs on
 	 * only one channel. Also, this simplifies handling later for the price
 	 * of a couple of KBs tops.
-	 *
-	 * On Fam17h+, the number of controllers may be greater than two. So set
-	 * the size equal to the maximum number of UMCs.
 	 */
-	if (pvt->fam >= 0x17)
-		layers[1].size = num_umcs;
-	else
-		layers[1].size = 2;
+	layers[1].size = fam_type->max_mcs;
 	layers[1].is_virt_csrow = false;
 
-	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
+	mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
 	if (!mci)
-		goto err_siblings;
+		return ret;
 
 	mci->pvt_info = pvt;
 	mci->pdev = &pvt->F3->dev;
 
-	setup_mci_misc_attrs(mci, fam_type);
+	setup_mci_misc_attrs(mci);
 
 	if (init_csrows(mci))
 		mci->edac_cap = EDAC_FLAG_NONE;
@@ -3378,31 +3478,30 @@ static int init_one_instance(unsigned int nid)
 	ret = -ENODEV;
 	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
 		edac_dbg(1, "failed edac_mc_add_mc()\n");
-		goto err_add_mc;
+		edac_mc_free(mci);
+		return ret;
 	}
 
 	return 0;
+}
 
-err_add_mc:
-	edac_mc_free(mci);
-
-err_siblings:
-	free_mc_sibling_devs(pvt);
-
-err_post_init:
-	if (pvt->fam >= 0x17)
-		kfree(pvt->umc);
+static bool instance_has_memory(struct amd64_pvt *pvt)
+{
+	bool cs_enabled = false;
+	int cs = 0, dct = 0;
 
-err_free:
-	kfree(pvt);
+	for (dct = 0; dct < fam_type->max_mcs; dct++) {
+		for_each_chip_select(cs, dct, pvt)
+			cs_enabled |= csrow_enabled(cs, dct, pvt);
+	}
 
-err_ret:
-	return ret;
+	return cs_enabled;
 }
 
 static int probe_one_instance(unsigned int nid)
 {
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+	struct amd64_pvt *pvt = NULL;
 	struct ecc_settings *s;
 	int ret;
 
@@ -3413,8 +3512,29 @@ static int probe_one_instance(unsigned int nid)
 
 	ecc_stngs[nid] = s;
 
-	if (!ecc_enabled(F3, nid)) {
-		ret = 0;
+	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+	if (!pvt)
+		goto err_settings;
+
+	pvt->mc_node_id	= nid;
+	pvt->F3 = F3;
+
+	fam_type = per_family_init(pvt);
+	if (!fam_type)
+		goto err_enable;
+
+	ret = hw_info_get(pvt);
+	if (ret < 0)
+		goto err_enable;
+
+	ret = 0;
+	if (!instance_has_memory(pvt)) {
+		amd64_info("Node %d: No DIMMs detected.\n", nid);
+		goto err_enable;
+	}
+
+	if (!ecc_enabled(pvt)) {
+		ret = -ENODEV;
 
 		if (!ecc_enable_override)
 			goto err_enable;
@@ -3429,7 +3549,7 @@ static int probe_one_instance(unsigned int nid)
 		goto err_enable;
 	}
 
-	ret = init_one_instance(nid);
+	ret = init_one_instance(pvt);
 	if (ret < 0) {
 		amd64_err("Error probing instance: %d\n", nid);
 
@@ -3439,9 +3559,15 @@ static int probe_one_instance(unsigned int nid)
 		goto err_enable;
 	}
 
+	dump_misc_regs(pvt);
+
 	return ret;
 
 err_enable:
+	hw_info_put(pvt);
+	kfree(pvt);
+
+err_settings:
 	kfree(s);
 	ecc_stngs[nid] = NULL;
 
@@ -3456,9 +3582,6 @@ static void remove_one_instance(unsigned int nid)
 	struct mem_ctl_info *mci;
 	struct amd64_pvt *pvt;
 
-	mci = find_mci_by_dev(&F3->dev);
-	WARN_ON(!mci);
-
 	/* Remove from EDAC CORE tracking list */
 	mci = edac_mc_del_mc(&F3->dev);
 	if (!mci)
@@ -3468,14 +3591,13 @@ static void remove_one_instance(unsigned int nid)
 
 	restore_ecc_error_reporting(s, nid, F3);
 
-	free_mc_sibling_devs(pvt);
-
 	kfree(ecc_stngs[nid]);
 	ecc_stngs[nid] = NULL;
 
 	/* Free the EDAC CORE resources */
 	mci->pvt_info = NULL;
 
+	hw_info_put(pvt);
 	kfree(pvt);
 	edac_mc_free(mci);
 }
@@ -3510,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
 	{ X86_VENDOR_AMD,	0x16, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
 	{ X86_VENDOR_AMD,	0x17, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
 	{ X86_VENDOR_HYGON,	0x18, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
+	{ X86_VENDOR_AMD,	0x19, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
@@ -3541,8 +3664,6 @@ static int __init amd64_edac_init(void)
 	if (!msrs)
 		goto err_free;
 
-	compute_num_umcs();
-
 	for (i = 0; i < amd_nb_num(); i++) {
 		err = probe_one_instance(i);
 		if (err) {
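As a closing illustration, the chip-select bookkeeping introduced by this change boils down to a few flag tests. The sketch below mirrors the shape of f17_get_cs_mode() under stated assumptions: the two stub predicates stand in for the driver's csrow_enabled() and csrow_sec_enabled() register lookups, and the populated-slot layout is made up for the example.

/*
 * Standalone illustration of the cs_mode flag encoding used by
 * f17_get_cs_mode(). The stub predicates replace the driver's
 * csrow_enabled()/csrow_sec_enabled(); the layout is invented.
 */
#include <stdbool.h>
#include <stdio.h>

#define CS_EVEN_PRIMARY		(1 << 0)
#define CS_ODD_PRIMARY		(1 << 1)
#define CS_ODD_SECONDARY	(1 << 3)

/* Pretend CS0/CS1 are populated and CS1 also has a secondary
 * address mask (an asymmetric dual-rank DIMM). */
static bool csrow_enabled(int cs)     { return cs == 0 || cs == 1; }
static bool csrow_sec_enabled(int cs) { return cs == 1; }

static int get_cs_mode(int dimm)
{
	int cs_mode = 0;

	if (csrow_enabled(2 * dimm))
		cs_mode |= CS_EVEN_PRIMARY;

	if (csrow_enabled(2 * dimm + 1))
		cs_mode |= CS_ODD_PRIMARY;

	if (csrow_sec_enabled(2 * dimm + 1))
		cs_mode |= CS_ODD_SECONDARY;

	return cs_mode;
}

int main(void)
{
	/* DIMM0 (CS0/CS1): expect 0xb = even+odd primary, odd secondary. */
	printf("cs_mode(DIMM0) = 0x%x\n", get_cs_mode(0));
	/* DIMM1 (CS2/CS3): nothing populated, expect 0x0. */
	printf("cs_mode(DIMM1) = 0x%x\n", get_cs_mode(1));
	return 0;
}

For DIMM0 this prints 0xb (CS_EVEN_PRIMARY | CS_ODD_PRIMARY | CS_ODD_SECONDARY), the mode under which f17_addr_mask_to_cs_size() selects the secondary address mask for the odd chip select.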