diff options
Diffstat (limited to 'drivers/edac/i7core_edac.c')
-rw-r--r-- | drivers/edac/i7core_edac.c | 415 |
1 files changed, 377 insertions, 38 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index f6cf448d69b4..70ad8923f1d7 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -31,11 +31,13 @@ #include <linux/pci_ids.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/dmi.h> #include <linux/edac.h> #include <linux/mmzone.h> -#include <linux/edac_mce.h> #include <linux/smp.h> +#include <asm/mce.h> #include <asm/processor.h> +#include <asm/div64.h> #include "edac_core.h" @@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); /* OFFSETS for Device 0 Function 0 */ #define MC_CFG_CONTROL 0x90 + #define MC_CFG_UNLOCK 0x02 + #define MC_CFG_LOCK 0x00 /* OFFSETS for Device 3 Function 0 */ @@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); #define DIMM0_COR_ERR(r) ((r) & 0x7fff) /* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ +#define MC_SSRCONTROL 0x48 + #define SSR_MODE_DISABLE 0x00 + #define SSR_MODE_ENABLE 0x01 + #define SSR_MODE_MASK 0x03 + +#define MC_SCRUB_CONTROL 0x4c + #define STARTSCRUB (1 << 24) + #define SCRUBINTERVAL_MASK 0xffffff + #define MC_COR_ECC_CNT_0 0x80 #define MC_COR_ECC_CNT_1 0x84 #define MC_COR_ECC_CNT_2 0x88 @@ -253,10 +266,7 @@ struct i7core_pvt { unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; - unsigned int is_registered; - - /* mcelog glue */ - struct edac_mce edac_mce; + bool is_registered, enable_scrub; /* Fifo double buffers */ struct mce mce_entry[MCE_LOG_LEN]; @@ -268,6 +278,9 @@ struct i7core_pvt { /* Count indicator to show errors not got */ unsigned mce_overrun; + /* DCLK Frequency used for computing scrub rate */ + int dclk_freq; + /* Struct to control EDAC polling */ struct edac_pci_ctl_info *i7core_pci; }; @@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { /* Memory controller */ { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, - - /* Exists only for RDIMM */ + /* Exists only for RDIMM */ { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, @@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, + + /* Generic Non-core registers */ + /* + * This is the PCI device on i7core and on Xeon 35xx (8086:2c41) + * On Xeon 55xx, however, it has a different id (8086:2c40). So, + * the probing code needs to test for the other address in case of + * failure of this one + */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) }, + }; static const struct pci_id_descr pci_dev_descr_lynnfield[] = { @@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = { { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) }, { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) }, { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) }, + + /* + * This is the PCI device has an alternate address on some + * processors like Core i7 860 + */ + { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) }, }; static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { @@ -346,6 +374,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = { { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) }, + + /* Generic Non-core registers */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) }, + }; #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } @@ -714,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci) csr->edac_mode = mode; csr->mtype = mtype; + snprintf(csr->channels[0].label, + sizeof(csr->channels[0].label), + "CPU#%uChannel#%u_DIMM#%u", + pvt->i7core_dev->socket, i, j); csrow++; } @@ -731,7 +767,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) debugf1("\t\t%#x\t%#x\t%#x\n", (value[j] >> 27) & 0x1, (value[j] >> 24) & 0x7, - (value[j] && ((1 << 24) - 1))); + (value[j] & ((1 << 24) - 1))); } return 0; @@ -1324,6 +1360,20 @@ static int i7core_get_onedevice(struct pci_dev **prev, pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_descr->dev_id, *prev); + /* + * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs + * is at addr 8086:2c40, instead of 8086:2c41. So, we need + * to probe for the alternate address in case of failure + */ + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); + + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, + *prev); + if (!pdev) { if (*prev) { *prev = pdev; @@ -1444,8 +1494,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci, struct i7core_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; int i, func, slot; + char *family; - pvt->is_registered = 0; + pvt->is_registered = false; + pvt->enable_scrub = false; for (i = 0; i < i7core_dev->n_devs; i++) { pdev = i7core_dev->pdev[i]; if (!pdev) @@ -1461,9 +1513,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci, if (unlikely(func > MAX_CHAN_FUNC)) goto error; pvt->pci_ch[slot - 4][func] = pdev; - } else if (!slot && !func) + } else if (!slot && !func) { pvt->pci_noncore = pdev; - else + + /* Detect the processor family */ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_I7_NONCORE: + family = "Xeon 35xx/ i7core"; + pvt->enable_scrub = false; + break; + case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT: + family = "i7-800/i5-700"; + pvt->enable_scrub = false; + break; + case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE: + family = "Xeon 34xx"; + pvt->enable_scrub = false; + break; + case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT: + family = "Xeon 55xx"; + pvt->enable_scrub = true; + break; + case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2: + family = "Xeon 56xx / i7-900"; + pvt->enable_scrub = true; + break; + default: + family = "unknown"; + pvt->enable_scrub = false; + } + debugf0("Detected a processor type %s\n", family); + } else goto error; debugf0("Associated fn %d.%d, dev = %p, socket %d\n", @@ -1472,7 +1552,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci, if (PCI_SLOT(pdev->devfn) == 3 && PCI_FUNC(pdev->devfn) == 2) - pvt->is_registered = 1; + pvt->is_registered = true; } return 0; @@ -1826,33 +1906,43 @@ check_ce_error: * WARNING: As this routine should be called at NMI time, extra care should * be taken to avoid deadlocks, and to be as fast as possible. */ -static int i7core_mce_check_error(void *priv, struct mce *mce) +static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) { - struct mem_ctl_info *mci = priv; - struct i7core_pvt *pvt = mci->pvt_info; + struct mce *mce = (struct mce *)data; + struct i7core_dev *i7_dev; + struct mem_ctl_info *mci; + struct i7core_pvt *pvt; + + i7_dev = get_i7core_dev(mce->socketid); + if (!i7_dev) + return NOTIFY_BAD; + + mci = i7_dev->mci; + pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is * outside the memory controller */ if (((mce->status & 0xffff) >> 7) != 1) - return 0; + return NOTIFY_DONE; /* Bank 8 registers are the only ones that we know how to handle */ if (mce->bank != 8) - return 0; + return NOTIFY_DONE; #ifdef CONFIG_SMP /* Only handle if it is the right mc controller */ - if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) - return 0; + if (mce->socketid != pvt->i7core_dev->socket) + return NOTIFY_DONE; #endif smp_rmb(); if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { smp_wmb(); pvt->mce_overrun++; - return 0; + return NOTIFY_DONE; } /* Copy memory error at the ringbuffer */ @@ -1865,7 +1955,240 @@ static int i7core_mce_check_error(void *priv, struct mce *mce) i7core_check_error(mci); /* Advise mcelog that the errors were handled */ - return 1; + return NOTIFY_STOP; +} + +static struct notifier_block i7_mce_dec = { + .notifier_call = i7core_mce_check_error, +}; + +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + + +/* + * Decode the DRAM Clock Frequency, be paranoid, make sure that all + * memory devices show the same speed, and if they don't then consider + * all speeds to be invalid. + */ +static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq) +{ + int *dclk_freq = _dclk_freq; + u16 dmi_mem_clk_speed; + + if (*dclk_freq == -1) + return; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *memdev_dmi_entry = + (struct memdev_dmi_entry *)dh; + unsigned long conf_mem_clk_speed_offset = + (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed - + (unsigned long)&memdev_dmi_entry->type; + unsigned long speed_offset = + (unsigned long)&memdev_dmi_entry->speed - + (unsigned long)&memdev_dmi_entry->type; + + /* Check that a DIMM is present */ + if (memdev_dmi_entry->size == 0) + return; + + /* + * Pick the configured speed if it's available, otherwise + * pick the DIMM speed, or we don't have a speed. + */ + if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) { + dmi_mem_clk_speed = + memdev_dmi_entry->conf_mem_clk_speed; + } else if (memdev_dmi_entry->length > speed_offset) { + dmi_mem_clk_speed = memdev_dmi_entry->speed; + } else { + *dclk_freq = -1; + return; + } + + if (*dclk_freq == 0) { + /* First pass, speed was 0 */ + if (dmi_mem_clk_speed > 0) { + /* Set speed if a valid speed is read */ + *dclk_freq = dmi_mem_clk_speed; + } else { + /* Otherwise we don't have a valid speed */ + *dclk_freq = -1; + } + } else if (*dclk_freq > 0 && + *dclk_freq != dmi_mem_clk_speed) { + /* + * If we have a speed, check that all DIMMS are the same + * speed, otherwise set the speed as invalid. + */ + *dclk_freq = -1; + } + } +} + +/* + * The default DCLK frequency is used as a fallback if we + * fail to find anything reliable in the DMI. The value + * is taken straight from the datasheet. + */ +#define DEFAULT_DCLK_FREQ 800 + +static int get_dclk_freq(void) +{ + int dclk_freq = 0; + + dmi_walk(decode_dclk, (void *)&dclk_freq); + + if (dclk_freq < 1) + return DEFAULT_DCLK_FREQ; + + return dclk_freq; +} + +/* + * set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate + * to hardware according to SCRUBINTERVAL formula + * found in datasheet. + */ +static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) +{ + struct i7core_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + u32 dw_scrub; + u32 dw_ssr; + + /* Get data from the MC register, function 2 */ + pdev = pvt->pci_mcr[2]; + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub); + + if (new_bw == 0) { + /* Prepare to disable petrol scrub */ + dw_scrub &= ~STARTSCRUB; + /* Stop the patrol scrub engine */ + write_and_test(pdev, MC_SCRUB_CONTROL, + dw_scrub & ~SCRUBINTERVAL_MASK); + + /* Get current status of scrub rate and set bit to disable */ + pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); + dw_ssr &= ~SSR_MODE_MASK; + dw_ssr |= SSR_MODE_DISABLE; + } else { + const int cache_line_size = 64; + const u32 freq_dclk_mhz = pvt->dclk_freq; + unsigned long long scrub_interval; + /* + * Translate the desired scrub rate to a register value and + * program the corresponding register value. + */ + scrub_interval = (unsigned long long)freq_dclk_mhz * + cache_line_size * 1000000; + do_div(scrub_interval, new_bw); + + if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK) + return -EINVAL; + + dw_scrub = SCRUBINTERVAL_MASK & scrub_interval; + + /* Start the patrol scrub engine */ + pci_write_config_dword(pdev, MC_SCRUB_CONTROL, + STARTSCRUB | dw_scrub); + + /* Get current status of scrub rate and set bit to enable */ + pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr); + dw_ssr &= ~SSR_MODE_MASK; + dw_ssr |= SSR_MODE_ENABLE; + } + /* Disable or enable scrubbing */ + pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr); + + return new_bw; +} + +/* + * get_sdram_scrub_rate This routine convert current scrub rate value + * into byte/sec bandwidth accourding to + * SCRUBINTERVAL formula found in datasheet. + */ +static int get_sdram_scrub_rate(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev; + const u32 cache_line_size = 64; + const u32 freq_dclk_mhz = pvt->dclk_freq; + unsigned long long scrub_rate; + u32 scrubval; + + /* Get data from the MC register, function 2 */ + pdev = pvt->pci_mcr[2]; + if (!pdev) + return -ENODEV; + + /* Get current scrub control data */ + pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval); + + /* Mask highest 8-bits to 0 */ + scrubval &= SCRUBINTERVAL_MASK; + if (!scrubval) + return 0; + + /* Calculate scrub rate value into byte/sec bandwidth */ + scrub_rate = (unsigned long long)freq_dclk_mhz * + 1000000 * cache_line_size; + do_div(scrub_rate, scrubval); + return (int)scrub_rate; +} + +static void enable_sdram_scrub_setting(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + u32 pci_lock; + + /* Unlock writes to pci registers */ + pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock); + pci_lock &= ~0x3; + pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, + pci_lock | MC_CFG_UNLOCK); + + mci->set_sdram_scrub_rate = set_sdram_scrub_rate; + mci->get_sdram_scrub_rate = get_sdram_scrub_rate; +} + +static void disable_sdram_scrub_setting(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + u32 pci_lock; + + /* Lock writes to pci registers */ + pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock); + pci_lock &= ~0x3; + pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, + pci_lock | MC_CFG_LOCK); } static void i7core_pci_ctl_create(struct i7core_pvt *pvt) @@ -1874,7 +2197,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt) &pvt->i7core_dev->pdev[0]->dev, EDAC_MOD_STR); if (unlikely(!pvt->i7core_pci)) - pr_warn("Unable to setup PCI error report via EDAC\n"); + i7core_printk(KERN_WARNING, + "Unable to setup PCI error report via EDAC\n"); } static void i7core_pci_ctl_release(struct i7core_pvt *pvt) @@ -1906,8 +2230,11 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", __func__, mci, &i7core_dev->pdev[0]->dev); - /* Disable MCE NMI handler */ - edac_mce_unregister(&pvt->edac_mce); + /* Disable scrubrate setting */ + if (pvt->enable_scrub) + disable_sdram_scrub_setting(mci); + + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); /* Disable EDAC polling */ i7core_pci_ctl_release(pvt); @@ -1979,6 +2306,10 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* Set the function pointer to an actual operation function */ mci->edac_check = i7core_check_error; + /* Enable scrubrate setting */ + if (pvt->enable_scrub) + enable_sdram_scrub_setting(mci); + /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { debugf0("MC: " __FILE__ @@ -2002,21 +2333,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* allocating generic PCI control info */ i7core_pci_ctl_create(pvt); - /* Registers on edac_mce in order to receive memory errors */ - pvt->edac_mce.priv = mci; - pvt->edac_mce.check_error = i7core_mce_check_error; - rc = edac_mce_register(&pvt->edac_mce); - if (unlikely(rc < 0)) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mce_register()\n", __func__); - goto fail1; - } + /* DCLK for scrub rate setting */ + pvt->dclk_freq = get_dclk_freq(); + + atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); return 0; -fail1: - i7core_pci_ctl_release(pvt); - edac_mc_del_mc(mci->dev); fail0: kfree(mci->ctl_name); edac_mc_free(mci); @@ -2035,7 +2358,7 @@ fail0: static int __devinit i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - int rc; + int rc, count = 0; struct i7core_dev *i7core_dev; /* get the pci devices we want to reserve for our use */ @@ -2055,12 +2378,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev, goto fail0; list_for_each_entry(i7core_dev, &i7core_edac_list, list) { + count++; rc = i7core_register_mci(i7core_dev); if (unlikely(rc < 0)) goto fail1; } - i7core_printk(KERN_INFO, "Driver loaded.\n"); + /* + * Nehalem-EX uses a different memory controller. However, as the + * memory controller is not visible on some Nehalem/Nehalem-EP, we + * need to indirectly probe via a X58 PCI device. The same devices + * are found on (some) Nehalem-EX. So, on those machines, the + * probe routine needs to return -ENODEV, as the actual Memory + * Controller registers won't be detected. + */ + if (!count) { + rc = -ENODEV; + goto fail1; + } + + i7core_printk(KERN_INFO, + "Driver loaded, %d memory controller(s) found.\n", + count); mutex_unlock(&i7core_edac_lock); return 0; |