diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 67 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 46 |
4 files changed, 75 insertions, 45 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 475cb4f5f14f..c805a06e14c3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -48,7 +48,7 @@ static struct dentry *dfs_inj; static u8 n_banks; -#define MAX_FLAG_OPT_SIZE 3 +#define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 enum injection_type { diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 5bbd06f38ff6..f34d89c01edc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -160,6 +160,11 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), USER ), + MCESEV( + PANIC, "Data load in unrecoverable area of kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + KERNEL + ), #endif MCESEV( PANIC, "Action required: unknown MCACOD", diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 42cf2880d0ed..8c50754c09c1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll); static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { - int i, ret = 0; char *tmp; + int i; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(msr_ops.status(i)); - if (m->status & MCI_STATUS_VAL) { - __set_bit(i, validp); - if (quirk_no_way_out) - quirk_no_way_out(i, m, regs); - } + if (!(m->status & MCI_STATUS_VAL)) + continue; + + __set_bit(i, validp); + if (quirk_no_way_out) + quirk_no_way_out(i, m, regs); if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + mce_read_aux(m, i); *msg = tmp; - ret = 1; + return 1; } } - return ret; + return 0; } /* @@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code) lmce = m.mcgstatus & MCG_STATUS_LMCES; /* + * Local machine check may already know that we have to panic. + * Broadcast machine check begins rendezvous in mce_start() * Go through all banks in exclusion of the other CPUs. This way we * don't report duplicated events on shared banks because the first one - * to see it will clear it. If this is a Local MCE, then no need to - * perform rendezvous. + * to see it will clear it. */ - if (!lmce) + if (lmce) { + if (no_way_out) + mce_panic("Fatal local machine check", &m, msg); + } else { order = mce_start(&no_way_out); + } for (i = 0; i < cfg->banks; i++) { __clear_bit(i, toclear); @@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code) no_way_out = worst >= MCE_PANIC_SEVERITY; } else { /* - * Local MCE skipped calling mce_reign() - * If we found a fatal error, we need to panic here. + * If there was a fatal machine check we should have + * already called mce_panic earlier in this function. + * Since we re-read the banks, we might have found + * something new. Check again to see if we found a + * fatal error. We call "mce_severity()" again to + * make sure we have the right "msg". */ - if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) - mce_panic("Machine check from unknown source", - NULL, NULL); + if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { + mce_severity(&m, cfg->tolerant, &msg, true); + mce_panic("Local fatal machine check!", &m, msg); + } } /* @@ -1457,7 +1469,7 @@ static int __mcheck_cpu_mce_banks_init(void) int i; u8 num_banks = mca_cfg.banks; - mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; @@ -1727,6 +1739,21 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) } } +static void mce_centaur_feature_init(struct cpuinfo_x86 *c) +{ + struct mca_config *cfg = &mca_cfg; + + /* + * All newer Centaur CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || + c->x86 > 6) { + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = USEC_PER_SEC; + } +} + static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { @@ -1739,6 +1766,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_amd_feature_init(c); break; } + case X86_VENDOR_CENTAUR: + mce_centaur_feature_init(c); + break; default: break; @@ -2135,9 +2165,6 @@ static ssize_t store_int_with_restart(struct device *s, if (check_interval == old_check_interval) return ret; - if (check_interval < 1) - check_interval = 1; - mutex_lock(&mce_sysfs_mutex); mce_restart(); mutex_unlock(&mce_sysfs_mutex); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f7666eef4a87..dd33c357548f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = { [SMCA_SMU] = { "smu", "System Management Unit" }, }; +static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = +{ + [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } +}; + const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) @@ -431,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, - unsigned int block) +static u32 smca_get_block_address(unsigned int bank, unsigned int block) { u32 low, high; u32 addr = 0; @@ -443,24 +447,30 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); + /* Check our cache first: */ + if (smca_bank_addrs[bank][block] != -1) + return smca_bank_addrs[bank][block]; + /* * For SMCA enabled processors, BLKPTR field of the first MISC register * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). */ - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return addr; + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + goto out; if (!(low & MCI_CONFIG_MCAX)) - return addr; + goto out; - if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && (low & MASK_BLKPTR_LO)) - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); +out: + smca_bank_addrs[bank][block] = addr; return addr; } -static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high, +static u32 get_block_address(u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block) { u32 addr = 0, offset = 0; @@ -468,20 +478,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) return addr; - /* Get address from already initialized block. */ - if (per_cpu(threshold_banks, cpu)) { - struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank]; - - if (bankp && bankp->blocks) { - struct threshold_block *blockp = &bankp->blocks[block]; - - if (blockp) - return blockp->address; - } - } - if (mce_flags.smca) - return smca_get_block_address(cpu, bank, block); + return smca_get_block_address(bank, block); /* Fall back to method we used for older processors: */ switch (block) { @@ -559,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) smca_configure(bank, cpu); for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(cpu, address, low, high, bank, block); + address = get_block_address(address, low, high, bank, block); if (!address) break; @@ -1176,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - address = get_block_address(cpu, address, low, high, bank, ++block); + address = get_block_address(address, low, high, bank, ++block); if (!address) return 0; @@ -1386,7 +1384,7 @@ int mce_threshold_create_device(unsigned int cpu) if (bp) return 0; - bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks, + bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *), GFP_KERNEL); if (!bp) return -ENOMEM; |

