Diffstat (limited to 'arch/x86/kernel')
53 files changed, 339 insertions, 347 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2f69e3b184f6..8a5cddac7d44 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -82,6 +82,12 @@ physid_mask_t phys_cpu_present_map; static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; /* + * This variable controls which CPUs receive external NMIs. By default, + * external NMIs are delivered only to the BSP. + */ +static int apic_extnmi = APIC_EXTNMI_BSP; + +/* * Map cpu index to physical APIC ID */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); @@ -1161,6 +1167,8 @@ void __init init_bsp_APIC(void) value = APIC_DM_NMI; if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; + if (apic_extnmi == APIC_EXTNMI_NONE) + value |= APIC_LVT_MASKED; apic_write(APIC_LVT1, value); } @@ -1378,9 +1386,11 @@ void setup_local_APIC(void) apic_write(APIC_LVT0, value); /* - * only the BP should see the LINT1 NMI signal, obviously. + * Only the BSP sees the LINT1 NMI signal by default. This can be + * modified by apic_extnmi= boot option. */ - if (!cpu) + if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) || + apic_extnmi == APIC_EXTNMI_ALL) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; @@ -2270,6 +2280,7 @@ static struct { unsigned int apic_tmict; unsigned int apic_tdcr; unsigned int apic_thmr; + unsigned int apic_cmci; } apic_pm_state; static int lapic_suspend(void) @@ -2299,6 +2310,10 @@ static int lapic_suspend(void) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 6) + apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); +#endif local_irq_save(flags); disable_local_APIC(); @@ -2355,10 +2370,14 @@ static void lapic_resume(void) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_INTEL) +#ifdef CONFIG_X86_THERMAL_VECTOR if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 6) + apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci); +#endif if (maxlvt >= 4) apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); @@ -2548,3 +2567,23 @@ static int __init apic_set_disabled_cpu_apicid(char *arg) return 0; } early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); + +static int __init apic_set_extnmi(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strncmp("all", arg, 3)) + apic_extnmi = APIC_EXTNMI_ALL; + else if (!strncmp("none", arg, 4)) + apic_extnmi = APIC_EXTNMI_NONE; + else if (!strncmp("bsp", arg, 3)) + apic_extnmi = APIC_EXTNMI_BSP; + else { + pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic_extnmi", apic_set_extnmi); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f92ab36979a2..9968f30cca3e 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -185,6 +185,7 @@ static struct apic apic_flat = { .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_allbutself = flat_send_IPI_allbutself, @@ -230,17 +231,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void physflat_send_IPI_mask(const 
struct cpumask *cpumask, int vector) -{ - default_send_IPI_mask_sequence_phys(cpumask, vector); -} - -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - default_send_IPI_mask_allbutself_phys(cpumask, vector); -} - static void physflat_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -248,7 +238,7 @@ static void physflat_send_IPI_allbutself(int vector) static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_mask, vector); + default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); } static int physflat_probe(void) @@ -292,8 +282,9 @@ static struct apic apic_physflat = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, + .send_IPI = default_send_IPI_single_phys, + .send_IPI_mask = default_send_IPI_mask_sequence_phys, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 0d96749cfcac..331a7a07c48f 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -30,6 +30,7 @@ #include <asm/e820.h> static void noop_init_apic_ldr(void) { } +static void noop_send_IPI(int cpu, int vector) { } static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } static void noop_send_IPI_allbutself(int vector) { } @@ -144,6 +145,7 @@ struct apic apic_noop = { .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, .send_IPI_allbutself = noop_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 2bd2292a316d..c80c02c6ec49 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -273,6 +273,7 @@ static const struct apic apic_numachip1 __refconst = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, .send_IPI_allbutself = numachip_send_IPI_allbutself, @@ -324,6 +325,7 @@ static const struct apic apic_numachip2 __refconst = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, .send_IPI_allbutself = numachip_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 971cf8875939..cf9bd896c12d 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -96,11 +96,6 @@ static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_phys(mask, vector); -} - static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -108,7 +103,7 @@ static void bigsmp_send_IPI_allbutself(int vector) static void bigsmp_send_IPI_all(int vector) { 
- bigsmp_send_IPI_mask(cpu_online_mask, vector); + default_send_IPI_mask_sequence_phys(cpu_online_mask, vector); } static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -180,7 +175,8 @@ static struct apic apic_bigsmp = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = bigsmp_send_IPI_mask, + .send_IPI = default_send_IPI_single_phys, + .send_IPI_mask = default_send_IPI_mask_sequence_phys, .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 62071569bd50..eb45fc9b6124 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -18,6 +18,16 @@ #include <asm/proto.h> #include <asm/ipi.h> +void default_send_IPI_single_phys(int cpu, int vector) +{ + unsigned long flags; + + local_irq_save(flags); + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); +} + void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) { unsigned long query_cpu; @@ -55,6 +65,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, local_irq_restore(flags); } +/* + * Helper function for APICs which insist on cpumasks + */ +void default_send_IPI_single(int cpu, int vector) +{ + apic->send_IPI_mask(cpumask_of(cpu), vector); +} + #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 5f1feb6854af..ade25320df96 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -96,8 +96,8 @@ static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, return arg->msi_hwirq; } -static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, + msi_alloc_info_t *arg) { struct pci_dev *pdev = to_pci_dev(dev); struct msi_desc *desc = first_pci_msi_entry(pdev); @@ -113,11 +113,13 @@ static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, return 0; } +EXPORT_SYMBOL_GPL(pci_msi_prepare); -static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); } +EXPORT_SYMBOL_GPL(pci_msi_set_desc); static struct msi_domain_ops pci_msi_domain_ops = { .get_hwirq = pci_msi_get_hwirq, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 7694ae6c1199..f316e34abb42 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -105,6 +105,7 @@ static struct apic apic_default = { .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 861bc59c8f25..908cb37da171 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -29,6 +29,7 @@ struct apic_chip_data { }; struct irq_domain *x86_vector_domain; +EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); static cpumask_var_t vector_cpumask; static struct irq_chip lapic_controller; @@ -66,6 +67,7 @@ 
struct irq_cfg *irqd_cfg(struct irq_data *irq_data) return data ? &data->cfg : NULL; } +EXPORT_SYMBOL_GPL(irqd_cfg); struct irq_cfg *irq_cfg(unsigned int irq) { diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index cc8311c4d298..aca8b75c1552 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -23,6 +23,14 @@ static inline u32 x2apic_cluster(int cpu) return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; } +static void x2apic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); + + x2apic_wrmsr_fence(); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); +} + static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { @@ -266,6 +274,7 @@ static struct apic apic_x2apic_cluster = { .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_allbutself = x2apic_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 662e9150ea6f..a1242e2c12e6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -36,6 +36,14 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } +static void x2apic_send_IPI(int cpu, int vector) +{ + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); + + x2apic_wrmsr_fence(); + __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); +} + static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { @@ -122,6 +130,7 @@ static struct apic apic_x2apic_phys = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_allbutself = x2apic_send_IPI_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 4a139465f1d4..d760c6bb37b5 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -406,6 +406,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, + .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_allbutself = uv_send_IPI_allbutself, diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 439df975bc7a..84a7524b202c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -65,9 +65,6 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); -#ifdef CONFIG_X86_32 - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); -#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d8f42f902a0f..f2edafb5f24e 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -23,7 +23,6 @@ int main(void) { #ifdef CONFIG_PARAVIRT OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); - OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, 
usergs_sysret64); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); BLANK(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a8816b325162..e678ddeed030 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -304,7 +304,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) int cpu = smp_processor_id(); /* get information required for multi-node processors */ - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); @@ -678,9 +678,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) * Disable it on the affected CPUs. */ if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { + if (!rdmsrl_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) { value |= 0x1E; - wrmsrl_safe(0xc0011021, value); + wrmsrl_safe(MSR_F15H_IC_CFG, value); } } } @@ -922,7 +922,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) void set_dr_addr_mask(unsigned long mask, int dr) { - if (!cpu_has_bpext) + if (!boot_cpu_has(X86_FEATURE_BPEXT)) return; switch (dr) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index d8fba5c15fbd..ae20be6e483c 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -43,7 +43,7 @@ static void init_c3(struct cpuinfo_x86 *c) /* store Centaur Extended Feature Flags as * word 5 of the CPU capability bit array */ - c->x86_capability[5] = cpuid_edx(0xC0000001); + c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001); } #ifdef CONFIG_X86_32 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c2b7522cbf35..37830de8f60a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -581,14 +581,9 @@ void cpu_detect(struct cpuinfo_x86 *c) u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; + c->x86 = x86_family(tfms); + c->x86_model = x86_model(tfms); + c->x86_mask = x86_stepping(tfms); if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -599,50 +594,47 @@ void cpu_detect(struct cpuinfo_x86 *c) void get_cpu_cap(struct cpuinfo_x86 *c) { - u32 tfms, xlvl; - u32 ebx; + u32 eax, ebx, ecx, edx; /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; + c->x86_capability[CPUID_1_ECX] = ecx; + c->x86_capability[CPUID_1_EDX] = edx; } /* Additional Intel-defined flags: level 0x00000007 */ if (c->cpuid_level >= 0x00000007) { - u32 eax, ebx, ecx, edx; - cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[9] = ebx; + c->x86_capability[CPUID_7_0_EBX] = ebx; + + c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); } /* Extended state features: level 0x0000000d */ if (c->cpuid_level >= 0x0000000d) { - u32 eax, ebx, ecx, edx; - cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[10] = eax; + c->x86_capability[CPUID_D_1_EAX] = eax; } /* Additional Intel-defined flags: level 0x0000000F */ if (c->cpuid_level >= 0x0000000F) { - u32 eax, ebx, ecx, 
edx; /* QoS sub-leaf, EAX=0Fh, ECX=0 */ cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[11] = edx; + c->x86_capability[CPUID_F_0_EDX] = edx; + if (cpu_has(c, X86_FEATURE_CQM_LLC)) { /* will be overridden if occupancy monitoring exists */ c->x86_cache_max_rmid = ebx; /* QoS sub-leaf, EAX=0Fh, ECX=1 */ cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[12] = edx; + c->x86_capability[CPUID_F_1_EDX] = edx; + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; @@ -654,22 +646,24 @@ void get_cpu_cap(struct cpuinfo_x86 *c) } /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; + eax = cpuid_eax(0x80000000); + c->extended_cpuid_level = eax; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); + if ((eax & 0xffff0000) == 0x80000000) { + if (eax >= 0x80000001) { + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + + c->x86_capability[CPUID_8000_0001_ECX] = ecx; + c->x86_capability[CPUID_8000_0001_EDX] = edx; } } if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; - c->x86_capability[13] = cpuid_ebx(0x80000008); + c->x86_capability[CPUID_8000_0008_EBX] = ebx; } #ifdef CONFIG_X86_32 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) @@ -679,6 +673,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); + if (c->extended_cpuid_level >= 0x8000000a) + c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + init_scattered_cpuid_features(c); } @@ -1185,7 +1182,7 @@ void syscall_init(void) * They both write to the same internal register. STAR allows to * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. 
*/ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION @@ -1443,7 +1440,9 @@ void cpu_init(void) printk(KERN_INFO "Initializing CPU#%d\n", cpu); - if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) + if (cpu_feature_enabled(X86_FEATURE_VME) || + cpu_has_tsc || + boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_current_idt(); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 209ac1e7d1f0..565648bc1a0a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -445,7 +445,8 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { + + if (boot_cpu_has(X86_FEATURE_DS)) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index e38d338a6447..0b6c52388cf4 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -591,7 +591,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) unsigned edx; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (cpu_has_topoext) + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &edx); else @@ -637,7 +637,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) void init_amd_cacheinfo(struct cpuinfo_x86 *c) { - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { num_cache_leaves = find_num_cache_leaves(c); } else if (c->extended_cpuid_level >= 0x80000006) { if (cpuid_edx(0x80000006) & 0xf000) @@ -809,7 +809,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, struct cacheinfo *this_leaf; int i, sibling; - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; this_leaf = this_cpu_ci->info_list + index; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7e8a736d09db..a006f4cd792b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -114,7 +114,6 @@ static struct work_struct mce_work; static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); -static int mce_usable_address(struct mce *m); /* * CPU/chipset specific EDAC code can register a notifier call here to print @@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&mce_irq_work); } +/* + * Check if the address reported by the CPU is in a format we can parse. + * It would be possible to add code for most other cases, but all would + * be somewhat complicated (e.g. segment offset would require an instruction + * parser). So only support physical addresses up to page granuality for now. 
+ */ +static int mce_usable_address(struct mce *m) +{ + if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) + return 0; + + /* Checks after this one are Intel-specific: */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 1; + + if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) + return 0; + if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) + return 0; + return 1; +} + static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (!mce) return NOTIFY_DONE; - if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { + if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; memory_failure(pfn, MCE_VECTOR, 0); } @@ -522,10 +543,10 @@ static bool memory_error(struct mce *m) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor == X86_VENDOR_AMD) { - /* - * coming soon - */ - return false; + /* ErrCodeExt[20:16] */ + u8 xec = (m->status >> 16) & 0x1f; + + return (xec == 0x0 || xec == 0x8); } else if (c->x86_vendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes @@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); */ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { - bool error_logged = false; + bool error_seen = false; struct mce m; int severity; int i; @@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; + error_seen = true; + mce_read_aux(&m, i); if (!(flags & MCP_TIMESTAMP)) @@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - /* - * In the cases where we don't have a valid address after all, - * do not add it into the ring buffer. - */ - if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { - if (m.status & MCI_STATUS_ADDRV) { + if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) + if (m.status & MCI_STATUS_ADDRV) m.severity = severity; - m.usable_addr = mce_usable_address(&m); - - if (!mce_gen_pool_add(&m)) - mce_schedule_work(); - } - } /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { - error_logged = true; + if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) mce_log(&m); + else if (mce_usable_address(&m)) { + /* + * Although we skipped logging this, we still want + * to take action. Add to the pool so the registered + * notifiers will see it. + */ + if (!mce_gen_pool_add(&m)) + mce_schedule_work(); } /* @@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) sync_core(); - return error_logged; + return error_seen; } EXPORT_SYMBOL_GPL(machine_check_poll); @@ -931,23 +951,6 @@ reset: return ret; } -/* - * Check if the address reported by the CPU is in a format we can parse. - * It would be possible to add code for most other cases, but all would - * be somewhat complicated (e.g. segment offset would require an instruction - * parser). So only support physical addresses up to page granuality for now. 
- */ -static int mce_usable_address(struct mce *m) -{ - if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) - return 0; - if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) - return 0; - if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) - return 0; - return 1; -} - static void mce_clear_state(unsigned long *toclear) { int i; @@ -1100,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* assuming valid severity level != 0 */ m.severity = severity; - m.usable_addr = mce_usable_address(&m); mce_log(&m); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b3e94ef461fd..faec7120c508 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -129,8 +129,8 @@ void __init load_ucode_bsp(void) if (!have_cpuid_p()) return; - vendor = x86_vendor(); - family = x86_family(); + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); switch (vendor) { case X86_VENDOR_INTEL: @@ -165,8 +165,8 @@ void load_ucode_ap(void) if (!have_cpuid_p()) return; - vendor = x86_vendor(); - family = x86_family(); + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); switch (vendor) { case X86_VENDOR_INTEL: @@ -206,8 +206,8 @@ void reload_early_microcode(void) { int vendor, family; - vendor = x86_vendor(); - family = x86_family(); + vendor = x86_cpuid_vendor(); + family = x86_cpuid_family(); switch (vendor) { case X86_VENDOR_INTEL: diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ce47402eb2f9..ee81c544ee0d 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -145,10 +145,10 @@ matching_model_microcode(struct microcode_header_intel *mc_header, int ext_sigcount, i; struct extended_signature *ext_sig; - fam = __x86_family(sig); + fam = x86_family(sig); model = x86_model(sig); - fam_ucode = __x86_family(mc_header->sig); + fam_ucode = x86_family(mc_header->sig); model_ucode = x86_model(mc_header->sig); if (fam == fam_ucode && model == model_ucode) @@ -163,7 +163,7 @@ matching_model_microcode(struct microcode_header_intel *mc_header, ext_sigcount = ext_header->count; for (i = 0; i < ext_sigcount; i++) { - fam_ucode = __x86_family(ext_sig->sig); + fam_ucode = x86_family(ext_sig->sig); model_ucode = x86_model(ext_sig->sig); if (fam == fam_ucode && model == model_ucode) @@ -365,7 +365,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_cpuid(&eax, &ebx, &ecx, &edx); csig.sig = eax; - family = __x86_family(csig.sig); + family = x86_family(csig.sig); model = x86_model(csig.sig); if ((model >= 5) || (family > 6)) { @@ -521,16 +521,12 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp) { #ifdef CONFIG_X86_64 unsigned int eax = 0x00000001, ebx, ecx = 0, edx; - unsigned int family, model, stepping; char name[30]; native_cpuid(&eax, &ebx, &ecx, &edx); - family = __x86_family(eax); - model = x86_model(eax); - stepping = eax & 0xf; - - sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping); + sprintf(name, "intel-ucode/%02x-%02x-%02x", + x86_family(eax), x86_model(eax), x86_stepping(eax)); return get_builtin_firmware(cp, name); #else diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 70d7c93f4550..0d98503c2245 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, unsigned long x_remove_base, unsigned long 
x_remove_size, int i) { - static struct range range_new[RANGE_NUM]; + /* + * range_new should really be an automatic variable, but + * putting 4096 bytes on the stack is frowned upon, to put it + * mildly. It is safe to make it a static __initdata variable, + * since mtrr_calc_range_state is only called during init and + * there's no way it will call itself recursively. + */ + static struct range range_new[RANGE_NUM] __initdata; unsigned long range_sums_new; - static int nr_range_new; + int nr_range_new; int num_reg; /* Convert ranges to var ranges state: */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 3b533cf37c74..c870af161008 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -349,7 +349,7 @@ static void get_fixed_ranges(mtrr_type *frs) void mtrr_save_fixed_ranges(void *info) { - if (cpu_has_mtrr) + if (boot_cpu_has(X86_FEATURE_MTRR)) get_fixed_ranges(mtrr_state.fixed_ranges); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index f891b4750f04..5c3d149ee91c 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -682,7 +682,7 @@ void __init mtrr_bp_init(void) phys_addr = 32; - if (cpu_has_mtrr) { + if (boot_cpu_has(X86_FEATURE_MTRR)) { mtrr_if = &generic_mtrr_ops; size_or_mask = SIZE_OR_MASK_BITS(36); size_and_mask = 0x00f00000; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 05e76bf65781..58610539b048 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -160,7 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) if (offset) return offset; - if (!cpu_has_perfctr_core) + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) offset = index; else offset = index << 1; @@ -652,7 +652,7 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { - if (!cpu_has_perfctr_core) + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; switch (boot_cpu_data.x86) { diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index cc6cedb8f25d..49742746a6c9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -523,10 +523,10 @@ static int __init amd_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) goto fail_nodev; - if (!cpu_has_topoext) + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) goto fail_nodev; - if (cpu_has_perfctr_nb) { + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { amd_uncore_nb = alloc_percpu(struct amd_uncore *); if (!amd_uncore_nb) { ret = -ENOMEM; @@ -540,7 +540,7 @@ static int __init amd_uncore_init(void) ret = 0; } - if (cpu_has_perfctr_l2) { + if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) { amd_uncore_l2 = alloc_percpu(struct amd_uncore *); if (!amd_uncore_l2) { ret = -ENOMEM; @@ -583,10 +583,11 @@ fail_online: /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */ amd_uncore_nb = amd_uncore_l2 = NULL; - if (cpu_has_perfctr_l2) + + if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) perf_pmu_unregister(&amd_l2_pmu); fail_l2: - if (cpu_has_perfctr_nb) + if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); if (amd_uncore_l2) free_percpu(amd_uncore_l2); diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 136ac74dee82..819d94982e07 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -33,28 +33,27 @@ static int __init 
x86_rdrand_setup(char *s) __setup("nordrand", x86_rdrand_setup); /* - * Force a reseed cycle; we are architecturally guaranteed a reseed - * after no more than 512 128-bit chunks of random data. This also - * acts as a test of the CPU capability. + * RDRAND has Built-In-Self-Test (BIST) that runs on every invocation. + * Run the instruction a few times as a sanity check. + * If it fails, it is simple to disable RDRAND here. */ -#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) +#define SANITY_CHECK_LOOPS 8 void x86_init_rdrand(struct cpuinfo_x86 *c) { #ifdef CONFIG_ARCH_RANDOM unsigned long tmp; - int i, count, ok; + int i; if (!cpu_has(c, X86_FEATURE_RDRAND)) - return; /* Nothing to do */ + return; - for (count = i = 0; i < RESEED_LOOP; i++) { - ok = rdrand_long(&tmp); - if (ok) - count++; + for (i = 0; i < SANITY_CHECK_LOOPS; i++) { + if (!rdrand_long(&tmp)) { + clear_cpu_cap(c, X86_FEATURE_RDRAND); + printk_once(KERN_WARNING "rdrand: disabled\n"); + return; + } } - - if (count != RESEED_LOOP) - clear_cpu_cap(c, X86_FEATURE_RDRAND); #endif } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 608fb26c7254..8cb57df9398d 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,32 +31,12 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, - { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, - { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, - { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, - { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, - { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 }, - { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 }, - { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, - { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, - { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, - { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, - { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, - { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, - { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, - { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 }, - { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 }, - { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 }, - { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, - { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, - { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 3fa0e5ad86b4..252da7aceca6 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,7 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) xlvl = cpuid_eax(0x80860000); if ((xlvl & 0xffff0000) == 0x80860000) { if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); + c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } } @@ -82,7 +82,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) /* Unhide possibly hidden capability flags */ rdmsr(0x80860004, cap_mask, uk); wrmsr(0x80860004, ~0, uk); - c->x86_capability[0] = cpuid_edx(0x00000001); + c->x86_capability[CPUID_1_EDX] = 
cpuid_edx(0x00000001); wrmsr(0x80860004, cap_mask, uk); /* All Transmeta CPUs have a constant TSC */ diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index bd3507da39f0..2836de390f95 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -58,28 +58,6 @@ static void cpuid_smp_cpuid(void *cmd_block) &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); } -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file->f_mapping->host; - - mutex_lock(&inode->i_mutex); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - static ssize_t cpuid_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -132,7 +110,7 @@ static int cpuid_open(struct inode *inode, struct file *file) */ static const struct file_operations cpuid_fops = { .owner = THIS_MODULE, - .llseek = cpuid_seek, + .llseek = no_seek_end_llseek, .read = cpuid_read, .open = cpuid_open, }; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index be39b5fde4b9..7b2978ab30df 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -12,7 +12,7 @@ */ static void fpu__init_cpu_ctx_switch(void) { - if (!cpu_has_eager_fpu) + if (!boot_cpu_has(X86_FEATURE_EAGER_FPU)) stts(); else clts(); @@ -143,9 +143,18 @@ static void __init fpu__init_system_generic(void) unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -/* Enforce that 'MEMBER' is the last field of 'TYPE': */ +/* Get alignment of the TYPE. */ +#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) + +/* + * Enforce that 'MEMBER' is the last field of 'TYPE'. + * + * Align the computed size with alignment of the TYPE, + * because that's how C aligns structs. 
+ */ #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ - BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) + BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \ + TYPE_ALIGN(TYPE))) /* * We append the 'struct fpu' to the task_struct: @@ -188,7 +197,7 @@ static void __init fpu__init_task_struct_size(void) */ static void __init fpu__init_system_xstate_size_legacy(void) { - static int on_boot_cpu = 1; + static int on_boot_cpu __initdata = 1; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -278,7 +287,7 @@ __setup("eagerfpu=", eager_fpu_setup); */ static void __init fpu__init_system_ctx_switch(void) { - static bool on_boot_cpu = 1; + static bool on_boot_cpu __initdata = 1; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -287,7 +296,7 @@ static void __init fpu__init_system_ctx_switch(void) current_thread_info()->status = 0; /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) + if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) eagerfpu = ENABLE; if (xfeatures_mask & XFEATURE_MASK_EAGER) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 70fc312221fc..40f100285984 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -297,7 +297,7 @@ static void __init setup_xstate_comp(void) */ static void __init setup_init_fpu_buf(void) { - static int on_boot_cpu = 1; + static int on_boot_cpu __initdata = 1; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -608,7 +608,7 @@ static void fpu__init_disable_system_xstate(void) void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; - static int on_boot_cpu = 1; + static int on_boot_cpu __initdata = 1; int err; WARN_ON_FPU(!on_boot_cpu); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf338f07..29408d6d6626 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -105,14 +105,14 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, { unsigned char replaced[MCOUNT_INSN_SIZE]; + ftrace_expected = old_code; + /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. 
*/ /* read the text we want to modify */ @@ -154,6 +154,8 @@ int ftrace_make_nop(struct module *mod, if (addr == MCOUNT_ADDR) return ftrace_modify_code_direct(rec->ip, old, new); + ftrace_expected = NULL; + /* Normal cases use add_brk_on_nop */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; @@ -220,6 +222,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); + ftrace_expected = NULL; return -EINVAL; } @@ -314,6 +317,8 @@ static int add_break(unsigned long ip, const char *old) if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; + ftrace_expected = old; + /* Make sure it is what we expect it to be */ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; @@ -413,6 +418,8 @@ static int remove_breakpoint(struct dyn_ftrace *rec) ftrace_addr = ftrace_get_addr_curr(rec); nop = ftrace_call_replace(ip, ftrace_addr); + ftrace_expected = nop; + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 50a3fad5b89f..2bcfb5f2bc44 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -300,6 +300,10 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; if (bp->attr.bp_addr & (bp->attr.bp_len - 1)) return -EINVAL; + + if (!boot_cpu_has(X86_FEATURE_BPEXT)) + return -EOPNOTSUPP; + /* * It's impossible to use a range breakpoint to fake out * user vs kernel detection because bp_len - 1 can't @@ -307,8 +311,6 @@ static int arch_build_bp_info(struct perf_event *bp) * breakpoints, then we'll have to check for kprobe-blacklisted * addresses anywhere in the range. */ - if (!cpu_has_bpext) - return -EOPNOTSUPP; info->mask = bp->attr.bp_len - 1; info->len = X86_BREAKPOINT_LEN_1; } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2bd81e302427..72cef58693c7 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock); static struct pvclock_vsyscall_time_info *hv_clock; static struct pvclock_wall_clock wall_clock; +struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) +{ + return hv_clock; +} + /* * The wallclock is the time of day when we booted. Since then, some time may * have elapsed since the hypervisor wrote the data. 
So we try to account for @@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void) { #ifdef CONFIG_X86_64 int cpu; - int ret; u8 flags; struct pvclock_vcpu_time_info *vcpu_time; unsigned int size; @@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void) return 1; } - if ((ret = pvclock_init_vsyscall(hv_clock, size))) { - put_cpu(); - return ret; - } - put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 113e70784854..64f9616f93f1 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -45,28 +45,6 @@ static struct class *msr_class; -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - struct inode *inode = file_inode(file); - - mutex_lock(&inode->i_mutex); - switch (orig) { - case SEEK_SET: - file->f_pos = offset; - ret = file->f_pos; - break; - case SEEK_CUR: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - mutex_unlock(&inode->i_mutex); - return ret; -} - static ssize_t msr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -194,7 +172,7 @@ static int msr_open(struct inode *inode, struct file *file) */ static const struct file_operations msr_fops = { .owner = THIS_MODULE, - .llseek = msr_seek, + .llseek = no_seek_end_llseek, .read = msr_read, .write = msr_write, .open = msr_open, diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 697f90db0e37..8a2cdd736fa4 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -29,6 +29,7 @@ #include <asm/mach_traps.h> #include <asm/nmi.h> #include <asm/x86_init.h> +#include <asm/reboot.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -231,7 +232,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) #endif if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); + nmi_panic(regs, "NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); @@ -255,8 +256,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason, smp_processor_id()); show_regs(regs); - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); + if (panic_on_io_nmi) { + nmi_panic(regs, "NMI IOCK error: Not continuing"); + + /* + * If we end up here, it means we have received an NMI while + * processing panic(). Simply return without delaying and + * re-enabling NMIs. + */ + return; + } /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; @@ -297,7 +306,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); + nmi_panic(regs, "NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); } @@ -348,8 +357,19 @@ static void default_do_nmi(struct pt_regs *regs) return; } - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); + /* + * Non-CPU-specific NMI: NMI sources can be processed on any CPU. + * + * Another CPU may be processing panic routines while holding + * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping, + * and if so, call its callback directly. If there is no CPU preparing + * crash dump, we simply loop here. 
+ */ + while (!raw_spin_trylock(&nmi_reason_lock)) { + run_crash_ipi_callback(regs); + cpu_relax(); + } + reason = x86_platform.get_nmi_reason(); if (reason & NMI_REASON_MASK) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c2130aef3f9d..f08ac28b8136 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -74,16 +74,6 @@ void __init default_banner(void) /* Undefined instruction for dealing with missing ops pointers. */ static const unsigned char ud2a[] = { 0x0f, 0x0b }; -unsigned paravirt_patch_nop(void) -{ - return 0; -} - -unsigned paravirt_patch_ignore(unsigned len) -{ - return len; -} - struct branch { unsigned char opcode; u32 delta; @@ -133,7 +123,6 @@ static void *get_call_destination(u8 type) .pv_time_ops = pv_time_ops, .pv_cpu_ops = pv_cpu_ops, .pv_irq_ops = pv_irq_ops, - .pv_apic_ops = pv_apic_ops, .pv_mmu_ops = pv_mmu_ops, #ifdef CONFIG_PARAVIRT_SPINLOCKS .pv_lock_ops = pv_lock_ops, @@ -152,8 +141,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); else if (opfunc == _paravirt_nop) - /* If the operation is a nop, then nop the callsite */ - ret = paravirt_patch_nop(); + ret = 0; /* identity functions just return their single argument */ else if (opfunc == _paravirt_ident_32) @@ -162,10 +150,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || -#ifdef CONFIG_X86_32 - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || -#endif - type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); @@ -220,8 +204,6 @@ static u64 native_steal_clock(int cpu) /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); -extern void native_usergs_sysret32(void); extern void native_usergs_sysret64(void); static struct resource reserve_ioports = { @@ -379,13 +361,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) - .irq_enable_sysexit = native_irq_enable_sysexit, -#endif #ifdef CONFIG_X86_64 -#ifdef CONFIG_IA32_EMULATION - .usergs_sysret32 = native_usergs_sysret32, -#endif .usergs_sysret64 = native_usergs_sysret64, #endif .iret = native_iret, @@ -403,12 +379,6 @@ NOKPROBE_SYMBOL(native_get_debugreg); NOKPROBE_SYMBOL(native_set_debugreg); NOKPROBE_SYMBOL(native_load_idt); -struct pv_apic_ops pv_apic_ops = { -#ifdef CONFIG_X86_LOCAL_APIC - .startup_ipi_hook = paravirt_nop, -#endif -}; - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) @@ -444,9 +414,6 @@ struct pv_mmu_ops pv_mmu_ops = { .set_pmd = native_set_pmd, .set_pmd_at = native_set_pmd_at, .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - .pmd_update = paravirt_nop, - .pmd_update_defer = paravirt_nop, .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, @@ -492,6 +459,5 @@ struct pv_mmu_ops pv_mmu_ops = { EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); -EXPORT_SYMBOL_GPL(pv_apic_ops); EXPORT_SYMBOL_GPL(pv_info); EXPORT_SYMBOL (pv_irq_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c 
b/arch/x86/kernel/paravirt_patch_32.c index c89f50a76e97..158dc0650d5d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 8aa05583bc42..e70087a04cc8 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); -DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, "mov %edi, %eax"); @@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); PATCH_SITE(pv_mmu_ops, read_cr2); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 0497f719977d..833b1d329c47 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -180,13 +180,13 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl); static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); static void get_tce_space_from_tar(void); -static struct cal_chipset_ops calgary_chip_ops = { +static const struct cal_chipset_ops calgary_chip_ops = { .handle_quirks = calgary_handle_quirks, .tce_cache_blast = calgary_tce_cache_blast, .dump_error_regs = calgary_dump_error_regs }; -static struct cal_chipset_ops calioc2_chip_ops = { +static const struct cal_chipset_ops calioc2_chip_ops = { .handle_quirks = calioc2_handle_quirks, .tce_cache_blast = calioc2_tce_cache_blast, .dump_error_regs = calioc2_dump_error_regs diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index adf0392d549a..7c577a178859 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if (!no_iommu && max_pfn > MAX_DMA32_PFN) + if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif return swiotlb; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e835d263a33b..b9d99e0f82c4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task) if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? 
<%p/%d>\n", dead_task->comm, - dead_task->mm->context.ldt, + dead_task->mm->context.ldt->entries, dead_task->mm->context.ldt->size); BUG(); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 558f50edebca..32e9d9cbb884 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -124,21 +124,6 @@ const char *regs_query_register_name(unsigned int offset) return NULL; } -static const int arg_offs_table[] = { -#ifdef CONFIG_X86_32 - [0] = offsetof(struct pt_regs, ax), - [1] = offsetof(struct pt_regs, dx), - [2] = offsetof(struct pt_regs, cx) -#else /* CONFIG_X86_64 */ - [0] = offsetof(struct pt_regs, di), - [1] = offsetof(struct pt_regs, si), - [2] = offsetof(struct pt_regs, dx), - [3] = offsetof(struct pt_regs, cx), - [4] = offsetof(struct pt_regs, r8), - [5] = offsetof(struct pt_regs, r9) -#endif -}; - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d229a58..99bfc025111d 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } - -#ifdef CONFIG_X86_64 -/* - * Initialize the generic pvclock vsyscall state. This will allocate - * a/some page(s) for the per-vcpu pvclock information, set up a - * fixmap mapping for the page(s) - */ - -int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, - int size) -{ - int idx; - - WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); - - for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { - __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, - __pa(i) + (idx*PAGE_SIZE), - PAGE_KERNEL_VVAR); - } - - return 0; -} -#endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 02693dd9a079..d64889aa2d46 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -718,6 +718,7 @@ static int crashing_cpu; static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; +static int crash_ipi_issued; static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) { @@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) smp_send_nmi_allbutself(); + /* Kick CPUs looping in NMI context. */ + WRITE_ONCE(crash_ipi_issued, 1); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { mdelay(1); @@ -788,9 +792,35 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* Leave the nmi callback set */ } + +/* + * Check if the crash dumping IPI got issued and if so, call its callback + * directly. This function is used when we have already been in NMI handler. + * It doesn't return. + */ +void run_crash_ipi_callback(struct pt_regs *regs) +{ + if (crash_ipi_issued) + crash_nmi_callback(0, regs); +} + +/* Override the weak function in kernel/panic.c */ +void nmi_panic_self_stop(struct pt_regs *regs) +{ + while (1) { + /* If no CPU is preparing crash dump, we simply loop here. 
*/ + run_crash_ipi_callback(regs); + cpu_relax(); + } +} + #else /* !CONFIG_SMP */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { /* No other CPUs to shoot down */ } + +void run_crash_ipi_callback(struct pt_regs *regs) +{ +} #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d2bbe343fda7..d3d80e6d42a2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p) if (mtrr_trim_uncached_memory(max_pfn)) max_pfn = e820_end_of_ram_pfn(); + max_possible_pfn = max_pfn; + #ifdef CONFIG_X86_32 /* max_low_pfn get updated here */ find_low_pfn_range(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 12c8286206ce..658777cf3851 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -125,12 +125,12 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + apic->send_IPI(cpu, RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR); } void native_send_call_func_ipi(const struct cpumask *mask) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fbabe4fcc7fb..24d57f77b3c1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -304,7 +304,7 @@ do { \ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (cpu_has_topoext) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && @@ -630,13 +630,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) num_starts = 0; /* - * Paravirt / VMI wants a startup IPI hook here to set up the - * target processor state. - */ - startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, - stack_start); - - /* * Run STARTUP IPI loop. */ pr_debug("#startup loops: %d\n", num_starts); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c7c4d9c51e99..3d743da828d3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1185,8 +1185,6 @@ void __init tsc_init(void) u64 lpj; int cpu; - x86_init.timers.tsc_pre_init(); - if (!cpu_has_tsc) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 524619351961..483231ebbb0b 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -357,8 +357,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) tss = &per_cpu(cpu_tss, get_cpu()); /* make room for real-mode segments */ tsk->thread.sp0 += 16; - if (cpu_has_sep) + + if (static_cpu_has_safe(X86_FEATURE_SEP)) tsk->thread.sysenter_cs = 0; + load_sp0(tss, &tsk->thread); put_cpu(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3839628d962e..dad5fe9633a3 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -68,7 +68,6 @@ struct x86_init_ops x86_init __initdata = { .timers = { .setup_percpu_clockev = setup_boot_APIC_clock, - .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, .wallclock_init = x86_init_noop, }, |
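
Note on the cpu_detect() hunk in arch/x86/kernel/cpu/common.c above: the open-coded family/model/stepping extraction is replaced by the x86_family(), x86_model() and x86_stepping() helpers. The sketch below is a stand-alone re-implementation of that decoding, mirroring the removed open-coded logic rather than the kernel header, with an illustrative CPUID signature:

#include <stdio.h>
#include <stdint.h>

/* Family: bits 11:8, plus the extended family (bits 27:20) when the base family is 0xf. */
static unsigned int x86_family(uint32_t sig)
{
        unsigned int fam = (sig >> 8) & 0xf;

        if (fam == 0xf)
                fam += (sig >> 20) & 0xff;
        return fam;
}

/* Model: bits 7:4, extended by bits 19:16 for family 0x6 and above. */
static unsigned int x86_model(uint32_t sig)
{
        unsigned int model = (sig >> 4) & 0xf;

        if (x86_family(sig) >= 0x6)
                model += ((sig >> 16) & 0xf) << 4;
        return model;
}

/* Stepping: bits 3:0. */
static unsigned int x86_stepping(uint32_t sig)
{
        return sig & 0xf;
}

int main(void)
{
        uint32_t sig = 0x000306c3;      /* illustrative CPUID(1).EAX value */

        printf("family 0x%x, model 0x%x, stepping 0x%x\n",
               x86_family(sig), x86_model(sig), x86_stepping(sig));
        return 0;
}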
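
Note on the syscall_init() hunk in arch/x86/kernel/cpu/common.c above: wrmsrl(MSR_STAR, ...) takes one 64-bit value, while the replacement wrmsr(MSR_STAR, lo, hi) takes the low and high 32-bit halves separately, so the two selectors move from bits 63:48/47:32 of the 64-bit value into bits 31:16/15:0 of the high half. A quick stand-alone check that both encodings yield the same MSR contents; the selector values are the conventional __KERNEL_CS/__USER32_CS values, used here only for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint64_t kernel_cs = 0x10;        /* illustrative __KERNEL_CS */
        const uint64_t user32_cs = 0x23;        /* illustrative __USER32_CS */

        /* Old form: a single 64-bit write, selectors in bits 63:48 and 47:32. */
        uint64_t old_form = (user32_cs << 48) | (kernel_cs << 32);

        /* New form: wrmsr(msr, lo, hi) writes hi:lo, so the high half carries both selectors. */
        uint32_t lo = 0;
        uint32_t hi = (uint32_t)((user32_cs << 16) | kernel_cs);
        uint64_t new_form = ((uint64_t)hi << 32) | lo;

        assert(old_form == new_form);
        printf("MSR_STAR = %#018llx either way\n", (unsigned long long)old_form);
        return 0;
}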
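
Note on the CHECK_MEMBER_AT_END_OF() change in arch/x86/kernel/fpu/init.c above: sizeof(TYPE) may legitimately be larger than offsetofend(TYPE, MEMBER), because C pads a struct's size up to its alignment, which is why the build-time check now rounds the end offset up with ALIGN(). A small sketch with a made-up struct showing the effect (ALIGN_UP and struct demo are illustrative, not kernel code):

#include <stddef.h>
#include <stdio.h>

#define ALIGN_UP(x, a)          (((x) + (a) - 1) & ~((size_t)(a) - 1))
#define offsetofend(T, m)       (offsetof(T, m) + sizeof(((T *)0)->m))
#define TYPE_ALIGN(T)           offsetof(struct { char c; T t; }, t)

/* Hypothetical layout: 'last' ends at offset 9, but the long long member
 * forces 8-byte alignment, so sizeof() is padded up to 16. */
struct demo {
        long long big;
        char last;
};

int main(void)
{
        size_t end   = offsetofend(struct demo, last);
        size_t align = TYPE_ALIGN(struct demo);

        printf("offsetofend = %zu, sizeof = %zu, ALIGN_UP(end, align) = %zu\n",
               end, sizeof(struct demo), ALIGN_UP(end, align));
        return 0;
}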
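
Note on the default_do_nmi() and nmi_shootdown_cpus() hunks above (arch/x86/kernel/nmi.c, arch/x86/kernel/reboot.c): rather than blocking on nmi_reason_lock, a CPU now spins with trylock and, on every failed attempt, checks whether the crash-dump IPI has been issued so it can run the crash callback instead of deadlocking against a CPU that is busy panicking. A minimal user-space sketch of that pattern using C11 atomics and pthreads; the names and the simulated work are made up for illustration:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-ins for nmi_reason_lock and crash_ipi_issued. */
static atomic_flag reason_lock = ATOMIC_FLAG_INIT;
static atomic_bool crash_ipi_issued;

static void run_crash_callback(int cpu)
{
        printf("cpu%d: crash IPI seen, running crash callback\n", cpu);
}

/* Analogue of the reworked default_do_nmi() entry: try the lock, and while
 * it is contended keep checking for the crash IPI. */
static void handle_nmi(int cpu)
{
        while (atomic_flag_test_and_set_explicit(&reason_lock,
                                                 memory_order_acquire)) {
                if (atomic_load(&crash_ipi_issued)) {
                        run_crash_callback(cpu);
                        return; /* the kernel callback never returns */
                }
        }
        printf("cpu%d: took the lock, handling the NMI reason\n", cpu);
        atomic_flag_clear_explicit(&reason_lock, memory_order_release);
}

static void *other_cpu(void *arg)
{
        (void)arg;
        handle_nmi(1);
        return NULL;
}

int main(void)
{
        pthread_t t;

        /* "cpu0" holds the lock (say, it is in the middle of panic())... */
        atomic_flag_test_and_set(&reason_lock);
        pthread_create(&t, NULL, other_cpu, NULL);

        /* ...and later issues the crash IPI, which lets "cpu1" make progress. */
        sleep(1);
        atomic_store(&crash_ipi_issued, true);
        pthread_join(t, NULL);
        return 0;
}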