Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 1
-rw-r--r-- | arch/x86/kernel/alternative.c | 1
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 30
-rw-r--r-- | arch/x86/kernel/apic/msi.c | 128
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 18
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 36
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/if.c | 21
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 59
-rw-r--r-- | arch/x86/kernel/crash_core_32.c | 17
-rw-r--r-- | arch/x86/kernel/crash_core_64.c | 24
-rw-r--r-- | arch/x86/kernel/kvm.c | 3
-rw-r--r-- | arch/x86/kernel/machine_kexec_32.c | 12
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 19
-rw-r--r-- | arch/x86/kernel/setup.c | 2
-rw-r--r-- | arch/x86/kernel/sys_x86_64.c | 9
-rw-r--r-- | arch/x86/kernel/time.c | 12
-rw-r--r-- | arch/x86/kernel/traps.c | 74
-rw-r--r-- | arch/x86/kernel/x86_init.c | 1
18 files changed, 246 insertions, 221 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6175e370ee4a..9b294c13809a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace_$(BITS).o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_X86_TSC)		+= trace_clock.o
+obj-$(CONFIG_CRASH_CORE)	+= crash_core_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)	+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_KEXEC_FILE)	+= kexec-bzimage64.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 34360ca301a2..15ac0d5f4b40 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,6 +23,7 @@
 #include <asm/nmi.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 28446fa6bf18..5f973fed3c9f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -830,8 +830,17 @@ bool __init apic_needs_pit(void)
 	if (!tsc_khz || !cpu_khz)
 		return true;
 
-	/* Is there an APIC at all? */
-	if (!boot_cpu_has(X86_FEATURE_APIC))
+	/* Is there an APIC at all or is it disabled? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
+		return true;
+
+	/*
+	 * If interrupt delivery mode is legacy PIC or virtual wire without
+	 * configuration, the local APIC timer won't be set up. Make sure
+	 * that the PIT is initialized.
+	 */
+	if (apic_intr_mode == APIC_PIC ||
+	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
 		return true;
 
 	/* Virt guests may lack ARAT, but still have DEADLINE */
@@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void)
 
 enum apic_intr_mode_id apic_intr_mode __ro_after_init;
 
-static int __init apic_intr_mode_select(void)
+static int __init __apic_intr_mode_select(void)
 {
 	/* Check kernel option */
 	if (disable_apic) {
@@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/* Select the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_select(void)
+{
+	apic_intr_mode = __apic_intr_mode_select();
+}
+
 /*
  * An initial setup of the virtual wire mode.
  */
@@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void)
 {
 	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
 
-	apic_intr_mode = apic_intr_mode_select();
-
 	switch (apic_intr_mode) {
 	case APIC_PIC:
 		pr_info("APIC: Keep in PIC mode(8259)\n");
@@ -2626,6 +2639,13 @@ static int lapic_suspend(void)
 #endif
 
 	local_irq_save(flags);
+
+	/*
+	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
+	 * entries on some implementations.
+	 */
+	mask_ioapic_entries();
+
 	disable_local_APIC();
 
 	irq_remapping_disable();
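For orientation: the apic.c changes split the mode selection out of apic_intr_mode_init() so it can run before the legacy timers are probed, which is what lets the new apic_needs_pit() checks consult apic_intr_mode. A condensed sketch of the resulting boot ordering, assuming the hook wiring shown in the time.c and x86_init.c hunks later in this diff (illustrative, not a literal copy of the kernel code):

static __init void late_time_init_sketch(void)
{
	x86_init.irqs.intr_mode_select();	/* sets apic_intr_mode */
	x86_init.timers.timer_init();		/* may consult apic_needs_pit() */
	x86_init.irqs.intr_mode_init();		/* acts on apic_intr_mode */
	tsc_init();
}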
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7f7533462474..159bd0cb8548 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,10 +23,8 @@
 
 static struct irq_domain *msi_default_domain;
 
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
 {
-	struct irq_cfg *cfg = irqd_cfg(data);
-
 	msg->address_hi = MSI_ADDR_BASE_HI;
 
 	if (x2apic_enabled())
@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 		MSI_DATA_VECTOR(cfg->vector);
 }
 
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	__irq_msi_compose_msg(irqd_cfg(data), msg);
+}
+
+static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
+{
+	struct msi_msg msg[2] = { [1] = { }, };
+
+	__irq_msi_compose_msg(cfg, msg);
+	irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
+}
+
+static int
+msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
+	struct irq_data *parent = irqd->parent_data;
+	unsigned int cpu;
+	int ret;
+
+	/* Save the current configuration */
+	cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+	old_cfg = *cfg;
+
+	/* Allocate a new target vector */
+	ret = parent->chip->irq_set_affinity(parent, mask, force);
+	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+		return ret;
+
+	/*
+	 * For non-maskable and non-remapped MSI interrupts the migration
+	 * to a different destination CPU and a different vector has to be
+	 * done carefully to handle the possible stray interrupt which can
+	 * be caused by the non-atomic update of the address/data pair.
+	 *
+	 * Direct update is possible when:
+	 * - The MSI is maskable (remapped MSI does not use this code path).
+	 *   The quirk bit is not set in this case.
+	 * - The new vector is the same as the old vector
+	 * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+	 * - The new destination CPU is the same as the old destination CPU
+	 */
+	if (!irqd_msi_nomask_quirk(irqd) ||
+	    cfg->vector == old_cfg.vector ||
+	    old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+	    cfg->dest_apicid == old_cfg.dest_apicid) {
+		irq_msi_update_msg(irqd, cfg);
+		return ret;
+	}
+
+	/*
+	 * Paranoia: Validate that the interrupt target is the local
+	 * CPU.
+	 */
+	if (WARN_ON_ONCE(cpu != smp_processor_id())) {
+		irq_msi_update_msg(irqd, cfg);
+		return ret;
+	}
+
+	/*
+	 * Redirect the interrupt to the new vector on the current CPU
+	 * first. This might cause a spurious interrupt on this vector if
+	 * the device raises an interrupt right between this update and the
+	 * update to the final destination CPU.
+	 *
+	 * If the vector is in use then the installed device handler will
+	 * denote it as spurious which is no harm as this is a rare event
+	 * and interrupt handlers have to cope with spurious interrupts
+	 * anyway. If the vector is unused, then it is marked so it won't
+	 * trigger the 'No irq handler for vector' warning in do_IRQ().
+	 *
+	 * This requires holding the vector lock to prevent concurrent
+	 * updates to the affected vector.
+	 */
+	lock_vector_lock();
+
+	/*
+	 * Mark the new target vector on the local CPU if it is currently
+	 * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
+	 * the CPU hotplug path for a similar purpose. This cannot be
+	 * undone here as the current CPU has interrupts disabled and
+	 * cannot handle the interrupt before the whole set_affinity()
+	 * section is done. In the CPU unplug case, the current CPU is
+	 * about to vanish and will not handle any interrupts anymore. The
+	 * vector is cleaned up when the CPU comes online again.
+	 */
+	if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
+		this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);
+
+	/* Redirect it to the new vector on the local CPU temporarily */
+	old_cfg.vector = cfg->vector;
+	irq_msi_update_msg(irqd, &old_cfg);
+
+	/* Now transition it to the target CPU */
+	irq_msi_update_msg(irqd, cfg);
+
+	/*
+	 * All interrupts after this point are now targeted at the new
+	 * vector/CPU.
+	 *
+	 * Drop vector lock before testing whether the temporary assignment
+	 * to the local CPU was hit by an interrupt raised in the device,
+	 * because the retrigger function acquires vector lock again.
+	 */
+	unlock_vector_lock();
+
+	/*
+	 * Check whether the transition raced with a device interrupt and
+	 * is pending in the local APIC's IRR. It is safe to do this outside
+	 * of vector lock as the irq_desc::lock of this interrupt is still
+	 * held and interrupts are disabled: The check is not accessing the
+	 * underlying vector store. It's just checking the local APIC's
+	 * IRR.
+	 */
+	if (lapic_vector_set_in_irr(cfg->vector))
+		irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);
+
+	return ret;
+}
+
 /*
  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
  * which implement the MSI or MSI-X Capability Structure.
@@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_compose_msi_msg	= irq_msi_compose_msg,
+	.irq_set_affinity	= msi_set_affinity,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
 	}
 	if (!msi_default_domain)
 		pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
+	else
+		msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
}

#ifdef CONFIG_IRQ_REMAP
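The core of msi_set_affinity() above is working around the fact that a non-maskable MSI's address (destination CPU) and data (vector) are two separate config-space writes, so a device can observe an intermediate pair and raise an interrupt that no one is expecting. A minimal sketch of the two-step update, with hypothetical helpers standing in for the real msi_msg plumbing (a sketch, not kernel code):

static void migrate_msi_sketch(void)
{
	/* Step 1: change only the vector. The message still targets the
	 * current CPU, where the new vector was reserved
	 * (VECTOR_RETRIGGERED), so the intermediate state is valid. */
	write_msi_data(NEW_VECTOR);		/* hypothetical helper */

	/* Step 2: change only the destination. The (new) vector is now
	 * valid on both the old and the new CPU across this write. */
	write_msi_addr(NEW_CPU);		/* hypothetical helper */

	/* Step 3: if the device fired between the two writes, the
	 * interrupt is pending in the current CPU's APIC IRR and must
	 * be retriggered so it is not lost. */
	if (pending_in_local_irr(NEW_VECTOR))	/* hypothetical helper */
		retrigger_irq();		/* hypothetical helper */
}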
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 86b8241c8209..52c9bfbbdb2a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -164,22 +164,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
-static int __init x86_mpx_setup(char *s)
-{
-	/* require an exact match without trailing characters */
-	if (strlen(s))
-		return 0;
-
-	/* do not emit a message if the feature is not present */
-	if (!boot_cpu_has(X86_FEATURE_MPX))
-		return 1;
-
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
-	pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
-	return 1;
-}
-__setup("nompx", x86_mpx_setup);
-
 #ifdef CONFIG_X86_64
 static int __init x86_nopcid_setup(char *s)
 {
@@ -306,8 +290,6 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 
 static __init int setup_disable_smep(char *arg)
 {
 	setup_clear_cpu_cap(X86_FEATURE_SMEP);
-	/* Check for things that depend on SMEP being enabled: */
-	check_mpx_erratum(&boot_cpu_data);
 	return 1;
 }
 __setup("nosmep", setup_disable_smep);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 57473e2c0869..be82cd5841c3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -32,41 +32,6 @@
 #endif
 
 /*
- * Just in case our CPU detection goes bad, or you have a weird system,
- * allow a way to override the automatic disabling of MPX.
- */
-static int forcempx;
-
-static int __init forcempx_setup(char *__unused)
-{
-	forcempx = 1;
-
-	return 1;
-}
-__setup("intel-skd-046-workaround=disable", forcempx_setup);
-
-void check_mpx_erratum(struct cpuinfo_x86 *c)
-{
-	if (forcempx)
-		return;
-	/*
-	 * Turn off the MPX feature on CPUs where SMEP is not
-	 * available or disabled.
-	 *
-	 * Works around Intel Erratum SKD046: "Branch Instructions
-	 * May Initialize MPX Bound Registers Incorrectly".
-	 *
-	 * This might falsely disable MPX on systems without
-	 * SMEP, like Atom processors without SMEP. But there
-	 * is no such hardware known at the moment.
-	 */
-	if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
-		setup_clear_cpu_cap(X86_FEATURE_MPX);
-		pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
-	}
-}
-
-/*
  * Processors which have self-snooping capability can handle conflicting
  * memory type across CPUs by snooping its own cache. However, there exists
 * CPU models in which having conflicting memory types still leads to
@@ -330,7 +295,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 		c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
 	}
 
-	check_mpx_erratum(c);
 	check_memory_type_self_snoop_errata(c);
 
 	/*
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index da532f656a7b..a5c506f6da7f 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -396,15 +396,16 @@ static int mtrr_open(struct inode *inode, struct file *file)
 	return single_open(file, mtrr_seq_show, NULL);
 }
 
-static const struct file_operations mtrr_fops = {
-	.owner			= THIS_MODULE,
-	.open			= mtrr_open,
-	.read			= seq_read,
-	.llseek			= seq_lseek,
-	.write			= mtrr_write,
-	.unlocked_ioctl		= mtrr_ioctl,
-	.compat_ioctl		= mtrr_ioctl,
-	.release		= mtrr_close,
+static const struct proc_ops mtrr_proc_ops = {
+	.proc_open		= mtrr_open,
+	.proc_read		= seq_read,
+	.proc_lseek		= seq_lseek,
+	.proc_write		= mtrr_write,
+	.proc_ioctl		= mtrr_ioctl,
+#ifdef CONFIG_COMPAT
+	.proc_compat_ioctl	= mtrr_ioctl,
+#endif
+	.proc_release		= mtrr_close,
 };
 
 static int __init mtrr_if_init(void)
@@ -417,7 +418,7 @@ static int __init mtrr_if_init(void)
 	    (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
 		return -ENODEV;
 
-	proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
+	proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_proc_ops);
 	return 0;
 }
 arch_initcall(mtrr_if_init);
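For context on the conversion pattern above: proc_create() now takes a struct proc_ops instead of a struct file_operations, with proc_-prefixed fields and no .owner member. A minimal sketch of the same conversion for a hypothetical seq_file-based proc entry (the foo_* names are illustrative):

/* Before: proc file registered with file_operations. */
static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,	/* no proc_ops equivalent; dropped */
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/* After: the same callbacks, renamed into proc_ops. */
static const struct proc_ops foo_proc_ops = {
	.proc_open	= foo_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};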
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 1504bcabc63c..064e9ef44cd6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2060,7 +2060,7 @@ static int rdt_get_tree(struct fs_context *fc)
 
 	if (rdt_mon_capable) {
 		ret = mongroup_create_dir(rdtgroup_default.kn,
-					  NULL, "mon_groups",
+					  &rdtgroup_default, "mon_groups",
 					  &kn_mongrp);
 		if (ret < 0)
 			goto out_info;
@@ -2127,25 +2127,20 @@ enum rdt_param {
 	nr__rdt_params
 };
 
-static const struct fs_parameter_spec rdt_param_specs[] = {
+static const struct fs_parameter_spec rdt_fs_parameters[] = {
 	fsparam_flag("cdp",		Opt_cdp),
 	fsparam_flag("cdpl2",		Opt_cdpl2),
 	fsparam_flag("mba_MBps",	Opt_mba_mbps),
 	{}
 };
 
-static const struct fs_parameter_description rdt_fs_parameters = {
-	.name		= "rdt",
-	.specs		= rdt_param_specs,
-};
-
 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
 	struct fs_parse_result result;
 	int opt;
 
-	opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
+	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -2295,7 +2290,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
 	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
 		free_rmid(sentry->mon.rmid);
 		list_del(&sentry->mon.crdtgrp_list);
-		kfree(sentry);
+
+		if (atomic_read(&sentry->waitcount) != 0)
+			sentry->flags = RDT_DELETED;
+		else
+			kfree(sentry);
 	}
 }
 
@@ -2333,7 +2332,11 @@ static void rmdir_all_sub(void)
 
 		kernfs_remove(rdtgrp->kn);
 		list_del(&rdtgrp->rdtgroup_list);
-		kfree(rdtgrp);
+
+		if (atomic_read(&rdtgrp->waitcount) != 0)
+			rdtgrp->flags = RDT_DELETED;
+		else
+			kfree(rdtgrp);
 	}
 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2370,7 +2373,7 @@ static void rdt_kill_sb(struct super_block *sb)
 static struct file_system_type rdt_fs_type = {
 	.name			= "resctrl",
 	.init_fs_context	= rdt_init_fs_context,
-	.parameters		= &rdt_fs_parameters,
+	.parameters		= rdt_fs_parameters,
 	.kill_sb		= rdt_kill_sb,
 };
 
@@ -2536,7 +2539,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
 	/*
 	 * Create the mon_data directory first.
 	 */
-	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
 	if (ret)
 		return ret;
 
@@ -2726,7 +2729,6 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 }
 
 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
-			     struct kernfs_node *prgrp_kn,
 			     const char *name, umode_t mode,
 			     enum rdt_group_type rtype, struct rdtgroup **r)
 {
@@ -2735,7 +2737,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	uint files = 0;
 	int ret;
 
-	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
 	if (!prdtgrp) {
 		ret = -ENODEV;
 		goto out_unlock;
@@ -2808,7 +2810,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	kernfs_activate(kn);
 
 	/*
-	 * The caller unlocks the prgrp_kn upon success.
+	 * The caller unlocks the parent_kn upon success.
 	 */
 	return 0;
 
@@ -2819,7 +2821,7 @@ out_destroy:
 out_free_rgrp:
 	kfree(rdtgrp);
 out_unlock:
-	rdtgroup_kn_unlock(prgrp_kn);
+	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
 
@@ -2836,15 +2838,12 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
 */
 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
-			      struct kernfs_node *prgrp_kn,
-			      const char *name,
-			      umode_t mode)
+			      const char *name, umode_t mode)
 {
 	struct rdtgroup *rdtgrp, *prgrp;
 	int ret;
 
-	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
-				&rdtgrp);
+	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
 	if (ret)
 		return ret;
 
@@ -2857,7 +2856,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
 	 */
 	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
 
-	rdtgroup_kn_unlock(prgrp_kn);
+	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
 
@@ -2866,7 +2865,6 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
 * to allocate and monitor resources.
 */
 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
-				   struct kernfs_node *prgrp_kn,
 				   const char *name, umode_t mode)
 {
 	struct rdtgroup *rdtgrp;
@@ -2874,8 +2872,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 	u32 closid;
 	int ret;
 
-	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
-				&rdtgrp);
+	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
 	if (ret)
 		return ret;
 
@@ -2900,7 +2897,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 	 * Create an empty mon_groups directory to hold the subset
 	 * of tasks and cpus to monitor.
 	 */
-	ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+	ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
 	if (ret) {
 		rdt_last_cmd_puts("kernfs subdir error\n");
 		goto out_del_list;
@@ -2916,7 +2913,7 @@ out_id_free:
 out_common_fail:
 	mkdir_rdt_prepare_clean(rdtgrp);
 out_unlock:
-	rdtgroup_kn_unlock(prgrp_kn);
+	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
 
@@ -2949,14 +2946,14 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	 * subdirectory
 	 */
 	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
-		return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
+		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
 
 	/*
 	 * If RDT monitoring is supported and the parent directory is a valid
 	 * "mon_groups" directory, add a monitoring subdirectory.
 	 */
 	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
-		return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
+		return rdtgroup_mkdir_mon(parent_kn, name, mode);
 
 	return -EPERM;
 }
@@ -3042,13 +3039,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
 	closid_free(rdtgrp->closid);
 	free_rmid(rdtgrp->mon.rmid);
 
+	rdtgroup_ctrl_remove(kn, rdtgrp);
+
 	/*
 	 * Free all the child monitor group rmids.
 	 */
 	free_all_child_rdtgrp(rdtgrp);
 
-	rdtgroup_ctrl_remove(kn, rdtgrp);
-
 	return 0;
 }
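The waitcount checks added above implement a deferred-free pattern: a group that a concurrent kernfs operation still references is only flagged RDT_DELETED, and the last reference holder is expected to free it. The release side lives in rdtgroup_kn_unlock(); roughly (a simplified sketch, not the exact upstream code):

/* Sketch of the release side in rdtgroup_kn_unlock(), simplified: */
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
    (rdtgrp->flags & RDT_DELETED)) {
	kernfs_unbreak_active_protection(kn);
	kernfs_put(rdtgrp->kn);
	kfree(rdtgrp);	/* the kfree() skipped in the paths above */
}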
diff --git a/arch/x86/kernel/crash_core_32.c b/arch/x86/kernel/crash_core_32.c
new file mode 100644
index 000000000000..c0159a7bca6d
--- /dev/null
+++ b/arch/x86/kernel/crash_core_32.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/crash_core.h>
+
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_data);
+	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_X86_PAE
+	VMCOREINFO_CONFIG(X86_PAE);
+#endif
+}
diff --git a/arch/x86/kernel/crash_core_64.c b/arch/x86/kernel/crash_core_64.c
new file mode 100644
index 000000000000..845a57eb4eb7
--- /dev/null
+++ b/arch/x86/kernel/crash_core_64.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/crash_core.h>
+
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	u64 sme_mask = sme_me_mask;
+
+	VMCOREINFO_NUMBER(phys_base);
+	VMCOREINFO_SYMBOL(init_top_pgt);
+	vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
+			      pgtable_l5_enabled());
+
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_data);
+	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+	VMCOREINFO_NUMBER(sme_mask);
+}
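As background for the VMCOREINFO_* helpers used in the new files: each one appends a formatted line to the vmcoreinfo note that crash tools such as makedumpfile parse. Roughly, per the macros in include/linux/crash_core.h (simplified view of the emitted strings):

VMCOREINFO_SYMBOL(node_data);		/* "SYMBOL(node_data)=<address>" */
VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); /* "LENGTH(node_data)=<value>" */
VMCOREINFO_NUMBER(phys_base);		/* "NUMBER(phys_base)=<value>" */
VMCOREINFO_CONFIG(X86_PAE);		/* "CONFIG_X86_PAE=y" */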
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 81045aabb6f4..d817f255aed8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -736,6 +736,9 @@ static __init int kvm_setup_pv_tlb_flush(void)
 {
 	int cpu;
 
+	if (!kvm_para_available() || nopv)
+		return 0;
+
 	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
 	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
 	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7b45e8daad22..02bddfc122a4 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -250,15 +250,3 @@ void machine_kexec(struct kimage *image)
 
 	__ftrace_enabled_restore(save_ftrace_enabled);
 }
-
-void arch_crash_save_vmcoreinfo(void)
-{
-#ifdef CONFIG_NUMA
-	VMCOREINFO_SYMBOL(node_data);
-	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifdef CONFIG_X86_PAE
-	VMCOREINFO_CONFIG(X86_PAE);
-#endif
-}
-
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 16e125a50b33..ad5cdd6a5f23 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -398,25 +398,6 @@ void machine_kexec(struct kimage *image)
 	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
-void arch_crash_save_vmcoreinfo(void)
-{
-	u64 sme_mask = sme_me_mask;
-
-	VMCOREINFO_NUMBER(phys_base);
-	VMCOREINFO_SYMBOL(init_top_pgt);
-	vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
-			pgtable_l5_enabled());
-
-#ifdef CONFIG_NUMA
-	VMCOREINFO_SYMBOL(node_data);
-	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-	vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
-			kaslr_offset());
-	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
-	VMCOREINFO_NUMBER(sme_mask);
-}
-
 /* arch-dependent functionality related to kexec file-based syscall */
 
 #ifdef CONFIG_KEXEC_FILE
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1e4c20a1efec..a74262c71484 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -893,8 +893,6 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = _brk_end;
 
-	mpx_mm_init(&init_mm);
-
 	code_resource.start = __pa_symbol(_text);
 	code_resource.end = __pa_symbol(_etext)-1;
 	rodata_resource.start = __pa_symbol(__start_rodata);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index f7476ce23b6e..ca3c11a17b5a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -22,7 +22,6 @@
 #include <asm/elf.h>
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
-#include <asm/mpx.h>
 
 /*
  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -137,10 +136,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct vm_unmapped_area_info info;
 	unsigned long begin, end;
 
-	addr = mpx_unmapped_area_check(addr, len, flags);
-	if (IS_ERR_VALUE(addr))
-		return addr;
-
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -180,10 +175,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
 
-	addr = mpx_unmapped_area_check(addr, len, flags);
-	if (IS_ERR_VALUE(addr))
-		return addr;
-
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
 		return -ENOMEM;
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 7ce29cee9f9e..d8673d8a779b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -91,10 +91,18 @@ void __init hpet_time_init(void)
 
 static __init void x86_late_time_init(void)
 {
+	/*
+	 * Before PIT/HPET init, select the interrupt mode. This is required
+	 * to decide correctly whether the PIT should be initialized.
+	 */
+	x86_init.irqs.intr_mode_select();
+
+	/* Setup the legacy timers */
 	x86_init.timers.timer_init();
+
 	/*
-	 * After PIT/HPET timers init, select and setup
-	 * the final interrupt mode for delivering IRQs.
+	 * After PIT/HPET timers init, set up the final interrupt mode for
+	 * delivering IRQs.
 	 */
 	x86_init.irqs.intr_mode_init();
 	tsc_init();
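One design note: intr_mode_select is a new x86_init hook (wired up in the x86_init.c hunk below), so a platform that configures interrupt delivery itself can stub it out rather than patch the common code. A hypothetical sketch, assuming the usual x86_init override pattern (illustrative only; not part of this diff):

static void __init example_platform_setup(void)
{
	/* Neutralize both hooks on a platform with its own IRQ setup. */
	x86_init.irqs.intr_mode_select = x86_init_noop;
	x86_init.irqs.intr_mode_init   = x86_init_noop;
}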
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9e6f822922a3..6ef00eb6fbb9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -52,8 +52,6 @@
 #include <asm/mach_traps.h>
 #include <asm/alternative.h>
 #include <asm/fpu/xstate.h>
-#include <asm/trace/mpx.h>
-#include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
 #include <asm/insn.h>
@@ -436,8 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
 
 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 {
-	const struct mpx_bndcsr *bndcsr;
-
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
 			X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
@@ -447,76 +443,6 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 	if (!user_mode(regs))
 		die("bounds", regs, error_code);
 
-	if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
-		/* The exception is not from Intel MPX */
-		goto exit_trap;
-	}
-
-	/*
-	 * We need to look at BNDSTATUS to resolve this exception.
-	 * A NULL here might mean that it is in its 'init state',
-	 * which is all zeros which indicates MPX was not
-	 * responsible for the exception.
-	 */
-	bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
-	if (!bndcsr)
-		goto exit_trap;
-
-	trace_bounds_exception_mpx(bndcsr);
-	/*
-	 * The error code field of the BNDSTATUS register communicates status
-	 * information of a bound range exception #BR or operation involving
-	 * bound directory.
-	 */
-	switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
-	case 2:	/* Bound directory has invalid entry. */
-		if (mpx_handle_bd_fault())
-			goto exit_trap;
-		break; /* Success, it was handled */
-	case 1: /* Bound violation. */
-	{
-		struct task_struct *tsk = current;
-		struct mpx_fault_info mpx;
-
-		if (mpx_fault_info(&mpx, regs)) {
-			/*
-			 * We failed to decode the MPX instruction. Act as if
-			 * the exception was not caused by MPX.
-			 */
-			goto exit_trap;
-		}
-		/*
-		 * Success, we decoded the instruction and retrieved
-		 * an 'mpx' containing the address being accessed
-		 * which caused the exception. This information
-		 * allows and application to possibly handle the
-		 * #BR exception itself.
-		 */
-		if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs,
-				       error_code))
-			break;
-
-		show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code);
-
-		force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper);
-		break;
-	}
-	case 0: /* No exception caused by Intel MPX operations. */
-		goto exit_trap;
-	default:
-		die("bounds", regs, error_code);
-	}
-
-	return;
-
-exit_trap:
-	/*
-	 * This path out is for all the cases where we could not
-	 * handle the exception in some way (like allocating a
-	 * table or telling userspace about it. We will also end
-	 * up here if the kernel has MPX turned off at compile
-	 * time..
-	 */
 	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
 }
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 23e25f3034c2..85f1a90c55cd 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -80,6 +80,7 @@ struct x86_init_ops x86_init __initdata = {
 		.pre_vector_init	= init_ISA_irqs,
 		.intr_init		= native_init_IRQ,
 		.trap_init		= x86_init_noop,
+		.intr_mode_select	= apic_intr_mode_select,
 		.intr_mode_init		= apic_intr_mode_init
 	},