diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/aperture_64.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cyrix.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 62 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/apei.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 30 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/severity.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/core.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/internal.h | 16 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 185 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 42 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/kexec-bzimage64.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/kvmclock.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/mpparse.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/unwind_frame.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/unwind_orc.c | 17 |
23 files changed, 343 insertions, 169 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2624de16cd7a..8dcbf6890714 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -935,6 +935,9 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE, SMP_CACHE_BYTES); + if (!hpet_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 58176b56354e..294ed4392a0e 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "AGP: " fmt #include <linux/kernel.h> +#include <linux/kcore.h> #include <linux/types.h> #include <linux/init.h> #include <linux/memblock.h> @@ -57,7 +58,7 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -#ifdef CONFIG_PROC_VMCORE +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE) /* * If the first kernel maps the aperture over e820 RAM, the kdump kernel will * use the same range because it will remain configured in the northbridge. @@ -66,20 +67,25 @@ int fix_aperture __initdata = 1; */ static unsigned long aperture_pfn_start, aperture_page_count; -static int gart_oldmem_pfn_is_ram(unsigned long pfn) +static int gart_mem_pfn_is_ram(unsigned long pfn) { return likely((pfn < aperture_pfn_start) || (pfn >= aperture_pfn_start + aperture_page_count)); } -static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +static void __init exclude_from_core(u64 aper_base, u32 aper_order) { aperture_pfn_start = aper_base >> PAGE_SHIFT; aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; - WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram)); +#ifdef CONFIG_PROC_VMCORE + WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram)); +#endif +#ifdef CONFIG_PROC_KCORE + WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); +#endif } #else -static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +static void exclude_from_core(u64 aper_base, u32 aper_order) { } #endif @@ -474,7 +480,7 @@ out: * may have allocated the range over its e820 RAM * and fixed up the northbridge */ - exclude_from_vmcore(last_aper_base, last_aper_order); + exclude_from_core(last_aper_base, last_aper_order); return 1; } @@ -520,7 +526,7 @@ out: * overlap with the first kernel's memory. We can't access the * range through vmcore even though it should be part of the dump. */ - exclude_from_vmcore(aper_alloc, aper_order); + exclude_from_core(aper_alloc, aper_order); /* Fix up the north bridges */ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 264e3221d923..53aa234a6803 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2581,6 +2581,8 @@ static struct resource * __init ioapic_setup_resources(void) n *= nr_ioapics; mem = memblock_alloc(n, SMP_CACHE_BYTES); + if (!mem) + panic("%s: Failed to allocate %lu bytes\n", __func__, n); res = (void *)mem; mem += sizeof(struct resource) * nr_ioapics; @@ -2625,6 +2627,9 @@ fake_ioapic_page: #endif ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!ioapic_phys) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d12226f60168..1d9b8aaea06c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -124,7 +124,7 @@ static void set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -135,11 +135,11 @@ static void set_cx86_memwb(void) pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } /* @@ -153,14 +153,14 @@ static void geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); } else { /* A 6x86MX - it has the bug. */ set_cpu_bug(c, X86_BUG_COMA); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 89298c83de53..e64de5149e50 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = { [SMCA_FP] = { "floating_point", "Floating Point Unit" }, [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, + [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, [SMCA_UMC] = { "umc", "Unified Memory Controller" }, [SMCA_PB] = { "param_block", "Parameter Block" }, [SMCA_PSP] = { "psp", "Platform Security Processor" }, + [SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, + [SMCA_SMU_V2] = { "smu", "System Management Unit" }, + [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, + [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, + [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, }; static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = @@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, /* ZN Core (HWID=0xB0) MCA types */ - { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, + { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, /* HWID 0xB0 MCATYPE 0x4 is Reserved */ - { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF }, + { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF }, { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, /* Data Fabric MCA types */ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, - { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF }, + { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F }, + { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF }, /* Unified Memory Controller MCA type */ - { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F }, + { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF }, /* Parameter Block MCA type */ { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, /* Platform Security Processor MCA type */ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, + { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF }, /* System Management Unit MCA type */ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, + { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF }, + + /* Microprocessor 5 Unit MCA type */ + { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF }, + + /* Northbridge IO Unit MCA type */ + { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F }, + + /* PCI Express Unit MCA type */ + { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F }, }; struct smca_bank smca_banks[MAX_NR_BANKS]; @@ -545,6 +563,40 @@ out: return offset; } +/* + * Turn off MC4_MISC thresholding banks on all family 0x15 models since + * they're not supported there. + */ +void disable_err_thresholding(struct cpuinfo_x86 *c) +{ + int i; + u64 hwcr; + bool need_toggle; + u32 msrs[] = { + 0x00000413, /* MC4_MISC0 */ + 0xc0000408, /* MC4_MISC1 */ + }; + + if (c->x86 != 0x15) + return; + + rdmsrl(MSR_K7_HWCR, hwcr); + + /* McStatusWrEn has to be set */ + need_toggle = !(hwcr & BIT(18)); + + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); + + /* Clear CntP bit safely */ + for (i = 0; i < ARRAY_SIZE(msrs); i++) + msr_clear_bit(msrs[i], 62); + + /* restore old settings */ + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; + disable_err_thresholding(c); + for (bank = 0; bank < mca_cfg.banks; ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index 1d9b3ce662a0..c038e5c00a59 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); #define CPER_CREATOR_MCE \ - UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ - 0x64, 0x90, 0xb8, 0x9d) + GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) #define CPER_SECTION_TYPE_MCE \ - UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ - 0x04, 0x4a, 0x38, 0xfc) + GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) /* * CPER specification (in UEFI specification 2.3 appendix N) requires @@ -135,7 +135,7 @@ retry: goto out; /* try to skip other type records in storage */ else if (rc != sizeof(rcd) || - uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) + !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE)) goto retry; memcpy(m, &rcd.mce, sizeof(*m)); rc = sizeof(*m); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 6ce290c506d9..b7fb541a4873 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 0x15 && c->x86_model <= 0xf) mce_flags.overflow_recov = 1; - /* - * Turn off MC4_MISC thresholding banks on those models since - * they're not supported there. - */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { - int i; - u64 hwcr; - bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; - - rdmsrl(MSR_K7_HWCR, hwcr); - - /* McStatusWrEn has to be set */ - need_toggle = !(hwcr & BIT(18)); - - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); - - /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) - msr_clear_bit(msrs[i], 62); - - /* restore old settings */ - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr); - } } if (c->x86_vendor == X86_VENDOR_INTEL) { diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dc3e26e905a3..65201e180fe0 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -165,6 +165,11 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), KERNEL ), + MCESEV( + PANIC, "Instruction fetch error in kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), + KERNEL + ), #endif MCESEV( PANIC, "Action required: unknown MCACOD", diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 97f9ada9ceda..5260185cbf7b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -608,6 +608,8 @@ static int microcode_reload_late(void) if (ret > 0) microcode_check(); + pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode); + return ret; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e81a2db42df7..3fa238a137d2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; # endif + + /* + * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, + * set x2apic destination mode to physcial mode when x2apic is available + * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs + * have 8-bit APIC id. + */ +# ifdef CONFIG_X86_X2APIC + if (x2apic_supported()) + x2apic_phys = 1; +# endif + #endif } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 822b7db634ee..e49b77283924 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/kernfs.h> +#include <linux/fs_context.h> #include <linux/jump_label.h> #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -40,6 +41,21 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) + +struct rdt_fs_context { + struct kernfs_fs_context kfc; + bool enable_cdpl2; + bool enable_cdpl3; + bool enable_mba_mbps; +}; + +static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) +{ + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct rdt_fs_context, kfc); +} + DECLARE_STATIC_KEY_FALSE(rdt_enable_key); /** diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f33f11f69078..1573a0a6b525 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -501,11 +501,8 @@ out_unlock: void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms) { unsigned long delay = msecs_to_jiffies(delay_ms); - struct rdt_resource *r; int cpu; - r = &rdt_resources_all[RDT_RESOURCE_L3]; - cpu = cpumask_any(&dom->cpu_mask); dom->cqm_work_cpu = cpu; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 8388adf241b2..399601eda8e4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -24,6 +24,7 @@ #include <linux/cpu.h> #include <linux/debugfs.h> #include <linux/fs.h> +#include <linux/fs_parser.h> #include <linux/sysfs.h> #include <linux/kernfs.h> #include <linux/seq_buf.h> @@ -32,6 +33,7 @@ #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/task_work.h> +#include <linux/user_namespace.h> #include <uapi/linux/magic.h> @@ -1858,46 +1860,6 @@ static void cdp_disable_all(void) cdpl2_disable(); } -static int parse_rdtgroupfs_options(char *data) -{ - char *token, *o = data; - int ret = 0; - - while ((token = strsep(&o, ",")) != NULL) { - if (!*token) { - ret = -EINVAL; - goto out; - } - - if (!strcmp(token, "cdp")) { - ret = cdpl3_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "cdpl2")) { - ret = cdpl2_enable(); - if (ret) - goto out; - } else if (!strcmp(token, "mba_MBps")) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - ret = set_mba_sc(true); - else - ret = -EINVAL; - if (ret) - goto out; - } else { - ret = -EINVAL; - goto out; - } - } - - return 0; - -out: - pr_err("Invalid mount option \"%s\"\n", token); - - return ret; -} - /* * We don't allow rdtgroup directories to be created anywhere * except the root directory. Thus when looking for the rdtgroup @@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, struct kernfs_node **mon_data_kn); -static struct dentry *rdt_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +static int rdt_enable_ctx(struct rdt_fs_context *ctx) +{ + int ret = 0; + + if (ctx->enable_cdpl2) + ret = cdpl2_enable(); + + if (!ret && ctx->enable_cdpl3) + ret = cdpl3_enable(); + + if (!ret && ctx->enable_mba_mbps) + ret = set_mba_sc(true); + + return ret; +} + +static int rdt_get_tree(struct fs_context *fc) { + struct rdt_fs_context *ctx = rdt_fc2context(fc); struct rdt_domain *dom; struct rdt_resource *r; - struct dentry *dentry; int ret; cpus_read_lock(); @@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, * resctrl file system can only be mounted once. */ if (static_branch_unlikely(&rdt_enable_key)) { - dentry = ERR_PTR(-EBUSY); + ret = -EBUSY; goto out; } - ret = parse_rdtgroupfs_options(data); - if (ret) { - dentry = ERR_PTR(ret); + ret = rdt_enable_ctx(ctx); + if (ret < 0) goto out_cdp; - } closid_init(); ret = rdtgroup_create_info_dir(rdtgroup_default.kn); - if (ret) { - dentry = ERR_PTR(ret); - goto out_cdp; - } + if (ret < 0) + goto out_mba; if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, NULL, "mon_groups", &kn_mongrp); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_info; - } kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); - if (ret) { - dentry = ERR_PTR(ret); + if (ret < 0) goto out_mongrp; - } kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } ret = rdt_pseudo_lock_init(); - if (ret) { - dentry = ERR_PTR(ret); + if (ret) goto out_mondata; - } - dentry = kernfs_mount(fs_type, flags, rdt_root, - RDTGROUP_SUPER_MAGIC, NULL); - if (IS_ERR(dentry)) + ret = kernfs_get_tree(fc); + if (ret < 0) goto out_psl; if (rdt_alloc_capable) @@ -2059,14 +2024,95 @@ out_mongrp: kernfs_remove(kn_mongrp); out_info: kernfs_remove(kn_info); +out_mba: + if (ctx->enable_mba_mbps) + set_mba_sc(false); out_cdp: cdp_disable_all(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); cpus_read_unlock(); + return ret; +} + +enum rdt_param { + Opt_cdp, + Opt_cdpl2, + Opt_mba_mpbs, + nr__rdt_params +}; + +static const struct fs_parameter_spec rdt_param_specs[] = { + fsparam_flag("cdp", Opt_cdp), + fsparam_flag("cdpl2", Opt_cdpl2), + fsparam_flag("mba_mpbs", Opt_mba_mpbs), + {} +}; + +static const struct fs_parameter_description rdt_fs_parameters = { + .name = "rdt", + .specs = rdt_param_specs, +}; + +static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, &rdt_fs_parameters, param, &result); + if (opt < 0) + return opt; - return dentry; + switch (opt) { + case Opt_cdp: + ctx->enable_cdpl3 = true; + return 0; + case Opt_cdpl2: + ctx->enable_cdpl2 = true; + return 0; + case Opt_mba_mpbs: + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -EINVAL; + ctx->enable_mba_mbps = true; + return 0; + } + + return -EINVAL; +} + +static void rdt_fs_context_free(struct fs_context *fc) +{ + struct rdt_fs_context *ctx = rdt_fc2context(fc); + + kernfs_free_fs_context(fc); + kfree(ctx); +} + +static const struct fs_context_operations rdt_fs_context_ops = { + .free = rdt_fs_context_free, + .parse_param = rdt_parse_param, + .get_tree = rdt_get_tree, +}; + +static int rdt_init_fs_context(struct fs_context *fc) +{ + struct rdt_fs_context *ctx; + + ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->kfc.root = rdt_root; + ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; + fc->fs_private = &ctx->kfc; + fc->ops = &rdt_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(&init_user_ns); + fc->global = true; + return 0; } static int reset_all_ctrls(struct rdt_resource *r) @@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb) } static struct file_system_type rdt_fs_type = { - .name = "resctrl", - .mount = rdt_mount, - .kill_sb = rdt_kill_sb, + .name = "resctrl", + .init_fs_context = rdt_init_fs_context, + .parameters = &rdt_fs_parameters, + .kill_sb = rdt_kill_sb, }; static int mon_addfile(struct kernfs_node *parent_kn, const char *name, diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a687d10da417..2879e234e193 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -14,6 +14,7 @@ #include <linux/acpi.h> #include <linux/firmware-map.h> #include <linux/sort.h> +#include <linux/memory_hotplug.h> #include <asm/e820/api.h> #include <asm/setup.h> @@ -775,7 +776,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) { u64 addr; - addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); + addr = memblock_phys_alloc(size, align); if (addr) { e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n"); @@ -878,6 +879,10 @@ static int __init parse_memopt(char *p) e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); +#ifdef CONFIG_MEMORY_HOTPLUG + max_mem_size = mem_size; +#endif + return 0; } early_param("mem", parse_memopt); @@ -1092,6 +1097,9 @@ void __init e820__reserve_resources(void) res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*res) * e820_table->nr_entries); e820_res = res; for (i = 0; i < e820_table->nr_entries; i++) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3e3789c8f8e1..ef49517f6bb2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -49,7 +49,7 @@ int ftrace_arch_code_modify_post_process(void) union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; struct { - unsigned char e8; + unsigned char op; int offset; } __attribute__((packed)); }; @@ -59,20 +59,23 @@ static int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char * +ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; - calc.e8 = 0xe8; + calc.op = op; calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - /* - * No locking needed, this must be called via kstop_machine - * which in essence is like running on a uniprocessor machine. - */ return calc.code; } +static unsigned char * +ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + return ftrace_text_replace(0xe8, ip, addr); +} + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -665,22 +668,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER) -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) -{ - static union ftrace_code_union calc; - - /* Jmp not a call (ignore the .e8) */ - calc.e8 = 0xe9; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - - /* - * ftrace external locks synchronize the access to the static variable. - */ - return calc.code; -} -#endif - /* Currently only x86_64 supports dynamic trampolines */ #ifdef CONFIG_X86_64 @@ -892,8 +879,8 @@ static void *addr_from_call(void *ptr) return NULL; /* Make sure this is a call */ - if (WARN_ON_ONCE(calc.e8 != 0xe8)) { - pr_warn("Expected e8, got %x\n", calc.e8); + if (WARN_ON_ONCE(calc.op != 0xe8)) { + pr_warn("Expected e8, got %x\n", calc.op); return NULL; } @@ -964,6 +951,11 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); +static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) +{ + return ftrace_text_replace(0xe9, ip, addr); +} + static int ftrace_mod_jmp(unsigned long ip, void *func) { unsigned char *new; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dfd3aca82c61..fb32925a2e62 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -905,6 +905,8 @@ int __init hpet_enable(void) return 0; hpet_set_mapping(); + if (!hpet_virt_address) + return 0; /* * Read the period and check for a sane value: diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff9bfd40429e..d73083021002 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -354,6 +354,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, #endif default: WARN_ON_ONCE(1); + return -EINVAL; } /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 1f3b77367948..22f60dd26460 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -538,9 +538,17 @@ static int bzImage64_cleanup(void *loader_data) #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) { - return verify_pefile_signature(kernel, kernel_len, - VERIFY_USE_SECONDARY_KEYRING, - VERIFYING_KEXEC_PE_SIGNATURE); + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { + ret = verify_pefile_signature(kernel, kernel_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_KEXEC_PE_SIGNATURE); + } + return ret; } #endif diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e811d4d1c824..904494b924c1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -104,12 +104,8 @@ static u64 kvm_sched_clock_read(void) static inline void kvm_sched_clock_init(bool stable) { - if (!stable) { - pv_ops.time.sched_clock = kvm_clock_read; + if (!stable) clear_sched_clock_stable(); - return; - } - kvm_sched_clock_offset = kvm_clock_read(); pv_ops.time.sched_clock = kvm_sched_clock_read; @@ -355,6 +351,20 @@ void __init kvmclock_init(void) machine_ops.crash_shutdown = kvm_crash_shutdown; #endif kvm_get_preset_lpj(); + + /* + * X86_FEATURE_NONSTOP_TSC is TSC runs at constant rate + * with P/T states and does not stop in deep C-states. + * + * Invariant TSC exposed by host means kvmclock is not necessary: + * can use TSC as clocksource. + * + */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + !check_tsc_unstable()) + kvm_clock.rating = 299; + clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); pv_info.name = "KVM"; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3482460d984d..1bfe5c6e6cfe 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -598,8 +598,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf_base = base; mpf_found = true; - pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", - base, base + sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010lx-%#010lx]\n", + base, base + sizeof(*mpf) - 1); memblock_reserve(base, sizeof(*mpf)); if (mpf->physptr) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 13af08827eef..4bf46575568a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, void *ptr; if (!node_online(node) || !NODE_DATA(node)) { - ptr = memblock_alloc_from_nopanic(size, align, goal); + ptr = memblock_alloc_from(size, align, goal); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", cpu, size, __pa(ptr)); } else { - ptr = memblock_alloc_try_nid_nopanic(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, - node); + ptr = memblock_alloc_try_nid(size, align, goal, + MEMBLOCK_ALLOC_ACCESSIBLE, + node); pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", cpu, size, node, __pa(ptr)); } return ptr; #else - return memblock_alloc_from_nopanic(size, align, goal); + return memblock_alloc_from(size, align, goal); #endif } diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 3dc26f95d46e..9b9fd4826e7a 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -320,10 +320,14 @@ bool unwind_next_frame(struct unwind_state *state) } /* Get the next frame pointer: */ - if (state->regs) + if (state->next_bp) { + next_bp = state->next_bp; + state->next_bp = NULL; + } else if (state->regs) { next_bp = (unsigned long *)state->regs->bp; - else + } else { next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); + } /* Move to the next frame if it's safe: */ if (!update_stack_state(state, next_bp)) @@ -398,6 +402,21 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, bp = get_frame_pointer(task, regs); + /* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer. + * That means that SP points into the middle of an incomplete frame: + * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we + * would have written a frame pointer if we hadn't crashed. + * Pretend that the frame is complete and that BP points to it, but save + * the real BP so that we can use it when looking for the next frame. + */ + if (regs && regs->ip == 0 && + (unsigned long *)kernel_stack_pointer(regs) >= first_frame) { + state->next_bp = bp; + bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; + } + /* Initialize stack info and make sure the frame data is accessible: */ get_stack_info(bp, state->task, &state->stack_info, &state->stack_mask); @@ -410,7 +429,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->bp < first_frame)) + (state->next_bp == NULL && state->bp < first_frame))) unwind_next_frame(state); } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 26038eacf74a..89be1be1790c 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -113,6 +113,20 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip) } #endif +/* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer, + * and we don't have unwind information for NULL. + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function + * pointer into its parent and then continue normally from there. + */ +static struct orc_entry null_orc_entry = { + .sp_offset = sizeof(long), + .sp_reg = ORC_REG_SP, + .bp_reg = ORC_REG_UNDEFINED, + .type = ORC_TYPE_CALL +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -120,6 +134,9 @@ static struct orc_entry *orc_find(unsigned long ip) if (!orc_init) return NULL; + if (ip == 0) + return &null_orc_entry; + /* For non-init vmlinux addresses, use the fast lookup table: */ if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { unsigned int idx, start, stop; |