diff options
Diffstat (limited to 'arch/x86')
36 files changed, 333 insertions, 116 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb6e3a219294..297789aef9fa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 @@ -182,6 +183,7 @@ config X86 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION select HAVE_STACK_VALIDATION if X86_64 + select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER @@ -333,6 +335,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DYNAMIC_PHYSICAL_MASK + bool + config PGTABLE_LEVELS int default 5 if X86_5LEVEL @@ -1485,6 +1490,7 @@ config ARCH_HAS_MEM_ENCRYPT config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD + select DYNAMIC_PHYSICAL_MASK ---help--- Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 192e4d2f9efc..c6dd1d980081 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. +config MCSAFE_TEST + def_bool n + config X86_PTDUMP_CORE def_bool n diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 522d11431433..748456c365f4 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -69,6 +69,8 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long top_level_pgt; +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; + /* * Mapping information structure passed to kernel_ident_mapping_init(). * Due to relocation, pointers must be assigned at run time not build time. @@ -81,6 +83,9 @@ void initialize_identity_maps(void) /* If running as an SEV guest, the encryption mask is required. */ set_sev_encryption_mask(); + /* Exclude the encryption mask from __PHYSICAL_MASK */ + physical_mask &= ~sme_me_mask; + /* Init mapping_info with run-time function/buffer pointers. */ mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.context = &pgt_data; diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index fbf6a6c3fd2d..92190879b228 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -164,6 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(regs); } if (cached_flags & _TIF_USER_RETURN_NOTIFY) @@ -254,6 +255,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) local_irq_enable(); + rseq_syscall(regs); + /* * First do one-time work. If these work items are enabled, we * want to run them exactly once per syscall exit with IRQs on. diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 14a2f996e543..3cf7b533b3d1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -397,3 +397,4 @@ 383 i386 statx sys_statx __ia32_sys_statx 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents +386 i386 rseq sys_rseq __ia32_sys_rseq diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index cd36232ab62f..f0b1709a5ffb 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -342,6 +342,7 @@ 331 common pkey_free __x64_sys_pkey_free 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents +334 common rseq __x64_sys_rseq # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 08acd954f00e..74a9e06b6cfd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -436,6 +436,8 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} #endif /* CONFIG_X86_LOCAL_APIC */ +extern void apic_ack_irq(struct irq_data *data); + static inline void ack_APIC_irq(void) { /* diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fb00a2fca990..5701f5cecd31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -282,7 +282,9 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 000000000000..eb59804b6201 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_TEST +extern unsigned long mcsafe_test_src; +extern unsigned long mcsafe_test_dst; + +static inline void mcsafe_inject_src(void *addr) +{ + if (addr) + mcsafe_test_src = (unsigned long) addr; + else + mcsafe_test_src = ~0UL; +} + +static inline void mcsafe_inject_dst(void *addr) +{ + if (addr) + mcsafe_test_dst = (unsigned long) addr; + else + mcsafe_test_dst = ~0UL; +} +#else /* CONFIG_MCSAFE_TEST */ +static inline void mcsafe_inject_src(void *addr) +{ +} + +static inline void mcsafe_inject_dst(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_TEST */ + +#else /* __ASSEMBLY__ */ +#include <asm/export.h> + +#ifdef CONFIG_MCSAFE_TEST +.macro MCSAFE_TEST_CTL + .pushsection .data + .align 8 + .globl mcsafe_test_src + mcsafe_test_src: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_src) + .globl mcsafe_test_dst + mcsafe_test_dst: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_dst) + .popsection +.endm + +.macro MCSAFE_TEST_SRC reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_src, %r9 + ja \target +.endm + +.macro MCSAFE_TEST_DST reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_dst, %r9 + ja \target +.endm +#else +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* CONFIG_MCSAFE_TEST */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 1e53560a84bb..c85e15010f48 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -17,7 +17,6 @@ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) -#define __PHYSICAL_MASK ((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1))) #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) /* Cast *PAGE_MASK to a signed type so that it is sign-extended if @@ -55,6 +54,13 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +extern phys_addr_t physical_mask; +#define __PHYSICAL_MASK physical_mask +#else +#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) +#endif + extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3c5385f9a88f..0fdcd21dadbd 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) } #endif -static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) +static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; @@ -230,7 +230,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) *p4dp = native_make_p4d(native_pgd_val(pgd)); } -static inline void native_p4d_clear(p4d_t *p4d) +static __always_inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1e5a40673953..99fff853c944 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -65,7 +65,6 @@ #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) #endif -#define __HAVE_ARCH_PTE_SPECIAL #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ _PAGE_PKEY_BIT1 | \ diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 22647a642e98..0af81b590a0c 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -236,7 +236,7 @@ TRACE_EVENT(vector_alloc, TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, int ret), - TP_ARGS(irq, vector, ret, reserved), + TP_ARGS(irq, vector, reserved, ret), TP_STRUCT__entry( __field( unsigned int, irq ) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2d27236c16a3..b85a7c54c6a1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -301,5 +301,6 @@ extern struct x86_apic_ops x86_apic_ops; extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); +extern bool x86_pnpbios_disabled(void); #endif diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7553819c74c3..3982f79d2377 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1851,7 +1851,7 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) * intr-remapping table entry. Hence for the io-apic * EOI we use the pin number. */ - ack_APIC_irq(); + apic_ack_irq(irq_data); eoi_ioapic_pin(data->entry.vector, data); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index bb6f7a2148d7..35aaee4fc028 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -235,6 +235,15 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) return 0; + /* + * Careful here. @apicd might either have move_in_progress set or + * be enqueued for cleanup. Assigning a new vector would either + * leave a stale vector on some CPU around or in case of a pending + * cleanup corrupt the hlist. + */ + if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) + return -EBUSY; + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); if (vector > 0) apic_update_vector(irqd, vector, cpu); @@ -579,8 +588,7 @@ error: static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - unsigned int cpu, vector, prev_cpu, prev_vector; - struct apic_chip_data *apicd; + struct apic_chip_data apicd; unsigned long flags; int irq; @@ -596,24 +604,26 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, return; } - apicd = irqd->chip_data; - if (!apicd) { + if (!irqd->chip_data) { seq_printf(m, "%*sVector: Not assigned\n", ind, ""); return; } raw_spin_lock_irqsave(&vector_lock, flags); - cpu = apicd->cpu; - vector = apicd->vector; - prev_cpu = apicd->prev_cpu; - prev_vector = apicd->prev_vector; + memcpy(&apicd, irqd->chip_data, sizeof(apicd)); raw_spin_unlock_irqrestore(&vector_lock, flags); - seq_printf(m, "%*sVector: %5u\n", ind, "", vector); - seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); - if (prev_vector) { - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); - seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + + seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu); + if (apicd.prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu); } + seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0); + seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0); + seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0); + seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0); + seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist)); } #endif @@ -800,13 +810,18 @@ static int apic_retrigger_irq(struct irq_data *irqd) return 1; } -void apic_ack_edge(struct irq_data *irqd) +void apic_ack_irq(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(irqd)); irq_move_irq(irqd); ack_APIC_irq(); } +void apic_ack_edge(struct irq_data *irqd) +{ + irq_complete_move(irqd_cfg(irqd)); + apic_ack_irq(irqd); +} + static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7416fc206b4a..cd0fda1fff6d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -529,18 +529,15 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* - * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses - * a completely different MSR and bit dependent on family. + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may + * use a completely different MSR and bit dependent on family. */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + x86_amd_ssb_disable(); + else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; - case X86_VENDOR_AMD: - x86_amd_ssb_disable(); - break; } } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 95c8e507580d..910b47ee8078 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -803,6 +803,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_STIBP); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + + if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); + } } void get_cpu_cap(struct cpuinfo_x86 *c) @@ -992,7 +998,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_meltdown)) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 24bfa63e86cf..ec4754f81cbd 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -845,6 +845,8 @@ static __init void rdt_quirks(void) case INTEL_FAM6_SKYLAKE_X: if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); + else + set_rdt_options("!l3cat"); } } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 77e201301528..08286269fd24 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -70,7 +70,7 @@ static DEFINE_MUTEX(microcode_mutex); /* * Serialize late loading so that CPUs get updated one-by-one. */ -static DEFINE_SPINLOCK(update_lock); +static DEFINE_RAW_SPINLOCK(update_lock); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; @@ -560,9 +560,9 @@ static int __reload_late(void *info) if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) return -1; - spin_lock(&update_lock); + raw_spin_lock(&update_lock); apply_microcode_local(&err); - spin_unlock(&update_lock); + raw_spin_unlock(&update_lock); /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 558444b23923..c610f47373e4 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -106,17 +106,9 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) memset(line, 0, LINE_SIZE); - length = len; - length--; - - if (length > LINE_SIZE - 1) - length = LINE_SIZE - 1; - + length = strncpy_from_user(line, buf, LINE_SIZE - 1); if (length < 0) - return -EINVAL; - - if (copy_from_user(line, buf, length)) - return -EFAULT; + return length; linelen = strlen(line); ptr = line + linelen - 1; @@ -149,17 +141,16 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; ptr = skip_spaces(ptr + 5); - for (i = 0; i < MTRR_NUM_TYPES; ++i) { - if (strcmp(ptr, mtrr_strings[i])) - continue; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); - if (err < 0) - return err; - return len; - } - return -EINVAL; + i = match_string(mtrr_strings, MTRR_NUM_TYPES, ptr); + if (i < 0) + return i; + + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); + if (err < 0) + return err; + return len; } static long diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index 8eeaa81de066..0a3e70fd00d6 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -9,10 +9,12 @@ * your option) any later version. */ +#include <linux/dmi.h> #include <linux/init.h> #include <linux/syscore_ops.h> #include <asm/dma.h> +#include <asm/x86_init.h> /* * This module just handles suspend/resume issues with the @@ -49,6 +51,29 @@ static struct syscore_ops i8237_syscore_ops = { static int __init i8237A_init_ops(void) { + /* + * From SKL PCH onwards, the legacy DMA device is removed in which the + * I/O ports (81h-83h, 87h, 89h-8Bh, 8Fh) related to it are removed + * as well. All removed ports must return 0xff for a inb() request. + * + * Note: DMA_PAGE_2 (port 0x81) should not be checked for detecting + * the presence of DMA device since it may be used by BIOS to decode + * LPC traffic for POST codes. Original LPC only decodes one byte of + * port 0x80 but some BIOS may choose to enhance PCH LPC port 0x8x + * decoding. + */ + if (dma_inb(DMA_PAGE_0) == 0xFF) + return -ENODEV; + + /* + * It is not required to load this driver as newer SoC may not + * support 8237 DMA or bus mastering from LPC. Platform firmware + * must announce the support for such legacy devices via + * ACPI_FADT_LEGACY_DEVICES field in FADT table. + */ + if (x86_pnpbios_disabled() && dmi_get_bios_year() >= 2017) + return -ENODEV; + register_syscore_ops(&i8237_syscore_ops); return 0; } diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2c3a1b4294eb..74383a3780dc 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -317,15 +317,12 @@ void __init idt_setup_apic_and_irq_gates(void) set_intr_gate(i, entry); } - for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); -#else - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); - set_intr_gate(i, entry); -#endif } +#endif } /** diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 235fe6008ac8..b348a672f71d 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -33,9 +33,14 @@ void __init x86_early_init_platform_quirks(void) x86_platform.set_legacy_features(); } +bool __init x86_pnpbios_disabled(void) +{ + return x86_platform.legacy.devices.pnpbios == 0; +} + #if defined(CONFIG_PNPBIOS) bool __init arch_pnpbios_disabled(void) { - return x86_platform.legacy.devices.pnpbios == 0; + return x86_pnpbios_disabled(); } #endif diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index da270b95fe4d..445ca11ff863 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -688,6 +688,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) sigset_t *set = sigmask_to_save(); compat_sigset_t *cset = (compat_sigset_t *) set; + /* + * Increment event counter and perform fixup for the pre-signal + * frame. + */ + rseq_signal_deliver(regs); + /* Set up the stack frame */ if (is_ia32_frame(ksig)) { if (ksig->ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 92bf2f2e7cdd..f4f30d0c25c4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -664,7 +665,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + /* + * The preference is to use SPEC CTRL MSR instead of the + * VIRT_SPEC MSR. + */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(VIRT_SSBD); break; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 26110c202b19..950ec50f77c3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4115,7 +4115,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; msr_info->data = svm->spec_ctrl; @@ -4217,11 +4218,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; svm->spec_ctrl = data; diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index c3b527a9f95d..298ef1479240 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> +#include <asm/mcsafe_test.h> #include <asm/alternative-asm.h> #include <asm/export.h> @@ -183,6 +184,9 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_TEST_CTL + /* * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. @@ -206,6 +210,8 @@ ENTRY(__memcpy_mcsafe) subl %ecx, %edx .L_read_leading_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes + MCSAFE_TEST_DST %rdi 1 .E_leading_bytes .L_write_leading_bytes: movb %al, (%rdi) incq %rsi @@ -221,6 +227,8 @@ ENTRY(__memcpy_mcsafe) .L_read_words: movq (%rsi), %r8 + MCSAFE_TEST_SRC %rsi 8 .E_read_words + MCSAFE_TEST_DST %rdi 8 .E_write_words .L_write_words: movq %r8, (%rdi) addq $8, %rsi @@ -237,6 +245,8 @@ ENTRY(__memcpy_mcsafe) movl %edx, %ecx .L_read_trailing_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes + MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes .L_write_trailing_bytes: movb %al, (%rdi) incq %rsi diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 1b2197d13832..7ae36868aed2 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -527,6 +527,7 @@ void __init sme_enable(struct boot_params *bp) /* SEV state cannot be controlled by a command line option */ sme_me_mask = me_mask; sev_enabled = true; + physical_mask &= ~sme_me_mask; return; } @@ -561,4 +562,6 @@ void __init sme_enable(struct boot_params *bp) sme_me_mask = 0; else sme_me_mask = active_by_default ? me_mask : 0; + + physical_mask &= ~sme_me_mask; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ffc8c13c50e4..47b5951e592b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -8,6 +8,11 @@ #include <asm/fixmap.h> #include <asm/mtrr.h> +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; +EXPORT_SYMBOL(physical_mask); +#endif + #define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) #ifdef CONFIG_HIGHPTE @@ -114,13 +119,12 @@ static inline void pgd_list_del(pgd_t *pgd) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { - BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); - virt_to_page(pgd)->index = (pgoff_t)mm; + virt_to_page(pgd)->pt_mm = mm; } struct mm_struct *pgd_page_get_mm(struct page *page) { - return (struct mm_struct *)page->index; + return page->pt_mm; } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index f0114007e915..e5f753cbb1c3 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -59,24 +59,15 @@ int early_pci_allowed(void) void early_dump_pci_device(u8 bus, u8 slot, u8 func) { + u32 value[256 / 4]; int i; - int j; - u32 val; - printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:", - bus, slot, func); + pr_info("pci 0000:%02x:%02x.%d config space:\n", bus, slot, func); - for (i = 0; i < 256; i += 4) { - if (!(i & 0x0f)) - printk("\n %02x:",i); + for (i = 0; i < 256; i += 4) + value[i / 4] = read_pci_config(bus, slot, func, i); - val = read_pci_config(bus, slot, func, i); - for (j = 0; j < 4; j++) { - printk(" %02x", val & 0xff); - val >>= 8; - } - } - printk("\n"); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, value, 256, false); } void early_dump_pci_devices(void) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 54ef19e90705..13f4485ca388 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -636,6 +636,10 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334a, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334b, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334c, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334d, quirk_no_aersid); #ifdef CONFIG_PHYS_ADDR_T_64BIT diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index b36caae0fb2f..b96d38288c60 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -615,7 +615,7 @@ static int uv2_3_wait_completion(struct bau_desc *bau_desc, /* spin on the status MMR, waiting for it to go idle */ while (descriptor_stat != UV2H_DESC_IDLE) { - if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { + if (descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) { /* * A h/w bug on the destination side may * have prevented the message being marked diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index e4cb9f4cde8a..fc13cbbb2dce 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -47,11 +47,6 @@ static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) static void uv_noop(struct irq_data *data) { } -static void uv_ack_apic(struct irq_data *data) -{ - ack_APIC_irq(); -} - static int uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -73,7 +68,7 @@ static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, .irq_unmask = uv_noop, - .irq_eoi = uv_ack_apic, + .irq_eoi = apic_ack_irq, .irq_set_affinity = uv_set_irq_affinity, }; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2d76106788a3..96fc2f0fdbfe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -63,37 +63,44 @@ static noinline void xen_flush_tlb_all(void) #define REMAP_BATCH_SIZE 16 struct remap_data { - xen_pfn_t *mfn; + xen_pfn_t *pfn; bool contiguous; + bool no_translate; pgprot_t prot; struct mmu_update *mmu_update; }; -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, +static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); - /* If we have a contiguous range, just update the mfn itself, - else update pointer to be "next mfn". */ + /* + * If we have a contiguous range, just update the pfn itself, + * else update pointer to be "next pfn". + */ if (rmd->contiguous) - (*rmd->mfn)++; + (*rmd->pfn)++; else - rmd->mfn++; + rmd->pfn++; - rmd->mmu_update->ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; + rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; + rmd->mmu_update->ptr |= rmd->no_translate ? + MMU_PT_UPDATE_NO_TRANSLATE : + MMU_NORMAL_PT_UPDATE; rmd->mmu_update->val = pte_val_ma(pte); rmd->mmu_update++; return 0; } -static int do_remap_gfn(struct vm_area_struct *vma, +static int do_remap_pfn(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t *gfn, int nr, + xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned domid, + unsigned int domid, + bool no_translate, struct page **pages) { int err = 0; @@ -104,11 +111,14 @@ static int do_remap_gfn(struct vm_area_struct *vma, BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); - rmd.mfn = gfn; + rmd.pfn = pfn; rmd.prot = prot; - /* We use the err_ptr to indicate if there we are doing a contiguous - * mapping or a discontigious mapping. */ + /* + * We use the err_ptr to indicate if there we are doing a contiguous + * mapping or a discontigious mapping. + */ rmd.contiguous = !err_ptr; + rmd.no_translate = no_translate; while (nr) { int index = 0; @@ -119,7 +129,7 @@ static int do_remap_gfn(struct vm_area_struct *vma, rmd.mmu_update = mmu_update; err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); + remap_area_pfn_pte_fn, &rmd); if (err) goto out; @@ -173,7 +183,8 @@ int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages); + return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, + pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); @@ -192,10 +203,25 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * cause of "wrong memory was mapped in". */ BUG_ON(err_ptr == NULL); - return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages); + return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, + false, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned int domid, struct page **pages) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EOPNOTSUPP; + + return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, + true, pages); +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + /* Returns: 0 success */ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages) diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index e1a5fbeae08d..ca2d3b2bf2af 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -54,12 +54,19 @@ * charge of setting up it's own stack, GDT and IDT. */ +#define PVH_GDT_ENTRY_CS 1 +#define PVH_GDT_ENTRY_DS 2 +#define PVH_GDT_ENTRY_CANARY 3 +#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8) +#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) +#define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8) + ENTRY(pvh_start_xen) cld lgdt (_pa(gdt)) - mov $(__BOOT_DS),%eax + mov $PVH_DS_SEL,%eax mov %eax,%ds mov %eax,%es mov %eax,%ss @@ -93,11 +100,17 @@ ENTRY(pvh_start_xen) mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $__KERNEL_CS, $_pa(1f) + ljmp $PVH_CS_SEL, $_pa(1f) /* 64-bit entry point. */ .code64 1: + /* Set base address in stack canary descriptor. */ + mov $MSR_GS_BASE,%ecx + mov $_pa(canary), %eax + xor %edx, %edx + wrmsr + call xen_prepare_pvh /* startup_64 expects boot_params in %rsi. */ @@ -107,6 +120,17 @@ ENTRY(pvh_start_xen) #else /* CONFIG_X86_64 */ + /* Set base address in stack canary descriptor. */ + movl $_pa(gdt_start),%eax + movl $_pa(canary),%ecx + movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax) + shrl $16, %ecx + movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax) + movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax) + + mov $PVH_CANARY_SEL,%eax + mov %eax,%gs + call mk_early_pgtbl_32 mov $_pa(initial_page_table), %eax @@ -116,13 +140,13 @@ ENTRY(pvh_start_xen) or $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 - ljmp $__BOOT_CS, $1f + ljmp $PVH_CS_SEL, $1f 1: call xen_prepare_pvh mov $_pa(pvh_bootparams), %esi /* startup_32 doesn't expect paging and PAE to be on. */ - ljmp $__BOOT_CS, $_pa(2f) + ljmp $PVH_CS_SEL, $_pa(2f) 2: mov %cr0, %eax and $~X86_CR0_PG, %eax @@ -131,7 +155,7 @@ ENTRY(pvh_start_xen) and $~X86_CR4_PAE, %eax mov %eax, %cr4 - ljmp $__BOOT_CS, $_pa(startup_32) + ljmp $PVH_CS_SEL, $_pa(startup_32) #endif END(pvh_start_xen) @@ -143,16 +167,19 @@ gdt: .word 0 gdt_start: .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* reserved */ #ifdef CONFIG_X86_64 - .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */ + .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */ #else - .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */ + .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */ #endif - .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */ + .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */ + .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */ gdt_end: - .balign 4 + .balign 16 +canary: + .fill 48, 1, 0 + early_stack: .fill 256, 1, 0 early_stack_end: |