Diffstat (limited to 'arch/x86/include/asm')
130 files changed, 1768 insertions, 1865 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 8b52bc5ddf69..ea34464d6221 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -7,7 +7,6 @@ generated-y += unistd_32_ia32.h
 generated-y += unistd_64_x32.h
 generated-y += xen-hypercalls.h
 
-generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
 generic-y += export.h
 generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index aac686e1e005..ca0976456a6b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -13,7 +13,6 @@
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mpspec.h>
-#include <asm/realmode.h>
 #include <asm/x86_init.h>
 
 #ifdef CONFIG_ACPI_APEI
@@ -62,7 +61,7 @@ static inline void acpi_disable_pci(void)
 extern int (*acpi_suspend_lowlevel)(void);
 
 /* Physical address to resume after wakeup */
-#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
+unsigned long acpi_get_wakeup_address(void);
 
 /*
  * Check if the CPU can handle C2 and deeper
@@ -117,6 +116,12 @@ static inline bool acpi_has_cpu_in_madt(void)
 	return !!acpi_lapic;
 }
 
+#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+	x86_init.acpi.set_root_pointer(addr);
+}
+
 #define ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
@@ -125,6 +130,7 @@ static inline u64 acpi_arch_get_root_pointer(void)
 
 void acpi_generic_reduced_hw_init(void);
 
+void x86_default_set_root_pointer(u64 addr);
 u64 x86_default_get_root_pointer(void);
 
 #else /* !CONFIG_ACPI */
@@ -138,6 +144,8 @@ static inline void disable_acpi(void) { }
 
 static inline void acpi_generic_reduced_hw_init(void) { }
 
+static inline void x86_default_set_root_pointer(u64 addr) { }
+
 static inline u64 x86_default_get_root_pointer(void)
 {
 	return 0;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 094fbc9c0b1c..13adca37c99a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -201,10 +201,10 @@ static inline int alternatives_text_reserved(void *start, void *end)
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)			\
-	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+	asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
-	asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+	asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
 
 /*
  * Alternative inline assembly with input.
@@ -218,7 +218,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
  * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
-	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+	asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature)	\
 		: : "i" (0), ## input)
 
 /*
@@ -231,18 +231,18 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2,	\
			    feature2, input...)				\
-	asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1,	\
+	asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
		newinstr2, feature2)					\
		: : "i" (0), ## input)
 
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)	\
-	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+	asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature)	\
		: output : "i" (0), ## input)
 
 /* Like alternative_io, but for replacing a direct call with another one. */
 #define alternative_call(oldfunc, newfunc, feature, output, input...)	\
-	asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
+	asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
 
 /*
@@ -253,7 +253,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
			   output, input...)				\
-	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+	asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
		"call %P[new2]", feature2)				\
		: output, ASM_CALL_CONSTRAINT				\
		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		\
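For context, a minimal sketch of a typical alternative() call site (the wrapper function below is illustrative, not part of this diff). The asm_inline change above only alters how large GCC believes the statement is — and thus its inlining decisions — not the code that gets emitted:

	/* Sketch: pick MFENCE on SSE2-capable CPUs, else a locked no-op. */
	static __always_inline void full_barrier(void)
	{
		alternative("lock; addl $0,-4(%%esp)", "mfence", X86_FEATURE_XMM2);
	}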
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e647aa095867..19e94af9cc5d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,9 +136,11 @@ extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
+extern void apic_soft_disable(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
+extern void apic_intr_mode_select(void);
 extern void apic_intr_mode_init(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
@@ -176,6 +178,8 @@ extern void lapic_online(void);
 extern void lapic_offline(void);
 extern bool apic_needs_pit(void);
 
+extern void apic_send_IPI_allbutself(unsigned int vector);
+
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
@@ -185,6 +189,7 @@ static inline void disable_local_APIC(void) { }
 # define setup_secondary_APIC_clock x86_init_noop
 static inline void lapic_update_tsc_freq(void) { }
 static inline void init_bsp_APIC(void) { }
+static inline void apic_intr_mode_select(void) { }
 static inline void apic_intr_mode_init(void) { }
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
@@ -449,6 +454,14 @@ static inline void ack_APIC_irq(void)
 	apic_eoi();
 }
 
+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+	u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+
+	return !!(irr & (1U << (vector % 32)));
+}
+
 static inline unsigned default_get_apic_id(unsigned long x)
 {
 	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -465,12 +478,6 @@ static inline unsigned default_get_apic_id(unsigned long x)
 #define TRAMPOLINE_PHYS_LOW		0x467
 #define TRAMPOLINE_PHYS_HIGH		0x469
 
-#ifdef CONFIG_X86_64
-extern void apic_send_IPI_self(int vector);
-
-DECLARE_PER_CPU(int, x2apic_extra_bits);
-#endif
-
 extern void generic_bigsmp_probe(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -506,8 +513,10 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 
 #ifdef CONFIG_SMP
 bool apic_id_is_primary_thread(unsigned int id);
+void apic_smt_update(void);
 #else
 static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
+static inline void apic_smt_update(void) { }
 #endif
 
 extern void irq_enter(void);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
deleted file mode 100644
index d3a2b3876ce6..000000000000
--- a/arch/x86/include/asm/apic_flat_64.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_APIC_FLAT_64_H
-#define _ASM_X86_APIC_FLAT_64_H
-
-extern void flat_init_apic_ldr(void);
-
-#endif
-
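A usage sketch for the new IRR helper above (the caller shown is hypothetical, not from this diff). APIC_IRR is a bank of eight 32-bit registers spaced 0x10 apart, so vector V maps to bit (V % 32) of register (V / 32):

	/* Hypothetical caller: re-send a vector only if it is not already pending. */
	static void retrigger_if_lost(unsigned int vector)
	{
		if (!lapic_vector_set_in_irr(vector))
			apic->send_IPI_self(vector);
	}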
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index af45e1452f09..7a4bb1bd4bdb 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -27,7 +27,7 @@
 
 /* Unconditional execution of RDRAND and RDSEED */
 
-static inline bool rdrand_long(unsigned long *v)
+static inline bool __must_check rdrand_long(unsigned long *v)
 {
 	bool ok;
 	unsigned int retry = RDRAND_RETRY_LOOPS;
@@ -41,7 +41,7 @@ static inline bool rdrand_long(unsigned long *v)
 	return false;
 }
 
-static inline bool rdrand_int(unsigned int *v)
+static inline bool __must_check rdrand_int(unsigned int *v)
 {
 	bool ok;
 	unsigned int retry = RDRAND_RETRY_LOOPS;
@@ -55,7 +55,7 @@ static inline bool rdrand_int(unsigned int *v)
 	return false;
 }
 
-static inline bool rdseed_long(unsigned long *v)
+static inline bool __must_check rdseed_long(unsigned long *v)
 {
 	bool ok;
 	asm volatile(RDSEED_LONG
@@ -64,7 +64,7 @@ static inline bool rdseed_long(unsigned long *v)
 	return ok;
 }
 
-static inline bool rdseed_int(unsigned int *v)
+static inline bool __must_check rdseed_int(unsigned int *v)
 {
 	bool ok;
 	asm volatile(RDSEED_INT
@@ -73,10 +73,6 @@ static inline bool rdseed_int(unsigned int *v)
 	return ok;
 }
 
-/* Conditional execution based on CPU type */
-#define arch_has_random()	static_cpu_has(X86_FEATURE_RDRAND)
-#define arch_has_random_seed()	static_cpu_has(X86_FEATURE_RDSEED)
-
 /*
  * These are the generic interfaces; they must not be declared if the
  * stubs in <linux/random.h> are to be invoked,
  */
 
 #ifdef CONFIG_ARCH_RANDOM
-static inline bool arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
 {
-	return arch_has_random() ? rdrand_long(v) : false;
+	return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false;
 }
 
-static inline bool arch_get_random_int(unsigned int *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
 {
-	return arch_has_random() ? rdrand_int(v) : false;
+	return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false;
 }
 
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
 {
-	return arch_has_random_seed() ? rdseed_long(v) : false;
+	return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false;
 }
 
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
 {
-	return arch_has_random_seed() ? rdseed_int(v) : false;
+	return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false;
 }
 
 extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..cd339b88d5d4 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -7,9 +7,11 @@
 # define __ASM_FORM_RAW(x)     x
 # define __ASM_FORM_COMMA(x) x,
 #else
-# define __ASM_FORM(x)	" " #x " "
-# define __ASM_FORM_RAW(x)     #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+
+# define __ASM_FORM(x)	" " __stringify(x) " "
+# define __ASM_FORM_RAW(x)     __stringify(x)
+# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
 #endif
 
 #ifndef __x86_64__
@@ -139,9 +141,6 @@
 # define _ASM_EXTABLE_EX(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
-# define _ASM_EXTABLE_REFCOUNT(from, to)			\
-	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
 # define _ASM_NOKPROBE(entry)					\
	.pushsection "_kprobe_blacklist","aw" ;			\
	_ASM_ALIGN ;						\
@@ -170,9 +169,6 @@
 # define _ASM_EXTABLE_EX(from, to)				\
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
-# define _ASM_EXTABLE_REFCOUNT(from, to)			\
-	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 84f848c2541a..7f828fe49797 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -49,8 +49,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define array_index_mask_nospec array_index_mask_nospec
 
 /* Prevent speculative execution past this barrier. */
-#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
-					   "lfence", X86_FEATURE_LFENCE_RDTSC)
+#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
 
 #define dma_rmb()	barrier()
 #define dma_wmb()	barrier()
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ba15d53c1ca7..062cdecb2f24 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -45,14 +45,13 @@
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
-#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
 #define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 static __always_inline void
 arch_set_bit(long nr, volatile unsigned long *addr)
 {
-	if (IS_IMMEDIATE(nr)) {
+	if (__builtin_constant_p(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr))
@@ -72,7 +71,7 @@ arch___set_bit(long nr, volatile unsigned long *addr)
 static __always_inline void
 arch_clear_bit(long nr, volatile unsigned long *addr)
 {
-	if (IS_IMMEDIATE(nr)) {
+	if (__builtin_constant_p(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)~CONST_MASK(nr)));
@@ -123,7 +122,7 @@ arch___change_bit(long nr, volatile unsigned long *addr)
 static __always_inline void
 arch_change_bit(long nr, volatile unsigned long *addr)
 {
-	if (IS_IMMEDIATE(nr)) {
+	if (__builtin_constant_p(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr)));
@@ -389,7 +388,9 @@ static __always_inline int fls64(__u64 x)
 
 #include <asm-generic/bitops/const_hweight.h>
 
-#include <asm-generic/bitops-instrumented.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
 
 #include <asm-generic/bitops/le.h>
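An illustration of what the __builtin_constant_p() path above buys (the snippet is explanatory, not from the patch): for a compile-time constant bit number, the CONST_MASK macros reduce the read-modify-write to a single byte-wide locked OR on the containing byte, while a variable bit number falls through to LOCK BTS:

	static void bitop_sketch(unsigned long *flags, long n)
	{
		set_bit(5, flags);	/* constant nr: one "lock orb $0x20" on byte 0 */
		set_bit(n, flags);	/* variable nr: falls back to "lock bts" */
	}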
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index f5e90a849bca..981fe923a59f 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -59,7 +59,6 @@ static void sanitize_boot_params(struct boot_params *boot_params)
			BOOT_PARAM_PRESERVE(apm_bios_info),
			BOOT_PARAM_PRESERVE(tboot_addr),
			BOOT_PARAM_PRESERVE(ist_info),
-			BOOT_PARAM_PRESERVE(acpi_rsdp_addr),
			BOOT_PARAM_PRESERVE(hd0_info),
			BOOT_PARAM_PRESERVE(hd1_info),
			BOOT_PARAM_PRESERVE(sys_desc_table),
@@ -71,6 +70,8 @@ static void sanitize_boot_params(struct boot_params *boot_params)
			BOOT_PARAM_PRESERVE(eddbuf_entries),
			BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
			BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+			BOOT_PARAM_PRESERVE(secure_boot),
+			BOOT_PARAM_PRESERVE(hdr),
			BOOT_PARAM_PRESERVE(e820_table),
			BOOT_PARAM_PRESERVE(eddbuf),
		};
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 6804d6642767..facba9bc30ca 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -32,7 +32,7 @@
 
 #define _BUG_FLAGS(ins, flags)						\
 do {									\
-	asm volatile("1:\t" ins "\n"					\
+	asm_inline volatile("1:\t" ins "\n"				\
		     ".pushsection __bug_table,\"aw\"\n"		\
		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
		     "\t" __BUG_REL(%c0) "\t# bug_entry::file\n"	\
@@ -49,7 +49,7 @@ do {									\
 
 #define _BUG_FLAGS(ins, flags)						\
 do {									\
-	asm volatile("1:\t" ins "\n"					\
+	asm_inline volatile("1:\t" ins "\n"				\
		     ".pushsection __bug_table,\"aw\"\n"		\
		     "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"	\
		     "\t.word %c0"        "\t# bug_entry::flags\n"	\
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 542509b53e0f..92ae28389940 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -6,16 +6,12 @@
 
 extern void check_bugs(void);
 
-#if defined(CONFIG_CPU_SUP_INTEL)
-void check_mpx_erratum(struct cpuinfo_x86 *c);
-#else
-static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
-#endif
-
 #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
 int ppro_with_ram_bug(void);
 #else
 static inline int ppro_with_ram_bug(void) { return 0; }
 #endif
 
+extern void cpu_bugs_smt_update(void);
+
 #endif /* _ASM_X86_BUGS_H */
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
deleted file mode 100644
index facd374a1bf7..000000000000
--- a/arch/x86/include/asm/calgary.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Derived from include/asm-powerpc/iommu.h
- *
- * Copyright IBM Corporation, 2006-2007
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- */
-
-#ifndef _ASM_X86_CALGARY_H
-#define _ASM_X86_CALGARY_H
-
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/timer.h>
-#include <asm/types.h>
-
-struct iommu_table {
-	const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
-	unsigned long  it_base;      /* mapped address of tce table */
-	unsigned long  it_hint;      /* Hint for next alloc */
-	unsigned long *it_map;       /* A simple allocation bitmap for now */
-	void __iomem  *bbar;         /* Bridge BAR */
-	u64	       tar_val;      /* Table Address Register */
-	struct timer_list watchdog_timer;
-	spinlock_t     it_lock;      /* Protects it_map */
-	unsigned int   it_size;      /* Size of iommu table in entries */
-	unsigned char  it_busno;     /* Bus number this table belongs to */
-};
-
-struct cal_chipset_ops {
-	void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev);
-	void (*tce_cache_blast)(struct iommu_table *tbl);
-	void (*dump_error_regs)(struct iommu_table *tbl);
-};
-
-#define TCE_TABLE_SIZE_UNSPECIFIED	~0
-#define TCE_TABLE_SIZE_64K		0
-#define TCE_TABLE_SIZE_128K		1
-#define TCE_TABLE_SIZE_256K		2
-#define TCE_TABLE_SIZE_512K		3
-#define TCE_TABLE_SIZE_1M		4
-#define TCE_TABLE_SIZE_2M		5
-#define TCE_TABLE_SIZE_4M		6
-#define TCE_TABLE_SIZE_8M		7
-
-extern int use_calgary;
-
-#ifdef CONFIG_CALGARY_IOMMU
-extern int detect_calgary(void);
-#else
-static inline int detect_calgary(void) { return -ENODEV; }
-#endif
-
-#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 22c4dfe65992..52e9f3480f69 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -177,23 +177,6 @@ typedef struct user_regs_struct compat_elf_gregset_t;
	(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
 #endif
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
	compat_uptr_t sp;
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index cff3f3f3bfe0..02c0078d3787 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_X86_CPU_ENTRY_AREA_H
 #define _ASM_X86_CPU_ENTRY_AREA_H
 
@@ -6,6 +6,7 @@
 #include <linux/percpu-defs.h>
 #include <asm/processor.h>
 #include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
 
 #ifdef CONFIG_X86_64
 
@@ -65,6 +66,13 @@ enum exception_stack_ordering {
 
 #endif
 
+#ifdef CONFIG_X86_32
+struct doublefault_stack {
+	unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
+	struct x86_hw_tss tss;
+} __aligned(PAGE_SIZE);
+#endif
+
 /*
  * cpu_entry_area is a percpu region that contains things needed by the CPU
  * and early entry/exit code.  Real types aren't used for all fields here
@@ -78,10 +86,19 @@ struct cpu_entry_area {
 
	/*
	 * The GDT is just below entry_stack and thus serves (on x86_64) as
-	 * a a read-only guard page.
+	 * a read-only guard page. On 32-bit the GDT must be writeable, so
+	 * it needs an extra guard page.
	 */
+#ifdef CONFIG_X86_32
+	char guard_entry_stack[PAGE_SIZE];
+#endif
	struct entry_stack_page entry_stack_page;
 
+#ifdef CONFIG_X86_32
+	char guard_doublefault_stack[PAGE_SIZE];
+	struct doublefault_stack doublefault_stack;
+#endif
+
	/*
	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
	 * we need task switches to work, and task switches write to the TSS.
@@ -94,7 +111,6 @@
	 */
	struct cea_exception_stacks estacks;
 #endif
-#ifdef CONFIG_CPU_SUP_INTEL
	/*
	 * Per CPU debug store for Intel performance monitoring. Wastes a
	 * full page at the moment.
	 */
	struct debug_store cpu_debug_store;
	/*
	 * Reserve enough fixmap PTEs.
	 */
	struct debug_store_buffers cpu_debug_buffers;
-#endif
 };
 
-#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+#define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+/* Total size includes the readonly IDT mapping page as well: */
+#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
@@ -117,14 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
 extern void setup_cpu_entry_areas(void);
 extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
 
-#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE			\
-	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
-
 extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
 
 static inline struct entry_stack *cpu_entry_stack(int cpu)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 58acda503817..59bf91c57aa8 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -61,6 +61,13 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
	(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
 
+/*
+ * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the
+ * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all
+ * header macros which use NCAPINTS need to be changed. The duplicated macro
+ * use causes the compiler to issue errors for all headers so that all usage
+ * sites can be corrected.
+ */
 #define REQUIRED_MASK_BIT_SET(feature_bit)		\
	 ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK,  0, feature_bit) ||	\
	   CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK,  1, feature_bit) ||	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index e880f2408e29..f3327cb56edf 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,6 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC	( 3*32+17) /* "" MFENCE synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -221,6 +220,7 @@
 #define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
 #define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* "" L1TF workaround PTE inversion */
 #define X86_FEATURE_IBRS_ENHANCED	( 7*32+30) /* Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL	( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -232,6 +232,8 @@
 #define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */
 #define X86_FEATURE_EPT_AD		( 8*32+17) /* Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL		( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -291,6 +293,7 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU		(13*32+ 4) /* Read processor register at user level */
 #define X86_FEATURE_WBNOINVD		(13*32+ 9) /* WBNOINVD instruction */
 #define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
@@ -355,6 +358,8 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM		(18*32+ 4) /* Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
@@ -397,5 +402,7 @@
 #define X86_BUG_MDS			X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
 #define X86_BUG_MSBDS_ONLY		X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
 #define X86_BUG_SWAPGS			X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA			X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..c8b39c6716ff
--- /dev/null
+++ b/arch/x86/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_HALTPOLL_H
+#define _ARCH_HALTPOLL_H
+
+void arch_haltpoll_enable(unsigned int cpu);
+void arch_haltpoll_disable(unsigned int cpu);
+
+#endif
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index 0acf5ee45a21..f58de66091e5 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -2,10 +2,17 @@
 #ifndef _ASM_X86_CRASH_H
 #define _ASM_X86_CRASH_H
 
+struct kimage;
+
 int crash_load_segments(struct kimage *image);
-int crash_copy_backup_region(struct kimage *image);
 int crash_setup_memmap_entries(struct kimage *image,
		struct boot_params *params);
 void crash_smp_send_stop(void);
 
+#ifdef CONFIG_KEXEC_CORE
+void __init crash_reserve_low_1M(void);
+#else
+static inline void __init crash_reserve_low_1M(void) { }
+#endif
+
 #endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h
deleted file mode 100644
index c508521dd190..000000000000
--- a/arch/x86/include/asm/crypto/aes.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_AES_H
-#define ASM_X86_AES_H
-
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
-			    const u8 *src);
-void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
-			    const u8 *src);
-#endif
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index a5d86fc0593f..f6d91861cb14 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -26,71 +26,66 @@ struct camellia_xts_ctx {
 
 extern int __camellia_setkey(struct camellia_ctx *cctx,
			     const unsigned char *key,
-			     unsigned int key_len, u32 *flags);
+			     unsigned int key_len);
 
 extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int keylen);
 
 /* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
-				 const u8 *src);
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+				   bool xor);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-					const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
-				      const u8 *src);
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+					bool xor);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
 
 /* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src);
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
-				   const u8 *src, le128 *iv);
-
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
-				       const u8 *src, le128 *iv);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
-				    const u8 *src)
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+				   le128 *iv);
+
+asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
+asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+				       le128 *iv);
+
+static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
 {
	__camellia_enc_blk(ctx, dst, src, false);
 }
 
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
 {
	__camellia_enc_blk(ctx, dst, src, true);
 }
 
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
					 const u8 *src)
 {
	__camellia_enc_blk_2way(ctx, dst, src, false);
 }
 
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
					     const u8 *src)
 {
	__camellia_enc_blk_2way(ctx, dst, src, true);
 }
 
 /* glue helpers */
-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
+extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
			       le128 *iv);
-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
				    le128 *iv);
 
-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
+			     le128 *iv);
+extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
+			     le128 *iv);
 
 #endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index d1818634ae7e..777c0f63418c 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -11,18 +11,13 @@
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src,
				       le128 *iv);
-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src,
				       le128 *iv);
 
-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
-
 struct common_glue_func_entry {
	unsigned int num_blocks; /* number of blocks that @fn will process */
	union {
@@ -114,9 +109,10 @@ extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
 extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
			       struct skcipher_request *req,
			       common_glue_func_t tweak_fn, void *tweak_ctx,
-			       void *crypt_ctx);
+			       void *crypt_ctx, bool decrypt);
 
-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
-				      le128 *iv, common_glue_func_t fn);
+extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst,
+				      const u8 *src, le128 *iv,
+				      common_glue_func_t fn);
 
 #endif /* _CRYPTO_GLUE_HELPER_H */
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index db7c9cc32234..251c2c89d7cf 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -15,26 +15,26 @@ struct serpent_xts_ctx {
	struct serpent_ctx crypt_ctx;
 };
 
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
					 const u8 *src);
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
					 const u8 *src);
 
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
					 const u8 *src);
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
-				     const u8 *src, le128 *iv);
+asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
+				     le128 *iv);
 
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
					 const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
					 const u8 *src, le128 *iv);
 
-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
				le128 *iv);
 
-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
+extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
 
 extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
			      unsigned int keylen);
diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h
index 1a345e8a7496..860ca248914b 100644
--- a/arch/x86/include/asm/crypto/serpent-sse2.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -9,25 +9,23 @@
 
 #define SERPENT_PARALLEL_BLOCKS 4
 
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
				       const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
				     const u8 *src);
 
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
	__serpent_enc_blk_4way(ctx, dst, src, false);
 }
 
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
-					    const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+					    u8 *dst, const u8 *src)
 {
	__serpent_enc_blk_4way(ctx, dst, src, true);
 }
 
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
	serpent_dec_blk_4way(ctx, dst, src);
 }
@@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
 
 #define SERPENT_PARALLEL_BLOCKS 8
 
-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
				       const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
				     const u8 *src);
 
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
	__serpent_enc_blk_8way(ctx, dst, src, false);
 }
 
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
-					    const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+					    u8 *dst, const u8 *src)
 {
	__serpent_enc_blk_8way(ctx, dst, src, true);
 }
 
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
-					const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
 {
	serpent_dec_blk_8way(ctx, dst, src);
 }
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index f618bf272b90..2c377a8042e1 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -7,22 +7,19 @@
 #include <crypto/b128ops.h>
 
 /* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
-				const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
-				const u8 *src);
+asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-				       const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
-				     const u8 *src);
+asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
+				       bool xor);
+asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
 
 /* helpers from twofish_x86_64-3way module */
-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
+extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src,
				le128 *iv);
-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src,
				     le128 *iv);
 
 #endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index a8f6c809d9b1..7e31f7f1bb06 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,20 +6,7 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
	void *iommu; /* hook for IOMMU specific extension */
 #endif
-#ifdef CONFIG_STA2X11
-	bool is_sta2x11;
-#endif
-};
-
-#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
-struct dma_domain {
-	struct list_head node;
-	const struct dma_map_ops *dma_ops;
-	int domain_nr;
 };
-void add_dma_domain(struct dma_domain *domain);
-void del_dma_domain(struct dma_domain *domain);
-#endif
 
 struct pdev_archdata {
 };
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index a5ea841cc6d2..4ea8584682f9 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,19 +10,13 @@
  * cpu_feature_enabled().
  */
 
-#ifdef CONFIG_X86_INTEL_MPX
-# define DISABLE_MPX	0
-#else
-# define DISABLE_MPX	(1<<(X86_FEATURE_MPX & 31))
-#endif
-
 #ifdef CONFIG_X86_SMAP
 # define DISABLE_SMAP	0
 #else
 # define DISABLE_SMAP	(1<<(X86_FEATURE_SMAP & 31))
 #endif
 
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
 # define DISABLE_UMIP	0
 #else
 # define DISABLE_UMIP	(1<<(X86_FEATURE_UMIP & 31))
@@ -74,7 +68,7 @@
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
-#define DISABLED_MASK9	(DISABLE_MPX|DISABLE_SMAP)
+#define DISABLED_MASK9	(DISABLE_SMAP)
 #define DISABLED_MASK10	0
 #define DISABLED_MASK11	0
 #define DISABLED_MASK12	0
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 20a46150e0a8..9b8cb50768c2 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -73,6 +73,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
 
 #else
 # include <asm-generic/div64.h>
+
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+	u64 q;
+
+	asm ("mulq %2; divq %3" : "=a" (q)
+				: "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
+				: "rdx");
+
+	return q;
+}
+#define mul_u64_u32_div	mul_u64_u32_div
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* _ASM_X86_DIV64_H */
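A sketch of why the 64-bit mul_u64_u32_div() added above is useful (the wrapper and parameter names are illustrative): the MULQ/DIVQ pair keeps the 128-bit intermediate product in RDX:RAX, so a * mul cannot overflow before the divide — e.g. when scaling a cycle count to nanoseconds:

	/* cycles * 1e9 / tsc_hz, with a 128-bit intermediate product. */
	static inline u64 tsc_cycles_to_ns(u64 cycles, u32 tsc_hz)
	{
		return mul_u64_u32_div(cycles, NSEC_PER_SEC, tsc_hz);
	}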
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
deleted file mode 100644
index 1a19251eaac9..000000000000
--- a/arch/x86/include/asm/dma-direct.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_DMA_DIRECT_H
-#define ASM_X86_DMA_DIRECT_H 1
-
-bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-
-#endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
new file mode 100644
index 000000000000..af9a14ac8962
--- /dev/null
+++ b/arch/x86/include/asm/doublefault.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_DOUBLEFAULT_H
+#define _ASM_X86_DOUBLEFAULT_H
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+extern void doublefault_init_cpu_tss(void);
+#else
+static inline void doublefault_init_cpu_tss(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_DOUBLEFAULT_H */
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index c3aa4b5e49e2..314f75d886d0 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -29,6 +29,14 @@ enum e820_type {
	E820_TYPE_PRAM		= 12,
 
	/*
+	 * Special-purpose memory is indicated to the system via the
+	 * EFI_MEMORY_SP attribute. Define an e820 translation of this
+	 * memory type for the purpose of reserving this range and
+	 * marking it with the IORES_DESC_SOFT_RESERVED designation.
+	 */
+	E820_TYPE_SOFT_RESERVED	= 0xefffffff,
+
+	/*
	 * Reserved RAM used by the kernel itself if
	 * CONFIG_INTEL_TXT=y is enabled, memory of this type
	 * will be included in the S3 integrity calculation
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 606a4b6a9812..86169a24b0d8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -8,6 +8,7 @@
 #include <asm/tlb.h>
 #include <asm/nospec-branch.h>
 #include <asm/mmu_context.h>
+#include <linux/build_bug.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
@@ -19,13 +20,16 @@
  * This is the main reason why we're doing stable VA mappings for RT
  * services.
  *
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
+ * SGI UV1 machines are known to be incompatible with this scheme, so we
+ * provide an opt-out for these machines via a DMI quirk that sets the
+ * attribute below.
  */
-#define EFI_OLD_MEMMAP		EFI_ARCH_1
+#define EFI_UV1_MEMMAP		EFI_ARCH_1
+
+static inline bool efi_have_uv1_memmap(void)
+{
+	return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP);
+}
 
 #define EFI32_LOADER_SIGNATURE	"EL32"
 #define EFI64_LOADER_SIGNATURE	"EL64"
@@ -34,10 +38,46 @@
 
 #define ARCH_EFI_IRQ_FLAGS_MASK	X86_EFLAGS_IF
 
-#ifdef CONFIG_X86_32
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allows us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
+
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__,	\
+	__efi_arg_sentinel(7), __efi_arg_sentinel(6),		\
+	__efi_arg_sentinel(5), __efi_arg_sentinel(4),		\
+	__efi_arg_sentinel(3), __efi_arg_sentinel(2),		\
+	__efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...)	\
+	__take_second_arg(n,					\
+		({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+#define __efi_arg_sentinel(n) , n
+
+/*
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
+ */
 
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
+#define __efi_nargs_check(f, n, ...)					\
+	__efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n)
+#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n)
+#define __efi_nargs_check__(f, p, n) ({					\
+	BUILD_BUG_ON_MSG(						\
+		(p) > (n),						\
+		#f " called with too many arguments (" #p ">" #n ")");	\
+})
+#ifdef CONFIG_X86_32
 
 #define arch_efi_call_virt_setup()					\
 ({									\
	kernel_fpu_begin();						\
	firmware_restrict_branch_speculation_start();			\
 })
 
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-#define arch_efi_call_virt(p, f, args...)				\
-({									\
-	((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args);	\
-})
+#define arch_efi_call_virt(p, f, args...)	p->f(args)
 
 #define efi_ioremap(addr, size, type, attr)	ioremap_cache(addr, size)
 
@@ -65,9 +99,12 @@
 
 #define EFI_LOADER_SIGNATURE	"EL64"
 
-extern asmlinkage u64 efi_call(void *fp, ...);
+extern asmlinkage u64 __efi_call(void *fp, ...);
 
-#define efi_call_phys(f, args...)	efi_call((f), args)
+#define efi_call(...) ({						\
+	__efi_nargs_check(efi_call, 7, __VA_ARGS__);			\
+	__efi_call(__VA_ARGS__);					\
+})
 
 /*
  * struct efi_scratch - Scratch space used while switching to/from efi_mm
@@ -85,7 +122,7 @@ struct efi_scratch {
	kernel_fpu_begin();						\
	firmware_restrict_branch_speculation_start();			\
									\
-	if (!efi_enabled(EFI_OLD_MEMMAP))				\
+	if (!efi_have_uv1_memmap())					\
		efi_switch_mm(&efi_mm);					\
 })
 
@@ -94,7 +131,7 @@ struct efi_scratch {
 
 #define arch_efi_call_virt_teardown()					\
 ({									\
-	if (!efi_enabled(EFI_OLD_MEMMAP))				\
+	if (!efi_have_uv1_memmap())					\
		efi_switch_mm(efi_scratch.prev_mm);			\
									\
	firmware_restrict_branch_speculation_end();			\
@@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
-extern pgd_t * __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_print_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
@@ -140,7 +175,8 @@ extern void efi_delete_dummy_variable(void);
 extern void efi_switch_mm(struct mm_struct *mm);
 extern void efi_recover_from_page_fault(unsigned long phys_addr);
 extern void efi_free_boot_services(void);
-extern void efi_reserve_boot_services(void);
+extern pgd_t * __init efi_uv1_memmap_phys_prolog(void);
+extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd);
 
 struct efi_setup_data {
	u64 fw_vendor;
@@ -153,102 +189,174 @@ struct efi_setup_data {
 extern u64 efi_setup;
 
 #ifdef CONFIG_EFI
+extern efi_status_t __efi64_thunk(u32, ...);
 
-static inline bool efi_is_native(void)
+#define efi64_thunk(...) ({						\
+	__efi_nargs_check(efi64_thunk, 6, __VA_ARGS__);			\
+	__efi64_thunk(__VA_ARGS__);					\
+})
+
+static inline bool efi_is_mixed(void)
 {
-	return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+	if (!IS_ENABLED(CONFIG_EFI_MIXED))
+		return false;
+	return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT);
 }
 
 static inline bool efi_runtime_supported(void)
 {
-	if (efi_is_native())
-		return true;
-
-	if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP))
+	if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT))
		return true;
 
-	return false;
+	return IS_ENABLED(CONFIG_EFI_MIXED);
 }
 
 extern void parse_efi_setup(u64 phys_addr, u32 data_len);
 
 extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
 
-#ifdef CONFIG_EFI_MIXED
 extern void efi_thunk_runtime_setup(void);
-extern efi_status_t efi_thunk_set_virtual_address_map(
-	void *phys_set_virtual_address_map,
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map);
-#else
-static inline void efi_thunk_runtime_setup(void) {}
-static inline efi_status_t efi_thunk_set_virtual_address_map(
-	void *phys_set_virtual_address_map,
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
-{
-	return EFI_SUCCESS;
-}
-#endif /* CONFIG_EFI_MIXED */
-
+efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
					 unsigned long descriptor_size,
					 u32 descriptor_version,
					 efi_memory_desc_t *virtual_map);
 
 /* arch specific definitions used by the stub code */
 
-struct efi_config {
-	u64 image_handle;
-	u64 table;
-	u64 runtime_services;
-	u64 boot_services;
-	u64 text_output;
-	efi_status_t (*call)(unsigned long, ...);
-	bool is64;
-} __packed;
-
-__pure const struct efi_config *__efi_early(void);
+__attribute_const__ bool efi_is_64bit(void);
 
-static inline bool efi_is_64bit(void)
+static inline bool efi_is_native(void)
 {
	if (!IS_ENABLED(CONFIG_X86_64))
-		return false;
-
+		return true;
	if (!IS_ENABLED(CONFIG_EFI_MIXED))
		return true;
+	return efi_is_64bit();
+}
+
+#define efi_mixed_mode_cast(attr)					\
+	__builtin_choose_expr(						\
+		__builtin_types_compatible_p(u32, __typeof__(attr)),	\
+			(unsigned long)(attr), (attr))
+
+#define efi_table_attr(inst, attr)					\
+	(efi_is_native()						\
+		? inst->attr						\
+		: (__typeof__(inst->attr))				\
+			efi_mixed_mode_cast(inst->mixed_mode.attr))
+
+/*
+ * The following macros allow translating arguments if necessary from native to
+ * mixed mode. The use case for this is to initialize the upper 32 bits of
+ * output parameters, and where the 32-bit method requires a 64-bit argument,
+ * which must be split up into two arguments to be thunked properly.
+ *
+ * As examples, the AllocatePool boot service returns the address of the
+ * allocation, but it will not set the high 32 bits of the address. To ensure
+ * that the full 64-bit address is initialized, we zero-init the address before
+ * calling the thunk.
+ *
+ * The FreePages boot service takes a 64-bit physical address even in 32-bit
+ * mode. For the thunk to work correctly, a native 64-bit call of
+ *	free_pages(addr, size)
+ * must be translated to
+ *	efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size)
+ * so that the two 32-bit halves of addr get pushed onto the stack separately.
+ */
 
-	return __efi_early()->is64;
+static inline void *efi64_zero_upper(void *p)
+{
+	((u32 *)p)[1] = 0;
+	return p;
 }
 
-#define efi_table_attr(table, attr, instance)				\
-	(efi_is_64bit() ?						\
-		((table##_64_t *)(unsigned long)instance)->attr :	\
-		((table##_32_t *)(unsigned long)instance)->attr)
+#define __efi64_argmap_free_pages(addr, size)				\
+	((addr), 0, (size))
+
+#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver)	\
+	((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver))
+
+#define __efi64_argmap_allocate_pool(type, size, buffer)		\
+	((type), (size), efi64_zero_upper(buffer))
+
+#define __efi64_argmap_handle_protocol(handle, protocol, interface)	\
+	((handle), (protocol), efi64_zero_upper(interface))
 
-#define efi_call_proto(protocol, f, instance, ...)			\
-	__efi_early()->call(efi_table_attr(protocol, f, instance),	\
-		instance, ##__VA_ARGS__)
+#define __efi64_argmap_locate_protocol(protocol, reg, interface)	\
+	((protocol), (reg), efi64_zero_upper(interface))
+
+/* PCI I/O */
+#define __efi64_argmap_get_location(protocol, seg, bus, dev, func)	\
+	((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus),	\
+	 efi64_zero_upper(dev), efi64_zero_upper(func))
+
+/*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+ * __efi64_argmap_<method name>, following the examples above.
+ */
 
-#define efi_call_early(f, ...)						\
-	__efi_early()->call(efi_table_attr(efi_boot_services, f,	\
-		__efi_early()->boot_services), __VA_ARGS__)
+#define __efi64_thunk_map(inst, func, ...)				\
+	efi64_thunk(inst->mixed_mode.func,				\
+		__efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__),	\
			       (__VA_ARGS__)))
 
-#define __efi_call_early(f, ...)					\
-	__efi_early()->call((unsigned long)f, __VA_ARGS__);
+#define __efi64_argmap(mapped, args)					\
+	__PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args)
+#define __efi64_argmap__0(mapped, args) __efi_eval mapped
+#define __efi64_argmap__1(mapped, args) __efi_eval args
 
-#define efi_call_runtime(f, ...)					\
-	__efi_early()->call(efi_table_attr(efi_runtime_services, f,	\
-		__efi_early()->runtime_services), __VA_ARGS__)
+#define __efi_eat(...)
+#define __efi_eval(...) __VA_ARGS__
+
+/* The three macros below handle dispatching via the thunk if needed */
+
+#define efi_call_proto(inst, func, ...)					\
+	(efi_is_native()						\
+		? inst->func(inst, ##__VA_ARGS__)			\
+		: __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__))
+
+#define efi_bs_call(func, ...)						\
+	(efi_is_native()						\
+		? efi_system_table()->boottime->func(__VA_ARGS__)	\
+		: __efi64_thunk_map(efi_table_attr(efi_system_table(),	\
				boottime), func, __VA_ARGS__))
+
+#define efi_rt_call(func, ...)						\
+	(efi_is_native()						\
+		? efi_system_table()->runtime->func(__VA_ARGS__)	\
+		: __efi64_thunk_map(efi_table_attr(efi_system_table(),	\
				runtime), func, __VA_ARGS__))
 
 extern bool efi_reboot_required(void);
+extern bool efi_is_table_address(unsigned long phys_addr);
 
+extern void efi_find_mirror(void);
+extern void efi_reserve_boot_services(void);
 #else
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
 {
	return false;
 }
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+	return false;
+}
+static inline void efi_find_mirror(void)
+{
+}
+static inline void efi_reserve_boot_services(void)
+{
+}
 #endif /* CONFIG_EFI */
 
+#ifdef CONFIG_EFI_FAKE_MEMMAP
+extern void __init efi_fake_memmap_early(void);
+#else
+static inline void efi_fake_memmap_early(void)
+{
+}
+#endif
+
 #endif /* _ASM_X86_EFI_H */
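To make the dispatch above concrete, a sketch of a stub-side call (the wrapper function is illustrative, not from this diff): efi_bs_call() invokes the boot service directly when running natively, or routes it through efi64_thunk() with the matching __efi64_argmap_* translation — here __efi64_argmap_allocate_pool() zero-initializes the upper half of the returned pointer before the 32-bit service fills in the low half:

	static efi_status_t stub_alloc(unsigned long size, void **buf)
	{
		return efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, buf);
	}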
* * If necessary we round it up to the next 512 pages boundary so - * that we can have a single pgd entry and a single pte table: + * that we can have a single pmd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 #define FIX_BTMAPS_SLOTS 8 @@ -157,7 +156,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4c95c365058a..44c48e34d799 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) { - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; + return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } /* diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 287f1f7b2e52..85be2f506272 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -16,7 +16,6 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #ifndef __ASSEMBLY__ -extern void mcount(void); extern atomic_t modifying_ftrace_code; extern void __fentry__(void); @@ -29,14 +28,25 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +/* + * When a ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, we + * place the direct caller in the ORIG_AX part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + /* Emulate a call */ + regs->orig_ax = addr; +} + #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -int ftrace_int3_handler(struct pt_regs *regs); - #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cbd97e22d2f3..4154bc5f6a4e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -153,8 +153,8 @@ extern char irq_entries_start[]; extern char spurious_entries_start[]; #define VECTOR_UNUSED NULL -#define VECTOR_SHUTDOWN ((void *)~0UL) -#define VECTOR_RETRIGGERED ((void *)~1UL) +#define VECTOR_SHUTDOWN ((void *)-1L) +#define VECTOR_RETRIGGERED ((void *)-2L) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index af78cd72b8f3..92abc1e42bfc 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -13,6 +13,16 @@ #include <asm/page.h> /* + * While not explicitly listed in the TLFS, Hyper-V always runs with a page size + * of 4096. These definitions are used when communicating with Hyper-V using + * guest physical pages and guest physical page addresses, since the guest page + * size may not be 4096 on all architectures. 
+ */ +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + +/* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). */ @@ -76,6 +86,8 @@ #define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) /* AccessReenlightenmentControls privilege */ #define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) +/* AccessTscInvariantControls privilege */ +#define HV_X64_ACCESS_TSC_INVARIANT BIT(15) /* * Feature identification: indicates which flags were specified at partition @@ -170,7 +182,15 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT + * threads. + */ +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) + /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ +#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) @@ -260,6 +280,9 @@ #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 +/* TSC invariant control */ +#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 + /* * Declare the MSR used to setup pages used to communicate with the hypervisor. */ @@ -514,14 +537,24 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ } __packed; +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + /* Define virtual processor assist page structure. */ struct hv_vp_assist_page { __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u32 padding; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; __u64 current_nested_vmcs; } __packed; @@ -776,7 +809,8 @@ union hv_synic_sint { u64 reserved1:8; u64 masked:1; u64 auto_eoi:1; - u64 reserved2:46; + u64 polling:1; + u64 reserved2:45; } __packed; }; @@ -847,7 +881,7 @@ union hv_gpa_page_range { * count is equal with how many entries of union hv_gpa_page_range can * be populated into the input parameter page. 
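Because Hyper-V's page size is fixed at 4K while the guest's PAGE_SIZE may differ, size conversions should use the HV_HYP_PAGE_* constants introduced above. A hedged sketch of the kind of helper this enables (the function name is illustrative, not a real API):

/* hypothetical: round a guest buffer size up to whole hypervisor pages */
static inline unsigned long hv_bytes_to_hyp_pages(unsigned long bytes)
{
	return (bytes + HV_HYP_PAGE_SIZE - 1) >> HV_HYP_PAGE_SHIFT;
}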
*/ -#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ sizeof(union hv_gpa_page_range)) struct hv_guest_mapping_flush_list { @@ -872,4 +906,7 @@ struct hv_tlb_flush_ex { u64 gva_list[]; } __packed; +struct hv_partition_assist_pg { + u32 tlb_lock_count; +}; #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 154f27be8bfc..5c1ae3eff9d4 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -45,6 +45,7 @@ struct insn { struct insn_field immediate2; /* for 64bit imm or seg16 */ }; + int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; @@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn) return (insn->vex_prefix.nbytes == 4); } +static inline int insn_has_emulate_prefix(struct insn *insn) +{ + return !!insn->emulate_prefix_size; +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0278aa66ef62..4981c293f926 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -5,12 +5,34 @@ /* * "Big Core" Processors (Branded as Core, Xeon, etc...) * - * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E, or Atom microserver. - * * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within * that group keep the CPUID for the variants sorted by model number. + * + * The defined symbol names have the following form: + * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF} + * where: + * OPTFAMILY Describes the family of CPUs that this belongs to. Default + * is assumed to be "_CORE" (and should be omitted). Other values + * currently in use are _ATOM and _XEON_PHI + * MICROARCH Is the code name for the micro-architecture for this core. + * N.B. Not the platform name. + * OPTDIFF If needed, a short string to differentiate by market segment. + * + * Common OPTDIFFs: + * + * - regular client parts + * _L - regular mobile parts + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts + * + * Historical OPTDIFFs: + * + * _EP - 2 socket server parts + * _EX - 4+ socket server parts + * + * The #define line may optionally include a comment including platform names. 
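A sketch of how the renamed model defines are typically consumed; the positional x86_cpu_id initializer shown was a common idiom at the time, and the table contents are illustrative only.

#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

static const struct x86_cpu_id demo_ids[] = {
	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X },
	{ X86_VENDOR_INTEL, 6, INTEL_FAM6_KABYLAKE_L },	/* was KABYLAKE_MOBILE */
	{}
};

static bool demo_cpu_supported(void)
{
	return x86_match_cpu(demo_ids) != NULL;
}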
*/ #define INTEL_FAM6_CORE_YONAH 0x0E @@ -34,30 +56,36 @@ #define INTEL_FAM6_IVYBRIDGE 0x3A #define INTEL_FAM6_IVYBRIDGE_X 0x3E -#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL 0x3C #define INTEL_FAM6_HASWELL_X 0x3F -#define INTEL_FAM6_HASWELL_ULT 0x45 -#define INTEL_FAM6_HASWELL_GT3E 0x46 +#define INTEL_FAM6_HASWELL_L 0x45 +#define INTEL_FAM6_HASWELL_G 0x46 -#define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL 0x3D +#define INTEL_FAM6_BROADWELL_G 0x47 #define INTEL_FAM6_BROADWELL_X 0x4F -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_FAM6_BROADWELL_D 0x56 -#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E -#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_L 0x4E +#define INTEL_FAM6_SKYLAKE 0x5E #define INTEL_FAM6_SKYLAKE_X 0x55 -#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E -#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E +#define INTEL_FAM6_KABYLAKE_L 0x8E +#define INTEL_FAM6_KABYLAKE 0x9E -#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 +#define INTEL_FAM6_CANNONLAKE_L 0x66 #define INTEL_FAM6_ICELAKE_X 0x6A -#define INTEL_FAM6_ICELAKE_XEON_D 0x6C -#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D -#define INTEL_FAM6_ICELAKE_MOBILE 0x7E +#define INTEL_FAM6_ICELAKE_D 0x6C +#define INTEL_FAM6_ICELAKE 0x7D +#define INTEL_FAM6_ICELAKE_L 0x7E #define INTEL_FAM6_ICELAKE_NNPI 0x9D +#define INTEL_FAM6_TIGERLAKE_L 0x8C +#define INTEL_FAM6_TIGERLAKE 0x8D + +#define INTEL_FAM6_COMETLAKE 0xA5 +#define INTEL_FAM6_COMETLAKE_L 0xA6 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -68,17 +96,22 @@ #define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ #define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ -#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ +#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */ #define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ #define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ -#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ +#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */ + +/* Note: the micro-architecture is "Goldmont Plus" */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ -#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ +#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ +#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 9e7adcdbe031..e6da1ce26256 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -31,30 +31,13 @@ #if IS_ENABLED(CONFIG_INTEL_PMC_IPC) -int intel_pmc_ipc_simple_command(int cmd, int sub); -int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr); int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read(u32 offset, u32 *data); int intel_pmc_gcr_read64(u32 offset, u64 *data); -int intel_pmc_gcr_write(u32 offset, u32 data); -int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); #else -static inline int intel_pmc_ipc_simple_command(int cmd, int sub) -{ - return -EINVAL; -} - -static inline 
int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr) -{ - return -EINVAL; -} - static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen) { @@ -66,26 +49,11 @@ static inline int intel_pmc_s0ix_counter_read(u64 *data) return -EINVAL; } -static inline int intel_pmc_gcr_read(u32 offset, u32 *data) -{ - return -EINVAL; -} - static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) { return -EINVAL; } -static inline int intel_pmc_gcr_write(u32 offset, u32 data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) -{ - return -EINVAL; -} - #endif /*CONFIG_INTEL_PMC_IPC*/ #endif diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h index 634f99b1dc22..423b788f495e 100644 --- a/arch/x86/include/asm/intel_pt.h +++ b/arch/x86/include/asm/intel_pt.h @@ -28,10 +28,12 @@ enum pt_capabilities { void cpu_emergency_stop_pt(void); extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap); extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap); +extern int is_intel_pt_event(struct perf_event *event); #else static inline void cpu_emergency_stop_pt(void) {} static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; } static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; } +static inline int is_intel_pt_event(struct perf_event *event) { return 0; } #endif #endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4a8c6e817398..2a1442ba6e78 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -22,24 +22,12 @@ /* Read single register */ int intel_scu_ipc_ioread8(u16 addr, u8 *data); -/* Read two sequential registers */ -int intel_scu_ipc_ioread16(u16 addr, u16 *data); - -/* Read four sequential registers */ -int intel_scu_ipc_ioread32(u16 addr, u32 *data); - /* Read a vector */ int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); /* Write single register */ int intel_scu_ipc_iowrite8(u16 addr, u8 data); -/* Write two sequential registers */ -int intel_scu_ipc_iowrite16(u16 addr, u16 data); - -/* Write four sequential registers */ -int intel_scu_ipc_iowrite32(u16 addr, u32 data); - /* Write a vector */ int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); @@ -50,14 +38,6 @@ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); int intel_scu_ipc_simple_command(int cmd, int sub); int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, u32 *out, int outlen); -int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen, - u32 *out, int outlen, u32 dptr, u32 sptr); - -/* I2C control api */ -int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); - -/* Update FW version */ -int intel_scu_ipc_fw_update(u8 *buffer, u32 length); extern struct blocking_notifier_head intel_scu_notifier; diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 214394860632..2f77e31a1283 100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -40,13 +40,10 @@ struct telemetry_evtmap { struct telemetry_unit_config { struct telemetry_evtmap *telem_evts; void __iomem *regmap; - u32 ssram_base_addr; u8 ssram_evts_used; u8 curr_period; u8 max_period; u8 min_period; - u32 ssram_size; - }; struct telemetry_plt_config { diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h 
index 6bed97ff6db2..e1aa17a468a8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -180,8 +180,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * The default ioremap() behavior is non-cached; if you need something * else, you probably want one of the following. */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -#define ioremap_nocache ioremap_nocache extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); @@ -205,10 +203,7 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} +void __iomem *ioremap(resource_size_t offset, unsigned long size); #define ioremap ioremap extern void iounmap(volatile void __iomem *addr); @@ -404,4 +399,40 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); +/** + * iosubmit_cmds512 - copy data to a single MMIO location, in 512-bit units + * @__dst: destination, in MMIO space (must be 512-bit aligned) + * @src: source + * @count: number of 512-bit quantities to submit + * + * Submit data from kernel space to MMIO space, in units of 512 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + * + * Warning: Do not use this helper unless your driver has checked that the CPU + * instruction is supported on the platform. + */ +static inline void iosubmit_cmds512(void __iomem *__dst, const void *src, + size_t count) +{ + /* + * Note that this isn't an "on-stack copy", just definition of "dst" + * as a pointer to 64 bytes of stuff that is going to be overwritten. + * In the MOVDIR64B case that may be needed as you can use the + * MOVDIR64B instruction to copy arbitrary memory around. This trick + * lets the compiler know how much gets clobbered. 
+ */ + volatile struct { char _[64]; } *dst = __dst; + const u8 *from = src; + const u8 *end = from + count * 64; + + while (from < end) { + /* MOVDIR64B [rdx], rax */ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "=m" (dst) + : "d" (from), "a" (dst)); + from += 64; + } +} + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..02c6ef8f7667 --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include <linux/refcount.h> +#include <asm/processor.h> + +struct io_bitmap { + u64 sequence; + refcount_t refcnt; + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +struct task_struct; + +#ifdef CONFIG_X86_IOPL_IOPERM +void io_bitmap_share(struct task_struct *tsk); +void io_bitmap_exit(void); + +void tss_update_io_bitmap(void); +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(void) { } +static inline void tss_update_io_bitmap(void) { } +#endif + +#endif diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index baedab8ac538..bf1ed2ddc74b 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,11 +2,28 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H +#include <linux/acpi.h> + +#include <asm/e820/api.h> + extern int force_iommu, no_iommu; extern int iommu_detected; -extern int iommu_pass_through; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +static inline int __init +arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +{ + u64 start = rmrr->base_address; + u64 end = rmrr->end_address + 1; + + if (e820__mapped_all(start, end, E820_TYPE_RESERVED)) + return 0; + + pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", + start, end - 1); + return -EINVAL; +} + #endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h deleted file mode 100644 index f73076be546a..000000000000 --- a/arch/x86/include/asm/ipi.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ASM_X86_IPI_H -#define _ASM_X86_IPI_H - -#ifdef CONFIG_X86_LOCAL_APIC - -/* - * Copyright 2004 James Cleverdon, IBM. - * - * Generic APIC InterProcessor Interrupt code. - * - * Moved to include file by James Cleverdon from - * arch/x86-64/kernel/smp.c - * - * Copyrights from kernel/smp.c: - * - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> - * (c) 2002,2003 Andi Kleen, SuSE Labs. - */ - -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/smp.h> - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. 
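A hedged usage sketch for the iosubmit_cmds512() helper completed above; the portal address and descriptor layout are hypothetical, and per the warning in the kerneldoc the caller must have verified MOVDIR64B support beforehand.

/* hypothetical: submit two 64-byte command descriptors to a device portal */
static void submit_demo_cmds(phys_addr_t portal_phys)
{
	struct { u8 bytes[64]; } descs[2] = { };
	void __iomem *portal = ioremap(portal_phys, PAGE_SIZE);

	if (!portal)
		return;
	iosubmit_cmds512(portal, descs, ARRAY_SIZE(descs));
	iounmap(portal);
}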
- */ - -static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, - unsigned int dest) -{ - unsigned int icr = shortcut | dest; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __xapic_wait_icr_idle(void) -{ - while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). - */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); - -extern void default_send_IPI_single(int cpu, int vector); -extern void default_send_IPI_single_phys(int cpu, int vector); -extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, - int vector); - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -extern int no_broadcast; - -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); -} - -#ifdef CONFIG_X86_32 -extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_allbutself(int vector); -extern void default_send_IPI_all(int vector); -extern void default_send_IPI_self(int vector); -#endif - -#endif - -#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 8f95686ec27e..a176f6165d85 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,7 +34,7 @@ extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); +extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible unsigned int do_IRQ(struct pt_regs *regs); diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 75f1e35e7c15..247ab14c6309 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,6 +33,7 @@ enum show_regs_mode { }; extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5e7d6b46de97..6802c59e8252 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -66,10 +66,6 @@ struct kimage; # 
define KEXEC_ARCH KEXEC_ARCH_X86_64 #endif -/* Memory to backup during crash kdump */ -#define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ - /* * This function is responsible for capturing register states if coming * via panic otherwise just fix up the ss and sp if coming via kernel @@ -154,12 +150,6 @@ struct kimage_arch { pud_t *pud; pmd_t *pmd; pte_t *pte; - /* Details of backup region */ - unsigned long backup_src_start; - unsigned long backup_src_sz; - - /* Physical address of backup segment */ - unsigned long backup_load_addr; /* Core ELF header buffer */ void *elf_headers; diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 5dc909d9ad81..95b1f053bd96 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -11,12 +11,11 @@ #include <asm-generic/kprobes.h> -#define BREAKPOINT_INSTRUCTION 0xcc - #ifdef CONFIG_KPROBES #include <linux/types.h> #include <linux/ptrace.h> #include <linux/percpu.h> +#include <asm/text-patching.h> #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -25,10 +24,7 @@ struct pt_regs; struct kprobe; typedef u8 kprobe_opcode_t; -#define RELATIVEJUMP_OPCODE 0xe9 -#define RELATIVEJUMP_SIZE 5 -#define RELATIVECALL_OPCODE 0xe8 -#define RELATIVE_ADDR_SIZE 4 + #define MAX_STACK_SIZE 64 #define CUR_STACK_SIZE(ADDR) \ (current_top_of_stack() - (unsigned long)(ADDR)) @@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; -#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)optprobe_template_end - \ (unsigned long)optprobe_template_entry) + \ - MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) extern const int kretprobe_blacklist_size; @@ -73,7 +69,7 @@ struct arch_specific_insn { struct arch_optimized_insn { /* copy of the original instructions */ - kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; + kprobe_opcode_t copied_insn[DISP32_SIZE]; /* detour code buffer */ kprobe_opcode_t *insn; /* the size of instructions copied to detour code buffer */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index feab24cac610..03946eb3e2b9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -222,6 +222,10 @@ struct x86_emulate_ops { bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit); + bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); + void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); @@ -229,7 +233,7 @@ struct x86_emulate_ops { int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate); void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); - + int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -429,6 +433,7 @@ enum x86_intercept { x86_intercept_ins, x86_intercept_out, x86_intercept_outs, + x86_intercept_xsetbv, nr_x86_intercepts }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 
74e88e5edd9c..40a0c0fd95ca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -78,6 +78,8 @@ #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_APICV_UPDATE \ + KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -156,10 +158,8 @@ enum kvm_reg { VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, - NR_VCPU_REGS -}; + NR_VCPU_REGS, -enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR3, VCPU_EXREG_RFLAGS, @@ -177,6 +177,11 @@ enum { VCPU_SREG_LDTR, }; +enum exit_fastpath_completion { + EXIT_FASTPATH_NONE, + EXIT_FASTPATH_SKIP_EMUL_INS, +}; + #include <asm/kvm_emulate.h> #define KVM_NR_MEM_OBJS 40 @@ -219,13 +224,6 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) -/* - * The mask used to denote special SPTEs, which can be either MMIO SPTEs or - * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting - * with the SVE bit in EPT PTEs. - */ -#define SPTE_SPECIAL_MASK (1ULL << 62) - /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* @@ -319,8 +317,12 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; + bool unsync; + u8 mmu_valid_gen; bool mmio_cached; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ /* * The following two entries are used to key the shadow page in the @@ -383,12 +385,12 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, + u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, @@ -457,6 +459,11 @@ struct kvm_pmc { u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; + /* + * eventsel value for general purpose counters, + * ctrl value for fixed counters. + */ + u64 current_config; }; struct kvm_pmu { @@ -475,7 +482,21 @@ struct kvm_pmu { struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; struct irq_work irq_work; - u64 reprogram_pmi; + DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); + DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + + /* + * The gate to release perf_events not marked in + * pmc_in_use only once in a vcpu time slice. + */ + bool need_cleanup; + + /* + * The total number of programmed perf_events; it helps to avoid a + * redundant check before cleanup if the guest doesn't use the vPMU at all. 
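A sketch of how the three bitmaps above cooperate at cleanup time, loosely modelled on KVM's pmu code; the release helper is assumed, not a real API.

static void pmu_cleanup_sketch(struct kvm_pmu *pmu)
{
	DECLARE_BITMAP(stale, X86_PMC_IDX_MAX);
	int i;

	/* valid counters the guest did not touch in this time slice */
	bitmap_andnot(stale, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, stale, X86_PMC_IDX_MAX)
		release_pmc_perf_event(pmu, i);	/* assumed helper */

	pmu->need_cleanup = false;
}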
+ */ + u8 event_count; }; struct kvm_pmu_ops; @@ -568,6 +589,7 @@ struct kvm_vcpu_arch { u64 smbase; u64 smi_count; bool tpr_access_reporting; + bool xsaves_enabled; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; @@ -591,7 +613,7 @@ struct kvm_vcpu_arch { * Paging state of an L2 guest (used for nested npt) * * This context will save all necessary information to walk page tables - * of the an L2 guest. This context is only initialized for page table + * of an L2 guest. This context is only initialized for page table * walking and not for faulting since we never handle l2 page faults on * the host. */ @@ -670,10 +692,10 @@ struct kvm_vcpu_arch { bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_pfn_cache cache; } st; u64 tsc_offset; @@ -717,7 +739,7 @@ struct kvm_vcpu_arch { /* Cache MMIO info */ u64 mmio_gva; - unsigned access; + unsigned mmio_access; gfn_t mmio_gfn; u64 mmio_gen; @@ -759,9 +781,19 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; /* - * Indicate whether the access faults on its page table in guest - * which is set when fix page fault and used to detect unhandeable - * instruction. + * Indicates the guest is trying to write a gfn that contains one or + * more of the PTEs used to translate the write itself, i.e. the access + * is changing its own translation in the guest page tables. KVM exits + * to userspace if emulation of the faulting instruction fails and this + * flag is set, as KVM cannot make forward progress. + * + * If emulation fails for a write to guest page tables, KVM unprotects + * (zaps) the shadow page for the target gfn and resumes the guest to + * retry the non-emulatable instruction (on hardware). Unprotecting the + * gfn doesn't allow forward progress for a self-changing access because + * doing so also zaps the translation for the gfn, i.e. retrying the + * instruction will hit a !PRESENT fault, which results in a new shadow + * page and sends KVM back to square one. */ bool write_fault_to_shadow_pgtable; @@ -843,6 +875,8 @@ struct kvm_hv { /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; + + struct hv_partition_assist_pg *hv_pa_pg; }; enum kvm_irqchip_mode { @@ -851,16 +885,25 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +#define APICV_INHIBIT_REASON_DISABLE 0 +#define APICV_INHIBIT_REASON_HYPERV 1 +#define APICV_INHIBIT_REASON_NESTED 2 +#define APICV_INHIBIT_REASON_IRQWIN 3 +#define APICV_INHIBIT_REASON_PIT_REINJ 4 + struct kvm_arch { unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. 
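A sketch of how one of the APICV_INHIBIT_REASON_* bits above is meant to be raised; kvm_request_apicv_update() is declared further down in this patch, and the trigger condition here is hypothetical.

/* e.g. the PIT re-inject path deactivating APICv for the whole VM */
if (reinject_enabled)	/* hypothetical condition */
	kvm_request_apicv_update(kvm, false, APICV_INHIBIT_REASON_PIT_REINJ);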
*/ struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -879,6 +922,7 @@ struct kvm_arch { struct kvm_apic_map *apic_map; bool apic_access_page_done; + unsigned long apicv_inhibit_reasons; gpa_t wall_clock; @@ -935,6 +979,7 @@ struct kvm_arch { bool exception_payload_enabled; struct kvm_pmu_event_filter *pmu_event_filter; + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { @@ -948,6 +993,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong nx_lpage_splits; ulong max_mmu_page_hash_collisions; }; @@ -1000,6 +1046,11 @@ struct kvm_lapic_irq { bool msi_redir_hint; }; +static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) +{ + return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; +} + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -1018,7 +1069,7 @@ struct kvm_x86_ops { void (*vm_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); @@ -1037,7 +1088,6 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr3)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -1069,8 +1119,9 @@ struct kvm_x86_ops { void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); void (*run)(struct kvm_vcpu *vcpu); - int (*handle_exit)(struct kvm_vcpu *vcpu); - void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath); + int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, @@ -1086,7 +1137,8 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); + bool (*check_apicv_inhibit_reasons)(ulong bit); + void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); @@ -1119,11 +1171,13 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); bool (*pt_supported)(void); + bool (*pku_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); @@ -1191,7 +1245,7 @@ struct 
kvm_x86_ops { int (*set_nested_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state); - void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); @@ -1209,6 +1263,9 @@ struct kvm_x86_ops { uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); + + bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1308,26 +1365,53 @@ extern u64 kvm_default_tsc_scaling_ratio; extern u64 kvm_mce_cap_supported; -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be reused as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED. + * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops->skip_emulated_instruction() implementations. + * + * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to + * retry native execution under certain conditions. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace. + * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backdoor emulation handles select instructions + * and reinjects the #GP for all other cases. 
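For reference, a sketch of how the reworked flags are passed by an intercept handler; kvm_emulate_instruction() is declared just below, and the function name here is illustrative.

/* a #UD intercept that restricts emulation to EmulateOnUD instructions */
static int handle_ud_sketch(struct kvm_vcpu *vcpu)
{
	return kvm_emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
}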
+ */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) +#define EMULTYPE_VMWARE_GP (1 << 5) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); -int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); -int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); +int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); struct x86_emulate_ctxt; @@ -1413,11 +1497,15 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); +bool kvm_apicv_activated(struct kvm *kvm); +void kvm_apicv_init(struct kvm *kvm, bool enable); +void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); +void kvm_request_apicv_update(struct kvm *kvm, bool activate, + unsigned long bit); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); @@ -1500,7 +1588,7 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -asmlinkage void __noreturn kvm_spurious_fault(void); +asmlinkage void kvm_spurious_fault(void); /* * Hardware virtualization extension instructions may fault if a @@ -1508,24 +1596,14 @@ asmlinkage void __noreturn kvm_spurious_fault(void); * Usually after catching the fault we just panic; during reboot * instead the instruction is ignored. 
*/ -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ +#define __kvm_handle_fault_on_reboot(insn) \ "666: \n\t" \ insn "\n\t" \ "jmp 668f \n\t" \ "667: \n\t" \ "call kvm_spurious_fault \n\t" \ "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n\t" \ - "700: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting\n\t" \ - "je 667b \n\t" \ - "jmp 668b \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 700b) - -#define __kvm_handle_fault_on_reboot(insn) \ - ____kvm_handle_fault_on_reboot(insn, "") + _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -1554,6 +1632,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap); void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); @@ -1571,7 +1651,6 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); -int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); @@ -1581,6 +1660,13 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq); +static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) +{ + /* We can only post Fixed and LowPrio IRQs */ + return (irq->delivery_mode == dest_Fixed || + irq->delivery_mode == dest_LowestPrio); +} + static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { if (kvm_x86_ops->vcpu_blocking) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7f..365111789cc6 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -13,10 +13,6 @@ #ifdef __ASSEMBLY__ -#define GLOBAL(name) \ - .globl name; \ - name: - #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) #define __ALIGN .p2align 4, 0x90 #define __ALIGN_STR __stringify(__ALIGN) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dc2d4b206ab7..4359b955e0b7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -144,7 +144,7 @@ struct mce_log_buffer { enum mce_notifier_prios { MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_SRAO = INT_MAX - 1, + MCE_PRIO_UC = INT_MAX - 1, MCE_PRIO_EXTLOG = INT_MAX - 2, MCE_PRIO_NFIT = INT_MAX - 3, MCE_PRIO_EDAC = INT_MAX - 4, @@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, /* Load Store */ SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 0c196c47d621..848ce43b9040 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -92,6 +92,16 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; +static inline bool mem_encrypt_active(void) +{ + return sme_me_mask; +} + +static inline u64 sme_get_me_mask(void) 
+{ + return sme_me_mask; +} + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..9c2447b3555d --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include <linux/types.h> +#include <asm/pgtable_types.h> + +extern bool pat_enabled(void); +extern void pat_disable(const char *reason); +extern void pat_init(void); +extern void init_cache_modes(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +extern void memtype_free_io(resource_size_t start, resource_size_t end); + +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566..6685e1218959 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -53,6 +53,6 @@ static inline void __init load_ucode_amd_bsp(unsigned int family) {} static inline void load_ucode_amd_ap(unsigned int family) {} static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -void reload_ucode_amd(void) {} +static inline void reload_ucode_amd(void) {} #endif #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index e78c7db87801..bdeae9291e5c 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -50,10 +50,6 @@ typedef struct { u16 pkey_allocation_map; s16 execute_only_pkey; #endif -#ifdef CONFIG_X86_INTEL_MPX - /* address of the bounds directory */ - void __user *bd_addr; -#endif } mm_context_t; #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 9024236693d2..b538d9ddee9c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -12,7 +12,6 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> -#include <asm/mpx.h> #include <asm/debugreg.h> extern atomic64_t last_mm_ctx_id; @@ -26,18 +25,20 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, #ifdef CONFIG_PERF_EVENTS +DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); -static inline void load_mm_cr4(struct mm_struct *mm) +static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) { if (static_branch_unlikely(&rdpmc_always_available_key) || - atomic_read(&mm->context.perf_rdpmc_allowed)) - cr4_set_bits(X86_CR4_PCE); + (!static_branch_unlikely(&rdpmc_never_available_key) && + atomic_read(&mm->context.perf_rdpmc_allowed))) + cr4_set_bits_irqsoff(X86_CR4_PCE); else - cr4_clear_bits(X86_CR4_PCE); + cr4_clear_bits_irqsoff(X86_CR4_PCE); } #else -static inline void load_mm_cr4(struct mm_struct *mm) {} +static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {} #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL @@ -67,14 +68,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. 
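The PAT interfaces now carry a memtype_ prefix; a hedged sketch of the reserve/use/free pairing for an MMIO range, using the prototypes from the new <asm/memtype.h> above (function name and range handling are illustrative).

static int claim_wc_range_sketch(u64 start, u64 end)
{
	enum page_cache_mode pcm;
	int ret;

	ret = memtype_reserve(start, end, _PAGE_CACHE_MODE_WC, &pcm);
	if (ret)
		return ret;

	/* pcm holds the effective type, which may be demoted from WC */

	memtype_free(start, end);
	return 0;
}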
*/ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -97,87 +90,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif -static inline void load_mm_ldt(struct mm_struct *mm) -{ #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); - - /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. - */ - - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). - */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). - */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else +static inline void load_mm_ldt(struct mm_struct *mm) +{ clear_LDT(); -#endif } - static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif - DEBUG_LOCKS_WARN_ON(preemptible()); } +#endif -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. 
Used on mm copies, like at fork() @@ -272,34 +199,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm) } #endif -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - mpx_mm_init(mm); -} - static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - /* - * mpx_notify_unmap() goes and reads a rarely-hot - * cacheline in the mm_struct. That can be expensive - * enough to be seen in profiles. - * - * The mpx_notify_unmap() call and its contents have been - * observed to affect munmap() performance on hardware - * where MPX is not present. - * - * The unlikely() optimizes for the fast case: no MPX - * in the CPU, or no MPX use in the process. Even if - * we get this wrong (in the unlikely event that MPX - * is widely enabled on some system) the overhead of - * MPX itself (reading bounds tables) is expected to - * overwhelm the overhead of getting this unlikely() - * consistently wrong. - */ - if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, start, end); } /* diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 7948a17febb4..c215d2762488 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -15,6 +15,8 @@ struct mod_arch_specific { #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " #elif defined CONFIG_M486 #define MODULE_PROC_FAMILY "486 " #elif defined CONFIG_M586 diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h deleted file mode 100644 index 143a5c193ed3..000000000000 --- a/arch/x86/include/asm/mpx.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MPX_H -#define _ASM_X86_MPX_H - -#include <linux/types.h> -#include <linux/mm_types.h> - -#include <asm/ptrace.h> -#include <asm/insn.h> - -/* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ -#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) -#define MPX_BNDCFG_ENABLE_FLAG 0x1 -#define MPX_BD_ENTRY_VALID_FLAG 0x1 - -/* - * The upper 28 bits [47:20] of the virtual address in 64-bit - * are used to index into bounds directory (BD). - * - * The directory is 2G (2^31) in size, and with 8-byte entries - * it has 2^28 entries. - */ -#define MPX_BD_SIZE_BYTES_64 (1UL<<31) -#define MPX_BD_ENTRY_BYTES_64 8 -#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) - -/* - * The 32-bit directory is 4MB (2^22) in size, and with 4-byte - * entries it has 2^20 entries. - */ -#define MPX_BD_SIZE_BYTES_32 (1UL<<22) -#define MPX_BD_ENTRY_BYTES_32 4 -#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) - -/* - * A 64-bit table is 4MB total in size, and an entry is - * 4 64-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_64 (1UL<<22) -#define MPX_BT_ENTRY_BYTES_64 32 -#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) - -/* - * A 32-bit table is 16kB total in size, and an entry is - * 4 32-bit pointers in size. 
- */ -#define MPX_BT_SIZE_BYTES_32 (1UL<<14) -#define MPX_BT_ENTRY_BYTES_32 16 -#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) - -#define MPX_BNDSTA_TAIL 2 -#define MPX_BNDCFG_TAIL 12 -#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BNDSTA_ERROR_CODE 0x3 - -struct mpx_fault_info { - void __user *addr; - void __user *lower; - void __user *upper; -}; - -#ifdef CONFIG_X86_INTEL_MPX - -extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -extern int mpx_handle_bd_fault(void); - -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); -} - -static inline void mpx_mm_init(struct mm_struct *mm) -{ - /* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; -} - -extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end); -extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); - -#else -static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - return -EINVAL; -} -static inline int mpx_handle_bd_fault(void) -{ - return -EINVAL; -} -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return 0; -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ -} -static inline void mpx_notify_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} - -static inline unsigned long mpx_unmapped_area_check(unsigned long addr, - unsigned long len, unsigned long flags) -{ - return addr; -} -#endif /* CONFIG_X86_INTEL_MPX */ - -#endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2ef31cc8c529..6b79515abb82 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -218,7 +218,9 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); - +void *hv_alloc_hyperv_page(void); +void *hv_alloc_hyperv_zeroed_page(void); +void hv_free_hyperv_page(unsigned long addr); void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); @@ -241,6 +243,8 @@ static inline void hv_apic_init(void) {} #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} +static inline void *hv_alloc_hyperv_page(void) { return NULL; } +static inline void hv_free_hyperv_page(unsigned long addr) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6b4fc2788078..ebe1685e92dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -93,6 +93,18 @@ * Microarchitectural Data * Sampling (MDS) vulnerabilities. */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. 
*/ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -103,6 +115,10 @@ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -375,15 +391,26 @@ /* Alternative perfctr range with full access. */ #define MSR_IA32_PMC0 0x000004c1 -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. */ +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. + */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD_PPIN_CTL 0xc00102f0 +#define MSR_AMD_PPIN 0xc00102f1 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_BU_CFG2 0xc001102a @@ -531,7 +558,14 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 @@ -539,11 +573,6 @@ #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) @@ -560,9 +589,6 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff -#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 -#define MSR_AMD_PERF_STATUS 0xc0010063 -#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 5cc3930cb465..86f20d520a07 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -233,8 +233,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * Thus, use the preferred barrier on the respective CPU, aiming for * RDTSCP as the default. 
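 *
 * As an editorial illustration (not part of this patch), callers that
 * time short sections depend on exactly this ordering guarantee;
 * do_work() below is a hypothetical workload:
 *
 *	u64 t0 = rdtsc_ordered();
 *	do_work();
 *	u64 cycles = rdtsc_ordered() - t0;	/* no speculative early read */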
*/ - asm volatile(ALTERNATIVE_3("rdtsc", - "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC, + asm volatile(ALTERNATIVE_2("rdtsc", "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, "rdtscp", X86_FEATURE_RDTSCP) : EAX_EDX_RET(val, low, high) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..829df26fd7a3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,7 +24,7 @@ #define _ASM_X86_MTRR_H #include <uapi/asm/mtrr.h> -#include <asm/pat.h> +#include <asm/memtype.h> /* @@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) } static inline void mtrr_bp_init(void) { - pat_disable("MTRRs disabled, skipping PAT initialization too."); + pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); } #define mtrr_ap_init() do {} while (0) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e28f8b723b5c..9d5252c9685c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -21,7 +21,7 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) #define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_DISABLE_CSTATES 0xf0 static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..9d5d949e662e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -41,7 +41,6 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 109f974f9835..07e95dcb40ad 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -37,7 +37,6 @@ */ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ /* * Google experimented with loop-unrolling and this turned out to be @@ -192,7 +191,7 @@ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ - "903: addl $4, %%esp;\n" \ + "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ @@ -314,7 +313,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the @@ -337,7 +336,7 @@ static inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dce26f1d13e1..86e7317eb31f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -139,18 +139,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -#ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return PVOP_CALL0(unsigned long, cpu.read_cr8); -} - -static inline void write_cr8(unsigned long x) -{ - PVOP_VCALL1(cpu.write_cr8, x); -} -#endif - static inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); @@ -306,10 +294,6 @@ static inline void 
write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } -static inline void set_iopl_mask(unsigned mask) -{ - PVOP_VCALL1(cpu.set_iopl_mask, mask); -} static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 639b2df445ee..84812964d3dd 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -119,11 +119,6 @@ struct pv_cpu_ops { void (*write_cr4)(unsigned long); -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); @@ -145,8 +140,6 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); - void (*set_iopl_mask)(unsigned mask); - void (*wbinvd)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 92015c65fa2a..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include <linux/types.h> -#include <asm/pgtable_types.h> - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int reserve_memtype(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); - -extern int kernel_map_sync_memtype(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int io_reserve_memtype(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void io_free_memtype(resource_size_t start, resource_size_t end); - -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index e662f987dfa2..7ccb338507e3 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -9,11 +9,9 @@ #include <linux/scatterlist.h> #include <linux/numa.h> #include <asm/io.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/x86_init.h> -#ifdef __KERNEL__ - struct pci_sysdata { int domain; /* PCI domain */ int node; /* NUMA node */ @@ -27,7 +25,7 @@ struct pci_sysdata { void *fwnode; /* IRQ domain for MSI assignment */ #endif #if IS_ENABLED(CONFIG_VMD) - bool vmd_domain; /* True if in Intel VMD domain */ + struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ #endif }; @@ -35,14 +33,17 @@ extern int pci_routeirq; extern int noioapicquirk; extern int noioapicreroute; +static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus) +{ + return bus->sysdata; +} + #ifdef CONFIG_PCI #ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->domain; + return to_pci_sysdata(bus)->domain; } static inline int pci_proc_domain(struct pci_bus *bus) @@ -54,24 +55,20 @@ static inline int pci_proc_domain(struct pci_bus *bus) #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->fwnode; + return to_pci_sysdata(bus)->fwnode; } #define pci_root_bus_fwnode _pci_root_bus_fwnode #endif +#if IS_ENABLED(CONFIG_VMD) static inline bool is_vmd(struct pci_bus *bus) { 
-#if IS_ENABLED(CONFIG_VMD) - struct pci_sysdata *sd = bus->sysdata; - - return sd->vmd_domain; -#else - return false; -#endif + return to_pci_sysdata(bus)->vmd_dev != NULL; } +#else +#define is_vmd(bus) false +#endif /* CONFIG_VMD */ /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes @@ -118,11 +115,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif -#endif /* __KERNEL__ */ - -#ifdef CONFIG_X86_64 -#include <asm/pci_64.h> -#endif /* generic pci stuff */ #include <asm-generic/pci.h> @@ -131,9 +123,7 @@ void native_restore_msi_irqs(struct pci_dev *dev); /* Returns the node based on pci bus */ static inline int __pcibus_to_node(const struct pci_bus *bus) { - const struct pci_sysdata *sd = bus->sysdata; - - return sd->node; + return to_pci_sysdata(bus)->node; } static inline const struct cpumask * diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h deleted file mode 100644 index f5411de0ae11..000000000000 --- a/arch/x86/include/asm/pci_64.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PCI_64_H -#define _ASM_X86_PCI_64_H - -#ifdef __KERNEL__ - -#ifdef CONFIG_CALGARY_IOMMU -static inline void *pci_iommu(struct pci_bus *bus) -{ - struct pci_sysdata *sd = bus->sysdata; - return sd->iommu; -} - -static inline void set_pci_iommu(struct pci_bus *bus, void *val) -{ - struct pci_sysdata *sd = bus->sysdata; - sd->iommu = val; -} -#endif /* CONFIG_CALGARY_IOMMU */ - -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, - int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, - int reg, int len, u32 value); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 1392d5e6e8d6..29964b0e1075 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -252,16 +252,20 @@ struct pebs_lbr { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* ibs fetch bits/masks */ +/* IBS fetch bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* ibs op bits/masks */ -/* lower 4 bits of the current count are ignored: */ -#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) +/* + * IBS op bits/masks + * The lower 7 bits of the current count are random bits + * preloaded by hardware and ignored in software + */ +#define IBS_OP_CUR_CNT (0xFFF80ULL<<32) +#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) @@ -318,17 +322,10 @@ struct perf_guest_switch_msr { u64 host, guest; }; -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else -static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - *nr = 0; - return NULL; -} - static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { memset(cap, 0, sizeof(*cap)); @@ -338,8 +335,23 @@ static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#if 
defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +#else +static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + *nr = 0; + return NULL; +} +#endif + #ifdef CONFIG_CPU_SUP_INTEL extern void intel_pt_handle_vmx(int on); +#else +static inline void intel_pt_handle_vmx(int on) +{ + +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index e3633795fb22..5afb5e0fe903 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) #define pmd_read_atomic pmd_read_atomic /* - * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by gcc. Problem is, in certain places - * where pte_offset_map_lock is called, concurrent page faults are + * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by GCC. Problem is, in certain places + * where pte_offset_map_lock() is called, concurrent page faults are * allowed, if the mmap_sem is held for reading. An example is mincore * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_populate() rightfully does a set_64bit(), but if we're reading the * pmd_t with a "*pmdp" on the mincore side, an SMP race can happen - * because gcc will not read the 64bit of the pmd atomically. To fix - * this all places running pmd_offset_map_lock() while holding the + * because GCC will not read the 64-bit value of the pmd atomically. + * + * To fix this all places running pte_offset_map_lock() while holding the * mmap_sem in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null nor not, and in turn to know if - * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * function to know if the pmd is null or not, and in turn to know if + * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd * operations. * - * Without THP if the mmap_sem is hold for reading, the pmd can only - * transition from null to not null while pmd_read_atomic runs. So + * Without THP if the mmap_sem is held for reading, the pmd can only + * transition from null to not null while pmd_read_atomic() runs. So * we can always return atomic pmd values with this function. * - * With THP if the mmap_sem is hold for reading, the pmd can become + * With THP if the mmap_sem is held for reading, the pmd can become * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic. We could read it really - * atomically here with a atomic64_read for the THP enabled case (and + * at any time under pmd_read_atomic(). We could read it truly + * atomically here with an atomic64_read() for the THP enabled case (and * it would be a whole lot simpler), but to avoid using cmpxchg8b we * only return an atomic pmdval if the low part of the pmdval is later - * found stable (i.e. pointing to a pte). And we're returning a none - * pmdval if the low part of the pmd is none.
In some cases the high - * and low part of the pmdval returned may not be consistent if THP is - * enabled (the low part may point to previously mapped hugepage, - * while the high part may point to a more recently mapped hugepage), - * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part - * of the pmd to be read atomically to decide if the pmd is unstable - * or not, with the only exception of when the low part of the pmd is - * zero in which case we return a none pmd. + * found to be stable (i.e. pointing to a pte). We are also returning a + * 'none' (zero) pmdval if the low part of the pmd is zero. + * + * In some cases the high and low part of the pmdval returned may not be + * consistent if THP is enabled (the low part may point to previously + * mapped hugepage, while the high part may point to a more recently + * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only + * needs the low part of the pmd to be read atomically to decide if the + * pmd is unstable or not, with the only exception when the low part + * of the pmd is zero, in which case we return a 'none' pmd. */ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb13..7e118660bbd9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -29,8 +29,9 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user); void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); @@ -239,6 +240,7 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +#define p4d_leaf p4d_large static inline int p4d_large(p4d_t p4d) { /* No 512 GiB pages yet */ @@ -247,6 +249,7 @@ static inline int p4d_large(p4d_t p4d) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define pmd_leaf pmd_large static inline int pmd_large(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; @@ -874,6 +877,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == @@ -885,6 +889,7 @@ static inline int pud_bad(pud_t pud) return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #else +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return 0; @@ -1233,6 +1238,7 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } +#define pgd_leaf pgd_large static inline int pgd_large(pgd_t pgd) { return 0; } #ifdef CONFIG_PAGE_TABLE_ISOLATION @@ -1463,6 +1469,12 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } +#define arch_faults_on_old_pte arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + return false; +} + #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index c78da8eda8f2..0dca7f7aeff2 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h 
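An editorial aside on the pgtable.h hunks above: the new pgd_leaf/p4d_leaf/pud_leaf/pmd_leaf aliases give generic page-table walkers a single spelling for "this entry maps a huge page" while the x86-internal pXd_large() names keep working; defining aliases next to each implementation avoids churn in existing callers. A minimal walker sketch under that assumption follows; va_to_pfn() is a hypothetical helper, not part of the patch:

#include <asm/pgtable.h>

/* Sketch: resolve a mapped kernel VA to its PFN, honouring huge leaves. */
static unsigned long va_to_pfn(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd;

	if (pud_leaf(*pud))	/* 1 GiB mapping */
		return pud_pfn(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);

	pmd = pmd_offset(pud, addr);
	if (pmd_leaf(*pmd))	/* 2 MiB mapping */
		return pmd_pfn(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);

	/* pXd_none()/pXd_bad() checks omitted for brevity */
	return pte_pfn(*pte_offset_kernel(pmd, addr));
}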
@@ -29,8 +29,6 @@ extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; extern pmd_t initial_pg_pmd[]; -static inline void pgtable_cache_init(void) { } -static inline void check_pgt_cache(void) { } void paging_init(void); void sync_initial_page_table(void); diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..b6355416a15a --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include <asm/cpu_entry_area.h> + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLY__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,54 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. 
;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c - * to avoid include recursion hell - */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) - -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 4990d26dfc73..0b6c4042942a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -241,9 +241,6 @@ extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define pgtable_cache_init() do { } while (0) -#define check_pgt_cache() do { } while (0) - #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..d34cce1b995c --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include <asm/pgtable_32_areas.h> +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b5e49e6bac63..0239998d8cdc 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,11 +110,6 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) - /* * Set of bits not changed in pte_modify. 
The pte's * protection key is treated like _PAGE_RW, for @@ -136,80 +131,93 @@ */ #ifndef __ASSEMBLY__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) -#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) -#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -#ifndef __ASSEMBLY__ +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) +#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define 
_PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) +#ifndef __ASSEMBLY__ -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ @@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; extern uint8_t __pte2cachemode_tbl[8]; @@ -561,6 +566,10 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t 
*lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); + +struct mm_struct; +extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, + unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 99a7fa9ab0a3..3d4cb83a8828 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset) return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION extern asmlinkage void ___preempt_schedule(void); # define __preempt_schedule() \ asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..09705ccc393c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct io_bitmap; struct vm86; #include <asm/math_emu.h> @@ -24,6 +25,7 @@ struct vm86; #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> +#include <asm/vmxfeatures.h> #include <linux/personality.h> #include <linux/cache.h> @@ -84,6 +86,9 @@ struct cpuinfo_x86 { /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* CPUID returned core id bits: */ @@ -93,7 +98,15 @@ struct cpuinfo_x86 { __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; - __u32 x86_capability[NCAPINTS + NBUGINTS]; + /* + * Align to size of unsigned long because the x86_capability array + * is passed to bitops which require the alignment. Use unnamed + * union to enforce the array is aligned to size of unsigned long. + */ + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ @@ -157,7 +170,6 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct x86_hw_tss doublefault_tss; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; @@ -328,10 +340,32 @@ struct x86_hw_tss { * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET 0x8000 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) + +#define IO_BITMAP_OFFSET_VALID_MAP \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + +#ifdef CONFIG_X86_IOPL_IOPERM +/* + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. 
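+ *
+ * Editorial illustration, not patch text: with IO_BITMAP_BITS == 65536
+ * the valid map spans 8192 bytes, so on 64-bit this works out to
+ *
+ *	__KERNEL_TSS_LIMIT == IO_BITMAP_OFFSET_VALID_ALL + 8192 + 8 - 1
+ *
+ * i.e. the offset of the last byte of the mapall[] array (8192 map
+ * bytes plus the extra terminating long).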
+ */ +# define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif + +/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { unsigned long words[64]; @@ -341,13 +375,21 @@ struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); -struct tss_struct { +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + /* - * The fixed hardware portion. This must not cross a page boundary - * at risk of violating the SDM's advice and potentially triggering - * errata. + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. */ - struct x86_hw_tss x86_tss; + unsigned int prev_max; /* * The extra 1 is there because the CPU will access an @@ -355,21 +397,28 @@ struct tss_struct { * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) - /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; @@ -480,10 +529,14 @@ struct thread_struct { struct vm86 *vm86; #endif /* IO permissions: */ - unsigned long *io_bitmap_ptr; - unsigned long iopl; - /* Max allowed port in the bitmap, in bytes: */ - unsigned io_bitmap_max; + struct io_bitmap *io_bitmap; + + /* + * IOPL. Privilege level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. + */ + unsigned long iopl_emul; mm_segment_t addr_limit; @@ -515,25 +568,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) -{ -#ifdef CONFIG_X86_32 - unsigned int reg; - - asm volatile ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif -} - static inline void native_load_sp0(unsigned long sp0) { @@ -573,7 +607,6 @@ static inline void load_sp0(unsigned long sp0) native_load_sp0(sp0); } -#define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread.
*/ @@ -841,7 +874,6 @@ static inline void spin_lock_prefetch(const void *x) #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ .addr_limit = KERNEL_DS, \ } @@ -915,24 +947,6 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -/* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() -#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() - -#ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(void); -extern int mpx_disable_management(void); -#else -static inline int mpx_enable_management(void) -{ - return -EINVAL; -} -static inline int mpx_disable_management(void) -{ - return -EINVAL; -} -#endif /* CONFIG_X86_INTEL_MPX */ - #ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); @@ -958,7 +972,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); -extern void free_kernel_image_pages(void *begin, void *end); +extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN @@ -968,7 +982,6 @@ bool xen_set_default_idle(void); #endif void stop_this_cpu(void *dummy); -void df_debug(struct pt_regs *regs, long error_code); void microcode_check(void); enum l1tf_mitigations { diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 5df09a0b80b8..07375b476c4f 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 332eb3525867..6d6475fdd327 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp @@ -339,27 +352,17 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_REPORT -/* - * When hitting ptrace_stop(), we cannot return using SYSRET because - * that does not restore the full CPU state, only a minimal set. The - * ptracer can change arbitrary register values, which is usually okay - * because the usual ptrace stops run off the signal delivery path which - * forces IRET; however, ptrace_event() stops happen in arbitrary places - * in the kernel and don't force IRET path. - * - * So force IRET path after a ptrace stop. 
- */ -#define arch_ptrace_stop_needed(code, info) \ -({ \ - force_iret(); \ - false; \ -}) - struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t) (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 92c34e517da1..5528e9325049 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -6,16 +6,6 @@ #include <linux/purgatory.h> extern void purgatory(void); -/* - * These forward declarations serve two purposes: - * - * 1) Make sparse happy when checking arch/purgatory - * 2) Document that these are required to be global so the symbol - * lookup in kexec works - */ -extern unsigned long purgatory_backup_dest; -extern unsigned long purgatory_backup_src; -extern unsigned long purgatory_backup_sz; #endif /* __ASSEMBLY__ */ #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index bd5ac6cc37db..444d6fd9a6d8 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -63,10 +63,25 @@ static inline bool vcpu_is_preempted(long cpu) #endif #ifdef CONFIG_PARAVIRT +/* + * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * + * Native (and PV wanting native due to vCPU pinning) should disable this key. + * It is done in this backwards fashion to only have a single direction change, + * which removes ordering between native_pv_spin_init() and HV setup. + */ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); void native_pv_lock_init(void) __init; +/* + * Shortcut for the queued_spin_lock_slowpath() function that allows + * virt to hijack it. + * + * Returns: + * true - lock has been negotiated, all done; + * false - queued_spin_lock_slowpath() will do its thing. + */ #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index c53682303c9c..b35030eeec36 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -14,13 +14,12 @@ #include <linux/types.h> #include <asm/io.h> -/* This must match data at realmode.S */ +/* This must match data at realmode/rm/header.S */ struct real_mode_header { u32 text_start; u32 ro_end; /* SMP trampoline */ u32 trampoline_start; - u32 trampoline_status; u32 trampoline_header; #ifdef CONFIG_X86_64 u32 trampoline_pgd; @@ -37,7 +36,7 @@ struct real_mode_header { #endif }; -/* This must match data at trampoline_32/64.S */ +/* This must match data at realmode/rm/trampoline_{32,64}.S */ struct trampoline_header { #ifdef CONFIG_X86_32 u32 start; diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index 232f856e0db0..000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. 
- */ -#include <linux/refcount.h> -#include <asm/bug.h> - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. - */ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[var], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ - asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : "ir" (i) - : "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "er", i, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - int c, result; - - c = atomic_read(&(r->refs)); - do { - if (unlikely(c == 0)) - return false; - - result = c + i; - - /* Did we try to increment from/to an undesirable state? 
*/ - if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR - : : [var] "m" (r->refs.counter) - : "cc", "cx"); - break; - } - - } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - - return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h deleted file mode 100644 index 0a21986d2238..000000000000 --- a/arch/x86/include/asm/rio.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Derived from include/asm-x86/mach-summit/mach_mpparse.h - * and include/asm-x86/mach-default/bios_ebda.h - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - */ - -#ifndef _ASM_X86_RIO_H -#define _ASM_X86_RIO_H - -#define RIO_TABLE_VERSION 3 - -struct rio_table_hdr { - u8 version; /* Version number of this data structure */ - u8 num_scal_dev; /* # of Scalability devices */ - u8 num_rio_dev; /* # of RIO I/O devices */ -} __attribute__((packed)); - -struct scal_detail { - u8 node_id; /* Scalability Node ID */ - u32 CBAR; /* Address of 1MB register space */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port connected to: 0xFF = None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port2node; /* Node ID port connected to: 0xFF = None */ - u8 port2port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - u8 node_id; /* RIO Node ID */ - u32 BBAR; /* Address of 1MB register space */ - u8 type; /* Type of device */ - u8 owner_id; /* Node ID of Hurricane that owns this */ - /* node */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port connected to: 0xFF=None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 first_slot; /* Lowest slot number below this Calgary */ - u8 status; /* Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie: */ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - u8 WP_index; /* instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - u8 chassis_num; /* 1 based Chassis number */ -} __attribute__((packed)); - -enum { - HURR_SCALABILTY = 0, /* Hurricane Scalability info */ - HURR_RIOIB = 2, /* Hurricane RIOIB info */ - COMPAT_CALGARY = 4, /* Compatibility Calgary */ - ALT_CALGARY = 5, /* Second Planar Calgary */ -}; - -#endif /* _ASM_X86_RIO_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 71b32f2570ab..036c360910c5 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -6,7 +6,6 @@ #include <asm/extable.h> extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[]; extern char __end_rodata_aligned[]; #if defined(CONFIG_X86_64) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..6669164abadc 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -31,6 +31,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. 
Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ae7b909dc242..64c3dce374e5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -40,7 +40,6 @@ int _set_memory_wt(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); -int set_memory_wt(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); @@ -48,11 +47,6 @@ int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); int set_memory_np_noalias(unsigned long addr, int numpages); -int set_memory_array_uc(unsigned long *addr, int addrinarray); -int set_memory_array_wc(unsigned long *addr, int addrinarray); -int set_memory_array_wt(unsigned long *addr, int addrinarray); -int set_memory_array_wb(unsigned long *addr, int addrinarray); - int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); int set_pages_array_wt(struct page **pages, int addrinarray); @@ -80,8 +74,6 @@ int set_pages_array_wb(struct page **pages, int addrinarray); int set_pages_uc(struct page *page, int numpages); int set_pages_wb(struct page *page, int numpages); -int set_pages_x(struct page *page, int numpages); -int set_pages_nx(struct page *page, int numpages); int set_pages_ro(struct page *page, int numpages); int set_pages_rw(struct page *page, int numpages); @@ -89,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; -void set_kernel_text_rw(void); -void set_kernel_text_ro(void); #ifdef CONFIG_X86_64 static inline int set_mce_nospec(unsigned long pfn) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index e1356a3b8223..e15f364efbcc 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -143,6 +143,7 @@ void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); +void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 219be88a59d2..6d37b8fcfc77 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -73,20 +73,6 @@ static inline unsigned long native_read_cr4(void) void native_write_cr4(unsigned long val); -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} 
-#endif - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS static inline u32 rdpkru(void) { @@ -200,16 +186,6 @@ static inline void wbinvd(void) #ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return native_read_cr8(); -} - -static inline void write_cr8(unsigned long x) -{ - native_write_cr8(x); -} - static inline void load_gs_index(unsigned selector) { native_load_gs_index(selector); diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index a7af9f53c0cb..35bb35d28733 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -34,7 +34,7 @@ struct saved_context { */ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; - unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; struct saved_msrs saved_msrs; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index dec9c1e84c78..6ece8561ba66 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -52,6 +52,7 @@ enum { INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, + INTERCEPT_RDPRU, }; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 18a4b6890fa8..0e059b73437b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task) if (static_cpu_has(X86_FEATURE_XENPV)) load_sp0(task_top_of_stack(task)); #endif +} +static inline void kthread_frame_init(struct inactive_task_frame *frame, + unsigned long fun, unsigned long arg) +{ + frame->bx = fun; +#ifdef CONFIG_X86_32 + frame->di = arg; +#else + frame->r12 = arg; +#endif } #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2dc4a021beea..8db3fdb6102e 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[]; extern const sys_call_ptr_t ia32_sys_call_table[]; #endif +#ifdef CONFIG_X86_X32_ABI +extern const sys_call_ptr_t x32_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..e2389ce9bf58 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,8 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +struct pt_regs; + /* Mapping of registers to parameters for syscalls on x86-64 and x32 */ #define SC_X86_64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ @@ -28,13 +30,21 @@ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this * case as well. */ +#define __IA32_COMPAT_SYS_STUB0(x, name) \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name(); \ + } + #define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #define __IA32_SYS_STUBx(x, name, ...) 
\ asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ @@ -48,16 +58,23 @@ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias * named __ia32_sys_*() */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) -#define COND_SYSCALL(name) \ - cond_syscall(__x64_sys_##name); \ - cond_syscall(__ia32_sys_##name) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } \ + asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\ + { \ + return sys_ni_syscall(); \ + } #define SYS_NI(name) \ SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ @@ -75,15 +92,24 @@ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ +#define __X32_COMPAT_SYS_STUB0(x, name, ...) \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name();\ + } + #define __X32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #else /* CONFIG_X86_X32 */ +#define __X32_COMPAT_SYS_STUB0(x, name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #endif /* CONFIG_X86_X32 */ @@ -94,6 +120,17 @@ * mapping of registers to parameters, we need to generate stubs for each * of them. */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long __se_compat_sys_##name(void); \ + static inline long __do_compat_sys_##name(void); \ + __IA32_COMPAT_SYS_STUB0(x, name) \ + __X32_COMPAT_SYS_STUB0(x, name) \ + static long __se_compat_sys_##name(void) \ + { \ + return __do_compat_sys_##name(); \ + } \ + static inline long __do_compat_sys_##name(void) + #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -181,15 +218,19 @@ * macros to work correctly. 
*/ #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } #endif #ifndef SYS_NI @@ -201,7 +242,6 @@ * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -struct pt_regs; asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); asmlinkage long __x64_sys_time(const struct pt_regs *regs); diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h deleted file mode 100644 index 6ed2deacf1d0..000000000000 --- a/arch/x86/include/asm/tce.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file is derived from asm-powerpc/tce.h. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * Author: Jon Mason <jdmason@us.ibm.com> - */ - -#ifndef _ASM_X86_TCE_H -#define _ASM_X86_TCE_H - -extern unsigned int specified_table_size; -struct iommu_table; - -#define TCE_ENTRY_SIZE 8 /* in bytes */ - -#define TCE_READ_SHIFT 0 -#define TCE_WRITE_SHIFT 1 -#define TCE_HUBID_SHIFT 2 /* unused */ -#define TCE_RSVD_SHIFT 8 /* unused */ -#define TCE_RPN_SHIFT 12 -#define TCE_UNUSED_SHIFT 48 /* unused */ - -#define TCE_RPN_MASK 0x0000fffffffff000ULL - -extern void tce_build(struct iommu_table *tbl, unsigned long index, - unsigned int npages, unsigned long uaddr, int direction); -extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages); -extern void * __init alloc_tce_table(void); -extern void __init free_tce_table(void *tbl); -extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar); - -#endif /* _ASM_X86_TCE_H */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 70c09967a999..67315fa3956a 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -25,13 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, */ #define POKE_MAX_OPCODE_SIZE 5 -struct text_poke_loc { - void *detour; - void *addr; - size_t len; - const char opcode[POKE_MAX_OPCODE_SIZE]; -}; - extern void text_poke_early(void *addr, const void *opcode, size_t len); /* @@ -45,14 +38,81 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); * no thread can be preempted in the instructions being modified (no iret to an * invalid instruction possible) or if the instructions are changed from a * consistent state to another consistent state atomically. - * On the local CPU you need to be protected again NMI or MCE handlers seeing an - * inconsistent instruction while you patch. + * On the local CPU you need to be protected against NMI or MCE handlers seeing + * an inconsistent instruction while you patch. 
*/ extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); -extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); + +extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate); +extern void text_poke_finish(void); + +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB + +#define DISP32_SIZE 4 + +static inline int text_opcode_size(u8 opcode) +{ + int size = 0; + +#define __CASE(insn) \ + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break + + switch(opcode) { + __CASE(INT3); + __CASE(CALL); + __CASE(JMP32); + __CASE(JMP8); + } + +#undef __CASE + + return size; +} + +union text_poke_insn { + u8 text[POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + int size = text_opcode_size(opcode); + + insn.opcode = opcode; + + if (size > 1) { + insn.disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP8 the displacement + * actually fits in the signed byte. + */ + BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + } + } + + return &insn.text; +} + extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; @@ -63,9 +123,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } -#define INT3_INSN_SIZE 1 -#define CALL_INSN_SIZE 5 - static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* @@ -73,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) * stack where the break point happened, and the saving of * pt_regs. We can extend the original stack because of * this gap. See the idtentry macro's create_gap option. + * + * Similarly entry_32.S will have a gap on the stack for (any) hardware + * exception and pt_regs; see FIXUP_FRAME.
*/ regs->sp -= sizeof(unsigned long); *(unsigned long *)regs->sp = val; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f9453536f9bb..cf4327986e98 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -143,8 +143,8 @@ struct thread_info { _TIF_NOHZ) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_BASE \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* @@ -156,8 +156,14 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ + _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) @@ -233,15 +239,6 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -/* - * Force syscall return via IRET by making it look as if there was - * some work pending. IRET is our most capable (but slowest) syscall - * return path, which is able to restore modified SS, CS and certain - * EFLAGS values that other (fast) syscall return instructions - * are not able to restore properly. - */ -#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) - extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index f23e7aaff4cd..820082bd6880 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -29,8 +29,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) * shootdown, enablement code for several hypervisors overrides * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing * a hypercall. To keep software pagetable walkers safe in this case we - * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment - * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment + * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h * for more details. */ static inline void __tlb_remove_table(void *table) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index dee375831962..6f66d841262d 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -290,26 +290,42 @@ static inline void __cr4_set(unsigned long cr4) } /* Set in this cpu's CR4. */ -static inline void cr4_set_bits(unsigned long mask) +static inline void cr4_set_bits_irqsoff(unsigned long mask) { - unsigned long cr4, flags; + unsigned long cr4; - local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); if ((cr4 | mask) != cr4) __cr4_set(cr4 | mask); - local_irq_restore(flags); } /* Clear in this cpu's CR4. 
*/ -static inline void cr4_clear_bits(unsigned long mask) +static inline void cr4_clear_bits_irqsoff(unsigned long mask) { - unsigned long cr4, flags; + unsigned long cr4; - local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); if ((cr4 & ~mask) != cr4) __cr4_set(cr4 & ~mask); +} + +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits(unsigned long mask) +{ + unsigned long flags; + + local_irq_save(flags); + cr4_set_bits_irqsoff(mask); + local_irq_restore(flags); +} + +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits(unsigned long mask) +{ + unsigned long flags; + + local_irq_save(flags); + cr4_clear_bits_irqsoff(mask); local_irq_restore(flags); } diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index ace464f09681..4d705cb4d63b 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask, __entry->ncpus, __entry->vector) ); +TRACE_EVENT(hyperv_send_ipi_one, + TP_PROTO(int cpu, + int vector), + TP_ARGS(cpu, vector), + TP_STRUCT__entry( + __field(int, cpu) + __field(int, vector) + ), + TP_fast_assign(__entry->cpu = cpu; + __entry->vector = vector; + ), + TP_printk("cpu %d vector %x", + __entry->cpu, __entry->vector) + ); + #endif /* CONFIG_HYPERV */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h deleted file mode 100644 index 54133017267c..000000000000 --- a/arch/x86/include/asm/trace/mpx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mpx - -#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MPX_H - -#include <linux/tracepoint.h> - -#ifdef CONFIG_X86_INTEL_MPX - -TRACE_EVENT(mpx_bounds_register_exception, - - TP_PROTO(void __user *addr_referenced, - const struct mpx_bndreg *bndreg), - TP_ARGS(addr_referenced, bndreg), - - TP_STRUCT__entry( - __field(void __user *, addr_referenced) - __field(u64, lower_bound) - __field(u64, upper_bound) - ), - - TP_fast_assign( - __entry->addr_referenced = addr_referenced; - __entry->lower_bound = bndreg->lower_bound; - __entry->upper_bound = bndreg->upper_bound; - ), - /* - * Note that we are printing out the '~' of the upper - * bounds register here. It is actually stored in its - * one's complement form so that its 'init' state - * corresponds to all 0's. But, that looks like - * gibberish when printed out, so print out the 1's - * complement instead of the actual value here. Note - * though that you still need to specify filters for the - * actual value, not the displayed one. 
- */ - TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", - __entry->addr_referenced, - __entry->lower_bound, - ~__entry->upper_bound - ) -); - -TRACE_EVENT(bounds_exception_mpx, - - TP_PROTO(const struct mpx_bndcsr *bndcsr), - TP_ARGS(bndcsr), - - TP_STRUCT__entry( - __field(u64, bndcfgu) - __field(u64, bndstatus) - ), - - TP_fast_assign( - /* need to get rid of the 'const' on bndcsr */ - __entry->bndcfgu = (u64)bndcsr->bndcfgu; - __entry->bndstatus = (u64)bndcsr->bndstatus; - ), - - TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", - __entry->bndcfgu, - __entry->bndstatus) -); - -DECLARE_EVENT_CLASS(mpx_range_trace, - - TP_PROTO(unsigned long start, - unsigned long end), - TP_ARGS(start, end), - - TP_STRUCT__entry( - __field(unsigned long, start) - __field(unsigned long, end) - ), - - TP_fast_assign( - __entry->start = start; - __entry->end = end; - ), - - TP_printk("[0x%p:0x%p]", - (void *)__entry->start, - (void *)__entry->end - ) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -TRACE_EVENT(mpx_new_bounds_table, - - TP_PROTO(unsigned long table_vaddr), - TP_ARGS(table_vaddr), - - TP_STRUCT__entry( - __field(unsigned long, table_vaddr) - ), - - TP_fast_assign( - __entry->table_vaddr = table_vaddr; - ), - - TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) -); - -#else - -/* - * This gets used outside of MPX-specific code, so we need a stub. - */ -static inline -void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) -{ -} - -#endif /* CONFIG_X86_INTEL_MPX */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH asm/trace/ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mpx -#endif /* _TRACE_MPX_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index b25e633033c3..ffa0dc8a535e 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); +#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); +#endif dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 9c4435307ff8..61d93f062a36 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -444,8 +444,10 @@ __pu_label: \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ + __typeof__(ptr) __gu_ptr = (ptr); \ + __typeof__(size) __gu_size = (size); \ __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ @@ -732,5 +734,28 @@ do { \ if (unlikely(__gu_err)) goto 
err_label; \ } while (0) +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)src,(type __user *)dst,label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h index db43f2a0d92c..aeed98c3c9e1 100644 --- a/arch/x86/include/asm/umip.h +++ b/arch/x86/include/asm/umip.h @@ -4,9 +4,9 @@ #include <linux/types.h> #include <asm/ptrace.h> -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP bool fixup_umip_exception(struct pt_regs *regs); #else static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } -#endif /* CONFIG_X86_INTEL_UMIP */ +#endif /* CONFIG_X86_UMIP */ #endif /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 097589753fec..a7dd080749ce 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -5,12 +5,6 @@ #include <uapi/asm/unistd.h> -# ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) -# else -# define __SYSCALL_MASK (~0) -# endif - # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 0bcdb1279361..f5e2eb12cb71 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -86,6 +86,14 @@ UNWIND_HINT sp_offset=\sp_offset .endm +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + #else /* !__ASSEMBLY__ */ #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 6e7caf65fa40..389174eaec79 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -138,7 +138,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); -extern void uv_bios_init(void); +extern int uv_bios_init(void); extern unsigned long sn_rtc_cycles_per_second; extern int uv_type; diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index e60c45fd3679..45ea95ce79b4 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -12,13 +12,26 @@ struct mm_struct; #ifdef CONFIG_X86_UV #include <linux/efi.h> +#define UV_PROC_NODE "sgi_uv" + +static inline int uv(int uvtype) +{ + /* uv(0) is "any" */ + if (uvtype >= 0 && uvtype <= 30) + return 1 << uvtype; + return 1; +} + +extern unsigned long uv_systab_phys; + extern enum uv_system_type get_uv_system_type(void); static inline bool is_early_uv_system(void) { - return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); + return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR; } 
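For illustration: the uv() helper above packs a UV architecture type into a one-bit mask (uv(0) meaning "any"), so the reworked is_uv_hubbed()/is_uv_hubless() tests declared just below can be asked about a single type, an OR of several types, or any type at all. A minimal standalone sketch of that encoding, not part of the patch (a plain C translation of the helper, with invented example values):

#include <stdio.h>

/* Mirrors the uv() helper above: uv(0) is "any", otherwise bit N for type N. */
static int uv(int uvtype)
{
	if (uvtype >= 0 && uvtype <= 30)
		return 1 << uvtype;
	return 1;
}

int main(void)
{
	int mask = uv(2) | uv(3);	/* "UV2 or UV3" as a single argument */

	printf("uv(4)=0x%x uv(2)|uv(3)=0x%x uv(0)=0x%x\n",
	       uv(4), mask, uv(0));	/* prints 0x10, 0xc, 0x1 */
	return 0;
}

The implementation side (not shown in this diff) can then answer a query such as is_uv_hubbed(uv(4)) with a simple bitwise AND against a mask of the hub types detected at boot.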
extern int is_uv_system(void); -extern int is_uv_hubless(void); +extern int is_uv_hubbed(int uvtype); +extern int is_uv_hubless(int uvtype); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); @@ -30,7 +43,8 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } -static inline int is_uv_hubless(void) { return 0; } +static inline int is_uv_hubbed(int uv) { return 0; } +static inline int is_uv_hubless(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 44cf6d6deb7a..950cd1395d5d 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -19,6 +19,7 @@ #include <linux/topology.h> #include <asm/types.h> #include <asm/percpu.h> +#include <asm/uv/uv.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/bios.h> #include <asm/irq_vectors.h> @@ -243,83 +244,61 @@ static inline int uv_hub_info_check(int version) #define UV4_HUB_REVISION_BASE 7 #define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ -#ifdef UV1_HUB_IS_SUPPORTED +/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */ static inline int is_uv1_hub(void) { - return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE; -} +#ifdef UV1_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(1)); #else -static inline int is_uv1_hub(void) -{ return 0; -} #endif +} -#ifdef UV2_HUB_IS_SUPPORTED static inline int is_uv2_hub(void) { - return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) && - (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE)); -} +#ifdef UV2_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(2)); #else -static inline int is_uv2_hub(void) -{ return 0; -} #endif +} -#ifdef UV3_HUB_IS_SUPPORTED static inline int is_uv3_hub(void) { - return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) && - (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)); -} +#ifdef UV3_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(3)); #else -static inline int is_uv3_hub(void) -{ return 0; -} #endif +} /* First test "is UV4A", then "is UV4" */ -#ifdef UV4A_HUB_IS_SUPPORTED -static inline int is_uv4a_hub(void) -{ - return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE); -} -#else static inline int is_uv4a_hub(void) { +#ifdef UV4A_HUB_IS_SUPPORTED + if (is_uv_hubbed(uv(4))) + return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE); +#endif return 0; } -#endif -#ifdef UV4_HUB_IS_SUPPORTED static inline int is_uv4_hub(void) { - return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE; -} +#ifdef UV4_HUB_IS_SUPPORTED + return is_uv_hubbed(uv(4)); #else -static inline int is_uv4_hub(void) -{ return 0; -} #endif +} static inline int is_uvx_hub(void) { - if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) - return uv_hub_info->hub_revision; - - return 0; + return (is_uv_hubbed(-2) >= uv(2)); } static inline int is_uv_hub(void) { -#ifdef UV1_HUB_IS_SUPPORTED - return uv_hub_info->hub_revision; -#endif - return is_uvx_hub(); + return is_uv1_hub() || is_uvx_hub(); } union uvh_apicid { diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 230474e2ddb5..bbcdc7b8f963 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -21,6 +21,7 @@ struct 
vdso_image { long sym_vvar_page; long sym_pvclock_page; long sym_hvclock_page; + long sym_timens_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index ba71a63cdac4..6ee1f7dba34b 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -21,6 +21,7 @@ #include <clocksource/hyperv_timer.h> #define __vdso_data (VVAR(_vdso_data)) +#define __timens_vdso_data (TIMENS(_vdso_data)) #define VDSO_HAS_TIME 1 @@ -51,11 +52,18 @@ extern struct pvclock_vsyscall_time_info pvclock_page __attribute__((visibility("hidden"))); #endif -#ifdef CONFIG_HYPERV_TSCPAGE +#ifdef CONFIG_HYPERV_TIMER extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return __timens_vdso_data; +} +#endif + #ifndef BUILD_VDSO32 static __always_inline @@ -96,8 +104,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #else -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -228,7 +234,7 @@ static u64 vread_pvclock(void) } #endif -#ifdef CONFIG_HYPERV_TSCPAGE +#ifdef CONFIG_HYPERV_TIMER static u64 vread_hvclock(void) { return hv_read_tsc_page(&hvclock_page); @@ -251,7 +257,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode) return vread_pvclock(); } #endif -#ifdef CONFIG_HYPERV_TSCPAGE +#ifdef CONFIG_HYPERV_TIMER if (clock_mode == VCLOCK_HVCLOCK) { barrier(); return vread_hvclock(); diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..29837740b520 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#include <asm/pgtable_areas.h> + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000000..ac9fc51e2b18 --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +#ifndef _ASM_X86_VMWARE_H +#define _ASM_X86_VMWARE_H + +#include <asm/cpufeatures.h> +#include <asm/alternative.h> +#include <linux/stringify.h> + +/* + * The hypercall definitions differ in the low word of the %edx argument + * in the following way: the old port base interface uses the port + * number to distinguish between high- and low bandwidth versions. + * + * The new vmcall interface instead uses a set of flags to select + * bandwidth mode and transfer direction. The flags should be loaded + * into %dx by any user and are automatically replaced by the port + * number if the VMWARE_HYPERVISOR_PORT method is used. + * + * In short, new driver code should strictly use the new definition of + * %dx content. + */ + +/* Old port-based version */ +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB 0x5659 + +/* Current vmcall / vmmcall version */ +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) + +/* The low bandwidth call. The low word of edx is presumed clear. */ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +/* + * The high bandwidth out call. 
The low word of edx is presumed to have the + * HB and OUT bits set. + */ +#define VMWARE_HYPERCALL_HB_OUT \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep outsb", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +/* + * The high bandwidth in call. The low word of edx is presumed to have the + * HB bit set. + */ +#define VMWARE_HYPERCALL_HB_IN \ + ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ + "rep insb", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index a39136b0d509..2a85287b3685 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -15,66 +15,70 @@ #include <linux/bitops.h> #include <linux/types.h> #include <uapi/asm/vmx.h> +#include <asm/vmxfeatures.h> + +#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING) +#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) +#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) +#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) +#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING) +#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) +#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) +#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) +#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) +#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING) +#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) +#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) +#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) +#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG) +#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS) +#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING) +#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING) +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS) #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 /* * Definitions of Secondary Processor-Based 
VM-Execution Controls. */ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 -#define SECONDARY_EXEC_XSAVES 0x00100000 -#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 -#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 - -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) +#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) +#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) +#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) +#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) +#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) +#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST) +#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT) +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY) +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING) +#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING) +#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID) +#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC) +#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS) +#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) +#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) +#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) +#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) +#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) +#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 + +#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) +#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) +#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) +#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER) +#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR) #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -110,9 +114,12 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 #define 
VMX_MISC_ZERO_LEN_INS 0x40000000 +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 /* VMFUNC functions */ -#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) + +#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) @@ -562,6 +569,20 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +/* + * VM-instruction errors that can be encountered on VM-Enter, used to trace + * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter + * from a SMI Transfer Monitor are not included as things have gone seriously + * sideways if we get one of those... + */ +#define VMX_VMENTER_INSTRUCTION_ERRORS \ + { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \ + { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \ + { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \ + { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \ + { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \ + { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" } + enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_AUTO, VMENTER_L1D_FLUSH_NEVER, diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h new file mode 100644 index 000000000000..a50e4a0de315 --- /dev/null +++ b/arch/x86/include/asm/vmxfeatures.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VMXFEATURES_H +#define _ASM_X86_VMXFEATURES_H + +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 3 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. 
+ */ + +/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */ +#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */ + +/* EPT/VPID features, scattered to bits 16-23 */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */ +#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */ + +/* Aggregated APIC features 24-27 */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ + +/* VM-Functions, shifted to bits 28-31 */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */ + +/* Primary Processor-Based VM-Execution Controls, word 1 */ +#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */ +#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ +#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ +#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */ +#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */ + +/* Secondary Processor-Based VM-Execution Controls, word 2 */ +#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. 
Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ +#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ +#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */ +#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ +#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ +#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ +#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ + +#endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 32f5d9a0b90e..183e98e49ab9 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -19,10 +19,10 @@ #ifndef _ASM_X86_VVAR_H #define _ASM_X86_VVAR_H -#if defined(__VVAR_KERNEL_LDS) - -/* The kernel linker script defines its own magic to put vvars in the - * right place. +#ifdef EMIT_VVAR +/* + * EMIT_VVAR() is used by the kernel linker script to put vvars in the + * right place. Also, it's used by kernel code to import offsets values. */ #define DECLARE_VVAR(offset, type, name) \ EMIT_VVAR(name, offset) @@ -33,9 +33,12 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ extern type vvar_ ## name[CS_BASES] \ - __attribute__((visibility("hidden"))); + __attribute__((visibility("hidden"))); \ + extern type timens_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); \ #define VVAR(name) (vvar_ ## name) +#define TIMENS(name) (timens_ ## name) #define DEFINE_VVAR(type, name) \ type name[CS_BASES] \ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ac0934189017..96d9cd208610 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,12 +51,14 @@ struct x86_init_resources { * are set up. 
* @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; @@ -134,10 +136,12 @@ struct x86_hyper_init { /** * struct x86_init_acpi - x86 ACPI init functions + * @set_root_pointer: set RSDP address * @get_root_pointer: get RSDP address * @reduced_hw_early_init: hardware reduced platform early init */ struct x86_init_acpi { + void (*set_root_pointer)(u64 addr); u64 (*get_root_pointer)(void); void (*reduced_hw_early_init)(void); }; diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 42e1245af0d8..ff4b52e37e60 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif -extern void xen_set_iopl_mask(unsigned mask); - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c65..9139b3e86316 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -379,12 +379,9 @@ struct xen_pmu_arch { * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif +#include <asm/emulate_prefix.h> + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid) #endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 116777e7f387..63cd41b2e17a 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, free_pages((unsigned long) cpu_addr, get_order(size)); } -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) { } - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) { } - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
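To make the new set_root_pointer hook in struct x86_init_acpi concrete: a platform or hypervisor layer can redirect where the kernel records the ACPI RSDP address by assigning its own callback into x86_init.acpi, mirroring how other x86_init hooks are overridden. A hedged sketch only; my_plat_set_rsdp, my_plat_rsdp_pa and my_plat_early_setup are invented names, not part of this patch:

#include <linux/init.h>
#include <asm/x86_init.h>

static u64 my_plat_rsdp_pa;	/* hypothetical private copy of the RSDP address */

static void my_plat_set_rsdp(u64 addr)
{
	my_plat_rsdp_pa = addr;
}

static void __init my_plat_early_setup(void)
{
	/* From here on, setting the RSDP root pointer lands in the hook above. */
	x86_init.acpi.set_root_pointer = my_plat_set_rsdp;
}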