Diffstat (limited to 'arch/x86/include')
167 files changed, 3829 insertions, 2386 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index de690c2d2e33..a0ab9ab61c75 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h generic-y += dma-contiguous.h generic-y += early_ioremap.h +generic-y += export.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 11881726ed37..2f01eb4d6208 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -31,6 +31,7 @@ #include <asm/mmu.h> #include <asm/mpspec.h> #include <asm/realmode.h> +#include <asm/x86_init.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -133,6 +134,16 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return x86_init.acpi.get_root_pointer(); +} + +void acpi_generic_reduced_hw_init(void); + +u64 x86_default_get_root_pointer(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -142,6 +153,13 @@ static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } static inline void disable_acpi(void) { } +static inline void acpi_generic_reduced_hw_init(void) { } + +static inline u64 x86_default_get_root_pointer(void) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 31b627b43a8e..8e4ea39e55d0 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -7,16 +7,24 @@ #include <asm/asm.h> #ifdef CONFIG_SMP - .macro LOCK_PREFIX -672: lock +.macro LOCK_PREFIX_HERE .pushsection .smp_locks,"a" .balign 4 - .long 672b - . + .long 671f - . # offset .popsection - .endm +671: +.endm + +.macro LOCK_PREFIX insn:vararg + LOCK_PREFIX_HERE + lock \insn +.endm #else - .macro LOCK_PREFIX - .endm +.macro LOCK_PREFIX_HERE +.endm + +.macro LOCK_PREFIX insn:vararg +.endm #endif /* diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4cd6a3b71824..d7faa16622d8 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -31,15 +31,8 @@ */ #ifdef CONFIG_SMP -#define LOCK_PREFIX_HERE \ - ".pushsection .smp_locks,\"a\"\n" \ - ".balign 4\n" \ - ".long 671f - .\n" /* offset */ \ - ".popsection\n" \ - "671:" - -#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " - +#define LOCK_PREFIX_HERE "LOCK_PREFIX_HERE\n\t" +#define LOCK_PREFIX "LOCK_PREFIX " #else /* ! 
CONFIG_SMP */ #define LOCK_PREFIX_HERE "" #define LOCK_PREFIX "" diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index fddb6d26239f..1ae4e5791afa 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -103,6 +103,9 @@ static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) static inline bool amd_gart_present(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return false; + /* GART present only on Fam15h, upto model 0fh */ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 98722773391d..130e81e10fc7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -10,6 +10,7 @@ #include <asm/fixmap.h> #include <asm/mpspec.h> #include <asm/msr.h> +#include <asm/hardirq.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -138,7 +139,6 @@ extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void apic_intr_mode_init(void); -extern void setup_local_APIC(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); @@ -183,6 +183,7 @@ static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } +static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } @@ -304,12 +305,6 @@ struct apic { u32 irq_delivery_mode; u32 irq_dest_mode; - /* Functions and data related to vector allocation */ - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); u32 (*calc_dest_apicid)(unsigned int cpu); /* ICR related functions */ @@ -319,7 +314,7 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(int apicid); + int (*apic_id_valid)(u32 apicid); int (*apic_id_registered)(void); bool (*check_apicid_used)(physid_mask_t *map, int apicid); @@ -442,6 +437,8 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} #endif /* CONFIG_X86_LOCAL_APIC */ +extern void apic_ack_irq(struct irq_data *data); + static inline void ack_APIC_irq(void) { /* @@ -492,36 +489,33 @@ static inline unsigned int read_apic_id(void) return apic->get_apic_id(reg); } -extern int default_apic_id_valid(int apicid); +extern int default_apic_id_valid(u32 apicid); extern int default_acpi_madt_oem_check(char *, char *); extern void default_setup_apic_routing(void); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); -extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); extern bool default_check_apicid_used(physid_mask_t *map, int apicid); -extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask); -extern void default_vector_allocation_domain(int 
cpu, struct cpumask *retmask, - const struct cpumask *mask); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_SMP +bool apic_id_is_primary_thread(unsigned int id); +#else +static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } +#endif + extern void irq_enter(void); extern void irq_exit(void); static inline void entering_irq(void) { irq_enter(); + kvm_set_cpu_l1tf_flush_l1d(); } static inline void entering_ack_irq(void) @@ -534,6 +528,7 @@ static inline void ipi_entering_ack_irq(void) { irq_enter(); ack_APIC_irq(); + kvm_set_cpu_l1tf_flush_l1d(); } static inline void exiting_irq(void) diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index c356098b6fb9..4d4015ddcf26 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,8 +7,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H -#include <asm/nospec-branch.h> - #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -34,7 +32,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -47,7 +44,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -60,7 +56,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. 
*/ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -73,7 +68,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 386a6900e206..21b086786404 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". @@ -61,16 +120,32 @@ /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ - .pushsection "__ex_table","a" ; \ - .balign 4 ; \ - .long (from) - . ; \ - .long (to) - . ; \ - .long (handler) - . ; \ + ASM_EXTABLE_HANDLE from to handler + +.macro ASM_EXTABLE_HANDLE from:req to:req handler:req + .pushsection "__ex_table","a" + .balign 4 + .long (\from) - . + .long (\to) - . + .long (\handler) - . 
.popsection +.endm +#else /* __ASSEMBLY__ */ + +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + "ASM_EXTABLE_HANDLE from=" #from " to=" #to \ + " handler=\"" #handler "\"\n\t" + +/* For C file, we already have NOKPROBE_SYMBOL macro */ + +#endif /* __ASSEMBLY__ */ # define _ASM_EXTABLE(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) +# define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) + # define _ASM_EXTABLE_FAULT(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) @@ -86,6 +161,7 @@ _ASM_PTR (entry); \ .popsection +#ifdef __ASSEMBLY__ .macro ALIGN_DESTINATION /* check for bad alignment of destination */ movl %edi,%ecx @@ -106,37 +182,12 @@ jmp copy_user_handle_tail .previous - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) + _ASM_EXTABLE_UA(100b, 103b) + _ASM_EXTABLE_UA(101b, 103b) .endm - -#else -# define _EXPAND_EXTABLE_HANDLE(x) #x -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ - " .pushsection \"__ex_table\",\"a\"\n" \ - " .balign 4\n" \ - " .long (" #from ") - .\n" \ - " .long (" #to ") - .\n" \ - " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ - " .popsection\n" - -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) - -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - -/* For C file, we already have NOKPROBE_SYMBOL macro */ -#endif +#endif /* __ASSEMBLY__ */ #ifndef __ASSEMBLY__ -#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -146,6 +197,5 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif -#endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 72759f131cc5..ea3d95275b43 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -17,36 +17,40 @@ #define ATOMIC_INIT(i) { (i) } /** - * atomic_read - read atomic variable + * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t * * Atomically reads the value of @v. */ -static __always_inline int atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { + /* + * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, + * it's non-inlined function that increases binary size and stack usage. + */ return READ_ONCE((v)->counter); } /** - * atomic_set - set atomic variable + * arch_atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value * * Atomically sets the value of @v to @i. */ -static __always_inline void atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { WRITE_ONCE(v->counter, i); } /** - * atomic_add - add integer to atomic variable + * arch_atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. 
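A note on the atomic_read()/atomic_set()/atomic_add() renames above: the arch_ prefix exists so that the generic header pulled in at the bottom of atomic.h in this series (<asm-generic/atomic-instrumented.h>) can define the unprefixed names as wrappers that sanitize the access for KASAN before calling the arch primitive. A minimal sketch of that wrapper pattern, assuming the generic header's kasan_check_write() scheme — illustrative, not the verbatim generated code:

	#include <linux/kasan-checks.h>

	/*
	 * Sketch: the instrumented wrapper checks the memory access for
	 * KASAN, then defers to the arch_ primitive defined in this file.
	 */
	static __always_inline void atomic_add(int i, atomic_t *v)
	{
		kasan_check_write(v, sizeof(*v));
		arch_atomic_add(i, v);
	}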
*/ -static __always_inline void atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -54,13 +58,13 @@ static __always_inline void atomic_add(int i, atomic_t *v) } /** - * atomic_sub - subtract integer from atomic variable + * arch_atomic_sub - subtract integer from atomic variable * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v. */ -static __always_inline void atomic_sub(int i, atomic_t *v) +static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -68,7 +72,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) } /** - * atomic_sub_and_test - subtract value from variable and test result + * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * @@ -76,63 +80,68 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); + return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } +#define arch_atomic_sub_and_test arch_atomic_sub_and_test /** - * atomic_inc - increment atomic variable + * arch_atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. */ -static __always_inline void atomic_inc(atomic_t *v) +static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); } +#define arch_atomic_inc arch_atomic_inc /** - * atomic_dec - decrement atomic variable + * arch_atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * * Atomically decrements @v by 1. */ -static __always_inline void atomic_dec(atomic_t *v) +static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); } +#define arch_atomic_dec arch_atomic_dec /** - * atomic_dec_and_test - decrement and test + * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -static __always_inline bool atomic_dec_and_test(atomic_t *v) +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); + return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } +#define arch_atomic_dec_and_test arch_atomic_dec_and_test /** - * atomic_inc_and_test - increment and test + * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. 
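The GEN_UNARY_RMWcc()/GEN_BINARY_RMWcc() call sites in these hunks stop naming an output operand ("%0", cc) and instead return the condition through the macro, built on the CC_SET()/CC_OUT() flag-output helpers from <asm/asm.h>. A hand-written sketch of the underlying pattern (hypothetical function name; a sketch, not the actual macro expansion):

	/*
	 * With flag outputs ("=@cce" via CC_OUT), the compiler consumes ZF
	 * directly instead of materializing it with a sete/test pair.
	 */
	static __always_inline bool lock_dec_and_test(int *p)
	{
		bool zero;

		asm volatile(LOCK_PREFIX "decl %[var]"
			     CC_SET(e)
			     : CC_OUT(e) (zero), [var] "+m" (*p)
			     : : "memory");
		return zero;
	}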
*/ -static __always_inline bool atomic_inc_and_test(atomic_t *v) +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); + return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } +#define arch_atomic_inc_and_test arch_atomic_inc_and_test /** - * atomic_add_negative - add and test if negative + * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * @@ -140,65 +149,63 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static __always_inline bool atomic_add_negative(int i, atomic_t *v) +static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); + return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } +#define arch_atomic_add_negative arch_atomic_add_negative /** - * atomic_add_return - add integer and return + * arch_atomic_add_return - add integer and return * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } /** - * atomic_sub_return - subtract integer and return + * arch_atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ -static __always_inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { - return atomic_add_return(-i, v); + return arch_atomic_add_return(-i, v); } -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) - -static __always_inline int atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } -static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } -static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { - return cmpxchg(&v->counter, old, new); + return arch_cmpxchg(&v->counter, old, new); } -#define atomic_try_cmpxchg atomic_try_cmpxchg -static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg +static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } -static inline int atomic_xchg(atomic_t *v, int new) +static inline int arch_atomic_xchg(atomic_t *v, int new) { - return xchg(&v->counter, new); + return arch_xchg(&v->counter, new); } -static inline void atomic_and(int i, atomic_t *v) +static inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) @@ -206,16 +213,16 @@ static inline void atomic_and(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_and(int i, atomic_t *v) +static inline int arch_atomic_fetch_and(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val & i)); + do { } while 
(!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } -static inline void atomic_or(int i, atomic_t *v) +static inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) @@ -223,16 +230,16 @@ static inline void atomic_or(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_or(int i, atomic_t *v) +static inline int arch_atomic_fetch_or(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val | i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } -static inline void atomic_xor(int i, atomic_t *v) +static inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) @@ -240,40 +247,21 @@ static inline void atomic_xor(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_xor(int i, atomic_t *v) +static inline int arch_atomic_fetch_xor(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val ^ i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } -/** - * __atomic_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as @v was not already @u. - * Returns the old value of @v. - */ -static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) -{ - int c = atomic_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic_try_cmpxchg(v, &c, c + a)); - - return c; -} - #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else # include <asm/atomic64_64.h> #endif +#include <asm-generic/atomic-instrumented.h> + #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 97c46b8169b7..6a5b0ec460da 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -62,7 +62,7 @@ ATOMIC64_DECL(add_unless); #undef ATOMIC64_EXPORT /** - * atomic64_cmpxchg - cmpxchg atomic64 variable + * arch_atomic64_cmpxchg - cmpxchg atomic64 variable * @v: pointer to type atomic64_t * @o: expected value * @n: new value @@ -71,20 +71,21 @@ ATOMIC64_DECL(add_unless); * the old value. */ -static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) { - return cmpxchg64(&v->counter, o, n); + return arch_cmpxchg64(&v->counter, o, n); } /** - * atomic64_xchg - xchg atomic64 variable + * arch_atomic64_xchg - xchg atomic64 variable * @v: pointer to type atomic64_t * @n: value to assign * * Atomically xchgs the value of @v to @n and returns * the old value. */ -static inline long long atomic64_xchg(atomic64_t *v, long long n) +static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) { long long o; unsigned high = (unsigned)(n >> 32); @@ -96,13 +97,13 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n) } /** - * atomic64_set - set atomic64 variable + * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: value to assign * * Atomically sets the value of @v to @n. 
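On 32-bit x86 an 8-byte store is not a single atomic instruction, which is why arch_atomic64_set() and friends go through the cmpxchg8b-based alternative_atomic64() helpers. Semantically they behave like a compare-and-swap loop; a conceptual sketch under that reading (illustrative only — the real helpers are written in assembly):

	/* Retry the 8-byte CAS until no other CPU raced with the store. */
	static inline void atomic64_set_sketch(atomic64_t *v, long long i)
	{
		long long old = arch_atomic64_read(v);
		long long seen;

		while ((seen = arch_atomic64_cmpxchg(v, old, i)) != old)
			old = seen;	/* lost a race: retry with the fresh value */
	}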
*/ -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void arch_atomic64_set(atomic64_t *v, long long i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -112,26 +113,26 @@ static inline void atomic64_set(atomic64_t *v, long long i) } /** - * atomic64_read - read atomic64 variable + * arch_atomic64_read - read atomic64 variable * @v: pointer to type atomic64_t * * Atomically reads the value of @v and returns it. */ -static inline long long atomic64_read(const atomic64_t *v) +static inline long long arch_atomic64_read(const atomic64_t *v) { long long r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); return r; - } +} /** - * atomic64_add_return - add and return + * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + *@v */ -static inline long long atomic64_add_return(long long i, atomic64_t *v) +static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -142,7 +143,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline long long atomic64_sub_return(long long i, atomic64_t *v) +static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -150,30 +151,32 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v) return i; } -static inline long long atomic64_inc_return(atomic64_t *v) +static inline long long arch_atomic64_inc_return(atomic64_t *v) { long long a; alternative_atomic64(inc_return, "=&A" (a), "S" (v) : "memory", "ecx"); return a; } +#define arch_atomic64_inc_return arch_atomic64_inc_return -static inline long long atomic64_dec_return(atomic64_t *v) +static inline long long arch_atomic64_dec_return(atomic64_t *v) { long long a; alternative_atomic64(dec_return, "=&A" (a), "S" (v) : "memory", "ecx"); return a; } +#define arch_atomic64_dec_return arch_atomic64_dec_return /** - * atomic64_add - add integer to atomic64 variable + * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. */ -static inline long long atomic64_add(long long i, atomic64_t *v) +static inline long long arch_atomic64_add(long long i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -182,13 +185,13 @@ static inline long long atomic64_add(long long i, atomic64_t *v) } /** - * atomic64_sub - subtract the atomic64 variable + * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. */ -static inline long long atomic64_sub(long long i, atomic64_t *v) +static inline long long arch_atomic64_sub(long long i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -197,85 +200,33 @@ static inline long long atomic64_sub(long long i, atomic64_t *v) } /** - * atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. 
- */ -static inline int atomic64_sub_and_test(long long i, atomic64_t *v) -{ - return atomic64_sub_return(i, v) == 0; -} - -/** - * atomic64_inc - increment atomic64 variable + * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } +#define arch_atomic64_inc arch_atomic64_inc /** - * atomic64_dec - decrement atomic64 variable + * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. */ -static inline void atomic64_dec(atomic64_t *v) +static inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } +#define arch_atomic64_dec arch_atomic64_dec /** - * atomic64_dec_and_test - decrement and test - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline int atomic64_dec_and_test(atomic64_t *v) -{ - return atomic64_dec_return(v) == 0; -} - -/** - * atomic64_inc_and_test - increment and test - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static inline int atomic64_inc_and_test(atomic64_t *v) -{ - return atomic64_inc_return(v) == 0; -} - -/** - * atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static inline int atomic64_add_negative(long long i, atomic64_t *v) -{ - return atomic64_add_return(i, v) < 0; -} - -/** - * atomic64_add_unless - add unless the number is a given value + * arch_atomic64_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -283,7 +234,8 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns non-zero if the add was done, zero otherwise. 
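The helpers deleted in this hunk (sub_and_test, dec_and_test, inc_and_test, add_negative) are not lost: once the arch_ prefix is in place, the generic atomic layer can derive them from the returning primitives that survive, exactly as the removed bodies did. For example, the removed atomic64_dec_and_test() reduces to:

	/* Generic-style fallback, mirroring the removed implementation. */
	static inline bool atomic64_dec_and_test_sketch(atomic64_t *v)
	{
		return arch_atomic64_dec_return(v) == 0;
	}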
*/ -static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, + long long u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -293,90 +245,91 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) return (int)a; } - -static inline int atomic64_inc_not_zero(atomic64_t *v) +static inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; alternative_atomic64(inc_not_zero, "=&a" (r), "S" (v) : "ecx", "edx", "memory"); return r; } +#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) { long long r; alternative_atomic64(dec_if_positive, "=&A" (r), "S" (v) : "ecx", "memory"); return r; } +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void atomic64_and(long long i, atomic64_t *v) +static inline void arch_atomic64_and(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; } -static inline long long atomic64_fetch_and(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; return old; } -static inline void atomic64_or(long long i, atomic64_t *v) +static inline void arch_atomic64_or(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; } -static inline long long atomic64_fetch_or(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; return old; } -static inline void atomic64_xor(long long i, atomic64_t *v) +static inline void arch_atomic64_xor(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; } -static inline long long atomic64_fetch_xor(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; return old; } -static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c + i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) c = old; return old; } -#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 738495caf05f..dadc20adba21 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -11,37 +11,37 @@ #define ATOMIC64_INIT(i) { (i) } /** - * atomic64_read - read atomic64 variable + * arch_atomic64_read - 
read atomic64 variable * @v: pointer of type atomic64_t * * Atomically reads the value of @v. * Doesn't imply a read memory barrier. */ -static inline long atomic64_read(const atomic64_t *v) +static inline long arch_atomic64_read(const atomic64_t *v) { return READ_ONCE((v)->counter); } /** - * atomic64_set - set atomic64 variable + * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: required value * * Atomically sets the value of @v to @i. */ -static inline void atomic64_set(atomic64_t *v, long i) +static inline void arch_atomic64_set(atomic64_t *v, long i) { WRITE_ONCE(v->counter, i); } /** - * atomic64_add - add integer to atomic64 variable + * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. */ -static __always_inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -49,13 +49,13 @@ static __always_inline void atomic64_add(long i, atomic64_t *v) } /** - * atomic64_sub - subtract the atomic64 variable + * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. */ -static inline void atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) @@ -63,7 +63,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) } /** - * atomic64_sub_and_test - subtract value from variable and test result + * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer to type atomic64_t * @@ -71,65 +71,70 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline bool atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); + return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test /** - * atomic64_inc - increment atomic64 variable + * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ -static __always_inline void atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter)); } +#define arch_atomic64_inc arch_atomic64_inc /** - * atomic64_dec - decrement atomic64 variable + * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. */ -static __always_inline void atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter)); } +#define arch_atomic64_dec arch_atomic64_dec /** - * atomic64_dec_and_test - decrement and test + * arch_atomic64_dec_and_test - decrement and test * @v: pointer to type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. 
*/ -static inline bool atomic64_dec_and_test(atomic64_t *v) +static inline bool arch_atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); + return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test /** - * atomic64_inc_and_test - increment and test + * arch_atomic64_inc_and_test - increment and test * @v: pointer to type atomic64_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ -static inline bool atomic64_inc_and_test(atomic64_t *v) +static inline bool arch_atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); + return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test /** - * atomic64_add_negative - add and test if negative + * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer to type atomic64_t * @@ -137,97 +142,56 @@ static inline bool atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline bool atomic64_add_negative(long i, atomic64_t *v) +static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); + return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } +#define arch_atomic64_add_negative arch_atomic64_add_negative /** - * atomic64_add_return - add and return + * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -static inline long atomic64_sub_return(long i, atomic64_t *v) +static inline long arch_atomic64_sub_return(long i, atomic64_t *v) { - return atomic64_add_return(-i, v); + return arch_atomic64_add_return(-i, v); } -static inline long atomic64_fetch_add(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) { return xadd(&v->counter, i); } -static inline long atomic64_fetch_sub(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) { return xadd(&v->counter, -i); } -#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) -#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) - -static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) +static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) { - return cmpxchg(&v->counter, old, new); + return arch_cmpxchg(&v->counter, old, new); } -#define atomic64_try_cmpxchg atomic64_try_cmpxchg -static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) { return try_cmpxchg(&v->counter, old, new); } -static inline long atomic64_xchg(atomic64_t *v, long new) -{ - return xchg(&v->counter, new); -} - -/** - * atomic64_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. 
- */ -static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) -{ - s64 c = atomic64_read(v); - do { - if (unlikely(c == u)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c + a)); - return true; -} - -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) - -/* - * atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ -static inline long atomic64_dec_if_positive(atomic64_t *v) +static inline long arch_atomic64_xchg(atomic64_t *v, long new) { - s64 dec, c = atomic64_read(v); - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic64_try_cmpxchg(v, &c, dec)); - return dec; + return arch_xchg(&v->counter, new); } -static inline void atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -235,16 +199,16 @@ static inline void atomic64_and(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_and(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val & i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } -static inline void atomic64_or(long i, atomic64_t *v) +static inline void arch_atomic64_or(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -252,16 +216,16 @@ static inline void atomic64_or(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_or(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val | i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } -static inline void atomic64_xor(long i, atomic64_t *v) +static inline void arch_atomic64_xor(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -269,12 +233,12 @@ static inline void atomic64_xor(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_xor(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val ^ i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 042b5e892ed1..14de0432d288 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -38,7 +38,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, { unsigned long mask; - asm ("cmp %1,%2; sbb %0,%0;" + asm volatile ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) :"g"(size),"r" (index) :"cc"); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 9f645ba57dbb..124f9195eb3e 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -217,8 +217,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), - *addr, "Ir", nr, "%0", c); + return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", 
nr); } /** @@ -264,8 +263,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), - *addr, "Ir", nr, "%0", c); + return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } /** @@ -318,8 +316,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), - *addr, "Ir", nr, "%0", c); + return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 6804d6642767..5090035e6d16 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -4,6 +4,8 @@ #include <linux/stringify.h> +#ifndef __ASSEMBLY__ + /* * Despite that some emulators terminate on UD2, we use it for WARN(). * @@ -20,53 +22,15 @@ #define LEN_UD2 2 -#ifdef CONFIG_GENERIC_BUG - -#ifdef CONFIG_X86_32 -# define __BUG_REL(val) ".long " __stringify(val) -#else -# define __BUG_REL(val) ".long " __stringify(val) " - 2b" -#endif - -#ifdef CONFIG_DEBUG_BUGVERBOSE - -#define _BUG_FLAGS(ins, flags) \ -do { \ - asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ - "\t.word %c1" "\t# bug_entry::line\n" \ - "\t.word %c2" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c3\n" \ - ".popsection" \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) - -#else /* !CONFIG_DEBUG_BUGVERBOSE */ - #define _BUG_FLAGS(ins, flags) \ do { \ - asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t.word %c0" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c1\n" \ - ".popsection" \ - : : "i" (flags), \ + asm volatile("ASM_BUG ins=\"" ins "\" file=%c0 line=%c1 " \ + "flags=%c2 size=%c3" \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) -#endif /* CONFIG_DEBUG_BUGVERBOSE */ - -#else - -#define _BUG_FLAGS(ins, flags) asm volatile(ins) - -#endif /* CONFIG_GENERIC_BUG */ - #define HAVE_ARCH_BUG #define BUG() \ do { \ @@ -82,4 +46,54 @@ do { \ #include <asm-generic/bug.h> +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_GENERIC_BUG + +#ifdef CONFIG_X86_32 +.macro __BUG_REL val:req + .long \val +.endm +#else +.macro __BUG_REL val:req + .long \val - 2b +.endm +#endif + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +.macro ASM_BUG ins:req file:req line:req flags:req size:req +1: \ins + .pushsection __bug_table,"aw" +2: __BUG_REL val=1b # bug_entry::bug_addr + __BUG_REL val=\file # bug_entry::file + .word \line # bug_entry::line + .word \flags # bug_entry::flags + .org 2b+\size + .popsection +.endm + +#else /* !CONFIG_DEBUG_BUGVERBOSE */ + +.macro ASM_BUG ins:req file:req line:req flags:req size:req +1: \ins + .pushsection __bug_table,"aw" +2: __BUG_REL val=1b # bug_entry::bug_addr + .word \flags # bug_entry::flags + .org 2b+\size + .popsection +.endm + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#else /* CONFIG_GENERIC_BUG */ + +.macro ASM_BUG ins:req file:req line:req flags:req size:req + \ins +.endm + +#endif /* CONFIG_GENERIC_BUG */ + +#endif /* __ASSEMBLY__ */ + #endif 
/* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h new file mode 100644 index 000000000000..86b63c7feab7 --- /dev/null +++ b/arch/x86/include/asm/cacheinfo.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CACHEINFO_H +#define _ASM_X86_CACHEINFO_H + +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); + +#endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 56bd436ed01b..bfb85e5844ab 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -75,7 +75,7 @@ extern void __add_wrong_size(void) * use "asm volatile" and "memory" clobbers to prevent gcc from moving * information around. */ -#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") +#define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") /* * Atomic compare and exchange. Compare OLD with MEM, if identical, @@ -145,13 +145,13 @@ extern void __add_wrong_size(void) # include <asm/cmpxchg_64.h> #endif -#define cmpxchg(ptr, old, new) \ +#define arch_cmpxchg(ptr, old, new) \ __cmpxchg(ptr, old, new, sizeof(*(ptr))) -#define sync_cmpxchg(ptr, old, new) \ +#define arch_sync_cmpxchg(ptr, old, new) \ __sync_cmpxchg(ptr, old, new, sizeof(*(ptr))) -#define cmpxchg_local(ptr, old, new) \ +#define arch_cmpxchg_local(ptr, old, new) \ __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) @@ -221,7 +221,7 @@ extern void __add_wrong_size(void) #define __try_cmpxchg(ptr, pold, new, size) \ __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) -#define try_cmpxchg(ptr, pold, new) \ +#define try_cmpxchg(ptr, pold, new) \ __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) /* @@ -242,18 +242,20 @@ extern void __add_wrong_size(void) BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ - asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ - : "=a" (__ret), "+d" (__old2), \ - "+m" (*(p1)), "+m" (*(p2)) \ - : "i" (2 * sizeof(long)), "a" (__old1), \ + asm volatile(pfx "cmpxchg%c5b %1" \ + CC_SET(e) \ + : CC_OUT(e) (__ret), \ + "+m" (*(p1)), "+m" (*(p2)), \ + "+a" (__old1), "+d" (__old2) \ + : "i" (2 * sizeof(long)), \ "b" (__new1), "c" (__new2)); \ __ret; \ }) -#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) -#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ +#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ __cmpxchg_double(, p1, p2, o1, o2, n1, n2) #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 1732704f0445..1a2eafca7038 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -36,10 +36,10 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) } #ifdef CONFIG_X86_CMPXCHG64 -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) #endif @@ -76,7 +76,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) * to simulate the cmpxchg8b on the 80386 and 80486 
CPU. */ -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ @@ -93,7 +93,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) __ret; }) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 03cad196a301..072e5459fe2f 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -7,16 +7,16 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) *ptr = val; } -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg((ptr), (o), (n)); \ + arch_cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ + arch_cmpxchg_local((ptr), (o), (n)); \ }) #define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index e1c8dab86670..22c4dfe65992 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -12,49 +12,23 @@ #include <asm/user32.h> #include <asm/unistd.h> +#include <asm-generic/compat.h> + #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" -typedef u32 compat_size_t; -typedef s32 compat_ssize_t; -typedef s32 compat_time_t; -typedef s32 compat_clock_t; -typedef s32 compat_pid_t; typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u32 __compat_uid32_t; typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; -typedef u32 compat_ino_t; typedef u16 compat_dev_t; -typedef s32 compat_off_t; -typedef s64 compat_loff_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef s32 compat_daddr_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; -typedef s32 compat_timer_t; -typedef s32 compat_key_t; - -typedef s32 compat_int_t; -typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; -typedef u32 compat_uint_t; -typedef u32 compat_ulong_t; -typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; -typedef u32 compat_uptr_t; - -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; struct compat_stat { compat_dev_t st_dev; @@ -145,10 +119,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; - compat_ulong_t __unused1; - compat_time_t sem_ctime; - compat_ulong_t __unused2; + compat_ulong_t sem_otime; + compat_ulong_t sem_otime_high; + compat_ulong_t sem_ctime; + compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; @@ -156,12 +130,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - compat_time_t msg_stime; - compat_ulong_t __unused1; - compat_time_t msg_rtime; - compat_ulong_t __unused2; - compat_time_t msg_ctime; - compat_ulong_t __unused3; + compat_ulong_t msg_stime; + compat_ulong_t msg_stime_high; + compat_ulong_t msg_rtime; + compat_ulong_t msg_rtime_high; + compat_ulong_t msg_ctime; + compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -174,12 +148,12 @@ struct compat_msqid64_ds { 
struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; - compat_time_t shm_atime; - compat_ulong_t __unused1; - compat_time_t shm_dtime; - compat_ulong_t __unused2; - compat_time_t shm_ctime; - compat_ulong_t __unused3; + compat_ulong_t shm_atime; + compat_ulong_t shm_atime_high; + compat_ulong_t shm_dtime; + compat_ulong_t shm_dtime_high; + compat_ulong_t shm_ctime; + compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; @@ -243,14 +217,21 @@ static inline bool in_x32_syscall(void) return false; } -static inline bool in_compat_syscall(void) +static inline bool in_32bit_syscall(void) { return in_ia32_syscall() || in_x32_syscall(); } + +#ifdef CONFIG_COMPAT +static inline bool in_compat_syscall(void) +{ + return in_32bit_syscall(); +} #define in_compat_syscall in_compat_syscall /* override the generic impl */ +#endif struct compat_siginfo; int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const siginfo_t *from, bool x32_ABI); + const kernel_siginfo_t *from, bool x32_ABI); #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 4a7884b8dca5..29c706415443 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -30,8 +30,6 @@ struct cpu_entry_area { */ struct tss_struct tss; - char entry_trampoline[PAGE_SIZE]; - #ifdef CONFIG_X86_64 /* * Exception stacks used for IST entries. diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 736771c9822e..7d442722ef24 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -2,10 +2,10 @@ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H -#include <asm/processor.h> - -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#include <asm/processor.h> #include <asm/asm.h> #include <linux/bitops.h> @@ -140,7 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) +#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO) + +/* + * Workaround for the sake of BPF compilation which utilizes kernel + * headers, but clang does not support ASM GOTO and fails the build. + */ +#ifndef __BPF_TRACING__ +#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" +#endif + +#define static_cpu_has(bit) boot_cpu_has(bit) + +#else + /* * Static testing of CPU features. Used the same as boot_cpu_has(). 
* These will statically patch the target code for additional @@ -148,37 +161,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline __pure bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" - "2:\n" - ".skip -(((5f-4f) - (2b-1b)) > 0) * " - "((5f-4f) - (2b-1b)),0x90\n" - "3:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 4f - .\n" /* repl offset */ - " .word %P[always]\n" /* always replace */ - " .byte 3b - 1b\n" /* src len */ - " .byte 5f - 4f\n" /* repl len */ - " .byte 3b - 2b\n" /* pad len */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "4: jmp %l[t_no]\n" - "5:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 0\n" /* no replacement */ - " .word %P[feature]\n" /* feature bit */ - " .byte 3b - 1b\n" /* src len */ - " .byte 0\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .altinstr_aux,\"ax\"\n" - "6:\n" - " testb %[bitnum],%[cap_byte]\n" - " jnz %l[t_yes]\n" - " jmp %l[t_no]\n" - ".previous\n" + asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] " + "cap_byte=\"%[cap_byte]\" " + "feature=%P[feature] t_yes=%l[t_yes] " + "t_no=%l[t_no] always=%P[always]" : : [feature] "i" (bit), [always] "i" (X86_FEATURE_ALWAYS), [bitnum] "i" (1 << (bit & 7)), @@ -196,12 +182,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#else -/* - * Fall back to dynamic for gcc versions which don't support asm goto. Should be - * a minority now anyway. - */ -#define static_cpu_has(bit) boot_cpu_has(bit) #endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) @@ -219,5 +199,44 @@ t_no: #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model -#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#else /* __ASSEMBLY__ */ + +.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req +1: + jmp 6f +2: + .skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90 +3: + .section .altinstructions,"a" + .long 1b - . /* src offset */ + .long 4f - . /* repl offset */ + .word \always /* always replace */ + .byte 3b - 1b /* src len */ + .byte 5f - 4f /* repl len */ + .byte 3b - 2b /* pad len */ + .previous + .section .altinstr_replacement,"ax" +4: + jmp \t_no +5: + .previous + .section .altinstructions,"a" + .long 1b - . 
/* src offset */ + .long 0 /* no replacement */ + .word \feature /* feature bit */ + .byte 3b - 1b /* src len */ + .byte 0 /* repl len */ + .byte 0 /* pad len */ + .previous + .section .altinstr_aux,"ax" +6: + testb \bitnum,\cap_byte + jnz \t_yes + jmp \t_no + .previous +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d554c11e01ff..28c4a502b419 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ @@ -207,13 +206,21 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ - +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -224,7 +231,7 @@ #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ - +#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -274,9 +281,12 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -320,6 +330,9 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ @@ -332,7 +345,9 @@ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) @@ -362,5 +377,7 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 10f8d590bcfe..a5d86fc0593f 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -2,8 +2,9 @@ #ifndef ASM_X86_CAMELLIA_H #define ASM_X86_CAMELLIA_H -#include <linux/kernel.h> +#include <crypto/b128ops.h> #include <linux/crypto.h> +#include <linux/kernel.h> #define CAMELLIA_MIN_KEY_SIZE 16 #define CAMELLIA_MAX_KEY_SIZE 32 @@ -11,16 +12,13 @@ #define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_PARALLEL_BLOCKS 2 +struct crypto_skcipher; + struct camellia_ctx { u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; u32 key_length; }; -struct camellia_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct camellia_ctx camellia_ctx; -}; - struct camellia_xts_ctx { struct camellia_ctx tweak_ctx; struct camellia_ctx crypt_ctx; @@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len, u32 *flags); -extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); /* regular block cipher functions */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 553a03de55c3..d1818634ae7e 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -45,7 +45,7 @@ struct common_glue_ctx { }; static inline bool glue_fpu_begin(unsigned int bsize, int 
fpu_blocks_limit, - struct blkcipher_desc *desc, + struct skcipher_walk *walk, bool fpu_enabled, unsigned int nbytes) { if (likely(fpu_blocks_limit < 0)) @@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, if (nbytes < bsize * (unsigned int)fpu_blocks_limit) return false; - if (desc) { - /* prevent sleeping if FPU is in use */ - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - } - - kernel_fpu_begin(); - return true; -} - -static inline bool glue_skwalk_fpu_begin(unsigned int bsize, - int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. - */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - /* prevent sleeping if FPU is in use */ skcipher_walk_atomise(walk); @@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i) i->b = cpu_to_le64(b); } -extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - -extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - -extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - -extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); +extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + +extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req); + +extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + +extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index c958b7bd0fcb..db7c9cc32234 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -2,15 +2,13 @@ #ifndef ASM_X86_SERPENT_AVX_H #define ASM_X86_SERPENT_AVX_H -#include <linux/crypto.h> +#include <crypto/b128ops.h> #include <crypto/serpent.h> +#include <linux/types.h> -#define SERPENT_PARALLEL_BLOCKS 8 +struct crypto_skcipher; -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; +#define SERPENT_PARALLEL_BLOCKS 8 struct serpent_xts_ctx { struct serpent_ctx tweak_ctx; @@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const 
u128 *src, extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); #endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 65bb80adba3e..f618bf272b90 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -4,19 +4,8 @@ #include <linux/crypto.h> #include <crypto/twofish.h> -#include <crypto/lrw.h> #include <crypto/b128ops.h> -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - /* regular block cipher functions from twofish_x86_64 module */ asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, const u8 *src); @@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4505ac2735ad..9e5ca30738e5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -8,7 +8,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL /* * These special macros can be used to get or set a debugging register */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 13c5ee878a47..68a99d2a5f33 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr) return !(desc[0] | desc[1]); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define load_TR_desc() native_load_tr_desc() @@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { } -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 5e12c63b47aa..a8f6c809d9b1 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,6 +6,9 @@ struct dev_archdata { #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif +#ifdef CONFIG_STA2X11 + bool is_sta2x11; +#endif }; #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h index 1295bc622ebe..1a19251eaac9 100644 --- a/arch/x86/include/asm/dma-direct.h +++ b/arch/x86/include/asm/dma-direct.h @@ -2,29 +2,8 @@ #ifndef ASM_X86_DMA_DIRECT_H #define ASM_X86_DMA_DIRECT_H 1 -#include <linux/mem_encrypt.h> - -#ifdef CONFIG_X86_DMA_REMAP /* Platform 
code defines bridge-specific code */ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ #endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6277c83c0eb1..ce4d176b3d13 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -30,43 +30,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -int arch_dma_supported(struct device *dev, u64 mask); -#define arch_dma_supported arch_dma_supported - -bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); +bool arch_dma_alloc_attrs(struct device **dev); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs); - -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - -static inline unsigned long dma_alloc_coherent_mask(struct device *dev, - gfp_t gfp) -{ - unsigned long dma_mask = 0; - - dma_mask = dev->coherent_dma_mask; - if (!dma_mask) - dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32); - - return dma_mask; -} - -static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) -{ - unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); - - if (dma_mask <= DMA_BIT_MASK(24)) - gfp |= GFP_DMA; -#ifdef CONFIG_X86_64 - if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - return gfp; -} - #endif diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 0ab2ab27ad1f..b825cb201251 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -4,8 +4,8 @@ #include <linux/compiler.h> #include <linux/init.h> +#include <linux/io.h> -#include <asm/io.h> #include <asm/setup.h> static __always_inline __init void *dmi_alloc(unsigned len) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a399c1ebf6f0..eea40d52ca78 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -7,6 +7,7 @@ #include <asm/processor-flags.h> #include <asm/tlb.h> #include <asm/nospec-branch.h> +#include <asm/mmu_context.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -69,14 +70,13 @@ extern asmlinkage u64 efi_call(void *fp, ...); #define efi_call_phys(f, args...) 
efi_call((f), args) /* - * Scratch space used for switching the pagetable in the EFI stub + * struct efi_scratch - Scratch space used while switching to/from efi_mm + * @phys_stack: stack used during EFI Mixed Mode + * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm */ struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; + u64 phys_stack; + struct mm_struct *prev_mm; } __packed; #define arch_efi_call_virt_setup() \ @@ -86,11 +86,8 @@ struct efi_scratch { __kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ - if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = __read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ - } \ + if (!efi_enabled(EFI_OLD_MEMMAP)) \ + efi_switch_mm(&efi_mm); \ }) #define arch_efi_call_virt(p, f, args...) \ @@ -98,10 +95,8 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (efi_scratch.use_pgd) { \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - } \ + if (!efi_enabled(EFI_OLD_MEMMAP)) \ + efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ @@ -144,6 +139,8 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); +extern void efi_switch_mm(struct mm_struct *mm); +extern void efi_recover_from_page_fault(unsigned long phys_addr); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 0d157d2a1e2a..69c0f892e310 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -10,6 +10,7 @@ #include <asm/ptrace.h> #include <asm/user.h> #include <asm/auxvec.h> +#include <asm/fsgsbase.h> typedef unsigned long elf_greg_t; @@ -62,8 +63,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ #define R_X86_64_8 14 /* Direct 8 bit sign extended */ #define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ - -#define R_X86_64_NUM 16 +#define R_X86_64_PC64 24 /* Place relative 64-bit signed */ /* * These are used to set parameters in the core dumps. 
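
The new R_X86_64_PC64 relocation added above ("Place relative 64-bit signed") resolves with the same S + A - P arithmetic as the existing 32-bit PC-relative types, only into a 64-bit slot. A minimal sketch of that arithmetic, with an illustrative helper name rather than the kernel's actual relocation handler:

#include <stdint.h>
#include <string.h>

/* Sketch only: write (S + A - P) into a 64-bit place-relative slot. */
static void apply_r_x86_64_pc64(void *loc, uint64_t sym, int64_t addend)
{
	uint64_t val = sym + (uint64_t)addend - (uint64_t)(uintptr_t)loc;

	memcpy(loc, &val, sizeof(val));
}

Unlike the 32-bit PC-relative case, no overflow check is needed, since any 64-bit displacement fits the slot.
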
@@ -205,7 +205,6 @@ void set_personality_ia32(bool); #define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ - unsigned long base; \ unsigned v; \ (pr_reg)[0] = (regs)->r15; \ (pr_reg)[1] = (regs)->r14; \ @@ -228,8 +227,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \ - rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \ + (pr_reg)[21] = x86_fsbase_read_cpu(); \ + (pr_reg)[22] = x86_gsbase_read_cpu_inactive(); \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h deleted file mode 100644 index 2a51d66689c5..000000000000 --- a/arch/x86/include/asm/export.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_64BIT -#define KSYM_ALIGN 16 -#endif -#include <asm-generic/export.h> diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index f9c3a5d502f4..d8c2198d543b 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -29,7 +29,8 @@ struct pt_regs; (b)->handler = (tmp).handler - (delta); \ } while (0) -extern int fixup_exception(struct pt_regs *regs, int trapnr); +extern int fixup_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr); extern int fixup_bug(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e203169931c7..50ba74a34a37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,6 +14,16 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +/* + * Exposed to assembly code for setting up initial page tables. Cannot be + * calculated in assembly code (fixmap entries are an enum), but is sanity + * checked in the actual fixmap C code to make sure that the fixmap is + * covered fully. 
+ */ +#define FIXMAP_PMD_NUM 2 +/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ +#define FIXMAP_PMD_TOP 507 + #ifndef __ASSEMBLY__ #include <linux/kernel.h> #include <asm/acpi.h> @@ -152,7 +162,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..5f7290e6e954 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) "3: movl $-2,%[err]\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h new file mode 100644 index 000000000000..eb377b6e9eed --- /dev/null +++ b/arch/x86/include/asm/fsgsbase.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_FSGSBASE_H +#define _ASM_FSGSBASE_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 + +#include <asm/msr-index.h> + +/* + * Read/write a task's FSBASE or GSBASE. This returns the value that + * the FS/GS base would have (if the task were to be resumed). These + * work on the current task or on a non-running (typically stopped + * ptrace child) task. + */ +extern unsigned long x86_fsbase_read_task(struct task_struct *task); +extern unsigned long x86_gsbase_read_task(struct task_struct *task); +extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); +extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); + +/* Helper functions for reading/writing FS/GS base */ + +static inline unsigned long x86_fsbase_read_cpu(void) +{ + unsigned long fsbase; + + rdmsrl(MSR_FS_BASE, fsbase); + + return fsbase; +} + +static inline unsigned long x86_gsbase_read_cpu_inactive(void) +{ + unsigned long gsbase; + + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); + + return gsbase; +} + +extern void x86_fsbase_write_cpu(unsigned long fsbase); +extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); + +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 09ad88572746..cf350639e76d 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -46,10 +46,24 @@ int ftrace_int3_handler(struct pt_regs *regs); #endif /* CONFIG_FUNCTION_TRACER */ -#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS) +#ifndef __ASSEMBLY__ + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Compare the symbol name with the system call name. Skip the + * "__x64_sys", "__ia32_sys" or simple "sys" prefix. 
+ */ + return !strcmp(sym + 3, name + 3) || + (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) || + (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)); +} + +#ifndef COMPILE_OFFSETS #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION) -#include <asm/compat.h> +#include <linux/compat.h> /* * Because ia32 syscalls do not map to x86_64 syscall numbers @@ -62,11 +76,10 @@ int ftrace_int3_handler(struct pt_regs *regs); #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) { - if (in_compat_syscall()) - return true; - return false; + return in_32bit_syscall(); } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ -#endif /* !__ASSEMBLY__ && !COMPILE_OFFSETS */ +#endif /* !COMPILE_OFFSETS */ +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index de4d68852d3a..13c83fe97988 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -20,7 +20,7 @@ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ : "i" (-EFAULT), "0" (oparg), "1" (0)) @@ -36,8 +36,8 @@ "4:\tmov\t%5, %1\n" \ "\tjmp\t3b\n" \ "\t.previous\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UA(1b, 4b) \ + _ASM_EXTABLE_UA(2b, 4b) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 7c341a74ec8c..d9069bb26c7f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,10 +3,12 @@ #define _ASM_X86_HARDIRQ_H #include <linux/threads.h> -#include <linux/irq.h> typedef struct { - unsigned int __softirq_pending; + u16 __softirq_pending; +#if IS_ENABLED(CONFIG_KVM_INTEL) + u8 kvm_cpu_l1tf_flush_l1d; +#endif unsigned int __nmi_count; /* arch dependent */ #ifdef CONFIG_X86_LOCAL_APIC unsigned int apic_timer_irqs; /* arch dependent */ @@ -40,6 +42,7 @@ typedef struct { #endif #if IS_ENABLED(CONFIG_HYPERV) unsigned int irq_hv_reenlightenment_count; + unsigned int hyperv_stimer0_count; #endif } ____cacheline_aligned irq_cpustat_t; @@ -49,14 +52,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) -#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) \ - this_cpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x)) - extern void ack_bad_irq(unsigned int irq); extern u64 arch_irq_stat_cpu(unsigned int cpu); @@ -65,4 +60,24 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat + +#if IS_ENABLED(CONFIG_KVM_INTEL) +static inline void kvm_set_cpu_l1tf_flush_l1d(void) +{ + __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); +} + +static inline void kvm_clear_cpu_l1tf_flush_l1d(void) +{ + __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0); +} + +static inline bool kvm_get_cpu_l1tf_flush_l1d(void) +{ + return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); +} +#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */ +static inline void kvm_set_cpu_l1tf_flush_l1d(void) { } +#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */ + #endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/hugetlb.h 
b/arch/x86/include/asm/hugetlb.h index 5ed826da5e07..7469d321f072 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -13,75 +13,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return 0; } -/* - * If the arch doesn't supply something else, assume that hugepage - * size aligned regions are ok without further preparation. - */ -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - struct hstate *h = hstate_file(file); - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (addr & ~huge_page_mask(h)) - return -EINVAL; - return 0; -} - -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, - unsigned long ceiling) -{ - free_pgd_range(tlb, addr, end, floor, ceiling); -} - -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} - -static inline pte_t huge_pte_wrprotect(pte_t pte) -{ - return pte_wrprotect(pte); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} - -static inline pte_t huge_ptep_get(pte_t *ptep) -{ - return *ptep; -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index f59c39835a5a..a1f0e90d0818 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -49,11 +49,14 @@ static inline int hw_breakpoint_slots(int type) return HBP_NUM; } +struct perf_event_attr; struct perf_event; struct pmu; -extern int arch_check_bp_in_kernelspace(struct perf_event *bp); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +extern int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h index 099414345865..4139f7650fe5 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -1,6 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_X86_HYPERV_H -#define _ASM_X86_HYPERV_H + +/* + * This file contains definitions from Hyper-V Hypervisor Top-Level Functional + * Specification (TLFS): + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + */ + +#ifndef _ASM_X86_HYPERV_TLFS_H +#define _ASM_X86_HYPERV_TLFS_H #include <linux/types.h> @@ -14,6 +21,7 @@ #define HYPERV_CPUID_FEATURES 0x40000003 #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 #define HYPERV_CPUID_MIN 0x40000005 @@ -27,9 +35,11 @@ /* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +#define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) /* Partition reference TSC MSR is available */ -#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) +#define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) +/* Partition Guest IDLE MSR is available */ +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10) /* A partition's reference time stamp counter (TSC) page */ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 @@ -52,7 +62,7 @@ * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through * HV_X64_MSR_STIMER3_COUNT) available */ -#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) +#define HV_MSR_SYNTIMER_AVAILABLE (1 << 3) /* * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) * are available @@ -77,6 +87,9 @@ /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) +/* stimer Direct Mode is available */ +#define HV_STIMER_DIRECT_MODE_AVAILABLE (1 << 19) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -149,17 +162,26 @@ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) /* - * Virtual APIC support + * Recommend not using Auto End-Of-Interrupt feature + */ +#define HV_DEPRECATING_AEOI_RECOMMENDED (1 << 9) + +/* + * Recommend using cluster IPI hypercalls. */ -#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) +#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10) /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) +/* Recommend using enlightened VMCS */ +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14) + /* - * Crash notification flag. + * Crash notification flags. */ -#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) +#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 @@ -189,7 +211,7 @@ #define HV_X64_MSR_EOI 0x40000070 #define HV_X64_MSR_ICR 0x40000071 #define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 /* Define synthetic interrupt controller model specific registers. */ #define HV_X64_MSR_SCONTROL 0x40000080 @@ -226,6 +248,9 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest idle MSR */ +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 + /* Hyper-V guest crash notification MSR's */ #define HV_X64_MSR_CRASH_P0 0x40000100 #define HV_X64_MSR_CRASH_P1 0x40000101 @@ -237,9 +262,62 @@ #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. + */ +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + }; +}; + +/* + * TSC page layout. + */ +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + +/* + * The guest OS needs to register the guest ID with the hypervisor. 
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how Linux guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * Linux and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to these yet
+ * unpublished guidelines.
+ *
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - Os Type; Linux is 0x100
+ * 55:48 - Distro specific identification
+ * 47:16 - Linux kernel version number
+ * 15:0 - Distro specific identification
+ *
+ *
+ */
+
+#define HV_LINUX_VENDOR_ID 0x8100
+
 /* TSC emulation after migration */
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
+#define HV_X64_NESTED_MSR_BITMAP BIT(19)
+
 struct hv_reenlightenment_control {
 	__u64 vector:8;
 	__u64 reserved1:8;
@@ -266,19 +344,28 @@ struct hv_tsc_emulation_status {
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
 	(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_IPI_LOW_VECTOR 0x10
+#define HV_IPI_HIGH_VECTOR 0xff
+
 /* Declare the various hypercall operations. */
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
 #define HVCALL_POST_MESSAGE 0x005c
 #define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+	(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
-	(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
+#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
 #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
@@ -294,16 +381,26 @@ struct hv_tsc_emulation_status {
 #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
 enum HV_GENERIC_SET_FORMAT {
-	HV_GENERIC_SET_SPARCE_4K,
+	HV_GENERIC_SET_SPARSE_4K,
 	HV_GENERIC_SET_ALL,
 };
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
 /* hypercall status code */
 #define HV_STATUS_SUCCESS 0
 #define HV_STATUS_INVALID_HYPERCALL_CODE 2
 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3
 #define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
 #define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
 #define HV_STATUS_INVALID_CONNECTION_ID 18
 #define HV_STATUS_INSUFFICIENT_BUFFERS 19
@@ -318,6 +415,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
 #define HV_SYNIC_SINT_COUNT (16)
 /* Define the expected SynIC version.
*/ #define HV_SYNIC_VERSION_1 (0x1) +/* Valid SynIC vectors are 16-255. */ +#define HV_SYNIC_FIRST_VALID_VECTOR (16) #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) @@ -412,10 +511,266 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ }; +/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved; + __u64 vtl_control[2]; + __u64 nested_enlightenments_control[2]; + __u32 enlighten_vmentry; + __u64 current_nested_vmcs; +}; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 
exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 hv_padding_32; + u32 hv_synthetic_controls; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } hv_enlightenments_control; + u32 hv_vp_id; + + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 padding64_5[7]; + u64 xss_exit_bitmap; + u64 padding64_6[7]; +}; + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + #define HV_STIMER_ENABLE (1ULL << 0) #define HV_STIMER_PERIODIC (1ULL << 1) #define HV_STIMER_LAZY (1ULL << 2) #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + +/* HvFlushGuestPhysicalAddressSpace hypercalls */ +struct hv_guest_mapping_flush { + u64 address_space; + u64 flags; +}; + +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ +struct hv_tlb_flush { + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +}; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +}; + #endif diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 5cdcdbd4d892..89789e8c80f6 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -3,6 +3,7 @@ #define _ASM_X86_I8259_H #include <linux/delay.h> +#include <asm/io.h> extern unsigned int cached_irq_mask; diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e 
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+	return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+		(insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+		 X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
 #endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index cf090e584202..0dd6b0f4000e 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -8,9 +8,6 @@
  * The "_X" parts are generally the EP and EX Xeons, or the
  * "Extreme" ones, like Broadwell-E.
  *
- * Things ending in "2" are usually because we have no better
- * name for them. There's no processor called "SILVERMONT2".
- *
  * While adding a new CPUID for a new microarchitecture, add a new
  * group to keep logically sorted out in chronological order. Within
  * that group keep the CPUID for the variants sorted by model number.
@@ -57,23 +54,40 @@
 /* "Small Core" Processors (Atom) */
-#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
-#define INTEL_FAM6_ATOM_LINCROFT 0x26
-#define INTEL_FAM6_ATOM_PENWELL 0x27
-#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
-#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
-#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
-#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
-#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
-#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
+#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+
+#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
+#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
+#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
+
+#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
+#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
+#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merrifield */
+
+#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
+#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
+
+#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
+#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
+#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
 /* Xeon Phi */
 #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
 #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+/* Useful macros */
+#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
+{ \
+	.vendor = X86_VENDOR_INTEL, \
+	.family = _family, \
+	.model = _model, \
+	.feature = X86_FEATURE_ANY, \
+	.driver_data = (kernel_ulong_t)&_driver_data \
+}
+
+#define INTEL_CPU_FAM6(_model, _driver_data) \
+	INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
+
 #endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index fe04491130ae..52f815a80539 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -80,35 +80,6 @@ enum intel_mid_cpu_type {
 extern enum
intel_mid_cpu_type __intel_mid_cpu_chip; -/** - * struct intel_mid_ops - Interface between intel-mid & sub archs - * @arch_setup: arch_setup function to re-initialize platform - * structures (x86_init, x86_platform_init) - * - * This structure can be extended if any new interface is required - * between intel-mid & its sub arch files. - */ -struct intel_mid_ops { - void (*arch_setup)(void); -}; - -/* Helper API's for INTEL_MID_OPS_INIT */ -#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ - [cpuid] = get_##cpuname##_ops - -/* Maximum number of CPU ops */ -#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) - -/* - * For every new cpu addition, a weak get_<cpuname>_ops() function needs be - * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. - */ -#define INTEL_MID_OPS_INIT { \ - DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ - DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ - DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ -}; - #ifdef CONFIG_X86_INTEL_MID static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) @@ -136,20 +107,6 @@ enum intel_mid_timer_options { extern enum intel_mid_timer_options intel_mid_timer_options; -/* - * Penwell uses spread spectrum clock, so the freq number is not exactly - * the same as reported by MSR based on SDM. - */ -#define FSB_FREQ_83SKU 83200 -#define FSB_FREQ_100SKU 99840 -#define FSB_FREQ_133SKU 133000 - -#define FSB_FREQ_167SKU 167000 -#define FSB_FREQ_200SKU 200000 -#define FSB_FREQ_267SKU 267000 -#define FSB_FREQ_333SKU 333000 -#define FSB_FREQ_400SKU 400000 - /* Bus Select SoC Fuse value */ #define BSEL_SOC_FUSE_MASK 0x7 /* FSB 133MHz */ diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index 62a9f4966b42..ae26df1c2789 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -8,6 +8,7 @@ /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS 8 +#define MAX_FIXED_PEBS_EVENTS 3 /* * A debug store configuration. 
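
The MAX_FIXED_PEBS_EVENTS addition above, together with the enlarged pebs_event_reset[] in the hunk below, implies a debug-store layout in which the reset values for the three fixed counters sit behind the eight general-purpose slots. A hedged sketch of the indexing this suggests (the helper is illustrative, not the perf code's own):

#define MAX_PEBS_EVENTS 8
#define MAX_FIXED_PEBS_EVENTS 3

/* Slot of a counter's reset value in pebs_event_reset[],
 * assuming fixed counter idx follows the 8 GP slots. */
static int pebs_reset_slot(int is_fixed, int idx)
{
	return is_fixed ? MAX_PEBS_EVENTS + idx : idx;
}

With that layout the array needs MAX_PEBS_EVENTS + MAX_FIXED_PEBS_EVENTS entries, which is exactly the new size.
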
@@ -23,7 +24,7 @@ struct debug_store {
 	u64 pebs_index;
 	u64 pebs_absolute_maximum;
 	u64 pebs_interrupt_threshold;
-	u64 pebs_event_reset[MAX_PEBS_EVENTS];
+	u64 pebs_event_reset[MAX_PEBS_EVENTS + MAX_FIXED_PEBS_EVENTS];
 } __aligned(PAGE_SIZE);
 DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
index 35555016b1be..0b44b1abe4d9 100644
--- a/arch/x86/include/asm/intel_mid_vrtc.h
+++ b/arch/x86/include/asm/intel_mid_vrtc.h
@@ -4,7 +4,7 @@
 extern unsigned char vrtc_cmos_read(unsigned char reg);
 extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec *now);
-extern int vrtc_set_mmss(const struct timespec *now);
+extern void vrtc_get_time(struct timespec64 *now);
+extern int vrtc_set_mmss(const struct timespec64 *now);
 #endif
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
new file mode 100644
index 000000000000..3cb002b1d0f9
--- /dev/null
+++ b/arch/x86/include/asm/intel_pconfig.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_X86_INTEL_PCONFIG_H
+#define _ASM_X86_INTEL_PCONFIG_H
+
+#include <asm/asm.h>
+#include <asm/processor.h>
+
+enum pconfig_target {
+	INVALID_TARGET = 0,
+	MKTME_TARGET = 1,
+	PCONFIG_TARGET_NR
+};
+
+int pconfig_target_supported(enum pconfig_target target);
+
+enum pconfig_leaf {
+	MKTME_KEY_PROGRAM = 0,
+	PCONFIG_LEAF_INVALID,
+};
+
+#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
+
+/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
+
+/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
+#define MKTME_KEYID_SET_KEY_DIRECT 0
+#define MKTME_KEYID_SET_KEY_RANDOM 1
+#define MKTME_KEYID_CLEAR_KEY 2
+#define MKTME_KEYID_NO_ENCRYPT 3
+
+/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
+#define MKTME_AES_XTS_128 (1 << 8)
+
+/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
+#define MKTME_PROG_SUCCESS 0
+#define MKTME_INVALID_PROG_CMD 1
+#define MKTME_ENTROPY_ERROR 2
+#define MKTME_INVALID_KEYID 3
+#define MKTME_INVALID_ENC_ALG 4
+#define MKTME_DEVICE_BUSY 5
+
+/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0).
*/ +struct mktme_key_program { + u16 keyid; + u32 keyid_ctrl; + u8 __rsvd[58]; + u8 key_field_1[64]; + u8 key_field_2[64]; +} __packed __aligned(256); + +static inline int mktme_key_program(struct mktme_key_program *key_program) +{ + unsigned long rax = MKTME_KEY_PROGRAM; + + if (!pconfig_target_supported(MKTME_TARGET)) + return -ENXIO; + + asm volatile(PCONFIG + : "=a" (rax), "=b" (key_program) + : "0" (rax), "1" (key_program) + : "memory", "cc"); + + return rax; +} + +#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index f6e5b9375d8c..832da8229cc7 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -94,10 +94,10 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #ifdef CONFIG_X86_64 -build_mmio_read(readq, "q", unsigned long, "=r", :"memory") -build_mmio_read(__readq, "q", unsigned long, "=r", ) -build_mmio_write(writeq, "q", unsigned long, "r", :"memory") -build_mmio_write(__writeq, "q", unsigned long, "r", ) +build_mmio_read(readq, "q", u64, "=r", :"memory") +build_mmio_read(__readq, "q", u64, "=r", ) +build_mmio_write(writeq, "q", u64, "r", :"memory") +build_mmio_write(__writeq, "q", u64, "r", ) #define readq_relaxed(a) __readq(a) #define writeq_relaxed(v, a) __writeq(v, a) @@ -187,11 +187,12 @@ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size) #define ioremap_nocache ioremap_nocache extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc - extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); #define ioremap_prot ioremap_prot +extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); +#define ioremap_encrypted ioremap_encrypted /** * ioremap - map bus memory into CPU space @@ -369,18 +370,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); -#ifdef CONFIG_XEN -#include <xen/xen.h> -struct bio_vec; - -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); - -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) -#endif /* CONFIG_XEN */ - #define IO_SPACE_LIMIT 0xffff #include <asm-generic/io.h> diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a8834dd546cd..fd20a2334885 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -183,16 +183,17 @@ extern void disable_ioapic_support(void); extern void __init io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); -extern void native_disable_io_apic(void); +extern void native_restore_boot_irq_mode(void); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { - return x86_io_apic_ops.read(apic, reg); + return x86_apic_ops.io_apic_read(apic, reg); } extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); -extern void disable_IO_APIC(void); +extern void clear_IO_APIC(void); +extern void restore_boot_irq_mode(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -228,10 +229,11 @@ static inline void mp_save_irq(struct mpc_intsrc *m) { } static inline 
void disable_ioapic_support(void) { } static inline void io_apic_init_mappings(void) { } #define native_io_apic_read NULL -#define native_disable_io_apic NULL +#define native_restore_boot_irq_mode NULL static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } +static inline void restore_boot_irq_mode(void) { } #endif diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 1e5d5d92eb40..baedab8ac538 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,13 +2,10 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; -int x86_dma_supported(struct device *dev, u64 mask); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 3de0489deade..5270ff39b9af 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -105,8 +105,10 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); * the PMIC bus while another driver is also accessing the PMIC bus, various bad * things happen. * - * To avoid these problems this function must be called before accessing the - * P-Unit or the PMIC, be it through iosf_mbi* functions or through other means. + * Call this function before sending requests to the P-Unit which may make it + * access the PMIC, be it through iosf_mbi* functions or through other means. + * This function will block all kernel access to the PMIC I2C bus, so that the + * P-Unit can safely access the PMIC over the shared I2C bus. * * Note on these systems the i2c-bus driver will request a semaphore from the * P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing @@ -123,6 +125,31 @@ void iosf_mbi_punit_acquire(void); void iosf_mbi_punit_release(void); /** + * iosf_mbi_block_punit_i2c_access() - Block P-Unit accesses to the PMIC bus + * + * Call this function to block P-Unit access to the PMIC I2C bus, so that the + * kernel can safely access the PMIC over the shared I2C bus. + * + * This function acquires the P-Unit bus semaphore and notifies + * pmic_bus_access_notifier listeners that they may no longer access the + * P-Unit in a way which may cause it to access the shared I2C bus. + * + * Note this function may be called multiple times and the bus will not + * be released until iosf_mbi_unblock_punit_i2c_access() has been called the + * same number of times. + * + * Return: Nonzero on error + */ +int iosf_mbi_block_punit_i2c_access(void); + +/* + * iosf_mbi_unblock_punit_i2c_access() - Release PMIC I2C bus block + * + * Release the I2C access block obtained through iosf_mbi_block_punit_i2c_access(). + */ +void iosf_mbi_unblock_punit_i2c_access(void); + +/** * iosf_mbi_register_pmic_bus_access_notifier - Register PMIC bus notifier * * This function can be used by drivers which may need to acquire P-Unit @@ -159,14 +186,6 @@ int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( struct notifier_block *nb); /** - * iosf_mbi_call_pmic_bus_access_notifier_chain - Call PMIC bus notifier chain - * - * @val: action to pass into listener's notifier_call function - * @v: data pointer to pass into listener's notifier_call function - */ -int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v); - -/** - * iosf_mbi_assert_punit_acquired - Assert that the P-Unit has been acquired.
*/ void iosf_mbi_assert_punit_acquired(void); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 023b4a9fc846..67ed72f31cc2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -33,6 +33,11 @@ enum irq_remap_cap { IRQ_POSTING_CAP = 0, }; +enum { + IRQ_REMAP_XAPIC_MODE, + IRQ_REMAP_X2APIC_MODE, +}; + struct vcpu_data { u64 pi_desc_addr; /* Physical address of PI Descriptor */ u32 vector; /* Guest vector of the interrupt */ @@ -40,6 +45,8 @@ struct vcpu_data { #ifdef CONFIG_IRQ_REMAP +extern raw_spinlock_t irq_2_ir_lock; + extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e71c1120426b..548d90bbf919 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -34,11 +34,6 @@ * (0x80 is the syscall vector, 0x30-0x3f are for ISA) */ #define FIRST_EXTERNAL_VECTOR 0x20 -/* - * We start allocating at 0x21 to spread out vectors evenly between - * priority levels. (0x80 is the syscall vector) - */ -#define VECTOR_OFFSET_START 1 /* * Reserve the lowest usable vector (and hence lowest priority) 0x20 for @@ -106,9 +101,10 @@ #if IS_ENABLED(CONFIG_HYPERV) #define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#define HYPERV_STIMER0_VECTOR 0xed #endif -#define LOCAL_TIMER_VECTOR 0xed +#define LOCAL_TIMER_VECTOR 0xec #define NR_VECTORS 256 @@ -118,8 +114,6 @@ #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif -#define FPU_IRQ 13 - /* * Size the maximum number of interrupts. * diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 89f08955fff7..058e40fed167 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,9 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +/* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ +extern inline unsigned long native_save_fl(void); +extern inline unsigned long native_save_fl(void) { unsigned long flags; @@ -31,7 +33,8 @@ static inline unsigned long native_save_fl(void) return flags; } -static inline void native_restore_fl(unsigned long flags) +extern inline void native_restore_fl(unsigned long flags); +extern inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" : /* no output */ @@ -61,7 +64,7 @@ static inline __cpuidle void native_halt(void) #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ @@ -120,6 +123,10 @@ static inline notrace unsigned long arch_local_irq_save(void) #define DISABLE_INTERRUPTS(x) cli #ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif + #define SWAPGS swapgs /* * Currently paravirt can't handle swapgs nicely when we @@ -132,8 +139,6 @@ static inline notrace unsigned long arch_local_irq_save(void) */ #define SWAPGS_UNSAFE_STACK swapgs -#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ - #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ @@ -142,18 +147,12 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl -#ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax -#endif #else #define INTERRUPT_RETURN iret -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define GET_CR0_INTO_EAX movl %cr0, %eax #endif - #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT 
*/ +#endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static inline int arch_irqs_disabled_flags(unsigned long flags) diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h index 875b54376689..a34897aef2c2 100644 --- a/arch/x86/include/asm/jailhouse_para.h +++ b/arch/x86/include/asm/jailhouse_para.h @@ -1,7 +1,7 @@ -/* SPDX-License-Identifier: GPL2.0 */ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Jailhouse paravirt_ops implementation + * Jailhouse paravirt detection * * Copyright (c) Siemens AG, 2015-2017 * diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 8c0de4282659..a5fb34fe56a4 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -2,19 +2,6 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#ifndef HAVE_JUMP_LABEL -/* - * For better or for worse, if jump labels (the gcc extension) are missing, - * then the entire static branch patching infrastructure is compiled out. - * If that happens, the code in here will malfunction. Raise a compiler - * error instead. - * - * In theory, jump labels and the static branch patching infrastructure - * could be decoupled to fix this. - */ -#error asm/jump_label.h included on a non-jump-label kernel -#endif - #define JUMP_LABEL_NOP_SIZE 5 #ifdef CONFIG_X86_64 @@ -33,14 +20,9 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" - ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" - ".popsection \n\t" - : : "i" (key), "i" (branch) : : l_yes); - + asm_volatile_goto("STATIC_BRANCH_NOP l_yes=\"%l[l_yes]\" key=\"%c0\" " + "branch=\"%c1\"" + : : "i" (key), "i" (branch) : : l_yes); return false; l_yes: return true; @@ -48,13 +30,8 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" - ".popsection \n\t" + asm_volatile_goto("STATIC_BRANCH_JMP l_yes=\"%l[l_yes]\" key=\"%c0\" " + "branch=\"%c1\"" : : "i" (key), "i" (branch) : : l_yes); return false; @@ -62,49 +39,28 @@ l_yes: return true; } -#ifdef CONFIG_X86_64 -typedef u64 jump_label_t; -#else -typedef u32 jump_label_t; -#endif - -struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; -}; - #else /* __ASSEMBLY__ */ -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte STATIC_KEY_INIT_NOP - .endif +.macro STATIC_BRANCH_NOP l_yes:req key:req branch:req +.Lstatic_branch_nop_\@: + .byte STATIC_KEY_INIT_NOP +.Lstatic_branch_no_after_\@: .pushsection __jump_table, "aw" _ASM_ALIGN - _ASM_PTR .Lstatic_jump_\@, \target, \key + .long .Lstatic_branch_nop_\@ - ., \l_yes - . + _ASM_PTR \key + \branch - . 
.popsection .endm -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte STATIC_KEY_INIT_NOP - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif +.macro STATIC_BRANCH_JMP l_yes:req key:req branch:req +.Lstatic_branch_jmp_\@: + .byte 0xe9 + .long \l_yes - .Lstatic_branch_jmp_after_\@ +.Lstatic_branch_jmp_after_\@: .pushsection __jump_table, "aw" _ASM_ALIGN - _ASM_PTR .Lstatic_jump_\@, \target, \key + 1 + .long .Lstatic_branch_jmp_\@ - ., \l_yes - . + _ASM_PTR \key + \branch - . .popsection .endm diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 460991e3b529..db7ba2feb947 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -5,10 +5,6 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY -extern unsigned long page_offset_base; -extern unsigned long vmalloc_base; -extern unsigned long vmemmap_base; - void kernel_randomize_memory(void); #else static inline void kernel_randomize_memory(void) { } diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 395c9631e000..75f1e35e7c15 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -22,10 +22,20 @@ enum die_val { DIE_NMIUNKNOWN, }; +enum show_regs_mode { + SHOW_REGS_SHORT, + /* + * For when userspace crashed, but we don't think it's our fault, and + * therefore don't print kernel registers. + */ + SHOW_REGS_USER, + SHOW_REGS_ALL +}; + extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); -extern void __show_regs(struct pt_regs *regs, int all); +extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); extern void show_iret_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h index 9f07cff43705..df89ee7d3e9e 100644 --- a/arch/x86/include/asm/kexec-bzimage64.h +++ b/arch/x86/include/asm/kexec-bzimage64.h @@ -2,6 +2,6 @@ #ifndef _ASM_KEXEC_BZIMAGE64_H #define _ASM_KEXEC_BZIMAGE64_H -extern struct kexec_file_ops kexec_bzImage64_ops; +extern const struct kexec_file_ops kexec_bzImage64_ops; #endif /* _ASM_KEXE_BZIMAGE64_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index f327236f0fa7..003f2daa3b0f 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -21,6 +21,7 @@ #ifndef __ASSEMBLY__ #include <linux/string.h> +#include <linux/kernel.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -67,7 +68,7 @@ struct kimage; /* Memory to backup during crash kdump */ #define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */ +#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ /* * CPU does not save ss and sp on stack if execution is already @@ -132,7 +133,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); #endif - newregs->ip = (unsigned long)current_text_addr(); + newregs->ip = _THIS_IP_; } } diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 367d99cff426..c8cec1b39b88 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -78,7 
+78,7 @@ struct arch_specific_insn { * boostable = true: This instruction has been boosted: we have * added a relative jump after the instruction copy in insn, * so no single-step and fixup are needed (unless there's - * a post_handler or break_handler). + * a post_handler). */ bool boostable; bool if_modifier; @@ -111,9 +111,6 @@ struct kprobe_ctlblk { unsigned long kprobe_status; unsigned long kprobe_old_flags; unsigned long kprobe_saved_flags; - unsigned long *jprobe_saved_sp; - struct pt_regs jprobe_saved_regs; - kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; struct prev_kprobe prev_kprobe; }; diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b24b1c8b3979..93c4bf598fb0 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -107,11 +107,12 @@ struct x86_emulate_ops { * @addr: [IN ] Linear address from which to read. * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. + * @system:[IN ] Whether the access is forced to be at CPL0. */ int (*read_std)(struct x86_emulate_ctxt *ctxt, unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); + struct x86_exception *fault, bool system); /* * read_phys: Read bytes of standard (non-emulated/special) memory. @@ -129,10 +130,11 @@ * @addr: [IN ] Linear address to which to write. * @val: [IN ] Value to write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. + * @system:[IN ] Whether the access is forced to be at CPL0. */ int (*write_std)(struct x86_emulate_ctxt *ctxt, unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); + struct x86_exception *fault, bool system); /* * fetch: Read bytes of standard (non-emulated/special) memory. * Used for instruction fetch.
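The read_std()/write_std() hooks above gain a "system" flag so that implicit supervisor accesses (descriptor-table walks, task-switch state) can be checked as if performed at CPL0 rather than at the guest's current privilege level. A minimal sketch of a caller of the new signature follows; the helper name and its surrounding context are illustrative assumptions, not part of this patch:

/* Hypothetical emulator helper: fetch a descriptor on the guest's behalf.
 * A descriptor-table walk is an implicit supervisor access, so system=true
 * asks the backend to apply CPL0 permission checks instead of the guest's
 * current CPL. */
static int emul_read_descriptor(struct x86_emulate_ctxt *ctxt,
				unsigned long desc_addr, void *desc,
				unsigned int size)
{
	struct x86_exception fault;

	return ctxt->ops->read_std(ctxt, desc_addr, desc, size,
				   &fault, true /* system */);
}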
@@ -362,6 +364,10 @@ struct x86_emulate_ctxt { #define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 #define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 +#define X86EMUL_CPUID_VENDOR_HygonGenuine_ebx 0x6f677948 +#define X86EMUL_CPUID_VENDOR_HygonGenuine_ecx 0x656e6975 +#define X86EMUL_CPUID_VENDOR_HygonGenuine_edx 0x6e65476e + #define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 #define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e #define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 diff --git a/arch/x86/include/asm/kvm_guest.h b/arch/x86/include/asm/kvm_guest.h deleted file mode 100644 index 46185263d9c2..000000000000 --- a/arch/x86/include/asm/kvm_guest.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_KVM_GUEST_H -#define _ASM_X86_KVM_GUEST_H - -int kvm_setup_vsyscall_timeinfo(void); - -#endif /* _ASM_X86_KVM_GUEST_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b605a5b6a30c..55e51ff7e421 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -17,6 +17,7 @@ #include <linux/tracepoint.h> #include <linux/cpumask.h> #include <linux/irq_work.h> +#include <linux/irq.h> #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -34,6 +35,7 @@ #include <asm/msr-index.h> #include <asm/asm.h> #include <asm/kvm_page_track.h> +#include <asm/hyperv-tlfs.h> #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 @@ -53,6 +55,7 @@ #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) +#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5) #define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) @@ -73,13 +76,14 @@ #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) +#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) +#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_PCID_INVD BIT_64(63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -98,7 +102,15 @@ #define UNMAPPED_GVA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ -#define KVM_NR_PAGE_SIZES 3 +enum { + PT_PAGE_TABLE_LEVEL = 1, + PT_DIRECTORY_LEVEL = 2, + PT_PDPE_LEVEL = 3, + /* set max level to the biggest one */ + PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, +}; +#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ + PT_PAGE_TABLE_LEVEL + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) @@ -173,6 +185,7 @@ enum { #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) #define DR6_FIXED_1 0xfffe0ff0 #define DR6_INIT 0xffff0ff0 @@ -243,7 +256,7 @@ struct kvm_mmu_memory_cache { * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. 
*/ union kvm_mmu_page_role { - unsigned word; + u32 word; struct { unsigned level:4; unsigned cr4_pae:1; @@ -256,7 +269,8 @@ union kvm_mmu_page_role { unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; unsigned ad_disabled:1; - unsigned :7; + unsigned guest_mode:1; + unsigned :6; /* * This is left at the top of the word so that @@ -268,6 +282,34 @@ union kvm_mmu_page_role { }; }; +union kvm_mmu_extended_role { +/* + * This structure complements kvm_mmu_page_role caching everything needed for + * MMU configuration. If nothing in both these structures changed, MMU + * re-configuration can be skipped. @valid bit is set on first usage so we don't + * treat all-zero structure as valid data. + */ + u32 word; + struct { + unsigned int valid:1; + unsigned int execonly:1; + unsigned int cr0_pg:1; + unsigned int cr4_pse:1; + unsigned int cr4_pke:1; + unsigned int cr4_smap:1; + unsigned int cr4_smep:1; + unsigned int cr4_la57:1; + }; +}; + +union kvm_mmu_role { + u64 as_u64; + struct { + union kvm_mmu_page_role base; + union kvm_mmu_extended_role ext; + }; +}; + struct kvm_rmap_head { unsigned long val; }; @@ -275,18 +317,18 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + bool unsync; /* * The following two entries are used to key the shadow page in the * hash table. */ - gfn_t gfn; union kvm_mmu_page_role role; + gfn_t gfn; u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ @@ -322,6 +364,16 @@ struct rsvd_bits_validate { u64 bad_mt_xwr; }; +struct kvm_mmu_root_info { + gpa_t cr3; + hpa_t hpa; +}; + +#define KVM_MMU_ROOT_INFO_INVALID \ + ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + +#define KVM_MMU_NUM_PREV_ROOTS 3 + /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the @@ -341,15 +393,16 @@ struct kvm_mmu { struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); - void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; - union kvm_mmu_page_role base_role; + union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; u8 ept_ad; bool direct_map; + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; /* * Bitmap; bit set = permission fault @@ -474,6 +527,7 @@ struct kvm_vcpu_hv { struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + cpumask_t tlb_flush; }; struct kvm_vcpu_arch { @@ -498,6 +552,7 @@ struct kvm_vcpu_arch { u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool apicv_active; + bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; @@ -516,7 +571,13 @@ struct kvm_vcpu_arch { * the paging mode of the l1 guest. This context is always used to * handle faults. 
*/ - struct kvm_mmu mmu; + struct kvm_mmu *mmu; + + /* Non-nested MMU for L1 */ + struct kvm_mmu root_mmu; + + /* L1 MMU when running nested */ + struct kvm_mmu guest_mmu; /* * Paging state of an L2 guest (used for nested npt) @@ -567,11 +628,13 @@ struct kvm_vcpu_arch { bool has_error_code; u8 nr; u32 error_code; + unsigned long payload; + bool has_payload; u8 nested_apf; } exception; struct kvm_queued_interrupt { - bool pending; + bool injected; bool soft; u8 nr; } interrupt; @@ -708,6 +771,9 @@ struct kvm_vcpu_arch { /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; + + /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ + bool l1tf_flush_l1d; }; struct kvm_lpage_info { @@ -754,6 +820,15 @@ struct kvm_hv { u64 hv_crash_ctl; HV_REFERENCE_TSC_PAGE tsc_ref; + + struct idr conn_to_evt; + + u64 hv_reenlightenment_control; + u64 hv_tsc_emulation_control; + u64 hv_tsc_emulation_status; + + /* How many vCPUs have VP index != vCPU index */ + atomic_t num_mismatched_vp_indexes; }; enum kvm_irqchip_mode { @@ -762,15 +837,6 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; -struct kvm_sev_info { - bool active; /* SEV enabled guest */ - unsigned int asid; /* ASID used for this guest */ - unsigned int handle; /* SEV firmware handle */ - int fd; /* SEV device fd */ - unsigned long pages_locked; /* Number of pages locked */ - struct list_head regions_list; /* List of registered regions */ -}; - struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -800,13 +866,13 @@ struct kvm_arch { struct mutex apic_map_lock; struct kvm_apic_map *apic_map; - unsigned int tss_addr; bool apic_access_page_done; gpa_t wall_clock; - bool ept_identity_pagetable_done; - gpa_t ept_identity_map_addr; + bool mwait_in_guest; + bool hlt_in_guest; + bool pause_in_guest; unsigned long irq_sources_bitmap; s64 kvmclock_offset; @@ -849,17 +915,11 @@ struct kvm_arch { bool disabled_lapic_found; - /* Struct members for AVIC */ - u32 avic_vm_id; - u32 ldr_mode; - struct page *avic_logical_id_table_page; - struct page *avic_physical_id_table_page; - struct hlist_node hnode; - bool x2apic_format; bool x2apic_broadcast_quirk_disabled; - struct kvm_sev_info sev_info; + bool guest_can_read_msr_platform_info; + bool exception_payload_enabled; }; struct kvm_vm_stat { @@ -888,6 +948,7 @@ struct kvm_vcpu_stat { u64 signal_exits; u64 irq_window_exits; u64 nmi_window_exits; + u64 l1d_flush; u64 halt_exits; u64 halt_successful_poll; u64 halt_attempted_poll; @@ -933,9 +994,11 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); + struct kvm *(*vm_alloc)(void); + void (*vm_free)(struct kvm *); int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); @@ -978,6 +1041,15 @@ struct kvm_x86_ops { void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); + int (*tlb_remote_flush)(struct kvm *kvm); + + /* + * Flush any TLB entries associated with the given GVA. + * Does not need to flush GPA->HPA mappings. + * Can potentially get non-canonical addresses through INVLPGs, which + * the implementation may choose to ignore if appropriate. 
+ */ + void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); void (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu); @@ -1001,12 +1073,14 @@ struct kvm_x86_ops { void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); + bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); @@ -1019,6 +1093,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); @@ -1032,6 +1107,7 @@ struct kvm_x86_ops { bool (*umip_emulated)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1088,6 +1164,14 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); + int (*get_nested_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_nested_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); @@ -1098,6 +1182,9 @@ struct kvm_x86_ops { int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*get_msr_feature)(struct kvm_msr_entry *entry); + + int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); }; struct kvm_arch_async_pf { @@ -1109,12 +1196,32 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; +#define __KVM_HAVE_ARCH_VM_ALLOC +static inline struct kvm *kvm_arch_alloc_vm(void) +{ + return kvm_x86_ops->vm_alloc(); +} + +static inline void kvm_arch_free_vm(struct kvm *kvm) +{ + return kvm_x86_ops->vm_free(kvm); +} + +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB +static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +{ + if (kvm_x86_ops->tlb_remote_flush && + !kvm_x86_ops->tlb_remote_flush(kvm)) + return 0; + else + return -ENOTSUPP; +} + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, @@ -1185,17 +1292,12 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_RETRY (1 << 3) -#define EMULTYPE_NO_REEXECUTE (1 << 4) -int 
x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, - int emulation_type, void *insn, int insn_len); - -static inline int emulate_instruction(struct kvm_vcpu *vcpu, - int emulation_type) -{ - return x86_emulate_instruction(vcpu, 0, - emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); -} +#define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) +#define EMULTYPE_VMWARE (1 << 5) +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); @@ -1204,8 +1306,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); struct x86_emulate_ctxt; -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); -int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_vcpu_halt(struct kvm_vcpu *vcpu); @@ -1259,6 +1360,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } +#define KVM_MMU_ROOT_CURRENT BIT(0) +#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) +#define KVM_MMU_ROOTS_ALL (~0UL) + int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); @@ -1270,6 +1375,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + ulong roots_to_free); gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, @@ -1288,7 +1395,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); void kvm_enable_tdp(void); void kvm_disable_tdp(void); @@ -1391,7 +1499,6 @@ asmlinkage void kvm_spurious_fault(void); ____kvm_handle_fault_on_reboot(insn, "") #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); @@ -1403,6 +1510,11 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); +int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, + unsigned long ipi_bitmap_high, u32 min, + unsigned long icr, int op_64_bit); + +u64 kvm_get_arch_capabilities(void); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); @@ -1426,6 +1538,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int 
kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); +void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b407dda2bd7..4c723632c036 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -7,7 +7,6 @@ #include <uapi/asm/kvm_para.h> extern void kvmclock_init(void); -extern int kvm_register_clock(char *txt); #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); @@ -88,6 +87,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, #ifdef CONFIG_KVM_GUEST bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); +unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); @@ -115,6 +115,11 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index c91083c59845..349a47acaa4a 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -53,7 +53,7 @@ static inline void local_sub(long i, local_t *l) */ static inline bool local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); + return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i); } /** @@ -66,7 +66,7 @@ static inline bool local_sub_and_test(long i, local_t *l) */ static inline bool local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); + return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e); } /** @@ -79,7 +79,7 @@ static inline bool local_dec_and_test(local_t *l) */ static inline bool local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); + return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e); } /** @@ -93,7 +93,7 @@ static inline bool local_inc_and_test(local_t *l) */ static inline bool local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); + return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i); } /** diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 1775a32f7ea6..97198001e567 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(const struct timespec *now); -extern void mach_get_cmos_time(struct timespec *now); +extern int mach_set_rtc_mmss(const struct timespec64 *now); +extern void mach_get_cmos_time(struct timespec64 *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 96ea4b5ba658..c1a812bd5a27 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -10,41 +10,44 @@ /* MCG_CAP register defines */ #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ +#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ +#define MCG_CMCI_P 
BIT_ULL(10) /* CMCI supported */ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) -#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ -#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ -#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */ +#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ +#define MCG_ELOG_P BIT_ULL(26) /* Extended error log supported */ +#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ /* MCG_STATUS register defines */ -#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ -#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */ +#define MCG_STATUS_RIPV BIT_ULL(0) /* restart ip valid */ +#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ +#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ /* MCG_EXT_CTL register defines */ -#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */ +#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ /* MCi_STATUS register defines */ -#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ -#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ -#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ -#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ -#define MCI_STATUS_AR (1ULL<<55) /* Action required */ +#define MCI_STATUS_VAL BIT_ULL(63) /* valid error */ +#define MCI_STATUS_OVER BIT_ULL(62) /* previous errors lost */ +#define MCI_STATUS_UC BIT_ULL(61) /* uncorrected error */ +#define MCI_STATUS_EN BIT_ULL(60) /* error enabled */ +#define MCI_STATUS_MISCV BIT_ULL(59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV BIT_ULL(58) /* addr reg. valid */ +#define MCI_STATUS_PCC BIT_ULL(57) /* processor context corrupt */ +#define MCI_STATUS_S BIT_ULL(56) /* Signaled machine check */ +#define MCI_STATUS_AR BIT_ULL(55) /* Action required */ +#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ +#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) +#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) /* AMD-specific bits */ -#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ -#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */ -#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */ -#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ +#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ +#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. 
valid */ +#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ +#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ /* * McaX field if set indicates a given bank supports MCA extensions: @@ -84,7 +87,7 @@ #define MCI_MISC_ADDR_GENERIC 7 /* generic */ /* CTL2 register defines */ -#define MCI_CTL2_CMCI_EN (1ULL << 30) +#define MCI_CTL2_CMCI_EN BIT_ULL(30) #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL #define MCJ_CTX_MASK 3 @@ -138,58 +141,6 @@ struct mce_log_buffer { struct mce entry[MCE_LOG_LEN]; }; -struct mca_config { - bool dont_log_ce; - bool cmci_disabled; - bool lmce_disabled; - bool ignore_ce; - bool disabled; - bool ser; - bool recovery; - bool bios_cmci_threshold; - u8 banks; - s8 bootlog; - int tolerant; - int monarch_timeout; - int panic_timeout; - u32 rip_msr; -}; - -struct mce_vendor_flags { - /* - * Indicates that overflow conditions are not fatal, when set. - */ - __u64 overflow_recov : 1, - - /* - * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and - * Recovery. It indicates support for data poisoning in HW and deferred - * error interrupts. - */ - succor : 1, - - /* - * (AMD) SMCA: This bit indicates support for Scalable MCA which expands - * the register space for each MCA bank and also increases number of - * banks. Also, to accommodate the new banks and registers, the MCA - * register space is moved to a new MSR range. - */ - smca : 1, - - __reserved_0 : 61; -}; - -struct mca_msr_regs { - u32 (*ctl) (int bank); - u32 (*status) (int bank); - u32 (*addr) (int bank); - u32 (*misc) (int bank); -}; - -extern struct mce_vendor_flags mce_flags; - -extern struct mca_msr_regs msr_ops; - enum mce_notifier_prios { MCE_PRIO_FIRST = INT_MAX, MCE_PRIO_SRAO = INT_MAX - 1, @@ -200,6 +151,7 @@ enum mce_notifier_prios { MCE_PRIO_LOWEST = 0, }; +struct notifier_block; extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -265,8 +217,12 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif +static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } + int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); +bool mce_is_correctable(struct mce *m); +int mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); @@ -346,6 +302,7 @@ enum smca_bank_types { SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ + SMCA_RESERVED, /* Reserved */ SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 000000000000..eb59804b6201 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_TEST +extern unsigned long mcsafe_test_src; +extern unsigned long mcsafe_test_dst; + +static inline void mcsafe_inject_src(void *addr) +{ + if (addr) + mcsafe_test_src = (unsigned long) addr; + else + mcsafe_test_src = ~0UL; +} + +static inline void mcsafe_inject_dst(void *addr) +{ + if (addr) + mcsafe_test_dst = (unsigned long) addr; + else + mcsafe_test_dst = ~0UL; +} +#else /* 
CONFIG_MCSAFE_TEST */ +static inline void mcsafe_inject_src(void *addr) +{ +} + +static inline void mcsafe_inject_dst(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_TEST */ + +#else /* __ASSEMBLY__ */ +#include <asm/export.h> + +#ifdef CONFIG_MCSAFE_TEST +.macro MCSAFE_TEST_CTL + .pushsection .data + .align 8 + .globl mcsafe_test_src + mcsafe_test_src: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_src) + .globl mcsafe_test_dst + mcsafe_test_dst: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_dst) + .popsection +.endm + +.macro MCSAFE_TEST_SRC reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_src, %r9 + ja \target +.endm + +.macro MCSAFE_TEST_DST reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_dst, %r9 + ja \target +.endm +#else +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* CONFIG_MCSAFE_TEST */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 22c5f3e6f820..616f8e637bc3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -22,6 +22,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; +extern bool sev_enabled; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, @@ -47,12 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); - -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); +void __init mem_encrypt_free_decrypted_mem(void); bool sme_active(void); bool sev_active(void); +#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -78,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __bss_decrypted + #endif /* CONFIG_AMD_MEM_ENCRYPT */ /* @@ -89,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; #define __sme_pa(x) (__pa(x) | sme_me_mask) #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) +extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 6cf0e4cb7b97..2b7cc5397f80 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -6,20 +6,6 @@ #include <linux/earlycpio.h> #include <linux/initrd.h> -#define native_rdmsr(msr, val1, val2) \ -do { \ - u64 __val = __rdmsr((msr)); \ - (void)((val1) = (u32)__val); \ - (void)((val2) = (u32)(__val >> 32)); \ -} while (0) - -#define native_wrmsr(msr, low, high) \ - __wrmsr(msr, low, high) - -#define native_wrmsrl(msr, val) \ - __wrmsr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) - struct ucode_patch { struct list_head plist; void *data; /* Intel uses only this one */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 1de72ce514cd..0ca50611e8ce 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -16,19 +16,20 @@ extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_activate_mm(struct 
mm_struct *prev, struct mm_struct *next) { } -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ #ifdef CONFIG_PERF_EVENTS -extern struct static_key rdpmc_always_available; + +DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_false(&rdpmc_always_available) || + if (static_branch_unlikely(&rdpmc_always_available_key) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else @@ -70,12 +71,7 @@ struct ldt_struct { static inline void *ldt_slot_va(int slot) { -#ifdef CONFIG_X86_64 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -#else - BUG(); - return (void *)fix_to_virt(FIX_HOLE); -#endif } /* @@ -192,7 +188,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { - /* pkey 0 is the default and always allocated */ + /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; @@ -287,21 +283,6 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, mpx_notify_unmap(mm, vma, start, end); } -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -static inline int vma_pkey(struct vm_area_struct *vma) -{ - unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | - VM_PKEY_BIT2 | VM_PKEY_BIT3; - - return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; -} -#else -static inline int vma_pkey(struct vm_area_struct *vma) -{ - return 0; -} -#endif - /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 61eb4b63c5ec..d0b1434fb0b6 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -57,8 +57,14 @@ #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDSTA_ERROR_CODE 0x3 +struct mpx_fault_info { + void __user *addr; + void __user *lower; + void __user *upper; +}; + #ifdef CONFIG_X86_INTEL_MPX -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); +int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); int mpx_handle_bd_fault(void); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { @@ -78,9 +84,9 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); #else -static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) +static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { - return NULL; + return -EINVAL; } static inline int mpx_handle_bd_fault(void) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 25283f7eb299..1d0a7778e163 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -6,32 +6,16 @@ #include <linux/atomic.h> #include <linux/nmi.h> #include <asm/io.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HVCPUID_VERSION_FEATURES). 
- */ -enum hv_cpuid_function { - HVCPUID_VERSION_FEATURES = 0x00000001, - HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, - HVCPUID_INTERFACE = 0x40000001, - - /* - * The remaining functions depend on the value of - * HVCPUID_INTERFACE - */ - HVCPUID_VERSION = 0x40000002, - HVCPUID_FEATURES = 0x40000003, - HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, - HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, -}; +#define VP_INVAL U32_MAX struct ms_hyperv_info { u32 features; u32 misc_features; u32 hints; + u32 nested_features; u32 max_vp_index; u32 max_lp_index; }; @@ -39,57 +23,7 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; /* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - }; -}; - -/* - * TSC page layout. - */ - -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -}; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - -/* - * Generate the guest ID based on the guideline described above. + * Generate the guest ID. */ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, @@ -142,8 +76,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } -#define hv_init_timer(timer, tick) wrmsrl(timer, tick) -#define hv_init_timer_config(config, val) wrmsrl(config, val) +#define hv_init_timer(timer, tick) \ + wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick) +#define hv_init_timer_config(timer, val) \ + wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val) #define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) #define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) @@ -156,8 +92,13 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) -#define hv_get_synint_state(int_num, val) rdmsrl(int_num, val) -#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val) +#define hv_get_synint_state(int_num, val) \ + rdmsrl(HV_X64_MSR_SINT0 + int_num, val) +#define hv_set_synint_state(int_num, val) \ + wrmsrl(HV_X64_MSR_SINT0 + int_num, val) + +#define hv_get_crash_ctl(val) \ + rdmsrl(HV_X64_MSR_CRASH_CTL, val) void hyperv_callback_vector(void); void hyperv_reenlightenment_vector(void); @@ -173,9 +114,23 @@ void hv_remove_kexec_handler(void); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); +/* + * Routines for stimer0 Direct Mode handling. + * On x86/x64, there are no percpu actions to take. 
+ */ +void hv_stimer0_vector_handler(struct pt_regs *regs); +void hv_stimer0_callback_vector(void); +int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); +void hv_remove_stimer0_irq(int irq); + +static inline void hv_enable_stimer0_percpu_irq(int irq) {} +static inline void hv_disable_stimer0_percpu_irq(int irq) {} + + #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; +extern void __percpu **hyperv_pcpu_input_arg; static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { @@ -215,14 +170,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) - /* Fast hypercall with 8 bytes of input and no output */ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) { @@ -254,6 +201,40 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) return hv_status; } +/* Fast hypercall with 16 bytes of input */ +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +{ + u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + +#ifdef CONFIG_X86_64 + { + __asm__ __volatile__("mov %4, %%r8\n" + CALL_NOSPEC + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : "r" (input2), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "r8", "r9", "r10", "r11"); + } +#else + { + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), ASM_CALL_CONSTRAINT + : "A" (control), "b" (input1_hi), + "D"(input2_hi), "S"(input2_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); + } +#endif + return hv_status; +} + /* * Rep hypercalls. Callers of these functions are supposed to ensure that * rep_count and varhead_size comply with Hyper-V hypercall definition. @@ -294,6 +275,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, */ extern u32 *hv_vp_index; extern u32 hv_max_vp_index; +extern struct hv_vp_assist_page **hv_vp_assist_page; + +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + if (!hv_vp_assist_page) + return NULL; + + return hv_vp_assist_page[cpu]; +} /** * hv_cpu_number_to_vp_number() - Map CPU to VP. @@ -311,10 +301,45 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -void hyperv_init(void); +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex + * structs are not cleared between calls; we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + vpset->bank_contents[vcpu_bank] = 0; + + /* + * Some banks may end up being empty but this is acceptable.
+ */ + for_each_cpu(cpu, cpus) { + vcpu = hv_cpu_number_to_vp_number(cpu); + if (vcpu == VP_INVAL) + return -1; + vcpu_bank = vcpu / 64; + vcpu_offset = vcpu % 64; + __set_bit(vcpu_offset, (unsigned long *) + &vpset->bank_contents[vcpu_bank]); + if (vcpu_bank >= nr_bank) + nr_bank = vcpu_bank + 1; + } + vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); + return nr_bank; +} + +void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); +void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); void hyperv_cleanup(void); @@ -322,6 +347,16 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); +int hyperv_flush_guest_mapping(u64 as); + +#ifdef CONFIG_X86_64 +void hv_apic_init(void); +void __init hv_init_spinlocks(void); +bool hv_vcpu_is_preempted(int vcpu); +#else +static inline void hv_apic_init(void) {} +#endif + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } @@ -330,6 +365,11 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + return NULL; +} +static inline int hyperv_flush_guest_mapping(u64 as) { return -1; } #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c9084dedfcfa..80f4a4f38c79 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,6 +42,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -60,14 +62,26 @@ #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ + +#define MSR_IA32_FLUSH_CMD 0x0000010b +#define L1D_FLUSH (1 << 0) /* + * Writeback and invalidate the + * L1 data cache. 
+ */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e @@ -150,6 +164,7 @@ #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) @@ -340,6 +355,8 @@ #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -353,7 +370,21 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL +#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) +#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4) +#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6) +#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8) +#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10) + #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR +#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2) +#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4) +#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6) +#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8) +#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10) + #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 #define MSR_F15H_PTSC 0xc0010280 diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 30df295f6d94..91e4cf189914 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,6 +108,20 @@ static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) : : "c" (msr), "a"(low), "d" (high) : "memory"); } +#define native_rdmsr(msr, val1, val2) \ +do { \ + u64 __val = __rdmsr((msr)); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ +} while (0) + +#define native_wrmsr(msr, low, high) \ + __wrmsr(msr, low, high) + +#define native_wrmsrl(msr, val) \ + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) + static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; @@ -218,9 +232,6 @@ static __always_inline unsigned long long rdtsc_ordered(void) return rdtsc(); } -/* Deprecated, keep it for a cycle for easier merging: */ -#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0) - static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); @@ -231,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter) return EAX_EDX_VAL(val, low, high); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> @@ -294,7 +305,7 @@ do { \ #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ /* * 64-bit version of wrmsr_safe(): diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..80dc14422495 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -170,11 +170,15 @@ */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ - ALTERNATIVE( \ + ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE) + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + 
X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) @@ -184,7 +188,8 @@ * here, anyway. */ # define CALL_NOSPEC \ - ALTERNATIVE( \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ @@ -199,7 +204,11 @@ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ - X86_FEATURE_RETPOLINE) + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #else /* No retpoline for C / inline asm */ @@ -214,7 +223,15 @@ enum spectre_v2_mitigation { SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS, + SPECTRE_V2_IBRS_ENHANCED, +}; + +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, }; extern char __indirect_thunk_start[]; @@ -241,22 +258,27 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" ((u32)val), + "d" ((u32)(val >> 32)), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. 
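[Editor's note: usage sketch, not part of this commit. The start/end pair
patched in the next hunk is meant to bracket every firmware entry; the
preempt_disable()/preempt_enable() calls live inside the macros, and the
firmware call below is a hypothetical stand-in.]

	static int example_firmware_query(void)
	{
		int status;

		firmware_restrict_branch_speculation_start();
		status = query_firmware_service();	/* hypothetical firmware entry */
		firmware_restrict_branch_speculation_end();
		return status;
	}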
@@ -265,14 +287,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_base; \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -291,16 +317,20 @@ do { \ * lfence * jmp spec_trap * do_rop: - * mov %rax,(%rsp) + * mov %rax,(%rsp) for x86_64 + * mov %edx,(%esp) for x86_32 * retq * * Without retpolines configured: * - * jmp *%rax + * jmp *%rax for x86_64 + * jmp *%edx for x86_32 */ #ifdef CONFIG_RETPOLINE -# define RETPOLINE_RAX_BPF_JIT_SIZE 17 -# define RETPOLINE_RAX_BPF_JIT() \ +# ifdef CONFIG_X86_64 +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 +# define RETPOLINE_RAX_BPF_JIT() \ +do { \ EMIT1_off32(0xE8, 7); /* callq do_rop */ \ /* spec_trap: */ \ EMIT2(0xF3, 0x90); /* pause */ \ @@ -308,11 +338,30 @@ do { \ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ /* do_rop: */ \ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ - EMIT1(0xC3); /* retq */ -#else -# define RETPOLINE_RAX_BPF_JIT_SIZE 2 -# define RETPOLINE_RAX_BPF_JIT() \ - EMIT2(0xFF, 0xE0); /* jmp *%rax */ + EMIT1(0xC3); /* retq */ \ +} while (0) +# else /* !CONFIG_X86_64 */ +# define RETPOLINE_EDX_BPF_JIT() \ +do { \ + EMIT1_off32(0xE8, 7); /* call do_rop */ \ + /* spec_trap: */ \ + EMIT2(0xF3, 0x90); /* pause */ \ + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ + /* do_rop: */ \ + EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ + EMIT1(0xC3); /* ret */ \ +} while (0) +# endif +#else /* !CONFIG_RETPOLINE */ +# ifdef CONFIG_X86_64 +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT2(0xFF, 0xE0); /* jmp *%rax */ +# else /* !CONFIG_X86_64 */ +# define RETPOLINE_EDX_BPF_JIT() \ + EMIT2(0xFF, 0xE2) /* jmp *%edx */ +# endif #endif #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 9c9dc579bd7d..46f516dd80ce 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -88,6 +88,7 @@ struct orc_entry { unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; + unsigned end:1; } __packed; /* @@ -101,6 +102,7 @@ struct unwind_hint { s16 sp_offset; u8 sp_reg; u8 type; + u8 end; }; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index aa30c3241ea7..0d5c739eebd7 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -29,8 +29,13 @@ #define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE -/* 44=32+12, the limit we can fit into an unsigned long pfn */ -#define __PHYSICAL_MASK_SHIFT 44 +/* + * This is beyond the 44 bit limit imposed by the 32bit long pfns, + * but we need the full mask to make sure inverted PROT_NONE + * entries have all the host bits set in a guest. + * The real limit is still 44 bits. 
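+ * Editor's annotation (not part of this commit): e.g. inverting a pfn
+ * value under GENMASK_ULL(51, 12) yields bits 44..51 all set, which a
+ * 44 bit mask would have truncated away in the guest's view.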
+ */ +#define __PHYSICAL_MASK_SHIFT 52 #define __VIRTUAL_MASK_SHIFT 32 #else /* !CONFIG_X86_PAE */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index d652a3808065..939b1cff4a7b 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -11,6 +11,10 @@ extern unsigned long max_pfn; extern unsigned long phys_base; +extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; +extern unsigned long vmemmap_base; + static inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; @@ -47,7 +51,7 @@ static inline void clear_page(void *page) clear_page_erms, X86_FEATURE_ERMS, "=D" (page), "0" (page) - : "memory", "rax", "rcx"); + : "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e1407312c412..8f657286d599 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -33,41 +33,44 @@ /* * Set __PAGE_OFFSET to the most negative possible address + - * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a - * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's - * what Xen requires. + * PGDIR_SIZE*17 (pgd slot 273). + * + * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for + * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary, + * but it's what Xen requires. */ -#ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) -#else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) -#endif +#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL) -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#ifdef CONFIG_X86_5LEVEL + #define __PHYSICAL_MASK_SHIFT 52 -#define __VIRTUAL_MASK_SHIFT 56 + +#ifdef CONFIG_X86_5LEVEL +#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47) #else -#define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 47 #endif /* - * Kernel image size is limited to 1GiB due to the fixmap living in the - * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use - * 512MiB by default, leaving 1.5GiB for modules once the page tables - * are fully set up. If kernel ASLR is configured, it can extend the - * kernel page table mapping, reducing the size of the modules area. + * Maximum kernel image size is limited to 1 GiB, due to the fixmap living + * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). + * + * On KASLR use 1 GiB by default, leaving 1 GiB for modules once the + * page tables are fully set up. + * + * If KASLR is disabled we can shrink it to 0.5 GiB and increase the size + * of the modules area to 1.5 GiB. 
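+ * Editor's annotation (not part of this commit): both splits keep
+ * image + modules inside the same top 2 GiB kernel mapping; only the
+ * boundary between the two areas moves.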
*/ -#if defined(CONFIG_RANDOMIZE_BASE) +#ifdef CONFIG_RANDOMIZE_BASE #define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024) #else #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 1e53560a84bb..c85e15010f48 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -17,7 +17,6 @@ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) -#define __PHYSICAL_MASK ((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1))) #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) /* Cast *PAGE_MASK to a signed type so that it is sign-extended if @@ -55,6 +54,13 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +extern phys_addr_t physical_mask; +#define __PHYSICAL_MASK physical_mask +#else +#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) +#endif + extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c83a2f418cea..4bf42f9e4eea 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -17,16 +17,73 @@ #include <linux/cpumask.h> #include <asm/frame.h> +static inline unsigned long long paravirt_sched_clock(void) +{ + return PVOP_CALL0(unsigned long long, time.sched_clock); +} + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return PVOP_CALL1(u64, time.steal_clock, cpu); +} + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) +{ + pv_ops.cpu.io_delay(); +#ifdef REALLY_SLOW_IO + pv_ops.cpu.io_delay(); + pv_ops.cpu.io_delay(); + pv_ops.cpu.io_delay(); +#endif +} + +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(mmu.flush_tlb_user); +} + +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(mmu.flush_tlb_kernel); +} + +static inline void __flush_tlb_one_user(unsigned long addr) +{ + PVOP_VCALL1(mmu.flush_tlb_one_user, addr); +} + +static inline void flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); +} + +static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); +} + +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +{ + PVOP_VCALL1(mmu.exit_mmap, mm); +} + +#ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { - PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); + PVOP_VCALL1(cpu.load_sp0, sp0); } /* The paravirtualized CPUID instruction. 
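  * Editor's annotation (not part of this commit): from this hunk on,
  * every accessor drops the per-group struct names in favour of the
  * single pv_ops instance, e.g.
  *   PVOP_VCALL4(pv_cpu_ops.cpuid, ...) -> PVOP_VCALL4(cpu.cpuid, ...)
  * where the macro internally resolves cpu.cpuid against pv_ops.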
*/ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); + PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); } /* @@ -34,98 +91,98 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, */ static inline unsigned long paravirt_get_debugreg(int reg) { - return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); + return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) static inline void set_debugreg(unsigned long val, int reg) { - PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); + PVOP_VCALL2(cpu.set_debugreg, reg, val); } static inline unsigned long read_cr0(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); + return PVOP_CALL0(unsigned long, cpu.read_cr0); } static inline void write_cr0(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr0, x); + PVOP_VCALL1(cpu.write_cr0, x); } static inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); + return PVOP_CALL0(unsigned long, mmu.read_cr2); } static inline void write_cr2(unsigned long x) { - PVOP_VCALL1(pv_mmu_ops.write_cr2, x); + PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { - return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); + return PVOP_CALL0(unsigned long, mmu.read_cr3); } static inline void write_cr3(unsigned long x) { - PVOP_VCALL1(pv_mmu_ops.write_cr3, x); + PVOP_VCALL1(mmu.write_cr3, x); } static inline void __write_cr4(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr4, x); + PVOP_VCALL1(cpu.write_cr4, x); } #ifdef CONFIG_X86_64 static inline unsigned long read_cr8(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); + return PVOP_CALL0(unsigned long, cpu.read_cr8); } static inline void write_cr8(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr8, x); + PVOP_VCALL1(cpu.write_cr8, x); } #endif static inline void arch_safe_halt(void) { - PVOP_VCALL0(pv_irq_ops.safe_halt); + PVOP_VCALL0(irq.safe_halt); } static inline void halt(void) { - PVOP_VCALL0(pv_irq_ops.halt); + PVOP_VCALL0(irq.halt); } static inline void wbinvd(void) { - PVOP_VCALL0(pv_cpu_ops.wbinvd); + PVOP_VCALL0(cpu.wbinvd); } #define get_kernel_rpl() (pv_info.kernel_rpl) static inline u64 paravirt_read_msr(unsigned msr) { - return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr); + return PVOP_CALL1(u64, cpu.read_msr, msr); } static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { - PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); + PVOP_VCALL3(cpu.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { - return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err); + return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); } static inline int paravirt_write_msr_safe(unsigned msr, unsigned low, unsigned high) { - return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high); + return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); } #define rdmsr(msr, val1, val2) \ @@ -170,23 +227,9 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -static inline unsigned long long paravirt_sched_clock(void) -{ - return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); -} - -struct static_key; -extern struct static_key paravirt_steal_enabled; -extern struct static_key paravirt_steal_rq_enabled; - -static inline u64 paravirt_steal_clock(int cpu) -{ - return PVOP_CALL1(u64, pv_time_ops.steal_clock, 
cpu); -} - static inline unsigned long long paravirt_read_pmc(int counter) { - return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); + return PVOP_CALL1(u64, cpu.read_pmc, counter); } #define rdpmc(counter, low, high) \ @@ -200,161 +243,127 @@ do { \ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { - PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); + PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); } static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { - PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); + PVOP_VCALL2(cpu.free_ldt, ldt, entries); } static inline void load_TR_desc(void) { - PVOP_VCALL0(pv_cpu_ops.load_tr_desc); + PVOP_VCALL0(cpu.load_tr_desc); } static inline void load_gdt(const struct desc_ptr *dtr) { - PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); + PVOP_VCALL1(cpu.load_gdt, dtr); } static inline void load_idt(const struct desc_ptr *dtr) { - PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); + PVOP_VCALL1(cpu.load_idt, dtr); } static inline void set_ldt(const void *addr, unsigned entries) { - PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); + PVOP_VCALL2(cpu.set_ldt, addr, entries); } static inline unsigned long paravirt_store_tr(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); + return PVOP_CALL0(unsigned long, cpu.store_tr); } + #define store_tr(tr) ((tr) = paravirt_store_tr()) static inline void load_TLS(struct thread_struct *t, unsigned cpu) { - PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); + PVOP_VCALL2(cpu.load_tls, t, cpu); } #ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int gs) { - PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs); + PVOP_VCALL1(cpu.load_gs_index, gs); } #endif static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { - PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); + PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc); } static inline void write_gdt_entry(struct desc_struct *dt, int entry, void *desc, int type) { - PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); + PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type); } static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { - PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); + PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } static inline void set_iopl_mask(unsigned mask) { - PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); -} - -/* The paravirtualized I/O functions */ -static inline void slow_down_io(void) -{ - pv_cpu_ops.io_delay(); -#ifdef REALLY_SLOW_IO - pv_cpu_ops.io_delay(); - pv_cpu_ops.io_delay(); - pv_cpu_ops.io_delay(); -#endif + PVOP_VCALL1(cpu.set_iopl_mask, mask); } static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { - PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); + PVOP_VCALL2(mmu.activate_mm, prev, next); } static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); -} - -static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) -{ - PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); -} - -static inline void __flush_tlb(void) -{ - PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); -} -static inline void __flush_tlb_global(void) -{ - PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); -} -static inline void __flush_tlb_one_user(unsigned long addr) -{ - PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr); -} - -static inline void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ - 
PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info); + PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) { - return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); + return PVOP_CALL1(int, mmu.pgd_alloc, mm); } static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) { - PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); + PVOP_VCALL2(mmu.pgd_free, mm, pgd); } static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); + PVOP_VCALL2(mmu.alloc_pte, mm, pfn); } static inline void paravirt_release_pte(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); + PVOP_VCALL1(mmu.release_pte, pfn); } static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); + PVOP_VCALL2(mmu.alloc_pmd, mm, pfn); } static inline void paravirt_release_pmd(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); + PVOP_VCALL1(mmu.release_pmd, pfn); } static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); + PVOP_VCALL2(mmu.alloc_pud, mm, pfn); } static inline void paravirt_release_pud(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); + PVOP_VCALL1(mmu.release_pud, pfn); } static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) { - PVOP_VCALL2(pv_mmu_ops.alloc_p4d, mm, pfn); + PVOP_VCALL2(mmu.alloc_p4d, mm, pfn); } static inline void paravirt_release_p4d(unsigned long pfn) { - PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn); + PVOP_VCALL1(mmu.release_p4d, pfn); } static inline pte_t __pte(pteval_t val) @@ -362,13 +371,9 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32); else - ret = PVOP_CALLEE1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val); return (pte_t) { .pte = ret }; } @@ -378,11 +383,10 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + ret = PVOP_CALLEE2(pteval_t, mmu.pte_val, pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte); return ret; } @@ -392,11 +396,9 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32); else - ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val); return (pgd_t) { ret }; } @@ -406,11 +408,10 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val, pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd); return ret; } @@ -421,8 +422,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long a { pteval_t ret; - ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, - mm, addr, ptep); + ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep); return (pte_t) { 
.pte = ret }; } @@ -432,20 +432,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long a { if (sizeof(pteval_t) > sizeof(long)) /* 5 arg words */ - pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); + pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte); else - PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, + PVOP_VCALL4(mmu.ptep_modify_prot_commit, mm, addr, ptep, pte.pte); } static inline void set_pte(pte_t *ptep, pte_t pte) { if (sizeof(pteval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, - pte.pte, (u64)pte.pte >> 32); + PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32); else - PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, - pte.pte); + PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); } static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, @@ -453,9 +451,9 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, { if (sizeof(pteval_t) > sizeof(long)) /* 5 arg words */ - pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); + pv_ops.mmu.set_pte_at(mm, addr, ptep, pte); else - PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); + PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) @@ -463,9 +461,9 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) pmdval_t val = native_pmd_val(pmd); if (sizeof(pmdval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); + PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32); else - PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); + PVOP_VCALL2(mmu.set_pmd, pmdp, val); } #if CONFIG_PGTABLE_LEVELS >= 3 @@ -474,11 +472,9 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32); else - ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val); return (pmd_t) { ret }; } @@ -488,11 +484,10 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val, pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd); return ret; } @@ -502,39 +497,23 @@ static inline void set_pud(pud_t *pudp, pud_t pud) pudval_t val = native_pud_val(pud); if (sizeof(pudval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, - val, (u64)val >> 32); + PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32); else - PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, - val); + PVOP_VCALL2(mmu.set_pud, pudp, val); } #if CONFIG_PGTABLE_LEVELS >= 4 static inline pud_t __pud(pudval_t val) { pudval_t ret; - if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { - pudval_t ret; - - if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, - pud.pud, (u64)pud.pud >> 32); - else - ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, - pud.pud); - - return ret; + return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud); } static inline void pud_clear(pud_t *pudp) @@ -546,39 +525,39 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { p4dval_t 
val = native_p4d_val(p4d); - if (sizeof(p4dval_t) > sizeof(long)) - PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp, - val, (u64)val >> 32); - else - PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp, - val); + PVOP_VCALL2(mmu.set_p4d, p4dp, val); } #if CONFIG_PGTABLE_LEVELS >= 5 static inline p4d_t __p4d(p4dval_t val) { - p4dval_t ret = PVOP_CALLEE1(p4dval_t, pv_mmu_ops.make_p4d, val); + p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { - return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); + return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d); } -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { - pgdval_t val = native_pgd_val(pgd); - - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val); + PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd)); } -static inline void pgd_clear(pgd_t *pgdp) -{ - set_pgd(pgdp, __pgd(0)); -} +#define set_pgd(pgdp, pgdval) do { \ + if (pgtable_l5_enabled()) \ + __set_pgd(pgdp, pgdval); \ + else \ + set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ +} while (0) + +#define pgd_clear(pgdp) do { \ + if (pgtable_l5_enabled()) \ + set_pgd(pgdp, __pgd(0)); \ +} while (0) #endif /* CONFIG_PGTABLE_LEVELS == 5 */ @@ -596,19 +575,18 @@ static inline void p4d_clear(p4d_t *p4dp) 64-bit pte atomically */ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { - PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, - pte.pte, pte.pte >> 32); + PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); + PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep); } static inline void pmd_clear(pmd_t *pmdp) { - PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); + PVOP_VCALL1(mmu.pmd_clear, pmdp); } #else /* !CONFIG_X86_PAE */ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) @@ -631,64 +609,68 @@ static inline void pmd_clear(pmd_t *pmdp) #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); + PVOP_VCALL1(cpu.start_context_switch, prev); } static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); + PVOP_VCALL1(cpu.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { - PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); + PVOP_VCALL0(mmu.lazy_mode.enter); } static inline void arch_leave_lazy_mmu_mode(void) { - PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); + PVOP_VCALL0(mmu.lazy_mode.leave); } static inline void arch_flush_lazy_mmu_mode(void) { - PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush); + PVOP_VCALL0(mmu.lazy_mode.flush); } static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { - pv_mmu_ops.set_fixmap(idx, phys, flags); + pv_ops.mmu.set_fixmap(idx, phys, flags); } +#endif #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val); + PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val); } static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { - PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock); + PVOP_VCALLEE1(lock.queued_spin_unlock, lock); } static __always_inline void pv_wait(u8 *ptr, u8 
val) { - PVOP_VCALL2(pv_lock_ops.wait, ptr, val); + PVOP_VCALL2(lock.wait, ptr, val); } static __always_inline void pv_kick(int cpu) { - PVOP_VCALL1(pv_lock_ops.kick, cpu); + PVOP_VCALL1(lock.kick, cpu); } static __always_inline bool pv_vcpu_is_preempted(long cpu) { - return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); + return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu); } +void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); +bool __raw_callee_save___native_vcpu_is_preempted(long cpu); + #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 @@ -768,24 +750,25 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) +#ifdef CONFIG_PARAVIRT_XXL static inline notrace unsigned long arch_local_save_flags(void) { - return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); + return PVOP_CALLEE0(unsigned long, irq.save_fl); } static inline notrace void arch_local_irq_restore(unsigned long f) { - PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); + PVOP_VCALLEE1(irq.restore_fl, f); } static inline notrace void arch_local_irq_disable(void) { - PVOP_VCALLEE0(pv_irq_ops.irq_disable); + PVOP_VCALLEE0(irq.irq_disable); } static inline notrace void arch_local_irq_enable(void) { - PVOP_VCALLEE0(pv_irq_ops.irq_enable); + PVOP_VCALLEE0(irq.irq_enable); } static inline notrace unsigned long arch_local_irq_save(void) @@ -796,6 +779,7 @@ static inline notrace unsigned long arch_local_irq_save(void) arch_local_irq_disable(); return f; } +#endif /* Make sure as little as possible of this mess escapes. */ @@ -817,7 +801,7 @@ extern void default_banner(void); #else /* __ASSEMBLY__ */ -#define _PVSITE(ptype, clobbers, ops, word, algn) \ +#define _PVSITE(ptype, ops, word, algn) \ 771:; \ ops; \ 772:; \ @@ -826,7 +810,6 @@ extern void default_banner(void); word 771b; \ .byte ptype; \ .byte 772b-771b; \ - .short clobbers; \ .popsection @@ -858,8 +841,8 @@ extern void default_banner(void); COND_POP(set, CLBR_RCX, rcx); \ COND_POP(set, CLBR_RAX, rax) -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) -#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) +#define PARA_PATCH(off) ((off) / 8) +#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else #define PV_SAVE_REGS(set) \ @@ -873,46 +856,41 @@ extern void default_banner(void); COND_POP(set, CLBR_EDI, edi); \ COND_POP(set, CLBR_EAX, eax) -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) -#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) +#define PARA_PATCH(off) ((off) / 4) +#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr #endif +#ifdef CONFIG_PARAVIRT_XXL #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) + PARA_SITE(PARA_PATCH(PV_CPU_iret), \ + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ + PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define 
ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ + PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#endif -#ifdef CONFIG_X86_32 -#define GET_CR0_INTO_EAX \ - push %ecx; push %edx; \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ - pop %edx; pop %ecx -#else /* !CONFIG_X86_32 */ - +#ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT_XXL /* * If swapgs is used while the userspace stack is still current, * there's no way to call a pvop. The PV replacement *must* be * inlined, or the swapgs instruction must be trapped and emulated. */ #define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - swapgs) + PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) /* * Note: swapgs is very special, and in practise is either going to be @@ -921,44 +899,51 @@ extern void default_banner(void); * it. */ #define SWAPGS \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ + PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ ) +#endif #define GET_CR2_INTO_RAX \ ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); + call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); +#ifdef CONFIG_PARAVIRT_XXL #define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ - CLBR_NONE, \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) + PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ + PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif +#endif #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop +#endif /* !CONFIG_PARAVIRT */ + #ifndef __ASSEMBLY__ +#ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { } +#endif +#ifndef CONFIG_PARAVIRT static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } +#endif #endif /* __ASSEMBLY__ */ -#endif /* !CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 180bc0bff0fb..26942ad63830 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -54,6 +54,7 @@ struct desc_struct; struct task_struct; struct cpumask; struct flush_tlb_info; +struct mmu_gather; /* * Wrapper type for pointers to code which uses the non-standard @@ -65,12 +66,14 @@ struct paravirt_callee_save { /* general info */ struct pv_info { +#ifdef CONFIG_PARAVIRT_XXL unsigned int kernel_rpl; int shared_kernel_pmd; #ifdef CONFIG_X86_64 u16 extra_user_64bit_cs; /* __USER_CS if none */ #endif +#endif const char *name; }; @@ -84,17 +87,18 @@ struct 
pv_init_ops { * the number of bytes of code generated, as we nop pad the * rest in generic code. */ - unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, + unsigned (*patch)(u8 type, void *insnbuf, unsigned long addr, unsigned len); } __no_randomize_layout; - +#ifdef CONFIG_PARAVIRT_XXL struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ void (*enter)(void); void (*leave)(void); void (*flush)(void); } __no_randomize_layout; +#endif struct pv_time_ops { unsigned long long (*sched_clock)(void); @@ -103,6 +107,9 @@ struct pv_time_ops { struct pv_cpu_ops { /* hooks for various privileged instructions */ + void (*io_delay)(void); + +#ifdef CONFIG_PARAVIRT_XXL unsigned long (*get_debugreg)(int regno); void (*set_debugreg)(int regno, unsigned long value); @@ -140,7 +147,6 @@ struct pv_cpu_ops { void (*set_iopl_mask)(unsigned mask); void (*wbinvd)(void); - void (*io_delay)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, @@ -175,9 +181,11 @@ struct pv_cpu_ops { void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); +#endif } __no_randomize_layout; struct pv_irq_ops { +#ifdef CONFIG_PARAVIRT_XXL /* * Get/set interrupt state. save_fl and restore_fl are only * expected to use X86_EFLAGS_IF; all other bits @@ -194,33 +202,34 @@ struct pv_irq_ops { void (*safe_halt)(void); void (*halt)(void); - +#endif } __no_randomize_layout; struct pv_mmu_ops { + /* TLB operations */ + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_one_user)(unsigned long addr); + void (*flush_tlb_others)(const struct cpumask *cpus, + const struct flush_tlb_info *info); + + void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); + + /* Hook for intercepting the destruction of an mm_struct. */ + void (*exit_mmap)(struct mm_struct *mm); + +#ifdef CONFIG_PARAVIRT_XXL unsigned long (*read_cr2)(void); void (*write_cr2)(unsigned long); unsigned long (*read_cr3)(void); void (*write_cr3)(unsigned long); - /* - * Hooks for intercepting the creation/use/destruction of an - * mm_struct. - */ + /* Hooks for intercepting the creation/use of an mm_struct. */ void (*activate_mm)(struct mm_struct *prev, struct mm_struct *next); void (*dup_mmap)(struct mm_struct *oldmm, struct mm_struct *mm); - void (*exit_mmap)(struct mm_struct *mm); - - - /* TLB operations */ - void (*flush_tlb_user)(void); - void (*flush_tlb_kernel)(void); - void (*flush_tlb_one_user)(unsigned long addr); - void (*flush_tlb_others)(const struct cpumask *cpus, - const struct flush_tlb_info *info); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); @@ -295,6 +304,7 @@ struct pv_mmu_ops { an mfn. We can tell which is which from the index. */ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); +#endif } __no_randomize_layout; struct arch_spinlock; @@ -318,48 +328,31 @@ struct pv_lock_ops { * number for each function using the offset which we use to indicate * what to patch. 
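  * Editor's annotation (not part of this commit): with the renamed
  * members below,
  *   PARAVIRT_PATCH(mmu.flush_tlb_user)
  *     == offsetof(struct paravirt_patch_template, mmu.flush_tlb_user)
  *        / sizeof(void *),
  * i.e. an index into pv_ops viewed as an array of function pointers.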
*/ struct paravirt_patch_template { - struct pv_init_ops pv_init_ops; - struct pv_time_ops pv_time_ops; - struct pv_cpu_ops pv_cpu_ops; - struct pv_irq_ops pv_irq_ops; - struct pv_mmu_ops pv_mmu_ops; - struct pv_lock_ops pv_lock_ops; + struct pv_init_ops init; + struct pv_time_ops time; + struct pv_cpu_ops cpu; + struct pv_irq_ops irq; + struct pv_mmu_ops mmu; + struct pv_lock_ops lock; } __no_randomize_layout; extern struct pv_info pv_info; -extern struct pv_init_ops pv_init_ops; -extern struct pv_time_ops pv_time_ops; -extern struct pv_cpu_ops pv_cpu_ops; -extern struct pv_irq_ops pv_irq_ops; -extern struct pv_mmu_ops pv_mmu_ops; -extern struct pv_lock_ops pv_lock_ops; +extern struct paravirt_patch_template pv_ops; #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) #define paravirt_type(op) \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(op)) + [paravirt_opptr] "i" (&(pv_ops.op)) #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) -/* - * Generate some code, and mark it as patchable by the - * apply_paravirt() alternate instruction patcher. - */ -#define _paravirt_alt(insn_string, type, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR " 771b\n" \ - " .byte " type "\n" \ - " .byte 772b-771b\n" \ - " .short " clobber "\n" \ - ".popsection\n" - /* Generate patchable code, with the default asm parameters. */ -#define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") +#define paravirt_call \ + "PARAVIRT_CALL type=\"%c[paravirt_typenum]\"" \ + " clobber=\"%c[paravirt_clobber]\"" \ + " pv_opptr=\"%c[paravirt_opptr]\";" /* Simple instruction patching code. */ #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" @@ -368,36 +361,18 @@ extern struct pv_lock_ops pv_lock_ops; __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) -unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len); -unsigned paravirt_patch_jmp(void *insnbuf, const void *target, - unsigned long addr, unsigned len); -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, +unsigned paravirt_patch_default(u8 type, void *insnbuf, unsigned long addr, unsigned len); unsigned paravirt_patch_insns(void *insnbuf, unsigned len, const char *start, const char *end); -unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len); +unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len); int paravirt_disable_iospace(void); /* - * This generates an indirect call based on the operation type number. - * The type number, computed in PARAVIRT_PATCH, is derived from the - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. - */ -#define PARAVIRT_CALL \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%c[paravirt_opptr];" - -/* * These macros are intended to wrap calls through one of the paravirt * ops structs, so that they can be later identified and patched at * runtime. 
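[Editor's note: a representative wrapper, not part of this commit. The
PVOP_* call macros now take the pv_ops member path directly; wbinvd()
earlier in paravirt.h is the real instance, this duplicate is purely
illustrative.]

	static inline void example_wbinvd(void)
	{
		PVOP_VCALL0(cpu.wbinvd);
	}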
@@ -507,9 +482,9 @@ int paravirt_disable_iospace(void); #endif /* CONFIG_X86_32 */ #ifdef CONFIG_PARAVIRT_DEBUG -#define PVOP_TEST_NULL(op) BUG_ON(op == NULL) +#define PVOP_TEST_NULL(op) BUG_ON(pv_ops.op == NULL) #else -#define PVOP_TEST_NULL(op) ((void)op) +#define PVOP_TEST_NULL(op) ((void)pv_ops.op) #endif #define PVOP_RETMASK(rettype) \ @@ -534,7 +509,7 @@ int paravirt_disable_iospace(void); /* since this condition will never hold */ \ if (sizeof(rettype) > sizeof(unsigned long)) { \ asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ + paravirt_call \ post \ : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ @@ -544,7 +519,7 @@ int paravirt_disable_iospace(void); __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ + paravirt_call \ post \ : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ @@ -571,7 +546,7 @@ int paravirt_disable_iospace(void); PVOP_VCALL_ARGS; \ PVOP_TEST_NULL(op); \ asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ + paravirt_call \ post \ : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ @@ -675,7 +650,6 @@ void paravirt_leave_lazy_mmu(void); void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); -u32 _paravirt_ident_32(u32); u64 _paravirt_ident_64(u64); #define paravirt_nop ((void *)_paravirt_nop) @@ -685,12 +659,31 @@ struct paravirt_patch_site { u8 *instr; /* original instructions */ u8 instrtype; /* type of this instruction */ u8 len; /* length of original instruction */ - u16 clobbers; /* what registers you may clobber */ }; extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; +#else /* __ASSEMBLY__ */ + +/* + * This generates an indirect call based on the operation type number. + * The type number, computed in PARAVIRT_PATCH, is derived from the + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. 
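+ * Editor's annotation (not part of this commit): the patcher side of
+ * that conversion is
+ *   void *opfunc = *((void **)&pv_ops + type);
+ * so the patching code can later recover the real target from the
+ * recorded type alone.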
+ */ +.macro PARAVIRT_CALL type:req clobber:req pv_opptr:req +771: ANNOTATE_RETPOLINE_SAFE + call *\pv_opptr +772: .pushsection .parainstructions,"a" + _ASM_ALIGN + _ASM_PTR 771b + .byte \type + .byte 772b-771b + .short \clobber + .popsection +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h index e1084f71a295..94597a3cf3d0 100644 --- a/arch/x86/include/asm/pci-direct.h +++ b/arch/x86/include/asm/pci-direct.h @@ -15,8 +15,4 @@ extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); extern int early_pci_allowed(void); - -extern unsigned int pci_early_dump_regs; -extern void early_dump_pci_device(u8 bus, u8 slot, u8 func); -extern void early_dump_pci_devices(void); #endif /* _ASM_X86_PCI_DIRECT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d32175e30259..662963681ea6 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -117,9 +117,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif - -#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - #endif /* __KERNEL__ */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index eb66fa9cd0fc..959d618dbb17 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -151,6 +151,8 @@ extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, phys_addr_t addr); extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); +extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, + int end, u64 addr); extern struct list_head pci_mmcfg_list; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a06b07399d17..1a19d11cfbbd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ @@ -450,9 +450,10 @@ do { \ bool __ret; \ typeof(pcp1) __o1 = (o1), __n1 = (n1); \ typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ - : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ - : "b" (__n1), "c" (__n2), "a" (__o1)); \ + asm volatile("cmpxchg8b "__percpu_arg(1) \ + CC_SET(z) \ + : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ + : "b" (__n1), "c" (__n2)); \ __ret; \ }) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..8bdf74902293 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define 
AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ @@ -270,6 +278,7 @@ struct perf_guest_switch_msr { extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); +extern int x86_perf_rdpmc_index(struct perf_event *event); #else static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index aff42e1da6ee..ec7f43327033 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -8,7 +8,7 @@ static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) @@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { + if (!pgtable_l5_enabled()) + return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } @@ -182,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) { + if (!pgtable_l5_enabled()) + return; + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); free_page((unsigned long)p4d); } @@ -191,7 +196,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - ___p4d_free_tlb(tlb, p4d); + if (pgtable_l5_enabled()) + ___p4d_free_tlb(tlb, p4d); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 685ffe8a0eaf..60d0f9015317 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -95,4 +95,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +/* No inverted PFNs on 2 level page tables */ + +static inline u64 protnone_mask(u64 val) +{ + return 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + return val; +} + +static inline bool __pte_needs_invert(u64 val) +{ + return false; +} + #endif /* _ASM_X86_PGTABLE_2LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index f982ef808e7e..6deb6cd236e3 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -35,4 +35,7 @@ typedef union { #define PTRS_PER_PTE 1024 +/* This covers all VMSPLIT_* and VMSPLIT_*_OPT variants */ +#define PGD_KERNEL_START (CONFIG_PAGE_OFFSET >> PGDIR_SHIFT) + #endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index f24df59c40b2..f8b1ad2c3828 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_H #define _ASM_X86_PGTABLE_3LEVEL_H +#include <asm/atomic64_32.h> + /* * Intel 
Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -98,6 +100,9 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) static inline void native_set_pud(pud_t *pudp, pud_t pud) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); +#endif set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); } @@ -147,10 +152,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; - /* xchg acts as a barrier before the setting of the high bits */ - res.pte_low = xchg(&ptep->pte_low, 0); - res.pte_high = ptep->pte_high; - ptep->pte_high = 0; + res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); return res; } @@ -229,6 +231,10 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) { union split_pud res, *orig = (union split_pud *)pudp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0)); +#endif + /* xchg acts as a barrier before setting of the high bits */ res.pud_low = xchg(&orig->pud_low, 0); res.pud_high = orig->pud_high; @@ -241,12 +247,43 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #endif /* Encode and de-code a swap entry */ +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) + #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) #define __swp_offset(x) ((x).val >> 5) #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) -#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) -#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) + +/* + * Normally, __swp_entry() converts from arch-independent swp_entry_t to + * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result + * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the + * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to + * __swp_entry_to_pte() through the following helper macro based on 64bit + * __swp_entry(). + */ +#define __swp_pteval_entry(type, offset) ((pteval_t) { \ + (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) }) + +#define __swp_entry_to_pte(x) ((pte_t){ .pte = \ + __swp_pteval_entry(__swp_type(x), __swp_offset(x)) }) +/* + * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent + * swp_entry_t, but also has to convert it from 64bit to the 32bit + * intermediate representation, using the following macros based on 64bit + * __swp_type() and __swp_offset(). 
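+ * Editor's annotation (not part of this commit): a full round trip is
+ *   pte = __swp_entry_to_pte(__swp_entry(type, offset));
+ *   __pteval_swp_type(pte) == type, __pteval_swp_offset(pte) == offset,
+ * with the offset field stored bit-inverted inside the 64bit pte.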
+ */ +#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS))) +#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)) + +#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ + __pteval_swp_offset(pte))) #define gup_get_pte gup_get_pte /* @@ -295,4 +332,6 @@ static inline pte_t gup_get_pte(pte_t *ptep) return pte; } +#include <asm/pgtable-invert.h> + #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 876b4c77d983..33845d36897c 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -20,10 +20,11 @@ typedef union { } pte_t; #endif /* !__ASSEMBLY__ */ -#ifdef CONFIG_PARAVIRT -#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) +#ifdef CONFIG_PARAVIRT_XXL +#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \ + (pv_info.shared_kernel_pmd))) #else -#define SHARED_KERNEL_PMD 1 +#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) #endif /* @@ -44,5 +45,7 @@ typedef union { */ #define PTRS_PER_PTE 512 +#define MAX_POSSIBLE_PHYSMEM_BITS 36 +#define PGD_KERNEL_START (CONFIG_PAGE_OFFSET >> PGDIR_SHIFT) #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h new file mode 100644 index 000000000000..a0c1525f1b6f --- /dev/null +++ b/arch/x86/include/asm/pgtable-invert.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PGTABLE_INVERT_H +#define _ASM_PGTABLE_INVERT_H 1 + +#ifndef __ASSEMBLY__ + +/* + * A clear pte value is special, and doesn't get inverted. + * + * Note that even users that only pass a pgprot_t (rather + * than a full pte) won't trigger the special zero case, + * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED + * set. So the all zero case really is limited to just the + * cleared page table entry case. + */ +static inline bool __pte_needs_invert(u64 val) +{ + return val && !(val & _PAGE_PRESENT); +} + +/* Get a mask to xor with the page table entry to get the correct pfn. */ +static inline u64 protnone_mask(u64 val) +{ + return __pte_needs_invert(val) ? ~0ull : 0; +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) +{ + /* + * When a PTE transitions from NONE to !NONE or vice-versa + * invert the PFN part to stop speculation. + * pte_pfn undoes this when needed. 
+ */ + if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) + val = (val & ~mask) | (~val & mask); + return val; +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b444d83cfc95..40616e805292 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -30,11 +30,14 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); void ptdump_walk_pgd_level_checkwx(void); +void ptdump_walk_user_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_walk_pgd_level_checkwx() +#define debug_checkwx() ptdump_walk_pgd_level_checkwx() +#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else -#define debug_checkwx() do { } while (0) +#define debug_checkwx() do { } while (0) +#define debug_checkwx_user() do { } while (0) #endif /* @@ -52,9 +55,9 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); extern pmdval_t early_pmd_flags; -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> -#else /* !CONFIG_PARAVIRT */ +#else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) @@ -65,7 +68,7 @@ extern pmdval_t early_pmd_flags; #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) native_pgd_clear(pgd) +#define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d @@ -109,8 +112,7 @@ extern pmdval_t early_pmd_flags; #define __pte(x) native_make_pte(x) #define arch_end_context_switch(prev) do {} while(0) - -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ /* * The following only work if pte_present() is true. 
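/*
 * Illustrative sketch, not part of the patch: a standalone model of the
 * inversion helpers above. A non-zero, not-present entry stores its PFN
 * bits inverted; XORing with protnone_mask() recovers the real PFN, as
 * pte_pfn() does. The bit-layout constants are simplified assumptions.
 */
#include <assert.h>
#include <stdint.h>

#define EX_PRESENT	0x001ULL		/* _PAGE_PRESENT */
#define EX_PROTNONE	0x100ULL		/* _PAGE_PROTNONE */
#define EX_PFN_MASK	0x000ffffffffff000ULL	/* PFN field, 4k pages */

static uint64_t ex_protnone_mask(uint64_t val)
{
	return (val && !(val & EX_PRESENT)) ? ~0ULL : 0;
}

static uint64_t ex_flip_guard(uint64_t oldval, uint64_t val, uint64_t mask)
{
	if (ex_protnone_mask(oldval) != ex_protnone_mask(val))
		val = (val & ~mask) | (~val & mask);
	return val;
}

int main(void)
{
	uint64_t pfn = 0x1234;
	uint64_t pte = (pfn << 12) | EX_PRESENT;

	/* present -> PROT_NONE: the PFN field comes out inverted */
	uint64_t none = ex_flip_guard(pte, (pfn << 12) | EX_PROTNONE,
				      EX_PFN_MASK);
	assert((none & EX_PFN_MASK) != (pfn << 12));

	/* ...and pte_pfn()'s XOR with the mask recovers it */
	assert((((none ^ ex_protnone_mask(none)) & EX_PFN_MASK) >> 12) == pfn);
	return 0;
}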
@@ -185,19 +187,29 @@ static inline int pte_special(pte_t pte) return pte_flags(pte) & _PAGE_SPECIAL; } +/* Entries that were set to PROT_NONE are inverted */ + +static inline u64 protnone_mask(u64 val); + static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; + phys_addr_t pfn = pte_val(pte); + pfn ^= protnone_mask(pfn); + return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; + phys_addr_t pfn = pmd_val(pmd); + pfn ^= protnone_mask(pfn); + return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { - return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT; + phys_addr_t pfn = pud_val(pud); + pfn ^= protnone_mask(pfn); + return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } static inline unsigned long p4d_pfn(p4d_t p4d) @@ -400,11 +412,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_RW); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE); -} - static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); @@ -459,11 +466,6 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud_set_flags(pud, _PAGE_RW); } -static inline pud_t pud_mknotpresent(pud_t pud) -{ - return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE); -} - #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { @@ -526,45 +528,82 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) return protval; } +static inline pgprotval_t check_pgprot(pgprot_t pgprot) +{ + pgprotval_t massaged_val = massage_pgprot(pgprot); + + /* mmdebug.h can not be included here because of dependencies */ +#ifdef CONFIG_DEBUG_VM + WARN_ONCE(pgprot_val(pgprot) != massaged_val, + "attempted to set unsupported pgprot: %016llx " + "bits: %016llx supported: %016llx\n", + (u64)pgprot_val(pgprot), + (u64)pgprot_val(pgprot) ^ massaged_val, + (u64)__supported_pte_mask); +#endif + + return massaged_val; +} + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PTE_PFN_MASK; + return __pte(pfn | check_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PMD_PAGE_MASK; + return __pmd(pfn | check_pgprot(pgprot)); } static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { - return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) | - massage_pgprot(pgprot)); + phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + pfn ^= protnone_mask(pgprot_val(pgprot)); + pfn &= PHYSICAL_PUD_PAGE_MASK; + return __pud(pfn | check_pgprot(pgprot)); +} + +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return pfn_pmd(pmd_pfn(pmd), + __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } +static inline pud_t pud_mknotpresent(pud_t pud) +{ + return pfn_pud(pud_pfn(pud), + __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); +} + +static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pteval_t val = 
pte_val(pte); + pteval_t val = pte_val(pte), oldval = val; /* * Chop off the NX bit (if present), and add the NX portion of * the newprot (if present): */ val &= _PAGE_CHG_MASK; - val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; - + val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; + val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); return __pte(val); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmdval_t val = pmd_val(pmd); + pmdval_t val = pmd_val(pmd), oldval = val; val &= _HPAGE_CHG_MASK; - val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; - + val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; + val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); return __pmd(val); } @@ -584,6 +623,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + return canon_pgprot(prot); +} + static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) @@ -618,8 +662,31 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgtbl(pgdp, pgd); +} +#else /* CONFIG_PAGE_TABLE_ISOLATION */ +static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + #endif /* __ASSEMBLY__ */ + #ifdef CONFIG_X86_32 # include <asm/pgtable_32.h> #else @@ -859,6 +926,8 @@ static inline unsigned long p4d_index(unsigned long address) #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { + if (!pgtable_l5_enabled()) + return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } @@ -876,6 +945,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { + if (!pgtable_l5_enabled()) + return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } @@ -883,6 +954,9 @@ static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; + if (!pgtable_l5_enabled()) + return 0; + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; @@ -891,6 +965,8 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + if (!pgtable_l5_enabled()) + return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables @@ -1118,11 +1194,75 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); return old; } } #endif +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. 
+ */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); +} + +static inline int pgd_large(pgd_t pgd) { return 0; } + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); @@ -1289,6 +1429,14 @@ static inline bool pud_access_permitted(pud_t pud, bool write) return __pte_access_permitted(pud_val(pud), write); } +#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 +extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); + +static inline bool arch_has_pfn_modify_check(void) +{ + return boot_cpu_has_bug(X86_BUG_L1TF); +} + #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0777e18a1d23..b0bc0fff5f1f 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -15,6 +15,8 @@ # include <asm/pgtable-2level_types.h> #endif +#define pgtable_l5_enabled() 0 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) @@ -48,13 +50,18 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ & PMD_MASK) -#define PKMAP_BASE \ +#define LDT_BASE_ADDR \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) #else -# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) #endif #define MODULES_VADDR VMALLOC_START diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1149d2112b2e..9c85b54bf03c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> +#include <asm/fixmap.h> extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; @@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; -extern pte_t 
level1_fixmap_pgt[512]; +extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt @@ -55,15 +56,15 @@ struct mm_struct; void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); -static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +static inline void native_set_pte(pte_t *ptep, pte_t pte) { - *ptep = native_make_pte(0); + WRITE_ONCE(*ptep, pte); } -static inline void native_set_pte(pte_t *ptep, pte_t pte) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - *ptep = pte; + native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -73,7 +74,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) @@ -109,7 +110,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) @@ -132,115 +133,28 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp) #endif } -#ifdef CONFIG_PAGE_TABLE_ISOLATION -/* - * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages - * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and - * the user one is in the last 4k. To switch between them, you - * just need to flip the 12th bit in their addresses. - */ -#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT - -/* - * This generates better code than the inline assembly in - * __set_bit(). - */ -static inline void *ptr_set_bit(void *ptr, int bit) -{ - unsigned long __ptr = (unsigned long)ptr; - - __ptr |= BIT(bit); - return (void *)__ptr; -} -static inline void *ptr_clear_bit(void *ptr, int bit) -{ - unsigned long __ptr = (unsigned long)ptr; - - __ptr &= ~BIT(bit); - return (void *)__ptr; -} - -static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) -{ - return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); -} - -static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) -{ - return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); -} - -static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) -{ - return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); -} - -static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) -{ - return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); -} -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ - -/* - * Page table pages are page-aligned. The lower half of the top - * level is used for userspace and the top half for the kernel. - * - * Returns true for parts of the PGD that map userspace and - * false for the parts that map the kernel. - */ -static inline bool pgdp_maps_userspace(void *__ptr) +static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { - unsigned long ptr = (unsigned long)__ptr; - - return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); -} + pgd_t pgd; -#ifdef CONFIG_PAGE_TABLE_ISOLATION -pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); + if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + WRITE_ONCE(*p4dp, p4d); + return; + } -/* - * Take a PGD location (pgdp) and a pgd value that needs to be set there. - * Populates the user and returns the resulting PGD that must be set in - * the kernel copy of the page tables. 
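/*
 * Illustrative sketch, not part of the patch: with PTI the top-level
 * table is an order-1 (8k) allocation, kernel half first and user half
 * second, so kernel_to_user_pgdp()/user_to_kernel_pgdp() (moved into
 * pgtable.h above) only toggle bit PAGE_SHIFT of the pointer:
 */
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SHIFT 12

static void *ex_kernel_to_user(void *pgdp)
{
	return (void *)((uintptr_t)pgdp | ((uintptr_t)1 << EX_PAGE_SHIFT));
}

static void *ex_user_to_kernel(void *pgdp)
{
	return (void *)((uintptr_t)pgdp & ~((uintptr_t)1 << EX_PAGE_SHIFT));
}

int main(void)
{
	uintptr_t kpgd = 0x100000;	/* stand-in 8k-aligned kernel PGD */

	/* The user twin sits exactly one 4k page above the kernel copy */
	assert((uintptr_t)ex_kernel_to_user((void *)kpgd) == kpgd + 0x1000);
	assert(ex_user_to_kernel(ex_kernel_to_user((void *)kpgd)) ==
	       (void *)kpgd);
	return 0;
}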
- */ -static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) -{ - if (!static_cpu_has(X86_FEATURE_PTI)) - return pgd; - return __pti_set_user_pgd(pgdp, pgd); -} -#else -static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) -{ - return pgd; -} -#endif - -static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) -{ -#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) - p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); -#else - *p4dp = p4d; -#endif + pgd = native_make_pgd(native_p4d_val(p4d)); + pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); } static inline void native_p4d_clear(p4d_t *p4d) { -#ifdef CONFIG_X86_5LEVEL native_set_p4d(p4d, native_make_p4d(0)); -#else - native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)}); -#endif } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION - *pgdp = pti_set_user_pgd(pgdp, pgd); -#else - *pgdp = pgd; -#endif + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) @@ -258,7 +172,6 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); /* * Level 4 access. */ -static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) /* PUD - Level3 access */ @@ -276,7 +189,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -289,20 +202,34 @@ static inline int pgd_large(pgd_t pgd) { return 0; } * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. + * + * The offset is inverted by a binary not operation to make the high + * physical bits set. */ -#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) -#define SWP_TYPE_BITS 5 -/* Place the offset above the type: */ -#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS) +#define SWP_TYPE_BITS 5 + +#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) + +/* We always extract/encode the offset by shifting it all the way up, and then down again */ +#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ - & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) -#define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (SWP_TYPE_FIRST_BIT)) \ - | ((offset) << SWP_OFFSET_FIRST_BIT) }) +/* Extract the high bits for type */ +#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) + +/* Shift up (to get rid of type), then down to get value */ +#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) + +/* + * Shift the offset up "too far" by TYPE bits, then down again + * The offset is inverted by a binary not operation to make the high + * physical bits set. 
+ */ +#define __swp_entry(type, offset) ((swp_entry_t) { \ + (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ + | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) + #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) @@ -346,5 +273,7 @@ static inline bool gup_fast_permitted(unsigned long start, int nr_pages, return true; } +#include <asm/pgtable-invert.h> + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6b8f73dcbc2c..84bd9bdc1987 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -20,6 +20,29 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; +#ifdef CONFIG_X86_5LEVEL +extern unsigned int __pgtable_l5_enabled; + +#ifdef USE_EARLY_PGTABLE_L5 +/* + * cpu_feature_enabled() is not available in early boot code. + * Use variable instead. + */ +static inline bool pgtable_l5_enabled(void) +{ + return __pgtable_l5_enabled; +} +#else +#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) +#endif /* USE_EARLY_PGTABLE_L5 */ + +#else +#define pgtable_l5_enabled() 0 +#endif /* CONFIG_X86_5LEVEL */ + +extern unsigned int pgdir_shift; +extern unsigned int ptrs_per_p4d; + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 @@ -29,24 +52,28 @@ typedef struct { pteval_t pte; } pte_t; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 48 +#define PGDIR_SHIFT pgdir_shift #define PTRS_PER_PGD 512 /* * 4th level page in 5-level paging case */ -#define P4D_SHIFT 39 -#define PTRS_PER_P4D 512 -#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) -#define P4D_MASK (~(P4D_SIZE - 1)) +#define P4D_SHIFT 39 +#define MAX_PTRS_PER_P4D 512 +#define PTRS_PER_P4D ptrs_per_p4d +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) + +#define MAX_POSSIBLE_PHYSMEM_BITS 52 #else /* CONFIG_X86_5LEVEL */ /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 +#define MAX_PTRS_PER_P4D 1 #endif /* CONFIG_X86_5LEVEL */ @@ -82,31 +109,32 @@ typedef struct { pteval_t pte; } pte_t; * range must not overlap with anything except the KASAN shadow area, which * is correct as KASAN disables KASLR. 
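/*
 * Illustrative sketch, not part of the patch: once PGDIR_SHIFT and
 * PTRS_PER_P4D are boot-time variables, the same kernel image walks
 * either paging geometry. A standalone model of the two configurations
 * (ex_* names are local to the sketch):
 */
#include <assert.h>
#include <stdint.h>

static unsigned int ex_pgdir_shift;	/* models pgdir_shift */
static unsigned int ex_ptrs_per_p4d;	/* models ptrs_per_p4d */

static void ex_setup(int la57)
{
	ex_pgdir_shift = la57 ? 48 : 39;	/* 5- vs 4-level paging */
	ex_ptrs_per_p4d = la57 ? 512 : 1;	/* p4d folded on 4-level */
}

static unsigned int ex_pgd_index(uint64_t addr)
{
	return (addr >> ex_pgdir_shift) & 511;	/* PTRS_PER_PGD - 1 */
}

int main(void)
{
	ex_setup(0);
	assert(ex_pgd_index(1ULL << 46) == 128 && ex_ptrs_per_p4d == 1);
	ex_setup(1);
	assert(ex_pgd_index(1ULL << 46) == 0 && ex_ptrs_per_p4d == 512);
	return 0;
}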
*/ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) -#ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(12800, UL) -# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) -# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -# define LDT_PGD_ENTRY _AC(-112, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) -#else -# define VMALLOC_SIZE_TB _AC(32, UL) -# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-3, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) -#endif - -#ifdef CONFIG_RANDOMIZE_MEMORY +#define LDT_PGD_ENTRY -240UL +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) +#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE) + +#define __VMALLOC_BASE_L4 0xffffc90000000000UL +#define __VMALLOC_BASE_L5 0xffa0000000000000UL + +#define VMALLOC_SIZE_TB_L4 32UL +#define VMALLOC_SIZE_TB_L5 12800UL + +#define __VMEMMAP_BASE_L4 0xffffea0000000000UL +#define __VMEMMAP_BASE_L5 0xffd4000000000000UL + +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base +# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base #else -# define VMALLOC_START __VMALLOC_BASE -# define VMEMMAP_START __VMEMMAP_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +# define VMALLOC_START __VMALLOC_BASE_L4 +# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 +# define VMEMMAP_START __VMEMMAP_BASE_L4 +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) +#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ @@ -124,4 +152,6 @@ typedef struct { pteval_t pte; } pte_t; #define EARLY_DYNAMIC_PAGE_TABLES 64 +#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index acfe755562a6..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -50,6 +50,7 @@ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) +#define _PAGE_SOFTW3 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW3) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) @@ -65,7 +66,6 @@ #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) #endif -#define __HAVE_ARCH_PTE_SPECIAL #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ _PAGE_PKEY_BIT1 | \ @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* @@ -196,19 +196,21 @@ enum page_cache_mode { #define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) #define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) -#define PAGE_KERNEL __pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC __pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC 
__pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) +#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) -#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) +#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) +#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) +#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) +#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) +#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) +#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) +#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) +#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) + +#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ @@ -265,14 +267,37 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; +#ifdef CONFIG_X86_PAE + +/* + * PHYSICAL_PAGE_MASK might be non-constant when SME is compiled in, so we can't + * use it here. + */ + +#define PGD_PAE_PAGE_MASK ((signed long)PAGE_MASK) +#define PGD_PAE_PHYS_MASK (((1ULL << __PHYSICAL_MASK_SHIFT)-1) & PGD_PAE_PAGE_MASK) + +/* + * PAE allows Base Address, P, PWT, PCD and AVL bits to be set in PGD entries. + * All other bits are Reserved MBZ + */ +#define PGD_ALLOWED_BITS (PGD_PAE_PHYS_MASK | _PAGE_PRESENT | \ + _PAGE_PWT | _PAGE_PCD | \ + _PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3) + +#else +/* No need to mask any bits for !PAE */ +#define PGD_ALLOWED_BITS (~0ULL) +#endif + static inline pgd_t native_make_pgd(pgdval_t val) { - return (pgd_t) { val }; + return (pgd_t) { val & PGD_ALLOWED_BITS }; } static inline pgdval_t native_pgd_val(pgd_t pgd) { - return pgd.pgd; + return pgd.pgd & PGD_ALLOWED_BITS; } static inline pgdval_t pgd_flags(pgd_t pgd) @@ -483,6 +508,7 @@ static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; +extern pteval_t __default_kernel_pte_mask; extern void set_nx(void); extern int nx_enabled; diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index a0ba1ffda0df..19b137f1b3be 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,11 +2,18 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H +#define ARCH_DEFAULT_PKEY 0 + #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); +static inline bool arch_pkeys_enabled(void) +{ + return boot_cpu_has(X86_FEATURE_OSPKE); +} + /* * Try to dedicate one of the protection keys to be used as an * execute-only protection key. 
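/*
 * Illustrative sketch, not part of the patch: on PAE, native_make_pgd()
 * and native_pgd_val() above now mask PGD values with PGD_ALLOWED_BITS,
 * so reserved must-be-zero bits can never be set. The 52-bit physical
 * mask below is a simplifying assumption.
 */
#include <assert.h>
#include <stdint.h>

#define EX_PHYS_MASK	(((1ULL << 52) - 1) & ~0xfffULL)
#define EX_P		0x001ULL	/* _PAGE_PRESENT */
#define EX_PWT		0x008ULL
#define EX_PCD		0x010ULL
#define EX_SOFTW	0xe00ULL	/* _PAGE_SOFTW1..3, bits 9-11 */
#define EX_ALLOWED	(EX_PHYS_MASK | EX_P | EX_PWT | EX_PCD | EX_SOFTW)

int main(void)
{
	/* RW (bit 1) is reserved in a PAE PGD entry and gets stripped */
	uint64_t raw = 0x1000 | EX_P | 0x002;

	assert((raw & EX_ALLOWED) == (0x1000 | EX_P));
	return 0;
}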
@@ -15,7 +22,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return 0; + return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); } @@ -49,13 +56,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { /* * "Allocated" pkeys are those that have been returned - * from pkey_alloc(). pkey 0 is special, and never - * returned from pkey_alloc(). + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. */ - if (pkey <= 0) + if (pkey < 0) return false; if (pkey >= arch_max_pkey()) return false; + /* + * The exec-only pkey is set in the allocation map, but + * is not available to any of the user interfaces like + * mprotect_pkey(). + */ + if (pkey == mm->context.execute_only_pkey) + return false; + return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -106,4 +121,12 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); extern void copy_init_pkru_to_fpregs(void); +static inline int vma_pkey(struct vm_area_struct *vma) +{ + unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | + VM_PKEY_BIT2 | VM_PKEY_BIT3; + + return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; +} + #endif /*_ASM_X86_PKEYS_H */ diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 5973a2f3db3d..059823bb8af7 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -135,6 +135,7 @@ struct sst_platform_info { const struct sst_res_info *res_info; const struct sst_lib_dnld_info *lib_info; const char *platform; + bool streams_lost_on_suspend; }; int add_sst_platform_device(void); #endif diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 7f2dbd91fc74..90cb2f36c042 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -88,7 +88,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); + return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); } /* diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 625a52a5594f..02c2cbda4a74 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -39,10 +39,6 @@ #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define X86_CR3_PTI_PCID_USER_BIT 11 -#endif - #else /* * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save @@ -53,4 +49,8 @@ #define CR3_NOFLUSH 0 #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define X86_CR3_PTI_PCID_USER_BIT 11 +#endif + #endif /* _ASM_X86_PROCESSOR_FLAGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b0ccd4847a58..071b2a6fff85 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -42,18 +42,6 @@ struct vm86; #define NET_IP_ALIGN 0 #define HBP_NUM 4 -/* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). 
- */ -static inline void *current_text_addr(void) -{ - void *pc; - - asm volatile("mov $1f, %0; 1:":"=r" (pc)); - - return pc; -} /* * These alignment constraints are for performance in the vSMP case, @@ -132,6 +120,8 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; + /* Address space bits used by the cache internally */ + u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; @@ -153,7 +143,8 @@ enum cpuid_regs_idx { #define X86_VENDOR_CENTAUR 5 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 -#define X86_VENDOR_NUM 9 +#define X86_VENDOR_HYGON 9 +#define X86_VENDOR_NUM 10 #define X86_VENDOR_UNKNOWN 0xff @@ -181,20 +172,16 @@ extern const struct seq_operations cpuinfo_op; extern void cpu_detect(struct cpuinfo_x86 *c); +static inline unsigned long long l1tf_pfn_limit(void) +{ + return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); +} + extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern u32 get_scattered_cpuid_leaf(unsigned int level, - unsigned int sub_leaf, - enum cpuid_regs_idx reg); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); - -extern void detect_extended_topology(struct cpuinfo_x86 *c); -extern void detect_ht(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_32 extern int have_cpuid_p(void); @@ -317,7 +304,13 @@ struct x86_hw_tss { */ u64 sp1; + /* + * Since Linux does not use ring 2, the 'sp2' slot is unused by + * hardware. entry_SYSCALL_64 uses it as scratch space to stash + * the user RSP value. + */ u64 sp2; + u64 reserved2; u64 ist[7]; u32 reserved3; @@ -407,11 +400,21 @@ union irq_stack_union { DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; DECLARE_INIT_PER_CPU(irq_stack_union); +static inline unsigned long cpu_kernelmode_gs_base(int cpu) +{ + return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); +} + DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); + +#if IS_ENABLED(CONFIG_KVM) +/* Save actual FS/GS selectors and bases to current->thread */ +void save_fsgs_for_kvm(void); +#endif #else /* X86_64 */ -#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_STACKPROTECTOR /* * Make sure stack canary segment base is cached-aligned: * "For Intel Atom processors, avoid non zero segment base address @@ -570,7 +573,7 @@ static inline bool on_thread_stack(void) current_stack_pointer) < THREAD_SIZE; } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define __cpuid native_cpuid @@ -581,7 +584,7 @@ static inline void load_sp0(unsigned long sp0) } #define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. 
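/*
 * Illustrative sketch, not part of the patch: l1tf_pfn_limit() above
 * derives the highest safe PFN from x86_cache_bits — half of the
 * address space the L1D can tag. For a CPU with 46 cache address bits:
 */
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SHIFT 12

static uint64_t ex_l1tf_pfn_limit(unsigned int cache_bits)
{
	return 1ULL << (cache_bits - 1 - EX_PAGE_SHIFT);
}

int main(void)
{
	/* 2^33 PFNs * 4k = 32 TB, half of the 64 TB a 46-bit cache covers */
	assert(ex_l1tf_pfn_limit(46) == (1ULL << 33));
	return 0;
}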
*/ extern void release_thread(struct task_struct *); @@ -739,13 +742,11 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern void early_trap_init(void); void early_trap_pf_init(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); @@ -967,6 +968,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) extern unsigned long arch_align_stack(unsigned long sp); extern void free_init_pages(char *what, unsigned long begin, unsigned long end); +extern void free_kernel_image_pages(void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN @@ -978,4 +980,16 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); void microcode_check(void); + +enum l1tf_mitigations { + L1TF_MITIGATION_OFF, + L1TF_MITIGATION_FLUSH_NOWARN, + L1TF_MITIGATION_FLUSH, + L1TF_MITIGATION_FLUSH_NOSMT, + L1TF_MITIGATION_FULL, + L1TF_MITIGATION_FULL_FORCE +}; + +extern enum l1tf_mitigations l1tf_mitigation; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 0b5ef05b2d2d..5df09a0b80b8 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -6,6 +6,7 @@ #ifdef CONFIG_PAGE_TABLE_ISOLATION extern void pti_init(void); extern void pti_check_boottime_disable(void); +extern void pti_finalize(void); #else static inline void pti_check_boottime_disable(void) { } #endif diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6de1fd3d0097..8a7fc0cca2d1 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -37,8 +37,10 @@ struct pt_regs { unsigned short __esh; unsigned short fs; unsigned short __fsh; + /* On interrupt, gs and __gsh store the vector number. */ unsigned short gs; unsigned short __gsh; + /* On interrupt, this is the error code. */ unsigned long orig_ax; unsigned long ip; unsigned short cs; @@ -144,7 +146,7 @@ static inline int v8086_mode(struct pt_regs *regs) static inline bool user_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL /* * On non-paravirt systems, this is the only long mode CPL 3 * selector. We do not allow long mode selectors in the LDT. @@ -237,23 +239,89 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, } /** + * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns the address of the @n th entry of the + * kernel stack which is specified by @regs. If the @n th entry is NOT in + * the kernel stack, this returns NULL. + */ +static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return addr; + else + return NULL; +} + +/* To avoid include hell, we can't include uaccess.h */ +extern long probe_kernel_read(void *dst, const void *src, size_t size); + +/** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. 
* * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); - addr += n; - if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; - else - return 0; + unsigned long *addr; + unsigned long val; + long ret; + + addr = regs_get_kernel_stack_nth_addr(regs, n); + if (addr) { + ret = probe_kernel_read(&val, addr, sizeof(val)); + if (!ret) + return val; + } + return 0; +} + +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * regs_get_argument() returns @n th argument of the function call. + * Note that this chooses most probably assignment, in some case + * it can be incorrect. + * This is expected to be called from kprobes or ftrace with regs + * where the top of stack is the return address. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, + unsigned int n) +{ + static const unsigned int argument_offs[] = { +#ifdef __i386__ + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), +#define NR_REG_ARGUMENTS 3 +#else + offsetof(struct pt_regs, di), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), +#define NR_REG_ARGUMENTS 6 +#endif + }; + + if (n >= NR_REG_ARGUMENTS) { + n -= NR_REG_ARGUMENTS - 1; + return regs_get_kernel_stack_nth(regs, n); + } else + return regs_get_register(regs, argument_offs[n]); } #define arch_has_single_step() (1) @@ -263,7 +331,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif -#define ARCH_HAS_USER_SINGLE_STEP_INFO +#define ARCH_HAS_USER_SINGLE_STEP_REPORT /* * When hitting ptrace_stop(), we cannot return using SYSRET because diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index a7471dcd2205..b6033680d458 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -12,7 +12,7 @@ void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, - struct timespec *ts); + struct timespec64 *ts); void pvclock_resume(void); void pvclock_touch_watchdogs(void); diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 5e16b5d40d32..bd5ac6cc37db 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -6,6 +6,32 @@ #include <asm/cpufeature.h> #include <asm-generic/qspinlock_types.h> #include <asm/paravirt.h> +#include <asm/rmwcc.h> + +#define _Q_PENDING_LOOPS (1 << 9) + +#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) +{ + u32 val; + + /* + * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto + * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a + * statement expression, which GCC doesn't like. 
+ */ + val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, + "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL; + val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; + + return val; +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); #define queued_spin_unlock queued_spin_unlock /** @@ -16,15 +42,9 @@ */ static inline void native_queued_spin_unlock(struct qspinlock *lock) { - smp_store_release((u8 *)lock, 0); + smp_store_release(&lock->locked, 0); } -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_init_lock_hash(void); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); - static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { pv_queued_spin_lock_slowpath(lock, val); @@ -40,11 +60,6 @@ static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } -#else -static inline void queued_spin_unlock(struct qspinlock *lock) -{ - native_queued_spin_unlock(lock); -} #endif #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 923307ea11c7..159622ee0674 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * * void __pv_queued_spin_unlock(struct qspinlock *lock) * { - * struct __qspinlock *l = (void *)lock; - * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * * if (likely(lockval == _Q_LOCKED_VAL)) * return; @@ -44,7 +43,7 @@ asm (".pushsection .text;" "push %rdx;" "mov $0x1,%eax;" "xor %edx,%edx;" - "lock cmpxchg %dl,(%rdi);" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" "cmp $0x1,%al;" "jne .slowpath;" "pop %rdx;" diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4cf11d88d3b3..a8b5e1e13319 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -4,7 +4,43 @@ * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from * PaX/grsecurity. */ + +#ifdef __ASSEMBLY__ + +#include <asm/asm.h> +#include <asm/bug.h> + +.macro REFCOUNT_EXCEPTION counter:req + .pushsection .text..refcount +111: lea \counter, %_ASM_CX +112: ud2 + ASM_UNREACHABLE + .popsection +113: _ASM_EXTABLE_REFCOUNT(112b, 113b) +.endm + +/* Trigger refcount exception if refcount result is negative. */ +.macro REFCOUNT_CHECK_LT_ZERO counter:req + js 111f + REFCOUNT_EXCEPTION counter="\counter" +.endm + +/* Trigger refcount exception if refcount result is zero or negative. */ +.macro REFCOUNT_CHECK_LE_ZERO counter:req + jz 111f + REFCOUNT_CHECK_LT_ZERO counter="\counter" +.endm + +/* Trigger refcount exception unconditionally. */ +.macro REFCOUNT_ERROR counter:req + jmp 111f + REFCOUNT_EXCEPTION counter="\counter" +.endm + +#else /* __ASSEMBLY__ */ + #include <linux/refcount.h> +#include <asm/bug.h> /* * This is the first portion of the refcount error handling, which lives in @@ -14,34 +50,11 @@ * central refcount exception. The fixup address for the exception points * back to the regular execution flow in .text. 
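/*
 * Illustrative sketch, not part of the patch: a plain-C model of what
 * the REFCOUNT_CHECK_* macros generate. After the locked arithmetic, a
 * signed-negative result ("js") diverts to the out-of-line trap in
 * .text..refcount instead of handing back a corrupted counter:
 */
#include <assert.h>
#include <limits.h>

static int ex_trapped;

static void ex_refcount_trap(void)	/* models the ud2 in .text..refcount */
{
	ex_trapped = 1;
}

static void ex_refcount_inc(unsigned int *refs)
{
	*refs += 1;			/* "lock incl" wraps like hardware */
	if ((int)*refs < 0)		/* REFCOUNT_CHECK_LT_ZERO: "js 111f" */
		ex_refcount_trap();
}

int main(void)
{
	unsigned int refs = INT_MAX;	/* one increment away from overflow */

	ex_refcount_inc(&refs);
	assert(ex_trapped);
	return 0;
}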
*/ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[counter], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION static __always_inline void refcount_add(unsigned int i, refcount_t *r) { asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO + "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\"" : [counter] "+m" (r->refs.counter) : "ir" (i) : "cc", "cx"); @@ -50,7 +63,7 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r) static __always_inline void refcount_inc(refcount_t *r) { asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO + "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\"" : [counter] "+m" (r->refs.counter) : : "cc", "cx"); } @@ -58,7 +71,7 @@ static __always_inline void refcount_inc(refcount_t *r) static __always_inline void refcount_dec(refcount_t *r) { asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO + "REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\"" : [counter] "+m" (r->refs.counter) : : "cc", "cx"); } @@ -66,14 +79,17 @@ static __always_inline void refcount_dec(refcount_t *r) static __always_inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) { - GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "er", i, "%0", e, "cx"); + + return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", + "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"", + r->refs.counter, e, "er", i, "cx"); } static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) { - GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "%0", e, "cx"); + return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", + "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"", + r->refs.counter, e, "cx"); } static __always_inline __must_check @@ -90,7 +106,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) /* Did we try to increment from/to an undesirable state? 
*/ if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR + asm volatile("REFCOUNT_ERROR counter=\"%[counter]\"" : : [counter] "m" (r->refs.counter) : "cc", "cx"); break; @@ -106,4 +122,6 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) return refcount_add_not_zero(1, r); } +#endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fb3a6de7440b..6847d85400a8 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 4914a3e7c803..46ac84b506f5 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,56 +2,69 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc +/* This counts to 12. Any more, it will return 13th argument. */ +#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __RMWcc_CONCAT(a, b) a ## b +#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b) + #define __CLOBBERS_MEM(clb...) "memory", ## clb #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) /* Use asm goto */ -#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ -do { \ +#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ +({ \ + bool c = false; \ asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ - : : [counter] "m" (var), ## __VA_ARGS__ \ + : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ - return 0; \ -cc_label: \ - return 1; \ -} while (0) - -#define __BINARY_RMWcc_ARG " %1, " - + if (0) { \ +cc_label: c = true; \ + } \ + c; \ +}) #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ /* Use flags output or a set instruction */ -#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ -do { \ +#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ +({ \ bool c; \ asm volatile (fullop CC_SET(cc) \ - : [counter] "+m" (var), CC_OUT(cc) (c) \ + : [var] "+m" (_var), CC_OUT(cc) (c) \ : __VA_ARGS__ : clobbers); \ - return c; \ -} while (0) - -#define __BINARY_RMWcc_ARG " %2, " + c; \ +}) #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ -#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ +#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) -#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\ - __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM(clobbers)) +#define GEN_UNARY_RMWcc_3(op, var, cc) \ + GEN_UNARY_RMWcc_4(op, var, cc, "%[var]") -#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ - __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ - __CLOBBERS_MEM(), vcon (val)) +#define GEN_UNARY_RMWcc(X...) 
RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X) + +#define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \ + __GEN_RMWcc(op " %[val], " arg0, var, cc, \ + __CLOBBERS_MEM(), [val] vcon (_val)) + +#define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \ + GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]") + +#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X) + +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) \ + __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \ + __CLOBBERS_MEM(clobbers)) -#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \ - clobbers...) \ - __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM(clobbers), vcon (val)) +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, cc, vcon, _val, clobbers...)\ + __GEN_RMWcc(op " %[val], %[var]\n\t" suffix, var, cc, \ + __CLOBBERS_MEM(clobbers), [val] vcon (_val)) #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 5c019d23d06b..8ea1cfdbeabc 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -7,10 +7,10 @@ extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; +extern char __end_rodata_aligned[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; -extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 8f09012b92e7..ac3892920419 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -146,7 +146,7 @@ # define __KERNEL_PERCPU 0 #endif -#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_STACKPROTECTOR # define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) #else # define __KERNEL_STACK_CANARY 0 @@ -186,8 +186,7 @@ #define GDT_ENTRY_TLS_MIN 12 #define GDT_ENTRY_TLS_MAX 14 -/* Abused to load per CPU data from limit */ -#define GDT_ENTRY_PER_CPU 15 +#define GDT_ENTRY_CPUNODE 15 /* * Number of entries in the GDT table: @@ -207,11 +206,11 @@ #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) #define __USER32_DS __USER_DS #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) -#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) +#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) #endif -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL # define get_kernel_rpl() 0 #endif @@ -225,6 +224,47 @@ #define GDT_ENTRY_TLS_ENTRIES 3 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) +#ifdef CONFIG_X86_64 + +/* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */ +#define VDSO_CPUNODE_BITS 12 +#define VDSO_CPUNODE_MASK 0xfff + +#ifndef __ASSEMBLY__ + +/* Helper functions to store/load CPU and node numbers */ + +static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) +{ + return (node << VDSO_CPUNODE_BITS) | cpu; +} + +static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) +{ + unsigned int p; + + /* + * Load CPU and node number from the GDT. LSL is faster than RDTSCP + * and works on all CPUs. This is volatile so that it orders + * correctly with respect to barrier() and to keep GCC from cleverly + * hoisting it out of the calling function. + * + * If RDPID is available, use it. 
+ */ + alternative_io ("lsl %[seg],%[p]", + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, + [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + + if (cpu) + *cpu = (p & VDSO_CPUNODE_MASK); + if (node) + *node = (p >> VDSO_CPUNODE_BITS); +} + +#endif /* !__ASSEMBLY__ */ +#endif /* CONFIG_X86_64 */ + #ifdef __KERNEL__ /* diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index bd090367236c..07a25753e85c 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -46,6 +46,7 @@ int set_memory_np(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); +int set_memory_np_noalias(unsigned long addr, int numpages); int set_memory_array_uc(unsigned long *addr, int addrinarray); int set_memory_array_wc(unsigned long *addr, int addrinarray); @@ -88,4 +89,46 @@ extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); +#ifdef CONFIG_X86_64 +static inline int set_mce_nospec(unsigned long pfn) +{ + unsigned long decoy_addr; + int rc; + + /* + * Mark the linear address as UC to make sure we don't log more + * errors because of speculative access to the page. + * We would like to just call: + * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * but doing that would radically increase the odds of a + * speculative access to the poison page because we'd have + * the virtual address of the kernel 1:1 mapping sitting + * around in registers. + * Instead we get tricky. We create a non-canonical address + * that looks just like the one we want, but has bit 63 flipped. + * This relies on set_memory_uc() properly sanitizing any __pa() + * results with __PHYSICAL_MASK or PTE_PFN_MASK. + */ + decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); + + rc = set_memory_uc(decoy_addr, 1); + if (rc) + pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); + return rc; +} +#define set_mce_nospec set_mce_nospec + +/* Restore full speculative operation to the pfn. */ +static inline int clear_mce_nospec(unsigned long pfn) +{ + return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); +} +#define clear_mce_nospec clear_mce_nospec +#else +/* + * Few people would run a 32-bit kernel on a machine that supports + * recoverable errors because they have too much memory to boot 32-bit. 
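+ *
+ * For the 64-bit set_mce_nospec() above, the decoy arithmetic works out
+ * as follows (a sketch with illustrative values; the real PAGE_OFFSET
+ * depends on the configuration): with PAGE_SHIFT == 12 and
+ * PAGE_OFFSET == 0xffff888000000000, pfn 0x12345 gives
+ *
+ *	decoy_addr == 0x12345000 + (0xffff888000000000 ^ BIT(63))
+ *	           == 0x7fff888012345000,
+ *
+ * a non-canonical address that is useless for speculative dereference
+ * but still resolves to the right pfn once __PHYSICAL_MASK strips the
+ * high bits.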
+ */ +#endif + #endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 5f9012ff52ed..33d3c88a7225 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs); #define __ARCH_HAS_SA_RESTORER +#include <asm/asm.h> #include <uapi/asm/sigcontext.h> #ifdef __i386__ @@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - unsigned char ret; - asm("btl %2,%1\n\tsetc %0" - : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + bool ret; + asm("btl %2,%1" CC_SET(c) + : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); return ret; } diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a4189762b266..547c4fe50711 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -171,22 +171,11 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } -#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; #ifdef CONFIG_X86_LOCAL_APIC - -#ifndef CONFIG_X86_64 -static inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - -#endif - extern int hard_smp_processor_id(void); #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d3c43e..199218719a86 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,13 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# ifdef CONFIG_X86_5LEVEL -# define MAX_PHYSADDR_BITS 52 -# define MAX_PHYSMEM_BITS 52 -# else -# define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 46 -# endif +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..ae7c2c5cd7f0 --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include <linux/thread_info.h> +#include <asm/nospec-branch.h> + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. 
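+ *
+ * The intended call pairing is symmetric (a sketch; "vcpu_spec_ctrl"
+ * and "vcpu_virt_spec_ctrl" are placeholders for wherever the
+ * hypervisor keeps the guest values):
+ *
+ *	x86_spec_ctrl_set_guest(vcpu_spec_ctrl, vcpu_virt_spec_ctrl);
+ *	... enter the guest: VMRUN/VMLAUNCH/VMRESUME ...
+ *	x86_spec_ctrl_restore_host(vcpu_spec_ctrl, vcpu_virt_spec_ctrl);
+ *
+ * Both helpers funnel into x86_virt_spec_ctrl() below; only the bool
+ * 'guest' direction argument differs.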
+ */ +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_ssbd_mask; + +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; +} + +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} + +#endif diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 317fc59b512c..43c029cdc3fe 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -141,7 +141,7 @@ static inline unsigned long __read_cr4(void) return native_read_cr4(); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else @@ -208,7 +208,7 @@ static inline void load_gs_index(unsigned selector) #endif -#endif/* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ static inline void clflush(volatile void *__p) { diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 371b3a4af000..8ec97a62c245 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -34,7 +34,7 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 -#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_STACKPROTECTOR #include <asm/tsc.h> #include <asm/processor.h> @@ -105,7 +105,7 @@ static inline void load_stack_canary_segment(void) #endif } -#else /* CC_STACKPROTECTOR */ +#else /* STACKPROTECTOR */ #define GDT_STACK_CANARY_INIT @@ -121,5 +121,5 @@ static inline void load_stack_canary_segment(void) #endif } -#endif /* CC_STACKPROTECTOR */ +#endif /* STACKPROTECTOR */ #endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f73706878772..f335aad404a4 
100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -87,8 +87,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl); -extern unsigned int code_bytes; - /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; @@ -113,4 +111,6 @@ static inline unsigned long caller_frame_pointer(void) return (unsigned long)frame; } +void show_opcodes(struct pt_regs *regs, const char *loglvl); +void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c2..7ad41bfcc16c 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, + size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -131,14 +132,15 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * actually do machine check recovery. Everyone else can just * use memcpy(). * - * Return 0 for success, -EFAULT for fail + * Return 0 for success, or number of bytes not copied if there was an + * exception. */ -static __always_inline __must_check int +static __always_inline __must_check unsigned long memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE if (static_branch_unlikely(&mcsafe_key)) - return memcpy_mcsafe_unrolled(dst, src, cnt); + return __memcpy_mcsafe(dst, src, cnt); else #endif memcpy(dst, src, cnt); @@ -147,7 +149,25 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 -void memcpy_flushcache(void *dst, const void *src, size_t cnt); +void __memcpy_flushcache(void *dst, const void *src, size_t cnt); +static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + if (__builtin_constant_p(cnt)) { + switch (cnt) { + case 4: + asm ("movntil %1, %0" : "=m"(*(u32 *)dst) : "r"(*(u32 *)src)); + return; + case 8: + asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src)); + return; + case 16: + asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src)); + asm ("movntiq %1, %0" : "=m"(*(u64 *)(dst + 8)) : "r"(*(u64 *)(src + 8))); + return; + } + } + __memcpy_flushcache(dst, src, cnt); +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h index ecffe81ff65c..a892494ca5e4 100644 --- a/arch/x86/include/asm/suspend.h +++ b/arch/x86/include/asm/suspend.h @@ -4,3 +4,11 @@ #else # include <asm/suspend_64.h> #endif +extern unsigned long restore_jump_address __visible; +extern unsigned long jump_address_phys; +extern unsigned long restore_cr3 __visible; +extern unsigned long temp_pgt __visible; +extern unsigned long relocated_restore_code __visible; +extern int relocate_restore_code(void); +/* Defined in hibernate_asm_32/64.S */ +extern asmlinkage __visible int restore_image(void); diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 8be6afb58471..fdbd9d7b7bca 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -32,4 +32,8 @@ struct saved_context { unsigned long return_address; } 
__attribute__((packed)); +/* routines for saving/restoring kernel state */ +extern char core_restore_code[]; +extern char restore_registers[]; + #endif /* _ASM_X86_SUSPEND_32_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0487ac054870..93b462e48067 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_dr; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[42]; + u8 reserved_1[40]; + u16 pause_filter_thresh; u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 1c6a6cb230ff..ff6c92eff035 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void) { } #endif - -extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs); -extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index eb5f7999a893..36bd243843d6 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -87,15 +87,25 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread) #endif /* This is used when switching tasks or entering/exiting vm86 mode. */ -static inline void update_sp0(struct task_struct *task) +static inline void update_task_stack(struct task_struct *task) { - /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ + /* sp0 always points to the entry trampoline stack, which is constant: */ #ifdef CONFIG_X86_32 - load_sp0(task->thread.sp0); + if (static_cpu_has(X86_FEATURE_XENPV)) + load_sp0(task->thread.sp0); + else + this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); #else + /* + * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That + * doesn't work on x86-32 because sp1 and + * cpu_current_top_of_stack have different values (because of + * the non-zero stack-padding on 32bit). + */ if (static_cpu_has(X86_FEATURE_XENPV)) load_sp0(task_top_of_stack(task)); #endif + } #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h deleted file mode 100644 index 906794aa034e..000000000000 --- a/arch/x86/include/asm/sys_ia32.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * sys_ia32.h - Linux ia32 syscall interfaces - * - * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. 
- */ - -#ifndef _ASM_X86_SYS_IA32_H -#define _ASM_X86_SYS_IA32_H - -#ifdef CONFIG_COMPAT - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <linux/types.h> -#include <linux/signal.h> -#include <asm/compat.h> -#include <asm/ia32.h> - -/* ia32/sys_ia32.c */ -asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, - unsigned long); -asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, - unsigned long); - -asmlinkage long compat_sys_x86_stat64(const char __user *, - struct stat64 __user *); -asmlinkage long compat_sys_x86_lstat64(const char __user *, - struct stat64 __user *); -asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, - struct stat64 __user *, int); -struct mmap_arg_struct32; -asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); - -asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, - int); - -asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, - u32); -asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, - u32, u32); - -asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, - int); - -asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, - size_t); -asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, - unsigned int, unsigned int, - int); -asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, - size_t, int); -asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, - unsigned int, unsigned int); -asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, - unsigned long, int __user *); - -/* ia32/ia32_signal.c */ -asmlinkage long sys32_sigreturn(void); -asmlinkage long sys32_rt_sigreturn(void); - -#endif /* CONFIG_COMPAT */ - -#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 03eedc21246d..d653139857af 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,9 +20,13 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> +#ifdef CONFIG_X86_64 +typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *); +#else typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#endif /* CONFIG_X86_64 */ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h new file mode 100644 index 000000000000..e046a405743d --- /dev/null +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - x86 specific wrappers to syscall definitions + */ + +#ifndef _ASM_X86_SYSCALL_WRAPPER_H +#define _ASM_X86_SYSCALL_WRAPPER_H + +/* Mapping of registers to parameters for syscalls on x86-64 and x32 */ +#define SC_X86_64_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->di,,regs->si,,regs->dx \ + ,,regs->r10,,regs->r8,,regs->r9) \ + +/* Mapping of registers to parameters for syscalls on i386 */ +#define SC_IA32_REGS_TO_ARGS(x, ...) 
\ + __MAP(x,__SC_ARGS \ + ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ + ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ + ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + +#ifdef CONFIG_IA32_EMULATION +/* + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this + * case as well. + */ +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ + asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ + asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + +#define __IA32_SYS_STUBx(x, name, ...) \ + asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \ + asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \ + { \ + return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + } + +/* + * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias + * named __ia32_sys_*() + */ +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(void) + +#define COND_SYSCALL(name) \ + cond_syscall(__x64_sys_##name); \ + cond_syscall(__ia32_sys_##name) + +#define SYS_NI(name) \ + SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) + +#else /* CONFIG_IA32_EMULATION */ +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) +#define __IA32_SYS_STUBx(x, fullname, name, ...) +#endif /* CONFIG_IA32_EMULATION */ + + +#ifdef CONFIG_X86_X32 +/* + * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware + * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common + * with x86_64 obviously do not need such care. + */ +#define __X32_COMPAT_SYS_STUBx(x, name, ...) \ + asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ + asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + +#else /* CONFIG_X86_X32 */ +#define __X32_COMPAT_SYS_STUBx(x, name, ...) +#endif /* CONFIG_X86_X32 */ + + +#ifdef CONFIG_COMPAT +/* + * Compat means IA32_EMULATION and/or X86_X32. As they use a different + * mapping of registers to parameters, we need to generate stubs for each + * of them. + */ +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) 
\ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + __IA32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \ + __X32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + } \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As some compat syscalls may not be implemented, we need to expand + * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in + * kernel/time/posix-stubs.c to cover this case as well. + */ +#define COND_SYSCALL_COMPAT(name) \ + cond_syscall(__ia32_compat_sys_##name); \ + cond_syscall(__x32_compat_sys_##name) + +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers) + +#endif /* CONFIG_COMPAT */ + + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes + * struct pt_regs *regs as the only argument of the syscall stub named + * __x64_sys_*(). It decodes just the registers it needs and passes them on to + * the __se_sys_*() wrapper performing sign extension and then to the + * __do_sys_*() function doing the actual job. These wrappers and functions + * are inlined (at least in the vast majority of cases), meaning that the + * assembly looks as follows (slightly re-ordered for better readability): + * + * <__x64_sys_recv>: <-- syscall with 4 parameters + * callq <__fentry__> + * + * mov 0x70(%rdi),%rdi <-- decode regs->di + * mov 0x68(%rdi),%rsi <-- decode regs->si + * mov 0x60(%rdi),%rdx <-- decode regs->dx + * mov 0x38(%rdi),%rcx <-- decode regs->r10 + * + * xor %r9d,%r9d <-- clear %r9 + * xor %r8d,%r8d <-- clear %r8 + * + * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() + * which takes 6 arguments + * + * cltq <-- extend return value to 64-bit + * retq <-- return + * + * This approach avoids leaking random user-provided register content down + * the call chain. + * + * If IA32_EMULATION is enabled, this macro generates an additional wrapper + * named __ia32_sys_*() which decodes the struct pt_regs *regs according + * to the i386 calling convention (bx, cx, dx, si, di, bp). + */ +#define __SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long __x64_sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ + { \ + return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for + * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not + * hurt, we only need to re-define it here to keep the naming congruent to + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() + * macros to work correctly. 
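+ *
+ * For reference, a hand expansion of the DEFINEx path above (abridged
+ * and approximate -- the __SC_LONG/__SC_CAST types are simplified):
+ * SYSCALL_DEFINE1(close, unsigned int, fd) becomes roughly
+ *
+ *	asmlinkage long __x64_sys_close(const struct pt_regs *regs)
+ *	{
+ *		return __se_sys_close(regs->di);
+ *	}
+ *	static long __se_sys_close(long fd)
+ *	{
+ *		return __do_sys_close((unsigned int)fd);
+ *	}
+ *	static inline long __do_sys_close(unsigned int fd)
+ *	{ ... original syscall body ... }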
+ */ +#ifndef SYSCALL_DEFINE0 +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + asmlinkage long __x64_sys_##sname(void) +#endif + +#ifndef COND_SYSCALL +#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#endif + +#ifndef SYS_NI +#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); +#endif + + +/* + * For VSYSCALLS, we need to declare these three syscalls with the new + * pt_regs-based calling convention for in-kernel use. + */ +struct pt_regs; +asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); +asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); +asmlinkage long __x64_sys_time(const struct pt_regs *regs); + +#endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index bad25bb80679..9fa979dd0d9d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -17,6 +17,13 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); + +#ifdef CONFIG_X86_32 +/* + * These definitions are only valid on pure 32-bit systems; x86-64 uses a + * different syscall calling convention + */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); @@ -31,25 +38,14 @@ asmlinkage long sys_set_thread_area(struct user_desc __user *); asmlinkage long sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ -#ifdef CONFIG_X86_32 /* kernel/signal.c */ -asmlinkage unsigned long sys_sigreturn(void); +asmlinkage long sys_sigreturn(void); /* kernel/vm86_32.c */ struct vm86_struct; asmlinkage long sys_vm86old(struct vm86_struct __user *); asmlinkage long sys_vm86(unsigned long, unsigned long); -#else /* CONFIG_X86_32 */ - -/* X86_64 only */ -/* kernel/process_64.c */ -asmlinkage long sys_arch_prctl(int, unsigned long); - -/* kernel/sys_x86_64.c */ -asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALLS_H */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 2ecd34e2d46c..e85ff65c43c3 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -37,5 +37,6 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern int after_bootmem; #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a5d9521bb2cb..2ff2a30a264f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,6 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -105,6 +106,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << 
TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -144,7 +146,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index cb0a1f470980..404b8b1d44f5 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -6,16 +6,23 @@ #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) -#define tlb_flush(tlb) \ -{ \ - if (!tlb->fullmm && !tlb->need_flush_all) \ - flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \ - else \ - flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \ -} +static inline void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> +static inline void tlb_flush(struct mmu_gather *tlb) +{ + unsigned long start = 0UL, end = TLB_FLUSH_ALL; + unsigned int stride_shift = tlb_get_unmap_shift(tlb); + + if (!tlb->fullmm && !tlb->need_flush_all) { + start = tlb->start; + end = tlb->end; + } + + flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables); +} + /* * While x86 architecture in general requires an IPI to perform TLB * shootdown, enablement code for several hypervisors overrides diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 84137c22fdfa..d760611cfc35 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -131,7 +131,12 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + /* + * Use boot_cpu_has() instead of this_cpu_has() as this function + * might be called during early boot. This should work even after + * boot because all CPUs have the same capabilities: + */ + VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } @@ -143,22 +148,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) #define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif -static inline bool tlb_defer_switch_to_init_mm(void) -{ - /* - * If we have PCID, then switching to init_mm is reasonably - * fast. If we don't have PCID, then switching to init_mm is - * quite slow, so we try to defer it in the hopes that we can - * avoid it entirely. The latter approach runs the risk of - * receiving otherwise unnecessary IPIs. - * - * This choice is just a heuristic. The tlb code can handle this - * function returning true or false regardless of whether we have - * PCID. - */ - return !static_cpu_has(X86_FEATURE_PCID); -} - struct tlb_context { u64 ctx_id; u64 tlb_gen; @@ -170,8 +159,16 @@ struct tlb_state { * are on. This means that it may not match current->active_mm, * which will contain the previous user mm when we're in lazy TLB * mode even if we've already switched back to swapper_pg_dir. 
+ * + * During switch_mm_irqs_off(), loaded_mm will be set to + * LOADED_MM_SWITCHING during the brief interrupts-off window + * when CR3 and loaded_mm would otherwise be inconsistent. This + * is for nmi_uaccess_okay()'s benefit. */ struct mm_struct *loaded_mm; + +#define LOADED_MM_SWITCHING ((struct mm_struct *)1) + u16 loaded_mm_asid; u16 next_asid; /* last user mm's ctx id */ @@ -241,6 +238,38 @@ struct tlb_state { }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or + * switching the loaded mm. It can also be dangerous if we + * interrupted some kernel code that was temporarily using a + * different mm. + */ +static inline bool nmi_uaccess_okay(void) +{ + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); + struct mm_struct *current_mm = current->mm; + + VM_WARN_ON_ONCE(!loaded_mm); + + /* + * The condition we want to check is + * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, + * if we're running in a VM with shadow paging, and nmi_uaccess_okay() + * is supposed to be reasonably fast. + * + * Instead, we check the almost equivalent but somewhat conservative + * condition below, and we rely on the fact that switch_mm_irqs_off() + * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. + */ + if (loaded_mm != current_mm) + return false; + + VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); + + return true; +} + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { @@ -424,6 +453,12 @@ static inline void __native_flush_tlb_one_user(unsigned long addr) */ static inline void __flush_tlb_all(void) { + /* + * This is to catch callers that run with preemption enabled on a CPU + * with the PGE feature, as they would not otherwise trigger the + * warning in __native_flush_tlb(). + */ + VM_WARN_ON_ONCE(preemptible()); + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); } else { @@ -502,23 +537,30 @@ struct flush_tlb_info { unsigned long start; unsigned long end; u64 new_tlb_gen; + unsigned int stride_shift; + bool freed_tables; }; #define local_flush_tlb() __flush_tlb() -#define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) +#define flush_tlb_mm(mm) \ + flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) -#define flush_tlb_range(vma, start, end) \ - flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) +#define flush_tlb_range(vma, start, end) \ + flush_tlb_mm_range((vma)->vm_mm, start, end, \ + ((vma)->vm_flags & VM_HUGETLB) \ + ? 
huge_page_shift(hstate_vma(vma)) \ + : PAGE_SHIFT, false) extern void flush_tlb_all(void); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long vmflag); + unsigned long end, unsigned int stride_shift, + bool freed_tables); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) { - flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); } void native_flush_tlb_others(const struct cpumask *cpumask, @@ -547,6 +589,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, info) \ native_flush_tlb_others(mask, info) + +#define paravirt_tlb_remove_table(tlb, page) \ + tlb_remove_page(tlb, (void *)(page)) #endif #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c1d2a9892352..453cf38a1c33 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -123,13 +123,17 @@ static inline int topology_max_smt_threads(void) } int topology_update_package_map(unsigned int apicid, unsigned int cpu); -extern int topology_phys_to_logical_pkg(unsigned int pkg); +int topology_phys_to_logical_pkg(unsigned int pkg); +bool topology_is_primary_thread(unsigned int cpu); +bool topology_smt_supported(void); #else #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } +static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline bool topology_smt_supported(void) { return false; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index 4253bca99989..2e6245a023ef 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -28,6 +28,35 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others, __entry->addr, __entry->end) ); +TRACE_EVENT(hyperv_nested_flush_guest_mapping, + TP_PROTO(u64 as, int ret), + TP_ARGS(as, ret), + + TP_STRUCT__entry( + __field(u64, as) + __field(int, ret) + ), + TP_fast_assign(__entry->as = as; + __entry->ret = ret; + ), + TP_printk("address space %llx ret %d", __entry->as, __entry->ret) + ); + +TRACE_EVENT(hyperv_send_ipi_mask, + TP_PROTO(const struct cpumask *cpus, + int vector), + TP_ARGS(cpus, vector), + TP_STRUCT__entry( + __field(unsigned int, ncpus) + __field(int, vector) + ), + TP_fast_assign(__entry->ncpus = cpumask_weight(cpus); + __entry->vector = vector; + ), + TP_printk("ncpus %d vector %x", + __entry->ncpus, __entry->vector) + ); + #endif /* CONFIG_HYPERV */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 22647a642e98..0af81b590a0c 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -236,7 +236,7 @@ TRACE_EVENT(vector_alloc, TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, int ret), - TP_ARGS(irq, vector, ret, reserved), + TP_ARGS(irq, vector, reserved, ret), TP_STRUCT__entry( __field( unsigned int, irq ) diff --git a/arch/x86/include/asm/trace/mpx.h 
b/arch/x86/include/asm/trace/mpx.h index 7bd92db09e8d..54133017267c 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -11,12 +11,12 @@ TRACE_EVENT(mpx_bounds_register_exception, - TP_PROTO(void *addr_referenced, + TP_PROTO(void __user *addr_referenced, const struct mpx_bndreg *bndreg), TP_ARGS(addr_referenced, bndreg), TP_STRUCT__entry( - __field(void *, addr_referenced) + __field(void __user *, addr_referenced) __field(u64, lower_bound) __field(u64, upper_bound) ), diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index cf5d53c3f9ea..eb5bbfeccb66 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -31,14 +31,15 @@ static inline cycles_t get_cycles(void) } extern struct system_counterval_t convert_art_to_tsc(u64 art); +extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); -extern void tsc_early_delay_calibrate(void); +extern void tsc_early_init(void); extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern void mark_tsc_async_resets(char *reason); -extern unsigned long native_calibrate_cpu(void); +extern unsigned long native_calibrate_cpu_early(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index aae77eb8491c..b5e58cc0c5e7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -198,8 +198,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) "4: movl %3,%0\n" \ " jmp 3b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UA(1b, 4b) \ + _ASM_EXTABLE_UA(2b, 4b) \ : "=r" (err) \ : "A" (x), "r" (addr), "i" (errret), "0" (err)) @@ -340,8 +340,8 @@ do { \ " xorl %%edx,%%edx\n" \ " jmp 3b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UA(1b, 4b) \ + _ASM_EXTABLE_UA(2b, 4b) \ : "=r" (retval), "=&A"(x) \ : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ "i" (errret), "0" (retval)); \ @@ -386,7 +386,7 @@ do { \ " xor"itype" %"rtype"1,%"rtype"1\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) @@ -398,7 +398,7 @@ do { \ "3: mov %3,%0\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) @@ -474,7 +474,7 @@ struct __large_struct { unsigned long buf[100]; }; "3: mov %3,%0\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "=r"(err) \ : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) @@ -602,7 +602,7 @@ extern void __cmpxchg_wrong_size(void) "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ "\t.previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ : "i" (-EFAULT), "q" (__new), "1" (__old) \ : "memory" \ @@ -618,7 +618,7 @@ extern void __cmpxchg_wrong_size(void) "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ "\t.previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ : "i" (-EFAULT), "r" (__new), "1" (__old) \ : "memory" \ @@ -634,7 +634,7 @@ extern void __cmpxchg_wrong_size(void) "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ "\t.previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "+r" 
(__ret), "=a" (__old), "+m" (*(ptr)) \ : "i" (-EFAULT), "r" (__new), "1" (__old) \ : "memory" \ @@ -653,7 +653,7 @@ extern void __cmpxchg_wrong_size(void) "3:\tmov %3, %0\n" \ "\tjmp 2b\n" \ "\t.previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UA(1b, 3b) \ : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ : "i" (-EFAULT), "r" (__new), "1" (__old) \ : "memory" \ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62546b3a398e..a9d637bc301d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -47,6 +47,22 @@ copy_user_generic(void *to, const void *from, unsigned len) } static __always_inline __must_check unsigned long +copy_to_user_mcsafe(void *to, const void *from, unsigned len) +{ + unsigned long ret; + + __uaccess_begin(); + /* + * Note, __memcpy_mcsafe() is explicitly used since it can + * handle exceptions / faults. memcpy_mcsafe() may fall back to + * memcpy() which lacks this handling. + */ + ret = __memcpy_mcsafe(to, from, len); + __uaccess_end(); + return ret; +} + +static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { int ret = 0; @@ -194,4 +210,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); +unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 51c4eee00732..dc4ed8bc2382 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -24,6 +24,7 @@ # include <asm/unistd_64.h> # include <asm/unistd_64_x32.h> # define __ARCH_WANT_COMPAT_SYS_TIME +# define __ARCH_WANT_SYS_UTIME32 # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # define __ARCH_WANT_COMPAT_SYS_PREADV64V2 @@ -31,13 +32,13 @@ # endif +# define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_OLD_READDIR # define __ARCH_WANT_OLD_STAT # define __ARCH_WANT_SYS_ALARM # define __ARCH_WANT_SYS_FADVISE64 # define __ARCH_WANT_SYS_GETHOSTNAME # define __ARCH_WANT_SYS_GETPGRP -# define __ARCH_WANT_SYS_LLSEEK # define __ARCH_WANT_SYS_NICE # define __ARCH_WANT_SYS_OLDUMOUNT # define __ARCH_WANT_SYS_OLD_GETRLIMIT diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index bae46fc6b9de..0bcdb1279361 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -26,7 +26,7 @@ * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. 
*/ -.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL +.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0 #ifdef CONFIG_STACK_VALIDATION .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints @@ -35,12 +35,14 @@ .short \sp_offset .byte \sp_reg .byte \type + .byte \end + .balign 4 .popsection #endif .endm .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 @@ -86,19 +88,21 @@ #else /* !__ASSEMBLY__ */ -#define UNWIND_HINT(sp_reg, sp_offset, type) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "987: \n\t" \ ".pushsection .discard.unwind_hints\n\t" \ /* struct unwind_hint */ \ ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ ".byte " __stringify(sp_reg) "\n\t" \ ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(end) "\n\t" \ + ".balign 4 \n\t" \ ".popsection\n\t" -#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE) +#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) -#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE) +#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include <linux/efi.h> extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index fb856c9f0449..913a133f8e6f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,33 +5,46 @@ #include <linux/compiler.h> #include <linux/clocksource.h> +#include <uapi/linux/time.h> + #ifdef BUILD_VDSO32_64 typedef u64 gtod_long_t; #else typedef unsigned long gtod_long_t; #endif + +/* + * There is one of these objects in the vvar page for each + * vDSO-accelerated clockid. For high-resolution clocks, this encodes + * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse + * clocks, this encodes the actual time. + * + * To confuse the reader, for high-resolution clocks, nsec is left-shifted + * by vsyscall_gtod_data.shift. + */ +struct vgtod_ts { + u64 sec; + u64 nsec; +}; + +#define VGTOD_BASES (CLOCK_TAI + 1) +#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) +#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) + /* * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time * so be careful when modifying this structure. 
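 *
 * Readers pair the gtod_read_begin()/gtod_read_retry() helpers defined
 * below in the usual seqcount pattern (a sketch of a vDSO-side reader):
 *
 *	do {
 *		seq = gtod_read_begin(gtod);
 *		ts  = gtod->basetime[CLOCK_MONOTONIC];
 *	} while (unlikely(gtod_read_retry(gtod, seq)));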
*/ struct vsyscall_gtod_data { - unsigned seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - /* open coded 'struct timespec' */ - u64 wall_time_snsec; - gtod_long_t wall_time_sec; - gtod_long_t monotonic_time_sec; - u64 monotonic_time_snsec; - gtod_long_t wall_time_coarse_sec; - gtod_long_t wall_time_coarse_nsec; - gtod_long_t monotonic_time_coarse_sec; - gtod_long_t monotonic_time_coarse_nsec; + unsigned int seq; + + int vclock_mode; + u64 cycle_last; + u64 mask; + u32 mult; + u32 shift; + + struct vgtod_ts basetime[VGTOD_BASES]; int tz_minuteswest; int tz_dsttime; @@ -44,9 +57,9 @@ static inline bool vclock_was_used(int vclock) return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) +static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) { - unsigned ret; + unsigned int ret; repeat: ret = READ_ONCE(s->seq); @@ -59,7 +72,7 @@ repeat: } static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned start) + unsigned int start) { smp_rmb(); return unlikely(s->seq != start); @@ -77,30 +90,4 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s) ++s->seq; } -#ifdef CONFIG_X86_64 - -#define VGETCPU_CPU_MASK 0xfff - -static inline unsigned int __getcpu(void) -{ - unsigned int p; - - /* - * Load per CPU data from GDT. LSL is faster than RDTSCP and - * works on all CPUs. This is volatile so that it orders - * correctly wrt barrier() and to keep gcc from cleverly - * hoisting it out of the calling function. - * - * If RDPID is available, use it. - */ - alternative_io ("lsl %[seg],%[p]", - ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ - X86_FEATURE_RDPID, - [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); - - return p; -} - -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 0116b2ee9e64..1fc7a0d1e877 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void) */ static inline void cpu_vmxoff(void) { - asm volatile (ASM_VMX_VMXOFF : : : "cc"); + asm volatile ("vmxoff"); cr4_clear_bits(X86_CR4_VMXE); } @@ -83,9 +83,10 @@ static inline void cpu_emergency_vmxoff(void) */ static inline int cpu_has_svm(const char **msg) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) { if (msg) - *msg = "not amd"; + *msg = "not amd or hygon"; return 0; } diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5db8b0b10766..ade0f153947d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -74,6 +74,7 @@ #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 @@ -114,6 +115,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ZERO_LEN_INS 0x40000000 /* VMFUNC functions */ #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 @@ -207,9 +209,13 @@ enum vmcs_field { EPTP_LIST_ADDRESS = 0x00002024, EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, 
VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, + ENCLS_EXITING_BITMAP = 0x0000202E, + ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, TSC_MULTIPLIER = 0x00002032, TSC_MULTIPLIER_HIGH = 0x00002033, GUEST_PHYSICAL_ADDRESS = 0x00002400, @@ -349,11 +355,13 @@ enum vmcs_field { #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_RESERVED (1 << 8) /* reserved */ #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ #define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ +#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 @@ -495,19 +503,6 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul - -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - struct vmx_msr_entry { u32 index; u32 reserved; @@ -571,4 +566,15 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +enum vmx_l1d_flush_state { + VMENTER_L1D_FLUSH_AUTO, + VMENTER_L1D_FLUSH_NEVER, + VMENTER_L1D_FLUSH_COND, + VMENTER_L1D_FLUSH_ALWAYS, + VMENTER_L1D_FLUSH_EPT_DISABLED, + VMENTER_L1D_FLUSH_NOT_REQUIRED, +}; + +extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; + #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc2f082ac635..0f842104862c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -122,12 +122,24 @@ struct x86_init_pci { * @guest_late_init: guest late init * @x2apic_available: X2APIC detection * @init_mem_mapping: setup early mappings during init_mem_mapping() + * @init_after_bootmem: guest init after boot allocator is finished */ struct x86_hyper_init { void (*init_platform)(void); void (*guest_late_init)(void); bool (*x2apic_available)(void); void (*init_mem_mapping)(void); + void (*init_after_bootmem)(void); +}; + +/** + * struct x86_init_acpi - x86 ACPI init functions + * @get_root_pointer: get RSDP address + * @reduced_hw_early_init: hardware reduced platform early init + */ +struct x86_init_acpi { + u64 (*get_root_pointer)(void); + void (*reduced_hw_early_init)(void); }; /** @@ -144,6 +156,7 @@ struct x86_init_ops { struct x86_init_iommu iommu; struct x86_init_pci pci; struct x86_hyper_init hyper; + struct x86_init_acpi acpi; }; /** @@ -157,7 +170,7 @@ struct x86_cpuinit_ops { void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; -struct timespec; +struct timespec64; /** * struct x86_legacy_devices - legacy x86 devices @@ -251,8 +264,8 @@ struct x86_hyper_runtime { struct x86_platform_ops { unsigned long (*calibrate_cpu)(void); unsigned long (*calibrate_tsc)(void); - void 
(*get_wallclock)(struct timespec *ts); - int (*set_wallclock)(const struct timespec *ts); + void (*get_wallclock)(struct timespec64 *ts); + int (*set_wallclock)(const struct timespec64 *ts); void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); @@ -274,19 +287,22 @@ struct x86_msi_ops { void (*restore_msi_irqs)(struct pci_dev *dev); }; -struct x86_io_apic_ops { - unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*disable)(void); +struct x86_apic_ops { + unsigned int (*io_apic_read) (unsigned int apic, unsigned int reg); + void (*restore)(void); }; extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; extern struct x86_msi_ops x86_msi; -extern struct x86_io_apic_ops x86_io_apic_ops; +extern struct x86_apic_ops x86_apic_ops; extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); +extern bool x86_pnpbios_disabled(void); + +void x86_verify_bootdata_version(void); #endif diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index d383140e1dc8..068d9b067c83 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_XEN_EVENTS_H #define _ASM_X86_XEN_EVENTS_H +#include <xen/xen.h> + enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index bfd882617613..ef05bea7010d 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -197,36 +197,38 @@ extern struct { char _entry[32]; } hypercall_page[]; (type)__res; \ }) -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ -({ \ - __HYPERCALL_DECLS; \ - __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \ - asm volatile (__HYPERCALL \ - : __HYPERCALL_5PARAM \ - : __HYPERCALL_ENTRY(name) \ - : __HYPERCALL_CLOBBER5); \ - (type)__res; \ -}) - static inline long -privcmd_call(unsigned call, - unsigned long a1, unsigned long a2, - unsigned long a3, unsigned long a4, - unsigned long a5) +xen_single_call(unsigned int call, + unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5) { __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); - stac(); asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM : [thunk_target] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); - clac(); return (long)__res; } +static inline long +privcmd_call(unsigned int call, + unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5) +{ + long res; + + stac(); + res = xen_single_call(call, a1, a2, a3, a4, a5); + clac(); + + return res; +} + static inline int HYPERVISOR_set_trap_table(struct trap_info *table) { @@ -254,47 +256,12 @@ HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) } static inline int -HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) -{ - return _hypercall2(int, stack_switch, ss, esp); -} - -#ifdef CONFIG_X86_32 -static inline int -HYPERVISOR_set_callbacks(unsigned long event_selector, - unsigned long event_address, - unsigned long failsafe_selector, - unsigned long failsafe_address) -{ - return _hypercall4(int, set_callbacks, - event_selector, event_address, - failsafe_selector, failsafe_address); -} -#else /* CONFIG_X86_64 */ -static inline int -HYPERVISOR_set_callbacks(unsigned long event_address, - unsigned long failsafe_address, - unsigned long 
syscall_address) -{ - return _hypercall3(int, set_callbacks, - event_address, failsafe_address, - syscall_address); -} -#endif /* CONFIG_X86_{32,64} */ - -static inline int HYPERVISOR_callback_op(int cmd, void *arg) { return _hypercall2(int, callback_op, cmd, arg); } static inline int -HYPERVISOR_fpu_taskswitch(int set) -{ - return _hypercall1(int, fpu_taskswitch, set); -} - -static inline int HYPERVISOR_sched_op(int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); @@ -406,19 +373,6 @@ HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) } static inline int -HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val, - unsigned long flags, domid_t domid) -{ - if (sizeof(new_val) == sizeof(long)) - return _hypercall4(int, update_va_mapping_otherdomain, va, - new_val.pte, flags, domid); - else - return _hypercall5(int, update_va_mapping_otherdomain, va, - new_val.pte, new_val.pte >> 32, - flags, domid); -} - -static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) { return _hypercall2(int, vm_assist, cmd, type); @@ -452,12 +406,6 @@ HYPERVISOR_suspend(unsigned long start_info_mfn) return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn); } -static inline int -HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) -{ - return _hypercall2(int, nmi_op, op, arg); -} - static inline unsigned long __must_check HYPERVISOR_hvm_op(int op, void *arg) { @@ -516,39 +464,6 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, } static inline void -MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, - void *uop, unsigned int count) -{ - mcl->op = __HYPERVISOR_grant_table_op; - mcl->args[0] = cmd; - mcl->args[1] = (unsigned long)uop; - mcl->args[2] = count; - - trace_xen_mc_entry(mcl, 3); -} - -static inline void -MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va, - pte_t new_val, unsigned long flags, - domid_t domid) -{ - mcl->op = __HYPERVISOR_update_va_mapping_otherdomain; - mcl->args[0] = va; - if (sizeof(new_val) == sizeof(long)) { - mcl->args[1] = new_val.pte; - mcl->args[2] = flags; - mcl->args[3] = domid; - } else { - mcl->args[1] = new_val.pte; - mcl->args[2] = new_val.pte >> 32; - mcl->args[3] = flags; - mcl->args[4] = domid; - } - - trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 
4 : 5); -} - -static inline void MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { @@ -569,16 +484,6 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, } static inline void -MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg) -{ - mcl->op = __HYPERVISOR_memory_op; - mcl->args[0] = cmd; - mcl->args[1] = (unsigned long)arg; - - trace_xen_mc_entry(mcl, 2); -} - -static inline void MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, int count, int *success_count, domid_t domid) { @@ -605,16 +510,6 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, } static inline void -MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) -{ - mcl->op = __HYPERVISOR_set_gdt; - mcl->args[0] = (unsigned long)frames; - mcl->args[1] = entries; - - trace_xen_mc_entry(mcl, 2); -} - -static inline void MULTI_stack_switch(struct multicall_entry *mcl, unsigned long ss, unsigned long esp) { diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 123e669bf363..790ce08e41f2 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -9,7 +9,7 @@ #include <linux/mm.h> #include <linux/device.h> -#include <linux/uaccess.h> +#include <asm/extable.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, */ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) { - return __put_user(val, (unsigned long __user *)addr); + int ret = 0; + + asm volatile("1: mov %[val], %[ptr]\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: sub $1, %[ret]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [ret] "+r" (ret), [ptr] "=m" (*addr) + : [val] "r" (val)); + + return ret; } -static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) +static inline int xen_safe_read_ulong(const unsigned long *addr, + unsigned long *val) { - return __get_user(*val, (unsigned long __user *)addr); + int ret = 0; + unsigned long rval = ~0ul; + + asm volatile("1: mov %[ptr], %[rval]\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: sub $1, %[ret]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [ret] "+r" (ret), [rval] "+r" (rval) + : [ptr] "m" (*addr)); + *val = rval; + + return ret; } #ifdef CONFIG_XEN_PV diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index aebf60357758..22f89d040ddd 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -16,6 +16,9 @@ #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 +/* version flags */ +#define VERSION_WRITTEN 0x8000 + /* loadflags */ #define LOADED_HIGH (1<<0) #define KASLR_FLAG (1<<1) @@ -86,6 +89,7 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; + __u64 acpi_rsdp_addr; } __attribute__((packed)); struct sys_desc_table { @@ -137,15 +141,15 @@ struct boot_e820_entry { * setup data structure. 
*/ struct jailhouse_setup_data { - u16 version; - u16 compatible_version; - u16 pm_timer_address; - u16 num_cpus; - u64 pci_mmconfig_base; - u32 tsc_khz; - u32 apic_khz; - u8 standard_ioapic; - u8 cpu_ids[255]; + __u16 version; + __u16 compatible_version; + __u16 pm_timer_address; + __u16 num_cpus; + __u64 pci_mmconfig_base; + __u32 tsc_khz; + __u32 apic_khz; + __u8 standard_ioapic; + __u8 cpu_ids[255]; } __attribute__((packed)); /* The so-called "zeropage" */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f3a960488eae..dabfcf7c3941 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -354,11 +357,67 @@ struct kvm_xcrs { __u64 padding[16]; }; -/* definition of registers in kvm_run */ +#define KVM_SYNC_X86_REGS (1UL << 0) +#define KVM_SYNC_X86_SREGS (1UL << 1) +#define KVM_SYNC_X86_EVENTS (1UL << 2) + +#define KVM_SYNC_X86_VALID_FIELDS \ + (KVM_SYNC_X86_REGS| \ + KVM_SYNC_X86_SREGS| \ + KVM_SYNC_X86_EVENTS) + +/* kvm_sync_regs struct included by kvm_run struct */ struct kvm_sync_regs { + /* Members of this structure are potentially malicious. + * Care must be taken by code reading, esp. interpreting, + * data fields from them inside KVM to prevent TOCTOU and + * double-fetch types of vulnerabilities. + */ + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; }; #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) + +#define KVM_STATE_NESTED_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 + +#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_SMM_VMXON 0x00000002 + +struct kvm_vmx_nested_state { + __u64 vmxon_pa; + __u64 vmcs_pa; + + struct { + __u16 flags; + } smm; +}; + +/* for KVM_CAP_NESTED_STATE */ +struct kvm_nested_state { + /* KVM_STATE_* flags */ + __u16 flags; + + /* 0 for VMX, 1 for SVM. */ + __u16 format; + + /* 128 for SVM, 128 + VMCS size for VMX. */ + __u32 size; + + union { + /* VMXON, VMCS */ + struct kvm_vmx_nested_state vmx; + + /* Pad the header to 128 bytes. */ + __u8 pad[120]; + }; + + __u8 data[0]; +}; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6cfa9c8cb7d6..19980ec1a316 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -3,15 +3,16 @@ #define _UAPI_ASM_X86_KVM_PARA_H #include <linux/types.h> -#include <asm/hyperv.h> /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. */ #define KVM_CPUID_SIGNATURE 0x40000000 -/* This CPUID returns a feature bitmap in eax. 
Before enabling a particular - * paravirtualization, the appropriate feature bit should be checked. +/* This CPUID returns two feature bitmaps in eax, edx. Before enabling + * a particular paravirtualization, the appropriate feature bit should + * be checked in eax. The performance hint feature bit should be checked + * in edx. */ #define KVM_CPUID_FEATURES 0x40000001 #define KVM_FEATURE_CLOCKSOURCE 0 @@ -27,6 +28,9 @@ #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 +#define KVM_FEATURE_PV_SEND_IPI 11 + +#define KVM_HINTS_REALTIME 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 435db58a7bad..955c2a2e1cf9 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -5,32 +5,36 @@ #include <linux/types.h> #include <linux/ioctl.h> -/* Fields are zero when not available */ +/* + * Fields are zero when not available. Also, this struct is shared with + * userspace mcelog and thus must keep existing fields at current offsets. + * Only add new fields to the end of the structure + */ struct mce { - __u64 status; - __u64 misc; - __u64 addr; - __u64 mcgstatus; - __u64 ip; - __u64 tsc; /* cpu time stamp counter */ - __u64 time; /* wall time_t when error was detected */ - __u8 cpuvendor; /* cpu vendor as encoded in system.h */ - __u8 inject_flags; /* software inject flags */ - __u8 severity; + __u64 status; /* Bank's MCi_STATUS MSR */ + __u64 misc; /* Bank's MCi_MISC MSR */ + __u64 addr; /* Bank's MCi_ADDR MSR */ + __u64 mcgstatus; /* Machine Check Global Status MSR */ + __u64 ip; /* Instruction Pointer when the error happened */ + __u64 tsc; /* CPU time stamp counter */ + __u64 time; /* Wall time_t when error was detected */ + __u8 cpuvendor; /* Kernel's X86_VENDOR enum */ + __u8 inject_flags; /* Software inject flags */ + __u8 severity; /* Error severity */ __u8 pad; - __u32 cpuid; /* CPUID 1 EAX */ - __u8 cs; /* code segment */ - __u8 bank; /* machine check bank */ - __u8 cpu; /* cpu number; obsolete; use extcpu now */ - __u8 finished; /* entry is valid */ - __u32 extcpu; /* linux cpu number that detected the error */ - __u32 socketid; /* CPU socket ID */ - __u32 apicid; /* CPU initial apic ID */ - __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ - __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ - __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ - __u64 ppin; /* Protected Processor Inventory Number */ - __u32 microcode;/* Microcode revision */ + __u32 cpuid; /* CPUID 1 EAX */ + __u8 cs; /* Code segment */ + __u8 bank; /* Machine check bank reporting the error */ + __u8 cpu; /* CPU number; obsoleted by extcpu */ + __u8 finished; /* Entry is valid */ + __u32 extcpu; /* Linux CPU number that detected the error */ + __u32 socketid; /* CPU socket ID */ + __u32 apicid; /* CPU initial APIC ID */ + __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode; /* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h index 809134c644a6..90ab9a795b49 100644 --- a/arch/x86/include/uapi/asm/msgbuf.h +++ 
b/arch/x86/include/uapi/asm/msgbuf.h @@ -1 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_X64_MSGBUF_H +#define __ASM_X64_MSGBUF_H + +#if !defined(__x86_64__) || !defined(__ILP32__) #include <asm-generic/msgbuf.h> +#else +/* + * The msqid64_ds structure for x86 architecture with x32 ABI. + * + * On x86-32 and x86-64 we can just use the generic definition, but + * x32 uses the same binary layout as x86_64, which is different + * from other 32-bit architectures. + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + __kernel_time_t msg_rtime; /* last msgrcv time */ + __kernel_time_t msg_ctime; /* last change time */ + __kernel_ulong_t msg_cbytes; /* current number of bytes on queue */ + __kernel_ulong_t msg_qnum; /* number of messages in queue */ + __kernel_ulong_t msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + __kernel_ulong_t __unused4; + __kernel_ulong_t __unused5; +}; + +#endif + +#endif /* __ASM_X64_MSGBUF_H */ diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index cabd7476bd6c..89de6cd9f0a7 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h @@ -8,15 +8,24 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values + * + * x86_64 and x32 incorrectly added padding here, so the structures + * are still incompatible with the padding on x86. */ struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ +#ifdef __i386__ + unsigned long sem_otime; /* last semop time */ + unsigned long sem_otime_high; + unsigned long sem_ctime; /* last change time */ + unsigned long sem_ctime_high; +#else __kernel_time_t sem_otime; /* last semop time */ __kernel_ulong_t __unused1; __kernel_time_t sem_ctime; /* last change time */ __kernel_ulong_t __unused2; +#endif __kernel_ulong_t sem_nsems; /* no. of semaphores in array */ __kernel_ulong_t __unused3; __kernel_ulong_t __unused4; diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h index 83c05fc2de38..644421f3823b 100644 --- a/arch/x86/include/uapi/asm/shmbuf.h +++ b/arch/x86/include/uapi/asm/shmbuf.h @@ -1 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_X86_SHMBUF_H +#define __ASM_X86_SHMBUF_H + +#if !defined(__x86_64__) || !defined(__ILP32__) #include <asm-generic/shmbuf.h> +#else +/* + * The shmid64_ds structure for x86 architecture with x32 ABI. + * + * On x86-32 and x86-64 we can just use the generic definition, but + * x32 uses the same binary layout as x86_64, which is different + * from other 32-bit architectures. + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + __kernel_time_t shm_dtime; /* last detach time */ + __kernel_time_t shm_ctime; /* last change time */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + __kernel_ulong_t shm_nattch; /* no.
of current attaches */ + __kernel_ulong_t __unused4; + __kernel_ulong_t __unused5; +}; + +struct shminfo64 { + __kernel_ulong_t shmmax; + __kernel_ulong_t shmmin; + __kernel_ulong_t shmmni; + __kernel_ulong_t shmseg; + __kernel_ulong_t shmall; + __kernel_ulong_t __unused1; + __kernel_ulong_t __unused2; + __kernel_ulong_t __unused3; + __kernel_ulong_t __unused4; +}; + +#endif + +#endif /* __ASM_X86_SHMBUF_H */ diff --git a/arch/x86/include/uapi/asm/siginfo.h b/arch/x86/include/uapi/asm/siginfo.h index b3d157957177..6642d8be40c4 100644 --- a/arch/x86/include/uapi/asm/siginfo.h +++ b/arch/x86/include/uapi/asm/siginfo.h @@ -7,8 +7,6 @@ typedef long long __kernel_si_clock_t __attribute__((aligned(4))); # define __ARCH_SI_CLOCK_T __kernel_si_clock_t # define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) -# else /* x86-64 */ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) # endif #endif
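The xen/page.h hunk above replaces the __get_user()/__put_user() abuse in xen_safe_read_ulong()/xen_safe_write_ulong() with open-coded exception-table fixups: both helpers return 0 on success and a negative value when the access faults and the fixup path runs. A minimal caller sketch under that contract; the function name, the m2p table pointer, and the error convention here are illustrative, not part of the patch:

#include <asm/xen/page.h>

/*
 * Hypothetical lookup of a machine-to-physical entry whose backing
 * page may be absent. xen_safe_read_ulong() absorbs the fault and
 * reports it via its return value instead of oopsing.
 */
static unsigned long read_m2p_entry(const unsigned long *m2p,
				    unsigned long mfn)
{
	unsigned long pfn;

	if (xen_safe_read_ulong(&m2p[mfn], &pfn))
		return ~0UL;	/* treat a faulting slot as invalid */

	return pfn;
}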

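In the sembuf.h hunk, the i386 variant of semid64_ds carries each 64-bit timestamp as two 32-bit unsigned longs (sem_otime plus sem_otime_high, and likewise for sem_ctime). A sketch of how userspace might reassemble the full y2038-safe value; the helper name is made up for illustration:

#include <stdint.h>

/*
 * Hypothetical helper: the parameter pair mirrors the i386 layout
 * above, where each half of the timestamp is 32 bits wide.
 */
static inline int64_t semid_otime64(uint32_t sem_otime,
				    uint32_t sem_otime_high)
{
	/* The high word carries bits 63:32 of the 64-bit time value. */
	return ((int64_t)sem_otime_high << 32) | sem_otime;
}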

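The reworked kvm_para.h comment notes that KVM_CPUID_FEATURES (0x40000001) now returns two bitmaps: feature bits such as KVM_FEATURE_PV_SEND_IPI in EAX, and performance hints such as KVM_HINTS_REALTIME in EDX. A self-contained guest-side probe might look like the sketch below; it assumes an x86 guest running under KVM and, for brevity, skips the KVM_CPUID_SIGNATURE ('KVMKVMKVM') check a real guest would perform first:

#include <stdio.h>

#define KVM_CPUID_FEATURES	0x40000001
#define KVM_FEATURE_PV_SEND_IPI	11	/* reported in EAX */
#define KVM_HINTS_REALTIME	0	/* reported in EDX */

static void cpuid(unsigned int leaf, unsigned int *eax, unsigned int *edx)
{
	unsigned int ebx, ecx;

	/* Plain CPUID; EBX/ECX are captured but unused here. */
	asm volatile("cpuid"
		     : "=a" (*eax), "=b" (ebx), "=c" (ecx), "=d" (*edx)
		     : "0" (leaf));
	(void)ebx;
	(void)ecx;
}

int main(void)
{
	unsigned int features, hints;

	cpuid(KVM_CPUID_FEATURES, &features, &hints);

	if (features & (1u << KVM_FEATURE_PV_SEND_IPI))
		puts("PV send-IPI available");
	if (hints & (1u << KVM_HINTS_REALTIME))
		puts("host hints: vCPUs are dedicated");
	return 0;
}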