diff options
Diffstat (limited to 'arch/x86/include')
36 files changed, 477 insertions, 212 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4cd6a3b71824..0660e14690c8 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -174,7 +174,7 @@ static inline int alternatives_text_reserved(void *start, void *end) /* * Alternative inline assembly with input. * - * Pecularities: + * Peculiarities: * No memory clobber here. * Argument numbers start with 1. * Best is to use constraints that are fixed size (like (%1) ... "r") diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 34a10b2d5b73..fc0693569f7a 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -5,15 +5,9 @@ #include <asm/cpufeatures.h> #ifdef CONFIG_64BIT -/* popcnt %edi, %eax */ -#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" -/* popcnt %rdi, %rax */ -#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" #define REG_OUT "a" #else -/* popcnt %eax, %eax */ -#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" #define REG_IN "a" #define REG_OUT "a" #endif @@ -24,7 +18,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) + asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT) : "="REG_OUT (res) : REG_IN (w)); @@ -52,7 +46,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) + asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT) : "="REG_OUT (res) : REG_IN (w)); diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index bfb85e5844ab..a8bfac131256 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -7,7 +7,7 @@ #include <asm/alternative.h> /* Provides LOCK_PREFIX */ /* - * Non-existant functions to indicate usage errors at link time + * Non-existent functions to indicate usage errors at link time * (or compile-time if the compiler implements __compiletime_error(). */ extern void __xchg_wrong_size(void) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 28c4a502b419..6d6122524711 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -281,9 +281,11 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index a7adb2bfbf0b..0acf5ee45a21 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -6,5 +6,6 @@ int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); +void crash_smp_send_stop(void); #endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 33833d1909af..a5ea841cc6d2 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -16,6 +16,12 @@ # define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) #endif +#ifdef CONFIG_X86_SMAP +# define DISABLE_SMAP 0 +#else +# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31)) +#endif + #ifdef CONFIG_X86_INTEL_UMIP # define DISABLE_UMIP 0 #else @@ -68,7 +74,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_MPX) +#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index eea40d52ca78..107283b1eb1e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -19,7 +19,7 @@ * This is the main reason why we're doing stable VA mappings for RT * services. * - * This flag is used in conjuction with a chicken bit called + * This flag is used in conjunction with a chicken bit called * "efi=old_map" which can be used as a fallback to the old runtime * services mapping method in case there's some b0rkage with a * particular EFI implementation (haha, it is hard to hold up the @@ -82,8 +82,7 @@ struct efi_scratch { #define arch_efi_call_virt_setup() \ ({ \ efi_sync_low_kernel_mappings(); \ - preempt_disable(); \ - __kernel_fpu_begin(); \ + kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ if (!efi_enabled(EFI_OLD_MEMMAP)) \ @@ -99,8 +98,7 @@ struct efi_scratch { efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ - __kernel_fpu_end(); \ - preempt_enable(); \ + kernel_fpu_end(); \ }) extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, @@ -141,6 +139,8 @@ extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); +extern void efi_free_boot_services(void); +extern void efi_reserve_boot_services(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a9caac9d4a72..b56d504af654 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -12,17 +12,12 @@ #define _ASM_X86_FPU_API_H /* - * Careful: __kernel_fpu_begin/end() must be called with preempt disabled - * and they don't touch the preempt state on their own. - * If you enable preemption after __kernel_fpu_begin(), preempt notifier - * should call the __kernel_fpu_end() to prevent the kernel/user FPU - * state from getting corrupted. KVM for example uses this model. - * - * All other cases use kernel_fpu_begin/end() which disable preemption - * during kernel FPU usage. + * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It + * disables preemption so be careful if you intend to use it for long periods + * of time. + * If you intend to use the FPU in softirq you need to check first with + * irq_fpu_usable() if it is possible. */ -extern void __kernel_fpu_begin(void); -extern void __kernel_fpu_end(void); extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 69dcdf195b61..fa2c93cb42a2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -106,6 +106,9 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); #define user_insn(insn, output, input...) \ ({ \ int err; \ + \ + might_fault(); \ + \ asm volatile(ASM_STAC "\n" \ "1:" #insn "\n\t" \ "2: " ASM_CLAC "\n" \ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 4139f7650fe5..705dafc2d11a 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -10,6 +10,7 @@ #define _ASM_X86_HYPERV_TLFS_H #include <linux/types.h> +#include <asm/page.h> /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent @@ -30,158 +31,150 @@ /* * Feature identification. EAX indicates which features are available * to the partition based upon the current partition privileges. + * These are HYPERV_CPUID_FEATURES.EAX bits. */ /* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ -#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) +#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0) /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) -/* Partition reference TSC MSR is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) -/* Partition Guest IDLE MSR is available */ -#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10) - -/* A partition's reference time stamp counter (TSC) page */ -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 - -/* - * There is a single feature flag that signifies if the partition has access - * to MSRs with local APIC and TSC frequencies. - */ -#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11) - -/* AccessReenlightenmentControls privilege */ -#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) - +#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) /* * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available */ -#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2) +#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2) /* * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through * HV_X64_MSR_STIMER3_COUNT) available */ -#define HV_MSR_SYNTIMER_AVAILABLE (1 << 3) +#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) /* * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) * are available */ -#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4) +#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4) /* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ -#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5) +#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5) /* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ -#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6) +#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6) /* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ -#define HV_X64_MSR_RESET_AVAILABLE (1 << 7) - /* - * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, - * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, - * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available - */ -#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) - -/* Frequency MSRs available */ -#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE (1 << 8) - -/* Crash MSR available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) - -/* stimer Direct Mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE (1 << 19) +#define HV_X64_MSR_RESET_AVAILABLE BIT(7) +/* + * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, + * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, + * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available + */ +#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8) +/* Partition reference TSC MSR is available */ +#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) +/* Partition Guest IDLE MSR is available */ +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10) +/* + * There is a single feature flag that signifies if the partition has access + * to MSRs with local APIC and TSC frequencies. + */ +#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) +/* AccessReenlightenmentControls privilege */ +#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) /* - * Feature identification: EBX indicates which flags were specified at - * partition creation. The format is the same as the partition creation - * flag structure defined in section Partition Creation Flags. + * Feature identification: indicates which flags were specified at partition + * creation. The format is the same as the partition creation flag structure + * defined in section Partition Creation Flags. + * These are HYPERV_CPUID_FEATURES.EBX bits. */ -#define HV_X64_CREATE_PARTITIONS (1 << 0) -#define HV_X64_ACCESS_PARTITION_ID (1 << 1) -#define HV_X64_ACCESS_MEMORY_POOL (1 << 2) -#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3) -#define HV_X64_POST_MESSAGES (1 << 4) -#define HV_X64_SIGNAL_EVENTS (1 << 5) -#define HV_X64_CREATE_PORT (1 << 6) -#define HV_X64_CONNECT_PORT (1 << 7) -#define HV_X64_ACCESS_STATS (1 << 8) -#define HV_X64_DEBUGGING (1 << 11) -#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12) -#define HV_X64_CONFIGURE_PROFILER (1 << 13) +#define HV_X64_CREATE_PARTITIONS BIT(0) +#define HV_X64_ACCESS_PARTITION_ID BIT(1) +#define HV_X64_ACCESS_MEMORY_POOL BIT(2) +#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3) +#define HV_X64_POST_MESSAGES BIT(4) +#define HV_X64_SIGNAL_EVENTS BIT(5) +#define HV_X64_CREATE_PORT BIT(6) +#define HV_X64_CONNECT_PORT BIT(7) +#define HV_X64_ACCESS_STATS BIT(8) +#define HV_X64_DEBUGGING BIT(11) +#define HV_X64_CPU_POWER_MANAGEMENT BIT(12) /* * Feature identification. EDX indicates which miscellaneous features * are available to the partition. + * These are HYPERV_CPUID_FEATURES.EDX bits. */ /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ -#define HV_X64_MWAIT_AVAILABLE (1 << 0) +#define HV_X64_MWAIT_AVAILABLE BIT(0) /* Guest debugging support is available */ -#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1) +#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) /* Performance Monitor support is available*/ -#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2) +#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) /* Support for physical CPU dynamic partitioning events is available*/ -#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3) +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) /* * Support for passing hypercall input parameter block via XMM * registers is available */ -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) /* Support for a virtual guest idle state is available */ -#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) -/* Guest crash data handler available */ -#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10) +#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) +/* Frequency MSRs available */ +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +/* stimer Direct Mode is available */ +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) /* * Implementation recommendations. Indicates which behaviors the hypervisor * recommends the OS implement for optimal performance. + * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits. + */ +/* + * Recommend using hypercall for address space switches rather + * than MOV to CR3 instruction */ - /* - * Recommend using hypercall for address space switches rather - * than MOV to CR3 instruction - */ -#define HV_X64_AS_SWITCH_RECOMMENDED (1 << 0) +#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) /* Recommend using hypercall for local TLB flushes rather * than INVLPG or MOV to CR3 instructions */ -#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) /* * Recommend using hypercall for remote TLB flushes rather * than inter-processor interrupts */ -#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2) +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) /* * Recommend using MSRs for accessing APIC registers * EOI, ICR and TPR rather than their memory-mapped counterparts */ -#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3) +#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) /* Recommend using the hypervisor-provided MSR to initiate a system RESET */ -#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4) +#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) /* * Recommend using relaxed timing for this partition. If used, * the VM should disable any watchdog timeouts that rely on the * timely delivery of external interrupts */ -#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) +#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) /* * Recommend not using Auto End-Of-Interrupt feature */ -#define HV_DEPRECATING_AEOI_RECOMMENDED (1 << 9) +#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) /* * Recommend using cluster IPI hypercalls. */ -#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10) +#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) /* Recommend using the newer ExProcessorMasks interface */ -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) +#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) /* Recommend using enlightened VMCS */ -#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14) +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) -/* - * Crash notification flags. - */ -#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) -#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) +/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) +#define HV_X64_NESTED_MSR_BITMAP BIT(19) + +/* Hyper-V specific model specific registers (MSRs) */ /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 @@ -201,6 +194,9 @@ /* MSR used to read the per-partition time reference counter */ #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +/* A partition's reference time stamp counter (TSC) page */ +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 + /* MSR used to retrieve the TSC frequency */ #define HV_X64_MSR_TSC_FREQUENCY 0x40000022 @@ -258,9 +254,11 @@ #define HV_X64_MSR_CRASH_P3 0x40000103 #define HV_X64_MSR_CRASH_P4 0x40000104 #define HV_X64_MSR_CRASH_CTL 0x40000105 -#define HV_X64_MSR_CRASH_CTL_NOTIFY (1ULL << 63) -#define HV_X64_MSR_CRASH_PARAMS \ - (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + +/* TSC emulation after migration */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 /* * Declare the MSR used to setup pages used to communicate with the hypervisor. @@ -271,7 +269,7 @@ union hv_x64_msr_hypercall_contents { u64 enable:1; u64 reserved:11; u64 guest_physical_address:52; - }; + } __packed; }; /* @@ -283,7 +281,7 @@ struct ms_hyperv_tsc_page { volatile u64 tsc_scale; volatile s64 tsc_offset; u64 reserved2[509]; -}; +} __packed; /* * The guest OS needs to register the guest ID with the hypervisor. @@ -311,39 +309,37 @@ struct ms_hyperv_tsc_page { #define HV_LINUX_VENDOR_ID 0x8100 -/* TSC emulation after migration */ -#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 - -/* Nested features (CPUID 0x4000000A) EAX */ -#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) -#define HV_X64_NESTED_MSR_BITMAP BIT(19) - struct hv_reenlightenment_control { __u64 vector:8; __u64 reserved1:8; __u64 enabled:1; __u64 reserved2:15; __u64 target_vp:32; -}; - -#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 -#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 +} __packed; struct hv_tsc_emulation_control { __u64 enabled:1; __u64 reserved:63; -}; +} __packed; struct hv_tsc_emulation_status { __u64 inprogress:1; __u64 reserved:63; -}; +} __packed; #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) +/* + * Crash notification (HV_X64_MSR_CRASH_CTL) flags. + */ +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) +#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) +#define HV_X64_MSR_CRASH_PARAMS \ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + #define HV_IPI_LOW_VECTOR 0x10 #define HV_IPI_HIGH_VECTOR 0xff @@ -358,6 +354,7 @@ struct hv_tsc_emulation_status { #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 @@ -409,7 +406,7 @@ typedef struct _HV_REFERENCE_TSC_PAGE { __u32 res1; __u64 tsc_scale; __s64 tsc_offset; -} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; +} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; /* Define the number of synthetic interrupt sources. */ #define HV_SYNIC_SINT_COUNT (16) @@ -466,7 +463,7 @@ union hv_message_flags { struct { __u8 msg_pending:1; __u8 reserved:7; - }; + } __packed; }; /* Define port identifier type. */ @@ -475,7 +472,7 @@ union hv_port_id { struct { __u32 id:24; __u32 reserved:8; - } u; + } __packed u; }; /* Define synthetic interrupt controller message header. */ @@ -488,7 +485,7 @@ struct hv_message_header { __u64 sender; union hv_port_id port; }; -}; +} __packed; /* Define synthetic interrupt controller message format. */ struct hv_message { @@ -496,12 +493,12 @@ struct hv_message { union { __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u; -}; +} __packed; /* Define the synthetic interrupt message page layout. */ struct hv_message_page { struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -}; +} __packed; /* Define timer message payload structure. */ struct hv_timer_message_payload { @@ -509,7 +506,7 @@ struct hv_timer_message_payload { __u32 reserved; __u64 expiration_time; /* When the timer expired */ __u64 delivery_time; /* When the message was delivered */ -}; +} __packed; /* Define virtual processor assist page structure. */ struct hv_vp_assist_page { @@ -518,8 +515,9 @@ struct hv_vp_assist_page { __u64 vtl_control[2]; __u64 nested_enlightenments_control[2]; __u32 enlighten_vmentry; + __u32 padding; __u64 current_nested_vmcs; -}; +} __packed; struct hv_enlightened_vmcs { u32 revision_id; @@ -533,6 +531,8 @@ struct hv_enlightened_vmcs { u16 host_gs_selector; u16 host_tr_selector; + u16 padding16_1; + u64 host_ia32_pat; u64 host_ia32_efer; @@ -651,7 +651,7 @@ struct hv_enlightened_vmcs { u64 ept_pointer; u16 virtual_processor_id; - u16 padding16[3]; + u16 padding16_2[3]; u64 padding64_2[5]; u64 guest_physical_address; @@ -693,7 +693,7 @@ struct hv_enlightened_vmcs { u32 nested_flush_hypercall:1; u32 msr_bitmap:1; u32 reserved:30; - } hv_enlightenments_control; + } __packed hv_enlightenments_control; u32 hv_vp_id; u64 hv_vm_id; @@ -703,7 +703,7 @@ struct hv_enlightened_vmcs { u64 padding64_5[7]; u64 xss_exit_bitmap; u64 padding64_6[7]; -}; +} __packed; #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) @@ -725,36 +725,129 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -#define HV_STIMER_ENABLE (1ULL << 0) -#define HV_STIMER_PERIODIC (1ULL << 1) -#define HV_STIMER_LAZY (1ULL << 2) -#define HV_STIMER_AUTOENABLE (1ULL << 3) -#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) +/* Define synthetic interrupt controller flag constants. */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) +#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) + +/* + * Synthetic timer configuration. + */ +union hv_stimer_config { + u64 as_uint64; + struct { + u64 enable:1; + u64 periodic:1; + u64 lazy:1; + u64 auto_enable:1; + u64 apic_vector:8; + u64 direct_mode:1; + u64 reserved_z0:3; + u64 sintx:4; + u64 reserved_z1:44; + } __packed; +}; + + +/* Define the synthetic interrupt controller event flags format. */ +union hv_synic_event_flags { + unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; +}; + +/* Define SynIC control register. */ +union hv_synic_scontrol { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:63; + } __packed; +}; + +/* Define synthetic interrupt source. */ +union hv_synic_sint { + u64 as_uint64; + struct { + u64 vector:8; + u64 reserved1:8; + u64 masked:1; + u64 auto_eoi:1; + u64 reserved2:46; + } __packed; +}; + +/* Define the format of the SIMP register */ +union hv_synic_simp { + u64 as_uint64; + struct { + u64 simp_enabled:1; + u64 preserved:11; + u64 base_simp_gpa:52; + } __packed; +}; + +/* Define the format of the SIEFP register */ +union hv_synic_siefp { + u64 as_uint64; + struct { + u64 siefp_enabled:1; + u64 preserved:11; + u64 base_siefp_gpa:52; + } __packed; +}; struct hv_vpset { u64 format; u64 valid_bank_mask; u64 bank_contents[]; -}; +} __packed; /* HvCallSendSyntheticClusterIpi hypercall */ struct hv_send_ipi { u32 vector; u32 reserved; u64 cpu_mask; -}; +} __packed; /* HvCallSendSyntheticClusterIpiEx hypercall */ struct hv_send_ipi_ex { u32 vector; u32 reserved; struct hv_vpset vp_set; -}; +} __packed; /* HvFlushGuestPhysicalAddressSpace hypercalls */ struct hv_guest_mapping_flush { u64 address_space; u64 flags; +} __packed; + +/* + * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited + * by the bitwidth of "additional_pages" in union hv_gpa_page_range. + */ +#define HV_MAX_FLUSH_PAGES (2048) + +/* HvFlushGuestPhysicalAddressList hypercall */ +union hv_gpa_page_range { + u64 address_space; + struct { + u64 additional_pages:11; + u64 largepage:1; + u64 basepfn:52; + } page; +}; + +/* + * All input flush parameters should be in single page. The max flush + * count is equal with how many entries of union hv_gpa_page_range can + * be populated into the input parameter page. + */ +#define HV_MAX_FLUSH_REP_COUNT (PAGE_SIZE - 2 * sizeof(u64) / \ + sizeof(union hv_gpa_page_range)) + +struct hv_guest_mapping_flush_list { + u64 address_space; + u64 flags; + union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; }; /* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ @@ -763,7 +856,7 @@ struct hv_tlb_flush { u64 flags; u64 processor_mask; u64 gva_list[]; -}; +} __packed; /* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ struct hv_tlb_flush_ex { @@ -771,6 +864,6 @@ struct hv_tlb_flush_ex { u64 flags; struct hv_vpset hv_vp_set; u64 gva_list[]; -}; +} __packed; #endif diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h index b523f51c5400..634f99b1dc22 100644 --- a/arch/x86/include/asm/intel_pt.h +++ b/arch/x86/include/asm/intel_pt.h @@ -2,10 +2,36 @@ #ifndef _ASM_X86_INTEL_PT_H #define _ASM_X86_INTEL_PT_H +#define PT_CPUID_LEAVES 2 +#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */ + +enum pt_capabilities { + PT_CAP_max_subleaf = 0, + PT_CAP_cr3_filtering, + PT_CAP_psb_cyc, + PT_CAP_ip_filtering, + PT_CAP_mtc, + PT_CAP_ptwrite, + PT_CAP_power_event_trace, + PT_CAP_topa_output, + PT_CAP_topa_multiple_entries, + PT_CAP_single_range_output, + PT_CAP_output_subsys, + PT_CAP_payloads_lip, + PT_CAP_num_address_ranges, + PT_CAP_mtc_periods, + PT_CAP_cycle_thresholds, + PT_CAP_psb_periods, +}; + #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) void cpu_emergency_stop_pt(void); +extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap); +extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap); #else static inline void cpu_emergency_stop_pt(void) {} +static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; } +static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; } #endif #endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 2395bb794c7b..fbb16e6b6c18 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -30,6 +30,9 @@ extern void fixup_irqs(void); #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); +extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs); +extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); +extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); #endif extern void (*x86_platform_ipi_callback)(void); @@ -41,9 +44,13 @@ extern __visible unsigned int do_IRQ(struct pt_regs *regs); extern void init_ISA_irqs(void); +extern void __init init_IRQ(void); + #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); + +extern __visible void smp_x86_platform_ipi(struct pt_regs *regs); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 800ffce0db29..80b35e3adf03 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -10,6 +10,7 @@ static inline bool arch_irq_work_has_interrupt(void) return boot_cpu_has(X86_FEATURE_APIC); } extern void arch_irq_work_raise(void); +extern __visible void smp_irq_work_interrupt(struct pt_regs *regs); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fbda5a917c5b..4660ce90de7f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -439,6 +439,11 @@ struct kvm_mmu { u64 pdptrs[4]; /* pae */ }; +struct kvm_tlb_range { + u64 start_gfn; + u64 pages; +}; + enum pmc_type { KVM_PMC_GP = 0, KVM_PMC_FIXED, @@ -497,7 +502,7 @@ struct kvm_mtrr { struct kvm_vcpu_hv_stimer { struct hrtimer timer; int index; - u64 config; + union hv_stimer_config config; u64 count; u64 exp_time; struct hv_message msg; @@ -601,17 +606,16 @@ struct kvm_vcpu_arch { /* * QEMU userspace and the guest each have their own FPU state. - * In vcpu_run, we switch between the user and guest FPU contexts. - * While running a VCPU, the VCPU thread will have the guest FPU - * context. + * In vcpu_run, we switch between the user, maintained in the + * task_struct struct, and guest FPU contexts. While running a VCPU, + * the VCPU thread will have the guest FPU context. * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The * "guest_fpu" state here contains the guest FPU context, with the * host PRKU bits. */ - struct fpu user_fpu; - struct fpu guest_fpu; + struct fpu *guest_fpu; u64 xcr0; u64 guest_supported_xcr0; @@ -1042,6 +1046,8 @@ struct kvm_x86_ops { void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); int (*tlb_remote_flush)(struct kvm *kvm); + int (*tlb_remote_flush_with_range)(struct kvm *kvm, + struct kvm_tlb_range *range); /* * Flush any TLB entries associated with the given GVA. @@ -1106,6 +1112,7 @@ struct kvm_x86_ops { bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); + bool (*pt_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); @@ -1186,6 +1193,7 @@ struct kvm_x86_ops { int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); + uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1196,6 +1204,7 @@ struct kvm_arch_async_pf { }; extern struct kvm_x86_ops *kvm_x86_ops; +extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) @@ -1492,7 +1501,7 @@ asmlinkage void kvm_spurious_fault(void); "cmpb $0, kvm_rebooting \n\t" \ "jne 668b \n\t" \ __ASM_SIZE(push) " $666b \n\t" \ - "call kvm_spurious_fault \n\t" \ + "jmp kvm_spurious_fault \n\t" \ ".popsection \n\t" \ _ASM_EXTABLE(666b, 667b) @@ -1503,7 +1512,7 @@ asmlinkage void kvm_spurious_fault(void); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 4c723632c036..5ed3cf1c3934 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -92,6 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); +void do_async_page_fault(struct pt_regs *regs, unsigned long error_code); #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1d0a7778e163..cc60e617931c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -22,6 +22,11 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; + +typedef int (*hyperv_fill_flush_list_func)( + struct hv_guest_mapping_flush_list *flush, + void *data); + /* * Generate the guest ID. */ @@ -348,6 +353,11 @@ void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); int hyperv_flush_guest_mapping(u64 as); +int hyperv_flush_guest_mapping_range(u64 as, + hyperv_fill_flush_list_func fill_func, void *data); +int hyperv_fill_flush_guest_mapping_list( + struct hv_guest_mapping_flush_list *flush, + u64 start_gfn, u64 end_gfn); #ifdef CONFIG_X86_64 void hv_apic_init(void); @@ -370,6 +380,11 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) return NULL; } static inline int hyperv_flush_guest_mapping(u64 as) { return -1; } +static inline int hyperv_flush_guest_mapping_range(u64 as, + hyperv_fill_flush_list_func fill_func, void *data) +{ + return -1; +} #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9e39cc8bd989..8e40c2446fd1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,7 +121,43 @@ #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_IA32_RTIT_CTL 0x00000570 +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_CYCLEACC BIT(1) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_PWR_EVT_EN BIT(4) +#define RTIT_CTL_FUP_ON_PTW BIT(5) +#define RTIT_CTL_FABRIC_EN BIT(6) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_MTC_EN BIT(9) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_PTW_EN BIT(12) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_MTC_RANGE_OFFSET 14 +#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) +#define RTIT_CTL_CYC_THRESH_OFFSET 19 +#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) +#define RTIT_CTL_PSB_FREQ_OFFSET 24 +#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) +#define RTIT_CTL_ADDR0_OFFSET 32 +#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET) +#define RTIT_CTL_ADDR1_OFFSET 36 +#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET) +#define RTIT_CTL_ADDR2_OFFSET 40 +#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET) +#define RTIT_CTL_ADDR3_OFFSET 44 +#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET) #define MSR_IA32_RTIT_STATUS 0x00000571 +#define RTIT_STATUS_FILTEREN BIT(0) +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_BUFFOVF BIT(3) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) +#define RTIT_STATUS_BYTECNT_OFFSET 32 +#define RTIT_STATUS_BYTECNT (0x1ffffull << RTIT_STATUS_BYTECNT_OFFSET) #define MSR_IA32_RTIT_ADDR0_A 0x00000580 #define MSR_IA32_RTIT_ADDR0_B 0x00000581 #define MSR_IA32_RTIT_ADDR1_A 0x00000582 @@ -772,6 +808,7 @@ #define VMX_BASIC_INOUT 0x0040000000000000LLU /* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F /* AMD-V MSRs */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 032b6009baab..dad12b767ba0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_STRICT_PREFERRED, SPECTRE_V2_USER_PRCTL, SPECTRE_V2_USER_SECCOMP, }; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4bf42f9e4eea..a97f28d914d5 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -26,6 +26,11 @@ struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; +__visible void __native_queued_spin_unlock(struct qspinlock *lock); +bool pv_is_native_spin_unlock(void); +__visible bool __native_vcpu_is_preempted(long cpu); +bool pv_is_native_vcpu_is_preempted(void); + static inline u64 paravirt_steal_clock(int cpu) { return PVOP_CALL1(u64, time.steal_clock, cpu); diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index ec7f43327033..1ea41aaef68b 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -80,6 +80,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } +static inline void pmd_populate_kernel_safe(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd_safe(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} + static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { @@ -132,6 +139,12 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); } + +static inline void pud_populate_safe(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + set_pud_safe(pud, __pud(_PAGE_TABLE | __pa(pmd))); +} #endif /* CONFIG_X86_PAE */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -141,6 +154,12 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); +} + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_KERNEL_ACCOUNT; @@ -173,6 +192,14 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (!pgtable_l5_enabled()) + return; + paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); + set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); +} + static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_KERNEL_ACCOUNT; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 106b7d0e2dae..d6ff0bbdb394 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -564,8 +564,12 @@ extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); -extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, - unsigned numpages, unsigned long page_flags); +extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, + unsigned long address, + unsigned numpages, + unsigned long page_flags); +extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, + unsigned long numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 90cb2f36c042..99a7fa9ab0a3 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -8,6 +8,9 @@ DECLARE_PER_CPU(int, __preempt_count); +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 + /* * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such * that a decrement hitting 0 means we can and should reschedule. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 071b2a6fff85..33051436c864 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -967,7 +967,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) } extern unsigned long arch_align_stack(unsigned long sp); -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); +void free_init_pages(const char *what, unsigned long begin, unsigned long end); extern void free_kernel_image_pages(void *begin, void *end); void default_idle(void); diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index a671a1145906..04c17be9b5fd 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -26,6 +26,7 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_APM 1 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); +void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); void run_crash_ipi_callback(struct pt_regs *regs); diff --git a/arch/x86/include/asm/intel_rdt_sched.h b/arch/x86/include/asm/resctrl_sched.h index 9acb06b6f81e..54990fe2a3ae 100644 --- a/arch/x86/include/asm/intel_rdt_sched.h +++ b/arch/x86/include/asm/resctrl_sched.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_RDT_SCHED_H -#define _ASM_X86_INTEL_RDT_SCHED_H +#ifndef _ASM_X86_RESCTRL_SCHED_H +#define _ASM_X86_RESCTRL_SCHED_H -#ifdef CONFIG_INTEL_RDT +#ifdef CONFIG_RESCTRL #include <linux/sched.h> #include <linux/jump_label.h> @@ -10,7 +10,7 @@ #define IA32_PQR_ASSOC 0x0c8f /** - * struct intel_pqr_state - State cache for the PQR MSR + * struct resctrl_pqr_state - State cache for the PQR MSR * @cur_rmid: The cached Resource Monitoring ID * @cur_closid: The cached Class Of Service ID * @default_rmid: The user assigned Resource Monitoring ID @@ -24,21 +24,21 @@ * The cache also helps to avoid pointless updates if the value does * not change. */ -struct intel_pqr_state { +struct resctrl_pqr_state { u32 cur_rmid; u32 cur_closid; u32 default_rmid; u32 default_closid; }; -DECLARE_PER_CPU(struct intel_pqr_state, pqr_state); +DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); /* - * __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR + * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * * Following considerations are made so that this has minimal impact * on scheduler hot path: @@ -51,9 +51,9 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); * simple as possible. * Must be called with preemption disabled. */ -static void __intel_rdt_sched_in(void) +static void __resctrl_sched_in(void) { - struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); u32 closid = state->default_closid; u32 rmid = state->default_rmid; @@ -78,16 +78,16 @@ static void __intel_rdt_sched_in(void) } } -static inline void intel_rdt_sched_in(void) +static inline void resctrl_sched_in(void) { if (static_branch_likely(&rdt_enable_key)) - __intel_rdt_sched_in(); + __resctrl_sched_in(); } #else -static inline void intel_rdt_sched_in(void) {} +static inline void resctrl_sched_in(void) {} -#endif /* CONFIG_INTEL_RDT */ +#endif /* CONFIG_RESCTRL */ -#endif /* _ASM_X86_INTEL_RDT_SCHED_H */ +#endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ae13bc974416..ed8ec011a9fd 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -46,6 +46,9 @@ extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); +extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); +extern unsigned long __startup_secondary_64(void); +extern int early_make_pgtable(unsigned long address); #ifdef CONFIG_X86_INTEL_MID extern void x86_intel_mid_early_setup(void); diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index bd26834724e5..2fcbd6f33ef7 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -17,4 +17,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask); + +#ifdef CONFIG_X86_X32_ABI +asmlinkage long sys32_x32_rt_sigreturn(void); +#endif + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 547c4fe50711..2e95b6c1bca3 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -148,6 +148,12 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); + +asmlinkage __visible void smp_reboot_interrupt(void); +__visible void smp_reschedule_interrupt(struct pt_regs *regs); +__visible void smp_call_function_interrupt(struct pt_regs *regs); +__visible void smp_call_function_single_interrupt(struct pt_regs *r); + #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 93b462e48067..dec9c1e84c78 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -290,11 +290,4 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) -#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" -#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" -#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb" -#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd" -#define SVM_STGI ".byte 0x0f, 0x01, 0xdc" -#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf" - #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 82b73b75d67c..e0eccbcb8447 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -140,14 +140,6 @@ struct thread_info { _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) -/* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK \ - (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \ - _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \ - _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \ - _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_FSCHECK) - /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index 69615e387973..e0e6d7f21399 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -45,6 +45,7 @@ DEFINE_PAGE_FAULT_EVENT(page_fault_user); DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); #undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE exceptions #endif /* _TRACE_PAGE_FAULT_H */ diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index 2e6245a023ef..ace464f09681 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -42,6 +42,20 @@ TRACE_EVENT(hyperv_nested_flush_guest_mapping, TP_printk("address space %llx ret %d", __entry->as, __entry->ret) ); +TRACE_EVENT(hyperv_nested_flush_guest_mapping_range, + TP_PROTO(u64 as, int ret), + TP_ARGS(as, ret), + + TP_STRUCT__entry( + __field(u64, as) + __field(int, ret) + ), + TP_fast_assign(__entry->as = as; + __entry->ret = ret; + ), + TP_printk("address space %llx ret %d", __entry->as, __entry->ret) + ); + TRACE_EVENT(hyperv_send_ipi_mask, TP_PROTO(const struct cpumask *cpus, int vector), diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 0af81b590a0c..33b9d0f0aafe 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -389,6 +389,7 @@ TRACE_EVENT(vector_free_moved, #endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE irq_vectors #endif /* _TRACE_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 3de69330e6c5..7d6f3f3fad78 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -61,34 +61,38 @@ asmlinkage void xen_machine_check(void); asmlinkage void xen_simd_coprocessor_error(void); #endif -dotraplinkage void do_divide_error(struct pt_regs *, long); -dotraplinkage void do_debug(struct pt_regs *, long); -dotraplinkage void do_nmi(struct pt_regs *, long); -dotraplinkage void do_int3(struct pt_regs *, long); -dotraplinkage void do_overflow(struct pt_regs *, long); -dotraplinkage void do_bounds(struct pt_regs *, long); -dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs *, long); -dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); -dotraplinkage void do_invalid_TSS(struct pt_regs *, long); -dotraplinkage void do_segment_not_present(struct pt_regs *, long); -dotraplinkage void do_stack_segment(struct pt_regs *, long); +dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code); +dotraplinkage void do_debug(struct pt_regs *regs, long error_code); +dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); +dotraplinkage void do_int3(struct pt_regs *regs, long error_code); +dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); +dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); +dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); +dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); +dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); +dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); +dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); +dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_64 -dotraplinkage void do_double_fault(struct pt_regs *, long); +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code); +asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); +asmlinkage __visible notrace +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); +void __init trap_init(void); #endif -dotraplinkage void do_general_protection(struct pt_regs *, long); -dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); -dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); -dotraplinkage void do_coprocessor_error(struct pt_regs *, long); -dotraplinkage void do_alignment_check(struct pt_regs *, long); +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); +dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); +dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); +dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); +dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_MCE -dotraplinkage void do_machine_check(struct pt_regs *, long); +dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code); #endif -dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); +dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct pt_regs *, long); +dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); #endif -dotraplinkage void do_mce(struct pt_regs *, long); +dotraplinkage void do_mce(struct pt_regs *regs, long error_code); static inline int get_si_code(unsigned long condition) { @@ -104,11 +108,16 @@ extern int panic_on_unrecovered_nmi; void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(void); -asmlinkage void smp_threshold_interrupt(void); -asmlinkage void smp_deferred_error_interrupt(void); +asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); +asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); +asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif +void smp_apic_timer_interrupt(struct pt_regs *regs); +void smp_spurious_interrupt(struct pt_regs *regs); +void smp_error_interrupt(struct pt_regs *regs); +asmlinkage void smp_irq_move_cleanup_interrupt(void); + extern void ist_enter(struct pt_regs *regs); extern void ist_exit(struct pt_regs *regs); extern void ist_begin_non_atomic(struct pt_regs *regs); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index eb5bbfeccb66..8a0c25c6bf09 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -35,6 +35,7 @@ extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern void tsc_early_init(void); extern void tsc_init(void); +extern unsigned long calibrate_delay_is_known(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index ade0f153947d..4e4133e86484 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -77,7 +77,10 @@ #define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 #define SECONDARY_EXEC_XSAVES 0x00100000 +#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -98,6 +101,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 +#define VM_EXIT_PT_CONCEAL_PIP 0x01000000 +#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff @@ -109,6 +114,8 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 #define VM_ENTRY_LOAD_BNDCFGS 0x00010000 +#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000 +#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff @@ -240,6 +247,8 @@ enum vmcs_field { GUEST_PDPTR3_HIGH = 0x00002811, GUEST_BNDCFGS = 0x00002812, GUEST_BNDCFGS_HIGH = 0x00002813, + GUEST_IA32_RTIT_CTL = 0x00002814, + GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, HOST_IA32_PAT = 0x00002c00, HOST_IA32_PAT_HIGH = 0x00002c01, HOST_IA32_EFER = 0x00002c02, |