summaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/apic.h9
-rw-r--r--arch/x86/include/asm/atomic.h12
-rw-r--r--arch/x86/include/asm/atomic64_32.h8
-rw-r--r--arch/x86/include/asm/atomic64_64.h12
-rw-r--r--arch/x86/include/asm/cpufeatures.h5
-rw-r--r--arch/x86/include/asm/dmi.h2
-rw-r--r--arch/x86/include/asm/export.h5
-rw-r--r--arch/x86/include/asm/hardirq.h26
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h25
-rw-r--r--arch/x86/include/asm/i8259.h1
-rw-r--r--arch/x86/include/asm/irq_remapping.h5
-rw-r--r--arch/x86/include/asm/irqflags.h3
-rw-r--r--arch/x86/include/asm/kdebug.h12
-rw-r--r--arch/x86/include/asm/kvm_host.h82
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h18
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/page_32_types.h9
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h3
-rw-r--r--arch/x86/include/asm/pci-direct.h4
-rw-r--r--arch/x86/include/asm/pgtable-2level.h17
-rw-r--r--arch/x86/include/asm/pgtable-3level.h44
-rw-r--r--arch/x86/include/asm/pgtable-invert.h41
-rw-r--r--arch/x86/include/asm/pgtable.h76
-rw-r--r--arch/x86/include/asm/pgtable_64.h58
-rw-r--r--arch/x86/include/asm/processor.h19
-rw-r--r--arch/x86/include/asm/set_memory.h42
-rw-r--r--arch/x86/include/asm/signal.h7
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h64
-rw-r--r--arch/x86/include/asm/topology.h6
-rw-r--r--arch/x86/include/asm/trace/hyperv.h14
-rw-r--r--arch/x86/include/asm/vgtod.h2
-rw-r--r--arch/x86/include/asm/vmx.h14
-rw-r--r--arch/x86/include/asm/xen/hypercall.h143
37 files changed, 555 insertions, 249 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2d2e33..a0ab9ab61c75 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h
generic-y += dma-contiguous.h
generic-y += early_ioremap.h
+generic-y += export.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 74a9e06b6cfd..130e81e10fc7 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -10,6 +10,7 @@
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
+#include <asm/hardirq.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -502,12 +503,19 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif /* CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_SMP
+bool apic_id_is_primary_thread(unsigned int id);
+#else
+static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
+#endif
+
extern void irq_enter(void);
extern void irq_exit(void);
static inline void entering_irq(void)
{
irq_enter();
+ kvm_set_cpu_l1tf_flush_l1d();
}
static inline void entering_ack_irq(void)
@@ -520,6 +528,7 @@ static inline void ipi_entering_ack_irq(void)
{
irq_enter();
ack_APIC_irq();
+ kvm_set_cpu_l1tf_flush_l1d();
}
static inline void exiting_irq(void)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b143717b92b3..ce84388e540c 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,11 +80,11 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic_sub_and_test arch_atomic_sub_and_test
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
/**
* arch_atomic_inc - increment atomic variable
@@ -92,12 +92,12 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
}
+#define arch_atomic_inc arch_atomic_inc
/**
* arch_atomic_dec - decrement atomic variable
@@ -105,12 +105,12 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic_dec arch_atomic_dec
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
}
+#define arch_atomic_dec arch_atomic_dec
/**
* arch_atomic_dec_and_test - decrement and test
@@ -120,11 +120,11 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-#define arch_atomic_dec_and_test arch_atomic_dec_and_test
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
/**
* arch_atomic_inc_and_test - increment and test
@@ -134,11 +134,11 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic_inc_and_test arch_atomic_inc_and_test
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
/**
* arch_atomic_add_negative - add and test if negative
@@ -149,11 +149,11 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-#define arch_atomic_add_negative arch_atomic_add_negative
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
}
+#define arch_atomic_add_negative arch_atomic_add_negative
/**
* arch_atomic_add_return - add integer and return
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index ef959f02d070..6a5b0ec460da 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -205,12 +205,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic64_inc arch_atomic64_inc
static inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
+#define arch_atomic64_inc arch_atomic64_inc
/**
* arch_atomic64_dec - decrement atomic64 variable
@@ -218,12 +218,12 @@ static inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic64_dec arch_atomic64_dec
static inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
+#define arch_atomic64_dec arch_atomic64_dec
/**
* arch_atomic64_add_unless - add unless the number is a given value
@@ -245,7 +245,6 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
return (int)a;
}
-#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
@@ -253,8 +252,8 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
"S" (v) : "ecx", "edx", "memory");
return r;
}
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
@@ -262,6 +261,7 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
"S" (v) : "ecx", "memory");
return r;
}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
#undef alternative_atomic64
#undef __alternative_atomic64
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 4343d9b4f30e..5f851d92eecd 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,11 +71,11 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
/**
* arch_atomic64_inc - increment atomic64 variable
@@ -83,13 +83,13 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
: "m" (v->counter));
}
+#define arch_atomic64_inc arch_atomic64_inc
/**
* arch_atomic64_dec - decrement atomic64 variable
@@ -97,13 +97,13 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic64_dec arch_atomic64_dec
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
: "m" (v->counter));
}
+#define arch_atomic64_dec arch_atomic64_dec
/**
* arch_atomic64_dec_and_test - decrement and test
@@ -113,11 +113,11 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
/**
* arch_atomic64_inc_and_test - increment and test
@@ -127,11 +127,11 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
/**
* arch_atomic64_add_negative - add and test if negative
@@ -142,11 +142,11 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-#define arch_atomic64_add_negative arch_atomic64_add_negative
static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
}
+#define arch_atomic64_add_negative arch_atomic64_add_negative
/**
* arch_atomic64_add_return - add and return
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b5c60faf8429..89a048c2faec 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -219,7 +219,8 @@
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
-#define X86_FEATURE_IBRS_ENHANCED ( 7*32+29) /* Enhanced IBRS */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -342,6 +343,7 @@
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
@@ -374,5 +376,6 @@
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 0ab2ab27ad1f..b825cb201251 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -4,8 +4,8 @@
#include <linux/compiler.h>
#include <linux/init.h>
+#include <linux/io.h>
-#include <asm/io.h>
#include <asm/setup.h>
static __always_inline __init void *dmi_alloc(unsigned len)
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..000000000000
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 740a428acf1e..d9069bb26c7f 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -3,10 +3,12 @@
#define _ASM_X86_HARDIRQ_H
#include <linux/threads.h>
-#include <linux/irq.h>
typedef struct {
- unsigned int __softirq_pending;
+ u16 __softirq_pending;
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+ u8 kvm_cpu_l1tf_flush_l1d;
+#endif
unsigned int __nmi_count; /* arch dependent */
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int apic_timer_irqs; /* arch dependent */
@@ -58,4 +60,24 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+{
+ __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
+}
+
+static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
+{
+ __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
+}
+
+static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
+{
+ return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
+}
+#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
+static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
+
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b8c89265baf0..e977b6b3a538 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -35,9 +35,9 @@
/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
-#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
/* Partition reference TSC MSR is available */
-#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE (1 << 9)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9)
/* A partition's reference time stamp counter (TSC) page */
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
@@ -60,7 +60,7 @@
* Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
* HV_X64_MSR_STIMER3_COUNT) available
*/
-#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
+#define HV_MSR_SYNTIMER_AVAILABLE (1 << 3)
/*
* APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
* are available
@@ -86,7 +86,7 @@
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
/* stimer Direct Mode is available */
-#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE (1 << 19)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE (1 << 19)
/*
* Feature identification: EBX indicates which flags were specified at
@@ -160,9 +160,9 @@
#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
/*
- * Virtual APIC support
+ * Recommend not using Auto End-Of-Interrupt feature
*/
-#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
+#define HV_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
/*
* Recommend using cluster IPI hypercalls.
@@ -176,9 +176,10 @@
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14)
/*
- * Crash notification flag.
+ * Crash notification flags.
*/
-#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
+#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
+#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
/* MSR used to identify the guest OS. */
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
@@ -309,6 +310,7 @@ struct ms_hyperv_tsc_page {
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
struct hv_reenlightenment_control {
@@ -350,6 +352,7 @@ struct hv_tsc_emulation_status {
#define HVCALL_SEND_IPI_EX 0x0015
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
@@ -741,6 +744,12 @@ struct ipi_arg_ex {
struct hv_vpset vp_set;
};
+/* HvFlushGuestPhysicalAddressSpace hypercalls */
+struct hv_guest_mapping_flush {
+ u64 address_space;
+ u64 flags;
+};
+
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_tlb_flush {
u64 address_space;
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 5cdcdbd4d892..89789e8c80f6 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -3,6 +3,7 @@
#define _ASM_X86_I8259_H
#include <linux/delay.h>
+#include <asm/io.h>
extern unsigned int cached_irq_mask;
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 023b4a9fc846..5f26962eff42 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,6 +33,11 @@ enum irq_remap_cap {
IRQ_POSTING_CAP = 0,
};
+enum {
+ IRQ_REMAP_XAPIC_MODE,
+ IRQ_REMAP_X2APIC_MODE,
+};
+
struct vcpu_data {
u64 pi_desc_addr; /* Physical address of PI Descriptor */
u32 vector; /* Guest vector of the interrupt */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c14f2a74b2be..15450a675031 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void)
return flags;
}
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 395c9631e000..75f1e35e7c15 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -22,10 +22,20 @@ enum die_val {
DIE_NMIUNKNOWN,
};
+enum show_regs_mode {
+ SHOW_REGS_SHORT,
+ /*
+ * For when userspace crashed, but we don't think it's our fault, and
+ * therefore don't print kernel registers.
+ */
+ SHOW_REGS_USER,
+ SHOW_REGS_ALL
+};
+
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, int all);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c13cd28d9d1b..8e90488c3d56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -17,6 +17,7 @@
#include <linux/tracepoint.h>
#include <linux/cpumask.h>
#include <linux/irq_work.h>
+#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -54,6 +55,7 @@
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
+#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
@@ -75,13 +77,13 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
+#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR3_PCID_INVD BIT_64(63)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
@@ -325,6 +327,16 @@ struct rsvd_bits_validate {
u64 bad_mt_xwr;
};
+struct kvm_mmu_root_info {
+ gpa_t cr3;
+ hpa_t hpa;
+};
+
+#define KVM_MMU_ROOT_INFO_INVALID \
+ ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+
+#define KVM_MMU_NUM_PREV_ROOTS 3
+
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
@@ -344,7 +356,7 @@ struct kvm_mmu {
struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
- void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
+ void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
@@ -353,6 +365,7 @@ struct kvm_mmu {
u8 shadow_root_level;
u8 ept_ad;
bool direct_map;
+ struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
/*
* Bitmap; bit set = permission fault
@@ -713,6 +726,9 @@ struct kvm_vcpu_arch {
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
+
+ /* Flush the L1 Data cache for L1TF mitigation on VMENTER */
+ bool l1tf_flush_l1d;
};
struct kvm_lpage_info {
@@ -881,6 +897,7 @@ struct kvm_vcpu_stat {
u64 signal_exits;
u64 irq_window_exits;
u64 nmi_window_exits;
+ u64 l1d_flush;
u64 halt_exits;
u64 halt_successful_poll;
u64 halt_attempted_poll;
@@ -973,6 +990,15 @@ struct kvm_x86_ops {
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ int (*tlb_remote_flush)(struct kvm *kvm);
+
+ /*
+ * Flush any TLB entries associated with the given GVA.
+ * Does not need to flush GPA->HPA mappings.
+ * Can potentially get non-canonical addresses through INVLPGs, which
+ * the implementation may choose to ignore if appropriate.
+ */
+ void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1085,6 +1111,14 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
+ int (*get_nested_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ unsigned user_data_size);
+ int (*set_nested_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state);
+ void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
@@ -1117,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
return kvm_x86_ops->vm_free(kvm);
}
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+ if (kvm_x86_ops->tlb_remote_flush &&
+ !kvm_x86_ops->tlb_remote_flush(kvm))
+ return 0;
+ else
+ return -ENOTSUPP;
+}
+
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@@ -1193,19 +1237,12 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_RETRY (1 << 3)
-#define EMULTYPE_NO_REEXECUTE (1 << 4)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 5)
-#define EMULTYPE_VMWARE (1 << 6)
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
- int emulation_type, void *insn, int insn_len);
-
-static inline int emulate_instruction(struct kvm_vcpu *vcpu,
- int emulation_type)
-{
- return x86_emulate_instruction(vcpu, 0,
- emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
-}
+#define EMULTYPE_ALLOW_RETRY (1 << 3)
+#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
+#define EMULTYPE_VMWARE (1 << 5)
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+ void *insn, int insn_len);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
@@ -1268,6 +1305,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state);
}
+#define KVM_MMU_ROOT_CURRENT BIT(0)
+#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
+#define KVM_MMU_ROOTS_ALL (~0UL)
+
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
@@ -1279,7 +1320,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1298,7 +1339,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
+void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
void kvm_enable_tdp(void);
void kvm_disable_tdp(void);
@@ -1401,7 +1443,6 @@ asmlinkage void kvm_spurious_fault(void);
____kvm_handle_fault_on_reboot(insn, "")
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -1413,6 +1454,11 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+ unsigned long ipi_bitmap_high, u32 min,
+ unsigned long icr, int op_64_bit);
+
+u64 kvm_get_arch_capabilities(void);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8c7b3e5a2d01..3a17107594c8 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -148,6 +148,7 @@ enum mce_notifier_prios {
MCE_PRIO_LOWEST = 0,
};
+struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 19886fef1dfc..f37704497d8f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -76,8 +76,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
}
}
-#define hv_init_timer(timer, tick) wrmsrl(timer, tick)
-#define hv_init_timer_config(config, val) wrmsrl(config, val)
+#define hv_init_timer(timer, tick) \
+ wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick)
+#define hv_init_timer_config(timer, val) \
+ wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val)
#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val)
#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val)
@@ -90,8 +92,13 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
-#define hv_get_synint_state(int_num, val) rdmsrl(int_num, val)
-#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val)
+#define hv_get_synint_state(int_num, val) \
+ rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
+#define hv_set_synint_state(int_num, val) \
+ wrmsrl(HV_X64_MSR_SINT0 + int_num, val)
+
+#define hv_get_crash_ctl(val) \
+ rdmsrl(HV_X64_MSR_CRASH_CTL, val)
void hyperv_callback_vector(void);
void hyperv_reenlightenment_vector(void);
@@ -332,6 +339,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
+void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
bool hv_is_hyperv_initialized(void);
void hyperv_cleanup(void);
@@ -339,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
+int hyperv_flush_guest_mapping(u64 as);
#ifdef CONFIG_X86_64
void hv_apic_init(void);
@@ -358,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{
return NULL;
}
+static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 68b2c3150de1..4731f0cf97c5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -70,12 +70,19 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO (1 << 4) /*
* Not susceptible to Speculative Store Bypass
* attack, so no Speculative Store Bypass
* control required.
*/
+#define MSR_IA32_FLUSH_CMD 0x0000010b
+#define L1D_FLUSH (1 << 0) /*
+ * Writeback and invalidate the
+ * L1 data cache.
+ */
+
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index aa30c3241ea7..0d5c739eebd7 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -29,8 +29,13 @@
#define N_EXCEPTION_STACKS 1
#ifdef CONFIG_X86_PAE
-/* 44=32+12, the limit we can fit into an unsigned long pfn */
-#define __PHYSICAL_MASK_SHIFT 44
+/*
+ * This is beyond the 44 bit limit imposed by the 32bit long pfns,
+ * but we need the full mask to make sure inverted PROT_NONE
+ * entries have all the host bits set in a guest.
+ * The real limit is still 44 bits.
+ */
+#define __PHYSICAL_MASK_SHIFT 52
#define __VIRTUAL_MASK_SHIFT 32
#else /* !CONFIG_X86_PAE */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d49bbf4bb5c8..e375d4266b53 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -309,6 +309,11 @@ static inline void flush_tlb_others(const struct cpumask *cpumask,
PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info);
}
+static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ PVOP_VCALL2(pv_mmu_ops.tlb_remove_table, tlb, table);
+}
+
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
{
return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 180bc0bff0fb..4b75acc23b30 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -54,6 +54,7 @@ struct desc_struct;
struct task_struct;
struct cpumask;
struct flush_tlb_info;
+struct mmu_gather;
/*
* Wrapper type for pointers to code which uses the non-standard
@@ -222,6 +223,8 @@ struct pv_mmu_ops {
void (*flush_tlb_others)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
+ void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
+
/* Hooks for allocating and freeing a pagetable top-level */
int (*pgd_alloc)(struct mm_struct *mm);
void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
index e1084f71a295..94597a3cf3d0 100644
--- a/arch/x86/include/asm/pci-direct.h
+++ b/arch/x86/include/asm/pci-direct.h
@@ -15,8 +15,4 @@ extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val);
extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val);
extern int early_pci_allowed(void);
-
-extern unsigned int pci_early_dump_regs;
-extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
-extern void early_dump_pci_devices(void);
#endif /* _ASM_X86_PCI_DIRECT_H */
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index c399ea5eea41..24c6cf5f16b7 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -104,4 +104,21 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+/* No inverted PFNs on 2 level page tables */
+
+static inline u64 protnone_mask(u64 val)
+{
+ return 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+ return val;
+}
+
+static inline bool __pte_needs_invert(u64 val)
+{
+ return false;
+}
+
#endif /* _ASM_X86_PGTABLE_2LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index f2ca3139ca22..f8b1ad2c3828 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H
+#include <asm/atomic64_32.h>
+
/*
* Intel Physical Address Extension (PAE) Mode - three-level page
* tables on PPro+ CPUs.
@@ -150,10 +152,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
pte_t res;
- /* xchg acts as a barrier before the setting of the high bits */
- res.pte_low = xchg(&ptep->pte_low, 0);
- res.pte_high = ptep->pte_high;
- ptep->pte_high = 0;
+ res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
return res;
}
@@ -248,12 +247,43 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
#endif
/* Encode and de-code a swap entry */
+#define SWP_TYPE_BITS 5
+
+#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+
+/* We always extract/encode the offset by shifting it all the way up, and then down again */
+#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
+
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
#define __swp_offset(x) ((x).val >> 5)
#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
-#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
-#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
+
+/*
+ * Normally, __swp_entry() converts from arch-independent swp_entry_t to
+ * arch-dependent swp_entry_t, and __swp_entry_to_pte() just stores the result
+ * to pte. But here we have 32bit swp_entry_t and 64bit pte, and need to use the
+ * whole 64 bits. Thus, we shift the "real" arch-dependent conversion to
+ * __swp_entry_to_pte() through the following helper macro based on 64bit
+ * __swp_entry().
+ */
+#define __swp_pteval_entry(type, offset) ((pteval_t) { \
+ (~(pteval_t)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+ | ((pteval_t)(type) << (64 - SWP_TYPE_BITS)) })
+
+#define __swp_entry_to_pte(x) ((pte_t){ .pte = \
+ __swp_pteval_entry(__swp_type(x), __swp_offset(x)) })
+/*
+ * Analogically, __pte_to_swp_entry() doesn't just extract the arch-dependent
+ * swp_entry_t, but also has to convert it from 64bit to the 32bit
+ * intermediate representation, using the following macros based on 64bit
+ * __swp_type() and __swp_offset().
+ */
+#define __pteval_swp_type(x) ((unsigned long)((x).pte >> (64 - SWP_TYPE_BITS)))
+#define __pteval_swp_offset(x) ((unsigned long)(~((x).pte) << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT))
+
+#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
+ __pteval_swp_offset(pte)))
#define gup_get_pte gup_get_pte
/*
@@ -302,4 +332,6 @@ static inline pte_t gup_get_pte(pte_t *ptep)
return pte;
}
+#include <asm/pgtable-invert.h>
+
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
new file mode 100644
index 000000000000..a0c1525f1b6f
--- /dev/null
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PGTABLE_INVERT_H
+#define _ASM_PGTABLE_INVERT_H 1
+
+#ifndef __ASSEMBLY__
+
+/*
+ * A clear pte value is special, and doesn't get inverted.
+ *
+ * Note that even users that only pass a pgprot_t (rather
+ * than a full pte) won't trigger the special zero case,
+ * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
+ * set. So the all zero case really is limited to just the
+ * cleared page table entry case.
+ */
+static inline bool __pte_needs_invert(u64 val)
+{
+ return val && !(val & _PAGE_PRESENT);
+}
+
+/* Get a mask to xor with the page table entry to get the correct pfn. */
+static inline u64 protnone_mask(u64 val)
+{
+ return __pte_needs_invert(val) ? ~0ull : 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+ /*
+ * When a PTE transitions from NONE to !NONE or vice-versa
+ * invert the PFN part to stop speculation.
+ * pte_pfn undoes this when needed.
+ */
+ if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
+ val = (val & ~mask) | (~val & mask);
+ return val;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1cb3339da8d..690c0307afed 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -188,19 +188,29 @@ static inline int pte_special(pte_t pte)
return pte_flags(pte) & _PAGE_SPECIAL;
}
+/* Entries that were set to PROT_NONE are inverted */
+
+static inline u64 protnone_mask(u64 val);
+
static inline unsigned long pte_pfn(pte_t pte)
{
- return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ phys_addr_t pfn = pte_val(pte);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
}
static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+ phys_addr_t pfn = pmd_val(pmd);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
static inline unsigned long pud_pfn(pud_t pud)
{
- return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+ phys_addr_t pfn = pud_val(pud);
+ pfn ^= protnone_mask(pfn);
+ return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
static inline unsigned long p4d_pfn(p4d_t p4d)
@@ -403,11 +413,6 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_RW);
}
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
- return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
-}
-
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t v = native_pud_val(pud);
@@ -462,11 +467,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
return pud_set_flags(pud, _PAGE_RW);
}
-static inline pud_t pud_mknotpresent(pud_t pud)
-{
- return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
-}
-
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
@@ -548,25 +548,45 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
- return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
- check_pgprot(pgprot));
+ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PTE_PFN_MASK;
+ return __pte(pfn | check_pgprot(pgprot));
}
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
- return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
- check_pgprot(pgprot));
+ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PHYSICAL_PMD_PAGE_MASK;
+ return __pmd(pfn | check_pgprot(pgprot));
}
static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
{
- return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
- check_pgprot(pgprot));
+ phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+ pfn ^= protnone_mask(pgprot_val(pgprot));
+ pfn &= PHYSICAL_PUD_PAGE_MASK;
+ return __pud(pfn | check_pgprot(pgprot));
}
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+ return pfn_pmd(pmd_pfn(pmd),
+ __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
+static inline pud_t pud_mknotpresent(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- pteval_t val = pte_val(pte);
+ pteval_t val = pte_val(pte), oldval = val;
/*
* Chop off the NX bit (if present), and add the NX portion of
@@ -574,17 +594,17 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
val &= _PAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
-
+ val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
return __pte(val);
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- pmdval_t val = pmd_val(pmd);
+ pmdval_t val = pmd_val(pmd), oldval = val;
val &= _HPAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
-
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
return __pmd(val);
}
@@ -1175,7 +1195,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
return xchg(pmdp, pmd);
} else {
pmd_t old = *pmdp;
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
return old;
}
}
@@ -1410,6 +1430,14 @@ static inline bool pud_access_permitted(pud_t pud, bool write)
return __pte_access_permitted(pud_val(pud), write);
}
+#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
+extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+ return boot_cpu_has_bug(X86_BUG_L1TF);
+}
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index acb6970e7bcf..ce2b59047cb8 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -55,15 +55,15 @@ struct mm_struct;
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = native_make_pte(0);
+ WRITE_ONCE(*ptep, pte);
}
-static inline void native_set_pte(pte_t *ptep, pte_t pte)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- *ptep = pte;
+ native_set_pte(ptep, native_make_pte(0));
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -73,7 +73,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
}
static inline void native_pmd_clear(pmd_t *pmd)
@@ -109,7 +109,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
}
static inline void native_pud_clear(pud_t *pud)
@@ -137,13 +137,13 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
pgd_t pgd;
if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
- *p4dp = p4d;
+ WRITE_ONCE(*p4dp, p4d);
return;
}
pgd = native_make_pgd(native_p4d_val(p4d));
pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd);
- *p4dp = native_make_p4d(native_pgd_val(pgd));
+ WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd)));
}
static inline void native_p4d_clear(p4d_t *p4d)
@@ -153,7 +153,7 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pti_set_user_pgtbl(pgdp, pgd);
+ WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd));
}
static inline void native_pgd_clear(pgd_t *pgd)
@@ -188,7 +188,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry
+ * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
@@ -201,20 +201,34 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
*
* Bit 7 in swp entry should be 0 because pmd_present checks not only P,
* but also L and G.
+ *
+ * The offset is inverted by a binary not operation to make the high
+ * physical bits set.
*/
-#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
-#define SWP_TYPE_BITS 5
-/* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
+#define SWP_TYPE_BITS 5
+
+#define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
+
+/* We always extract/encode the offset by shifting it all the way up, and then down again */
+#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
-#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \
- & ((1U << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT)
-#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << (SWP_TYPE_FIRST_BIT)) \
- | ((offset) << SWP_OFFSET_FIRST_BIT) })
+/* Extract the high bits for type */
+#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))
+
+/* Shift up (to get rid of type), then down to get value */
+#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
+
+/*
+ * Shift the offset up "too far" by TYPE bits, then down again
+ * The offset is inverted by a binary not operation to make the high
+ * physical bits set.
+ */
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
+ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })
+
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) })
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
@@ -258,5 +272,7 @@ static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
return true;
}
+#include <asm/pgtable-invert.h>
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 59663c08c949..d53c54b842da 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,8 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
+ /* Address space bits used by the cache internally */
+ u8 x86_cache_bits;
unsigned initialized : 1;
} __randomize_layout;
@@ -181,6 +183,11 @@ extern const struct seq_operations cpuinfo_op;
extern void cpu_detect(struct cpuinfo_x86 *c);
+static inline unsigned long long l1tf_pfn_limit(void)
+{
+ return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
+}
+
extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
@@ -978,4 +985,16 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
void microcode_check(void);
+
+enum l1tf_mitigations {
+ L1TF_MITIGATION_OFF,
+ L1TF_MITIGATION_FLUSH_NOWARN,
+ L1TF_MITIGATION_FLUSH,
+ L1TF_MITIGATION_FLUSH_NOSMT,
+ L1TF_MITIGATION_FULL,
+ L1TF_MITIGATION_FULL_FORCE
+};
+
+extern enum l1tf_mitigations l1tf_mitigation;
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 34cffcef7375..07a25753e85c 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -89,4 +89,46 @@ extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
+#ifdef CONFIG_X86_64
+static inline int set_mce_nospec(unsigned long pfn)
+{
+ unsigned long decoy_addr;
+ int rc;
+
+ /*
+ * Mark the linear address as UC to make sure we don't log more
+ * errors because of speculative access to the page.
+ * We would like to just call:
+ * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
+ * but doing that would radically increase the odds of a
+ * speculative access to the poison page because we'd have
+ * the virtual address of the kernel 1:1 mapping sitting
+ * around in registers.
+ * Instead we get tricky. We create a non-canonical address
+ * that looks just like the one we want, but has bit 63 flipped.
+ * This relies on set_memory_uc() properly sanitizing any __pa()
+ * results with __PHYSICAL_MASK or PTE_PFN_MASK.
+ */
+ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+
+ rc = set_memory_uc(decoy_addr, 1);
+ if (rc)
+ pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+ return rc;
+}
+#define set_mce_nospec set_mce_nospec
+
+/* Restore full speculative operation to the pfn. */
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+ return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
+}
+#define clear_mce_nospec clear_mce_nospec
+#else
+/*
+ * Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+#endif
+
#endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 5f9012ff52ed..33d3c88a7225 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs);
#define __ARCH_HAS_SA_RESTORER
+#include <asm/asm.h>
#include <uapi/asm/sigcontext.h>
#ifdef __i386__
@@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
- unsigned char ret;
- asm("btl %2,%1\n\tsetc %0"
- : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+ bool ret;
+ asm("btl %2,%1" CC_SET(c)
+ : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
return ret;
}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index b6dc698f992a..f335aad404a4 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
return (unsigned long)frame;
}
-void show_opcodes(u8 *rip, const char *loglvl);
+void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 511bf5fae8b8..58ce5288878e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,6 +148,22 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
#endif
+static inline bool tlb_defer_switch_to_init_mm(void)
+{
+ /*
+ * If we have PCID, then switching to init_mm is reasonably
+ * fast. If we don't have PCID, then switching to init_mm is
+ * quite slow, so we try to defer it in the hopes that we can
+ * avoid it entirely. The latter approach runs the risk of
+ * receiving otherwise unnecessary IPIs.
+ *
+ * This choice is just a heuristic. The tlb code can handle this
+ * function returning true or false regardless of whether we have
+ * PCID.
+ */
+ return !static_cpu_has(X86_FEATURE_PCID);
+}
+
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -159,8 +175,16 @@ struct tlb_state {
* are on. This means that it may not match current->active_mm,
* which will contain the previous user mm when we're in lazy TLB
* mode even if we've already switched back to swapper_pg_dir.
+ *
+ * During switch_mm_irqs_off(), loaded_mm will be set to
+ * LOADED_MM_SWITCHING during the brief interrupts-off window
+ * when CR3 and loaded_mm would otherwise be inconsistent. This
+ * is for nmi_uaccess_okay()'s benefit.
*/
struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
u16 loaded_mm_asid;
u16 next_asid;
/* last user mm's ctx id */
@@ -230,6 +254,38 @@ struct tlb_state {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ struct mm_struct *current_mm = current->mm;
+
+ VM_WARN_ON_ONCE(!loaded_mm);
+
+ /*
+ * The condition we want to check is
+ * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+ * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+ * is supposed to be reasonably fast.
+ *
+ * Instead, we check the almost equivalent but somewhat conservative
+ * condition below, and we rely on the fact that switch_mm_irqs_off()
+ * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+ */
+ if (loaded_mm != current_mm)
+ return false;
+
+ VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+ return true;
+}
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
@@ -536,11 +592,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, info) \
native_flush_tlb_others(mask, info)
-#endif
-
-extern void tlb_flush_remove_tables(struct mm_struct *mm);
-extern void tlb_flush_remove_tables_local(void *arg);
-#define HAVE_TLB_FLUSH_REMOVE_TABLES
+#define paravirt_tlb_remove_table(tlb, page) \
+ tlb_remove_page(tlb, (void *)(page))
+#endif
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c1d2a9892352..453cf38a1c33 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -123,13 +123,17 @@ static inline int topology_max_smt_threads(void)
}
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
-extern int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_pkg(unsigned int pkg);
+bool topology_is_primary_thread(unsigned int cpu);
+bool topology_smt_supported(void);
#else
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_max_smt_threads(void) { return 1; }
+static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
+static inline bool topology_smt_supported(void) { return false; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 9c0d4b588e3f..2e6245a023ef 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
__entry->addr, __entry->end)
);
+TRACE_EVENT(hyperv_nested_flush_guest_mapping,
+ TP_PROTO(u64 as, int ret),
+ TP_ARGS(as, ret),
+
+ TP_STRUCT__entry(
+ __field(u64, as)
+ __field(int, ret)
+ ),
+ TP_fast_assign(__entry->as = as;
+ __entry->ret = ret;
+ ),
+ TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
+ );
+
TRACE_EVENT(hyperv_send_ipi_mask,
TP_PROTO(const struct cpumask *cpus,
int vector),
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[p],%[seg]",
+ alternative_io ("lsl %[seg],%[p]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 6aa8499e1f62..9527ba5d62da 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -74,6 +74,7 @@
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
@@ -213,6 +214,8 @@ enum vmcs_field {
VMWRITE_BITMAP_HIGH = 0x00002029,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ ENCLS_EXITING_BITMAP = 0x0000202E,
+ ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
@@ -576,4 +579,15 @@ enum vm_instruction_error_number {
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
};
+enum vmx_l1d_flush_state {
+ VMENTER_L1D_FLUSH_AUTO,
+ VMENTER_L1D_FLUSH_NEVER,
+ VMENTER_L1D_FLUSH_COND,
+ VMENTER_L1D_FLUSH_ALWAYS,
+ VMENTER_L1D_FLUSH_EPT_DISABLED,
+ VMENTER_L1D_FLUSH_NOT_REQUIRED,
+};
+
+extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
+
#endif
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index bfd882617613..ef05bea7010d 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -197,36 +197,38 @@ extern struct { char _entry[32]; } hypercall_page[];
(type)__res; \
})
-#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
-({ \
- __HYPERCALL_DECLS; \
- __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_5PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER5); \
- (type)__res; \
-})
-
static inline long
-privcmd_call(unsigned call,
- unsigned long a1, unsigned long a2,
- unsigned long a3, unsigned long a4,
- unsigned long a5)
+xen_single_call(unsigned int call,
+ unsigned long a1, unsigned long a2,
+ unsigned long a3, unsigned long a4,
+ unsigned long a5)
{
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
- stac();
asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
: [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
- clac();
return (long)__res;
}
+static inline long
+privcmd_call(unsigned int call,
+ unsigned long a1, unsigned long a2,
+ unsigned long a3, unsigned long a4,
+ unsigned long a5)
+{
+ long res;
+
+ stac();
+ res = xen_single_call(call, a1, a2, a3, a4, a5);
+ clac();
+
+ return res;
+}
+
static inline int
HYPERVISOR_set_trap_table(struct trap_info *table)
{
@@ -254,47 +256,12 @@ HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
}
static inline int
-HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
-{
- return _hypercall2(int, stack_switch, ss, esp);
-}
-
-#ifdef CONFIG_X86_32
-static inline int
-HYPERVISOR_set_callbacks(unsigned long event_selector,
- unsigned long event_address,
- unsigned long failsafe_selector,
- unsigned long failsafe_address)
-{
- return _hypercall4(int, set_callbacks,
- event_selector, event_address,
- failsafe_selector, failsafe_address);
-}
-#else /* CONFIG_X86_64 */
-static inline int
-HYPERVISOR_set_callbacks(unsigned long event_address,
- unsigned long failsafe_address,
- unsigned long syscall_address)
-{
- return _hypercall3(int, set_callbacks,
- event_address, failsafe_address,
- syscall_address);
-}
-#endif /* CONFIG_X86_{32,64} */
-
-static inline int
HYPERVISOR_callback_op(int cmd, void *arg)
{
return _hypercall2(int, callback_op, cmd, arg);
}
static inline int
-HYPERVISOR_fpu_taskswitch(int set)
-{
- return _hypercall1(int, fpu_taskswitch, set);
-}
-
-static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
return _hypercall2(int, sched_op, cmd, arg);
@@ -406,19 +373,6 @@ HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
}
static inline int
-HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val,
- unsigned long flags, domid_t domid)
-{
- if (sizeof(new_val) == sizeof(long))
- return _hypercall4(int, update_va_mapping_otherdomain, va,
- new_val.pte, flags, domid);
- else
- return _hypercall5(int, update_va_mapping_otherdomain, va,
- new_val.pte, new_val.pte >> 32,
- flags, domid);
-}
-
-static inline int
HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
{
return _hypercall2(int, vm_assist, cmd, type);
@@ -452,12 +406,6 @@ HYPERVISOR_suspend(unsigned long start_info_mfn)
return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
}
-static inline int
-HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
-{
- return _hypercall2(int, nmi_op, op, arg);
-}
-
static inline unsigned long __must_check
HYPERVISOR_hvm_op(int op, void *arg)
{
@@ -516,39 +464,6 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
}
static inline void
-MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
- void *uop, unsigned int count)
-{
- mcl->op = __HYPERVISOR_grant_table_op;
- mcl->args[0] = cmd;
- mcl->args[1] = (unsigned long)uop;
- mcl->args[2] = count;
-
- trace_xen_mc_entry(mcl, 3);
-}
-
-static inline void
-MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va,
- pte_t new_val, unsigned long flags,
- domid_t domid)
-{
- mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
- mcl->args[0] = va;
- if (sizeof(new_val) == sizeof(long)) {
- mcl->args[1] = new_val.pte;
- mcl->args[2] = flags;
- mcl->args[3] = domid;
- } else {
- mcl->args[1] = new_val.pte;
- mcl->args[2] = new_val.pte >> 32;
- mcl->args[3] = flags;
- mcl->args[4] = domid;
- }
-
- trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 4 : 5);
-}
-
-static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
@@ -569,16 +484,6 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
}
static inline void
-MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg)
-{
- mcl->op = __HYPERVISOR_memory_op;
- mcl->args[0] = cmd;
- mcl->args[1] = (unsigned long)arg;
-
- trace_xen_mc_entry(mcl, 2);
-}
-
-static inline void
MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
int count, int *success_count, domid_t domid)
{
@@ -605,16 +510,6 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
}
static inline void
-MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries)
-{
- mcl->op = __HYPERVISOR_set_gdt;
- mcl->args[0] = (unsigned long)frames;
- mcl->args[1] = entries;
-
- trace_xen_mc_entry(mcl, 2);
-}
-
-static inline void
MULTI_stack_switch(struct multicall_entry *mcl,
unsigned long ss, unsigned long esp)
{
OpenPOWER on IntegriCloud