summaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/acpi.h12
-rw-r--r--arch/x86/include/asm/alternative.h14
-rw-r--r--arch/x86/include/asm/apic.h21
-rw-r--r--arch/x86/include/asm/apic_flat_64.h8
-rw-r--r--arch/x86/include/asm/archrandom.h28
-rw-r--r--arch/x86/include/asm/asm.h14
-rw-r--r--arch/x86/include/asm/barrier.h3
-rw-r--r--arch/x86/include/asm/bitops.h11
-rw-r--r--arch/x86/include/asm/bootparam_utils.h3
-rw-r--r--arch/x86/include/asm/bug.h4
-rw-r--r--arch/x86/include/asm/bugs.h8
-rw-r--r--arch/x86/include/asm/calgary.h57
-rw-r--r--arch/x86/include/asm/compat.h17
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h38
-rw-r--r--arch/x86/include/asm/cpufeature.h7
-rw-r--r--arch/x86/include/asm/cpufeatures.h9
-rw-r--r--arch/x86/include/asm/cpuidle_haltpoll.h8
-rw-r--r--arch/x86/include/asm/crash.h9
-rw-r--r--arch/x86/include/asm/crypto/aes.h12
-rw-r--r--arch/x86/include/asm/crypto/camellia.h65
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h20
-rw-r--r--arch/x86/include/asm/crypto/serpent-avx.h20
-rw-r--r--arch/x86/include/asm/crypto/serpent-sse2.h28
-rw-r--r--arch/x86/include/asm/crypto/twofish.h19
-rw-r--r--arch/x86/include/asm/device.h13
-rw-r--r--arch/x86/include/asm/disabled-features.h10
-rw-r--r--arch/x86/include/asm/div64.h13
-rw-r--r--arch/x86/include/asm/dma-direct.h9
-rw-r--r--arch/x86/include/asm/doublefault.h13
-rw-r--r--arch/x86/include/asm/e820/types.h8
-rw-r--r--arch/x86/include/asm/efi.h266
-rw-r--r--arch/x86/include/asm/emulate_prefix.h14
-rw-r--r--arch/x86/include/asm/error-injection.h13
-rw-r--r--arch/x86/include/asm/fixmap.h7
-rw-r--r--arch/x86/include/asm/fpu/internal.h2
-rw-r--r--arch/x86/include/asm/ftrace.h16
-rw-r--r--arch/x86/include/asm/hw_irq.h4
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h51
-rw-r--r--arch/x86/include/asm/insn.h6
-rw-r--r--arch/x86/include/asm/intel-family.h73
-rw-r--r--arch/x86/include/asm/intel_pmc_ipc.h32
-rw-r--r--arch/x86/include/asm/intel_pt.h2
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h20
-rw-r--r--arch/x86/include/asm/intel_telemetry.h3
-rw-r--r--arch/x86/include/asm/io.h43
-rw-r--r--arch/x86/include/asm/io_bitmap.h29
-rw-r--r--arch/x86/include/asm/iommu.h19
-rw-r--r--arch/x86/include/asm/ipi.h109
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/kexec.h10
-rw-r--r--arch/x86/include/asm/kprobes.h14
-rw-r--r--arch/x86/include/asm/kvm_emulate.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h194
-rw-r--r--arch/x86/include/asm/linkage.h4
-rw-r--r--arch/x86/include/asm/mce.h3
-rw-r--r--arch/x86/include/asm/mem_encrypt.h10
-rw-r--r--arch/x86/include/asm/memtype.h27
-rw-r--r--arch/x86/include/asm/microcode_amd.h2
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h124
-rw-r--r--arch/x86/include/asm/module.h2
-rw-r--r--arch/x86/include/asm/mpx.h116
-rw-r--r--arch/x86/include/asm/mshyperv.h6
-rw-r--r--arch/x86/include/asm/msr-index.h48
-rw-r--r--arch/x86/include/asm/msr.h3
-rw-r--r--arch/x86/include/asm/mtrr.h4
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h7
-rw-r--r--arch/x86/include/asm/paravirt.h16
-rw-r--r--arch/x86/include/asm/paravirt_types.h7
-rw-r--r--arch/x86/include/asm/pat.h27
-rw-r--r--arch/x86/include/asm/pci.h40
-rw-r--r--arch/x86/include/asm/pci_64.h28
-rw-r--r--arch/x86/include/asm/perf_event.h34
-rw-r--r--arch/x86/include/asm/pgtable-3level.h46
-rw-r--r--arch/x86/include/asm/pgtable.h16
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_areas.h53
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h56
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/pgtable_areas.h16
-rw-r--r--arch/x86/include/asm/pgtable_types.h147
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/processor.h147
-rw-r--r--arch/x86/include/asm/pti.h2
-rw-r--r--arch/x86/include/asm/ptrace.h35
-rw-r--r--arch/x86/include/asm/purgatory.h10
-rw-r--r--arch/x86/include/asm/qspinlock.h15
-rw-r--r--arch/x86/include/asm/realmode.h5
-rw-r--r--arch/x86/include/asm/refcount.h126
-rw-r--r--arch/x86/include/asm/rio.h64
-rw-r--r--arch/x86/include/asm/sections.h1
-rw-r--r--arch/x86/include/asm/segment.h12
-rw-r--r--arch/x86/include/asm/set_memory.h10
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/special_insns.h24
-rw-r--r--arch/x86/include/asm/suspend_64.h2
-rw-r--r--arch/x86/include/asm/svm.h1
-rw-r--r--arch/x86/include/asm/switch_to.h10
-rw-r--r--arch/x86/include/asm/syscall.h4
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h76
-rw-r--r--arch/x86/include/asm/tce.h35
-rw-r--r--arch/x86/include/asm/text-patching.h88
-rw-r--r--arch/x86/include/asm/thread_info.h23
-rw-r--r--arch/x86/include/asm/tlb.h4
-rw-r--r--arch/x86/include/asm/tlbflush.h30
-rw-r--r--arch/x86/include/asm/trace/hyperv.h15
-rw-r--r--arch/x86/include/asm/trace/mpx.h134
-rw-r--r--arch/x86/include/asm/traps.h3
-rw-r--r--arch/x86/include/asm/uaccess.h27
-rw-r--r--arch/x86/include/asm/umip.h4
-rw-r--r--arch/x86/include/asm/unistd.h6
-rw-r--r--arch/x86/include/asm/unwind_hints.h8
-rw-r--r--arch/x86/include/asm/uv/bios.h2
-rw-r--r--arch/x86/include/asm/uv/uv.h20
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h61
-rw-r--r--arch/x86/include/asm/vdso.h1
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h16
-rw-r--r--arch/x86/include/asm/vmalloc.h6
-rw-r--r--arch/x86/include/asm/vmware.h57
-rw-r--r--arch/x86/include/asm/vmx.h123
-rw-r--r--arch/x86/include/asm/vmxfeatures.h86
-rw-r--r--arch/x86/include/asm/vvar.h13
-rw-r--r--arch/x86/include/asm/x86_init.h4
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h2
-rw-r--r--arch/x86/include/asm/xen/interface.h11
-rw-r--r--arch/x86/include/asm/xen/page-coherent.h14
130 files changed, 1768 insertions, 1865 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 8b52bc5ddf69..ea34464d6221 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -7,7 +7,6 @@ generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
-generic-y += dma-contiguous.h
generic-y += early_ioremap.h
generic-y += export.h
generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index aac686e1e005..ca0976456a6b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -13,7 +13,6 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
-#include <asm/realmode.h>
#include <asm/x86_init.h>
#ifdef CONFIG_ACPI_APEI
@@ -62,7 +61,7 @@ static inline void acpi_disable_pci(void)
extern int (*acpi_suspend_lowlevel)(void);
/* Physical address to resume after wakeup */
-#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
+unsigned long acpi_get_wakeup_address(void);
/*
* Check if the CPU can handle C2 and deeper
@@ -117,6 +116,12 @@ static inline bool acpi_has_cpu_in_madt(void)
return !!acpi_lapic;
}
+#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+ x86_init.acpi.set_root_pointer(addr);
+}
+
#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
static inline u64 acpi_arch_get_root_pointer(void)
{
@@ -125,6 +130,7 @@ static inline u64 acpi_arch_get_root_pointer(void)
void acpi_generic_reduced_hw_init(void);
+void x86_default_set_root_pointer(u64 addr);
u64 x86_default_get_root_pointer(void);
#else /* !CONFIG_ACPI */
@@ -138,6 +144,8 @@ static inline void disable_acpi(void) { }
static inline void acpi_generic_reduced_hw_init(void) { }
+static inline void x86_default_set_root_pointer(u64 addr) { }
+
static inline u64 x86_default_get_root_pointer(void)
{
return 0;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 094fbc9c0b1c..13adca37c99a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -201,10 +201,10 @@ static inline int alternatives_text_reserved(void *start, void *end)
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, feature) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+ asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
- asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
/*
* Alternative inline assembly with input.
@@ -218,7 +218,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Leaving an unused argument 0 to keep API compatibility.
*/
#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
+ asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: : "i" (0), ## input)
/*
@@ -231,18 +231,18 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \
feature2, input...) \
- asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
newinstr2, feature2) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
+ asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: output : "i" (0), ## input)
/* Like alternative_io, but for replacing a direct call with another one. */
#define alternative_call(oldfunc, newfunc, feature, output, input...) \
- asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
+ asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
/*
@@ -253,7 +253,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
- asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+ asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
: output, ASM_CALL_CONSTRAINT \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e647aa095867..19e94af9cc5d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,9 +136,11 @@ extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
+extern void apic_soft_disable(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
+extern void apic_intr_mode_select(void);
extern void apic_intr_mode_init(void);
extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
@@ -176,6 +178,8 @@ extern void lapic_online(void);
extern void lapic_offline(void);
extern bool apic_needs_pit(void);
+extern void apic_send_IPI_allbutself(unsigned int vector);
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
@@ -185,6 +189,7 @@ static inline void disable_local_APIC(void) { }
# define setup_secondary_APIC_clock x86_init_noop
static inline void lapic_update_tsc_freq(void) { }
static inline void init_bsp_APIC(void) { }
+static inline void apic_intr_mode_select(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
@@ -449,6 +454,14 @@ static inline void ack_APIC_irq(void)
apic_eoi();
}
+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+ u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+
+ return !!(irr & (1U << (vector % 32)));
+}
+
static inline unsigned default_get_apic_id(unsigned long x)
{
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -465,12 +478,6 @@ static inline unsigned default_get_apic_id(unsigned long x)
#define TRAMPOLINE_PHYS_LOW 0x467
#define TRAMPOLINE_PHYS_HIGH 0x469
-#ifdef CONFIG_X86_64
-extern void apic_send_IPI_self(int vector);
-
-DECLARE_PER_CPU(int, x2apic_extra_bits);
-#endif
-
extern void generic_bigsmp_probe(void);
#ifdef CONFIG_X86_LOCAL_APIC
@@ -506,8 +513,10 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#ifdef CONFIG_SMP
bool apic_id_is_primary_thread(unsigned int id);
+void apic_smt_update(void);
#else
static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
+static inline void apic_smt_update(void) { }
#endif
extern void irq_enter(void);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
deleted file mode 100644
index d3a2b3876ce6..000000000000
--- a/arch/x86/include/asm/apic_flat_64.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_APIC_FLAT_64_H
-#define _ASM_X86_APIC_FLAT_64_H
-
-extern void flat_init_apic_ldr(void);
-
-#endif
-
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index af45e1452f09..7a4bb1bd4bdb 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -27,7 +27,7 @@
/* Unconditional execution of RDRAND and RDSEED */
-static inline bool rdrand_long(unsigned long *v)
+static inline bool __must_check rdrand_long(unsigned long *v)
{
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
@@ -41,7 +41,7 @@ static inline bool rdrand_long(unsigned long *v)
return false;
}
-static inline bool rdrand_int(unsigned int *v)
+static inline bool __must_check rdrand_int(unsigned int *v)
{
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
@@ -55,7 +55,7 @@ static inline bool rdrand_int(unsigned int *v)
return false;
}
-static inline bool rdseed_long(unsigned long *v)
+static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
asm volatile(RDSEED_LONG
@@ -64,7 +64,7 @@ static inline bool rdseed_long(unsigned long *v)
return ok;
}
-static inline bool rdseed_int(unsigned int *v)
+static inline bool __must_check rdseed_int(unsigned int *v)
{
bool ok;
asm volatile(RDSEED_INT
@@ -73,10 +73,6 @@ static inline bool rdseed_int(unsigned int *v)
return ok;
}
-/* Conditional execution based on CPU type */
-#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
-#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
-
/*
* These are the generic interfaces; they must not be declared if the
* stubs in <linux/random.h> are to be invoked,
@@ -84,24 +80,24 @@ static inline bool rdseed_int(unsigned int *v)
*/
#ifdef CONFIG_ARCH_RANDOM
-static inline bool arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
{
- return arch_has_random() ? rdrand_long(v) : false;
+ return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false;
}
-static inline bool arch_get_random_int(unsigned int *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
{
- return arch_has_random() ? rdrand_int(v) : false;
+ return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false;
}
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
- return arch_has_random_seed() ? rdseed_long(v) : false;
+ return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false;
}
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
- return arch_has_random_seed() ? rdseed_int(v) : false;
+ return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false;
}
extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..cd339b88d5d4 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -7,9 +7,11 @@
# define __ASM_FORM_RAW(x) x
# define __ASM_FORM_COMMA(x) x,
#else
-# define __ASM_FORM(x) " " #x " "
-# define __ASM_FORM_RAW(x) #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+
+# define __ASM_FORM(x) " " __stringify(x) " "
+# define __ASM_FORM_RAW(x) __stringify(x)
+# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
#endif
#ifndef __x86_64__
@@ -139,9 +141,6 @@
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
_ASM_ALIGN ; \
@@ -170,9 +169,6 @@
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 84f848c2541a..7f828fe49797 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -49,8 +49,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define array_index_mask_nospec array_index_mask_nospec
/* Prevent speculative execution past this barrier. */
-#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
- "lfence", X86_FEATURE_LFENCE_RDTSC)
+#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
#define dma_rmb() barrier()
#define dma_wmb() barrier()
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ba15d53c1ca7..062cdecb2f24 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -45,14 +45,13 @@
* We do the locked ops that don't return the old value as
* a mask operation on a byte.
*/
-#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
+ if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr))
@@ -72,7 +71,7 @@ arch___set_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
+ if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
@@ -123,7 +122,7 @@ arch___change_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
+ if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
@@ -389,7 +388,9 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/const_hweight.h>
-#include <asm-generic/bitops-instrumented.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
#include <asm-generic/bitops/le.h>
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index f5e90a849bca..981fe923a59f 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -59,7 +59,6 @@ static void sanitize_boot_params(struct boot_params *boot_params)
BOOT_PARAM_PRESERVE(apm_bios_info),
BOOT_PARAM_PRESERVE(tboot_addr),
BOOT_PARAM_PRESERVE(ist_info),
- BOOT_PARAM_PRESERVE(acpi_rsdp_addr),
BOOT_PARAM_PRESERVE(hd0_info),
BOOT_PARAM_PRESERVE(hd1_info),
BOOT_PARAM_PRESERVE(sys_desc_table),
@@ -71,6 +70,8 @@ static void sanitize_boot_params(struct boot_params *boot_params)
BOOT_PARAM_PRESERVE(eddbuf_entries),
BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+ BOOT_PARAM_PRESERVE(secure_boot),
+ BOOT_PARAM_PRESERVE(hdr),
BOOT_PARAM_PRESERVE(e820_table),
BOOT_PARAM_PRESERVE(eddbuf),
};
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 6804d6642767..facba9bc30ca 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -32,7 +32,7 @@
#define _BUG_FLAGS(ins, flags) \
do { \
- asm volatile("1:\t" ins "\n" \
+ asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
"\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
@@ -49,7 +49,7 @@ do { \
#define _BUG_FLAGS(ins, flags) \
do { \
- asm volatile("1:\t" ins "\n" \
+ asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
"\t.word %c0" "\t# bug_entry::flags\n" \
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 542509b53e0f..92ae28389940 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -6,16 +6,12 @@
extern void check_bugs(void);
-#if defined(CONFIG_CPU_SUP_INTEL)
-void check_mpx_erratum(struct cpuinfo_x86 *c);
-#else
-static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
-#endif
-
#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
static inline int ppro_with_ram_bug(void) { return 0; }
#endif
+extern void cpu_bugs_smt_update(void);
+
#endif /* _ASM_X86_BUGS_H */
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
deleted file mode 100644
index facd374a1bf7..000000000000
--- a/arch/x86/include/asm/calgary.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Derived from include/asm-powerpc/iommu.h
- *
- * Copyright IBM Corporation, 2006-2007
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- */
-
-#ifndef _ASM_X86_CALGARY_H
-#define _ASM_X86_CALGARY_H
-
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/timer.h>
-#include <asm/types.h>
-
-struct iommu_table {
- const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
- unsigned long it_base; /* mapped address of tce table */
- unsigned long it_hint; /* Hint for next alloc */
- unsigned long *it_map; /* A simple allocation bitmap for now */
- void __iomem *bbar; /* Bridge BAR */
- u64 tar_val; /* Table Address Register */
- struct timer_list watchdog_timer;
- spinlock_t it_lock; /* Protects it_map */
- unsigned int it_size; /* Size of iommu table in entries */
- unsigned char it_busno; /* Bus number this table belongs to */
-};
-
-struct cal_chipset_ops {
- void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev);
- void (*tce_cache_blast)(struct iommu_table *tbl);
- void (*dump_error_regs)(struct iommu_table *tbl);
-};
-
-#define TCE_TABLE_SIZE_UNSPECIFIED ~0
-#define TCE_TABLE_SIZE_64K 0
-#define TCE_TABLE_SIZE_128K 1
-#define TCE_TABLE_SIZE_256K 2
-#define TCE_TABLE_SIZE_512K 3
-#define TCE_TABLE_SIZE_1M 4
-#define TCE_TABLE_SIZE_2M 5
-#define TCE_TABLE_SIZE_4M 6
-#define TCE_TABLE_SIZE_8M 7
-
-extern int use_calgary;
-
-#ifdef CONFIG_CALGARY_IOMMU
-extern int detect_calgary(void);
-#else
-static inline int detect_calgary(void) { return -ENODEV; }
-#endif
-
-#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 22c4dfe65992..52e9f3480f69 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -177,23 +177,6 @@ typedef struct user_regs_struct compat_elf_gregset_t;
(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
#endif
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
static inline void __user *arch_compat_alloc_user_space(long len)
{
compat_uptr_t sp;
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index cff3f3f3bfe0..02c0078d3787 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CPU_ENTRY_AREA_H
#define _ASM_X86_CPU_ENTRY_AREA_H
@@ -6,6 +6,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
#ifdef CONFIG_X86_64
@@ -65,6 +66,13 @@ enum exception_stack_ordering {
#endif
+#ifdef CONFIG_X86_32
+struct doublefault_stack {
+ unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
+ struct x86_hw_tss tss;
+} __aligned(PAGE_SIZE);
+#endif
+
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
* and early entry/exit code. Real types aren't used for all fields here
@@ -78,10 +86,19 @@ struct cpu_entry_area {
/*
* The GDT is just below entry_stack and thus serves (on x86_64) as
- * a a read-only guard page.
+ * a read-only guard page. On 32-bit the GDT must be writeable, so
+ * it needs an extra guard page.
*/
+#ifdef CONFIG_X86_32
+ char guard_entry_stack[PAGE_SIZE];
+#endif
struct entry_stack_page entry_stack_page;
+#ifdef CONFIG_X86_32
+ char guard_doublefault_stack[PAGE_SIZE];
+ struct doublefault_stack doublefault_stack;
+#endif
+
/*
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
* we need task switches to work, and task switches write to the TSS.
@@ -94,7 +111,6 @@ struct cpu_entry_area {
*/
struct cea_exception_stacks estacks;
#endif
-#ifdef CONFIG_CPU_SUP_INTEL
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
@@ -105,11 +121,13 @@ struct cpu_entry_area {
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
-#endif
};
-#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+/* Total size includes the readonly IDT mapping page as well: */
+#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
@@ -117,14 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
-#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE \
- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
-
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
static inline struct entry_stack *cpu_entry_stack(int cpu)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 58acda503817..59bf91c57aa8 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -61,6 +61,13 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
+/*
+ * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the
+ * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all
+ * header macros which use NCAPINTS need to be changed. The duplicated macro
+ * use causes the compiler to issue errors for all headers so that all usage
+ * sites can be corrected.
+ */
#define REQUIRED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index e880f2408e29..f3327cb56edf 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,6 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -221,6 +220,7 @@
#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -232,6 +232,8 @@
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -291,6 +293,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
@@ -355,6 +358,8 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
@@ -397,5 +402,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..c8b39c6716ff
--- /dev/null
+++ b/arch/x86/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_HALTPOLL_H
+#define _ARCH_HALTPOLL_H
+
+void arch_haltpoll_enable(unsigned int cpu);
+void arch_haltpoll_disable(unsigned int cpu);
+
+#endif
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index 0acf5ee45a21..f58de66091e5 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -2,10 +2,17 @@
#ifndef _ASM_X86_CRASH_H
#define _ASM_X86_CRASH_H
+struct kimage;
+
int crash_load_segments(struct kimage *image);
-int crash_copy_backup_region(struct kimage *image);
int crash_setup_memmap_entries(struct kimage *image,
struct boot_params *params);
void crash_smp_send_stop(void);
+#ifdef CONFIG_KEXEC_CORE
+void __init crash_reserve_low_1M(void);
+#else
+static inline void __init crash_reserve_low_1M(void) { }
+#endif
+
#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h
deleted file mode 100644
index c508521dd190..000000000000
--- a/arch/x86/include/asm/crypto/aes.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_AES_H
-#define ASM_X86_AES_H
-
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
- const u8 *src);
-void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
- const u8 *src);
-#endif
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index a5d86fc0593f..f6d91861cb14 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -26,71 +26,66 @@ struct camellia_xts_ctx {
extern int __camellia_setkey(struct camellia_ctx *cctx,
const unsigned char *key,
- unsigned int key_len, u32 *flags);
+ unsigned int key_len);
extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen);
/* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+ bool xor);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
/* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+ bool xor);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
/* 16-way parallel cipher functions (avx/aes-ni) */
-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-
-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
+
+asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
+asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
+
+static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
{
__camellia_enc_blk(ctx, dst, src, false);
}
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
{
__camellia_enc_blk(ctx, dst, src, true);
}
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk_2way(ctx, dst, src, false);
}
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
const u8 *src)
{
__camellia_enc_blk_2way(ctx, dst, src, true);
}
/* glue helpers */
-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
+extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
+extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
#endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index d1818634ae7e..777c0f63418c 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -11,18 +11,13 @@
#include <asm/fpu/api.h>
#include <crypto/b128ops.h>
-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
+typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
-
struct common_glue_func_entry {
unsigned int num_blocks; /* number of blocks that @fn will process */
union {
@@ -114,9 +109,10 @@ extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req,
common_glue_func_t tweak_fn, void *tweak_ctx,
- void *crypt_ctx);
+ void *crypt_ctx, bool decrypt);
-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
- le128 *iv, common_glue_func_t fn);
+extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst,
+ const u8 *src, le128 *iv,
+ common_glue_func_t fn);
#endif /* _CRYPTO_GLUE_HELPER_H */
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index db7c9cc32234..251c2c89d7cf 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -15,26 +15,26 @@ struct serpent_xts_ctx {
struct serpent_ctx crypt_ctx;
};
-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
const u8 *src);
-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
const u8 *src);
-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
const u8 *src);
-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
+asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
const u8 *src, le128 *iv);
-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
const u8 *src, le128 *iv);
-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
+extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen);
diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h
index 1a345e8a7496..860ca248914b 100644
--- a/arch/x86/include/asm/crypto/serpent-sse2.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -9,25 +9,23 @@
#define SERPENT_PARALLEL_BLOCKS 4
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
{
__serpent_enc_blk_4way(ctx, dst, src, false);
}
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+ u8 *dst, const u8 *src)
{
__serpent_enc_blk_4way(ctx, dst, src, true);
}
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
{
serpent_dec_blk_4way(ctx, dst, src);
}
@@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
#define SERPENT_PARALLEL_BLOCKS 8
-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
{
__serpent_enc_blk_8way(ctx, dst, src, false);
}
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+ u8 *dst, const u8 *src)
{
__serpent_enc_blk_8way(ctx, dst, src, true);
}
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
{
serpent_dec_blk_8way(ctx, dst, src);
}
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index f618bf272b90..2c377a8042e1 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -7,22 +7,19 @@
#include <crypto/b128ops.h>
/* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
+asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
/* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
+asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
+ bool xor);
+asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
/* helpers from twofish_x86_64-3way module */
-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
+extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src,
le128 *iv);
#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index a8f6c809d9b1..7e31f7f1bb06 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,20 +6,7 @@ struct dev_archdata {
#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
#endif
-#ifdef CONFIG_STA2X11
- bool is_sta2x11;
-#endif
-};
-
-#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
-struct dma_domain {
- struct list_head node;
- const struct dma_map_ops *dma_ops;
- int domain_nr;
};
-void add_dma_domain(struct dma_domain *domain);
-void del_dma_domain(struct dma_domain *domain);
-#endif
struct pdev_archdata {
};
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index a5ea841cc6d2..4ea8584682f9 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,19 +10,13 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_INTEL_MPX
-# define DISABLE_MPX 0
-#else
-# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
-#endif
-
#ifdef CONFIG_X86_SMAP
# define DISABLE_SMAP 0
#else
# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31))
#endif
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
# define DISABLE_UMIP 0
#else
# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
@@ -74,7 +68,7 @@
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP)
+#define DISABLED_MASK9 (DISABLE_SMAP)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 20a46150e0a8..9b8cb50768c2 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -73,6 +73,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
#else
# include <asm-generic/div64.h>
+
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+ u64 q;
+
+ asm ("mulq %2; divq %3" : "=a" (q)
+ : "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
+ : "rdx");
+
+ return q;
+}
+#define mul_u64_u32_div mul_u64_u32_div
+
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_DIV64_H */
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
deleted file mode 100644
index 1a19251eaac9..000000000000
--- a/arch/x86/include/asm/dma-direct.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_DMA_DIRECT_H
-#define ASM_X86_DMA_DIRECT_H 1
-
-bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-
-#endif /* ASM_X86_DMA_DIRECT_H */
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
new file mode 100644
index 000000000000..af9a14ac8962
--- /dev/null
+++ b/arch/x86/include/asm/doublefault.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_DOUBLEFAULT_H
+#define _ASM_X86_DOUBLEFAULT_H
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+extern void doublefault_init_cpu_tss(void);
+#else
+static inline void doublefault_init_cpu_tss(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_DOUBLEFAULT_H */
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index c3aa4b5e49e2..314f75d886d0 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -29,6 +29,14 @@ enum e820_type {
E820_TYPE_PRAM = 12,
/*
+ * Special-purpose memory is indicated to the system via the
+ * EFI_MEMORY_SP attribute. Define an e820 translation of this
+ * memory type for the purpose of reserving this range and
+ * marking it with the IORES_DESC_SOFT_RESERVED designation.
+ */
+ E820_TYPE_SOFT_RESERVED = 0xefffffff,
+
+ /*
* Reserved RAM used by the kernel itself if
* CONFIG_INTEL_TXT=y is enabled, memory of this type
* will be included in the S3 integrity calculation
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 606a4b6a9812..86169a24b0d8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -8,6 +8,7 @@
#include <asm/tlb.h>
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
+#include <linux/build_bug.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -19,13 +20,16 @@
* This is the main reason why we're doing stable VA mappings for RT
* services.
*
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
+ * SGI UV1 machines are known to be incompatible with this scheme, so we
+ * provide an opt-out for these machines via a DMI quirk that sets the
+ * attribute below.
*/
-#define EFI_OLD_MEMMAP EFI_ARCH_1
+#define EFI_UV1_MEMMAP EFI_ARCH_1
+
+static inline bool efi_have_uv1_memmap(void)
+{
+ return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP);
+}
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
@@ -34,10 +38,46 @@
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
-#ifdef CONFIG_X86_32
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allows us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
+
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \
+ __efi_arg_sentinel(7), __efi_arg_sentinel(6), \
+ __efi_arg_sentinel(5), __efi_arg_sentinel(4), \
+ __efi_arg_sentinel(3), __efi_arg_sentinel(2), \
+ __efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \
+ __take_second_arg(n, \
+ ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+#define __efi_arg_sentinel(n) , n
+
+/*
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
+ */
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
+#define __efi_nargs_check(f, n, ...) \
+ __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n)
+#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n)
+#define __efi_nargs_check__(f, p, n) ({ \
+ BUILD_BUG_ON_MSG( \
+ (p) > (n), \
+ #f " called with too many arguments (" #p ">" #n ")"); \
+})
+#ifdef CONFIG_X86_32
#define arch_efi_call_virt_setup() \
({ \
kernel_fpu_begin(); \
@@ -51,13 +91,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
})
-/*
- * Wrap all the virtual calls in a way that forces the parameters on the stack.
- */
-#define arch_efi_call_virt(p, f, args...) \
-({ \
- ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \
-})
+#define arch_efi_call_virt(p, f, args...) p->f(args)
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
@@ -65,9 +99,12 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...);
#define EFI_LOADER_SIGNATURE "EL64"
-extern asmlinkage u64 efi_call(void *fp, ...);
+extern asmlinkage u64 __efi_call(void *fp, ...);
-#define efi_call_phys(f, args...) efi_call((f), args)
+#define efi_call(...) ({ \
+ __efi_nargs_check(efi_call, 7, __VA_ARGS__); \
+ __efi_call(__VA_ARGS__); \
+})
/*
* struct efi_scratch - Scratch space used while switching to/from efi_mm
@@ -85,7 +122,7 @@ struct efi_scratch {
kernel_fpu_begin(); \
firmware_restrict_branch_speculation_start(); \
\
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ if (!efi_have_uv1_memmap()) \
efi_switch_mm(&efi_mm); \
})
@@ -94,7 +131,7 @@ struct efi_scratch {
#define arch_efi_call_virt_teardown() \
({ \
- if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ if (!efi_have_uv1_memmap()) \
efi_switch_mm(efi_scratch.prev_mm); \
\
firmware_restrict_branch_speculation_end(); \
@@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
extern struct efi_scratch efi_scratch;
extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
-extern pgd_t * __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_print_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
@@ -140,7 +175,8 @@ extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm);
extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
-extern void efi_reserve_boot_services(void);
+extern pgd_t * __init efi_uv1_memmap_phys_prolog(void);
+extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd);
struct efi_setup_data {
u64 fw_vendor;
@@ -153,102 +189,174 @@ struct efi_setup_data {
extern u64 efi_setup;
#ifdef CONFIG_EFI
+extern efi_status_t __efi64_thunk(u32, ...);
-static inline bool efi_is_native(void)
+#define efi64_thunk(...) ({ \
+ __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \
+ __efi64_thunk(__VA_ARGS__); \
+})
+
+static inline bool efi_is_mixed(void)
{
- return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+ if (!IS_ENABLED(CONFIG_EFI_MIXED))
+ return false;
+ return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT);
}
static inline bool efi_runtime_supported(void)
{
- if (efi_is_native())
- return true;
-
- if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP))
+ if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT))
return true;
- return false;
+ return IS_ENABLED(CONFIG_EFI_MIXED);
}
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
-#ifdef CONFIG_EFI_MIXED
extern void efi_thunk_runtime_setup(void);
-extern efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map);
-#else
-static inline void efi_thunk_runtime_setup(void) {}
-static inline efi_status_t efi_thunk_set_virtual_address_map(
- void *phys_set_virtual_address_map,
- unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- return EFI_SUCCESS;
-}
-#endif /* CONFIG_EFI_MIXED */
-
+efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map);
/* arch specific definitions used by the stub code */
-struct efi_config {
- u64 image_handle;
- u64 table;
- u64 runtime_services;
- u64 boot_services;
- u64 text_output;
- efi_status_t (*call)(unsigned long, ...);
- bool is64;
-} __packed;
-
-__pure const struct efi_config *__efi_early(void);
+__attribute_const__ bool efi_is_64bit(void);
-static inline bool efi_is_64bit(void)
+static inline bool efi_is_native(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
- return false;
-
+ return true;
if (!IS_ENABLED(CONFIG_EFI_MIXED))
return true;
+ return efi_is_64bit();
+}
+
+#define efi_mixed_mode_cast(attr) \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(u32, __typeof__(attr)), \
+ (unsigned long)(attr), (attr))
+
+#define efi_table_attr(inst, attr) \
+ (efi_is_native() \
+ ? inst->attr \
+ : (__typeof__(inst->attr)) \
+ efi_mixed_mode_cast(inst->mixed_mode.attr))
+
+/*
+ * The following macros allow translating arguments if necessary from native to
+ * mixed mode. The use case for this is to initialize the upper 32 bits of
+ * output parameters, and where the 32-bit method requires a 64-bit argument,
+ * which must be split up into two arguments to be thunked properly.
+ *
+ * As examples, the AllocatePool boot service returns the address of the
+ * allocation, but it will not set the high 32 bits of the address. To ensure
+ * that the full 64-bit address is initialized, we zero-init the address before
+ * calling the thunk.
+ *
+ * The FreePages boot service takes a 64-bit physical address even in 32-bit
+ * mode. For the thunk to work correctly, a native 64-bit call of
+ * free_pages(addr, size)
+ * must be translated to
+ * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size)
+ * so that the two 32-bit halves of addr get pushed onto the stack separately.
+ */
- return __efi_early()->is64;
+static inline void *efi64_zero_upper(void *p)
+{
+ ((u32 *)p)[1] = 0;
+ return p;
}
-#define efi_table_attr(table, attr, instance) \
- (efi_is_64bit() ? \
- ((table##_64_t *)(unsigned long)instance)->attr : \
- ((table##_32_t *)(unsigned long)instance)->attr)
+#define __efi64_argmap_free_pages(addr, size) \
+ ((addr), 0, (size))
+
+#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \
+ ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver))
+
+#define __efi64_argmap_allocate_pool(type, size, buffer) \
+ ((type), (size), efi64_zero_upper(buffer))
+
+#define __efi64_argmap_handle_protocol(handle, protocol, interface) \
+ ((handle), (protocol), efi64_zero_upper(interface))
-#define efi_call_proto(protocol, f, instance, ...) \
- __efi_early()->call(efi_table_attr(protocol, f, instance), \
- instance, ##__VA_ARGS__)
+#define __efi64_argmap_locate_protocol(protocol, reg, interface) \
+ ((protocol), (reg), efi64_zero_upper(interface))
+
+/* PCI I/O */
+#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \
+ ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \
+ efi64_zero_upper(dev), efi64_zero_upper(func))
+
+/*
+ * The macros below handle the plumbing for the argument mapping. To add a
+ * mapping for a specific EFI method, simply define a macro
+ * __efi64_argmap_<method name>, following the examples above.
+ */
-#define efi_call_early(f, ...) \
- __efi_early()->call(efi_table_attr(efi_boot_services, f, \
- __efi_early()->boot_services), __VA_ARGS__)
+#define __efi64_thunk_map(inst, func, ...) \
+ efi64_thunk(inst->mixed_mode.func, \
+ __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \
+ (__VA_ARGS__)))
-#define __efi_call_early(f, ...) \
- __efi_early()->call((unsigned long)f, __VA_ARGS__);
+#define __efi64_argmap(mapped, args) \
+ __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args)
+#define __efi64_argmap__0(mapped, args) __efi_eval mapped
+#define __efi64_argmap__1(mapped, args) __efi_eval args
-#define efi_call_runtime(f, ...) \
- __efi_early()->call(efi_table_attr(efi_runtime_services, f, \
- __efi_early()->runtime_services), __VA_ARGS__)
+#define __efi_eat(...)
+#define __efi_eval(...) __VA_ARGS__
+
+/* The three macros below handle dispatching via the thunk if needed */
+
+#define efi_call_proto(inst, func, ...) \
+ (efi_is_native() \
+ ? inst->func(inst, ##__VA_ARGS__) \
+ : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__))
+
+#define efi_bs_call(func, ...) \
+ (efi_is_native() \
+ ? efi_system_table()->boottime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
+ boottime), func, __VA_ARGS__))
+
+#define efi_rt_call(func, ...) \
+ (efi_is_native() \
+ ? efi_system_table()->runtime->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_table_attr(efi_system_table(), \
+ runtime), func, __VA_ARGS__))
extern bool efi_reboot_required(void);
+extern bool efi_is_table_address(unsigned long phys_addr);
+extern void efi_find_mirror(void);
+extern void efi_reserve_boot_services(void);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
static inline bool efi_reboot_required(void)
{
return false;
}
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+ return false;
+}
+static inline void efi_find_mirror(void)
+{
+}
+static inline void efi_reserve_boot_services(void)
+{
+}
#endif /* CONFIG_EFI */
+#ifdef CONFIG_EFI_FAKE_MEMMAP
+extern void __init efi_fake_memmap_early(void);
+#else
+static inline void efi_fake_memmap_early(void)
+{
+}
+#endif
+
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h
new file mode 100644
index 000000000000..70f5b98a5286
--- /dev/null
+++ b/arch/x86/include/asm/emulate_prefix.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EMULATE_PREFIX_H
+#define _ASM_X86_EMULATE_PREFIX_H
+
+/*
+ * Virt escape sequences to trigger instruction emulation;
+ * ideally these would decode to 'whole' instruction and not destroy
+ * the instruction stream; sadly this is not true for the 'kvm' one :/
+ */
+
+#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */
+#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */
+
+#endif
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
deleted file mode 100644
index 47b7a1296245..000000000000
--- a/arch/x86/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-asmlinkage void just_return_func(void);
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9da8cccdf3fb..28183ee3cc42 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -42,8 +42,7 @@
* Because of this, FIXADDR_TOP x86 integration was left as later work.
*/
#ifdef CONFIG_X86_32
-/* used by vmalloc.c, vsyscall.lds.S.
- *
+/*
* Leave one empty page between vmalloc'ed areas and
* the start of the fixmap.
*/
@@ -120,7 +119,7 @@ enum fixed_addresses {
* before ioremap() is functional.
*
* If necessary we round it up to the next 512 pages boundary so
- * that we can have a single pgd entry and a single pte table:
+ * that we can have a single pmd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 8
@@ -157,7 +156,7 @@ extern pte_t *kmap_pte;
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
-void native_set_fixmap(enum fixed_addresses idx,
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags);
#ifndef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 4c95c365058a..44c48e34d799 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+ return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
/*
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 287f1f7b2e52..85be2f506272 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -16,7 +16,6 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#ifndef __ASSEMBLY__
-extern void mcount(void);
extern atomic_t modifying_ftrace_code;
extern void __fentry__(void);
@@ -29,14 +28,25 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+/*
+ * When a ftrace registered caller is tracing a function that is
+ * also set by a register_ftrace_direct() call, it needs to be
+ * differentiated in the ftrace_caller trampoline. To do this, we
+ * place the direct caller in the ORIG_AX part of pt_regs. This
+ * tells the ftrace_caller that there's a direct caller.
+ */
+static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ /* Emulate a call */
+ regs->orig_ax = addr;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
-int ftrace_int3_handler(struct pt_regs *regs);
-
#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
#endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index cbd97e22d2f3..4154bc5f6a4e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -153,8 +153,8 @@ extern char irq_entries_start[];
extern char spurious_entries_start[];
#define VECTOR_UNUSED NULL
-#define VECTOR_SHUTDOWN ((void *)~0UL)
-#define VECTOR_RETRIGGERED ((void *)~1UL)
+#define VECTOR_SHUTDOWN ((void *)-1L)
+#define VECTOR_RETRIGGERED ((void *)-2L)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index af78cd72b8f3..92abc1e42bfc 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -13,6 +13,16 @@
#include <asm/page.h>
/*
+ * While not explicitly listed in the TLFS, Hyper-V always runs with a page size
+ * of 4096. These definitions are used when communicating with Hyper-V using
+ * guest physical pages and guest physical page addresses, since the guest page
+ * size may not be 4096 on all architectures.
+ */
+#define HV_HYP_PAGE_SHIFT 12
+#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
+#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
+
+/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
* is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
*/
@@ -76,6 +86,8 @@
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
/* AccessReenlightenmentControls privilege */
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+/* AccessTscInvariantControls privilege */
+#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
/*
* Feature identification: indicates which flags were specified at partition
@@ -170,7 +182,15 @@
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
+/*
+ * Virtual processor will never share a physical core with another virtual
+ * processor, except for virtual processors that are reported as sibling SMT
+ * threads.
+ */
+#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
+
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
+#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
@@ -260,6 +280,9 @@
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+/* TSC invariant control */
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
/*
* Declare the MSR used to setup pages used to communicate with the hypervisor.
*/
@@ -514,14 +537,24 @@ struct hv_timer_message_payload {
__u64 delivery_time; /* When the message was delivered */
} __packed;
+struct hv_nested_enlightenments_control {
+ struct {
+ __u32 directhypercall:1;
+ __u32 reserved:31;
+ } features;
+ struct {
+ __u32 reserved;
+ } hypercallControls;
+} __packed;
+
/* Define virtual processor assist page structure. */
struct hv_vp_assist_page {
__u32 apic_assist;
- __u32 reserved;
- __u64 vtl_control[2];
- __u64 nested_enlightenments_control[2];
- __u32 enlighten_vmentry;
- __u32 padding;
+ __u32 reserved1;
+ __u64 vtl_control[3];
+ struct hv_nested_enlightenments_control nested_control;
+ __u8 enlighten_vmentry;
+ __u8 reserved2[7];
__u64 current_nested_vmcs;
} __packed;
@@ -776,7 +809,8 @@ union hv_synic_sint {
u64 reserved1:8;
u64 masked:1;
u64 auto_eoi:1;
- u64 reserved2:46;
+ u64 polling:1;
+ u64 reserved2:45;
} __packed;
};
@@ -847,7 +881,7 @@ union hv_gpa_page_range {
* count is equal with how many entries of union hv_gpa_page_range can
* be populated into the input parameter page.
*/
-#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \
+#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
sizeof(union hv_gpa_page_range))
struct hv_guest_mapping_flush_list {
@@ -872,4 +906,7 @@ struct hv_tlb_flush_ex {
u64 gva_list[];
} __packed;
+struct hv_partition_assist_pg {
+ u32 tlb_lock_count;
+};
#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 154f27be8bfc..5c1ae3eff9d4 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -45,6 +45,7 @@ struct insn {
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
+ int emulate_prefix_size;
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
@@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+static inline int insn_has_emulate_prefix(struct insn *insn)
+{
+ return !!insn->emulate_prefix_size;
+}
+
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 0278aa66ef62..4981c293f926 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -5,12 +5,34 @@
/*
* "Big Core" Processors (Branded as Core, Xeon, etc...)
*
- * The "_X" parts are generally the EP and EX Xeons, or the
- * "Extreme" ones, like Broadwell-E, or Atom microserver.
- *
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
* that group keep the CPUID for the variants sorted by model number.
+ *
+ * The defined symbol names have the following form:
+ * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF}
+ * where:
+ * OPTFAMILY Describes the family of CPUs that this belongs to. Default
+ * is assumed to be "_CORE" (and should be omitted). Other values
+ * currently in use are _ATOM and _XEON_PHI
+ * MICROARCH Is the code name for the micro-architecture for this core.
+ * N.B. Not the platform name.
+ * OPTDIFF If needed, a short string to differentiate by market segment.
+ *
+ * Common OPTDIFFs:
+ *
+ * - regular client parts
+ * _L - regular mobile parts
+ * _G - parts with extra graphics on
+ * _X - regular server parts
+ * _D - micro server parts
+ *
+ * Historical OPTDIFFs:
+ *
+ * _EP - 2 socket server parts
+ * _EX - 4+ socket server parts
+ *
+ * The #define line may optionally include a comment including platform names.
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
@@ -34,30 +56,36 @@
#define INTEL_FAM6_IVYBRIDGE 0x3A
#define INTEL_FAM6_IVYBRIDGE_X 0x3E
-#define INTEL_FAM6_HASWELL_CORE 0x3C
+#define INTEL_FAM6_HASWELL 0x3C
#define INTEL_FAM6_HASWELL_X 0x3F
-#define INTEL_FAM6_HASWELL_ULT 0x45
-#define INTEL_FAM6_HASWELL_GT3E 0x46
+#define INTEL_FAM6_HASWELL_L 0x45
+#define INTEL_FAM6_HASWELL_G 0x46
-#define INTEL_FAM6_BROADWELL_CORE 0x3D
-#define INTEL_FAM6_BROADWELL_GT3E 0x47
+#define INTEL_FAM6_BROADWELL 0x3D
+#define INTEL_FAM6_BROADWELL_G 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+#define INTEL_FAM6_BROADWELL_D 0x56
-#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
-#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
+#define INTEL_FAM6_SKYLAKE_L 0x4E
+#define INTEL_FAM6_SKYLAKE 0x5E
#define INTEL_FAM6_SKYLAKE_X 0x55
-#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E
-#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E
+#define INTEL_FAM6_KABYLAKE_L 0x8E
+#define INTEL_FAM6_KABYLAKE 0x9E
-#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+#define INTEL_FAM6_CANNONLAKE_L 0x66
#define INTEL_FAM6_ICELAKE_X 0x6A
-#define INTEL_FAM6_ICELAKE_XEON_D 0x6C
-#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D
-#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+#define INTEL_FAM6_ICELAKE_D 0x6C
+#define INTEL_FAM6_ICELAKE 0x7D
+#define INTEL_FAM6_ICELAKE_L 0x7E
#define INTEL_FAM6_ICELAKE_NNPI 0x9D
+#define INTEL_FAM6_TIGERLAKE_L 0x8C
+#define INTEL_FAM6_TIGERLAKE 0x8D
+
+#define INTEL_FAM6_COMETLAKE 0xA5
+#define INTEL_FAM6_COMETLAKE_L 0xA6
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
@@ -68,17 +96,22 @@
#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
+#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */
#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
+#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
-#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
+#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */
+
+/* Note: the micro-architecture is "Goldmont Plus" */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
-#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
+#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */
+#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
+#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
index 9e7adcdbe031..e6da1ce26256 100644
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ b/arch/x86/include/asm/intel_pmc_ipc.h
@@ -31,30 +31,13 @@
#if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
-int intel_pmc_ipc_simple_command(int cmd, int sub);
-int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen, u32 dptr, u32 sptr);
int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
u32 *out, u32 outlen);
int intel_pmc_s0ix_counter_read(u64 *data);
-int intel_pmc_gcr_read(u32 offset, u32 *data);
int intel_pmc_gcr_read64(u32 offset, u64 *data);
-int intel_pmc_gcr_write(u32 offset, u32 data);
-int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val);
#else
-static inline int intel_pmc_ipc_simple_command(int cmd, int sub)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
- u32 *out, u32 outlen, u32 dptr, u32 sptr)
-{
- return -EINVAL;
-}
-
static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
u32 *out, u32 outlen)
{
@@ -66,26 +49,11 @@ static inline int intel_pmc_s0ix_counter_read(u64 *data)
return -EINVAL;
}
-static inline int intel_pmc_gcr_read(u32 offset, u32 *data)
-{
- return -EINVAL;
-}
-
static inline int intel_pmc_gcr_read64(u32 offset, u64 *data)
{
return -EINVAL;
}
-static inline int intel_pmc_gcr_write(u32 offset, u32 data)
-{
- return -EINVAL;
-}
-
-static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val)
-{
- return -EINVAL;
-}
-
#endif /*CONFIG_INTEL_PMC_IPC*/
#endif
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 634f99b1dc22..423b788f495e 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -28,10 +28,12 @@ enum pt_capabilities {
void cpu_emergency_stop_pt(void);
extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap);
extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap);
+extern int is_intel_pt_event(struct perf_event *event);
#else
static inline void cpu_emergency_stop_pt(void) {}
static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; }
static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; }
+static inline int is_intel_pt_event(struct perf_event *event) { return 0; }
#endif
#endif /* _ASM_X86_INTEL_PT_H */
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 4a8c6e817398..2a1442ba6e78 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -22,24 +22,12 @@
/* Read single register */
int intel_scu_ipc_ioread8(u16 addr, u8 *data);
-/* Read two sequential registers */
-int intel_scu_ipc_ioread16(u16 addr, u16 *data);
-
-/* Read four sequential registers */
-int intel_scu_ipc_ioread32(u16 addr, u32 *data);
-
/* Read a vector */
int intel_scu_ipc_readv(u16 *addr, u8 *data, int len);
/* Write single register */
int intel_scu_ipc_iowrite8(u16 addr, u8 data);
-/* Write two sequential registers */
-int intel_scu_ipc_iowrite16(u16 addr, u16 data);
-
-/* Write four sequential registers */
-int intel_scu_ipc_iowrite32(u16 addr, u32 data);
-
/* Write a vector */
int intel_scu_ipc_writev(u16 *addr, u8 *data, int len);
@@ -50,14 +38,6 @@ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask);
int intel_scu_ipc_simple_command(int cmd, int sub);
int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
u32 *out, int outlen);
-int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen,
- u32 *out, int outlen, u32 dptr, u32 sptr);
-
-/* I2C control api */
-int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data);
-
-/* Update FW version */
-int intel_scu_ipc_fw_update(u8 *buffer, u32 length);
extern struct blocking_notifier_head intel_scu_notifier;
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 214394860632..2f77e31a1283 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -40,13 +40,10 @@ struct telemetry_evtmap {
struct telemetry_unit_config {
struct telemetry_evtmap *telem_evts;
void __iomem *regmap;
- u32 ssram_base_addr;
u8 ssram_evts_used;
u8 curr_period;
u8 max_period;
u8 min_period;
- u32 ssram_size;
-
};
struct telemetry_plt_config {
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 6bed97ff6db2..e1aa17a468a8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -180,8 +180,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
* The default ioremap() behavior is non-cached; if you need something
* else, you probably want one of the following.
*/
-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
-#define ioremap_nocache ioremap_nocache
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
@@ -205,10 +203,7 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long
* If the area you are trying to map is a PCI BAR you should have a
* look at pci_iomap().
*/
-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
-{
- return ioremap_nocache(offset, size);
-}
+void __iomem *ioremap(resource_size_t offset, unsigned long size);
#define ioremap ioremap
extern void iounmap(volatile void __iomem *addr);
@@ -404,4 +399,40 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset,
extern bool phys_mem_access_encrypted(unsigned long phys_addr,
unsigned long size);
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @__dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512 bits quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *__dst, const void *src,
+ size_t count)
+{
+ /*
+ * Note that this isn't an "on-stack copy", just definition of "dst"
+ * as a pointer to 64-bytes of stuff that is going to be overwritten.
+ * In the MOVDIR64B case that may be needed as you can use the
+ * MOVDIR64B instruction to copy arbitrary memory around. This trick
+ * lets the compiler know how much gets clobbered.
+ */
+ volatile struct { char _[64]; } *dst = __dst;
+ const u8 *from = src;
+ const u8 *end = from + count * 64;
+
+ while (from < end) {
+ /* MOVDIR64B [rdx], rax */
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ : "=m" (dst)
+ : "d" (from), "a" (dst));
+ from += 64;
+ }
+}
+
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
new file mode 100644
index 000000000000..02c6ef8f7667
--- /dev/null
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IOBITMAP_H
+#define _ASM_X86_IOBITMAP_H
+
+#include <linux/refcount.h>
+#include <asm/processor.h>
+
+struct io_bitmap {
+ u64 sequence;
+ refcount_t refcnt;
+ /* The maximum number of bytes to copy so all zero bits are covered */
+ unsigned int max;
+ unsigned long bitmap[IO_BITMAP_LONGS];
+};
+
+struct task_struct;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+void io_bitmap_share(struct task_struct *tsk);
+void io_bitmap_exit(void);
+
+void tss_update_io_bitmap(void);
+#else
+static inline void io_bitmap_share(struct task_struct *tsk) { }
+static inline void io_bitmap_exit(void) { }
+static inline void tss_update_io_bitmap(void) { }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index baedab8ac538..bf1ed2ddc74b 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,11 +2,28 @@
#ifndef _ASM_X86_IOMMU_H
#define _ASM_X86_IOMMU_H
+#include <linux/acpi.h>
+
+#include <asm/e820/api.h>
+
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern int iommu_pass_through;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
+static inline int __init
+arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+{
+ u64 start = rmrr->base_address;
+ u64 end = rmrr->end_address + 1;
+
+ if (e820__mapped_all(start, end, E820_TYPE_RESERVED))
+ return 0;
+
+ pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n",
+ start, end - 1);
+ return -EINVAL;
+}
+
#endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
deleted file mode 100644
index f73076be546a..000000000000
--- a/arch/x86/include/asm/ipi.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ASM_X86_IPI_H
-#define _ASM_X86_IPI_H
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-/*
- * Copyright 2004 James Cleverdon, IBM.
- *
- * Generic APIC InterProcessor Interrupt code.
- *
- * Moved to include file by James Cleverdon from
- * arch/x86-64/kernel/smp.c
- *
- * Copyrights from kernel/smp.c:
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- * (c) 2002,2003 Andi Kleen, SuSE Labs.
- */
-
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/smp.h>
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
- unsigned int dest)
-{
- unsigned int icr = shortcut | dest;
-
- switch (vector) {
- default:
- icr |= APIC_DM_FIXED | vector;
- break;
- case NMI_VECTOR:
- icr |= APIC_DM_NMI;
- break;
- }
- return icr;
-}
-
-static inline int __prepare_ICR2(unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-static inline void __xapic_wait_icr_idle(void)
-{
- while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
- cpu_relax();
-}
-
-void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
-
-/*
- * This is used to send an IPI with no shorthand notation (the destination is
- * specified in bits 56 to 63 of the ICR).
- */
-void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
-
-extern void default_send_IPI_single(int cpu, int vector);
-extern void default_send_IPI_single_phys(int cpu, int vector);
-extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
- int vector);
-
-/* Avoid include hell */
-#define NMI_VECTOR 0x02
-
-extern int no_broadcast;
-
-static inline void __default_local_send_IPI_allbutself(int vector)
-{
- if (no_broadcast || vector == NMI_VECTOR)
- apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
- else
- __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical);
-}
-
-static inline void __default_local_send_IPI_all(int vector)
-{
- if (no_broadcast || vector == NMI_VECTOR)
- apic->send_IPI_mask(cpu_online_mask, vector);
- else
- __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical);
-}
-
-#ifdef CONFIG_X86_32
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_allbutself(int vector);
-extern void default_send_IPI_all(int vector);
-extern void default_send_IPI_self(int vector);
-#endif
-
-#endif
-
-#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 8f95686ec27e..a176f6165d85 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -34,7 +34,7 @@ extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs);
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
-extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
+extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern __visible unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 75f1e35e7c15..247ab14c6309 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -33,6 +33,7 @@ enum show_regs_mode {
};
extern void die(const char *, struct pt_regs *,long);
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5e7d6b46de97..6802c59e8252 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -66,10 +66,6 @@ struct kimage;
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
-/* Memory to backup during crash kdump */
-#define KEXEC_BACKUP_SRC_START (0UL)
-#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
-
/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
@@ -154,12 +150,6 @@ struct kimage_arch {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- /* Details of backup region */
- unsigned long backup_src_start;
- unsigned long backup_src_sz;
-
- /* Physical address of backup segment */
- unsigned long backup_load_addr;
/* Core ELF header buffer */
void *elf_headers;
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5dc909d9ad81..95b1f053bd96 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -11,12 +11,11 @@
#include <asm-generic/kprobes.h>
-#define BREAKPOINT_INSTRUCTION 0xcc
-
#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#include <asm/text-patching.h>
#include <asm/insn.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
@@ -25,10 +24,7 @@ struct pt_regs;
struct kprobe;
typedef u8 kprobe_opcode_t;
-#define RELATIVEJUMP_OPCODE 0xe9
-#define RELATIVEJUMP_SIZE 5
-#define RELATIVECALL_OPCODE 0xe8
-#define RELATIVE_ADDR_SIZE 4
+
#define MAX_STACK_SIZE 64
#define CUR_STACK_SIZE(ADDR) \
(current_top_of_stack() - (unsigned long)(ADDR))
@@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[];
extern __visible kprobe_opcode_t optprobe_template_val[];
extern __visible kprobe_opcode_t optprobe_template_call[];
extern __visible kprobe_opcode_t optprobe_template_end[];
-#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE)
#define MAX_OPTINSN_SIZE \
(((unsigned long)optprobe_template_end - \
(unsigned long)optprobe_template_entry) + \
- MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+ MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE)
extern const int kretprobe_blacklist_size;
@@ -73,7 +69,7 @@ struct arch_specific_insn {
struct arch_optimized_insn {
/* copy of the original instructions */
- kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+ kprobe_opcode_t copied_insn[DISP32_SIZE];
/* detour code buffer */
kprobe_opcode_t *insn;
/* the size of instructions copied to detour code buffer */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index feab24cac610..03946eb3e2b9 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -222,6 +222,10 @@ struct x86_emulate_ops {
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool check_limit);
+ bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
+
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
@@ -229,7 +233,7 @@ struct x86_emulate_ops {
int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
const char *smstate);
void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
-
+ int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -429,6 +433,7 @@ enum x86_intercept {
x86_intercept_ins,
x86_intercept_out,
x86_intercept_outs,
+ x86_intercept_xsetbv,
nr_x86_intercepts
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74e88e5edd9c..40a0c0fd95ca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -78,6 +78,8 @@
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_APICV_UPDATE \
+ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -156,10 +158,8 @@ enum kvm_reg {
VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
VCPU_REGS_RIP,
- NR_VCPU_REGS
-};
+ NR_VCPU_REGS,
-enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR3,
VCPU_EXREG_RFLAGS,
@@ -177,6 +177,11 @@ enum {
VCPU_SREG_LDTR,
};
+enum exit_fastpath_completion {
+ EXIT_FASTPATH_NONE,
+ EXIT_FASTPATH_SKIP_EMUL_INS,
+};
+
#include <asm/kvm_emulate.h>
#define KVM_NR_MEM_OBJS 40
@@ -219,13 +224,6 @@ enum {
PFERR_WRITE_MASK | \
PFERR_PRESENT_MASK)
-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
- */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
-
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*
@@ -319,8 +317,12 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ struct list_head lpage_disallowed_link;
+
bool unsync;
+ u8 mmu_valid_gen;
bool mmio_cached;
+ bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
* The following two entries are used to key the shadow page in the
@@ -383,12 +385,12 @@ struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
+ int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
- struct x86_exception *exception);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
+ u32 access, struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -457,6 +459,11 @@ struct kvm_pmc {
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
+ /*
+ * eventsel value for general purpose counters,
+ * ctrl value for fixed counters.
+ */
+ u64 current_config;
};
struct kvm_pmu {
@@ -475,7 +482,21 @@ struct kvm_pmu {
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
- u64 reprogram_pmi;
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+ /*
+ * The gate to release perf_events not marked in
+ * pmc_in_use only once in a vcpu time slice.
+ */
+ bool need_cleanup;
+
+ /*
+ * The total number of programmed perf_events and it helps to avoid
+ * redundant check before cleanup if guest don't use vPMU at all.
+ */
+ u8 event_count;
};
struct kvm_pmu_ops;
@@ -568,6 +589,7 @@ struct kvm_vcpu_arch {
u64 smbase;
u64 smi_count;
bool tpr_access_reporting;
+ bool xsaves_enabled;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
@@ -591,7 +613,7 @@ struct kvm_vcpu_arch {
* Paging state of an L2 guest (used for nested npt)
*
* This context will save all necessary information to walk page tables
- * of the an L2 guest. This context is only initialized for page table
+ * of an L2 guest. This context is only initialized for page table
* walking and not for faulting since we never handle l2 page faults on
* the host.
*/
@@ -670,10 +692,10 @@ struct kvm_vcpu_arch {
bool pvclock_set_guest_stopped_request;
struct {
+ u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_hva_cache stime;
- struct kvm_steal_time steal;
+ struct gfn_to_pfn_cache cache;
} st;
u64 tsc_offset;
@@ -717,7 +739,7 @@ struct kvm_vcpu_arch {
/* Cache MMIO info */
u64 mmio_gva;
- unsigned access;
+ unsigned mmio_access;
gfn_t mmio_gfn;
u64 mmio_gen;
@@ -759,9 +781,19 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
/*
- * Indicate whether the access faults on its page table in guest
- * which is set when fix page fault and used to detect unhandeable
- * instruction.
+ * Indicates the guest is trying to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself, i.e. the access
+ * is changing its own translation in the guest page tables. KVM exits
+ * to userspace if emulation of the faulting instruction fails and this
+ * flag is set, as KVM cannot make forward progress.
+ *
+ * If emulation fails for a write to guest page tables, KVM unprotects
+ * (zaps) the shadow page for the target gfn and resumes the guest to
+ * retry the non-emulatable instruction (on hardware). Unprotecting the
+ * gfn doesn't allow forward progress for a self-changing access because
+ * doing so also zaps the translation for the gfn, i.e. retrying the
+ * instruction will hit a !PRESENT fault, which results in a new shadow
+ * page and sends KVM back to square one.
*/
bool write_fault_to_shadow_pgtable;
@@ -843,6 +875,8 @@ struct kvm_hv {
/* How many vCPUs have VP index != vCPU index */
atomic_t num_mismatched_vp_indexes;
+
+ struct hv_partition_assist_pg *hv_pa_pg;
};
enum kvm_irqchip_mode {
@@ -851,16 +885,25 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+#define APICV_INHIBIT_REASON_DISABLE 0
+#define APICV_INHIBIT_REASON_HYPERV 1
+#define APICV_INHIBIT_REASON_NESTED 2
+#define APICV_INHIBIT_REASON_IRQWIN 3
+#define APICV_INHIBIT_REASON_PIT_REINJ 4
+
struct kvm_arch {
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
+ u8 mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
+ struct list_head zapped_obsolete_pages;
+ struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
@@ -879,6 +922,7 @@ struct kvm_arch {
struct kvm_apic_map *apic_map;
bool apic_access_page_done;
+ unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
@@ -935,6 +979,7 @@ struct kvm_arch {
bool exception_payload_enabled;
struct kvm_pmu_event_filter *pmu_event_filter;
+ struct task_struct *nx_lpage_recovery_thread;
};
struct kvm_vm_stat {
@@ -948,6 +993,7 @@ struct kvm_vm_stat {
ulong mmu_unsync;
ulong remote_tlb_flush;
ulong lpages;
+ ulong nx_lpage_splits;
ulong max_mmu_page_hash_collisions;
};
@@ -1000,6 +1046,11 @@ struct kvm_lapic_irq {
bool msi_redir_hint;
};
+static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
+{
+ return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
+}
+
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@@ -1018,7 +1069,7 @@ struct kvm_x86_ops {
void (*vm_destroy)(struct kvm *kvm);
/* Create, but do not attach this VCPU */
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
@@ -1037,7 +1088,6 @@ struct kvm_x86_ops {
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr3)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -1069,8 +1119,9 @@ struct kvm_x86_ops {
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
void (*run)(struct kvm_vcpu *vcpu);
- int (*handle_exit)(struct kvm_vcpu *vcpu);
- void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ int (*handle_exit)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath);
+ int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1086,7 +1137,8 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+ bool (*check_apicv_inhibit_reasons)(ulong bit);
+ void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
@@ -1119,11 +1171,13 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
- void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion *exit_fastpath);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
bool (*umip_emulated)(void);
bool (*pt_supported)(void);
+ bool (*pku_supported)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
@@ -1191,7 +1245,7 @@ struct kvm_x86_ops {
int (*set_nested_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
- void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+ bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
@@ -1209,6 +1263,9 @@ struct kvm_x86_ops {
uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
+
+ bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+ int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -1308,26 +1365,53 @@ extern u64 kvm_default_tsc_scaling_ratio;
extern u64 kvm_mce_cap_supported;
-enum emulation_result {
- EMULATE_DONE, /* no further processing */
- EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
- EMULATE_FAIL, /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ * userspace I/O) to indicate that the emulation context
+ * should be resued as is, i.e. skip initialization of
+ * emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ * Indicates that only select instructions (tagged with
+ * EmulateOnUD) should be emulated (to minimize the emulator
+ * attack surface). See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ * decode the instruction length. For use *only* by
+ * kvm_x86_ops->skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
+ * retry native execution under certain conditions.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ * triggered by KVM's magic "force emulation" prefix,
+ * which is opt in via module param (off by default).
+ * Bypasses EmulateOnUD restriction despite emulating
+ * due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ * Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ * backdoor emulation, which is opt in via module param.
+ * VMware backoor emulation handles select instructions
+ * and reinjects the #GP for all other cases.
+ */
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
#define EMULTYPE_ALLOW_RETRY (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
-#define EMULTYPE_VMWARE (1 << 5)
+#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
+#define EMULTYPE_VMWARE_GP (1 << 5)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
struct x86_emulate_ctxt;
@@ -1413,11 +1497,15 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
-void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+bool kvm_apicv_activated(struct kvm *kvm);
+void kvm_apicv_init(struct kvm *kvm, bool enable);
+void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+void kvm_request_apicv_update(struct kvm *kvm, bool activate,
+ unsigned long bit);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
@@ -1500,7 +1588,7 @@ enum {
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);
/*
* Hardware virtualization extension instructions may fault if a
@@ -1508,24 +1596,14 @@ asmlinkage void __noreturn kvm_spurious_fault(void);
* Usually after catching the fault we just panic; during reboot
* instead the instruction is ignored.
*/
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+#define __kvm_handle_fault_on_reboot(insn) \
"666: \n\t" \
insn "\n\t" \
"jmp 668f \n\t" \
"667: \n\t" \
"call kvm_spurious_fault \n\t" \
"668: \n\t" \
- ".pushsection .fixup, \"ax\" \n\t" \
- "700: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting\n\t" \
- "je 667b \n\t" \
- "jmp 668b \n\t" \
- ".popsection \n\t" \
- _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn) \
- ____kvm_handle_fault_on_reboot(insn, "")
+ _ASM_EXTABLE(666b, 667b)
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
@@ -1554,6 +1632,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
@@ -1571,7 +1651,6 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
int kvm_is_in_guest(void);
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
@@ -1581,6 +1660,13 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq);
+static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
+{
+ /* We can only post Fixed and LowPrio IRQs */
+ return (irq->delivery_mode == dest_Fixed ||
+ irq->delivery_mode == dest_LowestPrio);
+}
+
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
if (kvm_x86_ops->vcpu_blocking)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 14caa9d9fb7f..365111789cc6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -13,10 +13,6 @@
#ifdef __ASSEMBLY__
-#define GLOBAL(name) \
- .globl name; \
- name:
-
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
#define __ALIGN .p2align 4, 0x90
#define __ALIGN_STR __stringify(__ALIGN)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index dc2d4b206ab7..4359b955e0b7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -144,7 +144,7 @@ struct mce_log_buffer {
enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX,
- MCE_PRIO_SRAO = INT_MAX - 1,
+ MCE_PRIO_UC = INT_MAX - 1,
MCE_PRIO_EXTLOG = INT_MAX - 2,
MCE_PRIO_NFIT = INT_MAX - 3,
MCE_PRIO_EDAC = INT_MAX - 4,
@@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected,
/* These may be used by multiple smca_hwid_mcatypes */
enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
+ SMCA_LS_V2, /* Load Store */
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 0c196c47d621..848ce43b9040 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -92,6 +92,16 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+static inline bool mem_encrypt_active(void)
+{
+ return sme_me_mask;
+}
+
+static inline u64 sme_get_me_mask(void)
+{
+ return sme_me_mask;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
new file mode 100644
index 000000000000..9c2447b3555d
--- /dev/null
+++ b/arch/x86/include/asm/memtype.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMTYPE_H
+#define _ASM_X86_MEMTYPE_H
+
+#include <linux/types.h>
+#include <asm/pgtable_types.h>
+
+extern bool pat_enabled(void);
+extern void pat_disable(const char *reason);
+extern void pat_init(void);
+extern void init_cache_modes(void);
+
+extern int memtype_reserve(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+extern int memtype_free(u64 start, u64 end);
+
+extern int memtype_kernel_map_sync(u64 base, unsigned long size,
+ enum page_cache_mode pcm);
+
+extern int memtype_reserve_io(resource_size_t start, resource_size_t end,
+ enum page_cache_mode *pcm);
+
+extern void memtype_free_io(resource_size_t start, resource_size_t end);
+
+extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
+#endif /* _ASM_X86_MEMTYPE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 209492849566..6685e1218959 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -53,6 +53,6 @@ static inline void __init load_ucode_amd_bsp(unsigned int family) {}
static inline void load_ucode_amd_ap(unsigned int family) {}
static inline int __init
save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
-void reload_ucode_amd(void) {}
+static inline void reload_ucode_amd(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index e78c7db87801..bdeae9291e5c 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -50,10 +50,6 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
-#ifdef CONFIG_X86_INTEL_MPX
- /* address of the bounds directory */
- void __user *bd_addr;
-#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 9024236693d2..b538d9ddee9c 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,7 +12,6 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
-#include <asm/mpx.h>
#include <asm/debugreg.h>
extern atomic64_t last_mm_ctx_id;
@@ -26,18 +25,20 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
#ifdef CONFIG_PERF_EVENTS
+DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
-static inline void load_mm_cr4(struct mm_struct *mm)
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
{
if (static_branch_unlikely(&rdpmc_always_available_key) ||
- atomic_read(&mm->context.perf_rdpmc_allowed))
- cr4_set_bits(X86_CR4_PCE);
+ (!static_branch_unlikely(&rdpmc_never_available_key) &&
+ atomic_read(&mm->context.perf_rdpmc_allowed)))
+ cr4_set_bits_irqsoff(X86_CR4_PCE);
else
- cr4_clear_bits(X86_CR4_PCE);
+ cr4_clear_bits_irqsoff(X86_CR4_PCE);
}
#else
-static inline void load_mm_cr4(struct mm_struct *mm) {}
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
@@ -67,14 +68,6 @@ struct ldt_struct {
int slot;
};
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
- return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
/*
* Used for LDT copy/destruction.
*/
@@ -97,87 +90,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
-static inline void load_mm_ldt(struct mm_struct *mm)
-{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /* READ_ONCE synchronizes with smp_store_release */
- ldt = READ_ONCE(mm->context.ldt);
-
- /*
- * Any change to mm->context.ldt is followed by an IPI to all
- * CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
- * if the ldt_struct changes before we return, the values we see
- * will be safe, and the new values will be loaded before we run
- * any user code.
- *
- * NB: don't try to convert this to use RCU without extreme care.
- * We would still need IRQs off, because we don't want to change
- * the local LDT after an IPI loaded a newer value than the one
- * that we can see.
- */
-
- if (unlikely(ldt)) {
- if (static_cpu_has(X86_FEATURE_PTI)) {
- if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
- /*
- * Whoops -- either the new LDT isn't mapped
- * (if slot == -1) or is mapped into a bogus
- * slot (if slot > 1).
- */
- clear_LDT();
- return;
- }
-
- /*
- * If page table isolation is enabled, ldt->entries
- * will not be mapped in the userspace pagetables.
- * Tell the CPU to access the LDT through the alias
- * at ldt_slot_va(ldt->slot).
- */
- set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
- } else {
- set_ldt(ldt->entries, ldt->nr_entries);
- }
- } else {
- clear_LDT();
- }
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
clear_LDT();
-#endif
}
-
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT if either the old or new mm had an LDT.
- *
- * An mm will never go from having an LDT to not having an LDT. Two
- * mms never share an LDT, so we don't gain anything by checking to
- * see whether the LDT changed. There's also no guarantee that
- * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
- * then prev->context.ldt will also be non-NULL.
- *
- * If we really cared, we could optimize the case where prev == next
- * and we're exiting lazy mode. Most of the time, if this happens,
- * we don't actually need to reload LDTR, but modify_ldt() is mostly
- * used by legacy code and emulators where we don't need this level of
- * performance.
- *
- * This uses | instead of || because it generates better code.
- */
- if (unlikely((unsigned long)prev->context.ldt |
- (unsigned long)next->context.ldt))
- load_mm_ldt(next);
-#endif
-
DEBUG_LOCKS_WARN_ON(preemptible());
}
+#endif
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()
@@ -272,34 +199,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
- mpx_mm_init(mm);
-}
-
static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
- /*
- * mpx_notify_unmap() goes and reads a rarely-hot
- * cacheline in the mm_struct. That can be expensive
- * enough to be seen in profiles.
- *
- * The mpx_notify_unmap() call and its contents have been
- * observed to affect munmap() performance on hardware
- * where MPX is not present.
- *
- * The unlikely() optimizes for the fast case: no MPX
- * in the CPU, or no MPX use in the process. Even if
- * we get this wrong (in the unlikely event that MPX
- * is widely enabled on some system) the overhead of
- * MPX itself (reading bounds tables) is expected to
- * overwhelm the overhead of getting this unlikely()
- * consistently wrong.
- */
- if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
- mpx_notify_unmap(mm, start, end);
}
/*
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17febb4..c215d2762488 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -15,6 +15,8 @@ struct mod_arch_specific {
#ifdef CONFIG_X86_64
/* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M486SX
+#define MODULE_PROC_FAMILY "486SX "
#elif defined CONFIG_M486
#define MODULE_PROC_FAMILY "486 "
#elif defined CONFIG_M586
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
deleted file mode 100644
index 143a5c193ed3..000000000000
--- a/arch/x86/include/asm/mpx.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MPX_H
-#define _ASM_X86_MPX_H
-
-#include <linux/types.h>
-#include <linux/mm_types.h>
-
-#include <asm/ptrace.h>
-#include <asm/insn.h>
-
-/*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
-#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1)
-#define MPX_BNDCFG_ENABLE_FLAG 0x1
-#define MPX_BD_ENTRY_VALID_FLAG 0x1
-
-/*
- * The upper 28 bits [47:20] of the virtual address in 64-bit
- * are used to index into bounds directory (BD).
- *
- * The directory is 2G (2^31) in size, and with 8-byte entries
- * it has 2^28 entries.
- */
-#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
-#define MPX_BD_ENTRY_BYTES_64 8
-#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
-
-/*
- * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
- * entries it has 2^20 entries.
- */
-#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
-#define MPX_BD_ENTRY_BYTES_32 4
-#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
-
-/*
- * A 64-bit table is 4MB total in size, and an entry is
- * 4 64-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
-#define MPX_BT_ENTRY_BYTES_64 32
-#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
-
-/*
- * A 32-bit table is 16kB total in size, and an entry is
- * 4 32-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
-#define MPX_BT_ENTRY_BYTES_32 16
-#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
-
-#define MPX_BNDSTA_TAIL 2
-#define MPX_BNDCFG_TAIL 12
-#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
-#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
-#define MPX_BNDSTA_ERROR_CODE 0x3
-
-struct mpx_fault_info {
- void __user *addr;
- void __user *lower;
- void __user *upper;
-};
-
-#ifdef CONFIG_X86_INTEL_MPX
-
-extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs);
-extern int mpx_handle_bd_fault(void);
-
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
-}
-
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
- /*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
- mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
-}
-
-extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end);
-extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags);
-
-#else
-static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
-{
- return -EINVAL;
-}
-static inline int mpx_handle_bd_fault(void)
-{
- return -EINVAL;
-}
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return 0;
-}
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
-}
-static inline void mpx_notify_unmap(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline unsigned long mpx_unmapped_area_check(unsigned long addr,
- unsigned long len, unsigned long flags)
-{
- return addr;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 2ef31cc8c529..6b79515abb82 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -218,7 +218,9 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
-
+void *hv_alloc_hyperv_page(void);
+void *hv_alloc_hyperv_zeroed_page(void);
+void hv_free_hyperv_page(unsigned long addr);
void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
@@ -241,6 +243,8 @@ static inline void hv_apic_init(void) {}
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
+static inline void *hv_alloc_hyperv_page(void) { return NULL; }
+static inline void hv_free_hyperv_page(unsigned long addr) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
static inline void hyperv_stop_tsc_emulation(void) {};
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6b4fc2788078..ebe1685e92dd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -93,6 +93,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
+#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
+ * The processor is not susceptible to a
+ * machine check error due to modifying the
+ * code page size along with either the
+ * physical address or cache type
+ * without TLB invalidation.
+ */
+#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO BIT(8) /*
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+#define MSR_IA32_TSX_CTRL 0x00000122
+#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
@@ -375,15 +391,26 @@
/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1
-/* AMD64 MSRs. Not complete. See the architecture manual for a more
- complete list. */
+/* Auto-reload via MSR instead of DS area */
+#define MSR_RELOAD_PMC0 0x000014c1
+#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/*
+ * AMD64 MSRs. Not complete. See the architecture manual for a more
+ * complete list.
+ */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD_PERF_CTL 0xc0010062
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD_PPIN_CTL 0xc00102f0
+#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
@@ -531,7 +558,14 @@
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_SMI_COUNT 0x00000034
-#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+
+/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */
+#define MSR_IA32_FEAT_CTL 0x0000003a
+#define FEAT_CTL_LOCKED BIT(0)
+#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1)
+#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2)
+#define FEAT_CTL_LMCE_ENABLED BIT(20)
+
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
@@ -539,11 +573,6 @@
#define MSR_IA32_XSS 0x00000da0
-#define FEATURE_CONTROL_LOCKED (1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
-#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
-#define FEATURE_CONTROL_LMCE (1<<20)
-
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
#define MSR_IA32_APICBASE_ENABLE (1<<11)
@@ -560,9 +589,6 @@
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
-#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
-#define MSR_AMD_PERF_STATUS 0xc0010063
-#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 5cc3930cb465..86f20d520a07 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -233,8 +233,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* Thus, use the preferred barrier on the respective CPU, aiming for
* RDTSCP as the default.
*/
- asm volatile(ALTERNATIVE_3("rdtsc",
- "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC,
+ asm volatile(ALTERNATIVE_2("rdtsc",
"lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
"rdtscp", X86_FEATURE_RDTSCP)
: EAX_EDX_RET(val, low, high)
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index dbff1456d215..829df26fd7a3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,7 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
/*
@@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
}
static inline void mtrr_bp_init(void)
{
- pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
}
#define mtrr_ap_init() do {} while (0)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e28f8b723b5c..9d5252c9685c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -21,7 +21,7 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_LOOPS ((u32)-1)
-#define MWAITX_DISABLE_CSTATES 0xf
+#define MWAITX_DISABLE_CSTATES 0xf0
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 75ded1d13d98..9d5d949e662e 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -41,7 +41,6 @@ struct nmiaction {
struct list_head list;
nmi_handler_t handler;
u64 max_duration;
- struct irq_work irq_work;
unsigned long flags;
const char *name;
};
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 109f974f9835..07e95dcb40ad 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -37,7 +37,6 @@
*/
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
/*
* Google experimented with loop-unrolling and this turned out to be
@@ -192,7 +191,7 @@
" lfence;\n" \
" jmp 902b;\n" \
" .align 16\n" \
- "903: addl $4, %%esp;\n" \
+ "903: lea 4(%%esp), %%esp;\n" \
" pushl %[thunk_target];\n" \
" ret;\n" \
" .align 16\n" \
@@ -314,7 +313,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
@@ -337,7 +336,7 @@ static inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dce26f1d13e1..86e7317eb31f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -139,18 +139,6 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-#ifdef CONFIG_X86_64
-static inline unsigned long read_cr8(void)
-{
- return PVOP_CALL0(unsigned long, cpu.read_cr8);
-}
-
-static inline void write_cr8(unsigned long x)
-{
- PVOP_VCALL1(cpu.write_cr8, x);
-}
-#endif
-
static inline void arch_safe_halt(void)
{
PVOP_VCALL0(irq.safe_halt);
@@ -306,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
-static inline void set_iopl_mask(unsigned mask)
-{
- PVOP_VCALL1(cpu.set_iopl_mask, mask);
-}
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 639b2df445ee..84812964d3dd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -119,11 +119,6 @@ struct pv_cpu_ops {
void (*write_cr4)(unsigned long);
-#ifdef CONFIG_X86_64
- unsigned long (*read_cr8)(void);
- void (*write_cr8)(unsigned long);
-#endif
-
/* Segment descriptor handling */
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
@@ -145,8 +140,6 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
- void (*set_iopl_mask)(unsigned mask);
-
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
deleted file mode 100644
index 92015c65fa2a..000000000000
--- a/arch/x86/include/asm/pat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PAT_H
-#define _ASM_X86_PAT_H
-
-#include <linux/types.h>
-#include <asm/pgtable_types.h>
-
-bool pat_enabled(void);
-void pat_disable(const char *reason);
-extern void pat_init(void);
-extern void init_cache_modes(void);
-
-extern int reserve_memtype(u64 start, u64 end,
- enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
-extern int free_memtype(u64 start, u64 end);
-
-extern int kernel_map_sync_memtype(u64 base, unsigned long size,
- enum page_cache_mode pcm);
-
-int io_reserve_memtype(resource_size_t start, resource_size_t end,
- enum page_cache_mode *pcm);
-
-void io_free_memtype(resource_size_t start, resource_size_t end);
-
-bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
-
-#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index e662f987dfa2..7ccb338507e3 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -9,11 +9,9 @@
#include <linux/scatterlist.h>
#include <linux/numa.h>
#include <asm/io.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
#include <asm/x86_init.h>
-#ifdef __KERNEL__
-
struct pci_sysdata {
int domain; /* PCI domain */
int node; /* NUMA node */
@@ -27,7 +25,7 @@ struct pci_sysdata {
void *fwnode; /* IRQ domain for MSI assignment */
#endif
#if IS_ENABLED(CONFIG_VMD)
- bool vmd_domain; /* True if in Intel VMD domain */
+ struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */
#endif
};
@@ -35,14 +33,17 @@ extern int pci_routeirq;
extern int noioapicquirk;
extern int noioapicreroute;
+static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus)
+{
+ return bus->sysdata;
+}
+
#ifdef CONFIG_PCI
#ifdef CONFIG_PCI_DOMAINS
static inline int pci_domain_nr(struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->domain;
+ return to_pci_sysdata(bus)->domain;
}
static inline int pci_proc_domain(struct pci_bus *bus)
@@ -54,24 +55,20 @@ static inline int pci_proc_domain(struct pci_bus *bus)
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
static inline void *_pci_root_bus_fwnode(struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->fwnode;
+ return to_pci_sysdata(bus)->fwnode;
}
#define pci_root_bus_fwnode _pci_root_bus_fwnode
#endif
+#if IS_ENABLED(CONFIG_VMD)
static inline bool is_vmd(struct pci_bus *bus)
{
-#if IS_ENABLED(CONFIG_VMD)
- struct pci_sysdata *sd = bus->sysdata;
-
- return sd->vmd_domain;
-#else
- return false;
-#endif
+ return to_pci_sysdata(bus)->vmd_dev != NULL;
}
+#else
+#define is_vmd(bus) false
+#endif /* CONFIG_VMD */
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
@@ -118,11 +115,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif
-#endif /* __KERNEL__ */
-
-#ifdef CONFIG_X86_64
-#include <asm/pci_64.h>
-#endif
/* generic pci stuff */
#include <asm-generic/pci.h>
@@ -131,9 +123,7 @@ void native_restore_msi_irqs(struct pci_dev *dev);
/* Returns the node based on pci bus */
static inline int __pcibus_to_node(const struct pci_bus *bus)
{
- const struct pci_sysdata *sd = bus->sysdata;
-
- return sd->node;
+ return to_pci_sysdata(bus)->node;
}
static inline const struct cpumask *
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
deleted file mode 100644
index f5411de0ae11..000000000000
--- a/arch/x86/include/asm/pci_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PCI_64_H
-#define _ASM_X86_PCI_64_H
-
-#ifdef __KERNEL__
-
-#ifdef CONFIG_CALGARY_IOMMU
-static inline void *pci_iommu(struct pci_bus *bus)
-{
- struct pci_sysdata *sd = bus->sysdata;
- return sd->iommu;
-}
-
-static inline void set_pci_iommu(struct pci_bus *bus, void *val)
-{
- struct pci_sysdata *sd = bus->sysdata;
- sd->iommu = val;
-}
-#endif /* CONFIG_CALGARY_IOMMU */
-
-extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
- int reg, int len, u32 *value);
-extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
- int reg, int len, u32 value);
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1392d5e6e8d6..29964b0e1075 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -252,16 +252,20 @@ struct pebs_lbr {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F
-/* ibs fetch bits/masks */
+/* IBS fetch bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
-/* ibs op bits/masks */
-/* lower 4 bits of the current count are ignored: */
-#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
+/*
+ * IBS op bits/masks
+ * The lower 7 bits of the current count are random bits
+ * preloaded by hardware and ignored in software
+ */
+#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
+#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
@@ -318,17 +322,10 @@ struct perf_guest_switch_msr {
u64 host, guest;
};
-extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
-static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
- *nr = 0;
- return NULL;
-}
-
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
memset(cap, 0, sizeof(*cap));
@@ -338,8 +335,23 @@ static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+#else
+static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+ *nr = 0;
+ return NULL;
+}
+#endif
+
#ifdef CONFIG_CPU_SUP_INTEL
extern void intel_pt_handle_vmx(int on);
+#else
+static inline void intel_pt_handle_vmx(int on)
+{
+
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index e3633795fb22..5afb5e0fe903 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
#define pmd_read_atomic pmd_read_atomic
/*
- * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by gcc. Problem is, in certain places
- * where pte_offset_map_lock is called, concurrent page faults are
+ * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
+ * a "*pmdp" dereference done by GCC. Problem is, in certain places
+ * where pte_offset_map_lock() is called, concurrent page faults are
* allowed, if the mmap_sem is hold for reading. An example is mincore
* vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate rightfully does a set_64bit, but if we're reading the
+ * pmd_populate() rightfully does a set_64bit(), but if we're reading the
* pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because gcc will not read the 64bit of the pmd atomically. To fix
- * this all places running pmd_offset_map_lock() while holding the
+ * because GCC will not read the 64-bit value of the pmd atomically.
+ *
+ * To fix this all places running pte_offset_map_lock() while holding the
* mmap_sem in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null nor not, and in turn to know if
- * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
+ * function to know if the pmd is null or not, and in turn to know if
+ * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
* operations.
*
- * Without THP if the mmap_sem is hold for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic runs. So
+ * Without THP if the mmap_sem is held for reading, the pmd can only
+ * transition from null to not null while pmd_read_atomic() runs. So
* we can always return atomic pmd values with this function.
*
- * With THP if the mmap_sem is hold for reading, the pmd can become
+ * With THP if the mmap_sem is held for reading, the pmd can become
* trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic. We could read it really
- * atomically here with a atomic64_read for the THP enabled case (and
+ * at any time under pmd_read_atomic(). We could read it truly
+ * atomically here with an atomic64_read() for the THP enabled case (and
* it would be a whole lot simpler), but to avoid using cmpxchg8b we
* only return an atomic pmdval if the low part of the pmdval is later
- * found stable (i.e. pointing to a pte). And we're returning a none
- * pmdval if the low part of the pmd is none. In some cases the high
- * and low part of the pmdval returned may not be consistent if THP is
- * enabled (the low part may point to previously mapped hugepage,
- * while the high part may point to a more recently mapped hugepage),
- * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
- * of the pmd to be read atomically to decide if the pmd is unstable
- * or not, with the only exception of when the low part of the pmd is
- * zero in which case we return a none pmd.
+ * found to be stable (i.e. pointing to a pte). We are also returning a
+ * 'none' (zero) pmdval if the low part of the pmd is zero.
+ *
+ * In some cases the high and low part of the pmdval returned may not be
+ * consistent if THP is enabled (the low part may point to previously
+ * mapped hugepage, while the high part may point to a more recently
+ * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
+ * needs the low part of the pmd to be read atomically to decide if the
+ * pmd is unstable or not, with the only exception when the low part
+ * of the pmd is zero, in which case we return a 'none' pmd.
*/
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0bc530c4eb13..7e118660bbd9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -29,8 +29,9 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
+void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
+ bool user);
void ptdump_walk_pgd_level_checkwx(void);
void ptdump_walk_user_pgd_level_checkwx(void);
@@ -239,6 +240,7 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+#define p4d_leaf p4d_large
static inline int p4d_large(p4d_t p4d)
{
/* No 512 GiB pages yet */
@@ -247,6 +249,7 @@ static inline int p4d_large(p4d_t p4d)
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
+#define pmd_leaf pmd_large
static inline int pmd_large(pmd_t pte)
{
return pmd_flags(pte) & _PAGE_PSE;
@@ -874,6 +877,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}
+#define pud_leaf pud_large
static inline int pud_large(pud_t pud)
{
return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -885,6 +889,7 @@ static inline int pud_bad(pud_t pud)
return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
}
#else
+#define pud_leaf pud_large
static inline int pud_large(pud_t pud)
{
return 0;
@@ -1233,6 +1238,7 @@ static inline bool pgdp_maps_userspace(void *__ptr)
return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}
+#define pgd_leaf pgd_large
static inline int pgd_large(pgd_t pgd) { return 0; }
#ifdef CONFIG_PAGE_TABLE_ISOLATION
@@ -1463,6 +1469,12 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
}
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+static inline bool arch_faults_on_old_pte(void)
+{
+ return false;
+}
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index c78da8eda8f2..0dca7f7aeff2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -29,8 +29,6 @@ extern pgd_t swapper_pg_dir[1024];
extern pgd_t initial_page_table[1024];
extern pmd_t initial_pg_pmd[];
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
void paging_init(void);
void sync_initial_page_table(void);
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
new file mode 100644
index 000000000000..b6355416a15a
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE \
+ ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
+#define MODULES_LEN (MODULES_VADDR - MODULES_END)
+
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index b0bc0fff5f1f..5356a46b0373 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
/*
* The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,54 +20,4 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
- * to avoid include recursion hell
- */
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
-
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
- & PMD_MASK)
-
-#define LDT_BASE_ADDR \
- ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE \
- ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR VMALLOC_START
-#define MODULES_END VMALLOC_END
-#define MODULES_LEN (MODULES_VADDR - MODULES_END)
-
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4990d26dfc73..0b6c4042942a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -241,9 +241,6 @@ extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#define pgtable_cache_init() do { } while (0)
-#define check_pgt_cache() do { } while (0)
-
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
new file mode 100644
index 000000000000..d34cce1b995c
--- /dev/null
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b5e49e6bac63..0239998d8cdc 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -110,11 +110,6 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
-
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@@ -136,80 +131,93 @@
*/
#ifndef __ASSEMBLY__
enum page_cache_mode {
- _PAGE_CACHE_MODE_WB = 0,
- _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
_PAGE_CACHE_MODE_UC_MINUS = 2,
- _PAGE_CACHE_MODE_UC = 3,
- _PAGE_CACHE_MODE_WT = 4,
- _PAGE_CACHE_MODE_WP = 5,
- _PAGE_CACHE_MODE_NUM = 8
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+
+ _PAGE_CACHE_MODE_NUM = 8
};
#endif
-#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
-#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
-#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
+#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
- _PAGE_USER | _PAGE_ACCESSED)
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-#define PAGE_COPY PAGE_COPY_NOEXEC
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_NX)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
- _PAGE_ACCESSED)
-
-#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
-
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
-
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
+#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
-#ifndef __ASSEMBLY__
+#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
-#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
+#define __PP _PAGE_PRESENT
+#define __RW _PAGE_RW
+#define _USR _PAGE_USER
+#define ___A _PAGE_ACCESSED
+#define ___D _PAGE_DIRTY
+#define ___G _PAGE_GLOBAL
+#define __NX _PAGE_NX
+
+#define _ENC _PAGE_ENC
+#define __WP _PAGE_CACHE_WP
+#define __NC _PAGE_NOCACHE
+#define _PSE _PAGE_PSE
+
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+#define __pg(x) __pgprot(x)
+
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+
+#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
+#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
+#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
+
+#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
+#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
+#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
+#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
+#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
+#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G)
+#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
+#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
+#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
+#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
+
+
+#define __PAGE_KERNEL_IO __PAGE_KERNEL
+#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY | _PAGE_ENC)
-#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
-#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
-#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
+#ifndef __ASSEMBLY__
-#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
-#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
+#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
+#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
+#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0)
-#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
+#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask)
-#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC)
+#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0)
+#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
+#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
+#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC)
+#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
+#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
+#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC)
-#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
#endif /* __ASSEMBLY__ */
@@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
extern uint8_t __pte2cachemode_tbl[8];
@@ -561,6 +566,10 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+
+struct mm_struct;
+extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
+ unsigned int *level);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..3d4cb83a8828 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset)
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
extern asmlinkage void ___preempt_schedule(void);
# define __preempt_schedule() \
asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6e0a3b43d027..09705ccc393c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct io_bitmap;
struct vm86;
#include <asm/math_emu.h>
@@ -24,6 +25,7 @@ struct vm86;
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
+#include <asm/vmxfeatures.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -84,6 +86,9 @@ struct cpuinfo_x86 {
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
#endif
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+ __u32 vmx_capability[NVMXINTS];
+#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
/* CPUID returned core id bits: */
@@ -93,7 +98,15 @@ struct cpuinfo_x86 {
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
int cpuid_level;
- __u32 x86_capability[NCAPINTS + NBUGINTS];
+ /*
+ * Align to size of unsigned long because the x86_capability array
+ * is passed to bitops which require the alignment. Use unnamed
+ * union to enforce the array is aligned to size of unsigned long.
+ */
+ union {
+ __u32 x86_capability[NCAPINTS + NBUGINTS];
+ unsigned long x86_capability_alignment;
+ };
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
@@ -157,7 +170,6 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
@@ -328,10 +340,32 @@ struct x86_hw_tss {
* IO-bitmap sizes:
*/
#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP \
+ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL \
+ (offsetof(struct tss_struct, io_bitmap.mapall) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+ sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT \
+ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
unsigned long words[64];
@@ -341,13 +375,21 @@ struct entry_stack_page {
struct entry_stack stack;
} __aligned(PAGE_SIZE);
-struct tss_struct {
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+ /* The sequence number of the last active bitmap. */
+ u64 prev_sequence;
+
/*
- * The fixed hardware portion. This must not cross a page boundary
- * at risk of violating the SDM's advice and potentially triggering
- * errata.
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non io
+ * bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need to
+ * actually touch the io_bitmap at all.
*/
- struct x86_hw_tss x86_tss;
+ unsigned int prev_max;
/*
* The extra 1 is there because the CPU will access an
@@ -355,21 +397,28 @@ struct tss_struct {
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ unsigned long bitmap[IO_BITMAP_LONGS + 1];
+
+ /*
+ * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+ * except the additional byte at the end.
+ */
+ unsigned long mapall[IO_BITMAP_LONGS + 1];
+};
+
+struct tss_struct {
+ /*
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
+ */
+ struct x86_hw_tss x86_tss;
+
+ struct x86_io_bitmap io_bitmap;
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
-
/* Per CPU interrupt stacks */
struct irq_stack {
char stack[IRQ_STACK_SIZE];
@@ -480,10 +529,14 @@ struct thread_struct {
struct vm86 *vm86;
#endif
/* IO permissions: */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
- /* Max allowed port in the bitmap, in bytes: */
- unsigned io_bitmap_max;
+ struct io_bitmap *io_bitmap;
+
+ /*
+ * IOPL. Priviledge level dependent I/O permission which is
+ * emulated via the I/O bitmap to prevent user space from disabling
+ * interrupts.
+ */
+ unsigned long iopl_emul;
mm_segment_t addr_limit;
@@ -515,25 +568,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
-#ifdef CONFIG_X86_32
- unsigned int reg;
-
- asm volatile ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
-}
-
static inline void
native_load_sp0(unsigned long sp0)
{
@@ -573,7 +607,6 @@ static inline void load_sp0(unsigned long sp0)
native_load_sp0(sp0);
}
-#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT_XXL */
/* Free all resources held by a thread. */
@@ -841,7 +874,6 @@ static inline void spin_lock_prefetch(const void *x)
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
.addr_limit = KERNEL_DS, \
}
@@ -915,24 +947,6 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
-#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
-
-#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(void);
-extern int mpx_disable_management(void);
-#else
-static inline int mpx_enable_management(void)
-{
- return -EINVAL;
-}
-static inline int mpx_disable_management(void)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
#ifdef CONFIG_CPU_SUP_AMD
extern u16 amd_get_nb_id(int cpu);
extern u32 amd_get_nodes_per_socket(void);
@@ -958,7 +972,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
extern unsigned long arch_align_stack(unsigned long sp);
void free_init_pages(const char *what, unsigned long begin, unsigned long end);
-extern void free_kernel_image_pages(void *begin, void *end);
+extern void free_kernel_image_pages(const char *what, void *begin, void *end);
void default_idle(void);
#ifdef CONFIG_XEN
@@ -968,7 +982,6 @@ bool xen_set_default_idle(void);
#endif
void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
void microcode_check(void);
enum l1tf_mitigations {
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 5df09a0b80b8..07375b476c4f 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 332eb3525867..6d6475fdd327 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
+/*
+ * Determine whether the register set came from any context that is running in
+ * 64-bit mode.
+ */
+static inline bool any_64bit_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+ return !user_mode(regs) || user_64bit_mode(regs);
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
@@ -339,27 +352,17 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
-/*
- * When hitting ptrace_stop(), we cannot return using SYSRET because
- * that does not restore the full CPU state, only a minimal set. The
- * ptracer can change arbitrary register values, which is usually okay
- * because the usual ptrace stops run off the signal delivery path which
- * forces IRET; however, ptrace_event() stops happen in arbitrary places
- * in the kernel and don't force IRET path.
- *
- * So force IRET path after a ptrace stop.
- */
-#define arch_ptrace_stop_needed(code, info) \
-({ \
- force_iret(); \
- false; \
-})
-
struct user_desc;
extern int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info);
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
+#ifdef CONFIG_X86_64
+# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t)
+#else
+# define do_set_thread_area_64(p, s, t) (0)
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
index 92c34e517da1..5528e9325049 100644
--- a/arch/x86/include/asm/purgatory.h
+++ b/arch/x86/include/asm/purgatory.h
@@ -6,16 +6,6 @@
#include <linux/purgatory.h>
extern void purgatory(void);
-/*
- * These forward declarations serve two purposes:
- *
- * 1) Make sparse happy when checking arch/purgatory
- * 2) Document that these are required to be global so the symbol
- * lookup in kexec works
- */
-extern unsigned long purgatory_backup_dest;
-extern unsigned long purgatory_backup_src;
-extern unsigned long purgatory_backup_sz;
#endif /* __ASSEMBLY__ */
#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..444d6fd9a6d8 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -63,10 +63,25 @@ static inline bool vcpu_is_preempted(long cpu)
#endif
#ifdef CONFIG_PARAVIRT
+/*
+ * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack.
+ *
+ * Native (and PV wanting native due to vCPU pinning) should disable this key.
+ * It is done in this backwards fashion to only have a single direction change,
+ * which removes ordering between native_pv_spin_init() and HV setup.
+ */
DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
void native_pv_lock_init(void) __init;
+/*
+ * Shortcut for the queued_spin_lock_slowpath() function that allows
+ * virt to hijack it.
+ *
+ * Returns:
+ * true - lock has been negotiated, all done;
+ * false - queued_spin_lock_slowpath() will do its thing.
+ */
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index c53682303c9c..b35030eeec36 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -14,13 +14,12 @@
#include <linux/types.h>
#include <asm/io.h>
-/* This must match data at realmode.S */
+/* This must match data at realmode/rm/header.S */
struct real_mode_header {
u32 text_start;
u32 ro_end;
/* SMP trampoline */
u32 trampoline_start;
- u32 trampoline_status;
u32 trampoline_header;
#ifdef CONFIG_X86_64
u32 trampoline_pgd;
@@ -37,7 +36,7 @@ struct real_mode_header {
#endif
};
-/* This must match data at trampoline_32/64.S */
+/* This must match data at realmode/rm/trampoline_{32,64}.S */
struct trampoline_header {
#ifdef CONFIG_X86_32
u32 start;
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
deleted file mode 100644
index 232f856e0db0..000000000000
--- a/arch/x86/include/asm/refcount.h
+++ /dev/null
@@ -1,126 +0,0 @@
-#ifndef __ASM_X86_REFCOUNT_H
-#define __ASM_X86_REFCOUNT_H
-/*
- * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
- * PaX/grsecurity.
- */
-#include <linux/refcount.h>
-#include <asm/bug.h>
-
-/*
- * This is the first portion of the refcount error handling, which lives in
- * .text.unlikely, and is jumped to from the CPU flag check (in the
- * following macros). This saves the refcount value location into CX for
- * the exception handler to use (in mm/extable.c), and then triggers the
- * central refcount exception. The fixup address for the exception points
- * back to the regular execution flow in .text.
- */
-#define _REFCOUNT_EXCEPTION \
- ".pushsection .text..refcount\n" \
- "111:\tlea %[var], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD2 "\n" \
- ASM_UNREACHABLE \
- ".popsection\n" \
- "113:\n" \
- _ASM_EXTABLE_REFCOUNT(112b, 113b)
-
-/* Trigger refcount exception if refcount result is negative. */
-#define REFCOUNT_CHECK_LT_ZERO \
- "js 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-#define REFCOUNT_CHECK_LE_ZERO \
- "jz 111f\n\t" \
- REFCOUNT_CHECK_LT_ZERO
-
-/* Trigger refcount exception unconditionally. */
-#define REFCOUNT_ERROR \
- "jmp 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-static __always_inline void refcount_add(unsigned int i, refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
- REFCOUNT_CHECK_LT_ZERO
- : [var] "+m" (r->refs.counter)
- : "ir" (i)
- : "cc", "cx");
-}
-
-static __always_inline void refcount_inc(refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "incl %0\n\t"
- REFCOUNT_CHECK_LT_ZERO
- : [var] "+m" (r->refs.counter)
- : : "cc", "cx");
-}
-
-static __always_inline void refcount_dec(refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "decl %0\n\t"
- REFCOUNT_CHECK_LE_ZERO
- : [var] "+m" (r->refs.counter)
- : : "cc", "cx");
-}
-
-static __always_inline __must_check
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
-{
- bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "er", i, "cx");
-
- if (ret) {
- smp_acquire__after_ctrl_dep();
- return true;
- }
-
- return false;
-}
-
-static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
-{
- bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "cx");
-
- if (ret) {
- smp_acquire__after_ctrl_dep();
- return true;
- }
-
- return false;
-}
-
-static __always_inline __must_check
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
- int c, result;
-
- c = atomic_read(&(r->refs));
- do {
- if (unlikely(c == 0))
- return false;
-
- result = c + i;
-
- /* Did we try to increment from/to an undesirable state? */
- if (unlikely(c < 0 || c == INT_MAX || result < c)) {
- asm volatile(REFCOUNT_ERROR
- : : [var] "m" (r->refs.counter)
- : "cc", "cx");
- break;
- }
-
- } while (!atomic_try_cmpxchg(&(r->refs), &c, result));
-
- return c != 0;
-}
-
-static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
-{
- return refcount_add_not_zero(1, r);
-}
-
-#endif
diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h
deleted file mode 100644
index 0a21986d2238..000000000000
--- a/arch/x86/include/asm/rio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Derived from include/asm-x86/mach-summit/mach_mpparse.h
- * and include/asm-x86/mach-default/bios_ebda.h
- *
- * Author: Laurent Vivier <Laurent.Vivier@bull.net>
- */
-
-#ifndef _ASM_X86_RIO_H
-#define _ASM_X86_RIO_H
-
-#define RIO_TABLE_VERSION 3
-
-struct rio_table_hdr {
- u8 version; /* Version number of this data structure */
- u8 num_scal_dev; /* # of Scalability devices */
- u8 num_rio_dev; /* # of RIO I/O devices */
-} __attribute__((packed));
-
-struct scal_detail {
- u8 node_id; /* Scalability Node ID */
- u32 CBAR; /* Address of 1MB register space */
- u8 port0node; /* Node ID port connected to: 0xFF=None */
- u8 port0port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port1node; /* Node ID port connected to: 0xFF = None */
- u8 port1port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port2node; /* Node ID port connected to: 0xFF = None */
- u8 port2port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 chassis_num; /* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
- u8 node_id; /* RIO Node ID */
- u32 BBAR; /* Address of 1MB register space */
- u8 type; /* Type of device */
- u8 owner_id; /* Node ID of Hurricane that owns this */
- /* node */
- u8 port0node; /* Node ID port connected to: 0xFF=None */
- u8 port0port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port1node; /* Node ID port connected to: 0xFF=None */
- u8 port1port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 first_slot; /* Lowest slot number below this Calgary */
- u8 status; /* Bit 0 = 1 : the XAPIC is used */
- /* = 0 : the XAPIC is not used, ie: */
- /* ints fwded to another XAPIC */
- /* Bits1:7 Reserved */
- u8 WP_index; /* instance index - lower ones have */
- /* lower slot numbers/PCI bus numbers */
- u8 chassis_num; /* 1 based Chassis number */
-} __attribute__((packed));
-
-enum {
- HURR_SCALABILTY = 0, /* Hurricane Scalability info */
- HURR_RIOIB = 2, /* Hurricane RIOIB info */
- COMPAT_CALGARY = 4, /* Compatibility Calgary */
- ALT_CALGARY = 5, /* Second Planar Calgary */
-};
-
-#endif /* _ASM_X86_RIO_H */
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 71b32f2570ab..036c360910c5 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -6,7 +6,6 @@
#include <asm/extable.h>
extern char __brk_base[], __brk_limit[];
-extern struct exception_table_entry __stop___ex_table[];
extern char __end_rodata_aligned[];
#if defined(CONFIG_X86_64)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index ac3892920419..6669164abadc 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -31,6 +31,18 @@
*/
#define SEGMENT_RPL_MASK 0x3
+/*
+ * When running on Xen PV, the actual privilege level of the kernel is 1,
+ * not 0. Testing the Requested Privilege Level in a segment selector to
+ * determine whether the context is user mode or kernel mode with
+ * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level
+ * matches the 0x3 mask.
+ *
+ * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV
+ * kernels because privilege level 2 is never used.
+ */
+#define USER_SEGMENT_RPL_MASK 0x2
+
/* User mode is privilege level 3: */
#define USER_RPL 0x3
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ae7b909dc242..64c3dce374e5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -40,7 +40,6 @@ int _set_memory_wt(unsigned long addr, int numpages);
int _set_memory_wb(unsigned long addr, int numpages);
int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
-int set_memory_wt(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
@@ -48,11 +47,6 @@ int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
int set_memory_np_noalias(unsigned long addr, int numpages);
-int set_memory_array_uc(unsigned long *addr, int addrinarray);
-int set_memory_array_wc(unsigned long *addr, int addrinarray);
-int set_memory_array_wt(unsigned long *addr, int addrinarray);
-int set_memory_array_wb(unsigned long *addr, int addrinarray);
-
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
int set_pages_array_wt(struct page **pages, int addrinarray);
@@ -80,8 +74,6 @@ int set_pages_array_wb(struct page **pages, int addrinarray);
int set_pages_uc(struct page *page, int numpages);
int set_pages_wb(struct page *page, int numpages);
-int set_pages_x(struct page *page, int numpages);
-int set_pages_nx(struct page *page, int numpages);
int set_pages_ro(struct page *page, int numpages);
int set_pages_rw(struct page *page, int numpages);
@@ -89,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
#ifdef CONFIG_X86_64
static inline int set_mce_nospec(unsigned long pfn)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index e1356a3b8223..e15f364efbcc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -143,6 +143,7 @@ void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
+void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 219be88a59d2..6d37b8fcfc77 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -73,20 +73,6 @@ static inline unsigned long native_read_cr4(void)
void native_write_cr4(unsigned long val);
-#ifdef CONFIG_X86_64
-static inline unsigned long native_read_cr8(void)
-{
- unsigned long cr8;
- asm volatile("movq %%cr8,%0" : "=r" (cr8));
- return cr8;
-}
-
-static inline void native_write_cr8(unsigned long val)
-{
- asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-#endif
-
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
static inline u32 rdpkru(void)
{
@@ -200,16 +186,6 @@ static inline void wbinvd(void)
#ifdef CONFIG_X86_64
-static inline unsigned long read_cr8(void)
-{
- return native_read_cr8();
-}
-
-static inline void write_cr8(unsigned long x)
-{
- native_write_cr8(x);
-}
-
static inline void load_gs_index(unsigned selector)
{
native_load_gs_index(selector);
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index a7af9f53c0cb..35bb35d28733 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -34,7 +34,7 @@ struct saved_context {
*/
unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
- unsigned long cr0, cr2, cr3, cr4, cr8;
+ unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
struct saved_msrs saved_msrs;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index dec9c1e84c78..6ece8561ba66 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -52,6 +52,7 @@ enum {
INTERCEPT_MWAIT,
INTERCEPT_MWAIT_COND,
INTERCEPT_XSETBV,
+ INTERCEPT_RDPRU,
};
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 18a4b6890fa8..0e059b73437b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task)
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
+}
+static inline void kthread_frame_init(struct inactive_task_frame *frame,
+ unsigned long fun, unsigned long arg)
+{
+ frame->bx = fun;
+#ifdef CONFIG_X86_32
+ frame->di = arg;
+#else
+ frame->r12 = arg;
+#endif
}
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2dc4a021beea..8db3fdb6102e 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif
+#ifdef CONFIG_X86_X32_ABI
+extern const sys_call_ptr_t x32_sys_call_table[];
+#endif
+
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e046a405743d..e2389ce9bf58 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -6,6 +6,8 @@
#ifndef _ASM_X86_SYSCALL_WRAPPER_H
#define _ASM_X86_SYSCALL_WRAPPER_H
+struct pt_regs;
+
/* Mapping of registers to parameters for syscalls on x86-64 and x32 */
#define SC_X86_64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
@@ -28,13 +30,21 @@
* kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
* case as well.
*/
+#define __IA32_COMPAT_SYS_STUB0(x, name) \
+ asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\
+ ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \
+ asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\
+ { \
+ return __se_compat_sys_##name(); \
+ }
+
#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
{ \
return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ }
#define __IA32_SYS_STUBx(x, name, ...) \
asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \
@@ -48,16 +58,23 @@
* To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
* named __ia32_sys_*()
*/
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
- asmlinkage long __x64_sys_##sname(void)
-#define COND_SYSCALL(name) \
- cond_syscall(__x64_sys_##name); \
- cond_syscall(__ia32_sys_##name)
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
+ SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name) \
+ asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \
+ { \
+ return sys_ni_syscall(); \
+ } \
+ asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\
+ { \
+ return sys_ni_syscall(); \
+ }
#define SYS_NI(name) \
SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \
@@ -75,15 +92,24 @@
* of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
* with x86_64 obviously do not need such care.
*/
+#define __X32_COMPAT_SYS_STUB0(x, name, ...) \
+ asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\
+ ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \
+ asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\
+ { \
+ return __se_compat_sys_##name();\
+ }
+
#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \
asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
{ \
return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ }
#else /* CONFIG_X86_X32 */
+#define __X32_COMPAT_SYS_STUB0(x, name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
#endif /* CONFIG_X86_X32 */
@@ -94,6 +120,17 @@
* mapping of registers to parameters, we need to generate stubs for each
* of them.
*/
+#define COMPAT_SYSCALL_DEFINE0(name) \
+ static long __se_compat_sys_##name(void); \
+ static inline long __do_compat_sys_##name(void); \
+ __IA32_COMPAT_SYS_STUB0(x, name) \
+ __X32_COMPAT_SYS_STUB0(x, name) \
+ static long __se_compat_sys_##name(void) \
+ { \
+ return __do_compat_sys_##name(); \
+ } \
+ static inline long __do_compat_sys_##name(void)
+
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
@@ -181,15 +218,19 @@
* macros to work correctly.
*/
#ifndef SYSCALL_DEFINE0
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- asmlinkage long __x64_sys_##sname(void)
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
#endif
#ifndef COND_SYSCALL
-#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name)
+#define COND_SYSCALL(name) \
+ asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \
+ { \
+ return sys_ni_syscall(); \
+ }
#endif
#ifndef SYS_NI
@@ -201,7 +242,6 @@
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-struct pt_regs;
asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
asmlinkage long __x64_sys_time(const struct pt_regs *regs);
diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h
deleted file mode 100644
index 6ed2deacf1d0..000000000000
--- a/arch/x86/include/asm/tce.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file is derived from asm-powerpc/tce.h.
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- * Author: Jon Mason <jdmason@us.ibm.com>
- */
-
-#ifndef _ASM_X86_TCE_H
-#define _ASM_X86_TCE_H
-
-extern unsigned int specified_table_size;
-struct iommu_table;
-
-#define TCE_ENTRY_SIZE 8 /* in bytes */
-
-#define TCE_READ_SHIFT 0
-#define TCE_WRITE_SHIFT 1
-#define TCE_HUBID_SHIFT 2 /* unused */
-#define TCE_RSVD_SHIFT 8 /* unused */
-#define TCE_RPN_SHIFT 12
-#define TCE_UNUSED_SHIFT 48 /* unused */
-
-#define TCE_RPN_MASK 0x0000fffffffff000ULL
-
-extern void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction);
-extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
-extern void * __init alloc_tce_table(void);
-extern void __init free_tce_table(void *tbl);
-extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
-
-#endif /* _ASM_X86_TCE_H */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 70c09967a999..67315fa3956a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -25,13 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
*/
#define POKE_MAX_OPCODE_SIZE 5
-struct text_poke_loc {
- void *detour;
- void *addr;
- size_t len;
- const char opcode[POKE_MAX_OPCODE_SIZE];
-};
-
extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
@@ -45,14 +38,81 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
* no thread can be preempted in the instructions being modified (no iret to an
* invalid instruction possible) or if the instructions are changed from a
* consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing
+ * an inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
-extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
+
+extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void text_poke_finish(void);
+
+#define INT3_INSN_SIZE 1
+#define INT3_INSN_OPCODE 0xCC
+
+#define CALL_INSN_SIZE 5
+#define CALL_INSN_OPCODE 0xE8
+
+#define JMP32_INSN_SIZE 5
+#define JMP32_INSN_OPCODE 0xE9
+
+#define JMP8_INSN_SIZE 2
+#define JMP8_INSN_OPCODE 0xEB
+
+#define DISP32_SIZE 4
+
+static inline int text_opcode_size(u8 opcode)
+{
+ int size = 0;
+
+#define __CASE(insn) \
+ case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break
+
+ switch(opcode) {
+ __CASE(INT3);
+ __CASE(CALL);
+ __CASE(JMP32);
+ __CASE(JMP8);
+ }
+
+#undef __CASE
+
+ return size;
+}
+
+union text_poke_insn {
+ u8 text[POKE_MAX_OPCODE_SIZE];
+ struct {
+ u8 opcode;
+ s32 disp;
+ } __attribute__((packed));
+};
+
+static __always_inline
+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+{
+ static union text_poke_insn insn; /* per instance */
+ int size = text_opcode_size(opcode);
+
+ insn.opcode = opcode;
+
+ if (size > 1) {
+ insn.disp = (long)dest - (long)(addr + size);
+ if (size == 2) {
+ /*
+ * Ensure that for JMP9 the displacement
+ * actually fits the signed byte.
+ */
+ BUG_ON((insn.disp >> 31) != (insn.disp >> 7));
+ }
+ }
+
+ return &insn.text;
+}
+
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -63,9 +123,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
-
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@@ -73,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
* stack where the break point happened, and the saving of
* pt_regs. We can extend the original stack because of
* this gap. See the idtentry macro's create_gap option.
+ *
+ * Similarly entry_32.S will have a gap on the stack for (any) hardware
+ * exception and pt_regs; see FIXUP_FRAME.
*/
regs->sp -= sizeof(unsigned long);
*(unsigned long *)regs->sp = val;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f9453536f9bb..cf4327986e98 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -143,8 +143,8 @@ struct thread_info {
_TIF_NOHZ)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_BASE \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
+#define _TIF_WORK_CTXSW_BASE \
+ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
@@ -156,8 +156,14 @@ struct thread_info {
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif
-#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+#ifdef CONFIG_X86_IOPL_IOPERM
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \
+ _TIF_IO_BITMAP)
+#else
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY)
+#endif
+
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
@@ -233,15 +239,6 @@ static inline int arch_within_stack_frames(const void * const stack,
current_thread_info()->status & TS_COMPAT)
#endif
-/*
- * Force syscall return via IRET by making it look as if there was
- * some work pending. IRET is our most capable (but slowest) syscall
- * return path, which is able to restore modified SS, CS and certain
- * EFLAGS values that other (fast) syscall return instructions
- * are not able to restore properly.
- */
-#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
-
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index f23e7aaff4cd..820082bd6880 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -29,8 +29,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
* shootdown, enablement code for several hypervisors overrides
* .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
* a hypercall. To keep software pagetable walkers safe in this case we
- * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment
- * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h
+ * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment
+ * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
* for more details.
*/
static inline void __tlb_remove_table(void *table)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index dee375831962..6f66d841262d 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -290,26 +290,42 @@ static inline void __cr4_set(unsigned long cr4)
}
/* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
+static inline void cr4_set_bits_irqsoff(unsigned long mask)
{
- unsigned long cr4, flags;
+ unsigned long cr4;
- local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 | mask) != cr4)
__cr4_set(cr4 | mask);
- local_irq_restore(flags);
}
/* Clear in this cpu's CR4. */
-static inline void cr4_clear_bits(unsigned long mask)
+static inline void cr4_clear_bits_irqsoff(unsigned long mask)
{
- unsigned long cr4, flags;
+ unsigned long cr4;
- local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 & ~mask) != cr4)
__cr4_set(cr4 & ~mask);
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_set_bits_irqsoff(mask);
+ local_irq_restore(flags);
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_clear_bits_irqsoff(mask);
local_irq_restore(flags);
}
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index ace464f09681..4d705cb4d63b 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
__entry->ncpus, __entry->vector)
);
+TRACE_EVENT(hyperv_send_ipi_one,
+ TP_PROTO(int cpu,
+ int vector),
+ TP_ARGS(cpu, vector),
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(int, vector)
+ ),
+ TP_fast_assign(__entry->cpu = cpu;
+ __entry->vector = vector;
+ ),
+ TP_printk("cpu %d vector %x",
+ __entry->cpu, __entry->vector)
+ );
+
#endif /* CONFIG_HYPERV */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
deleted file mode 100644
index 54133017267c..000000000000
--- a/arch/x86/include/asm/trace/mpx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mpx
-
-#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_MPX_H
-
-#include <linux/tracepoint.h>
-
-#ifdef CONFIG_X86_INTEL_MPX
-
-TRACE_EVENT(mpx_bounds_register_exception,
-
- TP_PROTO(void __user *addr_referenced,
- const struct mpx_bndreg *bndreg),
- TP_ARGS(addr_referenced, bndreg),
-
- TP_STRUCT__entry(
- __field(void __user *, addr_referenced)
- __field(u64, lower_bound)
- __field(u64, upper_bound)
- ),
-
- TP_fast_assign(
- __entry->addr_referenced = addr_referenced;
- __entry->lower_bound = bndreg->lower_bound;
- __entry->upper_bound = bndreg->upper_bound;
- ),
- /*
- * Note that we are printing out the '~' of the upper
- * bounds register here. It is actually stored in its
- * one's complement form so that its 'init' state
- * corresponds to all 0's. But, that looks like
- * gibberish when printed out, so print out the 1's
- * complement instead of the actual value here. Note
- * though that you still need to specify filters for the
- * actual value, not the displayed one.
- */
- TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
- __entry->addr_referenced,
- __entry->lower_bound,
- ~__entry->upper_bound
- )
-);
-
-TRACE_EVENT(bounds_exception_mpx,
-
- TP_PROTO(const struct mpx_bndcsr *bndcsr),
- TP_ARGS(bndcsr),
-
- TP_STRUCT__entry(
- __field(u64, bndcfgu)
- __field(u64, bndstatus)
- ),
-
- TP_fast_assign(
- /* need to get rid of the 'const' on bndcsr */
- __entry->bndcfgu = (u64)bndcsr->bndcfgu;
- __entry->bndstatus = (u64)bndcsr->bndstatus;
- ),
-
- TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
- __entry->bndcfgu,
- __entry->bndstatus)
-);
-
-DECLARE_EVENT_CLASS(mpx_range_trace,
-
- TP_PROTO(unsigned long start,
- unsigned long end),
- TP_ARGS(start, end),
-
- TP_STRUCT__entry(
- __field(unsigned long, start)
- __field(unsigned long, end)
- ),
-
- TP_fast_assign(
- __entry->start = start;
- __entry->end = end;
- ),
-
- TP_printk("[0x%p:0x%p]",
- (void *)__entry->start,
- (void *)__entry->end
- )
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-TRACE_EVENT(mpx_new_bounds_table,
-
- TP_PROTO(unsigned long table_vaddr),
- TP_ARGS(table_vaddr),
-
- TP_STRUCT__entry(
- __field(unsigned long, table_vaddr)
- ),
-
- TP_fast_assign(
- __entry->table_vaddr = table_vaddr;
- ),
-
- TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
-);
-
-#else
-
-/*
- * This gets used outside of MPX-specific code, so we need a stub.
- */
-static inline
-void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
-{
-}
-
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH asm/trace/
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mpx
-#endif /* _TRACE_MPX_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b25e633033c3..ffa0dc8a535e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
+#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
+#endif
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 9c4435307ff8..61d93f062a36 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -444,8 +444,10 @@ __pu_label: \
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
+ __typeof__(ptr) __gu_ptr = (ptr); \
+ __typeof__(size) __gu_size = (size); \
__uaccess_begin_nospec(); \
- __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
+ __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__builtin_expect(__gu_err, 0); \
@@ -732,5 +734,28 @@ do { \
if (unlikely(__gu_err)) goto err_label; \
} while (0)
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)src,(type __user *)dst,label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
index db43f2a0d92c..aeed98c3c9e1 100644
--- a/arch/x86/include/asm/umip.h
+++ b/arch/x86/include/asm/umip.h
@@ -4,9 +4,9 @@
#include <linux/types.h>
#include <asm/ptrace.h>
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
bool fixup_umip_exception(struct pt_regs *regs);
#else
static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; }
-#endif /* CONFIG_X86_INTEL_UMIP */
+#endif /* CONFIG_X86_UMIP */
#endif /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 097589753fec..a7dd080749ce 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -5,12 +5,6 @@
#include <uapi/asm/unistd.h>
-# ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
-# else
-# define __SYSCALL_MASK (~0)
-# endif
-
# ifdef CONFIG_X86_32
# include <asm/unistd_32.h>
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 0bcdb1279361..f5e2eb12cb71 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -86,6 +86,14 @@
UNWIND_HINT sp_offset=\sp_offset
.endm
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
#else /* !__ASSEMBLY__ */
#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6e7caf65fa40..389174eaec79 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -138,7 +138,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
-extern void uv_bios_init(void);
+extern int uv_bios_init(void);
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e60c45fd3679..45ea95ce79b4 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,13 +12,26 @@ struct mm_struct;
#ifdef CONFIG_X86_UV
#include <linux/efi.h>
+#define UV_PROC_NODE "sgi_uv"
+
+static inline int uv(int uvtype)
+{
+ /* uv(0) is "any" */
+ if (uvtype >= 0 && uvtype <= 30)
+ return 1 << uvtype;
+ return 1;
+}
+
+extern unsigned long uv_systab_phys;
+
extern enum uv_system_type get_uv_system_type(void);
static inline bool is_early_uv_system(void)
{
- return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+ return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
}
extern int is_uv_system(void);
-extern int is_uv_hubless(void);
+extern int is_uv_hubbed(int uvtype);
+extern int is_uv_hubless(int uvtype);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
@@ -30,7 +43,8 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
-static inline int is_uv_hubless(void) { return 0; }
+static inline int is_uv_hubbed(int uv) { return 0; }
+static inline int is_uv_hubless(int uv) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline const struct cpumask *
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 44cf6d6deb7a..950cd1395d5d 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -19,6 +19,7 @@
#include <linux/topology.h>
#include <asm/types.h>
#include <asm/percpu.h>
+#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/bios.h>
#include <asm/irq_vectors.h>
@@ -243,83 +244,61 @@ static inline int uv_hub_info_check(int version)
#define UV4_HUB_REVISION_BASE 7
#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
-#ifdef UV1_HUB_IS_SUPPORTED
+/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */
static inline int is_uv1_hub(void)
{
- return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
-}
+#ifdef UV1_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(1));
#else
-static inline int is_uv1_hub(void)
-{
return 0;
-}
#endif
+}
-#ifdef UV2_HUB_IS_SUPPORTED
static inline int is_uv2_hub(void)
{
- return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
- (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
-}
+#ifdef UV2_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(2));
#else
-static inline int is_uv2_hub(void)
-{
return 0;
-}
#endif
+}
-#ifdef UV3_HUB_IS_SUPPORTED
static inline int is_uv3_hub(void)
{
- return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
- (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
-}
+#ifdef UV3_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(3));
#else
-static inline int is_uv3_hub(void)
-{
return 0;
-}
#endif
+}
/* First test "is UV4A", then "is UV4" */
-#ifdef UV4A_HUB_IS_SUPPORTED
-static inline int is_uv4a_hub(void)
-{
- return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
-}
-#else
static inline int is_uv4a_hub(void)
{
+#ifdef UV4A_HUB_IS_SUPPORTED
+ if (is_uv_hubbed(uv(4)))
+ return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
+#endif
return 0;
}
-#endif
-#ifdef UV4_HUB_IS_SUPPORTED
static inline int is_uv4_hub(void)
{
- return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
-}
+#ifdef UV4_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(4));
#else
-static inline int is_uv4_hub(void)
-{
return 0;
-}
#endif
+}
static inline int is_uvx_hub(void)
{
- if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
- return uv_hub_info->hub_revision;
-
- return 0;
+ return (is_uv_hubbed(-2) >= uv(2));
}
static inline int is_uv_hub(void)
{
-#ifdef UV1_HUB_IS_SUPPORTED
- return uv_hub_info->hub_revision;
-#endif
- return is_uvx_hub();
+ return is_uv1_hub() || is_uvx_hub();
}
union uvh_apicid {
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 230474e2ddb5..bbcdc7b8f963 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -21,6 +21,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_pvclock_page;
long sym_hvclock_page;
+ long sym_timens_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index ba71a63cdac4..6ee1f7dba34b 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -21,6 +21,7 @@
#include <clocksource/hyperv_timer.h>
#define __vdso_data (VVAR(_vdso_data))
+#define __timens_vdso_data (TIMENS(_vdso_data))
#define VDSO_HAS_TIME 1
@@ -51,11 +52,18 @@ extern struct pvclock_vsyscall_time_info pvclock_page
__attribute__((visibility("hidden")));
#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
extern struct ms_hyperv_tsc_page hvclock_page
__attribute__((visibility("hidden")));
#endif
+#ifdef CONFIG_TIME_NS
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+ return __timens_vdso_data;
+}
+#endif
+
#ifndef BUILD_VDSO32
static __always_inline
@@ -96,8 +104,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
#else
-#define VDSO_HAS_32BIT_FALLBACK 1
-
static __always_inline
long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
@@ -228,7 +234,7 @@ static u64 vread_pvclock(void)
}
#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
static u64 vread_hvclock(void)
{
return hv_read_tsc_page(&hvclock_page);
@@ -251,7 +257,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode)
return vread_pvclock();
}
#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
if (clock_mode == VCLOCK_HVCLOCK) {
barrier();
return vread_hvclock();
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
new file mode 100644
index 000000000000..29837740b520
--- /dev/null
+++ b/arch/x86/include/asm/vmalloc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_VMALLOC_H
+#define _ASM_X86_VMALLOC_H
+
+#include <asm/pgtable_areas.h>
+
+#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
new file mode 100644
index 000000000000..ac9fc51e2b18
--- /dev/null
+++ b/arch/x86/include/asm/vmware.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+#ifndef _ASM_X86_VMWARE_H
+#define _ASM_X86_VMWARE_H
+
+#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
+#include <linux/stringify.h>
+
+/*
+ * The hypercall definitions differ in the low word of the %edx argument
+ * in the following way: the old port base interface uses the port
+ * number to distinguish between high- and low bandwidth versions.
+ *
+ * The new vmcall interface instead uses a set of flags to select
+ * bandwidth mode and transfer direction. The flags should be loaded
+ * into %dx by any user and are automatically replaced by the port
+ * number if the VMWARE_HYPERVISOR_PORT method is used.
+ *
+ * In short, new driver code should strictly use the new definition of
+ * %dx content.
+ */
+
+/* Old port-based version */
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB 0x5659
+
+/* Current vmcall / vmmcall version */
+#define VMWARE_HYPERVISOR_HB BIT(0)
+#define VMWARE_HYPERVISOR_OUT BIT(1)
+
+/* The low bandwidth call. The low word of edx is presumed clear. */
+#define VMWARE_HYPERCALL \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
+ "inl (%%dx), %%eax", \
+ "vmcall", X86_FEATURE_VMCALL, \
+ "vmmcall", X86_FEATURE_VMW_VMMCALL)
+
+/*
+ * The high bandwidth out call. The low word of edx is presumed to have the
+ * HB and OUT bits set.
+ */
+#define VMWARE_HYPERCALL_HB_OUT \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+ "rep outsb", \
+ "vmcall", X86_FEATURE_VMCALL, \
+ "vmmcall", X86_FEATURE_VMW_VMMCALL)
+
+/*
+ * The high bandwidth in call. The low word of edx is presumed to have the
+ * HB bit set.
+ */
+#define VMWARE_HYPERCALL_HB_IN \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+ "rep insb", \
+ "vmcall", X86_FEATURE_VMCALL, \
+ "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a39136b0d509..2a85287b3685 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -15,66 +15,70 @@
#include <linux/bitops.h>
#include <linux/types.h>
#include <uapi/asm/vmx.h>
+#include <asm/vmxfeatures.h>
+
+#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f)
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
*/
-#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
-#define CPU_BASED_HLT_EXITING 0x00000080
-#define CPU_BASED_INVLPG_EXITING 0x00000200
-#define CPU_BASED_MWAIT_EXITING 0x00000400
-#define CPU_BASED_RDPMC_EXITING 0x00000800
-#define CPU_BASED_RDTSC_EXITING 0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
-#define CPU_BASED_CR3_STORE_EXITING 0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
-#define CPU_BASED_CR8_STORE_EXITING 0x00100000
-#define CPU_BASED_TPR_SHADOW 0x00200000
-#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
-#define CPU_BASED_MOV_DR_EXITING 0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
-#define CPU_BASED_USE_IO_BITMAPS 0x02000000
-#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000
-#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
-#define CPU_BASED_MONITOR_EXITING 0x20000000
-#define CPU_BASED_PAUSE_EXITING 0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING)
+#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING)
+#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING)
+#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING)
+#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING)
+#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING)
+#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING)
+#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING)
+#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING)
+#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
+#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING)
+#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR)
+#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING)
+#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING)
+#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING)
+#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS)
+#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG)
+#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS)
+#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING)
+#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING)
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS)
#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
/*
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
-#define SECONDARY_EXEC_DESC 0x00000004
-#define SECONDARY_EXEC_RDTSCP 0x00000008
-#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
-#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
-#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
-#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
-#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
-#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
-#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
-#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
-#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
-#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
-#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
-#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
-#define SECONDARY_EXEC_ENABLE_PML 0x00020000
-#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000
-#define SECONDARY_EXEC_XSAVES 0x00100000
-#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
-#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000
-#define SECONDARY_EXEC_TSC_SCALING 0x02000000
-
-#define PIN_BASED_EXT_INTR_MASK 0x00000001
-#define PIN_BASED_NMI_EXITING 0x00000008
-#define PIN_BASED_VIRTUAL_NMIS 0x00000020
-#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
-#define PIN_BASED_POSTED_INTR 0x00000080
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES)
+#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT)
+#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING)
+#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP)
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC)
+#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID)
+#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING)
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST)
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT)
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY)
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING)
+#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING)
+#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID)
+#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC)
+#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS)
+#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING)
+#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
+#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
+#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
+#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
+#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000
+
+#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
+#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
+#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS)
+#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER)
+#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR)
#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
@@ -110,9 +114,12 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
#define VMX_MISC_ZERO_LEN_INS 0x40000000
+#define VMX_MISC_MSR_LIST_MULTIPLIER 512
/* VMFUNC functions */
-#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
+#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28)
+
+#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
#define VMFUNC_EPTP_ENTRIES 512
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
@@ -562,6 +569,20 @@ enum vm_instruction_error_number {
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
};
+/*
+ * VM-instruction errors that can be encountered on VM-Enter, used to trace
+ * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter
+ * from a SMI Transfer Monitor are not included as things have gone seriously
+ * sideways if we get one of those...
+ */
+#define VMX_VMENTER_INSTRUCTION_ERRORS \
+ { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \
+ { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \
+ { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \
+ { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \
+ { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \
+ { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" }
+
enum vmx_l1d_flush_state {
VMENTER_L1D_FLUSH_AUTO,
VMENTER_L1D_FLUSH_NEVER,
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
new file mode 100644
index 000000000000..a50e4a0de315
--- /dev/null
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_VMXFEATURES_H
+#define _ASM_X86_VMXFEATURES_H
+
+/*
+ * Defines VMX CPU feature bits
+ */
+#define NVMXINTS 3 /* N 32-bit words worth of info */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */
+#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */
+#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */
+#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */
+#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */
+#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */
+
+/* EPT/VPID features, scattered to bits 16-23 */
+#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */
+#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
+#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */
+#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */
+
+/* Aggregated APIC features 24-27 */
+#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */
+#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
+
+/* VM-Functions, shifted to bits 28-31 */
+#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */
+
+/* Primary Processor-Based VM-Execution Controls, word 1 */
+#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */
+#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
+#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */
+#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */
+#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */
+#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */
+#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */
+#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
+#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
+#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
+#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
+#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
+#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
+#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */
+#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/
+#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */
+#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */
+#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */
+#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */
+#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */
+#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */
+
+/* Secondary Processor-Based VM-Execution Controls, word 2 */
+#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */
+#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */
+#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */
+#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */
+#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */
+#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */
+#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */
+#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */
+#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */
+#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */
+#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */
+#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/
+#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */
+#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */
+#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */
+#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
+#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
+#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
+#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
+#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
+#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
+#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
+
+#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 32f5d9a0b90e..183e98e49ab9 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -19,10 +19,10 @@
#ifndef _ASM_X86_VVAR_H
#define _ASM_X86_VVAR_H
-#if defined(__VVAR_KERNEL_LDS)
-
-/* The kernel linker script defines its own magic to put vvars in the
- * right place.
+#ifdef EMIT_VVAR
+/*
+ * EMIT_VVAR() is used by the kernel linker script to put vvars in the
+ * right place. Also, it's used by kernel code to import offsets values.
*/
#define DECLARE_VVAR(offset, type, name) \
EMIT_VVAR(name, offset)
@@ -33,9 +33,12 @@ extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
extern type vvar_ ## name[CS_BASES] \
- __attribute__((visibility("hidden")));
+ __attribute__((visibility("hidden"))); \
+ extern type timens_ ## name[CS_BASES] \
+ __attribute__((visibility("hidden"))); \
#define VVAR(name) (vvar_ ## name)
+#define TIMENS(name) (timens_ ## name)
#define DEFINE_VVAR(type, name) \
type name[CS_BASES] \
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index ac0934189017..96d9cd208610 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -51,12 +51,14 @@ struct x86_init_resources {
* are set up.
* @intr_init: interrupt init code
* @trap_init: platform specific trap setup
+ * @intr_mode_select: interrupt delivery mode selection
* @intr_mode_init: interrupt delivery mode setup
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
void (*trap_init)(void);
+ void (*intr_mode_select)(void);
void (*intr_mode_init)(void);
};
@@ -134,10 +136,12 @@ struct x86_hyper_init {
/**
* struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_poitner: set RSDP address
* @get_root_pointer: get RSDP address
* @reduced_hw_early_init: hardware reduced platform early init
*/
struct x86_init_acpi {
+ void (*set_root_pointer)(u64 addr);
u64 (*get_root_pointer)(void);
void (*reduced_hw_early_init)(void);
};
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 42e1245af0d8..ff4b52e37e60 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);
void xen_arch_unregister_cpu(int num);
#endif
-extern void xen_set_iopl_mask(unsigned mask);
-
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 62ca03ef5c65..9139b3e86316 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -379,12 +379,9 @@ struct xen_pmu_arch {
* Prefix forces emulation of some non-trapping instructions.
* Currently only CPUID.
*/
-#ifdef __ASSEMBLY__
-#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
-#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
-#else
-#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
-#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
-#endif
+#include <asm/emulate_prefix.h>
+
+#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
+#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
#endif /* _ASM_X86_XEN_INTERFACE_H */
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index 116777e7f387..63cd41b2e17a 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
free_pages((unsigned long) cpu_addr, get_order(size));
}
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
- dma_addr_t dev_addr, unsigned long offset, size_t size,
- enum dma_data_direction dir, unsigned long attrs) { }
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs) { }
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
OpenPOWER on IntegriCloud