diff options
Diffstat (limited to 'arch/x86/include/asm')
56 files changed, 525 insertions, 555 deletions
| diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 3ff577c0b102..cd339b88d5d4 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -7,9 +7,11 @@  # define __ASM_FORM_RAW(x)     x  # define __ASM_FORM_COMMA(x) x,  #else -# define __ASM_FORM(x)	" " #x " " -# define __ASM_FORM_RAW(x)     #x -# define __ASM_FORM_COMMA(x) " " #x "," +#include <linux/stringify.h> + +# define __ASM_FORM(x)	" " __stringify(x) " " +# define __ASM_FORM_RAW(x)     __stringify(x) +# define __ASM_FORM_COMMA(x) " " __stringify(x) ","  #endif  #ifndef __x86_64__ @@ -139,9 +141,6 @@  # define _ASM_EXTABLE_EX(from, to)				\  	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to)			\ -	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) -  # define _ASM_NOKPROBE(entry)					\  	.pushsection "_kprobe_blacklist","aw" ;			\  	_ASM_ALIGN ;						\ @@ -170,9 +169,6 @@  # define _ASM_EXTABLE_EX(from, to)				\  	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to)			\ -	_ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) -  /* For C file, we already have NOKPROBE_SYMBOL macro */  #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h deleted file mode 100644 index facd374a1bf7..000000000000 --- a/arch/x86/include/asm/calgary.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Derived from include/asm-powerpc/iommu.h - * - * Copyright IBM Corporation, 2006-2007 - * - * Author: Jon Mason <jdmason@us.ibm.com> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - */ - -#ifndef _ASM_X86_CALGARY_H -#define _ASM_X86_CALGARY_H - -#include <linux/spinlock.h> -#include <linux/device.h> -#include <linux/dma-mapping.h> -#include <linux/timer.h> -#include <asm/types.h> - -struct iommu_table { -	const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ -	unsigned long  it_base;      /* mapped address of tce table */ -	unsigned long  it_hint;      /* Hint for next alloc */ -	unsigned long *it_map;       /* A simple allocation bitmap for now */ -	void __iomem  *bbar;         /* Bridge BAR */ -	u64	       tar_val;      /* Table Address Register */ -	struct timer_list watchdog_timer; -	spinlock_t     it_lock;      /* Protects it_map */ -	unsigned int   it_size;      /* Size of iommu table in entries */ -	unsigned char  it_busno;     /* Bus number this table belongs to */ -}; - -struct cal_chipset_ops { -	void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev); -	void (*tce_cache_blast)(struct iommu_table *tbl); -	void (*dump_error_regs)(struct iommu_table *tbl); -}; - -#define TCE_TABLE_SIZE_UNSPECIFIED	~0 -#define TCE_TABLE_SIZE_64K		0 -#define TCE_TABLE_SIZE_128K		1 -#define TCE_TABLE_SIZE_256K		2 -#define TCE_TABLE_SIZE_512K		3 -#define TCE_TABLE_SIZE_1M		4 -#define TCE_TABLE_SIZE_2M		5 -#define TCE_TABLE_SIZE_4M		6 -#define TCE_TABLE_SIZE_8M		7 - -extern int use_calgary; - -#ifdef CONFIG_CALGARY_IOMMU -extern int detect_calgary(void); -#else -static inline int detect_calgary(void) { return -ENODEV; } -#endif - -#endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 8348f7d69fd5..804734058c77 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -65,6 +65,13 @@ enum exception_stack_ordering {  #endif +#ifdef CONFIG_X86_32 +struct doublefault_stack { +	unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)]; +	struct x86_hw_tss tss; +} __aligned(PAGE_SIZE); +#endif +  /*   * cpu_entry_area is a percpu region that contains things needed by the CPU   * and early entry/exit code.  Real types aren't used for all fields here @@ -78,10 +85,19 @@ struct cpu_entry_area {  	/*  	 * The GDT is just below entry_stack and thus serves (on x86_64) as -	 * a a read-only guard page. +	 * a read-only guard page. On 32-bit the GDT must be writeable, so +	 * it needs an extra guard page.  	 */ +#ifdef CONFIG_X86_32 +	char guard_entry_stack[PAGE_SIZE]; +#endif  	struct entry_stack_page entry_stack_page; +#ifdef CONFIG_X86_32 +	char guard_doublefault_stack[PAGE_SIZE]; +	struct doublefault_stack doublefault_stack; +#endif +  	/*  	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because  	 * we need task switches to work, and task switches write to the TSS. @@ -94,7 +110,6 @@ struct cpu_entry_area {  	 */  	struct cea_exception_stacks estacks;  #endif -#ifdef CONFIG_CPU_SUP_INTEL  	/*  	 * Per CPU debug store for Intel performance monitoring. Wastes a  	 * full page at the moment. @@ -105,11 +120,13 @@ struct cpu_entry_area {  	 * Reserve enough fixmap PTEs.  	 */  	struct debug_store_buffers cpu_debug_buffers; -#endif  }; -#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS) + +/* Total size includes the readonly IDT mapping page as well: */ +#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)  DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);  DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); @@ -117,13 +134,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);  extern void setup_cpu_entry_areas(void);  extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); +/* Single page reserved for the readonly IDT mapping: */  #define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE  #define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)  #define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)  #define CPU_ENTRY_AREA_MAP_SIZE			\ -	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) +	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)  extern struct cpu_entry_area *get_cpu_entry_area(int cpu); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0652d3eed9bd..e9b62498fe75 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -292,6 +292,7 @@  #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */  #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */  #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU		(13*32+ 4) /* Read processor register at user level */  #define X86_FEATURE_WBNOINVD		(13*32+ 9) /* WBNOINVD instruction */  #define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */  #define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */ @@ -399,5 +400,7 @@  #define X86_BUG_MDS			X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */  #define X86_BUG_MSBDS_ONLY		X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */  #define X86_BUG_SWAPGS			X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA			X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */  #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index 0acf5ee45a21..f58de66091e5 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,10 +2,17 @@  #ifndef _ASM_X86_CRASH_H  #define _ASM_X86_CRASH_H +struct kimage; +  int crash_load_segments(struct kimage *image); -int crash_copy_backup_region(struct kimage *image);  int crash_setup_memmap_entries(struct kimage *image,  		struct boot_params *params);  void crash_smp_send_stop(void); +#ifdef CONFIG_KEXEC_CORE +void __init crash_reserve_low_1M(void); +#else +static inline void __init crash_reserve_low_1M(void) { } +#endif +  #endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index a8f6c809d9b1..5e12c63b47aa 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,9 +6,6 @@ struct dev_archdata {  #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)  	void *iommu; /* hook for IOMMU specific extension */  #endif -#ifdef CONFIG_STA2X11 -	bool is_sta2x11; -#endif  };  #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index a5ea841cc6d2..8e1d0bb46361 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -22,7 +22,7 @@  # define DISABLE_SMAP	(1<<(X86_FEATURE_SMAP & 31))  #endif -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP  # define DISABLE_UMIP	0  #else  # define DISABLE_UMIP	(1<<(X86_FEATURE_UMIP & 31)) diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h deleted file mode 100644 index 1a19251eaac9..000000000000 --- a/arch/x86/include/asm/dma-direct.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_DMA_DIRECT_H -#define ASM_X86_DMA_DIRECT_H 1 - -bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); - -#endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h new file mode 100644 index 000000000000..af9a14ac8962 --- /dev/null +++ b/arch/x86/include/asm/doublefault.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DOUBLEFAULT_H +#define _ASM_X86_DOUBLEFAULT_H + +#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +extern void doublefault_init_cpu_tss(void); +#else +static inline void doublefault_init_cpu_tss(void) +{ +} +#endif + +#endif /* _ASM_X86_DOUBLEFAULT_H */ diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h index c3aa4b5e49e2..314f75d886d0 100644 --- a/arch/x86/include/asm/e820/types.h +++ b/arch/x86/include/asm/e820/types.h @@ -29,6 +29,14 @@ enum e820_type {  	E820_TYPE_PRAM		= 12,  	/* +	 * Special-purpose memory is indicated to the system via the +	 * EFI_MEMORY_SP attribute. Define an e820 translation of this +	 * memory type for the purpose of reserving this range and +	 * marking it with the IORES_DESC_SOFT_RESERVED designation. +	 */ +	E820_TYPE_SOFT_RESERVED	= 0xefffffff, + +	/*  	 * Reserved RAM used by the kernel itself if  	 * CONFIG_INTEL_TXT=y is enabled, memory of this type  	 * will be included in the S3 integrity calculation diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 43a82e59c59d..d028e9acdf1c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -140,7 +140,6 @@ extern void efi_delete_dummy_variable(void);  extern void efi_switch_mm(struct mm_struct *mm);  extern void efi_recover_from_page_fault(unsigned long phys_addr);  extern void efi_free_boot_services(void); -extern void efi_reserve_boot_services(void);  struct efi_setup_data {  	u64 fw_vendor; @@ -244,6 +243,8 @@ static inline bool efi_is_64bit(void)  extern bool efi_reboot_required(void);  extern bool efi_is_table_address(unsigned long phys_addr); +extern void efi_find_mirror(void); +extern void efi_reserve_boot_services(void);  #else  static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}  static inline bool efi_reboot_required(void) @@ -254,6 +255,20 @@ static inline  bool efi_is_table_address(unsigned long phys_addr)  {  	return false;  } +static inline void efi_find_mirror(void) +{ +} +static inline void efi_reserve_boot_services(void) +{ +}  #endif /* CONFIG_EFI */ +#ifdef CONFIG_EFI_FAKE_MEMMAP +extern void __init efi_fake_memmap_early(void); +#else +static inline void efi_fake_memmap_early(void) +{ +} +#endif +  #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 000000000000..70f5b98a5286 --- /dev/null +++ b/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX  0x0f,0x0b,0x78,0x65,0x6e  /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX  0x0f,0x0b,0x6b,0x76,0x6d	/* ud2 ; .ascii "kvm" */ + +#endif diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 0c47aa82e2e2..28183ee3cc42 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -156,7 +156,7 @@ extern pte_t *kmap_pte;  extern pte_t *pkmap_page_table;  void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,  		       phys_addr_t phys, pgprot_t flags);  #ifndef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4c95c365058a..44c48e34d799 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)  static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)  { -	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +	return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;  }  /* diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index c38a66661576..c2a7458f912c 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,6 +28,19 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)  	return addr;  } +/* + * When a ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, we + * place the direct caller in the ORIG_AX part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ +	/* Emulate a call */ +	regs->orig_ax = addr; +} +  #ifdef CONFIG_DYNAMIC_FTRACE  struct dyn_arch_ftrace { diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 7741e211f7f5..5f10f7f2098d 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -86,6 +86,8 @@  #define HV_X64_ACCESS_FREQUENCY_MSRS		BIT(11)  /* AccessReenlightenmentControls privilege */  #define HV_X64_ACCESS_REENLIGHTENMENT		BIT(13) +/* AccessTscInvariantControls privilege */ +#define HV_X64_ACCESS_TSC_INVARIANT		BIT(15)  /*   * Feature identification: indicates which flags were specified at partition @@ -278,6 +280,9 @@  #define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107  #define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108 +/* TSC invariant control */ +#define HV_X64_MSR_TSC_INVARIANT_CONTROL	0x40000118 +  /*   * Declare the MSR used to setup pages used to communicate with the hypervisor.   */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 154f27be8bfc..5c1ae3eff9d4 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -45,6 +45,7 @@ struct insn {  		struct insn_field immediate2;	/* for 64bit imm or seg16 */  	}; +	int	emulate_prefix_size;  	insn_attr_t attr;  	unsigned char opnd_bytes;  	unsigned char addr_bytes; @@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)  	return (insn->vex_prefix.nbytes == 4);  } +static inline int insn_has_emulate_prefix(struct insn *insn) +{ +	return !!insn->emulate_prefix_size; +} +  /* Ensure this instruction is decoded completely */  static inline int insn_complete(struct insn *insn)  { diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 6bed97ff6db2..9997521fc5cd 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -180,8 +180,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)   * The default ioremap() behavior is non-cached; if you need something   * else, you probably want one of the following.   */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -#define ioremap_nocache ioremap_nocache  extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);  #define ioremap_uc ioremap_uc  extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); @@ -205,10 +203,7 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long   * If the area you are trying to map is a PCI BAR you should have a   * look at pci_iomap().   */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ -	return ioremap_nocache(offset, size); -} +void __iomem *ioremap(resource_size_t offset, unsigned long size);  #define ioremap ioremap  extern void iounmap(volatile void __iomem *addr); diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..02c6ef8f7667 --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include <linux/refcount.h> +#include <asm/processor.h> + +struct io_bitmap { +	u64		sequence; +	refcount_t	refcnt; +	/* The maximum number of bytes to copy so all zero bits are covered */ +	unsigned int	max; +	unsigned long	bitmap[IO_BITMAP_LONGS]; +}; + +struct task_struct; + +#ifdef CONFIG_X86_IOPL_IOPERM +void io_bitmap_share(struct task_struct *tsk); +void io_bitmap_exit(void); + +void tss_update_io_bitmap(void); +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(void) { } +static inline void tss_update_io_bitmap(void) { } +#endif + +#endif diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index b91623d521d9..bf1ed2ddc74b 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,10 +2,28 @@  #ifndef _ASM_X86_IOMMU_H  #define _ASM_X86_IOMMU_H +#include <linux/acpi.h> + +#include <asm/e820/api.h> +  extern int force_iommu, no_iommu;  extern int iommu_detected;  /* 10 seconds */  #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +static inline int __init +arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +{ +	u64 start = rmrr->base_address; +	u64 end = rmrr->end_address + 1; + +	if (e820__mapped_all(start, end, E820_TYPE_RESERVED)) +		return 0; + +	pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", +	       start, end - 1); +	return -EINVAL; +} +  #endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5e7d6b46de97..6802c59e8252 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -66,10 +66,6 @@ struct kimage;  # define KEXEC_ARCH KEXEC_ARCH_X86_64  #endif -/* Memory to backup during crash kdump */ -#define KEXEC_BACKUP_SRC_START	(0UL) -#define KEXEC_BACKUP_SRC_END	(640 * 1024UL - 1)	/* 640K */ -  /*   * This function is responsible for capturing register states if coming   * via panic otherwise just fix up the ss and sp if coming via kernel @@ -154,12 +150,6 @@ struct kimage_arch {  	pud_t *pud;  	pmd_t *pmd;  	pte_t *pte; -	/* Details of backup region */ -	unsigned long backup_src_start; -	unsigned long backup_src_sz; - -	/* Physical address of backup segment */ -	unsigned long backup_load_addr;  	/* Core ELF header buffer */  	void *elf_headers; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24d6598dea29..b79cd6aa4075 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -156,10 +156,8 @@ enum kvm_reg {  	VCPU_REGS_R15 = __VCPU_REGS_R15,  #endif  	VCPU_REGS_RIP, -	NR_VCPU_REGS -}; +	NR_VCPU_REGS, -enum kvm_reg_ex {  	VCPU_EXREG_PDPTR = NR_VCPU_REGS,  	VCPU_EXREG_CR3,  	VCPU_EXREG_RFLAGS, @@ -312,9 +310,12 @@ struct kvm_rmap_head {  struct kvm_mmu_page {  	struct list_head link;  	struct hlist_node hash_link; +	struct list_head lpage_disallowed_link; +  	bool unsync;  	u8 mmu_valid_gen;  	bool mmio_cached; +	bool lpage_disallowed; /* Can't be replaced by an equiv large page */  	/*  	 * The following two entries are used to key the shadow page in the @@ -451,6 +452,11 @@ struct kvm_pmc {  	u64 eventsel;  	struct perf_event *perf_event;  	struct kvm_vcpu *vcpu; +	/* +	 * eventsel value for general purpose counters, +	 * ctrl value for fixed counters. +	 */ +	u64 current_config;  };  struct kvm_pmu { @@ -469,7 +475,21 @@ struct kvm_pmu {  	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];  	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];  	struct irq_work irq_work; -	u64 reprogram_pmi; +	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); +	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); +	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); + +	/* +	 * The gate to release perf_events not marked in +	 * pmc_in_use only once in a vcpu time slice. +	 */ +	bool need_cleanup; + +	/* +	 * The total number of programmed perf_events and it helps to avoid +	 * redundant check before cleanup if guest don't use vPMU at all. +	 */ +	u8 event_count;  };  struct kvm_pmu_ops; @@ -562,6 +582,7 @@ struct kvm_vcpu_arch {  	u64 smbase;  	u64 smi_count;  	bool tpr_access_reporting; +	bool xsaves_enabled;  	u64 ia32_xss;  	u64 microcode_version;  	u64 arch_capabilities; @@ -859,6 +880,7 @@ struct kvm_arch {  	 */  	struct list_head active_mmu_pages;  	struct list_head zapped_obsolete_pages; +	struct list_head lpage_disallowed_mmu_pages;  	struct kvm_page_track_notifier_node mmu_sp_tracker;  	struct kvm_page_track_notifier_head track_notifier_head; @@ -933,6 +955,7 @@ struct kvm_arch {  	bool exception_payload_enabled;  	struct kvm_pmu_event_filter *pmu_event_filter; +	struct task_struct *nx_lpage_recovery_thread;  };  struct kvm_vm_stat { @@ -946,6 +969,7 @@ struct kvm_vm_stat {  	ulong mmu_unsync;  	ulong remote_tlb_flush;  	ulong lpages; +	ulong nx_lpage_splits;  	ulong max_mmu_page_hash_collisions;  }; @@ -1035,7 +1059,6 @@ struct kvm_x86_ops {  			    struct kvm_segment *var, int seg);  	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);  	void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); -	void (*decache_cr3)(struct kvm_vcpu *vcpu);  	void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);  	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);  	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -1084,7 +1107,7 @@ struct kvm_x86_ops {  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);  	void (*enable_irq_window)(struct kvm_vcpu *vcpu);  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); -	bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); +	bool (*get_enable_apicv)(struct kvm *kvm);  	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);  	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);  	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); @@ -1351,6 +1374,7 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,  void kvm_enable_efer_bits(u64);  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); +int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);  int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);  int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);  int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); @@ -1571,6 +1595,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);  void kvm_make_mclock_inprogress_request(struct kvm *kvm);  void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, +				       unsigned long *vcpu_bitmap);  void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,  				     struct kvm_async_pf *work); diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7f..365111789cc6 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -13,10 +13,6 @@  #ifdef __ASSEMBLY__ -#define GLOBAL(name)	\ -	.globl name;	\ -	name: -  #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)  #define __ALIGN		.p2align 4, 0x90  #define __ALIGN_STR	__stringify(__ALIGN) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 16ae821483c8..5f33924e200f 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -26,12 +26,14 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,  #ifdef CONFIG_PERF_EVENTS +DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);  DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);  static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)  {  	if (static_branch_unlikely(&rdpmc_always_available_key) || -	    atomic_read(&mm->context.perf_rdpmc_allowed)) +	    (!static_branch_unlikely(&rdpmc_never_available_key) && +	     atomic_read(&mm->context.perf_rdpmc_allowed)))  		cr4_set_bits_irqsoff(X86_CR4_PCE);  	else  		cr4_clear_bits_irqsoff(X86_CR4_PCE); diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 7948a17febb4..c215d2762488 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -15,6 +15,8 @@ struct mod_arch_specific {  #ifdef CONFIG_X86_64  /* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX "  #elif defined CONFIG_M486  #define MODULE_PROC_FAMILY "486 "  #elif defined CONFIG_M586 diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f4138aeb4280..6b79515abb82 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -219,6 +219,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)  void __init hyperv_init(void);  void hyperv_setup_mmu_ops(void);  void *hv_alloc_hyperv_page(void); +void *hv_alloc_hyperv_zeroed_page(void);  void hv_free_hyperv_page(unsigned long addr);  void hyperv_reenlightenment_intr(struct pt_regs *regs);  void set_hv_tscchange_cb(void (*cb)(void)); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 20ce682a2540..084e98da04a7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -93,6 +93,18 @@  						  * Microarchitectural Data  						  * Sampling (MDS) vulnerabilities.  						  */ +#define ARCH_CAP_PSCHANGE_MC_NO		BIT(6)	 /* +						  * The processor is not susceptible to a +						  * machine check error due to modifying the +						  * code page size along with either the +						  * physical address or cache type +						  * without TLB invalidation. +						  */ +#define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO			BIT(8)	/* +						 * Not susceptible to +						 * TSX Async Abort (TAA) vulnerabilities. +						 */  #define MSR_IA32_FLUSH_CMD		0x0000010b  #define L1D_FLUSH			BIT(0)	/* @@ -103,6 +115,10 @@  #define MSR_IA32_BBL_CR_CTL		0x00000119  #define MSR_IA32_BBL_CR_CTL3		0x0000011e +#define MSR_IA32_TSX_CTRL		0x00000122 +#define TSX_CTRL_RTM_DISABLE		BIT(0)	/* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR		BIT(1)	/* Disable TSX enumeration */ +  #define MSR_IA32_SYSENTER_CS		0x00000174  #define MSR_IA32_SYSENTER_ESP		0x00000175  #define MSR_IA32_SYSENTER_EIP		0x00000176 @@ -393,6 +409,8 @@  #define MSR_AMD_PSTATE_DEF_BASE		0xc0010064  #define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140  #define MSR_AMD64_OSVW_STATUS		0xc0010141 +#define MSR_AMD_PPIN_CTL		0xc00102f0 +#define MSR_AMD_PPIN			0xc00102f1  #define MSR_AMD64_LS_CFG		0xc0011020  #define MSR_AMD64_DC_CFG		0xc0011022  #define MSR_AMD64_BU_CFG2		0xc001102a diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80bc209c0708..5c24a7b35166 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);  #include <asm/segment.h>  /** - * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability   *   * This uses the otherwise unused and obsolete VERW instruction in   * combination with microcode which triggers a CPU buffer flush when the @@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void)  }  /** - * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability   *   * Clear CPU buffers if the corresponding static key is enabled   */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 69089d46f128..86e7317eb31f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)  {  	PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);  } -static inline void set_iopl_mask(unsigned mask) -{ -	PVOP_VCALL1(cpu.set_iopl_mask, mask); -}  static inline void paravirt_activate_mm(struct mm_struct *prev,  					struct mm_struct *next) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 70b654f3ffe5..84812964d3dd 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -140,8 +140,6 @@ struct pv_cpu_ops {  	void (*load_sp0)(unsigned long sp0); -	void (*set_iopl_mask)(unsigned mask); -  	void (*wbinvd)(void);  	/* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index e662f987dfa2..90d0731fdcb6 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -12,8 +12,6 @@  #include <asm/pat.h>  #include <asm/x86_init.h> -#ifdef __KERNEL__ -  struct pci_sysdata {  	int		domain;		/* PCI domain */  	int		node;		/* NUMA node */ @@ -118,11 +116,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);  #define native_setup_msi_irqs		NULL  #define native_teardown_msi_irq		NULL  #endif -#endif  /* __KERNEL__ */ - -#ifdef CONFIG_X86_64 -#include <asm/pci_64.h> -#endif  /* generic pci stuff */  #include <asm-generic/pci.h> diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h deleted file mode 100644 index f5411de0ae11..000000000000 --- a/arch/x86/include/asm/pci_64.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PCI_64_H -#define _ASM_X86_PCI_64_H - -#ifdef __KERNEL__ - -#ifdef CONFIG_CALGARY_IOMMU -static inline void *pci_iommu(struct pci_bus *bus) -{ -	struct pci_sysdata *sd = bus->sysdata; -	return sd->iommu; -} - -static inline void set_pci_iommu(struct pci_bus *bus, void *val) -{ -	struct pci_sysdata *sd = bus->sysdata; -	sd->iommu = val; -} -#endif /* CONFIG_CALGARY_IOMMU */ - -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, -			      int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, -			       int reg, int len, u32 value); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index e3633795fb22..5afb5e0fe903 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)  #define pmd_read_atomic pmd_read_atomic  /* - * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by gcc. Problem is, in certain places - * where pte_offset_map_lock is called, concurrent page faults are + * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by GCC. Problem is, in certain places + * where pte_offset_map_lock() is called, concurrent page faults are   * allowed, if the mmap_sem is hold for reading. An example is mincore   * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_populate() rightfully does a set_64bit(), but if we're reading the   * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen - * because gcc will not read the 64bit of the pmd atomically. To fix - * this all places running pmd_offset_map_lock() while holding the + * because GCC will not read the 64-bit value of the pmd atomically. + * + * To fix this all places running pte_offset_map_lock() while holding the   * mmap_sem in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null nor not, and in turn to know if - * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * function to know if the pmd is null or not, and in turn to know if + * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd   * operations.   * - * Without THP if the mmap_sem is hold for reading, the pmd can only - * transition from null to not null while pmd_read_atomic runs. So + * Without THP if the mmap_sem is held for reading, the pmd can only + * transition from null to not null while pmd_read_atomic() runs. So   * we can always return atomic pmd values with this function.   * - * With THP if the mmap_sem is hold for reading, the pmd can become + * With THP if the mmap_sem is held for reading, the pmd can become   * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic. We could read it really - * atomically here with a atomic64_read for the THP enabled case (and + * at any time under pmd_read_atomic(). We could read it truly + * atomically here with an atomic64_read() for the THP enabled case (and   * it would be a whole lot simpler), but to avoid using cmpxchg8b we   * only return an atomic pmdval if the low part of the pmdval is later - * found stable (i.e. pointing to a pte). And we're returning a none - * pmdval if the low part of the pmd is none. In some cases the high - * and low part of the pmdval returned may not be consistent if THP is - * enabled (the low part may point to previously mapped hugepage, - * while the high part may point to a more recently mapped hugepage), - * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part - * of the pmd to be read atomically to decide if the pmd is unstable - * or not, with the only exception of when the low part of the pmd is - * zero in which case we return a none pmd. + * found to be stable (i.e. pointing to a pte). We are also returning a + * 'none' (zero) pmdval if the low part of the pmd is zero. + * + * In some cases the high and low part of the pmdval returned may not be + * consistent if THP is enabled (the low part may point to previously + * mapped hugepage, while the high part may point to a more recently + * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only + * needs the low part of the pmd to be read atomically to decide if the + * pmd is unstable or not, with the only exception when the low part + * of the pmd is zero, in which case we return a 'none' pmd.   */  static inline pmd_t pmd_read_atomic(pmd_t *pmdp)  { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb13..ad97dc155195 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void)  	return boot_cpu_has_bug(X86_BUG_L1TF);  } +#define arch_faults_on_old_pte arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ +	return false; +} +  #include <asm-generic/pgtable.h>  #endif	/* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..0416d42e5bdd 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -41,14 +41,15 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */  #endif  /* - * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c - * to avoid include recursion hell + * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE. + * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c + * to avoid include recursion hell.   */ -#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 43) -#define CPU_ENTRY_AREA_BASE						\ -	((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1))   \ -	 & PMD_MASK) +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE	\ +	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)  #define LDT_BASE_ADDR		\  	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..0340aad3f2fc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@  /* Forward declaration, a strange C thing */  struct task_struct;  struct mm_struct; +struct io_bitmap;  struct vm86;  #include <asm/math_emu.h> @@ -93,7 +94,15 @@ struct cpuinfo_x86 {  	__u32			extended_cpuid_level;  	/* Maximum supported CPUID level, -1=no CPUID: */  	int			cpuid_level; -	__u32			x86_capability[NCAPINTS + NBUGINTS]; +	/* +	 * Align to size of unsigned long because the x86_capability array +	 * is passed to bitops which require the alignment. Use unnamed +	 * union to enforce the array is aligned to size of unsigned long. +	 */ +	union { +		__u32		x86_capability[NCAPINTS + NBUGINTS]; +		unsigned long	x86_capability_alignment; +	};  	char			x86_vendor_id[16];  	char			x86_model_id[64];  	/* in KB - valid for CPUS which support this call: */ @@ -157,7 +166,6 @@ enum cpuid_regs_idx {  extern struct cpuinfo_x86	boot_cpu_data;  extern struct cpuinfo_x86	new_cpu_data; -extern struct x86_hw_tss	doublefault_tss;  extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];  extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS]; @@ -328,10 +336,32 @@ struct x86_hw_tss {   * IO-bitmap sizes:   */  #define IO_BITMAP_BITS			65536 -#define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET		(offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET	0x8000 +#define IO_BITMAP_BYTES			(IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS			(IO_BITMAP_BYTES / sizeof(long)) + +#define IO_BITMAP_OFFSET_VALID_MAP				\ +	(offsetof(struct tss_struct, io_bitmap.bitmap) -	\ +	 offsetof(struct tss_struct, x86_tss)) + +#define IO_BITMAP_OFFSET_VALID_ALL				\ +	(offsetof(struct tss_struct, io_bitmap.mapall) -	\ +	 offsetof(struct tss_struct, x86_tss)) + +#ifdef CONFIG_X86_IOPL_IOPERM +/* + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. + */ +# define __KERNEL_TSS_LIMIT	\ +	(IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ +	 sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT	\ +	(offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif + +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID	(__KERNEL_TSS_LIMIT + 1)  struct entry_stack {  	unsigned long		words[64]; @@ -341,13 +371,21 @@ struct entry_stack_page {  	struct entry_stack stack;  } __aligned(PAGE_SIZE); -struct tss_struct { +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { +	/* The sequence number of the last active bitmap. */ +	u64			prev_sequence; +  	/* -	 * The fixed hardware portion.  This must not cross a page boundary -	 * at risk of violating the SDM's advice and potentially triggering -	 * errata. +	 * Store the dirty size of the last io bitmap offender. The next +	 * one will have to do the cleanup as the switch out to a non io +	 * bitmap user will just set x86_tss.io_bitmap_base to a value +	 * outside of the TSS limit. So for sane tasks there is no need to +	 * actually touch the io_bitmap at all.  	 */ -	struct x86_hw_tss	x86_tss; +	unsigned int		prev_max;  	/*  	 * The extra 1 is there because the CPU will access an @@ -355,21 +393,28 @@ struct tss_struct {  	 * bitmap. The extra byte must be all 1 bits, and must  	 * be within the limit.  	 */ -	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1]; +	unsigned long		bitmap[IO_BITMAP_LONGS + 1]; + +	/* +	 * Special I/O bitmap to emulate IOPL(3). All bytes zero, +	 * except the additional byte at the end. +	 */ +	unsigned long		mapall[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { +	/* +	 * The fixed hardware portion.  This must not cross a page boundary +	 * at risk of violating the SDM's advice and potentially triggering +	 * errata. +	 */ +	struct x86_hw_tss	x86_tss; + +	struct x86_io_bitmap	io_bitmap;  } __aligned(PAGE_SIZE);  DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT	\ -	(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) -  /* Per CPU interrupt stacks */  struct irq_stack {  	char		stack[IRQ_STACK_SIZE]; @@ -480,10 +525,14 @@ struct thread_struct {  	struct vm86		*vm86;  #endif  	/* IO permissions: */ -	unsigned long		*io_bitmap_ptr; -	unsigned long		iopl; -	/* Max allowed port in the bitmap, in bytes: */ -	unsigned		io_bitmap_max; +	struct io_bitmap	*io_bitmap; + +	/* +	 * IOPL. Priviledge level dependent I/O permission which is +	 * emulated via the I/O bitmap to prevent user space from disabling +	 * interrupts. +	 */ +	unsigned long		iopl_emul;  	mm_segment_t		addr_limit; @@ -515,25 +564,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,   */  #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/ -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) -{ -#ifdef CONFIG_X86_32 -	unsigned int reg; - -	asm volatile ("pushfl;" -		      "popl %0;" -		      "andl %1, %0;" -		      "orl %2, %0;" -		      "pushl %0;" -		      "popfl" -		      : "=&r" (reg) -		      : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif -} -  static inline void  native_load_sp0(unsigned long sp0)  { @@ -573,7 +603,6 @@ static inline void load_sp0(unsigned long sp0)  	native_load_sp0(sp0);  } -#define set_iopl_mask native_set_iopl_mask  #endif /* CONFIG_PARAVIRT_XXL */  /* Free all resources held by a thread. */ @@ -841,7 +870,6 @@ static inline void spin_lock_prefetch(const void *x)  #define INIT_THREAD  {							  \  	.sp0			= TOP_OF_INIT_STACK,			  \  	.sysenter_cs		= __KERNEL_CS,				  \ -	.io_bitmap_ptr		= NULL,					  \  	.addr_limit		= KERNEL_DS,				  \  } @@ -958,7 +986,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)  extern unsigned long arch_align_stack(unsigned long sp);  void free_init_pages(const char *what, unsigned long begin, unsigned long end); -extern void free_kernel_image_pages(void *begin, void *end); +extern void free_kernel_image_pages(const char *what, void *begin, void *end);  void default_idle(void);  #ifdef	CONFIG_XEN @@ -968,7 +996,6 @@ bool xen_set_default_idle(void);  #endif  void stop_this_cpu(void *dummy); -void df_debug(struct pt_regs *regs, long error_code);  void microcode_check(void);  enum l1tf_mitigations { @@ -988,4 +1015,11 @@ enum mds_mitigations {  	MDS_MITIGATION_VMWERV,  }; +enum taa_mitigations { +	TAA_MITIGATION_OFF, +	TAA_MITIGATION_UCODE_NEEDED, +	TAA_MITIGATION_VERW, +	TAA_MITIGATION_TSX_DISABLED, +}; +  #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 332eb3525867..5057a8ed100b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -361,5 +361,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,  extern int do_set_thread_area(struct task_struct *p, int idx,  			      struct user_desc __user *info, int can_allocate); +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t)	do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t)	(0) +#endif +  #endif /* !__ASSEMBLY__ */  #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 92c34e517da1..5528e9325049 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -6,16 +6,6 @@  #include <linux/purgatory.h>  extern void purgatory(void); -/* - * These forward declarations serve two purposes: - * - * 1) Make sparse happy when checking arch/purgatory - * 2) Document that these are required to be global so the symbol - *    lookup in kexec works - */ -extern unsigned long purgatory_backup_dest; -extern unsigned long purgatory_backup_src; -extern unsigned long purgatory_backup_sz;  #endif	/* __ASSEMBLY__ */  #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index 232f856e0db0..000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. - */ -#include <linux/refcount.h> -#include <asm/bug.h> - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. - */ -#define _REFCOUNT_EXCEPTION				\ -	".pushsection .text..refcount\n"		\ -	"111:\tlea %[var], %%" _ASM_CX "\n"		\ -	"112:\t" ASM_UD2 "\n"				\ -	ASM_UNREACHABLE					\ -	".popsection\n"					\ -	"113:\n"					\ -	_ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO				\ -	"js 111f\n\t"					\ -	_REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO				\ -	"jz 111f\n\t"					\ -	REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR					\ -	"jmp 111f\n\t"					\ -	_REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ -	asm volatile(LOCK_PREFIX "addl %1,%0\n\t" -		REFCOUNT_CHECK_LT_ZERO -		: [var] "+m" (r->refs.counter) -		: "ir" (i) -		: "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ -	asm volatile(LOCK_PREFIX "incl %0\n\t" -		REFCOUNT_CHECK_LT_ZERO -		: [var] "+m" (r->refs.counter) -		: : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ -	asm volatile(LOCK_PREFIX "decl %0\n\t" -		REFCOUNT_CHECK_LE_ZERO -		: [var] "+m" (r->refs.counter) -		: : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ -	bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", -					 REFCOUNT_CHECK_LT_ZERO, -					 r->refs.counter, e, "er", i, "cx"); - -	if (ret) { -		smp_acquire__after_ctrl_dep(); -		return true; -	} - -	return false; -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ -	bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", -					 REFCOUNT_CHECK_LT_ZERO, -					 r->refs.counter, e, "cx"); - -	if (ret) { -		smp_acquire__after_ctrl_dep(); -		return true; -	} - -	return false; -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ -	int c, result; - -	c = atomic_read(&(r->refs)); -	do { -		if (unlikely(c == 0)) -			return false; - -		result = c + i; - -		/* Did we try to increment from/to an undesirable state? */ -		if (unlikely(c < 0 || c == INT_MAX || result < c)) { -			asm volatile(REFCOUNT_ERROR -				     : : [var] "m" (r->refs.counter) -				     : "cc", "cx"); -			break; -		} - -	} while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - -	return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ -	return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h deleted file mode 100644 index 0a21986d2238..000000000000 --- a/arch/x86/include/asm/rio.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Derived from include/asm-x86/mach-summit/mach_mpparse.h - *          and include/asm-x86/mach-default/bios_ebda.h - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - */ - -#ifndef _ASM_X86_RIO_H -#define _ASM_X86_RIO_H - -#define RIO_TABLE_VERSION	3 - -struct rio_table_hdr { -	u8 version;		/* Version number of this data structure  */ -	u8 num_scal_dev;	/* # of Scalability devices               */ -	u8 num_rio_dev;		/* # of RIO I/O devices                   */ -} __attribute__((packed)); - -struct scal_detail { -	u8 node_id;		/* Scalability Node ID                    */ -	u32 CBAR;		/* Address of 1MB register space          */ -	u8 port0node;		/* Node ID port connected to: 0xFF=None   */ -	u8 port0port;		/* Port num port connected to: 0,1,2, or  */ -				/* 0xFF=None                              */ -	u8 port1node;		/* Node ID port connected to: 0xFF = None */ -	u8 port1port;		/* Port num port connected to: 0,1,2, or  */ -				/* 0xFF=None                              */ -	u8 port2node;		/* Node ID port connected to: 0xFF = None */ -	u8 port2port;		/* Port num port connected to: 0,1,2, or  */ -				/* 0xFF=None                              */ -	u8 chassis_num;		/* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { -	u8 node_id;		/* RIO Node ID                            */ -	u32 BBAR;		/* Address of 1MB register space          */ -	u8 type;		/* Type of device                         */ -	u8 owner_id;		/* Node ID of Hurricane that owns this    */ -				/* node                                   */ -	u8 port0node;		/* Node ID port connected to: 0xFF=None   */ -	u8 port0port;		/* Port num port connected to: 0,1,2, or  */ -				/* 0xFF=None                              */ -	u8 port1node;		/* Node ID port connected to: 0xFF=None   */ -	u8 port1port;		/* Port num port connected to: 0,1,2, or  */ -				/* 0xFF=None                              */ -	u8 first_slot;		/* Lowest slot number below this Calgary  */ -	u8 status;		/* Bit 0 = 1 : the XAPIC is used          */ -				/*       = 0 : the XAPIC is not used, ie: */ -				/*            ints fwded to another XAPIC */ -				/*           Bits1:7 Reserved             */ -	u8 WP_index;		/* instance index - lower ones have       */ -				/*     lower slot numbers/PCI bus numbers */ -	u8 chassis_num;		/* 1 based Chassis number                 */ -} __attribute__((packed)); - -enum { -	HURR_SCALABILTY	= 0,	/* Hurricane Scalability info */ -	HURR_RIOIB	= 2,	/* Hurricane RIOIB info       */ -	COMPAT_CALGARY	= 4,	/* Compatibility Calgary      */ -	ALT_CALGARY	= 5,	/* Second Planar Calgary      */ -}; - -#endif /* _ASM_X86_RIO_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 71b32f2570ab..036c360910c5 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -6,7 +6,6 @@  #include <asm/extable.h>  extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[];  extern char __end_rodata_aligned[];  #if defined(CONFIG_X86_64) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..6669164abadc 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -31,6 +31,18 @@   */  #define SEGMENT_RPL_MASK	0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK	0x2 +  /* User mode is privilege level 3: */  #define USER_RPL		0x3 diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 18a4b6890fa8..0e059b73437b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task)  	if (static_cpu_has(X86_FEATURE_XENPV))  		load_sp0(task_top_of_stack(task));  #endif +} +static inline void kthread_frame_init(struct inactive_task_frame *frame, +				      unsigned long fun, unsigned long arg) +{ +	frame->bx = fun; +#ifdef CONFIG_X86_32 +	frame->di = arg; +#else +	frame->r12 = arg; +#endif  }  #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..e2389ce9bf58 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,8 @@  #ifndef _ASM_X86_SYSCALL_WRAPPER_H  #define _ASM_X86_SYSCALL_WRAPPER_H +struct pt_regs; +  /* Mapping of registers to parameters for syscalls on x86-64 and x32 */  #define SC_X86_64_REGS_TO_ARGS(x, ...)					\  	__MAP(x,__SC_ARGS						\ @@ -28,13 +30,21 @@   * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this   * case as well.   */ +#define __IA32_COMPAT_SYS_STUB0(x, name)				\ +	asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\ +	ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO);		\ +	asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\ +	{								\ +		return __se_compat_sys_##name();			\ +	} +  #define __IA32_COMPAT_SYS_STUBx(x, name, ...)				\  	asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\  	ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO);		\  	asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\  	{								\  		return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ -	}								\ +	}  #define __IA32_SYS_STUBx(x, name, ...)					\  	asmlinkage long __ia32_sys##name(const struct pt_regs *regs);	\ @@ -48,16 +58,23 @@   * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias   * named __ia32_sys_*()   */ -#define SYSCALL_DEFINE0(sname)					\ -	SYSCALL_METADATA(_##sname, 0);				\ -	asmlinkage long __x64_sys_##sname(void);		\ -	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\ -	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);	\ -	asmlinkage long __x64_sys_##sname(void) -#define COND_SYSCALL(name)						\ -	cond_syscall(__x64_sys_##name);					\ -	cond_syscall(__ia32_sys_##name) +#define SYSCALL_DEFINE0(sname)						\ +	SYSCALL_METADATA(_##sname, 0);					\ +	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ +	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);		\ +	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);		\ +	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name)							\ +	asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused)	\ +	{									\ +		return sys_ni_syscall();					\ +	}									\ +	asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\ +	{									\ +		return sys_ni_syscall();					\ +	}  #define SYS_NI(name)							\  	SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers);		\ @@ -75,15 +92,24 @@   * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common   * with x86_64 obviously do not need such care.   */ +#define __X32_COMPAT_SYS_STUB0(x, name, ...)				\ +	asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\ +	ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO);		\ +	asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\ +	{								\ +		return __se_compat_sys_##name();\ +	} +  #define __X32_COMPAT_SYS_STUBx(x, name, ...)				\  	asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\  	ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO);		\  	asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\  	{								\  		return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ -	}								\ +	}  #else /* CONFIG_X86_X32 */ +#define __X32_COMPAT_SYS_STUB0(x, name)  #define __X32_COMPAT_SYS_STUBx(x, name, ...)  #endif /* CONFIG_X86_X32 */ @@ -94,6 +120,17 @@   * mapping of registers to parameters, we need to generate stubs for each   * of them.   */ +#define COMPAT_SYSCALL_DEFINE0(name)					\ +	static long __se_compat_sys_##name(void);			\ +	static inline long __do_compat_sys_##name(void);		\ +	__IA32_COMPAT_SYS_STUB0(x, name)				\ +	__X32_COMPAT_SYS_STUB0(x, name)					\ +	static long __se_compat_sys_##name(void)			\ +	{								\ +		return __do_compat_sys_##name();			\ +	}								\ +	static inline long __do_compat_sys_##name(void) +  #define COMPAT_SYSCALL_DEFINEx(x, name, ...)					\  	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\  	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -181,15 +218,19 @@   * macros to work correctly.   */  #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname)					\ -	SYSCALL_METADATA(_##sname, 0);				\ -	asmlinkage long __x64_sys_##sname(void);		\ -	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\ -	asmlinkage long __x64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname)						\ +	SYSCALL_METADATA(_##sname, 0);					\ +	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ +	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);		\ +	asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)  #endif  #ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#define COND_SYSCALL(name) 							\ +	asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused)	\ +	{									\ +		return sys_ni_syscall();					\ +	}  #endif  #ifndef SYS_NI @@ -201,7 +242,6 @@   * For VSYSCALLS, we need to declare these three syscalls with the new   * pt_regs-based calling convention for in-kernel use.   */ -struct pt_regs;  asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);  asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);  asmlinkage long __x64_sys_time(const struct pt_regs *regs); diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h deleted file mode 100644 index 6ed2deacf1d0..000000000000 --- a/arch/x86/include/asm/tce.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file is derived from asm-powerpc/tce.h. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * Author: Jon Mason <jdmason@us.ibm.com> - */ - -#ifndef _ASM_X86_TCE_H -#define _ASM_X86_TCE_H - -extern unsigned int specified_table_size; -struct iommu_table; - -#define TCE_ENTRY_SIZE   8   /* in bytes */ - -#define TCE_READ_SHIFT   0 -#define TCE_WRITE_SHIFT  1 -#define TCE_HUBID_SHIFT  2   /* unused */ -#define TCE_RSVD_SHIFT   8   /* unused */ -#define TCE_RPN_SHIFT    12 -#define TCE_UNUSED_SHIFT 48  /* unused */ - -#define TCE_RPN_MASK     0x0000fffffffff000ULL - -extern void tce_build(struct iommu_table *tbl, unsigned long index, -		      unsigned int npages, unsigned long uaddr, int direction); -extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages); -extern void * __init alloc_tce_table(void); -extern void __init free_tce_table(void *tbl); -extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar); - -#endif /* _ASM_X86_TCE_H */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 5e8319bb207a..23c626a742e8 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,  #define POKE_MAX_OPCODE_SIZE	5  struct text_poke_loc { -	void *detour;  	void *addr; -	size_t len; -	const char opcode[POKE_MAX_OPCODE_SIZE]; +	int len; +	s32 rel32; +	u8 opcode; +	const u8 text[POKE_MAX_OPCODE_SIZE];  };  extern void text_poke_early(void *addr, const void *opcode, size_t len); @@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);  extern void *text_poke(void *addr, const void *opcode, size_t len);  extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);  extern int poke_int3_handler(struct pt_regs *regs); -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);  extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); +extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, +			       const void *opcode, size_t len, const void *emulate);  extern int after_bootmem;  extern __ro_after_init struct mm_struct *poking_mm;  extern __ro_after_init unsigned long poking_addr; @@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)  	regs->ip = ip;  } -#define INT3_INSN_SIZE 1 -#define CALL_INSN_SIZE 5 +#define INT3_INSN_SIZE		1 +#define INT3_INSN_OPCODE	0xCC + +#define CALL_INSN_SIZE		5 +#define CALL_INSN_OPCODE	0xE8 + +#define JMP32_INSN_SIZE		5 +#define JMP32_INSN_OPCODE	0xE9 + +#define JMP8_INSN_SIZE		2 +#define JMP8_INSN_OPCODE	0xEB  static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)  { diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f9453536f9bb..d779366ce3f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -143,8 +143,8 @@ struct thread_info {  	 _TIF_NOHZ)  /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_BASE						\ -	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|		\ +#define _TIF_WORK_CTXSW_BASE					\ +	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\  	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)  /* @@ -156,8 +156,14 @@ struct thread_info {  # define _TIF_WORK_CTXSW	(_TIF_WORK_CTXSW_BASE)  #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV	(_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ +				 _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV	(_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + +#define _TIF_WORK_CTXSW_NEXT	(_TIF_WORK_CTXSW)  #define STACK_WARN		(THREAD_SIZE/8) diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index ace464f09681..4d705cb4d63b 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,  		      __entry->ncpus, __entry->vector)  	); +TRACE_EVENT(hyperv_send_ipi_one, +	    TP_PROTO(int cpu, +		     int vector), +	    TP_ARGS(cpu, vector), +	    TP_STRUCT__entry( +		    __field(int, cpu) +		    __field(int, vector) +		    ), +	    TP_fast_assign(__entry->cpu = cpu; +			   __entry->vector = vector; +		    ), +	    TP_printk("cpu %d vector %x", +		      __entry->cpu, __entry->vector) +	); +  #endif /* CONFIG_HYPERV */  #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index b25e633033c3..ffa0dc8a535e 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);  dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);  dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);  dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); +#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); +#endif  dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);  dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);  dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h index db43f2a0d92c..aeed98c3c9e1 100644 --- a/arch/x86/include/asm/umip.h +++ b/arch/x86/include/asm/umip.h @@ -4,9 +4,9 @@  #include <linux/types.h>  #include <asm/ptrace.h> -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP  bool fixup_umip_exception(struct pt_regs *regs);  #else  static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } -#endif  /* CONFIG_X86_INTEL_UMIP */ +#endif  /* CONFIG_X86_UMIP */  #endif  /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 0bcdb1279361..f5e2eb12cb71 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -86,6 +86,14 @@  	UNWIND_HINT sp_offset=\sp_offset  .endm +.macro UNWIND_HINT_SAVE +	UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE +	UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm +  #else /* !__ASSEMBLY__ */  #define UNWIND_HINT(sp_reg, sp_offset, type, end)		\ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 6e7caf65fa40..389174eaec79 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -138,7 +138,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);  extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);  extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); -extern void uv_bios_init(void); +extern int uv_bios_init(void);  extern unsigned long sn_rtc_cycles_per_second;  extern int uv_type; diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6bc6d89d8e2a..45ea95ce79b4 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -12,6 +12,16 @@ struct mm_struct;  #ifdef CONFIG_X86_UV  #include <linux/efi.h> +#define	UV_PROC_NODE	"sgi_uv" + +static inline int uv(int uvtype) +{ +	/* uv(0) is "any" */ +	if (uvtype >= 0 && uvtype <= 30) +		return 1 << uvtype; +	return 1; +} +  extern unsigned long uv_systab_phys;  extern enum uv_system_type get_uv_system_type(void); @@ -20,7 +30,8 @@ static inline bool is_early_uv_system(void)  	return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;  }  extern int is_uv_system(void); -extern int is_uv_hubless(void); +extern int is_uv_hubbed(int uvtype); +extern int is_uv_hubless(int uvtype);  extern void uv_cpu_init(void);  extern void uv_nmi_init(void);  extern void uv_system_init(void); @@ -32,7 +43,8 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,  static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }  static inline bool is_early_uv_system(void)	{ return 0; }  static inline int is_uv_system(void)	{ return 0; } -static inline int is_uv_hubless(void)	{ return 0; } +static inline int is_uv_hubbed(int uv)	{ return 0; } +static inline int is_uv_hubless(int uv) { return 0; }  static inline void uv_cpu_init(void)	{ }  static inline void uv_system_init(void)	{ }  static inline const struct cpumask * diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 44cf6d6deb7a..950cd1395d5d 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -19,6 +19,7 @@  #include <linux/topology.h>  #include <asm/types.h>  #include <asm/percpu.h> +#include <asm/uv/uv.h>  #include <asm/uv/uv_mmrs.h>  #include <asm/uv/bios.h>  #include <asm/irq_vectors.h> @@ -243,83 +244,61 @@ static inline int uv_hub_info_check(int version)  #define UV4_HUB_REVISION_BASE		7  #define UV4A_HUB_REVISION_BASE		8	/* UV4 (fixed) rev 2 */ -#ifdef	UV1_HUB_IS_SUPPORTED +/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */  static inline int is_uv1_hub(void)  { -	return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE; -} +#ifdef	UV1_HUB_IS_SUPPORTED +	return is_uv_hubbed(uv(1));  #else -static inline int is_uv1_hub(void) -{  	return 0; -}  #endif +} -#ifdef	UV2_HUB_IS_SUPPORTED  static inline int is_uv2_hub(void)  { -	return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) && -		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE)); -} +#ifdef	UV2_HUB_IS_SUPPORTED +	return is_uv_hubbed(uv(2));  #else -static inline int is_uv2_hub(void) -{  	return 0; -}  #endif +} -#ifdef	UV3_HUB_IS_SUPPORTED  static inline int is_uv3_hub(void)  { -	return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) && -		(uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)); -} +#ifdef	UV3_HUB_IS_SUPPORTED +	return is_uv_hubbed(uv(3));  #else -static inline int is_uv3_hub(void) -{  	return 0; -}  #endif +}  /* First test "is UV4A", then "is UV4" */ -#ifdef	UV4A_HUB_IS_SUPPORTED -static inline int is_uv4a_hub(void) -{ -	return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE); -} -#else  static inline int is_uv4a_hub(void)  { +#ifdef	UV4A_HUB_IS_SUPPORTED +	if (is_uv_hubbed(uv(4))) +		return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE); +#endif  	return 0;  } -#endif -#ifdef	UV4_HUB_IS_SUPPORTED  static inline int is_uv4_hub(void)  { -	return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE; -} +#ifdef	UV4_HUB_IS_SUPPORTED +	return is_uv_hubbed(uv(4));  #else -static inline int is_uv4_hub(void) -{  	return 0; -}  #endif +}  static inline int is_uvx_hub(void)  { -	if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) -		return uv_hub_info->hub_revision; - -	return 0; +	return (is_uv_hubbed(-2) >= uv(2));  }  static inline int is_uv_hub(void)  { -#ifdef	UV1_HUB_IS_SUPPORTED -	return uv_hub_info->hub_revision; -#endif -	return is_uvx_hub(); +	return is_uv1_hub() || is_uvx_hub();  }  union uvh_apicid { diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 42e1245af0d8..ff4b52e37e60 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);  void xen_arch_unregister_cpu(int num);  #endif -extern void xen_set_iopl_mask(unsigned mask); -  #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c65..9139b3e86316 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -379,12 +379,9 @@ struct xen_pmu_arch {   * Prefix forces emulation of some non-trapping instructions.   * Currently only CPUID.   */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid" -#endif +#include <asm/emulate_prefix.h> + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID          XEN_EMULATE_PREFIX __ASM_FORM(cpuid)  #endif /* _ASM_X86_XEN_INTERFACE_H */ | 

