Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--   arch/x86/kernel/cpu/amd.c                35
-rw-r--r--   arch/x86/kernel/cpu/bugs.c              185
-rw-r--r--   arch/x86/kernel/cpu/common.c            106
-rw-r--r--   arch/x86/kernel/cpu/microcode/amd.c       4
-rw-r--r--   arch/x86/kernel/cpu/microcode/intel.c    26
5 files changed, 281 insertions, 75 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..ea831c858195 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	case 0x17: init_amd_zn(c); break;
 	}
 
-	/* Enable workaround for FXSAVE leak */
-	if (c->x86 >= 6)
+	/*
+	 * Enable workaround for FXSAVE leak on CPUs
+	 * without a XSaveErPtr feature
+	 */
+	if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
 		set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
 
 	cpu_detect_cache_sizes(c);
@@ -826,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
 	if (cpu_has(c, X86_FEATURE_XMM2)) {
-		/* MFENCE stops RDTSC speculation */
-		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		unsigned long long val;
+		int ret;
+
+		/*
+		 * A serializing LFENCE has less overhead than MFENCE, so
+		 * use it for execution serialization.  On families which
+		 * don't have that MSR, LFENCE is already serializing.
+		 * msr_set_bit() uses the safe accessors, too, even if the MSR
+		 * is not present.
+		 */
+		msr_set_bit(MSR_F10H_DECFG,
+			    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+		/*
+		 * Verify that the MSR write was successful (could be running
+		 * under a hypervisor) and only then assume that LFENCE is
+		 * serializing.
+		 */
+		ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+		if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+			/* A serializing LFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+		} else {
+			/* MFENCE stops RDTSC speculation */
+			set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+		}
 	}
 
 	/*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ba0b2424c9b0..e4dc26185aa7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -10,6 +10,10 @@
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 
+static void __init spectre_v2_select_mitigation(void);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
 		print_cpu_info(&boot_cpu_data);
 	}
 
+	/* Select the proper spectre mitigation before patching alternatives */
+	spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
 		set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+	SPECTRE_V2_CMD_NONE,
+	SPECTRE_V2_CMD_AUTO,
+	SPECTRE_V2_CMD_FORCE,
+	SPECTRE_V2_CMD_RETPOLINE,
+	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+	SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+	[SPECTRE_V2_NONE]			= "Vulnerable",
+	[SPECTRE_V2_RETPOLINE_MINIMAL]		= "Vulnerable: Minimal generic ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD]	= "Vulnerable: Minimal AMD ASM retpoline",
+	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
+	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+	return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+	int len = strlen(opt);
+
+	return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+	char arg[20];
+	int ret;
+
+	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+				  sizeof(arg));
+	if (ret > 0)  {
+		if (match_option(arg, ret, "off")) {
+			goto disable;
+		} else if (match_option(arg, ret, "on")) {
+			spec2_print_if_secure("force enabled on command line.");
+			return SPECTRE_V2_CMD_FORCE;
+		} else if (match_option(arg, ret, "retpoline")) {
+			spec2_print_if_insecure("retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE;
+		} else if (match_option(arg, ret, "retpoline,amd")) {
+			if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+				pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+				return SPECTRE_V2_CMD_AUTO;
+			}
+			spec2_print_if_insecure("AMD retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_AMD;
+		} else if (match_option(arg, ret, "retpoline,generic")) {
+			spec2_print_if_insecure("generic retpoline selected on command line.");
+			return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+		} else if (match_option(arg, ret, "auto")) {
+			return SPECTRE_V2_CMD_AUTO;
+		}
+	}
+
+	if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+		return SPECTRE_V2_CMD_AUTO;
+disable:
+	spec2_print_if_insecure("disabled on command line.");
+	return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+	/*
+	 * If the CPU is not affected and the command line mode is NONE or AUTO
+	 * then nothing to do.
+	 */
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+		return;
+
+	switch (cmd) {
+	case SPECTRE_V2_CMD_NONE:
+		return;
+
+	case SPECTRE_V2_CMD_FORCE:
+		/* FALLTRHU */
+	case SPECTRE_V2_CMD_AUTO:
+		goto retpoline_auto;
+
+	case SPECTRE_V2_CMD_RETPOLINE_AMD:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_amd;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_generic;
+		break;
+	case SPECTRE_V2_CMD_RETPOLINE:
+		if (IS_ENABLED(CONFIG_RETPOLINE))
+			goto retpoline_auto;
+		break;
+	}
+	pr_err("kernel not compiled with retpoline; no mitigation available!");
+	return;
+
+retpoline_auto:
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+	retpoline_amd:
+		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+			pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+			goto retpoline_generic;
+		}
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	} else {
+	retpoline_generic:
+		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+					 SPECTRE_V2_RETPOLINE_MINIMAL;
+		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+	}
+
+	spectre_v2_enabled = mode;
+	pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		return sprintf(buf, "Not affected\n");
+	if (boot_cpu_has(X86_FEATURE_PTI))
+		return sprintf(buf, "Mitigation: PTI\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+		return sprintf(buf, "Not affected\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..ef29ad001991 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-	/* On 64-bit systems, we use a read-only fixmap GDT. */
-	pgprot_t prot = PAGE_KERNEL_RO;
-#else
-	/*
-	 * On native 32-bit systems, the GDT cannot be read-only because
-	 * our double fault handler uses a task gate, and entering through
-	 * a task gate needs to change an available TSS to busy.  If the GDT
-	 * is read-only, that will triple fault.
-	 *
-	 * On Xen PV, the GDT must be read-only because the hypervisor requires
-	 * it.
-	 */
-	pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-		PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
-	__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
 
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
+	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
 	int i;
 
-	for (i = 0; i < NCAPINTS; i++) {
+	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
 		c->x86_capability[i] |= cpu_caps_set[i];
 	}
@@ -927,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+	if (c->x86_vendor != X86_VENDOR_AMD)
+		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
 	fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
@@ -1250,7 +1252,7 @@ void enable_sep_cpu(void)
 		return;
 
 	cpu = get_cpu();
-	tss = &per_cpu(cpu_tss, cpu);
+	tss = &per_cpu(cpu_tss_rw, cpu);
 
 	/*
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1261,7 @@ void enable_sep_cpu(void)
 
 	tss->x86_tss.ss1 = __KERNEL_CS;
 	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-	wrmsr(MSR_IA32_SYSENTER_ESP,
-	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-	      0);
-
+	wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
 	put_cpu();
@@ -1357,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
-	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+	extern char _entry_trampoline[];
+	extern char entry_SYSCALL_64_trampoline[];
+
+	int cpu = smp_processor_id();
+	unsigned long SYSCALL64_entry_trampoline =
+		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+		(entry_SYSCALL_64_trampoline - _entry_trampoline);
+
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+	if (static_cpu_has(X86_FEATURE_PTI))
+		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+	else
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1381,7 @@ void syscall_init(void)
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1525,7 @@ void cpu_init(void)
 	if (cpu)
 		load_ucode_ap();
 
-	t = &per_cpu(cpu_tss, cpu);
+	t = &per_cpu(cpu_tss_rw, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1564,7 @@ void cpu_init(void)
 	 * set up and load the per-CPU TSS
 	 */
 	if (!oist->ist[0]) {
-		char *estacks = per_cpu(exception_stacks, cpu);
+		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 			estacks += exception_stack_sizes[v];
@@ -1580,7 +1575,7 @@ void cpu_init(void)
 		}
 	}
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 	/*
 	 * <= is required because the CPU will access up to
@@ -1596,11 +1591,12 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, me);
 
 	/*
-	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
-	 * task never enters user mode.
+	 * Initialize the TSS.  sp0 points to the entry trampoline stack
+	 * regardless of what task is running.
 	 */
-	set_tss_desc(cpu, t);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
+	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
 
@@ -1612,7 +1608,6 @@ void cpu_init(void)
 	if (is_uv_system())
 		uv_cpu_init();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1617,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
 	wait_for_master_cpu(cpu);
 
@@ -1657,12 +1652,12 @@ void cpu_init(void)
 	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
 	 * task never enters user mode.
 	 */
-	set_tss_desc(cpu, t);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
 
 	load_mm_ldt(&init_mm);
 
-	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
 	/* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1669,6 @@ void cpu_init(void)
 
 	fpu__init_cpu();
 
-	setup_fixmap_gdt(cpu);
 	load_fixmap_gdt(cpu);
 }
 #endif
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 #define F14H_MPB_MAX_SIZE 1824
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
 
 	switch (family) {
 	case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	case 0x16:
 		max_size = F16H_MPB_MAX_SIZE;
 		break;
+	case 0x17:
+		max_size = F17H_MPB_MAX_SIZE;
+		break;
 	default:
 		max_size = F1XH_MPB_MAX_SIZE;
 		break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..d9e460fc7a3b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-	__native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 	if (rev != mc->hdr.rev)
 		return -1;
 
-#ifdef CONFIG_X86_64
-	/* Flush global tlb. This is precaution. */
-	flush_tlb_early();
-#endif
 	uci->cpu_sig.rev = rev;
 
 	if (early)
@@ -923,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-		pr_err_once("late loading on model 79 is disabled.\n");
+	/*
+	 * Late loading on model 79 with microcode revision less than 0x0b000021
+	 * may result in a system hang. This behavior is documented in item
+	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+	 */
+	if (c->x86 == 6 &&
+	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+	    c->x86_mask == 0x01 &&
+	    c->microcode < 0x0b000021) {
+		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 		return true;
 	}
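The bugs.c hunk above supplies the x86 backends for the CPU vulnerability reporting: cpu_show_meltdown(), cpu_show_spectre_v1() and cpu_show_spectre_v2() are the show methods behind the sysfs attributes added elsewhere in this series (drivers/base/cpu.c, not part of this diff). A minimal userspace sketch for reading those files, assuming the standard /sys/devices/system/cpu/vulnerabilities/ layout, could look like this:

/* Sketch: read the vulnerability files backed by the cpu_show_*() handlers above. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	static const char *const bugs[] = { "meltdown", "spectre_v1", "spectre_v2" };
	char path[128], line[256];

	for (size_t i = 0; i < sizeof(bugs) / sizeof(bugs[0]); i++) {
		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/vulnerabilities/%s", bugs[i]);

		FILE *f = fopen(path, "r");
		if (!f) {
			/* Kernel without this series: the attribute simply does not exist. */
			printf("%-10s: unknown (no sysfs entry)\n", bugs[i]);
			continue;
		}
		if (fgets(line, sizeof(line), f)) {
			line[strcspn(line, "\n")] = '\0';
			printf("%-10s: %s\n", bugs[i], line);
		}
		fclose(f);
	}
	return 0;
}

On a kernel that predates these handlers the files are absent, which the sketch reports as "unknown" rather than treating it as "Not affected".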

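For the amd.c change: the kernel sets the LFENCE-serialization bit in MSR_F10H_DECFG and then reads the MSR back, because under a hypervisor the write may be silently dropped; only a verified bit results in X86_FEATURE_LFENCE_RDTSC, otherwise the heavier MFENCE variant is kept. What the feature bit buys is an ordered TSC read. Below is a userspace sketch of that pattern using compiler intrinsics; it is an illustration of the idea, not the kernel's rdtsc_ordered() implementation:

/* Sketch: an ordered TSC read, the pattern X86_FEATURE_LFENCE_RDTSC enables. */
#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>

static inline uint64_t rdtsc_ordered(void)
{
	/*
	 * On CPUs where LFENCE is (or has been made) serializing, a single
	 * LFENCE keeps earlier instructions from being speculated past the
	 * timestamp read; MFENCE is the heavier fallback the kernel keeps
	 * for CPUs where that cannot be verified.
	 */
	_mm_lfence();
	return __rdtsc();
}

int main(void)
{
	uint64_t t0 = rdtsc_ordered();
	uint64_t t1 = rdtsc_ordered();

	printf("delta: %llu cycles\n", (unsigned long long)(t1 - t0));
	return 0;
}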

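The spectre_v2= command-line parsing in bugs.c accepts only whole tokens: match_option() compares both length and content, so a truncated or misspelled value falls through to the default handling (auto, or none when nospectre_v2 is also given). A standalone sketch of that matching, written as a hypothetical test harness rather than kernel code, illustrates which values are recognized:

/* Sketch: the exact-token matching used by spectre_v2_parse_cmdline(). */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt);

	return len == arglen && !strncmp(arg, opt, len);
}

int main(void)
{
	const char *args[] = { "off", "on", "retpoline", "retpoline,amd",
			       "retpoline,generic", "auto", "retpol" };
	const char *opts[] = { "off", "on", "retpoline", "retpoline,amd",
			       "retpoline,generic", "auto" };

	for (size_t i = 0; i < sizeof(args) / sizeof(args[0]); i++) {
		bool hit = false;

		for (size_t j = 0; j < sizeof(opts) / sizeof(opts[0]); j++)
			hit |= match_option(args[i], strlen(args[i]), opts[j]);

		/* "retpol" is rejected: only exact-length matches count. */
		printf("spectre_v2=%-18s -> %s\n", args[i],
		       hit ? "recognized" : "falls through to the default");
	}
	return 0;
}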