summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/apic/apic_numachip.c 4
-rw-r--r-- arch/x86/kernel/asm-offsets.c 1
-rw-r--r-- arch/x86/kernel/asm-offsets_32.c 7
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c 10
-rw-r--r-- arch/x86/kernel/cpu/Makefile 2
-rw-r--r-- arch/x86/kernel/cpu/centaur.c 2
-rw-r--r-- arch/x86/kernel/cpu/common.c 39
-rw-r--r-- arch/x86/kernel/cpu/cyrix.c 1
-rw-r--r-- arch/x86/kernel/cpu/intel.c 2
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c 2
-rw-r--r-- arch/x86/kernel/cpu/match.c 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 13
-rw-r--r-- arch/x86/kernel/cpu/mkcapflags.sh 6
-rw-r--r-- arch/x86/kernel/cpu/mtrr/main.c 2
-rw-r--r-- arch/x86/kernel/cpu/transmeta.c 2
-rw-r--r-- arch/x86/kernel/e820.c 1
-rw-r--r-- arch/x86/kernel/fpu/xstate.c 3
-rw-r--r-- arch/x86/kernel/ftrace.c 11
-rw-r--r-- arch/x86/kernel/head_32.S 2
-rw-r--r-- arch/x86/kernel/head_64.S 1
-rw-r--r-- arch/x86/kernel/hpet.c 1
-rw-r--r-- arch/x86/kernel/mcount_64.S 14
-rw-r--r-- arch/x86/kernel/msr.c 2
-rw-r--r-- arch/x86/kernel/process.c 3
-rw-r--r-- arch/x86/kernel/signal.c 127
-rw-r--r-- arch/x86/kernel/traps.c 138
-rw-r--r-- arch/x86/kernel/verify_cpu.S 2
-rw-r--r-- arch/x86/kernel/vm86_32.c 2
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S 11
29 files changed, 286 insertions, 127 deletions
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6ec49..ab5c2c685a3c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524b202c..5c042466f274 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce39025f467..ecdc1d217dc0 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -7,7 +7,7 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
@@ -52,6 +52,11 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
+ /* Offset from cpu_tss to SYSENTER_stack */
+ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+ /* Size of SYSENTER_stack */
+ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb5f24e..d875f97d4e0b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -4,17 +4,11 @@
#include <asm/ia32.h>
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7a60424d63fa..0d373d7affc8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -42,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ce197bb7c129..1661d8ec9280 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 81cf716f6f97..4e8d25d395ee 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -801,6 +801,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+#else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
}
static void generic_identify(struct cpuinfo_x86 *c)
@@ -1475,20 +1500,6 @@ void cpu_init(void)
}
#endif
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
- WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
- return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 187bb583d0df..6adef9cac23e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
+#include <asm/cpufeature.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 38766c2b5b00..1f7fdb91a818 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/bugs.h>
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6ed779efff26..de6626c18e42 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
#include <linux/sysfs.h>
#include <linux/pci.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d487ea..fbb5e90557a5 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
#include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 524f2a8492d7..f0c921b03e42 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1578,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
+ /*
+ * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+ * whether this processor will actually generate recoverable
+ * machine checks. Check to see if this is an E7 model Xeon.
+ * We can't do a model number check because E5 and E7 use the
+ * same model number. E5 doesn't support recovery, E7 does.
+ */
+ if (mca_cfg.recovery || (mca_cfg.ser &&
+ !strncmp(c->x86_model_id,
+ "Intel(R) Xeon(R) CPU E7-", 24)))
+ set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -2030,6 +2041,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
+ else if (!strcmp(str, "recovery"))
+ cfg->recovery = true;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710a5b23..6988c74409a8 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
IN=$1
@@ -49,8 +49,8 @@ dump_array()
trap 'rm "$OUT"' EXIT
(
- echo "#ifndef _ASM_X86_CPUFEATURE_H"
- echo "#include <asm/cpufeature.h>"
+ echo "#ifndef _ASM_X86_CPUFEATURES_H"
+ echo "#include <asm/cpufeatures.h>"
echo "#endif"
echo ""
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ba80d68f683e..10f8d4796240 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
#include <linux/smp.h>
#include <linux/syscore_ops.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index e3b4d1841175..34178564be2a 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 837365f10912..621b501f8935 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
+#include <asm/cpufeature.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5ae6d..6e8354f5a593 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 05c9e3f5b6d7..702547ce33c9 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
-extern void ftrace_caller_end(void);
extern void ftrace_regs_caller_end(void);
-extern void ftrace_return(void);
+extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
@@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_caller_end;
+ end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
}
@@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/*
* Allocate enough size to store the ftrace_caller code,
- * the jmp to ftrace_return, as well as the address of
+ * the jmp to ftrace_epilogue, as well as the address of
* the ftrace_ops this trampoline is used for.
*/
trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
@@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = (unsigned long)trampoline + size;
- /* The trampoline ends with a jmp to ftrace_return */
- jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+ /* The trampoline ends with a jmp to ftrace_epilogue */
+ jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
/*
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae24b6d2..af1112980dd4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include <asm/bootparam.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860390..2e974680f5ad 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,6 @@
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5cd5d0..be0ebbb6d1d1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
+#include <asm/cpufeature.h>
#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 87e1762e2bca..ed48a9f465f8 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -168,12 +168,14 @@ GLOBAL(ftrace_call)
restore_mcount_regs
/*
- * The copied trampoline must call ftrace_return as it
+ * The copied trampoline must call ftrace_epilogue as it
* still may need to call the function graph tracer.
+ *
+ * The code up to this label is copied into trampolines so
+ * think twice before adding any new code or changing the
+ * layout here.
*/
-GLOBAL(ftrace_caller_end)
-
-GLOBAL(ftrace_return)
+GLOBAL(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call)
popfq
/*
- * As this jmp to ftrace_return can be a short jump
+ * As this jmp to ftrace_epilogue can be a short jump
* it must not be copied into the trampoline.
* The trampoline will add the code to jump
* to the return.
*/
GLOBAL(ftrace_regs_caller_end)
- jmp ftrace_return
+ jmp ftrace_epilogue
END(ftrace_regs_caller)
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616f93f1..7f3550acde1b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
static struct class *msr_class;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9decee2bfdbe..2915d54e9dd5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
+#ifdef CONFIG_X86_32
+ .SYSENTER_stack_canary = STACK_END_MAGIC,
+#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cb6282c3638f..548ddf7d6fd2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,7 +61,38 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+#ifdef CONFIG_X86_64
+/*
+ * If regs->ss will cause an IRET fault, change it. Otherwise leave it
+ * alone. Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+ u32 ar;
+ asm volatile ("lar %[old_ss], %[ar]\n\t"
+ "jz 1f\n\t" /* If invalid: */
+ "xorl %[ar], %[ar]\n\t" /* set ar = 0 */
+ "1:"
+ : [ar] "=r" (ar)
+ : [old_ss] "rm" ((u16)regs->ss));
+
+ /*
+ * For a valid 64-bit user context, we need DPL 3, type
+ * read-write data or read-write exp-down data, and S and P
+ * set. We can't use VERW because VERW doesn't check the
+ * P bit.
+ */
+ ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+ if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+ ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+ regs->ss = __USER_DS;
+}
+#endif
+
+static int restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *sc,
+ unsigned long uc_flags)
{
unsigned long buf_val;
void __user *buf;
@@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
COPY(r15);
#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+ user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
+ put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
@@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return 0;
}
#else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ if (cpu_has_xsave)
+ flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+ else
+ flags = UC_SIGCONTEXT_SS;
+
+ if (likely(user_64bit_mode(regs)))
+ flags |= UC_STRICT_RESTORE_SS;
+
+ return flags;
+}
+
static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
@@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
+ /*
+ * Set up the CS and SS registers to run signal handlers in
+ * 64-bit mode, even if the handler happens to be interrupting
+ * 32-bit or 16-bit code.
+ *
+ * SS is subtle. In 64-bit mode, we don't need any particular
+ * SS descriptor, but we do need SS to be valid. It's possible
+ * that the old SS is entirely bogus -- this can happen if the
+ * signal we're trying to deliver is #GP or #SS caused by a bad
+ * SS value. We also have a compatibility issue here: DOSEMU
+ * relies on the contents of the SS register indicating the
+ * SS value at the time of the signal, even though that code in
+ * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
+ * avoids relying on sigreturn to restore SS; instead it uses
+ * a trampoline.) So we do our best: if the old SS was valid,
+ * we keep it. Otherwise we replace it.
+ */
regs->cs = __USER_CS;
+ if (unlikely(regs->ss != __USER_DS))
+ force_valid_ss(regs);
+
return 0;
}
#endif /* CONFIG_X86_32 */
@@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->sc))
+ /*
+ * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+ * Save a few cycles by skipping the __get_user.
+ */
+ if (restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
@@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
@@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+ if (is_ia32_task())
+ return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+ return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
- return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
- __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
}
/*
@@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 211c11c7bba4..5c9ca2bb9fe9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
-static inline void conditional_sti(struct pt_regs *regs)
+static inline void cond_local_irq_enable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- preempt_count_inc();
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void conditional_cli(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_disable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void cond_local_irq_disable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
- preempt_count_dec();
}
void ist_enter(struct pt_regs *regs)
@@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
-#ifdef CONFIG_X86_64
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
printk_ratelimit()) {
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
@@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
print_vma_addr(" in ", regs->ip);
pr_cont("\n");
}
-#endif
force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
@@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
@@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs))
die("bounds", regs, error_code);
@@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
struct task_struct *tsk;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
local_irq_enable();
@@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
ist_exit(regs);
@@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
NOKPROBE_SYMBOL(fixup_bad_iret);
#endif
+static bool is_sysenter_singlestep(struct pt_regs *regs)
+{
+ /*
+ * We don't try for precision here. If we're anywhere in the region of
+ * code that can be single-stepped in the SYSENTER entry path, then
+ * assume that this is a useless single-step trap due to SYSENTER
+ * being invoked with TF set. (We don't know in advance exactly
+ * which instructions will be hit because BTF could plausibly
+ * be set.)
+ */
+#ifdef CONFIG_X86_32
+ return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
+ (unsigned long)__end_SYSENTER_singlestep_region -
+ (unsigned long)__begin_SYSENTER_singlestep_region;
+#elif defined(CONFIG_IA32_EMULATION)
+ return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
+ (unsigned long)__end_entry_SYSENTER_compat -
+ (unsigned long)entry_SYSENTER_compat;
+#else
+ return false;
+#endif
+}
+
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
ist_enter(regs);
get_debugreg(dr6, 6);
+ /*
+ * The Intel SDM says:
+ *
+ * Certain debug exceptions may clear bits 0-3. The remaining
+ * contents of the DR6 register are never cleared by the
+ * processor. To avoid confusion in identifying debug
+ * exceptions, debug handlers should clear the register before
+ * returning to the interrupted task.
+ *
+ * Keep it simple: clear DR6 immediately.
+ */
+ set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
/*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
+
+ if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
+ is_sysenter_singlestep(regs))) {
+ dr6 &= ~DR_STEP;
+ if (!dr6)
+ goto exit;
+ /*
+ * else we might have gotten a single-step trap and hit a
+ * watchpoint at the same time, in which case we should fall
+ * through and handle the watchpoint.
+ */
+ }
+
+ /*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
@@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
if (!dr6 && user_mode(regs))
user_icebp = 1;
- /* Catch kmemcheck conditions first of all! */
+ /* Catch kmemcheck conditions! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
goto exit;
- /* DR6 may or may not be cleared by the CPU */
- set_debugreg(0, 6);
-
- /*
- * The processor cleared BTF, so don't mark that we need it set.
- */
- clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
-
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
@@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
goto exit;
}
- /*
- * Single-stepping through system calls: ignore any exceptions in
- * kernel space, but re-enable TF when returning to user mode.
- *
- * We already checked v86 mode above, so we can check for kernel mode
- * by just checking the CPL of CS.
- */
- if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
+ /*
+ * Historical junk that used to handle SYSENTER single-stepping.
+ * This should be unreachable now. If we survive for a while
+ * without anyone hitting this warning, we'll turn this into
+ * an oops.
+ */
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
@@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
+#if defined(CONFIG_X86_32)
+ /*
+ * This is the most likely code path that involves non-trivial use
+ * of the SYSENTER stack. Check that we haven't overrun it.
+ */
+ WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+ "Overran or corrupted SYSENTER stack\n");
+#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -696,7 +738,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs)) {
if (!fixup_exception(regs, trapnr)) {
@@ -743,7 +785,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
}
dotraplinkage void
@@ -756,7 +798,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
info.regs = regs;
math_emulate(&info);
@@ -765,7 +807,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
#endif
}
NOKPROBE_SYMBOL(do_device_not_available);
@@ -868,7 +910,7 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
- set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35ee4bc..014ea59aa153 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
* appropriately. Either display a message or halt.
*/
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
verify_cpu:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b8546518..3dce1ca0a653 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
- if (static_cpu_has_safe(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
load_sp0(tss, &tsk->thread);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index fe133b710bef..5af9958cbdb6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -192,6 +192,17 @@ SECTIONS
:init
#endif
+ /*
+ * Section for code used exclusively before alternatives are run. All
+ * references to such code must be patched out by alternatives, normally
+ * by using X86_FEATURE_ALWAYS CPU feature bit.
+ *
+ * See static_cpu_has() for an example.
+ */
+ .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+ *(.altinstr_aux)
+ }
+
INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
OpenPOWER on IntegriCloud