author    Linus Torvalds <torvalds@linux-foundation.org>  2015-04-13 13:16:36 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-04-13 13:16:36 -0700
commit    60f898eeaaa1c5d0162a4240bacf33a6c87ecef6 (patch)
tree      23eeac4b1e9a616779d22c104dbc8bd45dfeefd1 /arch/x86/kernel/cpu
parent    977e1ba50893c15121557b39de586901fe3f75cf (diff)
parent    3b75232d55680ca166dffa274d0587d5faf0a016 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
"There were lots of changes in this development cycle:
- over 100 separate cleanups, restructuring changes, speedups and
fixes in the x86 system call, irq, trap and other entry code, part
of a heroic effort to deobfuscate decade-old spaghetti asm code
and its C code dependencies (Denys Vlasenko, Andy Lutomirski)
- alternatives code fixes and enhancements (Borislav Petkov)
- simplifications and cleanups to the compat code (Brian Gerst)
- signal handling fixes and new x86 testcases (Andy Lutomirski)
- various other fixes and cleanups
By their nature many of these changes are risky - we tried to test
them well on many different x86 systems (there are no known
regressions), and they are split up finely to help bisection - but
there's still a fair bit of residual risk left so caveat emptor"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
perf/x86/64: Report regs_user->ax too in get_regs_user()
perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
x86/asm/entry/32: Tidy up JNZ instructions after TESTs
x86/asm/entry/64: Reduce padding in execve stubs
x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
x86/asm/entry/64: Simplify jumps in ret_from_fork
x86/asm/entry/64: Remove a redundant jump
x86/asm/entry/64: Optimize [v]fork/clone stubs
x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
x86/asm/entry/64: Move stub_x32_execve() closer to stub_execveat()
x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
x86/asm/entry/64: Add forgotten CFI annotation
x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
x86/asm/entry/64: Move opportunistic sysret code to syscall code path
x86, selftests: Add sigreturn selftest
x86/alternatives: Guard NOPs optimization
x86/asm/entry: Clear EXTRA_REGS for all executable formats
x86/signal: Remove pax argument from restore_sigcontext
...
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/amd.c         |  5
-rw-r--r--  arch/x86/kernel/cpu/common.c      | 87
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c  | 18
3 files changed, 68 insertions(+), 42 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a220239cea65..dd9e50500297 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
 
 	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+	/* 3DNow or LM implies PREFETCHW */
+	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
+		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2346c95c6ab1..3f70538012e2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 }
 
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-/* May not be __init: called during resume */
-static void syscall32_cpu_init(void)
-{
-	/* Load these always in case some future AMD CPU supports
-	   SYSENTER from compat mode too. */
-	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-#endif		/* CONFIG_IA32_EMULATION */
-#endif		/* CONFIG_X86_64 */
-
+/*
+ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
+ * on 32-bit kernels:
+ */
 #ifdef CONFIG_X86_32
 void enable_sep_cpu(void)
 {
-	int cpu = get_cpu();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss;
+	int cpu;
 
-	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		put_cpu();
-		return;
-	}
+	cpu = get_cpu();
+	tss = &per_cpu(cpu_tss, cpu);
+
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		goto out;
+
+	/*
+	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+	 * see the big comment in struct x86_hw_tss's definition.
+	 */
 	tss->x86_tss.ss1 = __KERNEL_CS;
-	tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
-	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
-	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+	wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+
+	wrmsr(MSR_IA32_SYSENTER_ESP,
+	      (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
+	      0);
+
+	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+
+out:
 	put_cpu();
 }
 #endif
@@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
@@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
@@ -1171,10 +1170,23 @@ void syscall_init(void)
 	 */
 	wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
 	wrmsrl(MSR_LSTAR, system_call);
-	wrmsrl(MSR_CSTAR, ignore_sysret);
 
 #ifdef CONFIG_IA32_EMULATION
-	syscall32_cpu_init();
+	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+	/*
+	 * This only works on Intel CPUs.
+	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
+	 * This does not cause SYSENTER to jump to the wrong location, because
+	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+	 */
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+#else
+	wrmsrl(MSR_CSTAR, ignore_sysret);
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
 #endif
 
 	/* Flags to clear on syscall */
@@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack. Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+	(unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
@@ -1307,7 +1328,7 @@ void cpu_init(void)
 	 */
 	load_ucode_ap();
 
-	t = &per_cpu(init_tss, cpu);
+	t = &per_cpu(cpu_tss, cpu);
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1391,7 +1412,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
 	wait_for_master_cpu(cpu);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b71a7f86d68a..e2888a3ad1e3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2147,24 +2147,24 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 static unsigned long code_segment_base(struct pt_regs *regs)
 {
 	/*
+	 * For IA32 we look at the GDT/LDT segment base to convert the
+	 * effective IP to a linear address.
+	 */
+
+#ifdef CONFIG_X86_32
+	/*
 	 * If we are in VM86 mode, add the segment offset to convert to a
 	 * linear address.
 	 */
 	if (regs->flags & X86_VM_MASK)
 		return 0x10 * regs->cs;
 
-	/*
-	 * For IA32 we look at the GDT/LDT segment base to convert the
-	 * effective IP to a linear address.
-	 */
-#ifdef CONFIG_X86_32
 	if (user_mode(regs) && regs->cs != __USER_CS)
 		return get_segment_base(regs->cs);
 #else
-	if (test_thread_flag(TIF_IA32)) {
-		if (user_mode(regs) && regs->cs != __USER32_CS)
-			return get_segment_base(regs->cs);
-	}
+	if (user_mode(regs) && !user_64bit_mode(regs) &&
+	    regs->cs != __USER32_CS)
+		return get_segment_base(regs->cs);
 #endif
 
 	return 0;
 }
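
Worth unpacking from the common.c hunk above: the rewritten enable_sep_cpu() points MSR_IA32_SYSENTER_ESP at the byte just past the TSS's SYSENTER_stack member, computed with offsetofend(). Below is a minimal user-space sketch of that arithmetic; struct fake_tss is an invented stand-in for the real (much larger) struct tss_struct, and only the layout idea matters.

#include <stddef.h>
#include <stdio.h>

/* Offset of the first byte *after* MEMBER, following the kernel's definition. */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

/* Invented stand-in for struct tss_struct. */
struct fake_tss {
	unsigned long hw_state[26];		/* hardware TSS fields */
	unsigned long SYSENTER_stack[64];	/* stack used during SYSENTER */
};

int main(void)
{
	struct fake_tss tss;

	/*
	 * Stacks grow down, so the MSR wants the highest address of
	 * SYSENTER_stack: the struct base plus offsetofend().
	 */
	unsigned long sp = (unsigned long)&tss +
			   offsetofend(struct fake_tss, SYSENTER_stack);

	printf("SYSENTER stack top: %#lx\n", sp);
	return 0;
}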
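
Similarly, the amd.c hunk encodes a one-line implication: per its comment, 3DNow! or long mode implies PREFETCHW, so the feature bit is set even when CPUID leaves it clear. A self-contained sketch of that fixup, with invented bitmask names standing in for the kernel's cpu_has()/set_cpu_cap() machinery:

#include <stdio.h>

/* Invented flag bits standing in for X86_FEATURE_* capabilities. */
enum {
	FEAT_3DNOW         = 1u << 0,
	FEAT_LM            = 1u << 1,	/* long mode */
	FEAT_3DNOWPREFETCH = 1u << 2,	/* PREFETCH/PREFETCHW */
};

/* Mirrors the init_amd() change: 3DNow or LM implies PREFETCHW. */
static unsigned int fixup_prefetchw(unsigned int feats)
{
	if (!(feats & FEAT_3DNOWPREFETCH) &&
	    (feats & (FEAT_3DNOW | FEAT_LM)))
		feats |= FEAT_3DNOWPREFETCH;
	return feats;
}

int main(void)
{
	/* A 64-bit-capable CPU that does not advertise PREFETCHW. */
	unsigned int feats = fixup_prefetchw(FEAT_LM);

	printf("PREFETCHW: %s\n",
	       (feats & FEAT_3DNOWPREFETCH) ? "set" : "clear");
	return 0;
}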