diff options
Diffstat (limited to 'arch/x86/include/asm/fpu-internal.h')
-rw-r--r-- | arch/x86/include/asm/fpu-internal.h | 390 |
1 files changed, 226 insertions, 164 deletions
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 75f4c6d6a331..92f3c6ed817f 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -12,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/regset.h> +#include <linux/compat.h> #include <linux/slab.h> #include <asm/asm.h> #include <asm/cpufeature.h> @@ -21,42 +22,74 @@ #include <asm/uaccess.h> #include <asm/xsave.h> -extern unsigned int sig_xstate_size; +#ifdef CONFIG_X86_64 +# include <asm/sigcontext32.h> +# include <asm/user32.h> +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern unsigned int mxcsr_feature_mask; extern void fpu_init(void); +extern void eager_fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; - /* * xstateregs_active == fpregs_active. Please refer to the comment * at the definition of fpregs_active. */ #define xstateregs_active fpregs_active -extern struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; -struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); -#endif - #ifdef CONFIG_MATH_EMULATION +# define HAVE_HWFP (boot_cpu_data.hard_math) extern void finit_soft_fpu(struct i387_soft_struct *soft); #else +# define HAVE_HWFP 1 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has(X86_FEATURE_EAGER_FPU); +} + static __always_inline __pure bool use_xsaveopt(void) { return static_cpu_has(X86_FEATURE_XSAVEOPT); @@ -72,6 +105,13 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has(X86_FEATURE_FXSR); } +static inline void fx_finit(struct i387_fxsave_struct *fx) +{ + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -81,131 +121,88 @@ static inline void sanitize_i387_state(struct task_struct *tsk) __sanitize_i387_state(tsk); } -#ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int fsave_user(struct i387_fsave_struct __user *fx) { - int err; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif - return err; + return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { - int err; + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* - * Clear the bytes not touched by the fxsave and reserved - * for the SW usage. - */ - err = __clear_user(&fx->sw_reserved, - sizeof(struct _fpx_sw_bytes)); - if (unlikely(err)) - return -EFAULT; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=m" (*fx) - : [fx] "R" (fx), "0" (0)); -#endif - if (unlikely(err) && - __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline void fpu_fxsave(struct fpu *fpu) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -#ifdef CONFIG_AS_FXSAVEQ - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#else - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). - asm volatile("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); - This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - asm volatile("rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); -#endif + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); } -#else /* CONFIG_X86_32 */ - -/* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int frstor_checking(struct i387_fsave_struct *fx) { - /* - * The "nop" is needed to make the instructions the same - * length. - */ - alternative_input( - "nop ; frstor %1", - "fxrstor %1", - X86_FEATURE_FXSR, - "m" (*fx)); - - return 0; + return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void fpu_fxsave(struct fpu *fpu) { - asm volatile("fxsave %[fx]" - : [fx] "=m" (fpu->state->fxsave)); + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); + } } -#endif /* CONFIG_X86_64 */ - /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact. @@ -248,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk) return fpu_save_init(&tsk->thread.fpu); } -static inline int fpu_fxrstor_checking(struct fpu *fpu) -{ - return fxrstor_checking(&fpu->state->fxsave); -} - static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(fpu); + return fpu_xrstor_checking(&fpu->state->xsave); + else if (use_fxsr()) + return fxrstor_checking(&fpu->state->fxsave); else - return fpu_fxrstor_checking(fpu); + return frstor_checking(&fpu->state->fsave); } static inline int restore_fpu_checking(struct task_struct *tsk) @@ -310,15 +304,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_eager_fpu()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_eager_fpu()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* + * Forget coprocessor state.. + */ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_eager_fpu()) + drop_fpu(tsk); + else { + if (use_xsave()) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); + } +} + /* * FPU state switching for scheduling. * @@ -352,7 +383,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = tsk_used_math(new) && (use_eager_fpu() || + new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -364,14 +400,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_eager_fpu()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) + if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -391,44 +427,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { if (fpu.preload) { if (unlikely(restore_fpu_checking(new))) - __thread_fpu_end(new); + drop_init_fpu(new); } } /* * Signal frame handlers... */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); -static inline void __clear_fpu(struct task_struct *tsk) +static inline int xstate_sigframe_size(void) { - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); + return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); } + + return __restore_xstate_sig(buf, buf_fx, size); } /* - * The actual user_fpu_begin/end() functions - * need to be preemption-safe. + * Need to be preemption-safe. * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. + * NOTE! user_fpu_begin() must be used only immediately before restoring + * it. This function does not do any save/restore on their own. */ -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - static inline void user_fpu_begin(void) { preempt_disable(); @@ -437,25 +469,32 @@ static inline void user_fpu_begin(void) preempt_enable(); } +static inline void __save_fpu(struct task_struct *tsk) +{ + if (use_xsave()) + xsave_state(&tsk->thread.fpu.state->xsave, -1); + else + fpu_fxsave(&tsk->thread.fpu); +} + /* * These disable preemption on their own and are safe */ static inline void save_init_fpu(struct task_struct *tsk) { WARN_ON_ONCE(!__thread_has_fpu(tsk)); + + if (use_eager_fpu()) { + __save_fpu(tsk); + return; + } + preempt_disable(); __save_init_fpu(tsk); __thread_fpu_end(tsk); preempt_enable(); } -static inline void clear_fpu(struct task_struct *tsk) -{ - preempt_disable(); - __clear_fpu(tsk); - preempt_enable(); -} - /* * i387 state interaction */ @@ -510,11 +549,34 @@ static inline void fpu_free(struct fpu *fpu) } } -static inline void fpu_copy(struct fpu *dst, struct fpu *src) +static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { - memcpy(dst->state, src->state, xstate_size); + if (use_eager_fpu()) { + memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + __save_fpu(dst); + } else { + struct fpu *dfpu = &dst->thread.fpu; + struct fpu *sfpu = &src->thread.fpu; + + unlazy_fpu(src); + memcpy(dfpu->state, sfpu->state, xstate_size); + } } -extern void fpu_finit(struct fpu *fpu); +static inline unsigned long +alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, + unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + return sp; +} #endif |