diff options
author | Paul Mundt <lethal@linux-sh.org> | 2010-01-13 12:51:40 +0900 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2010-01-13 12:51:40 +0900 |
commit | 0ea820cf9bf58f735ed40ec67947159c4f170012 (patch) | |
tree | 77320006b4dded5804c678c1a869571be5c0b95f /arch/sh/kernel/cpu/sh4/fpu.c | |
parent | a3705799e2cc5fb69d88ad6a7f317a8f5597f18d (diff) | |
download | blackbird-op-linux-0ea820cf9bf58f735ed40ec67947159c4f170012.tar.gz blackbird-op-linux-0ea820cf9bf58f735ed40ec67947159c4f170012.zip |
sh: Move over to dynamically allocated FPU context.
This follows the x86 xstate changes and implements a task_xstate slab
cache that is dynamically sized to match one of hard FP/soft FP/FPU-less.
This also tidies up and consolidates some of the SH-2A/SH-4 FPU
fragmentation. Now fpu state restorers are commonly defined, with the
init_fpu()/fpu_init() mess reworked to follow the x86 convention.
The fpu_init() register initialization has been replaced by xstate setup
followed by writing out to hardware via the standard restore path.
As init_fpu() now performs a slab allocation a secondary lighterweight
restorer is also introduced for the context switch.
In the future the DSP state will be rolled in here, too.
More work remains for math emulation and the SH-5 FPU, which presently
uses its own special (UP-only) interfaces.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/kernel/cpu/sh4/fpu.c')
-rw-r--r-- | arch/sh/kernel/cpu/sh4/fpu.c | 159 |
1 files changed, 39 insertions, 120 deletions
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e97857aec8a0..447482d7f65e 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -85,14 +85,14 @@ void save_fpu(struct task_struct *tsk) "fmov.s fr1, @-%0\n\t" "fmov.s fr0, @-%0\n\t" "lds %3, fpscr\n\t":"=r" (dummy) - :"0"((char *)(&tsk->thread.fpu.hard.status)), + :"0"((char *)(&tsk->thread.xstate->hardfpu.status)), "r"(FPSCR_RCHG), "r"(FPSCR_INIT) :"memory"); disable_fpu(); } -static void restore_fpu(struct task_struct *tsk) +void restore_fpu(struct task_struct *tsk) { unsigned long dummy; @@ -135,62 +135,11 @@ static void restore_fpu(struct task_struct *tsk) "lds.l @%0+, fpscr\n\t" "lds.l @%0+, fpul\n\t" :"=r" (dummy) - :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG) + :"0" (tsk->thread.xstate), "r" (FPSCR_RCHG) :"memory"); disable_fpu(); } -/* - * Load the FPU with signalling NANS. This bit pattern we're using - * has the property that no matter wether considered as single or as - * double precision represents signaling NANS. - */ - -static void fpu_init(void) -{ - enable_fpu(); - asm volatile ( "lds %0, fpul\n\t" - "lds %1, fpscr\n\t" - "fsts fpul, fr0\n\t" - "fsts fpul, fr1\n\t" - "fsts fpul, fr2\n\t" - "fsts fpul, fr3\n\t" - "fsts fpul, fr4\n\t" - "fsts fpul, fr5\n\t" - "fsts fpul, fr6\n\t" - "fsts fpul, fr7\n\t" - "fsts fpul, fr8\n\t" - "fsts fpul, fr9\n\t" - "fsts fpul, fr10\n\t" - "fsts fpul, fr11\n\t" - "fsts fpul, fr12\n\t" - "fsts fpul, fr13\n\t" - "fsts fpul, fr14\n\t" - "fsts fpul, fr15\n\t" - "frchg\n\t" - "fsts fpul, fr0\n\t" - "fsts fpul, fr1\n\t" - "fsts fpul, fr2\n\t" - "fsts fpul, fr3\n\t" - "fsts fpul, fr4\n\t" - "fsts fpul, fr5\n\t" - "fsts fpul, fr6\n\t" - "fsts fpul, fr7\n\t" - "fsts fpul, fr8\n\t" - "fsts fpul, fr9\n\t" - "fsts fpul, fr10\n\t" - "fsts fpul, fr11\n\t" - "fsts fpul, fr12\n\t" - "fsts fpul, fr13\n\t" - "fsts fpul, fr14\n\t" - "fsts fpul, fr15\n\t" - "frchg\n\t" - "lds %2, fpscr\n\t" - : /* no output */ - :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT)); - disable_fpu(); -} - /** * denormal_to_double - Given denormalized float number, * store double float @@ -282,9 +231,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* fcnvsd */ struct task_struct *tsk = current; - if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR)) /* FPU error */ - denormal_to_double(&tsk->thread.fpu.hard, + denormal_to_double(&tsk->thread.xstate->hardfpu, (finsn >> 8) & 0xf); else return 0; @@ -300,9 +249,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & FPSCR_DBL_PRECISION; if ((fpscr & FPSCR_CAUSE_ERROR) @@ -312,18 +261,18 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* FPU error because of denormal (doubles) */ llx = ((long long)hx << 32) - | tsk->thread.fpu.hard.fp_regs[n + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; lly = ((long long)hy << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; llx = float64_mul(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_CAUSE_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { /* FPU error because of denormal (floats) */ hx = float32_mul(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -338,9 +287,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & FPSCR_DBL_PRECISION; if ((fpscr & FPSCR_CAUSE_ERROR) @@ -350,15 +299,15 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* FPU error because of denormal (doubles) */ llx = ((long long)hx << 32) - | tsk->thread.fpu.hard.fp_regs[n + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; lly = ((long long)hy << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; if ((finsn & 0xf00f) == 0xf000) llx = float64_add(llx, lly); else llx = float64_sub(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_CAUSE_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { @@ -367,7 +316,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) hx = float32_add(hx, hy); else hx = float32_sub(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -382,9 +331,9 @@ static int ieee_fpe_handler(struct pt_regs *regs) n = (finsn >> 8) & 0xf; m = (finsn >> 4) & 0xf; - hx = tsk->thread.fpu.hard.fp_regs[n]; - hy = tsk->thread.fpu.hard.fp_regs[m]; - fpscr = tsk->thread.fpu.hard.fpscr; + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; + fpscr = tsk->thread.xstate->hardfpu.fpscr; prec = fpscr & FPSCR_DBL_PRECISION; if ((fpscr & FPSCR_CAUSE_ERROR) @@ -394,20 +343,20 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* FPU error because of denormal (doubles) */ llx = ((long long)hx << 32) - | tsk->thread.fpu.hard.fp_regs[n + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; lly = ((long long)hy << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; llx = float64_div(llx, lly); - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; } else if ((fpscr & FPSCR_CAUSE_ERROR) && (!prec && ((hx & 0x7fffffff) < 0x00800000 || (hy & 0x7fffffff) < 0x00800000))) { /* FPU error because of denormal (floats) */ hx = float32_div(hx, hy); - tsk->thread.fpu.hard.fp_regs[n] = hx; + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; } else return 0; @@ -420,17 +369,17 @@ static int ieee_fpe_handler(struct pt_regs *regs) unsigned int hx; m = (finsn >> 8) & 0x7; - hx = tsk->thread.fpu.hard.fp_regs[m]; + hx = tsk->thread.xstate->hardfpu.fp_regs[m]; - if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR) + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR) && ((hx & 0x7fffffff) < 0x00100000)) { /* subnormal double to float conversion */ long long llx; - llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32) - | tsk->thread.fpu.hard.fp_regs[m + 1]; + llx = ((long long)tsk->thread.xstate->hardfpu.fp_regs[m] << 32) + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; - tsk->thread.fpu.hard.fpul = float64_to_float32(llx); + tsk->thread.xstate->hardfpu.fpul = float64_to_float32(llx); } else return 0; @@ -449,7 +398,7 @@ void float_raise(unsigned int flags) int float_rounding_mode(void) { struct task_struct *tsk = current; - int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr); + int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.xstate->hardfpu.fpscr); return roundingMode; } @@ -461,16 +410,16 @@ BUILD_TRAP_HANDLER(fpu_error) __unlazy_fpu(tsk, regs); fpu_exception_flags = 0; if (ieee_fpe_handler(regs)) { - tsk->thread.fpu.hard.fpscr &= + tsk->thread.xstate->hardfpu.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - tsk->thread.fpu.hard.fpscr |= fpu_exception_flags; + tsk->thread.xstate->hardfpu.fpscr |= fpu_exception_flags; /* Set the FPSCR flag as well as cause bits - simply * replicate the cause */ - tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); + tsk->thread.xstate->hardfpu.fpscr |= (fpu_exception_flags >> 10); grab_fpu(regs); restore_fpu(tsk); task_thread_info(tsk)->status |= TS_USEDFPU; - if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & + if ((((tsk->thread.xstate->hardfpu.fpscr & FPSCR_ENABLE_MASK) >> 7) & (fpu_exception_flags >> 2)) == 0) { return; } @@ -478,33 +427,3 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } - -void fpu_state_restore(struct pt_regs *regs) -{ - struct task_struct *tsk = current; - - grab_fpu(regs); - if (unlikely(!user_mode(regs))) { - printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); - BUG(); - return; - } - - if (likely(used_math())) { - /* Using the FPU again. */ - restore_fpu(tsk); - } else { - /* First time FPU user. */ - fpu_init(); - set_used_math(); - } - task_thread_info(tsk)->status |= TS_USEDFPU; - tsk->fpu_counter++; -} - -BUILD_TRAP_HANDLER(fpu_state_restore) -{ - TRAP_HANDLER_DECL; - - fpu_state_restore(regs); -} |