summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/math-emu/math_efp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/math-emu/math_efp.c')
-rw-r--r--arch/powerpc/math-emu/math_efp.c316
1 files changed, 238 insertions, 78 deletions
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
index a73f0884d358..28337c9709ae 100644
--- a/arch/powerpc/math-emu/math_efp.c
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -20,6 +20,7 @@
*/
#include <linux/types.h>
+#include <linux/prctl.h>
#include <asm/uaccess.h>
#include <asm/reg.h>
@@ -275,21 +276,13 @@ int do_spe_mathemu(struct pt_regs *regs)
case EFSCTSF:
case EFSCTUF:
- if (!((vb.wp[1] >> 23) == 0xff && ((vb.wp[1] & 0x7fffff) > 0))) {
- /* NaN */
- if (((vb.wp[1] >> 23) & 0xff) == 0) {
- /* denorm */
- vc.wp[1] = 0x0;
- } else if ((vb.wp[1] >> 31) == 0) {
- /* positive normal */
- vc.wp[1] = (func == EFSCTSF) ?
- 0x7fffffff : 0xffffffff;
- } else { /* negative normal */
- vc.wp[1] = (func == EFSCTSF) ?
- 0x80000000 : 0x0;
- }
- } else { /* rB is NaN */
- vc.wp[1] = 0x0;
+ if (SB_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ SB_e += (func == EFSCTSF ? 31 : 32);
+ FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
+ (func == EFSCTSF));
}
goto update_regs;
@@ -306,16 +299,25 @@ int do_spe_mathemu(struct pt_regs *regs)
}
case EFSCTSI:
- case EFSCTSIZ:
case EFSCTUI:
+ if (SB_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
+ ((func & 0x3) != 0));
+ }
+ goto update_regs;
+
+ case EFSCTSIZ:
case EFSCTUIZ:
- if (func & 0x4) {
- _FP_ROUND(1, SB);
+ if (SB_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
- _FP_ROUND_ZERO(1, SB);
+ FP_TO_INT_S(vc.wp[1], SB, 32,
+ ((func & 0x3) != 0));
}
- FP_TO_INT_S(vc.wp[1], SB, 32,
- (((func & 0x3) != 0) || SB_s));
goto update_regs;
default:
@@ -404,22 +406,13 @@ cmp_s:
case EFDCTSF:
case EFDCTUF:
- if (!((vb.wp[0] >> 20) == 0x7ff &&
- ((vb.wp[0] & 0xfffff) > 0 || (vb.wp[1] > 0)))) {
- /* not a NaN */
- if (((vb.wp[0] >> 20) & 0x7ff) == 0) {
- /* denorm */
- vc.wp[1] = 0x0;
- } else if ((vb.wp[0] >> 31) == 0) {
- /* positive normal */
- vc.wp[1] = (func == EFDCTSF) ?
- 0x7fffffff : 0xffffffff;
- } else { /* negative normal */
- vc.wp[1] = (func == EFDCTSF) ?
- 0x80000000 : 0x0;
- }
- } else { /* NaN */
- vc.wp[1] = 0x0;
+ if (DB_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ DB_e += (func == EFDCTSF ? 31 : 32);
+ FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
+ (func == EFDCTSF));
}
goto update_regs;
@@ -437,21 +430,35 @@ cmp_s:
case EFDCTUIDZ:
case EFDCTSIDZ:
- _FP_ROUND_ZERO(2, DB);
- FP_TO_INT_D(vc.dp[0], DB, 64, ((func & 0x1) == 0));
+ if (DB_c == FP_CLS_NAN) {
+ vc.dp[0] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ FP_TO_INT_D(vc.dp[0], DB, 64,
+ ((func & 0x1) == 0));
+ }
goto update_regs;
case EFDCTUI:
case EFDCTSI:
+ if (DB_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
+ ((func & 0x3) != 0));
+ }
+ goto update_regs;
+
case EFDCTUIZ:
case EFDCTSIZ:
- if (func & 0x4) {
- _FP_ROUND(2, DB);
+ if (DB_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
- _FP_ROUND_ZERO(2, DB);
+ FP_TO_INT_D(vc.wp[1], DB, 32,
+ ((func & 0x3) != 0));
}
- FP_TO_INT_D(vc.wp[1], DB, 32,
- (((func & 0x3) != 0) || DB_s));
goto update_regs;
default:
@@ -556,37 +563,60 @@ cmp_d:
cmp = -1;
goto cmp_vs;
- case EVFSCTSF:
- __asm__ __volatile__ ("mtspr 512, %4\n"
- "efsctsf %0, %2\n"
- "efsctsf %1, %3\n"
- : "=r" (vc.wp[0]), "=r" (vc.wp[1])
- : "r" (vb.wp[0]), "r" (vb.wp[1]), "r" (0));
- goto update_regs;
-
case EVFSCTUF:
- __asm__ __volatile__ ("mtspr 512, %4\n"
- "efsctuf %0, %2\n"
- "efsctuf %1, %3\n"
- : "=r" (vc.wp[0]), "=r" (vc.wp[1])
- : "r" (vb.wp[0]), "r" (vb.wp[1]), "r" (0));
+ case EVFSCTSF:
+ if (SB0_c == FP_CLS_NAN) {
+ vc.wp[0] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ SB0_e += (func == EVFSCTSF ? 31 : 32);
+ FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
+ (func == EVFSCTSF));
+ }
+ if (SB1_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ SB1_e += (func == EVFSCTSF ? 31 : 32);
+ FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
+ (func == EVFSCTSF));
+ }
goto update_regs;
case EVFSCTUI:
case EVFSCTSI:
+ if (SB0_c == FP_CLS_NAN) {
+ vc.wp[0] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
+ ((func & 0x3) != 0));
+ }
+ if (SB1_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
+ ((func & 0x3) != 0));
+ }
+ goto update_regs;
+
case EVFSCTUIZ:
case EVFSCTSIZ:
- if (func & 0x4) {
- _FP_ROUND(1, SB0);
- _FP_ROUND(1, SB1);
+ if (SB0_c == FP_CLS_NAN) {
+ vc.wp[0] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
- _FP_ROUND_ZERO(1, SB0);
- _FP_ROUND_ZERO(1, SB1);
+ FP_TO_INT_S(vc.wp[0], SB0, 32,
+ ((func & 0x3) != 0));
+ }
+ if (SB1_c == FP_CLS_NAN) {
+ vc.wp[1] = 0;
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ } else {
+ FP_TO_INT_S(vc.wp[1], SB1, 32,
+ ((func & 0x3) != 0));
}
- FP_TO_INT_S(vc.wp[0], SB0, 32,
- (((func & 0x3) != 0) || SB0_s));
- FP_TO_INT_S(vc.wp[1], SB1, 32,
- (((func & 0x3) != 0) || SB1_s));
goto update_regs;
default:
@@ -630,9 +660,27 @@ update_ccr:
regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
update_regs:
- __FPU_FPSCR &= ~FP_EX_MASK;
+ /*
+ * If the "invalid" exception sticky bit was set by the
+ * processor for non-finite input, but was not set before the
+ * instruction being emulated, clear it. Likewise for the
+ * "underflow" bit, which may have been set by the processor
+ * for exact underflow, not just inexact underflow when the
+ * flag should be set for IEEE 754 semantics. Other sticky
+ * exceptions will only be set by the processor when they are
+ * correct according to IEEE 754 semantics, and we must not
+ * clear sticky bits that were already set before the emulated
+ * instruction as they represent the user-visible sticky
+ * exception status. "inexact" traps to kernel are not
+ * required for IEEE semantics and are not enabled by default,
+ * so the "inexact" sticky bit may have been set by a previous
+ * instruction without the kernel being aware of it.
+ */
+ __FPU_FPSCR
+ &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
+ current->thread.spefscr_last = __FPU_FPSCR;
current->thread.evr[fc] = vc.wp[0];
regs->gpr[fc] = vc.wp[1];
@@ -644,6 +692,23 @@ update_regs:
pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]);
pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]);
+ if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
+ if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
+ && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
+ return 1;
+ if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
+ && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
+ return 1;
+ if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
+ && (current->thread.fpexc_mode & PR_FP_EXC_UND))
+ return 1;
+ if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
+ && (current->thread.fpexc_mode & PR_FP_EXC_RES))
+ return 1;
+ if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
+ && (current->thread.fpexc_mode & PR_FP_EXC_INV))
+ return 1;
+ }
return 0;
illegal:
@@ -662,21 +727,28 @@ int speround_handler(struct pt_regs *regs)
{
union dw_union fgpr;
int s_lo, s_hi;
- unsigned long speinsn, type, fc;
+ int lo_inexact, hi_inexact;
+ int fp_result;
+ unsigned long speinsn, type, fb, fc, fptype, func;
if (get_user(speinsn, (unsigned int __user *) regs->nip))
return -EFAULT;
if ((speinsn >> 26) != 4)
return -EINVAL; /* not an spe instruction */
- type = insn_type(speinsn & 0x7ff);
+ func = speinsn & 0x7ff;
+ type = insn_type(func);
if (type == XCR) return -ENOSYS;
__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
+ fptype = (speinsn >> 5) & 0x7;
+
/* No need to round if the result is exact */
- if (!(__FPU_FPSCR & FP_EX_INEXACT))
+ lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
+ hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
+ if (!(lo_inexact || (hi_inexact && fptype == VCT)))
return 0;
fc = (speinsn >> 21) & 0x1f;
@@ -685,9 +757,68 @@ int speround_handler(struct pt_regs *regs)
fgpr.wp[0] = current->thread.evr[fc];
fgpr.wp[1] = regs->gpr[fc];
+ fb = (speinsn >> 11) & 0x1f;
+ switch (func) {
+ case EFSCTUIZ:
+ case EFSCTSIZ:
+ case EVFSCTUIZ:
+ case EVFSCTSIZ:
+ case EFDCTUIDZ:
+ case EFDCTSIDZ:
+ case EFDCTUIZ:
+ case EFDCTSIZ:
+ /*
+ * These instructions always round to zero,
+ * independent of the rounding mode.
+ */
+ return 0;
+
+ case EFSCTUI:
+ case EFSCTUF:
+ case EVFSCTUI:
+ case EVFSCTUF:
+ case EFDCTUI:
+ case EFDCTUF:
+ fp_result = 0;
+ s_lo = 0;
+ s_hi = 0;
+ break;
+
+ case EFSCTSI:
+ case EFSCTSF:
+ fp_result = 0;
+ /* Recover the sign of a zero result if possible. */
+ if (fgpr.wp[1] == 0)
+ s_lo = regs->gpr[fb] & SIGN_BIT_S;
+ break;
+
+ case EVFSCTSI:
+ case EVFSCTSF:
+ fp_result = 0;
+ /* Recover the sign of a zero result if possible. */
+ if (fgpr.wp[1] == 0)
+ s_lo = regs->gpr[fb] & SIGN_BIT_S;
+ if (fgpr.wp[0] == 0)
+ s_hi = current->thread.evr[fb] & SIGN_BIT_S;
+ break;
+
+ case EFDCTSI:
+ case EFDCTSF:
+ fp_result = 0;
+ s_hi = s_lo;
+ /* Recover the sign of a zero result if possible. */
+ if (fgpr.wp[1] == 0)
+ s_hi = current->thread.evr[fb] & SIGN_BIT_S;
+ break;
+
+ default:
+ fp_result = 1;
+ break;
+ }
+
pr_debug("round fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]);
- switch ((speinsn >> 5) & 0x7) {
+ switch (fptype) {
/* Since SPE instructions on E500 core can handle round to nearest
* and round toward zero with IEEE-754 complied, we just need
* to handle round toward +Inf and round toward -Inf by software.
@@ -696,25 +827,52 @@ int speround_handler(struct pt_regs *regs)
if ((FP_ROUNDMODE) == FP_RND_PINF) {
if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
} else { /* round to -Inf */
- if (s_lo) fgpr.wp[1]++; /* Z < 0, choose Z2 */
+ if (s_lo) {
+ if (fp_result)
+ fgpr.wp[1]++; /* Z < 0, choose Z2 */
+ else
+ fgpr.wp[1]--; /* Z < 0, choose Z2 */
+ }
}
break;
case DPFP:
if (FP_ROUNDMODE == FP_RND_PINF) {
- if (!s_hi) fgpr.dp[0]++; /* Z > 0, choose Z1 */
+ if (!s_hi) {
+ if (fp_result)
+ fgpr.dp[0]++; /* Z > 0, choose Z1 */
+ else
+ fgpr.wp[1]++; /* Z > 0, choose Z1 */
+ }
} else { /* round to -Inf */
- if (s_hi) fgpr.dp[0]++; /* Z < 0, choose Z2 */
+ if (s_hi) {
+ if (fp_result)
+ fgpr.dp[0]++; /* Z < 0, choose Z2 */
+ else
+ fgpr.wp[1]--; /* Z < 0, choose Z2 */
+ }
}
break;
case VCT:
if (FP_ROUNDMODE == FP_RND_PINF) {
- if (!s_lo) fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
- if (!s_hi) fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
+ if (lo_inexact && !s_lo)
+ fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
+ if (hi_inexact && !s_hi)
+ fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
} else { /* round to -Inf */
- if (s_lo) fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
- if (s_hi) fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
+ if (lo_inexact && s_lo) {
+ if (fp_result)
+ fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
+ else
+ fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
+ }
+ if (hi_inexact && s_hi) {
+ if (fp_result)
+ fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
+ else
+ fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
+ }
}
break;
@@ -727,6 +885,8 @@ int speround_handler(struct pt_regs *regs)
pr_debug(" to fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]);
+ if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
+ return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
return 0;
}
OpenPOWER on IntegriCloud