summary refs log tree commit diff stats
path: root/gcc/config/sh/lib1funcs-Os-4-200.asm
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/sh/lib1funcs-Os-4-200.asm')
-rw-r--r-- gcc/config/sh/lib1funcs-Os-4-200.asm 322
1 files changed, 0 insertions, 322 deletions
diff --git a/gcc/config/sh/lib1funcs-Os-4-200.asm b/gcc/config/sh/lib1funcs-Os-4-200.asm
deleted file mode 100644
index aae57ccd36c..00000000000
--- a/gcc/config/sh/lib1funcs-Os-4-200.asm
+++ /dev/null
@@ -1,322 +0,0 @@
-/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-/* Moderately Space-optimized libgcc routines for the Renesas SH /
- STMicroelectronics ST40 CPUs.
- Contributed by J"orn Rennecke joern.rennecke@st.com. */
-
-#include "lib1funcs.h"
-
-#if !__SHMEDIA__
-#ifdef L_udivsi3_i4i
-
-/* 88 bytes; sh4-200 cycle counts:
- divisor >= 2G: 11 cycles
- dividend < 2G: 48 cycles
- dividend >= 2G, divisor != 1: 54 cycles
- dividend >= 2G, divisor == 1: 22 cycles */
-#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
-!! args in r4 and r5, result in r0, clobber r1
-
- .global GLOBAL(udivsi3_i4i)
- FUNC(GLOBAL(udivsi3_i4i))
-GLOBAL(udivsi3_i4i): /* r0 = r4 / r5 as unsigned 32-bit values, computed with a double-precision fdiv */
- mova L1,r0 /* r0 -> constant block at L1: FPSCR value, followed by the double 4294967296 */
- cmp/pz r5 /* T = (divisor >= 0 as signed), i.e. divisor < 2G */
- sts fpscr,r1 /* r1 = FPSCR on entry; restored on every exit path */
- lds.l @r0+,fpscr /* switch FPSCR to the value stored at L1; r0 now points at the double */
- sts.l fpul,@-r15 /* push entry FPUL */
- bf LOCAL(huge_divisor) /* divisor >= 2G: handled with a plain unsigned compare */
- mov.l r1,@-r15 /* push entry FPSCR */
- lds r4,fpul
- cmp/pz r4 /* T = (dividend < 2G); consumed by the bt / bt/s below */
-#ifdef FMOVD_WORKS
- fmov.d dr0,@-r15 /* save dr0 */
- float fpul,dr0 /* dr0 = (double) dividend, converted as a signed integer */
- fmov.d dr2,@-r15 /* save dr2 */
- bt LOCAL(dividend_adjusted) /* dividend < 2G: signed conversion is already correct */
- mov #1,r1
- fmov.d @r0,dr2 /* dr2 = 4294967296.0 */
- cmp/eq r1,r5
- bt LOCAL(div_by_1) /* divisor == 1: skip adjust and divide; NOTE(review): presumably because the quotient would not fit the signed range of ftrc -- confirm */
- fadd dr2,dr0 /* dividend >= 2G converted as negative: add 2^32 to get its unsigned value */
-LOCAL(dividend_adjusted):
- lds r5,fpul
- float fpul,dr2 /* dr2 = (double) divisor; divisor < 2G on this path */
- fdiv dr2,dr0 /* dr0 = dividend / divisor */
-LOCAL(div_by_1):
- fmov.d @r15+,dr2 /* restore dr2 */
- ftrc dr0,fpul /* fpul = dr0 converted back to an integer */
- fmov.d @r15+,dr0 /* restore dr0 */
-#else /* !FMOVD_WORKS */
- fmov.s DR01,@-r15 /* save dr0 as two single-precision moves; DR00/DR01/DR20/DR21 are not defined in this file -- presumably from lib1funcs.h */
- mov #1,r1
- fmov.s DR00,@-r15
- float fpul,dr0 /* dr0 = (double) dividend, converted as a signed integer */
- fmov.s DR21,@-r15 /* save dr2, first half */
- bt/s LOCAL(dividend_adjusted) /* dividend < 2G: signed conversion is already correct */
- fmov.s DR20,@-r15 /* delay slot: runs whether or not the branch is taken */
- cmp/eq r1,r5
- bt LOCAL(div_by_1) /* divisor == 1: skip adjust and divide */
- fmov.s @r0+,DR20 /* load 4294967296.0 into dr2, one half at a time */
- fmov.s @r0,DR21
- fadd dr2,dr0 /* add 2^32 to turn the negative conversion into the unsigned value */
-LOCAL(dividend_adjusted):
- lds r5,fpul
- float fpul,dr2 /* dr2 = (double) divisor; divisor < 2G on this path */
- fdiv dr2,dr0 /* dr0 = dividend / divisor */
-LOCAL(div_by_1):
- fmov.s @r15+,DR20 /* restore dr2 */
- fmov.s @r15+,DR21
- ftrc dr0,fpul /* fpul = dr0 converted back to an integer */
- fmov.s @r15+,DR00 /* restore dr0 */
- fmov.s @r15+,DR01
-#endif /* !FMOVD_WORKS */
- lds.l @r15+,fpscr /* restore entry FPSCR */
- sts fpul,r0 /* r0 = quotient */
- rts
- lds.l @r15+,fpul /* delay slot: restore entry FPUL */
-
-#ifdef FMOVD_WORKS
- .p2align 3 ! make double below 8 byte aligned.
-#endif
-LOCAL(huge_divisor):
- lds r1,fpscr /* restore entry FPSCR */
- add #4,r15 /* discard the pushed FPUL; FPUL itself was not modified on this path */
- cmp/hs r5,r4 /* T = (dividend >= divisor), unsigned */
- rts
- movt r0 /* delay slot: r0 = T, so the quotient is 0 or 1 */
-
- .p2align 2
-L1:
-#ifndef FMOVD_WORKS
- .long 0x80000 /* FPSCR value loaded on entry; NOTE(review): bit 19 looks like PR (double precision) -- confirm against the SH-4 manual */
-#else
- .long 0x180000 /* as above plus bit 20; NOTE(review): presumably SZ so that fmov.d moves a register pair -- confirm */
-#endif
- .double 4294967296 /* 2^32, added to dividends whose signed conversion came out negative */
-
- ENDFUNC(GLOBAL(udivsi3_i4i))
-#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
-
-#if 0
-/* With 36 bytes, the following would probably be the most compact
- implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
-GLOBAL(udivsi3_i4i): /* disabled by the surrounding #if 0; one div1 step per loop iteration */
- mov.l r2,@-r15 /* save r2 */
- mov #0,r1
- div0u
- mov r1,r2 /* r2 = 0 */
- mov.l r3,@-r15 /* save r3 */
- mov r1,r3 /* r3 = 0; only read from here on */
- sett /* T = 1: becomes the marker bit rotated into r2 on the first pass */
- mov r4,r0 /* r0 = dividend */
-LOCAL(loop):
- rotcr r2 /* T goes into the top bit of r2; the old bit 0 of r2 becomes T */
- ;
- bt/s LOCAL(end) /* leave once a set bit has been rotated out of the bottom of r2 */
- cmp/gt r2,r3 /* delay slot: T = (r3 > r2) signed; r3 is 0, so T = top bit of r2 */
- rotcl r0 /* shift T into the bottom of r0; the top bit of r0 becomes T */
- bra LOCAL(loop)
- div1 r5,r1 /* delay slot: one division step */
-LOCAL(end):
- rotcl r0
- mov.l @r15+,r3 /* restore r3 */
- rts
- mov.l @r15+,r2 /* delay slot: restore r2 */
-#endif /* 0 */
-
-/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
- sh4-200 run times:
- udiv small divisor: 55 cycles
- udiv large divisor: 52 cycles
- sdiv small divisor, positive result: 59 cycles
- sdiv large divisor, positive result: 56 cycles
- sdiv small divisor, negative result: 65 cycles (*)
- sdiv large divisor, negative result: 62 cycles (*)
- (*): r2 is restored in the rts delay slot and has a lingering latency
- of two more cycles. */
- .balign 4
- .global GLOBAL(udivsi3_i4i)
- FUNC(GLOBAL(udivsi3_i4i))
- FUNC(GLOBAL(sdivsi3_i4i))
-GLOBAL(udivsi3_i4i): /* unsigned divide of r4 by r5 using div1 steps; r4 and r5 are pushed here and popped before returning */
- sts pr,r1 /* r1 = return address; the bsr calls below overwrite pr */
- mov.l r4,@-r15 /* save r4 */
- extu.w r5,r0
- cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */
- swap.w r4,r0 /* r0 = dividend with its 16-bit halves exchanged */
- shlr16 r4 /* r4 = upper 16 bits of the dividend */
- bf/s LOCAL(large_divisor)
- div0u /* delay slot: set up for unsigned div1 steps */
- mov.l r5,@-r15 /* save r5 */
- shll16 r5 /* move the 16-bit divisor into the upper half of r5 */
-LOCAL(sdiv_small_divisor): /* also branched to from sdivsi3_i4i */
- div1 r5,r4
- bsr LOCAL(div6)
- div1 r5,r4 /* delay slot; 1 + 1 + 6 = 8 division steps so far */
- div1 r5,r4
- bsr LOCAL(div6)
- div1 r5,r4 /* delay slot; 16 division steps so far */
- xtrct r4,r0
- xtrct r0,r4
- bsr LOCAL(div7)
- swap.w r4,r4 /* delay slot */
- div1 r5,r4
- bsr LOCAL(div7)
- div1 r5,r4 /* delay slot; 7 + 1 + 1 + 7 = 16 further division steps */
- xtrct r4,r0
- mov.l @r15+,r5 /* restore r5 */
- swap.w r0,r0
- mov.l @r15+,r4 /* restore r4 */
- jmp @r1 /* return through the address saved in r1 */
- rotcl r0 /* delay slot: shift the final T bit into r0 */
-LOCAL(div7): /* seven div1 steps: one here, then falls through into div6 */
- div1 r5,r4
-LOCAL(div6):
- div1 r5,r4; div1 r5,r4; div1 r5,r4
- div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 /* six div1 in total; the last one is in the rts delay slot */
-
-LOCAL(divx3): /* three div1 steps, each preceded by rotcl r0 */
- rotcl r0
- div1 r5,r4
- rotcl r0
- div1 r5,r4
- rotcl r0
- rts
- div1 r5,r4 /* delay slot */
-
-LOCAL(large_divisor):
- mov.l r5,@-r15 /* save r5 */
-LOCAL(sdiv_large_divisor): /* also branched to from sdivsi3_i4i */
- xor r4,r0 /* clear the lower half of r0, leaving the low 16 dividend bits in its upper half */
- .rept 4 /* each repetition does 1 + 3 div1 steps, 16 in total */
- rotcl r0
- bsr LOCAL(divx3)
- div1 r5,r4 /* delay slot */
- .endr
- mov.l @r15+,r5 /* restore r5 */
- mov.l @r15+,r4 /* restore r4 */
- jmp @r1 /* return through the address saved in r1 */
- rotcl r0 /* delay slot: shift the final T bit into r0 */
- ENDFUNC(GLOBAL(udivsi3_i4i))
-
- .global GLOBAL(sdivsi3_i4i)
-GLOBAL(sdivsi3_i4i): /* signed divide of r4 by r5: negates negative operands, then reuses the unsigned tails of udivsi3_i4i */
- mov.l r4,@-r15 /* save r4; popped by the shared tail */
- cmp/pz r5 /* T = (divisor >= 0) */
- mov.l r5,@-r15 /* save r5; popped by the shared tail */
- bt/s LOCAL(pos_divisor)
- cmp/pz r4 /* delay slot: T = (dividend >= 0) */
- neg r5,r5 /* divisor was negative: negate it */
- extu.w r5,r0
- bt/s LOCAL(neg_result) /* dividend >= 0, divisor < 0 */
- cmp/eq r5,r0 /* delay slot: T = (divisor fits in 16 bits) */
- neg r4,r4 /* both operands negative: negate the dividend as well */
-LOCAL(pos_result):
- swap.w r4,r0 /* r0 = dividend with its 16-bit halves exchanged */
- bra LOCAL(sdiv_check_divisor)
- sts pr,r1 /* delay slot: r1 = return address, used by the jmp @r1 in the shared tail */
-LOCAL(pos_divisor):
- extu.w r5,r0
- bt/s LOCAL(pos_result) /* dividend >= 0, divisor >= 0 */
- cmp/eq r5,r0 /* delay slot: T = (divisor fits in 16 bits) */
- neg r4,r4 /* dividend was negative: negate it */
-LOCAL(neg_result):
- mova LOCAL(negate_result),r0
- ;
- mov r0,r1 /* r1 = address of negate_result, so the jmp @r1 in the shared tail lands there */
- swap.w r4,r0 /* r0 = dividend with its 16-bit halves exchanged */
- lds r2,macl /* park the caller r2 in MACL; MACL is overwritten on this path */
- sts pr,r2 /* r2 = real return address */
-LOCAL(sdiv_check_divisor):
- shlr16 r4 /* r4 = upper 16 bits of the dividend */
- bf/s LOCAL(sdiv_large_divisor) /* T still holds the result of the cmp/eq above */
- div0u /* delay slot: set up for unsigned div1 steps */
- bra LOCAL(sdiv_small_divisor)
- shll16 r5 /* delay slot: move the 16-bit divisor into the upper half of r5 */
- .balign 4
-LOCAL(negate_result):
- neg r0,r0 /* negate the unsigned quotient */
- jmp @r2 /* return to the caller */
- sts macl,r2 /* delay slot: restore r2 from MACL */
- ENDFUNC(GLOBAL(sdivsi3_i4i))
-#endif /* !__SH_FPU_DOUBLE__ */
-#endif /* L_udivsi3_i4i */
-
-#ifdef L_sdivsi3_i4i
-#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
-/* 48 bytes, 45 cycles on sh4-200 */
-!! args in r4 and r5, result in r0, clobber r1
-
- .global GLOBAL(sdivsi3_i4i)
- FUNC(GLOBAL(sdivsi3_i4i))
-GLOBAL(sdivsi3_i4i): /* r0 = r4 / r5 as signed 32-bit values, computed with a double-precision fdiv */
- sts.l fpscr,@-r15 /* push entry FPSCR */
- sts fpul,r1 /* r1 = entry FPUL, restored in the rts delay slot */
- mova L1,r0 /* r0 -> FPSCR value stored at L1 */
- lds.l @r0+,fpscr /* switch FPSCR to that value */
- lds r4,fpul
-#ifdef FMOVD_WORKS
- fmov.d dr0,@-r15 /* save dr0 */
- float fpul,dr0 /* dr0 = (double) dividend */
- lds r5,fpul
- fmov.d dr2,@-r15 /* save dr2 */
-#else
- fmov.s DR01,@-r15 /* save dr0 as two single-precision moves; DR00/DR01/DR20/DR21 are not defined in this file -- presumably from lib1funcs.h */
- fmov.s DR00,@-r15
- float fpul,dr0 /* dr0 = (double) dividend */
- lds r5,fpul
- fmov.s DR21,@-r15 /* save dr2 */
- fmov.s DR20,@-r15
-#endif
- float fpul,dr2 /* dr2 = (double) divisor */
- fdiv dr2,dr0 /* dr0 = dividend / divisor */
-#ifdef FMOVD_WORKS
- fmov.d @r15+,dr2 /* restore dr2 */
-#else
- fmov.s @r15+,DR20 /* restore dr2 */
- fmov.s @r15+,DR21
-#endif
- ftrc dr0,fpul /* fpul = dr0 converted back to an integer */
-#ifdef FMOVD_WORKS
- fmov.d @r15+,dr0 /* restore dr0 */
-#else
- fmov.s @r15+,DR00 /* restore dr0 */
- fmov.s @r15+,DR01
-#endif
- lds.l @r15+,fpscr /* restore entry FPSCR */
- sts fpul,r0 /* r0 = quotient */
- rts
- lds r1,fpul /* delay slot: restore entry FPUL */
-
- .p2align 2
-L1:
-#ifndef FMOVD_WORKS
- .long 0x80000 /* FPSCR value loaded on entry; NOTE(review): bit 19 looks like PR (double precision) -- confirm against the SH-4 manual */
-#else
- .long 0x180000 /* as above plus bit 20; NOTE(review): presumably SZ so that fmov.d moves a register pair -- confirm */
-#endif
-
- ENDFUNC(GLOBAL(sdivsi3_i4i))
-#endif /* __SH_FPU_DOUBLE__ */
-#endif /* L_sdivsi3_i4i */
-#endif /* !__SHMEDIA__ */
OpenPOWER on IntegriCloud