author     ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4>  2011-11-02 15:03:19 +0000
committer  ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4>  2011-11-02 15:03:19 +0000
commit     9213d2eb44a8b9bcc432b57e246d9b52d5bdc949 (patch)
tree       bfbde9a54f663fb7556b9dacd07709ef97c1961c /libgcc
parent     237490bf10db39b859bd28598ff64f1bd2c84421 (diff)
Move libgcc1 to toplevel libgcc
gcc:
* Makefile.in (LIB1ASMSRC): Don't export.
(libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
* config/arm/arm.c: Update lib1funcs.asm filename.
* config/arm/linux-eabi.h: Likewise.
* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
../libgcc/config/arm.
* config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
* config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
* config/arm/t-bpabi: Likewise.
* config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
* config/arm/t-strongarm-elf: Likewise.
* config/arm/t-symbian: Likewise.
* config/arm/t-vxworks: Likewise.
* config/arm/t-wince-pe: Likewise.
* config/avr/libgcc.S: Move to ../libgcc/config/avr.
* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/bfin/lib1funcs.asm: Move to ../libgcc/config/bfin/lib1funcs.S.
* config/bfin/t-bfin: Remove.
* config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/bfin/t-bfin-linux: Likewise.
* config/bfin/t-bfin-uclinux: Likewise.
* config/c6x/lib1funcs.asm: Move to ../libgcc/config/c6x/lib1funcs.S.
* config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/fr30/lib1funcs.asm: Move to ../libgcc/config/fr30/lib1funcs.S.
* config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/frv/lib1funcs.asm: Move to ../libgcc/config/frv/lib1funcs.S.
* config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
* config/h8300/lib1funcs.asm: Move to ../libgcc/config/h8300/lib1funcs.S.
* config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
* config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/i386/t-interix: Likewise.
* config/ia64/lib1funcs.asm: Move to ../libgcc/config/ia64/lib1funcs.S.
* config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/m32c/m32c.c: Update m32c-lib1.S filename.
* config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
* config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
* config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
* config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file.
* config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
* config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mips/mips16.S: Move to ../libgcc/config/mips.
* config/mips/t-libgcc-mips16: Remove.
* config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/pa/milli64.S: Move to ../libgcc/config/pa.
* config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
* config/pa/t-linux64: Likewise.
* config/picochip/libgccExtras/fake_libgcc.asm: Move to
../libgcc/config/picochip/lib1funcs.S.
* config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
* config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
* config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
* config/sh/sh.h: Update lib1funcs.asm filename.
* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
* config/sh/t-netbsd: Likewise.
* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Remove.
* config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
* config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
* config/sparc/lb1spl.asm: Remove.
* config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/sparc/t-leon: Likewise.
* config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove
* config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
* config/vax/t-linux: Remove.
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
../libgcc/config/xtensa.
* config/xtensa/lib1funcs.asm: Move to ../libgcc/config/xtensa/lib1funcs.S.
* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
(bfin*-*): Likewise.
(mips64*-*-linux*, mipsisa64*-*-linux*): Remove mips/t-libgcc-mips16
from tmake_file.
(mips*-*-linux*): Likewise.
(mips*-sde-elf*): Likewise.
(mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
(mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
(mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
(mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
(mips-*-elf*, mipsel-*-elf*): Likewise.
(mips64-*-elf*, mips64el-*-elf*): Likewise.
(mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
(mips*-*-rtems*): Likewise.
(mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
(vax-*-linux*): Remove vax/t-linux from tmake_file.

libgcc:
* Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
$(srcdir) to refer to $(LIB1ASMSRC). Use $<.
* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
config/arm/lib1funcs.S: New files.
* config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
* config/arm/t-arm: New file.
* config/arm/t-bpabi (LIB1ASMFUNCS): Set.
* config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
config/arm/t-strongarm-elf: New files.
* config/arm/t-symbian (LIB1ASMFUNCS): Set.
* config/arm/t-vxworks, config/arm/t-wince-pe: New files.
* config/avr/lib1funcs.S: New file.
* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
* config/c6x/lib1funcs.S: New file.
* config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
* config/frv/lib1funcs.S: New file.
* config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
* config/i386/cygwin.S, config/i386/t-chkstk: New files.
* config/ia64/__divxf3.asm: Rename to ...
* config/ia64/__divxf3.S: ... this. Adapt lib1funcs.asm filename.
* config/ia64/_fixtfdi.asm: Rename to ...
* config/ia64/_fixtfdi.S: ... this. Adapt lib1funcs.asm filename.
* config/ia64/_fixunstfdi.asm: Rename to ...
* config/ia64/_fixunstfdi.S: ... this. Adapt lib1funcs.asm filename.
* config/ia64/_floatditf.asm: Rename to ...
* config/ia64/_floatditf.S: ... this. Adapt lib1funcs.asm filename.
* config/ia64/lib1funcs.S: New file.
* config/ia64/t-hpux (LIB1ASMFUNCS): Set.
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
* config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
* config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
* config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
* config/mep/lib1funcs.S: New file.
* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/mips/mips16.S: New file.
* config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/pa/milli64.S: New file.
* config/pa/t-linux, config/pa/t-linux64: New files.
* config/picochip/lib1funcs.S: New file.
* config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
* config/sh/t-netbsd: New file.
* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set.
Use $(srcdir) to refer to lib1funcs.S, adapt filename.
* config/sh/t-sh64: New file.
* config/sparc/lb1spc.S: New file.
* config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
filename.
* config/v850/lib1funcs.S, config/v850/t-v850: New files.
* config/vax/lib1funcs.S, config/vax/t-linux: New files.
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
config/xtensa/lib1funcs.S: New files.
* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
tmake_file.
(arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
(arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
(arm*-*-linux*): Likewise. Add arm/t-elf, arm/t-bpabi,
arm/t-linux-eabi to tmake_file for arm*-*-linux-*eabi, add
arm/t-linux otherwise.
(arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
(arm*-*-ecos-elf): Likewise.
(arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
(arm*-*-rtems*): Likewise.
(arm*-*-elf): Likewise.
(arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
(avr-*-rtems*): Add to tmake_file, add avr/t-avr.
(bfin*-elf*): Add bfin/t-bfin to tmake_file.
(bfin*-uclinux*): Likewise.
(bfin*-linux-uclibc*): Likewise.
(bfin*-rtems*): Likewise.
(bfin*-*): Likewise.
(fido-*-elf): Merge into m68k-*-elf*.
(fr30-*-elf)): Add fr30/t-fr30 to tmake_file.
(frv-*-*linux*): Add frv/t-frv to tmake_file.
(h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
(h8300-*-elf*): Likewise.
(hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
(hppa*-*-linux*): Add pa/t-linux to tmake_file.
(i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
(i[34567]86-*-mingw*): Likewise.
(x86_64-*-mingw*): Likewise.
(i[34567]86-*-interix3*): Likewise.
(ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
(ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
(m68k-*-elf*): Also handle fido-*-elf. Add m68k/t-floatlib to
tmake_file.
(m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
(m68k-*-linux*): Likewise.
(m68k-*-rtems*): Likewise.
(mcore-*-elf): Add mcore/t-mcore to tmake_file.
(sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
sh64*-*-*.
(sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
(sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
sh/t-netbsd to tmake_file.
Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
(sh-*-rtems*): Add sh/t-sh to tmake_file.
(sh-wrs-vxworks): Likewise.
(sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
*-leon[3-9]*.
(v850*-*-*): Add v850/t-v850 to tmake_file.
(vax-*-linux*): Add vax/t-linux to tmake_file.
(m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180773 138bc75d-0d04-0410-961f-82ee72b054a4
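In practical terms, the LIB1ASMSRC/LIB1ASMFUNCS machinery that used to be exported from the gcc directory via libgcc.mvars is now provided directly by per-target makefile fragments under libgcc/config/ and selected by libgcc/config.host. A minimal GNU make sketch of such a fragment (the function list below is illustrative, not the exact contents of arm/t-arm):

    # Hypothetical libgcc/config/<cpu>/t-<cpu> fragment
    LIB1ASMSRC = arm/lib1funcs.S                  # assembly source, relative to libgcc/config/
    LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3   # one object is built per listed name

libgcc/Makefile.in then compiles $(srcdir)/config/$(LIB1ASMSRC) once per listed function, as shown in the Makefile.in hunk below.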
Diffstat (limited to 'libgcc')
-rw-r--r--  libgcc/ChangeLog | 120
-rw-r--r--  libgcc/Makefile.in | 15
-rw-r--r--  libgcc/config.host | 99
-rw-r--r--  libgcc/config/arm/bpabi-v6m.S | 318
-rw-r--r--  libgcc/config/arm/bpabi.S | 163
-rw-r--r--  libgcc/config/arm/ieee754-df.S | 1447
-rw-r--r--  libgcc/config/arm/ieee754-sf.S | 1060
-rw-r--r--  libgcc/config/arm/lib1funcs.S | 1829
-rw-r--r--  libgcc/config/arm/libunwind.S | 2
-rw-r--r--  libgcc/config/arm/t-arm | 3
-rw-r--r--  libgcc/config/arm/t-bpabi | 3
-rw-r--r--  libgcc/config/arm/t-elf | 13
-rw-r--r--  libgcc/config/arm/t-linux | 3
-rw-r--r--  libgcc/config/arm/t-linux-eabi | 2
-rw-r--r--  libgcc/config/arm/t-strongarm-elf | 1
-rw-r--r--  libgcc/config/arm/t-symbian | 14
-rw-r--r--  libgcc/config/arm/t-vxworks | 1
-rw-r--r--  libgcc/config/arm/t-wince-pe | 1
-rw-r--r--  libgcc/config/avr/lib1funcs.S | 1533
-rw-r--r--  libgcc/config/avr/t-avr | 48
-rw-r--r--  libgcc/config/bfin/lib1funcs.S | 211
-rw-r--r--  libgcc/config/bfin/t-bfin | 3
-rw-r--r--  libgcc/config/c6x/lib1funcs.S | 438
-rw-r--r--  libgcc/config/c6x/t-elf | 5
-rw-r--r--  libgcc/config/fr30/lib1funcs.S | 115
-rw-r--r--  libgcc/config/fr30/t-fr30 | 2
-rw-r--r--  libgcc/config/frv/lib1funcs.S | 269
-rw-r--r--  libgcc/config/frv/t-frv | 3
-rw-r--r--  libgcc/config/h8300/lib1funcs.S | 838
-rw-r--r--  libgcc/config/h8300/t-h8300 | 3
-rw-r--r--  libgcc/config/i386/cygwin.S | 188
-rw-r--r--  libgcc/config/i386/t-chkstk | 2
-rw-r--r--  libgcc/config/ia64/__divxf3.S (renamed from libgcc/config/ia64/__divxf3.asm) | 2
-rw-r--r--  libgcc/config/ia64/_fixtfdi.S (renamed from libgcc/config/ia64/_fixtfdi.asm) | 2
-rw-r--r--  libgcc/config/ia64/_fixunstfdi.S (renamed from libgcc/config/ia64/_fixunstfdi.asm) | 2
-rw-r--r--  libgcc/config/ia64/_floatditf.S (renamed from libgcc/config/ia64/_floatditf.asm) | 2
-rw-r--r--  libgcc/config/ia64/lib1funcs.S | 795
-rw-r--r--  libgcc/config/ia64/t-hpux | 5
-rw-r--r--  libgcc/config/ia64/t-ia64 | 13
-rw-r--r--  libgcc/config/ia64/t-softfp-compat | 2
-rw-r--r--  libgcc/config/m32c/lib1funcs.S | 231
-rw-r--r--  libgcc/config/m32c/t-m32c | 9
-rw-r--r--  libgcc/config/m32r/initfini.c | 2
-rw-r--r--  libgcc/config/m68k/lb1sf68.S | 4116
-rw-r--r--  libgcc/config/m68k/t-floatlib | 5
-rw-r--r--  libgcc/config/mcore/lib1funcs.S | 303
-rw-r--r--  libgcc/config/mcore/t-mcore | 2
-rw-r--r--  libgcc/config/mep/lib1funcs.S | 125
-rw-r--r--  libgcc/config/mep/t-mep | 9
-rw-r--r--  libgcc/config/mips/mips16.S | 712
-rw-r--r--  libgcc/config/mips/t-mips16 | 40
-rw-r--r--  libgcc/config/pa/milli64.S | 2134
-rw-r--r--  libgcc/config/pa/t-linux | 6
-rw-r--r--  libgcc/config/pa/t-linux64 | 4
-rw-r--r--  libgcc/config/picochip/lib1funcs.S | 4
-rw-r--r--  libgcc/config/picochip/t-picochip | 7
-rw-r--r--  libgcc/config/sh/lib1funcs.S | 3933
-rw-r--r--  libgcc/config/sh/lib1funcs.h | 76
-rw-r--r--  libgcc/config/sh/t-linux | 2
-rw-r--r--  libgcc/config/sh/t-netbsd | 1
-rw-r--r--  libgcc/config/sh/t-sh | 15
-rw-r--r--  libgcc/config/sh/t-sh64 | 6
-rw-r--r--  libgcc/config/sparc/lb1spc.S | 784
-rw-r--r--  libgcc/config/sparc/t-softmul | 2
-rw-r--r--  libgcc/config/v850/lib1funcs.S | 2330
-rw-r--r--  libgcc/config/v850/t-v850 | 60
-rw-r--r--  libgcc/config/vax/lib1funcs.S | 92
-rw-r--r--  libgcc/config/vax/t-linux | 2
-rw-r--r--  libgcc/config/xtensa/ieee754-df.S | 2388
-rw-r--r--  libgcc/config/xtensa/ieee754-sf.S | 1757
-rw-r--r--  libgcc/config/xtensa/lib1funcs.S | 845
-rw-r--r--  libgcc/config/xtensa/t-xtensa | 12
72 files changed, 29534 insertions, 55 deletions
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index b5d9c243a98..6b2514aba9a 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,5 +1,125 @@
2011-11-02 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
+ * Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
+ $(srcdir) to refer to $(LIB1ASMSRC).
+ Use $<.
+ * config/arm/bpabi-v6m.S, config/arm/bpabi.S,
+ config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
+ config/arm/lib1funcs.S: New files.
+ * config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
+ * config/arm/t-arm: New file.
+ * config/arm/t-bpabi (LIB1ASMFUNCS): Set.
+ * config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
+ config/arm/t-strongarm-elf: New files.
+ * config/arm/t-symbian (LIB1ASMFUNCS): Set.
+ * config/arm/t-vxworks, config/arm/t-wince-pe: New files.
+ * config/avr/lib1funcs.S: New file.
+ * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
+ * config/c6x/lib1funcs.S: New file.
+ * config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
+ * config/frv/lib1funcs.S: New file.
+ * config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
+ * config/i386/cygwin.S, config/i386/t-chkstk: New files.
+ * config/ia64/__divxf3.asm: Rename to ...
+ * config/ia64/__divxf3.S: ... this.
+ Adapt lib1funcs.asm filename.
+ * config/ia64/_fixtfdi.asm: Rename to ...
+ * config/ia64/_fixtfdi.S: ... this.
+ Adapt lib1funcs.asm filename.
+ * config/ia64/_fixunstfdi.asm: Rename to ...
+ * config/ia64/_fixunstfdi.S: ... this.
+ Adapt lib1funcs.asm filename.
+ * config/ia64/_floatditf.asm: Rename to ...
+ * config/ia64/_floatditf.S: ... this.
+ Adapt lib1funcs.asm filename.
+ * config/ia64/lib1funcs.S: New file.
+ * config/ia64/t-hpux (LIB1ASMFUNCS): Set.
+ * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
+ * config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
+ * config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
+ * config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
+ * config/mep/lib1funcs.S: New file.
+ * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/mips/mips16.S: New file.
+ * config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/pa/milli64.S: New file.
+ * config/pa/t-linux, config/pa/t-linux64: New files.
+ * config/picochip/lib1funcs.S: New file.
+ * config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
+ * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
+ * config/sh/t-netbsd: New file.
+ * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set.
+ Use $(srcdir) to refer to lib1funcs.S, adapt filename.
+ * config/sh/t-sh64: New file.
+ * config/sparc/lb1spc.S: New file.
+ * config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
+ filename.
+ * config/v850/lib1funcs.S, config/v850/t-v850: New files.
+ * config/vax/lib1funcs.S, config/vax/t-linux: New files.
+ * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
+ config/xtensa/lib1funcs.S: New files.
+ * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
+ * config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
+ tmake_file.
+ (arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
+ (arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
+ (arm*-*-linux*): Likewise.
+ Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for
+ arm*-*-linux-*eabi, add arm/t-linux otherwise.
+ (arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
+ (arm*-*-ecos-elf): Likewise.
+ (arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
+ (arm*-*-rtems*): Likewise.
+ (arm*-*-elf): Likewise.
+ (arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
+ (avr-*-rtems*): Add to tmake_file, add avr/t-avr.
+ (bfin*-elf*): Add bfin/t-bfin to tmake_file.
+ (bfin*-uclinux*): Likewise.
+ (bfin*-linux-uclibc*): Likewise.
+ (bfin*-rtems*): Likewise.
+ (bfin*-*): Likewise.
+ (fido-*-elf): Merge into m68k-*-elf*.
+ (fr30-*-elf)): Add fr30/t-fr30 to tmake_file.
+ (frv-*-*linux*): Add frv/t-frv to tmake_file.
+ (h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
+ (h8300-*-elf*): Likewise.
+ (hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
+ (hppa*-*-linux*): Add pa/t-linux to tmake_file.
+ (i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
+ (i[34567]86-*-mingw*): Likewise.
+ (x86_64-*-mingw*): Likewise.
+ (i[34567]86-*-interix3*): Likewise.
+ (ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
+ (ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
+ (m68k-*-elf*): Also handle fido-*-elf.
+ Add m68k/t-floatlib to tmake_file.
+ (m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
+ (m68k-*-linux*): Likewise.
+ (m68k-*-rtems*): Likewise.
+ (mcore-*-elf): Add mcore/t-mcore to tmake_file.
+ (sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
+ sh64*-*-*.
+ (sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
+ Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
+ (sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
+ (sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
+ sh/t-netbsd to tmake_file.
+ Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
+ (sh-*-rtems*): Add sh/t-sh to tmake_file.
+ (sh-wrs-vxworks): Likewise.
+ (sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
+ *-leon[3-9]*.
+ (v850*-*-*): Add v850/t-v850 to tmake_file.
+ (vax-*-linux*): Add vax/t-linux to tmake_file.
+ (m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.
+
+2011-11-02 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
+
* crtstuff.c: New file.
* Makefile.in (CRTSTUFF_CFLAGS): Define.
(CRTSTUFF_T_CFLAGS): Define.
diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 467901b057a..6bbb369f8e8 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -394,25 +394,22 @@ LIB2_DIVMOD_FUNCS := $(filter-out $(LIB2FUNCS_EXCLUDE) $(LIB1ASMFUNCS), \
ifeq ($(enable_shared),yes)
lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
-$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC) %.vis
- $(gcc_compile) -DL$* -xassembler-with-cpp \
- -c $(gcc_srcdir)/config/$(LIB1ASMSRC) -include $*.vis
+$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC) %.vis
+ $(gcc_compile) -DL$* -xassembler-with-cpp -c $< -include $*.vis
$(patsubst %,%.vis,$(LIB1ASMFUNCS)): %.vis: %_s$(objext)
$(gen-hide-list)
libgcc-objects += $(lib1asmfuncs-o)
lib1asmfuncs-s-o = $(patsubst %,%_s$(objext),$(LIB1ASMFUNCS))
-$(lib1asmfuncs-s-o): %_s$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC)
- $(gcc_s_compile) -DL$* -xassembler-with-cpp \
- -c $(gcc_srcdir)/config/$(LIB1ASMSRC)
+$(lib1asmfuncs-s-o): %_s$(objext): $(srcdir)/config/$(LIB1ASMSRC)
+ $(gcc_s_compile) -DL$* -xassembler-with-cpp -c $<
libgcc-s-objects += $(lib1asmfuncs-s-o)
else
lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
-$(lib1asmfuncs-o): %$(objext): $(gcc_srcdir)/config/$(LIB1ASMSRC)
- $(gcc_compile) -DL$* -xassembler-with-cpp \
- -c $(gcc_srcdir)/config/$(LIB1ASMSRC)
+$(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC)
+ $(gcc_compile) -DL$* -xassembler-with-cpp -c $<
libgcc-objects += $(lib1asmfuncs-o)
endif
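The rewritten rules keep the per-function build scheme: each member of LIB1ASMFUNCS becomes its own object, compiled from the single assembly source with -DL$* so that only the matching #ifdef L_<name> section of the file is assembled; when shared libgcc is enabled, the generated <name>.vis hide list is force-included as well. Roughly what one instantiation of the shared-library pattern rule expands to, assuming LIB1ASMSRC = arm/lib1funcs.S, objext = .o, and an illustrative function name:

    # Sketch of the expanded rule for the _udivsi3 member of LIB1ASMFUNCS
    _udivsi3.o: $(srcdir)/config/arm/lib1funcs.S _udivsi3.vis
            $(gcc_compile) -DL_udivsi3 -xassembler-with-cpp -c $< -include _udivsi3.vis

The switch from $(gcc_srcdir)/config/$(LIB1ASMSRC) to $(srcdir)/config/$(LIB1ASMSRC) is what ties the rule to the sources that now live in the libgcc tree, and using $< avoids spelling the path out a second time in the recipe.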
diff --git a/libgcc/config.host b/libgcc/config.host
index 01e2f21a797..0a05ea184b0 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -306,22 +306,25 @@ alpha*-dec-*vms*)
md_unwind_header=alpha/vms-unwind.h
;;
arm-wrs-vxworks)
- tmake_file="$tmake_file t-fdpbit"
+ tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-fdpbit"
extra_parts="$extra_parts crti.o crtn.o"
;;
arm*-*-freebsd*)
- tmake_file="$tmake_file t-fdpbit"
+ tmake_file="$tmake_file arm/t-arm arm/t-strongarm-elf t-fdpbit"
;;
arm*-*-netbsdelf*)
- tmake_file="$tmake_file t-slibgcc-gld-nover"
+ tmake_file="$tmake_file arm/t-arm t-slibgcc-gld-nover"
;;
arm*-*-linux*) # ARM GNU/Linux with ELF
- tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
+ tmake_file="${tmake_file} arm/t-arm t-fixedpoint-gnu-prefix"
case ${host} in
arm*-*-linux-*eabi)
- tmake_file="${tmake_file} arm/t-bpabi t-slibgcc-libgcc"
+ tmake_file="${tmake_file} arm/t-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc"
unwind_header=config/arm/unwind-arm.h
;;
+ *)
+ tmake_file="$tmake_file arm/t-linux"
+ ;;
esac
tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
;;
@@ -333,15 +336,15 @@ arm*-*-uclinux*) # ARM ucLinux
unwind_header=config/arm/unwind-arm.h
;;
esac
- tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+ tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
extra_parts="$extra_parts crti.o crtn.o"
;;
arm*-*-ecos-elf)
- tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+ tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
extra_parts="$extra_parts crti.o crtn.o"
;;
arm*-*-eabi* | arm*-*-symbianelf* )
- tmake_file="${tmake_file} t-fixedpoint-gnu-prefix"
+ tmake_file="${tmake_file} arm/t-arm arm/t-elf t-fixedpoint-gnu-prefix"
case ${host} in
arm*-*-eabi*)
tmake_file="${tmake_file} arm/t-bpabi"
@@ -356,17 +359,18 @@ arm*-*-eabi* | arm*-*-symbianelf* )
unwind_header=config/arm/unwind-arm.h
;;
arm*-*-rtems*)
- tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+ tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
extra_parts="$extra_parts crti.o crtn.o"
;;
arm*-*-elf)
- tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+ tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
extra_parts="$extra_parts crti.o crtn.o"
;;
arm*-wince-pe*)
+ tmake_file="$tmake_file arm/t-arm arm/t-wince-pe"
;;
avr-*-rtems*)
- tmake_file=t-fpbit
+ tmake_file="$tmake_file avr/t-avr t-fpbit"
# Don't use default.
extra_parts=
;;
@@ -375,27 +379,27 @@ avr-*-*)
tmake_file="${cpu_type}/t-avr t-fpbit"
;;
bfin*-elf*)
- tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
+ tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
extra_parts="$extra_parts crtbeginS.o crtendS.o crti.o crtn.o crtlibid.o"
;;
bfin*-uclinux*)
- tmake_file="bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
+ tmake_file="bfin/t-bfin bfin/t-crtlibid bfin/t-crtstuff t-fdpbit"
extra_parts="$extra_parts crtbeginS.o crtendS.o crtlibid.o"
md_unwind_header=bfin/linux-unwind.h
;;
bfin*-linux-uclibc*)
- tmake_file="$tmake_file bfin/t-crtstuff t-fdpbit bfin/t-linux"
+ tmake_file="$tmake_file bfin/t-bfin bfin/t-crtstuff t-fdpbit bfin/t-linux"
# No need to build crtbeginT.o on uClibc systems. Should probably
# be moved to the OS specific section above.
extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o"
md_unwind_header=bfin/linux-unwind.h
;;
bfin*-rtems*)
- tmake_file="$tmake_file t-fdpbit"
+ tmake_file="$tmake_file bfin/t-bfin t-fdpbit"
extra_parts="$extra_parts crti.o crtn.o"
;;
bfin*-*)
- tmake_file="$tmake_file t-fdpbit"
+ tmake_file="$tmake_file bfin/t-bfin t-fdpbit"
extra_parts="crtbegin.o crtend.o crti.o crtn.o"
;;
crisv32-*-elf)
@@ -415,10 +419,8 @@ cris-*-none)
cris-*-linux* | crisv32-*-linux*)
tmake_file="$tmake_file t-fdpbit cris/t-linux"
;;
-fido-*-elf)
- ;;
fr30-*-elf)
- tmake_file="$tmake_file t-fdpbit"
+ tmake_file="$tmake_file fr30/t-fr30 t-fdpbit"
extra_parts="$extra_parts crti.o crtn.o"
;;
frv-*-elf)
@@ -427,20 +429,21 @@ frv-*-elf)
extra_parts="frvbegin.o frvend.o"
;;
frv-*-*linux*)
- tmake_file="$tmake_file t-fdpbit frv/t-linux"
+ tmake_file="$tmake_file frv/t-frv frv/t-linux t-fdpbit"
;;
h8300-*-rtems*)
- tmake_file="$tmake_file t-fpbit"
+ tmake_file="$tmake_file h8300/t-h8300 t-fpbit"
extra_parts="$extra_parts crti.o crtn.o"
;;
h8300-*-elf*)
- tmake_file="$tmake_file t-fpbit"
+ tmake_file="$tmake_file h8300/t-h8300 t-fpbit"
extra_parts="$extra_parts crti.o crtn.o"
;;
hppa*64*-*-linux*)
+ tmake_file="$tmake_file pa/t-linux pa/t-linux64"
;;
hppa*-*-linux*)
- tmake_file="$tmake_file t-slibgcc-libgcc"
+ tmake_file="$tmake_file pa/t-linux t-slibgcc-libgcc"
# Set the libgcc version number
if test x$enable_sjlj_exceptions = xyes; then
tmake_file="$tmake_file pa/t-slibgcc-sjlj-ver"
@@ -565,7 +568,7 @@ i[34567]86-*-cygwin*)
else
tmake_dlldir_file="i386/t-dlldir-x"
fi
- tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm t-dfprules"
+ tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-cygwin i386/t-crtfm i386/t-chkstk t-dfprules"
case ${target_thread_file} in
posix)
tmake_file="i386/t-mingw-pthread $tmake_file"
@@ -586,7 +589,7 @@ i[34567]86-*-mingw*)
else
tmake_dlldir_file="i386/t-dlldir-x"
fi
- tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm t-dfprules"
+ tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-cygming i386/t-mingw32 i386/t-crtfm i386/t-chkstk t-dfprules"
md_unwind_header=i386/w32-unwind.h
;;
x86_64-*-mingw*)
@@ -602,10 +605,11 @@ x86_64-*-mingw*)
else
tmake_dlldir_file="i386/t-dlldir-x"
fi
- tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm"
+ tmake_file="${tmake_file} ${tmake_eh_file} ${tmake_dlldir_file} i386/t-slibgcc-cygming i386/t-mingw32 t-dfprules i386/t-crtfm i386/t-chkstk"
extra_parts="$extra_parts crtfastmath.o"
;;
i[34567]86-*-interix3*)
+ tmake_file="$tmake_file i386/t-chkstk"
;;
ia64*-*-elf*)
extra_parts="$extra_parts crtbeginS.o crtendS.o crtfastmath.o"
@@ -625,10 +629,10 @@ ia64*-*-linux*)
md_unwind_header=ia64/linux-unwind.h
;;
ia64*-*-hpux*)
- tmake_file="ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux"
+ tmake_file="ia64/t-ia64 ia64/t-hpux t-slibgcc ia64/t-slibgcc-hpux t-slibgcc-hpux"
;;
ia64-hp-*vms*)
- tmake_file="$tmake_file ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms"
+ tmake_file="$tmake_file ia64/t-ia64 ia64/t-eh-ia64 ia64/t-vms t-slibgcc-vms"
extra_parts="$extra_parts crtinitS.o"
md_unwind_header=ia64/vms-unwind.h
;;
@@ -660,18 +664,21 @@ m32r-*-linux*)
m32rle-*-linux*)
tmake_file="$tmake_file m32r/t-linux t-fdpbit"
;;
-m68k-*-elf*)
+m68k-*-elf* | fido-*-elf)
+ tmake_file="$tmake_file m68k/t-floatlib"
;;
m68k*-*-netbsdelf*)
;;
m68k*-*-openbsd*)
;;
m68k-*-uclinux*) # Motorola m68k/ColdFire running uClinux with uClibc
+ tmake_file="$tmake_file m68k/t-floatlib"
md_unwind_header=m68k/linux-unwind.h
;;
m68k-*-linux*) # Motorola m68k's running GNU/Linux
# with ELF format using glibc 2
# aka the GNU/Linux C library 6.
+ tmake_file="$tmake_file m68k/t-floatlib"
# If not configured with --enable-sjlj-exceptions, bump the
# libgcc version number.
if test x$enable_sjlj_exceptions != xyes; then
@@ -680,10 +687,11 @@ m68k-*-linux*) # Motorola m68k's running GNU/Linux
md_unwind_header=m68k/linux-unwind.h
;;
m68k-*-rtems*)
+ tmake_file="$tmake_file m68k/t-floatlib"
extra_parts="$extra_parts crti.o crtn.o"
;;
mcore-*-elf)
- tmake_file=t-fdpbit
+ tmake_file="mcore/t-mcore t-fdpbit"
extra_parts="$extra_parts crti.o crtn.o"
;;
microblaze*-linux*)
@@ -905,6 +913,10 @@ sh-*-elf* | sh[12346l]*-*-elf*)
libic_invalidate_array_4-200.a \
libic_invalidate_array_4a.a \
libgcc-Os-4-200.a libgcc-4-300.a"
+ case ${host} in sh64*-*-*)
+ tmake_file="$tmake_file sh/t-sh64"
+ ;;
+ esac
case ${host} in
sh*-superh-elf)
tmake_file="$tmake_file sh/t-superh"
@@ -913,23 +925,33 @@ sh-*-elf* | sh[12346l]*-*-elf*)
esac
;;
sh-*-linux* | sh[2346lbe]*-*-linux*)
- tmake_file="${tmake_file} t-slibgcc-libgcc sh/t-linux t-fdpbit"
+ tmake_file="${tmake_file} sh/t-sh t-slibgcc-libgcc sh/t-linux t-fdpbit"
+ case ${host} in sh64*-*-linux*)
+ tmake_file="$tmake_file sh/t-sh64"
+ ;;
+ esac
md_unwind_header=sh/linux-unwind.h
;;
sh-*-netbsdelf* | shl*-*-netbsdelf* | sh5-*-netbsd* | sh5l*-*-netbsd* | \
sh64-*-netbsd* | sh64l*-*-netbsd*)
+ tmake_file="$tmake_file sh/t-sh sh/t-netbsd"
+ case ${host} in
+ sh5*-*-netbsd* | sh64*-netbsd*)
+ tmake_file="$tmake_file sh/t-sh64"
+ ;;
+ esac
# NetBSD's C library includes a fast software FP library that
# has support for setting/setting the rounding mode, exception
# mask, etc. Therefore, we don't want to include software FP
# in libgcc.
;;
sh-*-rtems*)
- tmake_file="$tmake_file t-crtstuff-pic t-fdpbit"
+ tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \
$sh_ic_extra_parts $sh_opt_extra_parts"
;;
sh-wrs-vxworks)
- tmake_file="$tmake_file t-crtstuff-pic t-fdpbit"
+ tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
;;
sparc-*-netbsdelf*)
;;
@@ -956,6 +978,13 @@ sparc-*-linux*) # SPARC's running GNU/Linux, libc6
tmake_file="${tmake_file} sparc/t-linux"
;;
esac
+ case ${host} in
+ *-leon[3-9]*)
+ ;;
+ *)
+ tmake_file="$tmake_file sparc/t-softmul"
+ ;;
+ esac
extra_parts="$extra_parts crtfastmath.o"
md_unwind_header=sparc/linux-unwind.h
;;
@@ -1007,9 +1036,10 @@ tic6x-*-elf)
unwind_header=config/c6x/unwind-c6x.h
;;
v850*-*-*)
- tmake_file=t-fdpbit
+ tmake_file="v850/t-v850 t-fdpbit"
;;
vax-*-linux*)
+ tmake_file="$tmake_file vax/t-linux"
;;
vax-*-netbsdelf*)
;;
@@ -1032,6 +1062,7 @@ am33_2.0-*-linux*)
tmake_file="$tmake_file t-fdpbit"
;;
m32c-*-elf*|m32c-*-rtems*)
+ tmake_file="$tmake_file m32c/t-m32c"
;;
mep*-*-*)
tmake_file="mep/t-mep t-fdpbit"
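config.host itself only accumulates fragment names in tmake_file; the libgcc Makefile generated from Makefile.in at configure time is what actually includes those fragments, which is how the LIB1ASMSRC and LIB1ASMFUNCS settings selected above reach the build rules. A rough, simplified sketch of that hand-off (not the literal Makefile.in text):

    # tmake_file is chosen per target by config.host and substituted by configure
    tmake_file = @tmake_file@
    # each fragment (e.g. arm/t-arm, arm/t-bpabi) is then read into the build
    include $(tmake_file)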
diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S
new file mode 100644
index 00000000000..4ecea6da5a6
--- /dev/null
+++ b/libgcc/config/arm/bpabi-v6m.S
@@ -0,0 +1,318 @@
+/* Miscellaneous BPABI functions. ARMv6M implementation
+
+ Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifdef L_aeabi_lcmp
+
+FUNC_START aeabi_lcmp
+ cmp xxh, yyh
+ beq 1f
+ bgt 2f
+ mov r0, #1
+ neg r0, r0
+ RET
+2:
+ mov r0, #1
+ RET
+1:
+ sub r0, xxl, yyl
+ beq 1f
+ bhi 2f
+ mov r0, #1
+ neg r0, r0
+ RET
+2:
+ mov r0, #1
+1:
+ RET
+ FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+
+#ifdef L_aeabi_ulcmp
+
+FUNC_START aeabi_ulcmp
+ cmp xxh, yyh
+ bne 1f
+ sub r0, xxl, yyl
+ beq 2f
+1:
+ bcs 1f
+ mov r0, #1
+ neg r0, r0
+ RET
+1:
+ mov r0, #1
+2:
+ RET
+ FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
+.macro test_div_by_zero signed
+ cmp yyh, #0
+ bne 7f
+ cmp yyl, #0
+ bne 7f
+ cmp xxh, #0
+ bne 2f
+ cmp xxl, #0
+2:
+ .ifc \signed, unsigned
+ beq 3f
+ mov xxh, #0
+ mvn xxh, xxh @ 0xffffffff
+ mov xxl, xxh
+3:
+ .else
+ beq 5f
+ blt 6f
+ mov xxl, #0
+ mvn xxl, xxl @ 0xffffffff
+ lsr xxh, xxl, #1 @ 0x7fffffff
+ b 5f
+6: mov xxh, #0x80
+ lsl xxh, xxh, #24 @ 0x80000000
+ mov xxl, #0
+5:
+ .endif
+ @ tailcalls are tricky on v6-m.
+ push {r0, r1, r2}
+ ldr r0, 1f
+ adr r1, 1f
+ add r0, r1
+ str r0, [sp, #8]
+ @ We know we are not on armv4t, so pop pc is safe.
+ pop {r0, r1, pc}
+ .align 2
+1:
+ .word __aeabi_ldiv0 - 1b
+7:
+.endm
+
+#ifdef L_aeabi_ldivmod
+
+FUNC_START aeabi_ldivmod
+ test_div_by_zero signed
+
+ push {r0, r1}
+ mov r0, sp
+ push {r0, lr}
+ ldr r0, [sp, #8]
+ bl SYM(__gnu_ldivmod_helper)
+ ldr r3, [sp, #4]
+ mov lr, r3
+ add sp, sp, #8
+ pop {r2, r3}
+ RET
+ FUNC_END aeabi_ldivmod
+
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+FUNC_START aeabi_uldivmod
+ test_div_by_zero unsigned
+
+ push {r0, r1}
+ mov r0, sp
+ push {r0, lr}
+ ldr r0, [sp, #8]
+ bl SYM(__gnu_uldivmod_helper)
+ ldr r3, [sp, #4]
+ mov lr, r3
+ add sp, sp, #8
+ pop {r2, r3}
+ RET
+ FUNC_END aeabi_uldivmod
+
+#endif /* L_aeabi_uldivmod */
+
+#ifdef L_arm_addsubsf3
+
+FUNC_START aeabi_frsub
+
+ push {r4, lr}
+ mov r4, #1
+ lsl r4, #31
+ eor r0, r0, r4
+ bl __aeabi_fadd
+ pop {r4, pc}
+
+ FUNC_END aeabi_frsub
+
+#endif /* L_arm_addsubsf3 */
+
+#ifdef L_arm_cmpsf2
+
+FUNC_START aeabi_cfrcmple
+
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+ b 6f
+
+FUNC_START aeabi_cfcmpeq
+FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: push {r0, r1, r2, r3, r4, lr}
+ bl __lesf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ bmi 1f
+ mov r1, #0
+ cmn r0, r1
+1:
+ pop {r0, r1, r2, r3, r4, pc}
+
+ FUNC_END aeabi_cfcmple
+ FUNC_END aeabi_cfcmpeq
+ FUNC_END aeabi_cfrcmple
+
+FUNC_START aeabi_fcmpeq
+
+ push {r4, lr}
+ bl __eqsf2
+ neg r0, r0
+ add r0, r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_fcmpeq
+
+.macro COMPARISON cond, helper, mode=sf2
+FUNC_START aeabi_fcmp\cond
+
+ push {r4, lr}
+ bl __\helper\mode
+ cmp r0, #0
+ b\cond 1f
+ mov r0, #0
+ pop {r4, pc}
+1:
+ mov r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_fcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpsf2 */
+
+#ifdef L_arm_addsubdf3
+
+FUNC_START aeabi_drsub
+
+ push {r4, lr}
+ mov r4, #1
+ lsl r4, #31
+ eor xxh, xxh, r4
+ bl __aeabi_dadd
+ pop {r4, pc}
+
+ FUNC_END aeabi_drsub
+
+#endif /* L_arm_addsubdf3 */
+
+#ifdef L_arm_cmpdf2
+
+FUNC_START aeabi_cdrcmple
+
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+ b 6f
+
+FUNC_START aeabi_cdcmpeq
+FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: push {r0, r1, r2, r3, r4, lr}
+ bl __ledf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ bmi 1f
+ mov r1, #0
+ cmn r0, r1
+1:
+ pop {r0, r1, r2, r3, r4, pc}
+
+ FUNC_END aeabi_cdcmple
+ FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
+
+FUNC_START aeabi_dcmpeq
+
+ push {r4, lr}
+ bl __eqdf2
+ neg r0, r0
+ add r0, r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_dcmpeq
+
+.macro COMPARISON cond, helper, mode=df2
+FUNC_START aeabi_dcmp\cond
+
+ push {r4, lr}
+ bl __\helper\mode
+ cmp r0, #0
+ b\cond 1f
+ mov r0, #0
+ pop {r4, pc}
+1:
+ mov r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_dcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpdf2 */
diff --git a/libgcc/config/arm/bpabi.S b/libgcc/config/arm/bpabi.S
new file mode 100644
index 00000000000..2ff338927fa
--- /dev/null
+++ b/libgcc/config/arm/bpabi.S
@@ -0,0 +1,163 @@
+/* Miscellaneous BPABI functions.
+
+ Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifdef L_aeabi_lcmp
+
+ARM_FUNC_START aeabi_lcmp
+ cmp xxh, yyh
+ do_it lt
+ movlt r0, #-1
+ do_it gt
+ movgt r0, #1
+ do_it ne
+ RETc(ne)
+ subs r0, xxl, yyl
+ do_it lo
+ movlo r0, #-1
+ do_it hi
+ movhi r0, #1
+ RET
+ FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+
+#ifdef L_aeabi_ulcmp
+
+ARM_FUNC_START aeabi_ulcmp
+ cmp xxh, yyh
+ do_it lo
+ movlo r0, #-1
+ do_it hi
+ movhi r0, #1
+ do_it ne
+ RETc(ne)
+ cmp xxl, yyl
+ do_it lo
+ movlo r0, #-1
+ do_it hi
+ movhi r0, #1
+ do_it eq
+ moveq r0, #0
+ RET
+ FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
+.macro test_div_by_zero signed
+/* Tail-call to divide-by-zero handlers which may be overridden by the user,
+ so unwinding works properly. */
+#if defined(__thumb2__)
+ cbnz yyh, 1f
+ cbnz yyl, 1f
+ cmp xxh, #0
+ do_it eq
+ cmpeq xxl, #0
+ .ifc \signed, unsigned
+ beq 2f
+ mov xxh, #0xffffffff
+ mov xxl, xxh
+2:
+ .else
+ do_it lt, t
+ movlt xxl, #0
+ movlt xxh, #0x80000000
+ do_it gt, t
+ movgt xxh, #0x7fffffff
+ movgt xxl, #0xffffffff
+ .endif
+ b SYM (__aeabi_ldiv0) __PLT__
+1:
+#else
+ /* Note: Thumb-1 code calls via an ARM shim on processors which
+ support ARM mode. */
+ cmp yyh, #0
+ cmpeq yyl, #0
+ bne 2f
+ cmp xxh, #0
+ cmpeq xxl, #0
+ .ifc \signed, unsigned
+ movne xxh, #0xffffffff
+ movne xxl, #0xffffffff
+ .else
+ movlt xxh, #0x80000000
+ movlt xxl, #0
+ movgt xxh, #0x7fffffff
+ movgt xxl, #0xffffffff
+ .endif
+ b SYM (__aeabi_ldiv0) __PLT__
+2:
+#endif
+.endm
+
+#ifdef L_aeabi_ldivmod
+
+ARM_FUNC_START aeabi_ldivmod
+ test_div_by_zero signed
+
+ sub sp, sp, #8
+#if defined(__thumb2__)
+ mov ip, sp
+ push {ip, lr}
+#else
+ do_push {sp, lr}
+#endif
+ bl SYM(__gnu_ldivmod_helper) __PLT__
+ ldr lr, [sp, #4]
+ add sp, sp, #8
+ do_pop {r2, r3}
+ RET
+
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+ARM_FUNC_START aeabi_uldivmod
+ test_div_by_zero unsigned
+
+ sub sp, sp, #8
+#if defined(__thumb2__)
+ mov ip, sp
+ push {ip, lr}
+#else
+ do_push {sp, lr}
+#endif
+ bl SYM(__gnu_uldivmod_helper) __PLT__
+ ldr lr, [sp, #4]
+ add sp, sp, #8
+ do_pop {r2, r3}
+ RET
+
+#endif /* L_aeabi_divmod */
+
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
new file mode 100644
index 00000000000..eb0c38632d0
--- /dev/null
+++ b/libgcc/config/arm/ieee754-df.S
@@ -0,0 +1,1447 @@
+/* ieee754-df.S double-precision floating point support for ARM
+
+ Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible. This is
+ * not meant to be easy to understand for the casual reader.
+ * For slightly simpler code please see the single precision version
+ * of this file.
+ *
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+
+@ For FPA, float words are always big-endian.
+@ For VFP, floats words follow the memory system mode.
+#if defined(__VFP_FP__) && !defined(__ARMEB__)
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#else
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#endif
+
+
+#ifdef L_arm_negdf2
+
+ARM_FUNC_START negdf2
+ARM_FUNC_ALIAS aeabi_dneg negdf2
+
+ @ flip sign bit
+ eor xh, xh, #0x80000000
+ RET
+
+ FUNC_END aeabi_dneg
+ FUNC_END negdf2
+
+#endif
+
+#ifdef L_arm_addsubdf3
+
+ARM_FUNC_START aeabi_drsub
+
+ eor xh, xh, #0x80000000 @ flip sign bit of first arg
+ b 1f
+
+ARM_FUNC_START subdf3
+ARM_FUNC_ALIAS aeabi_dsub subdf3
+
+ eor yh, yh, #0x80000000 @ flip sign bit of second arg
+#if defined(__INTERWORKING_STUBS__)
+ b 1f @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START adddf3
+ARM_FUNC_ALIAS aeabi_dadd adddf3
+
+1: do_push {r4, r5, lr}
+
+ @ Look for zeroes, equal values, INF, or NAN.
+ shift1 lsl, r4, xh, #1
+ shift1 lsl, r5, yh, #1
+ teq r4, r5
+ do_it eq
+ teqeq xl, yl
+ do_it ne, ttt
+ COND(orr,s,ne) ip, r4, xl
+ COND(orr,s,ne) ip, r5, yl
+ COND(mvn,s,ne) ip, r4, asr #21
+ COND(mvn,s,ne) ip, r5, asr #21
+ beq LSYM(Lad_s)
+
+ @ Compute exponent difference. Make largest exponent in r4,
+ @ corresponding arg in xh-xl, and positive exponent difference in r5.
+ shift1 lsr, r4, r4, #21
+ rsbs r5, r4, r5, lsr #21
+ do_it lt
+ rsblt r5, r5, #0
+ ble 1f
+ add r4, r4, r5
+ eor yl, xl, yl
+ eor yh, xh, yh
+ eor xl, yl, xl
+ eor xh, yh, xh
+ eor yl, xl, yl
+ eor yh, xh, yh
+1:
+ @ If exponent difference is too large, return largest argument
+ @ already in xh-xl. We need up to 54 bit to handle proper rounding
+ @ of 0x1p54 - 1.1.
+ cmp r5, #54
+ do_it hi
+ RETLDM "r4, r5" hi
+
+ @ Convert mantissa to signed integer.
+ tst xh, #0x80000000
+ mov xh, xh, lsl #12
+ mov ip, #0x00100000
+ orr xh, ip, xh, lsr #12
+ beq 1f
+#if defined(__thumb2__)
+ negs xl, xl
+ sbc xh, xh, xh, lsl #1
+#else
+ rsbs xl, xl, #0
+ rsc xh, xh, #0
+#endif
+1:
+ tst yh, #0x80000000
+ mov yh, yh, lsl #12
+ orr yh, ip, yh, lsr #12
+ beq 1f
+#if defined(__thumb2__)
+ negs yl, yl
+ sbc yh, yh, yh, lsl #1
+#else
+ rsbs yl, yl, #0
+ rsc yh, yh, #0
+#endif
+1:
+ @ If exponent == difference, one or both args were denormalized.
+ @ Since this is not common case, rescale them off line.
+ teq r4, r5
+ beq LSYM(Lad_d)
+LSYM(Lad_x):
+
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r4, r4, #1
+
+ @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
+ rsbs lr, r5, #32
+ blt 1f
+ shift1 lsl, ip, yl, lr
+ shiftop adds xl xl yl lsr r5 yl
+ adc xh, xh, #0
+ shiftop adds xl xl yh lsl lr yl
+ shiftop adcs xh xh yh asr r5 yh
+ b 2f
+1: sub r5, r5, #32
+ add lr, lr, #32
+ cmp yl, #1
+ shift1 lsl,ip, yh, lr
+ do_it cs
+ orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
+ shiftop adds xl xl yh asr r5 yh
+ adcs xh, xh, yh, asr #31
+2:
+ @ We now have a result in xh-xl-ip.
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
+ and r5, xh, #0x80000000
+ bpl LSYM(Lad_p)
+#if defined(__thumb2__)
+ mov lr, #0
+ negs ip, ip
+ sbcs xl, lr, xl
+ sbc xh, lr, xh
+#else
+ rsbs ip, ip, #0
+ rscs xl, xl, #0
+ rsc xh, xh, #0
+#endif
+
+ @ Determine how to normalize the result.
+LSYM(Lad_p):
+ cmp xh, #0x00100000
+ bcc LSYM(Lad_a)
+ cmp xh, #0x00200000
+ bcc LSYM(Lad_e)
+
+ @ Result needs to be shifted right.
+ movs xh, xh, lsr #1
+ movs xl, xl, rrx
+ mov ip, ip, rrx
+ add r4, r4, #1
+
+ @ Make sure we did not bust our exponent.
+ mov r2, r4, lsl #21
+ cmn r2, #(2 << 21)
+ bcs LSYM(Lad_o)
+
+ @ Our result is now properly aligned into xh-xl, remaining bits in ip.
+ @ Round with MSB of ip. If halfway between two numbers, round towards
+ @ LSB of xl = 0.
+ @ Pack final result together.
+LSYM(Lad_e):
+ cmp ip, #0x80000000
+ do_it eq
+ COND(mov,s,eq) ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
+ orr xh, xh, r5
+ RETLDM "r4, r5"
+
+ @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+ movs ip, ip, lsl #1
+ adcs xl, xl, xl
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ sub r4, r4, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since ip will always be 0 at this point.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+ teq xh, #0
+ movne r3, #20
+ moveq r3, #52
+ moveq xh, xl
+ moveq xl, #0
+ mov r2, xh
+ cmp r2, #(1 << 16)
+ movhs r2, r2, lsr #16
+ subhs r3, r3, #16
+ cmp r2, #(1 << 8)
+ movhs r2, r2, lsr #8
+ subhs r3, r3, #8
+ cmp r2, #(1 << 4)
+ movhs r2, r2, lsr #4
+ subhs r3, r3, #4
+ cmp r2, #(1 << 2)
+ subhs r3, r3, #2
+ sublo r3, r3, r2, lsr #1
+ sub r3, r3, r2, lsr #3
+
+#else
+
+ teq xh, #0
+ do_it eq, t
+ moveq xh, xl
+ moveq xl, #0
+ clz r3, xh
+ do_it eq
+ addeq r3, r3, #32
+ sub r3, r3, #11
+
+#endif
+
+ @ determine how to shift the value.
+ subs r2, r3, #32
+ bge 2f
+ adds r2, r2, #12
+ ble 1f
+
+ @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
+ @ since a register switch happened above.
+ add ip, r2, #20
+ rsb r2, r2, #12
+ shift1 lsl, xl, xh, ip
+ shift1 lsr, xh, xh, r2
+ b 3f
+
+ @ actually shift value left 1 to 20 bits, which might also represent
+ @ 32 to 52 bits if counting the register switch that happened earlier.
+1: add r2, r2, #20
+2: do_it le
+ rsble ip, r2, #32
+ shift1 lsl, xh, xh, r2
+#if defined(__thumb2__)
+ lsr ip, xl, ip
+ itt le
+ orrle xh, xh, ip
+ lslle xl, xl, r2
+#else
+ orrle xh, xh, xl, lsr ip
+ movle xl, xl, lsl r2
+#endif
+
+ @ adjust exponent accordingly.
+3: subs r4, r4, r3
+ do_it ge, tt
+ addge xh, xh, r4, lsl #20
+ orrge xh, xh, r5
+ RETLDM "r4, r5" ge
+
+ @ Exponent too small, denormalize result.
+ @ Find out proper shift value.
+ mvn r4, r4
+ subs r4, r4, #31
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ shift result right of 1 to 20 bits, sign is in r5.
+ add r4, r4, #20
+ rsb r2, r4, #32
+ shift1 lsr, xl, xl, r4
+ shiftop orr xl xl xh lsl r2 yh
+ shiftop orr xh r5 xh lsr r4 yh
+ RETLDM "r4, r5"
+
+ @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+ @ a register switch from xh to xl.
+1: rsb r4, r4, #12
+ rsb r2, r4, #32
+ shift1 lsr, xl, xl, r2
+ shiftop orr xl xl xh lsl r4 yh
+ mov xh, r5
+ RETLDM "r4, r5"
+
+ @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+ @ from xh to xl.
+2: shift1 lsr, xl, xh, r4
+ mov xh, r5
+ RETLDM "r4, r5"
+
+ @ Adjust exponents for denormalized arguments.
+ @ Note that r4 must not remain equal to 0.
+LSYM(Lad_d):
+ teq r4, #0
+ eor yh, yh, #0x00100000
+ do_it eq, te
+ eoreq xh, xh, #0x00100000
+ addeq r4, r4, #1
+ subne r5, r5, #1
+ b LSYM(Lad_x)
+
+
+LSYM(Lad_s):
+ mvns ip, r4, asr #21
+ do_it ne
+ COND(mvn,s,ne) ip, r5, asr #21
+ beq LSYM(Lad_i)
+
+ teq r4, r5
+ do_it eq
+ teqeq xl, yl
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ orrs ip, r4, xl
+ do_it eq, t
+ moveq xh, yh
+ moveq xl, yl
+ RETLDM "r4, r5"
+
+1: teq xh, yh
+
+ @ Result is x - x = 0.
+ do_it ne, tt
+ movne xh, #0
+ movne xl, #0
+ RETLDM "r4, r5" ne
+
+ @ Result is x + x = 2x.
+ movs ip, r4, lsr #21
+ bne 2f
+ movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ do_it cs
+ orrcs xh, xh, #0x80000000
+ RETLDM "r4, r5"
+2: adds r4, r4, #(2 << 21)
+ do_it cc, t
+ addcc xh, xh, #(1 << 20)
+ RETLDM "r4, r5" cc
+ and r5, xh, #0x80000000
+
+ @ Overflow: return INF.
+LSYM(Lad_o):
+ orr xh, r5, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+ RETLDM "r4, r5"
+
+ @ At least one of x or y is INF/NAN.
+ @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
+ @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
+ @ if either is NAN: return NAN
+ @ if opposite sign: return NAN
+ @ otherwise return xh-xl (which is INF or -INF)
+LSYM(Lad_i):
+ mvns ip, r4, asr #21
+ do_it ne, te
+ movne xh, yh
+ movne xl, yl
+ COND(mvn,s,eq) ip, r5, asr #21
+ do_it ne, t
+ movne yh, xh
+ movne yl, xl
+ orrs r4, xl, xh, lsl #12
+ do_it eq, te
+ COND(orr,s,eq) r5, yl, yh, lsl #12
+ teqeq xh, yh
+ orrne xh, xh, #0x00080000 @ quiet NAN
+ RETLDM "r4, r5"
+
+ FUNC_END aeabi_dsub
+ FUNC_END subdf3
+ FUNC_END aeabi_dadd
+ FUNC_END adddf3
+
+ARM_FUNC_START floatunsidf
+ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
+
+ teq r0, #0
+ do_it eq, t
+ moveq r1, #0
+ RETc(eq)
+ do_push {r4, r5, lr}
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+ mov r5, #0 @ sign bit is 0
+ .ifnc xl, r0
+ mov xl, r0
+ .endif
+ mov xh, #0
+ b LSYM(Lad_l)
+
+ FUNC_END aeabi_ui2d
+ FUNC_END floatunsidf
+
+ARM_FUNC_START floatsidf
+ARM_FUNC_ALIAS aeabi_i2d floatsidf
+
+ teq r0, #0
+ do_it eq, t
+ moveq r1, #0
+ RETc(eq)
+ do_push {r4, r5, lr}
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+ ands r5, r0, #0x80000000 @ sign bit in r5
+ do_it mi
+ rsbmi r0, r0, #0 @ absolute value
+ .ifnc xl, r0
+ mov xl, r0
+ .endif
+ mov xh, #0
+ b LSYM(Lad_l)
+
+ FUNC_END aeabi_i2d
+ FUNC_END floatsidf
+
+ARM_FUNC_START extendsfdf2
+ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
+
+ movs r2, r0, lsl #1 @ toss sign bit
+ mov xh, r2, asr #3 @ stretch exponent
+ mov xh, xh, rrx @ retrieve sign bit
+ mov xl, r2, lsl #28 @ retrieve remaining bits
+ do_it ne, ttt
+ COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent
+ teqne r3, #0xff000000 @ if not 0, check if INF or NAN
+ eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
+ RETc(ne) @ and return it.
+
+ teq r2, #0 @ if actually 0
+ do_it ne, e
+ teqne r3, #0xff000000 @ or INF or NAN
+ RETc(eq) @ we are done already.
+
+ @ value was denormalized. We can normalize it now.
+ do_push {r4, r5, lr}
+ mov r4, #0x380 @ setup corresponding exponent
+ and r5, xh, #0x80000000 @ move sign bit in r5
+ bic xh, xh, #0x80000000
+ b LSYM(Lad_l)
+
+ FUNC_END aeabi_f2d
+ FUNC_END extendsfdf2
+
+ARM_FUNC_START floatundidf
+ARM_FUNC_ALIAS aeabi_ul2d floatundidf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqd f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ @ For hard FPA code we want to return via the tail below so that
+ @ we can return the result in f0 as well as in r0/r1 for backwards
+ @ compatibility.
+ adr ip, LSYM(f0_ret)
+ @ Push pc as well so that RETLDM works correctly.
+ do_push {r4, r5, ip, lr, pc}
+#else
+ do_push {r4, r5, lr}
+#endif
+
+ mov r5, #0
+ b 2f
+
+ARM_FUNC_START floatdidf
+ARM_FUNC_ALIAS aeabi_l2d floatdidf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqd f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ @ For hard FPA code we want to return via the tail below so that
+ @ we can return the result in f0 as well as in r0/r1 for backwards
+ @ compatibility.
+ adr ip, LSYM(f0_ret)
+ @ Push pc as well so that RETLDM works correctly.
+ do_push {r4, r5, ip, lr, pc}
+#else
+ do_push {r4, r5, lr}
+#endif
+
+ ands r5, ah, #0x80000000 @ sign bit in r5
+ bpl 2f
+#if defined(__thumb2__)
+ negs al, al
+ sbc ah, ah, ah, lsl #1
+#else
+ rsbs al, al, #0
+ rsc ah, ah, #0
+#endif
+2:
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+
+ @ FPA little-endian: must swap the word order.
+ .ifnc xh, ah
+ mov ip, al
+ mov xh, ah
+ mov xl, ip
+ .endif
+
+ movs ip, xh, lsr #22
+ beq LSYM(Lad_p)
+
+ @ The value is too big. Scale it down a bit...
+ mov r2, #3
+ movs ip, ip, lsr #3
+ do_it ne
+ addne r2, r2, #3
+ movs ip, ip, lsr #3
+ do_it ne
+ addne r2, r2, #3
+ add r2, r2, ip, lsr #3
+
+ rsb r3, r2, #32
+ shift1 lsl, ip, xl, r3
+ shift1 lsr, xl, xl, r2
+ shiftop orr xl xl xh lsl r3 lr
+ shift1 lsr, xh, xh, r2
+ add r4, r4, r2
+ b LSYM(Lad_p)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+
+ @ Legacy code expects the result to be returned in f0. Copy it
+ @ there as well.
+LSYM(f0_ret):
+ do_push {r0, r1}
+ ldfd f0, [sp], #8
+ RETLDM
+
+#endif
+
+ FUNC_END floatdidf
+ FUNC_END aeabi_l2d
+ FUNC_END floatundidf
+ FUNC_END aeabi_ul2d
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_arm_muldivdf3
+
+ARM_FUNC_START muldf3
+ARM_FUNC_ALIAS aeabi_dmul muldf3
+ do_push {r4, r5, r6, lr}
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ do_it ne, tte
+ COND(and,s,ne) r5, ip, yh, lsr #20
+ teqne r4, ip
+ teqne r5, ip
+ bleq LSYM(Lml_s)
+
+ @ Add exponents together
+ add r4, r4, r5
+
+ @ Determine final sign.
+ eor r6, xh, yh
+
+ @ Convert mantissa to unsigned integer.
+ @ If power of two, branch to a separate path.
+ bic xh, xh, ip, lsl #21
+ bic yh, yh, ip, lsl #21
+ orrs r5, xl, xh, lsl #12
+ do_it ne
+ COND(orr,s,ne) r5, yl, yh, lsl #12
+ orr xh, xh, #0x00100000
+ orr yh, yh, #0x00100000
+ beq LSYM(Lml_1)
+
+#if __ARM_ARCH__ < 4
+
+ @ Put sign bit in r6, which will be restored in yl later.
+ and r6, r6, #0x80000000
+
+ @ Well, no way to make it shorter without the umull instruction.
+ stmfd sp!, {r6, r7, r8, r9, sl, fp}
+ mov r7, xl, lsr #16
+ mov r8, yl, lsr #16
+ mov r9, xh, lsr #16
+ mov sl, yh, lsr #16
+ bic xl, xl, r7, lsl #16
+ bic yl, yl, r8, lsl #16
+ bic xh, xh, r9, lsl #16
+ bic yh, yh, sl, lsl #16
+ mul ip, xl, yl
+ mul fp, xl, r8
+ mov lr, #0
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, r7, yl
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, xl, sl
+ mov r5, #0
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r7, yh
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, r8
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r9, yl
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, sl
+ mul r6, r9, sl
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, r9, yh
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, xl, yh
+ adds lr, lr, fp
+ mul fp, r7, sl
+ adcs r5, r5, fp
+ mul fp, xh, yl
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, r9, r8
+ adcs r5, r5, fp
+ mul fp, r7, r8
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, xh, yh
+ adcs r5, r5, fp
+ adc r6, r6, #0
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp}
+
+#else
+
+ @ Here is the actual multiplication.
+ umull ip, lr, xl, yl
+ mov r5, #0
+ umlal lr, r5, xh, yl
+ and yl, r6, #0x80000000
+ umlal lr, r5, xl, yh
+ mov r6, #0
+ umlal r5, r6, xh, yh
+
+#endif
+
+ @ The LSBs in ip are only significant for the final rounding.
+ @ Fold them into lr.
+ teq ip, #0
+ do_it ne
+ orrne lr, lr, #1
+
+ @ Adjust result upon the MSB position.
+ sub r4, r4, #0xff
+ cmp r6, #(1 << (20-11))
+ sbc r4, r4, #0x300
+ bcs 1f
+ movs lr, lr, lsl #1
+ adcs r5, r5, r5
+ adc r6, r6, r6
+1:
+ @ Shift to final position, add sign to result.
+ orr xh, yl, r6, lsl #11
+ orr xh, xh, r5, lsr #21
+ mov xl, r5, lsl #11
+ orr xl, xl, lr, lsr #21
+ mov lr, lr, lsl #11
+
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ do_it hi
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp lr, #0x80000000
+ do_it eq
+ COND(mov,s,eq) lr, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6"
+
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ and r6, r6, #0x80000000
+ orr xh, r6, xh
+ orr xl, xl, yl
+ eor xh, xh, yh
+ subs r4, r4, ip, lsr #1
+ do_it gt, tt
+ COND(rsb,s,gt) r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+
+ @ Under/overflow: fix things up for the code below.
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
+
+LSYM(Lml_u):
+ @ Overflow?
+ bgt LSYM(Lml_o)
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r4, #(53 + 1)
+ do_it le, tt
+ movle xl, #0
+ bicle xh, xh, #0x7fffffff
+ RETLDM "r4, r5, r6" le
+
+ @ Find out proper shift value.
+ rsb r4, r4, #0
+ subs r4, r4, #32
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ Shift the result right by 1 to 20 bits, preserve the sign bit, round, etc.
+ add r4, r4, #20
+ rsb r5, r4, #32
+ shift1 lsl, r3, xl, r5
+ shift1 lsr, xl, xl, r4
+ shiftop orr xl xl xh lsl r5 r2
+ and r2, xh, #0x80000000
+ bic xh, xh, #0x80000000
+ adds xl, xl, r3, lsr #31
+ shiftop adc xh r2 xh lsr r4 r6
+ orrs lr, lr, r3, lsl #1
+ do_it eq
+ biceq xl, xl, r3, lsr #31
+ RETLDM "r4, r5, r6"
+
+ @ Shift the result right by 21 to 31 bits, or left by 11 to 1 bits, after
+ @ a register switch from xh to xl. Then round.
+1: rsb r4, r4, #12
+ rsb r5, r4, #32
+ shift1 lsl, r3, xl, r4
+ shift1 lsr, xl, xl, r5
+ shiftop orr xl xl xh lsl r4 r2
+ bic xh, xh, #0x7fffffff
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ orrs lr, lr, r3, lsl #1
+ do_it eq
+ biceq xl, xl, r3, lsr #31
+ RETLDM "r4, r5, r6"
+
+ @ Shift the value right by 32 to 64 bits, or by 0 to 32 bits after a switch
+ @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
+2: rsb r5, r4, #32
+ shiftop orr lr lr xl lsl r5 r2
+ shift1 lsr, r3, xl, r4
+ shiftop orr r3 r3 xh lsl r5 r2
+ shift1 lsr, xl, xh, r4
+ bic xh, xh, #0x7fffffff
+ shiftop bic xl xl xh lsr r4 r2
+ add xl, xl, r3, lsr #31
+ orrs lr, lr, r3, lsl #1
+ do_it eq
+ biceq xl, xl, r3, lsr #31
+ RETLDM "r4, r5, r6"
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+ teq r4, #0
+ bne 2f
+ and r6, xh, #0x80000000
+1: movs xl, xl, lsl #1
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ do_it eq
+ subeq r4, r4, #1
+ beq 1b
+ orr xh, xh, r6
+ teq r5, #0
+ do_it ne
+ RETc(ne)
+2: and r6, yh, #0x80000000
+3: movs yl, yl, lsl #1
+ adc yh, yh, yh
+ tst yh, #0x00100000
+ do_it eq
+ subeq r5, r5, #1
+ beq 3b
+ orr yh, yh, r6
+ RET
+
+LSYM(Lml_s):
+ @ Isolate the INF and NAN cases away
+ teq r4, ip
+ and r5, ip, yh, lsr #20
+ do_it ne
+ teqne r5, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ orrs r6, xl, xh, lsl #1
+ do_it ne
+ COND(orr,s,ne) r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor xh, xh, yh
+ and xh, xh, #0x80000000
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+1: @ One or both args are INF or NAN.
+ orrs r6, xl, xh, lsl #1
+ do_it eq, te
+ moveq xl, yl
+ moveq xh, yh
+ COND(orr,s,ne) r6, yl, yh, lsl #1
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r6, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r5, ip
+ bne LSYM(Lml_i)
+ orrs r6, yl, yh, lsl #12
+ do_it ne, t
+ movne xl, yl
+ movne xh, yh
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor xh, xh, yh
+
+ @ Overflow: return INF (sign already in xh).
+LSYM(Lml_o):
+ and xh, xh, #0x80000000
+ orr xh, xh, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+ @ Return a quiet NAN.
+LSYM(Lml_n):
+ orr xh, xh, #0x7f000000
+ orr xh, xh, #0x00f80000
+ RETLDM "r4, r5, r6"
+
+ FUNC_END aeabi_dmul
+ FUNC_END muldf3
+
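[Editor's note, not part of the patch] The umull/umlal sequence in muldf3 above accumulates the full 128-bit product of the two mantissas, and the pre-ARMv4 path builds the same thing from 16-bit halves. A minimal C sketch of the underlying decomposition into 32x32->64 partial products, illustrative only:

    #include <stdint.h>

    /* Illustrative 64x64 -> 128-bit multiply from 32x32 -> 64 partial products.  */
    static void mul64x64_128 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
    {
      uint32_t xl = (uint32_t) x, xh = (uint32_t) (x >> 32);
      uint32_t yl = (uint32_t) y, yh = (uint32_t) (y >> 32);
      uint64_t ll = (uint64_t) xl * yl;
      uint64_t lh = (uint64_t) xl * yh;
      uint64_t hl = (uint64_t) xh * yl;
      uint64_t hh = (uint64_t) xh * yh;
      uint64_t mid = (ll >> 32) + (uint32_t) lh + (uint32_t) hl;
      *lo = (mid << 32) | (uint32_t) ll;
      *hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
    }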
+ARM_FUNC_START divdf3
+ARM_FUNC_ALIAS aeabi_ddiv divdf3
+
+ do_push {r4, r5, r6, lr}
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ do_it ne, tte
+ COND(and,s,ne) r5, ip, yh, lsr #20
+ teqne r4, ip
+ teqne r5, ip
+ bleq LSYM(Ldv_s)
+
+ @ Subtract the divisor exponent from the dividend's.
+ sub r4, r4, r5
+
+ @ Preserve final sign into lr.
+ eor lr, xh, yh
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r5-r6, divisor -> yh-yl.
+ orrs r5, yl, yh, lsl #12
+ mov xh, xh, lsl #12
+ beq LSYM(Ldv_1)
+ mov yh, yh, lsl #12
+ mov r5, #0x10000000
+ orr yh, r5, yh, lsr #4
+ orr yh, yh, yl, lsr #24
+ mov yl, yl, lsl #8
+ orr r5, r5, xh, lsr #4
+ orr r5, r5, xl, lsr #24
+ mov r6, xl, lsl #8
+
+ @ Initialize xh with final sign bit.
+ and xh, lr, #0x80000000
+
+ @ Ensure the result will land in a known bit position.
+ @ Apply exponent bias accordingly.
+ cmp r5, yh
+ do_it eq
+ cmpeq r6, yl
+ adc r4, r4, #(255 - 2)
+ add r4, r4, #0x300
+ bcs 1f
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+1:
+ @ Perform the first subtraction to align the result to a nibble.
+ subs r6, r6, yl
+ sbc r5, r5, yh
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ mov xl, #0x00100000
+ mov ip, #0x00080000
+
+ @ The actual division loop.
+1: subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #1
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #2
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #3
+
+ orrs lr, r5, r6
+ beq 2f
+ mov r5, r5, lsl #4
+ orr r5, r5, r6, lsr #28
+ mov r6, r6, lsl #4
+ mov yh, yh, lsl #3
+ orr yh, yh, yl, lsr #29
+ mov yl, yl, lsl #3
+ movs ip, ip, lsr #4
+ bne 1b
+
+ @ We are done with a word of the result.
+ @ Loop again for the low word if this pass was for the high word.
+ tst xh, #0x00100000
+ bne 3f
+ orr xh, xh, xl
+ mov xl, #0
+ mov ip, #0x80000000
+ b 1b
+2:
+ @ Be sure result starts in the high word.
+ tst xh, #0x00100000
+ do_it eq, t
+ orreq xh, xh, xl
+ moveq xl, #0
+3:
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ do_it hi
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ subs ip, r5, yh
+ do_it eq, t
+ COND(sub,s,eq) ip, r6, yl
+ COND(mov,s,eq) ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6"
+
+ @ Division by 0x1p*: shortcut a lot of code.
+LSYM(Ldv_1):
+ and lr, lr, #0x80000000
+ orr xh, lr, xh, lsr #12
+ adds r4, r4, ip, lsr #1
+ do_it gt, tt
+ COND(rsb,s,gt) r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
+ b LSYM(Lml_u)
+
+ @ Result might need to be denormalized: put remainder bits
+ @ in lr for rounding considerations.
+LSYM(Ldv_u):
+ orr lr, r5, r6
+ b LSYM(Lml_u)
+
+ @ One or both arguments is either INF, NAN or zero.
+LSYM(Ldv_s):
+ and r5, ip, yh, lsr #20
+ teq r4, ip
+ do_it eq
+ teqeq r5, ip
+ beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r4, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ teq r5, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+1: teq r5, ip
+ bne 2f
+ orrs r5, yl, yh, lsl #12
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are nonzero, we need to normalize and resume above.
+ orrs r6, xl, xh, lsl #1
+ do_it ne
+ COND(orr,s,ne) r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+ @ One or both arguments are 0.
+ orrs r4, xl, xh, lsl #1
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ orrs r5, yl, yh, lsl #1
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+ FUNC_END aeabi_ddiv
+ FUNC_END divdf3
+
+#endif /* L_muldivdf3 */
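[Editor's note, not part of the patch] The quotient loops of divdf3 above (and divsf3 later in ieee754-sf.S) are restoring divisions that recover four result bits per pass. A one-bit-per-step C equivalent, illustrative only and assuming the operands are pre-normalized so that den != 0, den < 2^63 and num < 2*den on entry:

    #include <stdint.h>

    /* Illustrative restoring division: 'bits' quotient bits of num/den.  */
    static uint64_t div_mant_sketch (uint64_t num, uint64_t den, int bits)
    {
      uint64_t q = 0;
      for (int i = 0; i < bits; i++)
        {
          q <<= 1;
          if (num >= den)
            {
              num -= den;
              q |= 1;
            }
          num <<= 1;               /* bring the next quotient bit into range */
        }
      return q;
    }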
+
+#ifdef L_arm_cmpdf2
+
+@ Note: only r0 (return value) and ip are clobbered here.
+
+ARM_FUNC_START gtdf2
+ARM_FUNC_ALIAS gedf2 gtdf2
+ mov ip, #-1
+ b 1f
+
+ARM_FUNC_START ltdf2
+ARM_FUNC_ALIAS ledf2 ltdf2
+ mov ip, #1
+ b 1f
+
+ARM_FUNC_START cmpdf2
+ARM_FUNC_ALIAS nedf2 cmpdf2
+ARM_FUNC_ALIAS eqdf2 cmpdf2
+ mov ip, #1 @ how should we specify unordered here?
+
+1: str ip, [sp, #-4]!
+
+ @ Trap any INF/NAN first.
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ mov ip, yh, lsl #1
+ do_it ne
+ COND(mvn,s,ne) ip, ip, asr #21
+ beq 3f
+
+ @ Test for equality.
+ @ Note that 0.0 is equal to -0.0.
+2: add sp, sp, #4
+ orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
+ do_it eq, e
+ COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
+ teqne xh, yh @ or xh == yh
+ do_it eq, tt
+ teqeq xl, yl @ and xl == yl
+ moveq r0, #0 @ then equal.
+ RETc(eq)
+
+ @ Clear C flag
+ cmn r0, #0
+
+ @ Compare sign,
+ teq xh, yh
+
+ @ Compare values if same sign
+ do_it pl
+ cmppl xh, yh
+ do_it eq
+ cmpeq xl, yl
+
+ @ Result:
+ do_it cs, e
+ movcs r0, yh, asr #31
+ mvncc r0, yh, asr #31
+ orr r0, r0, #1
+ RET
+
+ @ Look for a NAN.
+3: mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ bne 4f
+ orrs ip, xl, xh, lsl #12
+ bne 5f @ x is NAN
+4: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
+ bne 2b
+ orrs ip, yl, yh, lsl #12
+ beq 2b @ y is not NAN
+5: ldr r0, [sp], #4 @ unordered return code
+ RET
+
+ FUNC_END gedf2
+ FUNC_END gtdf2
+ FUNC_END ledf2
+ FUNC_END ltdf2
+ FUNC_END nedf2
+ FUNC_END eqdf2
+ FUNC_END cmpdf2
+
+ARM_FUNC_START aeabi_cdrcmple
+
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+ b 6f
+
+ARM_FUNC_START aeabi_cdcmpeq
+ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: do_push {r0, lr}
+ ARM_CALL cmpdf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ do_it mi
+ cmnmi r0, #0
+ RETLDM "r0"
+
+ FUNC_END aeabi_cdcmple
+ FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
+
+ARM_FUNC_START aeabi_dcmpeq
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdcmple
+ do_it eq, e
+ moveq r0, #1 @ Equal to.
+ movne r0, #0 @ Less than, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmpeq
+
+ARM_FUNC_START aeabi_dcmplt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdcmple
+ do_it cc, e
+ movcc r0, #1 @ Less than.
+ movcs r0, #0 @ Equal to, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmplt
+
+ARM_FUNC_START aeabi_dcmple
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdcmple
+ do_it ls, e
+ movls r0, #1 @ Less than or equal to.
+ movhi r0, #0 @ Greater than or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmple
+
+ARM_FUNC_START aeabi_dcmpge
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdrcmple
+ do_it ls, e
+ movls r0, #1 @ Operand 2 is less than or equal to operand 1.
+ movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmpge
+
+ARM_FUNC_START aeabi_dcmpgt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdrcmple
+ do_it cc, e
+ movcc r0, #1 @ Operand 2 is less than operand 1.
+ movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
+ @ or they are unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmpgt
+
+#endif /* L_cmpdf2 */
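[Editor's note, not part of the patch] The comparison helpers above order two doubles directly on their bit patterns, with the ip value pushed at entry selecting what to return for unordered operands. A bit-level C sketch of the same semantics, illustrative only, where 'unord' plays the role of that stored value:

    #include <stdint.h>

    /* Illustrative: 0 if equal, 1 if x > y, -1 if x < y, 'unord' if either is NaN.  */
    static int cmpdf2_sketch (uint64_t x, uint64_t y, int unord)
    {
      uint64_t ax = x << 1, ay = y << 1;            /* drop the sign bits */
      if (ax > 0xFFE0000000000000ULL || ay > 0xFFE0000000000000ULL)
        return unord;                               /* at least one NaN */
      if ((ax == 0 && ay == 0) || x == y)
        return 0;                                   /* note +0.0 == -0.0 */
      int sx = (int) (x >> 63), sy = (int) (y >> 63);
      if (sx != sy)
        return sx ? -1 : 1;                         /* negative < positive */
      if (sx)
        return ax < ay ? 1 : -1;                    /* both negative: reversed */
      return ax > ay ? 1 : -1;                      /* both positive */
    }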
+
+#ifdef L_arm_unorddf2
+
+ARM_FUNC_START unorddf2
+ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
+
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ bne 1f
+ orrs ip, xl, xh, lsl #12
+ bne 3f @ x is NAN
+1: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
+ bne 2f
+ orrs ip, yl, yh, lsl #12
+ bne 3f @ y is NAN
+2: mov r0, #0 @ arguments are ordered.
+ RET
+
+3: mov r0, #1 @ arguments are unordered.
+ RET
+
+ FUNC_END aeabi_dcmpun
+ FUNC_END unorddf2
+
+#endif /* L_unorddf2 */
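[Editor's note, not part of the patch] The NaN test used above (exponent all ones, mantissa nonzero) reduces to a single unsigned comparison once the sign bit is shifted out. An illustrative C equivalent:

    #include <stdint.h>

    /* Illustrative: a double is NaN iff its exponent is all ones and its mantissa is nonzero.  */
    static int isnan_sketch (uint64_t d)
    {
      return (d << 1) > 0xFFE0000000000000ULL;
    }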
+
+#ifdef L_arm_fixdfsi
+
+ARM_FUNC_START fixdfsi
+ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
+
+ @ check exponent range.
+ mov r2, xh, lsl #1
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bls 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ tst xh, #0x80000000 @ the sign bit
+ shift1 lsr, r0, r3, r2
+ do_it ne
+ rsbne r0, r0, #0
+ RET
+
+1: mov r0, #0
+ RET
+
+2: orrs xl, xl, xh, lsl #12
+ bne 4f @ x is NAN.
+3: ands r0, xh, #0x80000000 @ the sign bit
+ do_it eq
+ moveq r0, #0x7fffffff @ maximum signed positive si
+ RET
+
+4: mov r0, #0 @ How should we convert NAN?
+ RET
+
+ FUNC_END aeabi_d2iz
+ FUNC_END fixdfsi
+
+#endif /* L_fixdfsi */
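[Editor's note, not part of the patch] A C sketch of the same conversion, operating on the raw bit pattern of the double; illustrative only, with the saturation and NaN choices mirroring the assembly above:

    #include <stdint.h>

    static int32_t fixdfsi_sketch (uint64_t d)
    {
      int exp = (int) ((d >> 52) & 0x7FF) - 1023;     /* unbiased exponent */
      uint64_t mant = d & 0x000FFFFFFFFFFFFFULL;
      int neg = (int) (d >> 63);

      if (exp < 0)
        return 0;                                     /* |value| < 1 */
      if (exp >= 31)
        {
          if (exp == 1024 && mant != 0)
            return 0;                                 /* NaN -> 0, as above */
          return neg ? INT32_MIN : INT32_MAX;         /* saturate */
        }
      mant |= 1ULL << 52;                             /* implicit leading 1 */
      uint32_t v = (uint32_t) (mant >> (52 - exp));   /* truncate toward zero */
      return neg ? -(int32_t) v : (int32_t) v;
    }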
+
+#ifdef L_arm_fixunsdfsi
+
+ARM_FUNC_START fixunsdfsi
+ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
+
+ @ check exponent range.
+ movs r2, xh, lsl #1
+ bcs 1f @ value is negative
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bmi 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ shift1 lsr, r0, r3, r2
+ RET
+
+1: mov r0, #0
+ RET
+
+2: orrs xl, xl, xh, lsl #12
+ bne 4f @ value is NAN.
+3: mov r0, #0xffffffff @ maximum unsigned si
+ RET
+
+4: mov r0, #0 @ How should we convert NAN?
+ RET
+
+ FUNC_END aeabi_d2uiz
+ FUNC_END fixunsdfsi
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_arm_truncdfsf2
+
+ARM_FUNC_START truncdfsf2
+ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
+
+ @ check exponent range.
+ mov r2, xh, lsl #1
+ subs r3, r2, #((1023 - 127) << 21)
+ do_it cs, t
+ COND(sub,s,cs) ip, r3, #(1 << 21)
+ COND(rsb,s,cs) ip, ip, #(254 << 21)
+ bls 2f @ value is out of range
+
+1: @ shift and round mantissa
+ and ip, xh, #0x80000000
+ mov r2, xl, lsl #3
+ orr xl, ip, xl, lsr #29
+ cmp r2, #0x80000000
+ adc r0, xl, r3, lsl #2
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+2: @ either overflow or underflow
+ tst xh, #0x40000000
+ bne 3f @ overflow
+
+ @ check if denormalized value is possible
+ adds r2, r3, #(23 << 21)
+ do_it lt, t
+ andlt r0, xh, #0x80000000 @ too small, return signed 0.
+ RETc(lt)
+
+ @ denormalize value so we can resume with the code above afterwards.
+ orr xh, xh, #0x00100000
+ mov r2, r2, lsr #21
+ rsb r2, r2, #24
+ rsb ip, r2, #32
+#if defined(__thumb2__)
+ lsls r3, xl, ip
+#else
+ movs r3, xl, lsl ip
+#endif
+ shift1 lsr, xl, xl, r2
+ do_it ne
+ orrne xl, xl, #1 @ fold r3 for rounding considerations.
+ mov r3, xh, lsl #11
+ mov r3, r3, lsr #11
+ shiftop orr xl xl r3 lsl ip ip
+ shift1 lsr, r3, r3, r2
+ mov r3, r3, lsl #1
+ b 1b
+
+3: @ check for NAN
+ mvns r3, r2, asr #21
+ bne 5f @ simple overflow
+ orrs r3, xl, xh, lsl #12
+ do_it ne, tt
+ movne r0, #0x7f000000
+ orrne r0, r0, #0x00c00000
+ RETc(ne) @ return NAN
+
+5: @ return INF with sign
+ and r0, xh, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ FUNC_END aeabi_d2f
+ FUNC_END truncdfsf2
+
+#endif /* L_truncdfsf2 */
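[Editor's note, not part of the patch] For the common in-range case, the narrowing above amounts to re-biasing the exponent and rounding away 29 mantissa bits. A C sketch of just that normal path, illustrative only; the overflow, underflow/denormal and NaN branches handled by the assembly are omitted:

    #include <stdint.h>

    static uint32_t truncdfsf2_normal_sketch (uint64_t d)
    {
      uint32_t sign = (uint32_t) (d >> 63) << 31;
      int exp = (int) ((d >> 52) & 0x7FF) - 1023 + 127;    /* rebias to single */
      uint32_t mant = (uint32_t) (d >> 29) & 0x007FFFFF;   /* top 23 mantissa bits */
      uint32_t rest = (uint32_t) (d << 3);                 /* 29 discarded bits, left-aligned */
      uint32_t r = sign | ((uint32_t) exp << 23) | mant;

      /* Round to nearest, ties to even; a carry may ripple into the exponent.  */
      if (rest > 0x80000000u || (rest == 0x80000000u && (r & 1)))
        r++;
      return r;
    }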
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
new file mode 100644
index 00000000000..c93f66d8ff8
--- /dev/null
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -0,0 +1,1060 @@
+/* ieee754-sf.S single-precision floating point support for ARM
+
+ Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible. This is
+ * not meant to be easy to understand for the casual reader.
+ *
+ * Only the default rounding mode is supported, in the interest of best
+ * performance. Exceptions aren't supported yet, but they can be added
+ * quite easily if necessary without impacting performance.
+ */
+
+#ifdef L_arm_negsf2
+
+ARM_FUNC_START negsf2
+ARM_FUNC_ALIAS aeabi_fneg negsf2
+
+ eor r0, r0, #0x80000000 @ flip sign bit
+ RET
+
+ FUNC_END aeabi_fneg
+ FUNC_END negsf2
+
+#endif
+
+#ifdef L_arm_addsubsf3
+
+ARM_FUNC_START aeabi_frsub
+
+ eor r0, r0, #0x80000000 @ flip sign bit of first arg
+ b 1f
+
+ARM_FUNC_START subsf3
+ARM_FUNC_ALIAS aeabi_fsub subsf3
+
+ eor r1, r1, #0x80000000 @ flip sign bit of second arg
+#if defined(__INTERWORKING_STUBS__)
+ b 1f @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START addsf3
+ARM_FUNC_ALIAS aeabi_fadd addsf3
+
+1: @ Look for zeroes, equal values, INF, or NAN.
+ movs r2, r0, lsl #1
+ do_it ne, ttt
+ COND(mov,s,ne) r3, r1, lsl #1
+ teqne r2, r3
+ COND(mvn,s,ne) ip, r2, asr #24
+ COND(mvn,s,ne) ip, r3, asr #24
+ beq LSYM(Lad_s)
+
+ @ Compute exponent difference. Make largest exponent in r2,
+ @ corresponding arg in r0, and positive exponent difference in r3.
+ mov r2, r2, lsr #24
+ rsbs r3, r2, r3, lsr #24
+ do_it gt, ttt
+ addgt r2, r2, r3
+ eorgt r1, r0, r1
+ eorgt r0, r1, r0
+ eorgt r1, r0, r1
+ do_it lt
+ rsblt r3, r3, #0
+
+ @ If exponent difference is too large, return largest argument
+ @ already in r0. We need up to 25 bits to handle proper rounding
+ @ of 0x1p25 - 1.1.
+ cmp r3, #25
+ do_it hi
+ RETc(hi)
+
+ @ Convert mantissa to signed integer.
+ tst r0, #0x80000000
+ orr r0, r0, #0x00800000
+ bic r0, r0, #0xff000000
+ do_it ne
+ rsbne r0, r0, #0
+ tst r1, #0x80000000
+ orr r1, r1, #0x00800000
+ bic r1, r1, #0xff000000
+ do_it ne
+ rsbne r1, r1, #0
+
+ @ If exponent == difference, one or both args were denormalized.
+ @ Since this is not a common case, rescale them out of line.
+ teq r2, r3
+ beq LSYM(Lad_d)
+LSYM(Lad_x):
+
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r2, r2, #1
+
+ @ Shift and add second arg to first arg in r0.
+ @ Keep leftover bits into r1.
+ shiftop adds r0 r0 r1 asr r3 ip
+ rsb r3, r3, #32
+ shift1 lsl, r1, r1, r3
+
+ @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
+ and r3, r0, #0x80000000
+ bpl LSYM(Lad_p)
+#if defined(__thumb2__)
+ negs r1, r1
+ sbc r0, r0, r0, lsl #1
+#else
+ rsbs r1, r1, #0
+ rsc r0, r0, #0
+#endif
+
+ @ Determine how to normalize the result.
+LSYM(Lad_p):
+ cmp r0, #0x00800000
+ bcc LSYM(Lad_a)
+ cmp r0, #0x01000000
+ bcc LSYM(Lad_e)
+
+ @ Result needs to be shifted right.
+ movs r0, r0, lsr #1
+ mov r1, r1, rrx
+ add r2, r2, #1
+
+ @ Make sure we did not bust our exponent.
+ cmp r2, #254
+ bhs LSYM(Lad_o)
+
+ @ Our result is now properly aligned into r0, remaining bits in r1.
+ @ Pack final result together.
+ @ Round with MSB of r1. If halfway between two numbers, round towards
+ @ LSB of r0 = 0.
+LSYM(Lad_e):
+ cmp r1, #0x80000000
+ adc r0, r0, r2, lsl #23
+ do_it eq
+ biceq r0, r0, #1
+ orr r0, r0, r3
+ RET
+
+ @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+ movs r1, r1, lsl #1
+ adc r0, r0, r0
+ tst r0, #0x00800000
+ sub r2, r2, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since r1 will always be 0 at this point.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+ movs ip, r0, lsr #12
+ moveq r0, r0, lsl #12
+ subeq r2, r2, #12
+ tst r0, #0x00ff0000
+ moveq r0, r0, lsl #8
+ subeq r2, r2, #8
+ tst r0, #0x00f00000
+ moveq r0, r0, lsl #4
+ subeq r2, r2, #4
+ tst r0, #0x00c00000
+ moveq r0, r0, lsl #2
+ subeq r2, r2, #2
+ cmp r0, #0x00800000
+ movcc r0, r0, lsl #1
+ sbcs r2, r2, #0
+
+#else
+
+ clz ip, r0
+ sub ip, ip, #8
+ subs r2, r2, ip
+ shift1 lsl, r0, r0, ip
+
+#endif
+
+ @ Final result with sign
+ @ If exponent negative, denormalize result.
+ do_it ge, et
+ addge r0, r0, r2, lsl #23
+ rsblt r2, r2, #0
+ orrge r0, r0, r3
+#if defined(__thumb2__)
+ do_it lt, t
+ lsrlt r0, r0, r2
+ orrlt r0, r3, r0
+#else
+ orrlt r0, r3, r0, lsr r2
+#endif
+ RET
+
+ @ Fixup and adjust bit position for denormalized arguments.
+ @ Note that r2 must not remain equal to 0.
+LSYM(Lad_d):
+ teq r2, #0
+ eor r1, r1, #0x00800000
+ do_it eq, te
+ eoreq r0, r0, #0x00800000
+ addeq r2, r2, #1
+ subne r3, r3, #1
+ b LSYM(Lad_x)
+
+LSYM(Lad_s):
+ mov r3, r1, lsl #1
+
+ mvns ip, r2, asr #24
+ do_it ne
+ COND(mvn,s,ne) ip, r3, asr #24
+ beq LSYM(Lad_i)
+
+ teq r2, r3
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r2, #0
+ do_it eq
+ moveq r0, r1
+ RET
+
+1: teq r0, r1
+
+ @ Result is x - x = 0.
+ do_it ne, t
+ movne r0, #0
+ RETc(ne)
+
+ @ Result is x + x = 2x.
+ tst r2, #0xff000000
+ bne 2f
+ movs r0, r0, lsl #1
+ do_it cs
+ orrcs r0, r0, #0x80000000
+ RET
+2: adds r2, r2, #(2 << 24)
+ do_it cc, t
+ addcc r0, r0, #(1 << 23)
+ RETc(cc)
+ and r3, r0, #0x80000000
+
+ @ Overflow: return INF.
+LSYM(Lad_o):
+ orr r0, r3, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ @ At least one of r0/r1 is INF/NAN.
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN)
+ @ if r1 != INF/NAN: return r0 (which is INF/NAN)
+ @ if r0 or r1 is NAN: return NAN
+ @ if opposite sign: return NAN
+ @ otherwise return r0 (which is INF or -INF)
+LSYM(Lad_i):
+ mvns r2, r2, asr #24
+ do_it ne, et
+ movne r0, r1
+ COND(mvn,s,eq) r3, r3, asr #24
+ movne r1, r0
+ movs r2, r0, lsl #9
+ do_it eq, te
+ COND(mov,s,eq) r3, r1, lsl #9
+ teqeq r0, r1
+ orrne r0, r0, #0x00400000 @ quiet NAN
+ RET
+
+ FUNC_END aeabi_frsub
+ FUNC_END aeabi_fadd
+ FUNC_END addsf3
+ FUNC_END aeabi_fsub
+ FUNC_END subsf3
+
+ARM_FUNC_START floatunsisf
+ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
+
+ mov r3, #0
+ b 1f
+
+ARM_FUNC_START floatsisf
+ARM_FUNC_ALIAS aeabi_i2f floatsisf
+
+ ands r3, r0, #0x80000000
+ do_it mi
+ rsbmi r0, r0, #0
+
+1: movs ip, r0
+ do_it eq
+ RETc(eq)
+
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23) << 23)
+
+ .ifnc ah, r0
+ mov ah, r0
+ .endif
+ mov al, #0
+ b 2f
+
+ FUNC_END aeabi_i2f
+ FUNC_END floatsisf
+ FUNC_END aeabi_ui2f
+ FUNC_END floatunsisf
+
+ARM_FUNC_START floatundisf
+ARM_FUNC_ALIAS aeabi_ul2f floatundisf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqs f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+ mov r3, #0
+ b 1f
+
+ARM_FUNC_START floatdisf
+ARM_FUNC_ALIAS aeabi_l2f floatdisf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqs f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+ ands r3, ah, #0x80000000 @ sign bit in r3
+ bpl 1f
+#if defined(__thumb2__)
+ negs al, al
+ sbc ah, ah, ah, lsl #1
+#else
+ rsbs al, al, #0
+ rsc ah, ah, #0
+#endif
+1:
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ @ For hard FPA code we want to return via the tail below so that
+ @ we can return the result in f0 as well as in r0 for backwards
+ @ compatibility.
+ str lr, [sp, #-8]!
+ adr lr, LSYM(f0_ret)
+#endif
+
+ movs ip, ah
+ do_it eq, tt
+ moveq ip, al
+ moveq ah, al
+ moveq al, #0
+
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23 + 32) << 23)
+ do_it eq
+ subeq r3, r3, #(32 << 23)
+2: sub r3, r3, #(1 << 23)
+
+#if __ARM_ARCH__ < 5
+
+ mov r2, #23
+ cmp ip, #(1 << 16)
+ do_it hs, t
+ movhs ip, ip, lsr #16
+ subhs r2, r2, #16
+ cmp ip, #(1 << 8)
+ do_it hs, t
+ movhs ip, ip, lsr #8
+ subhs r2, r2, #8
+ cmp ip, #(1 << 4)
+ do_it hs, t
+ movhs ip, ip, lsr #4
+ subhs r2, r2, #4
+ cmp ip, #(1 << 2)
+ do_it hs, e
+ subhs r2, r2, #2
+ sublo r2, r2, ip, lsr #1
+ subs r2, r2, ip, lsr #3
+
+#else
+
+ clz r2, ip
+ subs r2, r2, #8
+
+#endif
+
+ sub r3, r3, r2, lsl #23
+ blt 3f
+
+ shiftop add r3 r3 ah lsl r2 ip
+ shift1 lsl, ip, al, r2
+ rsb r2, r2, #32
+ cmp ip, #0x80000000
+ shiftop adc r0 r3 al lsr r2 r2
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+3: add r2, r2, #32
+ shift1 lsl, ip, ah, r2
+ rsb r2, r2, #32
+ orrs al, al, ip, lsl #1
+ shiftop adc r0 r3 ah lsr r2 r2
+ do_it eq
+ biceq r0, r0, ip, lsr #31
+ RET
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+
+LSYM(f0_ret):
+ str r0, [sp, #-4]!
+ ldfs f0, [sp], #4
+ RETLDM
+
+#endif
+
+ FUNC_END floatdisf
+ FUNC_END aeabi_l2f
+ FUNC_END floatundisf
+ FUNC_END aeabi_ul2f
+
+#endif /* L_addsubsf3 */
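[Editor's note, not part of the patch] On targets without clz (the __ARM_ARCH__ < 5 paths above), the normalization count is found with a short binary search over the leading bits. The same idea in C, illustrative only:

    #include <stdint.h>

    /* Illustrative count-leading-zeros; callers above never pass x == 0.  */
    static int clz32_sketch (uint32_t x)
    {
      int n = 0;
      if (x <= 0x0000FFFFu) { n += 16; x <<= 16; }
      if (x <= 0x00FFFFFFu) { n += 8;  x <<= 8;  }
      if (x <= 0x0FFFFFFFu) { n += 4;  x <<= 4;  }
      if (x <= 0x3FFFFFFFu) { n += 2;  x <<= 2;  }
      if (x <= 0x7FFFFFFFu) { n += 1; }
      return n;
    }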
+
+#ifdef L_arm_muldivsf3
+
+ARM_FUNC_START mulsf3
+ARM_FUNC_ALIAS aeabi_fmul mulsf3
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ ands r2, ip, r0, lsr #23
+ do_it ne, tt
+ COND(and,s,ne) r3, ip, r1, lsr #23
+ teqne r2, ip
+ teqne r3, ip
+ beq LSYM(Lml_s)
+LSYM(Lml_x):
+
+ @ Add exponents together
+ add r2, r2, r3
+
+ @ Determine final sign.
+ eor ip, r0, r1
+
+ @ Convert mantissa to unsigned integer.
+ @ If power of two, branch to a separate path.
+ @ Make up for final alignment.
+ movs r0, r0, lsl #9
+ do_it ne
+ COND(mov,s,ne) r1, r1, lsl #9
+ beq LSYM(Lml_1)
+ mov r3, #0x08000000
+ orr r0, r3, r0, lsr #5
+ orr r1, r3, r1, lsr #5
+
+#if __ARM_ARCH__ < 4
+
+ @ Put sign bit in r3, which will be restored into r0 later.
+ and r3, ip, #0x80000000
+
+ @ Well, no way to make it shorter without the umull instruction.
+ do_push {r3, r4, r5}
+ mov r4, r0, lsr #16
+ mov r5, r1, lsr #16
+ bic r0, r0, r4, lsl #16
+ bic r1, r1, r5, lsl #16
+ mul ip, r4, r5
+ mul r3, r0, r1
+ mul r0, r5, r0
+ mla r0, r4, r1, r0
+ adds r3, r3, r0, lsl #16
+ adc r1, ip, r0, lsr #16
+ do_pop {r0, r4, r5}
+
+#else
+
+ @ The actual multiplication.
+ umull r3, r1, r0, r1
+
+ @ Put final sign in r0.
+ and r0, ip, #0x80000000
+
+#endif
+
+ @ Adjust result upon the MSB position.
+ cmp r1, #(1 << 23)
+ do_it cc, tt
+ movcc r1, r1, lsl #1
+ orrcc r1, r1, r3, lsr #31
+ movcc r3, r3, lsl #1
+
+ @ Add sign to result.
+ orr r0, r0, r1
+
+ @ Apply exponent bias, check for under/overflow.
+ sbc r2, r2, #127
+ cmp r2, #(254 - 1)
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp r3, #0x80000000
+ adc r0, r0, r2, lsl #23
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ teq r0, #0
+ and ip, ip, #0x80000000
+ do_it eq
+ moveq r1, r1, lsl #9
+ orr r0, ip, r0, lsr #9
+ orr r0, r0, r1, lsr #9
+ subs r2, r2, #127
+ do_it gt, tt
+ COND(rsb,s,gt) r3, r2, #255
+ orrgt r0, r0, r2, lsl #23
+ RETc(gt)
+
+ @ Under/overflow: fix things up for the code below.
+ orr r0, r0, #0x00800000
+ mov r3, #0
+ subs r2, r2, #1
+
+LSYM(Lml_u):
+ @ Overflow?
+ bgt LSYM(Lml_o)
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r2, #(24 + 1)
+ do_it le, t
+ bicle r0, r0, #0x7fffffff
+ RETc(le)
+
+ @ Shift value right, round, etc.
+ rsb r2, r2, #0
+ movs r1, r0, lsl #1
+ shift1 lsr, r1, r1, r2
+ rsb r2, r2, #32
+ shift1 lsl, ip, r0, r2
+ movs r0, r1, rrx
+ adc r0, r0, #0
+ orrs r3, r3, ip, lsl #1
+ do_it eq
+ biceq r0, r0, ip, lsr #31
+ RET
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+ teq r2, #0
+ and ip, r0, #0x80000000
+1: do_it eq, tt
+ moveq r0, r0, lsl #1
+ tsteq r0, #0x00800000
+ subeq r2, r2, #1
+ beq 1b
+ orr r0, r0, ip
+ teq r3, #0
+ and ip, r1, #0x80000000
+2: do_it eq, tt
+ moveq r1, r1, lsl #1
+ tsteq r1, #0x00800000
+ subeq r3, r3, #1
+ beq 2b
+ orr r1, r1, ip
+ b LSYM(Lml_x)
+
+LSYM(Lml_s):
+ @ Isolate the INF and NAN cases away
+ and r3, ip, r1, lsr #23
+ teq r2, ip
+ do_it ne
+ teqne r3, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ bics ip, r0, #0x80000000
+ do_it ne
+ COND(bic,s,ne) ip, r1, #0x80000000
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor r0, r0, r1
+ bic r0, r0, #0x7fffffff
+ RET
+
+1: @ One or both args are INF or NAN.
+ teq r0, #0x0
+ do_it ne, ett
+ teqne r0, #0x80000000
+ moveq r0, r1
+ teqne r1, #0x0
+ teqne r1, #0x80000000
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r2, ip
+ bne 1f
+ movs r2, r0, lsl #9
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r3, ip
+ bne LSYM(Lml_i)
+ movs r3, r1, lsl #9
+ do_it ne
+ movne r0, r1
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor r0, r0, r1
+
+ @ Overflow: return INF (sign already in r0).
+LSYM(Lml_o):
+ and r0, r0, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ @ Return a quiet NAN.
+LSYM(Lml_n):
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00c00000
+ RET
+
+ FUNC_END aeabi_fmul
+ FUNC_END mulsf3
+
+ARM_FUNC_START divsf3
+ARM_FUNC_ALIAS aeabi_fdiv divsf3
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ ands r2, ip, r0, lsr #23
+ do_it ne, tt
+ COND(and,s,ne) r3, ip, r1, lsr #23
+ teqne r2, ip
+ teqne r3, ip
+ beq LSYM(Ldv_s)
+LSYM(Ldv_x):
+
+ @ Subtract the divisor exponent from the dividend's.
+ sub r2, r2, r3
+
+ @ Preserve final sign into ip.
+ eor ip, r0, r1
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r3, divisor -> r1.
+ movs r1, r1, lsl #9
+ mov r0, r0, lsl #9
+ beq LSYM(Ldv_1)
+ mov r3, #0x10000000
+ orr r1, r3, r1, lsr #4
+ orr r3, r3, r0, lsr #4
+
+ @ Initialize r0 (result) with final sign bit.
+ and r0, ip, #0x80000000
+
+ @ Ensure the result will land in a known bit position.
+ @ Apply exponent bias accordingly.
+ cmp r3, r1
+ do_it cc
+ movcc r3, r3, lsl #1
+ adc r2, r2, #(127 - 2)
+
+ @ The actual division loop.
+ mov ip, #0x00800000
+1: cmp r3, r1
+ do_it cs, t
+ subcs r3, r3, r1
+ orrcs r0, r0, ip
+ cmp r3, r1, lsr #1
+ do_it cs, t
+ subcs r3, r3, r1, lsr #1
+ orrcs r0, r0, ip, lsr #1
+ cmp r3, r1, lsr #2
+ do_it cs, t
+ subcs r3, r3, r1, lsr #2
+ orrcs r0, r0, ip, lsr #2
+ cmp r3, r1, lsr #3
+ do_it cs, t
+ subcs r3, r3, r1, lsr #3
+ orrcs r0, r0, ip, lsr #3
+ movs r3, r3, lsl #4
+ do_it ne
+ COND(mov,s,ne) ip, ip, lsr #4
+ bne 1b
+
+ @ Check exponent for under/overflow.
+ cmp r2, #(254 - 1)
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp r3, r1
+ adc r0, r0, r2, lsl #23
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+ @ Division by 0x1p*: let's shortcut a lot of code.
+LSYM(Ldv_1):
+ and ip, ip, #0x80000000
+ orr r0, ip, r0, lsr #9
+ adds r2, r2, #127
+ do_it gt, tt
+ COND(rsb,s,gt) r3, r2, #255
+ orrgt r0, r0, r2, lsl #23
+ RETc(gt)
+
+ orr r0, r0, #0x00800000
+ mov r3, #0
+ subs r2, r2, #1
+ b LSYM(Lml_u)
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Ldv_d):
+ teq r2, #0
+ and ip, r0, #0x80000000
+1: do_it eq, tt
+ moveq r0, r0, lsl #1
+ tsteq r0, #0x00800000
+ subeq r2, r2, #1
+ beq 1b
+ orr r0, r0, ip
+ teq r3, #0
+ and ip, r1, #0x80000000
+2: do_it eq, tt
+ moveq r1, r1, lsl #1
+ tsteq r1, #0x00800000
+ subeq r3, r3, #1
+ beq 2b
+ orr r1, r1, ip
+ b LSYM(Ldv_x)
+
+ @ One or both arguments are either INF, NAN, zero or denormalized.
+LSYM(Ldv_s):
+ and r3, ip, r1, lsr #23
+ teq r2, ip
+ bne 1f
+ movs r2, r0, lsl #9
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ teq r3, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov r0, r1
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+1: teq r3, ip
+ bne 2f
+ movs r3, r1, lsl #9
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov r0, r1
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are nonzero, we need to normalize and resume above.
+ bics ip, r0, #0x80000000
+ do_it ne
+ COND(bic,s,ne) ip, r1, #0x80000000
+ bne LSYM(Ldv_d)
+ @ One or both arguments are zero.
+ bics r2, r0, #0x80000000
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bics r3, r1, #0x80000000
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+ FUNC_END aeabi_fdiv
+ FUNC_END divsf3
+
+#endif /* L_muldivsf3 */
+
+#ifdef L_arm_cmpsf2
+
+ @ The return value in r0 is
+ @
+ @ 0 if the operands are equal
+ @ 1 if the first operand is greater than the second, or
+ @ the operands are unordered and the operation is
+ @ CMP, LT, LE, NE, or EQ.
+ @ -1 if the first operand is less than the second, or
+ @ the operands are unordered and the operation is GT
+ @ or GE.
+ @
+ @ The Z flag will be set iff the operands are equal.
+ @
+ @ The following registers are clobbered by this function:
+ @ ip, r0, r1, r2, r3
+
+ARM_FUNC_START gtsf2
+ARM_FUNC_ALIAS gesf2 gtsf2
+ mov ip, #-1
+ b 1f
+
+ARM_FUNC_START ltsf2
+ARM_FUNC_ALIAS lesf2 ltsf2
+ mov ip, #1
+ b 1f
+
+ARM_FUNC_START cmpsf2
+ARM_FUNC_ALIAS nesf2 cmpsf2
+ARM_FUNC_ALIAS eqsf2 cmpsf2
+ mov ip, #1 @ how should we specify unordered here?
+
+1: str ip, [sp, #-4]!
+
+ @ Trap any INF/NAN first.
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
+ do_it ne
+ COND(mvn,s,ne) ip, r3, asr #24
+ beq 3f
+
+ @ Compare values.
+ @ Note that 0.0 is equal to -0.0.
+2: add sp, sp, #4
+ orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
+ do_it ne
+ teqne r0, r1 @ if not 0 compare sign
+ do_it pl
+ COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
+
+ @ Result:
+ do_it hi
+ movhi r0, r1, asr #31
+ do_it lo
+ mvnlo r0, r1, asr #31
+ do_it ne
+ orrne r0, r0, #1
+ RET
+
+ @ Look for a NAN.
+3: mvns ip, r2, asr #24
+ bne 4f
+ movs ip, r0, lsl #9
+ bne 5f @ r0 is NAN
+4: mvns ip, r3, asr #24
+ bne 2b
+ movs ip, r1, lsl #9
+ beq 2b @ r1 is not NAN
+5: ldr r0, [sp], #4 @ return unordered code.
+ RET
+
+ FUNC_END gesf2
+ FUNC_END gtsf2
+ FUNC_END lesf2
+ FUNC_END ltsf2
+ FUNC_END nesf2
+ FUNC_END eqsf2
+ FUNC_END cmpsf2
+
+ARM_FUNC_START aeabi_cfrcmple
+
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+ b 6f
+
+ARM_FUNC_START aeabi_cfcmpeq
+ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: do_push {r0, r1, r2, r3, lr}
+ ARM_CALL cmpsf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ do_it mi
+ cmnmi r0, #0
+ RETLDM "r0, r1, r2, r3"
+
+ FUNC_END aeabi_cfcmple
+ FUNC_END aeabi_cfcmpeq
+ FUNC_END aeabi_cfrcmple
+
+ARM_FUNC_START aeabi_fcmpeq
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfcmple
+ do_it eq, e
+ moveq r0, #1 @ Equal to.
+ movne r0, #0 @ Less than, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmpeq
+
+ARM_FUNC_START aeabi_fcmplt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfcmple
+ do_it cc, e
+ movcc r0, #1 @ Less than.
+ movcs r0, #0 @ Equal to, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmplt
+
+ARM_FUNC_START aeabi_fcmple
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfcmple
+ do_it ls, e
+ movls r0, #1 @ Less than or equal to.
+ movhi r0, #0 @ Greater than or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmple
+
+ARM_FUNC_START aeabi_fcmpge
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfrcmple
+ do_it ls, e
+ movls r0, #1 @ Operand 2 is less than or equal to operand 1.
+ movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmpge
+
+ARM_FUNC_START aeabi_fcmpgt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfrcmple
+ do_it cc, e
+ movcc r0, #1 @ Operand 2 is less than operand 1.
+ movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
+ @ or they are unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmpgt
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_arm_unordsf2
+
+ARM_FUNC_START unordsf2
+ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
+
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
+ bne 1f
+ movs ip, r0, lsl #9
+ bne 3f @ r0 is NAN
+1: mvns ip, r3, asr #24
+ bne 2f
+ movs ip, r1, lsl #9
+ bne 3f @ r1 is NAN
+2: mov r0, #0 @ arguments are ordered.
+ RET
+3: mov r0, #1 @ arguments are unordered.
+ RET
+
+ FUNC_END aeabi_fcmpun
+ FUNC_END unordsf2
+
+#endif /* L_unordsf2 */
+
+#ifdef L_arm_fixsfsi
+
+ARM_FUNC_START fixsfsi
+ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
+
+ @ check exponent range.
+ mov r2, r0, lsl #1
+ cmp r2, #(127 << 24)
+ bcc 1f @ value is too small
+ mov r3, #(127 + 31)
+ subs r2, r3, r2, lsr #24
+ bls 2f @ value is too large
+
+ @ scale value
+ mov r3, r0, lsl #8
+ orr r3, r3, #0x80000000
+ tst r0, #0x80000000 @ the sign bit
+ shift1 lsr, r0, r3, r2
+ do_it ne
+ rsbne r0, r0, #0
+ RET
+
+1: mov r0, #0
+ RET
+
+2: cmp r2, #(127 + 31 - 0xff)
+ bne 3f
+ movs r2, r0, lsl #9
+ bne 4f @ r0 is NAN.
+3: ands r0, r0, #0x80000000 @ the sign bit
+ do_it eq
+ moveq r0, #0x7fffffff @ the maximum signed positive si
+ RET
+
+4: mov r0, #0 @ What should we convert NAN to?
+ RET
+
+ FUNC_END aeabi_f2iz
+ FUNC_END fixsfsi
+
+#endif /* L_fixsfsi */
+
+#ifdef L_arm_fixunssfsi
+
+ARM_FUNC_START fixunssfsi
+ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
+
+ @ check exponent range.
+ movs r2, r0, lsl #1
+ bcs 1f @ value is negative
+ cmp r2, #(127 << 24)
+ bcc 1f @ value is too small
+ mov r3, #(127 + 31)
+ subs r2, r3, r2, lsr #24
+ bmi 2f @ value is too large
+
+ @ scale the value
+ mov r3, r0, lsl #8
+ orr r3, r3, #0x80000000
+ shift1 lsr, r0, r3, r2
+ RET
+
+1: mov r0, #0
+ RET
+
+2: cmp r2, #(127 + 31 - 0xff)
+ bne 3f
+ movs r2, r0, lsl #9
+ bne 4f @ r0 is NAN.
+3: mov r0, #0xffffffff @ maximum unsigned si
+ RET
+
+4: mov r0, #0 @ What should we convert NAN to?
+ RET
+
+ FUNC_END aeabi_f2uiz
+ FUNC_END fixunssfsi
+
+#endif /* L_fixunssfsi */
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
new file mode 100644
index 00000000000..2e76c01df4b
--- /dev/null
+++ b/libgcc/config/arm/lib1funcs.S
@@ -0,0 +1,1829 @@
+@ libgcc routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* An executable stack is *not* required for these functions. */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif /* __ELF__ and __linux__ */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+/* ------------------------------------------------------------------------ */
+
+/* We need to know what prefix to add to function names. */
+
+#ifndef __USER_LABEL_PREFIX__
+#error __USER_LABEL_PREFIX__ not defined
+#endif
+
+/* ANSI concatenation macros. */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+#ifdef __ELF__
+#ifdef __thumb__
+#define __PLT__ /* Not supported in Thumb assembler (for now). */
+#elif defined __vxworks && !defined __PIC__
+#define __PLT__ /* Not supported by the kernel loader. */
+#else
+#define __PLT__ (PLT)
+#endif
+#define TYPE(x) .type SYM(x),function
+#define SIZE(x) .size SYM(x), . - SYM(x)
+#define LSYM(x) .x
+#else
+#define __PLT__
+#define TYPE(x)
+#define SIZE(x)
+#define LSYM(x) x
+#endif
+
+/* Function end macros. Variants for interworking. */
+
+#if defined(__ARM_ARCH_2__)
+# define __ARM_ARCH__ 2
+#endif
+
+#if defined(__ARM_ARCH_3__)
+# define __ARM_ARCH__ 3
+#endif
+
+#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
+ || defined(__ARM_ARCH_4T__)
+/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
+ long multiply instructions. That includes v3M. */
+# define __ARM_ARCH__ 4
+#endif
+
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+ || defined(__ARM_ARCH_6M__)
+# define __ARM_ARCH__ 6
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH__ 7
+#endif
+
+#ifndef __ARM_ARCH__
+#error Unable to determine architecture.
+#endif
+
+/* There are times when we might prefer Thumb1 code even if ARM code is
+ permitted, for example, the code might be smaller, or there might be
+ interworking problems with switching to ARM state if interworking is
+ disabled. */
+#if (defined(__thumb__) \
+ && !defined(__thumb2__) \
+ && (!defined(__THUMB_INTERWORK__) \
+ || defined (__OPTIMIZE_SIZE__) \
+ || defined(__ARM_ARCH_6M__)))
+# define __prefer_thumb__
+#endif
+
+/* How to return from a function call depends on the architecture variant. */
+
+#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
+
+# define RET bx lr
+# define RETc(x) bx##x lr
+
+/* Special precautions for interworking on armv4t. */
+# if (__ARM_ARCH__ == 4)
+
+/* Always use bx, not ldr pc. */
+# if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
+# define __INTERWORKING__
+# endif /* __THUMB__ || __THUMB_INTERWORK__ */
+
+/* Include thumb stub before arm mode code. */
+# if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+# define __INTERWORKING_STUBS__
+# endif /* __thumb__ && !__THUMB_INTERWORK__ */
+
+#endif /* __ARM_ARCH == 4 */
+
+#else
+
+# define RET mov pc, lr
+# define RETc(x) mov##x pc, lr
+
+#endif
+
+.macro cfi_pop advance, reg, cfa_offset
+#ifdef __ELF__
+ .pushsection .debug_frame
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .4byte \advance
+ .byte (0xc0 | \reg) /* DW_CFA_restore */
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 \cfa_offset
+ .popsection
+#endif
+.endm
+.macro cfi_push advance, reg, offset, cfa_offset
+#ifdef __ELF__
+ .pushsection .debug_frame
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .4byte \advance
+ .byte (0x80 | \reg) /* DW_CFA_offset */
+ .uleb128 (\offset / -4)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 \cfa_offset
+ .popsection
+#endif
+.endm
+.macro cfi_start start_label, end_label
+#ifdef __ELF__
+ .pushsection .debug_frame
+LSYM(Lstart_frame):
+ .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
+LSYM(Lstart_cie):
+ .4byte 0xffffffff @ CIE Identifier Tag
+ .byte 0x1 @ CIE Version
+ .ascii "\0" @ CIE Augmentation
+ .uleb128 0x1 @ CIE Code Alignment Factor
+ .sleb128 -4 @ CIE Data Alignment Factor
+ .byte 0xe @ CIE RA Column
+ .byte 0xc @ DW_CFA_def_cfa
+ .uleb128 0xd
+ .uleb128 0x0
+
+ .align 2
+LSYM(Lend_cie):
+ .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
+LSYM(Lstart_fde):
+ .4byte LSYM(Lstart_frame) @ FDE CIE offset
+ .4byte \start_label @ FDE initial location
+ .4byte \end_label-\start_label @ FDE address range
+ .popsection
+#endif
+.endm
+.macro cfi_end end_label
+#ifdef __ELF__
+ .pushsection .debug_frame
+ .align 2
+LSYM(Lend_fde):
+ .popsection
+\end_label:
+#endif
+.endm
+
+/* Don't pass dirn, it's there just to get token pasting right. */
+
+.macro RETLDM regs=, cond=, unwind=, dirn=ia
+#if defined (__INTERWORKING__)
+ .ifc "\regs",""
+ ldr\cond lr, [sp], #8
+ .else
+# if defined(__thumb2__)
+ pop\cond {\regs, lr}
+# else
+ ldm\cond\dirn sp!, {\regs, lr}
+# endif
+ .endif
+ .ifnc "\unwind", ""
+ /* Mark LR as restored. */
+97: cfi_pop 97b - \unwind, 0xe, 0x0
+ .endif
+ bx\cond lr
+#else
+ /* Caller is responsible for providing IT instruction. */
+ .ifc "\regs",""
+ ldr\cond pc, [sp], #8
+ .else
+# if defined(__thumb2__)
+ pop\cond {\regs, pc}
+# else
+ ldm\cond\dirn sp!, {\regs, pc}
+# endif
+ .endif
+#endif
+.endm
+
+/* The Unified assembly syntax allows the same code to be assembled for both
+ ARM and Thumb-2. However this is only supported by recent gas, so define
+ a set of macros to allow ARM code on older assemblers. */
+#if defined(__thumb2__)
+.macro do_it cond, suffix=""
+ it\suffix \cond
+.endm
+.macro shift1 op, arg0, arg1, arg2
+ \op \arg0, \arg1, \arg2
+.endm
+#define do_push push
+#define do_pop pop
+#define COND(op1, op2, cond) op1 ## op2 ## cond
+/* Perform an arithmetic operation with a variable shift operand. This
+ requires two instructions and a scratch register on Thumb-2. */
+.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+ \shiftop \tmp, \src2, \shiftreg
+ \name \dest, \src1, \tmp
+.endm
+#else
+.macro do_it cond, suffix=""
+.endm
+.macro shift1 op, arg0, arg1, arg2
+ mov \arg0, \arg1, \op \arg2
+.endm
+#define do_push stmfd sp!,
+#define do_pop ldmfd sp!,
+#define COND(op1, op2, cond) op1 ## cond ## op2
+.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+ \name \dest, \src1, \src2, \shiftop \shiftreg
+.endm
+#endif
+
+#ifdef __ARM_EABI__
+.macro ARM_LDIV0 name signed
+ cmp r0, #0
+ .ifc \signed, unsigned
+ movne r0, #0xffffffff
+ .else
+ movgt r0, #0x7fffffff
+ movlt r0, #0x80000000
+ .endif
+ b SYM (__aeabi_idiv0) __PLT__
+.endm
+#else
+.macro ARM_LDIV0 name signed
+ str lr, [sp, #-8]!
+98: cfi_push 98b - __\name, 0xe, -0x8, 0x8
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ About as wrong as it could be.
+ RETLDM unwind=98b
+.endm
+#endif
+
+
+#ifdef __ARM_EABI__
+.macro THUMB_LDIV0 name signed
+#if defined(__ARM_ARCH_6M__)
+ .ifc \signed, unsigned
+ cmp r0, #0
+ beq 1f
+ mov r0, #0
+ mvn r0, r0 @ 0xffffffff
+1:
+ .else
+ cmp r0, #0
+ beq 2f
+ blt 3f
+ mov r0, #0
+ mvn r0, r0
+ lsr r0, r0, #1 @ 0x7fffffff
+ b 2f
+3: mov r0, #0x80
+ lsl r0, r0, #24 @ 0x80000000
+2:
+ .endif
+ push {r0, r1, r2}
+ ldr r0, 4f
+ adr r1, 4f
+ add r0, r1
+ str r0, [sp, #8]
+ @ We know we are not on armv4t, so pop pc is safe.
+ pop {r0, r1, pc}
+ .align 2
+4:
+ .word __aeabi_idiv0 - 4b
+#elif defined(__thumb2__)
+ .syntax unified
+ .ifc \signed, unsigned
+ cbz r0, 1f
+ mov r0, #0xffffffff
+1:
+ .else
+ cmp r0, #0
+ do_it gt
+ movgt r0, #0x7fffffff
+ do_it lt
+ movlt r0, #0x80000000
+ .endif
+ b.w SYM(__aeabi_idiv0) __PLT__
+#else
+ .align 2
+ bx pc
+ nop
+ .arm
+ cmp r0, #0
+ .ifc \signed, unsigned
+ movne r0, #0xffffffff
+ .else
+ movgt r0, #0x7fffffff
+ movlt r0, #0x80000000
+ .endif
+ b SYM(__aeabi_idiv0) __PLT__
+ .thumb
+#endif
+.endm
+#else
+.macro THUMB_LDIV0 name signed
+ push { r1, lr }
+98: cfi_push 98b - __\name, 0xe, -0x4, 0x8
+ bl SYM (__div0)
+ mov r0, #0 @ About as wrong as it could be.
+#if defined (__INTERWORKING__)
+ pop { r1, r2 }
+ bx r2
+#else
+ pop { r1, pc }
+#endif
+.endm
+#endif
+
+.macro FUNC_END name
+ SIZE (__\name)
+.endm
+
+.macro DIV_FUNC_END name signed
+ cfi_start __\name, LSYM(Lend_div0)
+LSYM(Ldiv0):
+#ifdef __thumb__
+ THUMB_LDIV0 \name \signed
+#else
+ ARM_LDIV0 \name \signed
+#endif
+ cfi_end LSYM(Lend_div0)
+ FUNC_END \name
+.endm
+
+.macro THUMB_FUNC_START name
+ .globl SYM (\name)
+ TYPE (\name)
+ .thumb_func
+SYM (\name):
+.endm
+
+/* Function start macros. Variants for ARM and Thumb. */
+
+#ifdef __thumb__
+#define THUMB_FUNC .thumb_func
+#define THUMB_CODE .force_thumb
+# if defined(__thumb2__)
+#define THUMB_SYNTAX .syntax divided
+# else
+#define THUMB_SYNTAX
+# endif
+#else
+#define THUMB_FUNC
+#define THUMB_CODE
+#define THUMB_SYNTAX
+#endif
+
+.macro FUNC_START name
+ .text
+ .globl SYM (__\name)
+ TYPE (__\name)
+ .align 0
+ THUMB_CODE
+ THUMB_FUNC
+ THUMB_SYNTAX
+SYM (__\name):
+.endm
+
+/* Special function that will always be coded in ARM assembly, even if
+ in Thumb-only compilation. */
+
+#if defined(__thumb2__)
+
+/* For Thumb-2 we build everything in thumb mode. */
+.macro ARM_FUNC_START name
+ FUNC_START \name
+ .syntax unified
+.endm
+#define EQUIV .thumb_set
+.macro ARM_CALL name
+ bl __\name
+.endm
+
+#elif defined(__INTERWORKING_STUBS__)
+
+.macro ARM_FUNC_START name
+ FUNC_START \name
+ bx pc
+ nop
+ .arm
+/* A hook to tell gdb that we've switched to ARM mode. Also used to call
+ directly from other local arm routines. */
+_L__\name:
+.endm
+#define EQUIV .thumb_set
+/* Branch directly to a function declared with ARM_FUNC_START.
+ Must be called in arm mode. */
+.macro ARM_CALL name
+ bl _L__\name
+.endm
+
+#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
+
+#ifdef __ARM_ARCH_6M__
+#define EQUIV .thumb_set
+#else
+.macro ARM_FUNC_START name
+ .text
+ .globl SYM (__\name)
+ TYPE (__\name)
+ .align 0
+ .arm
+SYM (__\name):
+.endm
+#define EQUIV .set
+.macro ARM_CALL name
+ bl __\name
+.endm
+#endif
+
+#endif
+
+.macro FUNC_ALIAS new old
+ .globl SYM (__\new)
+#if defined (__thumb__)
+ .thumb_set SYM (__\new), SYM (__\old)
+#else
+ .set SYM (__\new), SYM (__\old)
+#endif
+.endm
+
+#ifndef __ARM_ARCH_6M__
+.macro ARM_FUNC_ALIAS new old
+ .globl SYM (__\new)
+ EQUIV SYM (__\new), SYM (__\old)
+#if defined(__INTERWORKING_STUBS__)
+ .set SYM (_L__\new), SYM (_L__\old)
+#endif
+.endm
+#endif
+
+#ifdef __ARMEB__
+#define xxh r0
+#define xxl r1
+#define yyh r2
+#define yyl r3
+#else
+#define xxh r1
+#define xxl r0
+#define yyh r3
+#define yyl r2
+#endif
+
+#ifdef __ARM_EABI__
+.macro WEAK name
+ .weak SYM (__\name)
+.endm
+#endif
+
+#ifdef __thumb__
+/* Register aliases. */
+
+work .req r4 @ XXXX is this safe ?
+dividend .req r0
+divisor .req r1
+overdone .req r2
+result .req r2
+curbit .req r3
+#endif
+#if 0
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* Bodies of the division and modulo routines. */
+/* ------------------------------------------------------------------------ */
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
+
+#if defined (__thumb2__)
+ clz \curbit, \dividend
+ clz \result, \divisor
+ sub \curbit, \result, \curbit
+ rsb \curbit, \curbit, #31
+ adr \result, 1f
+ add \curbit, \result, \curbit, lsl #4
+ mov \result, #0
+ mov pc, \curbit
+.p2align 3
+1:
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp.w \dividend, \divisor, lsl #shift
+ nop.n
+ adc.w \result, \result, \result
+ it cs
+ subcs.w \dividend, \dividend, \divisor, lsl #shift
+ .endr
+#else
+ clz \curbit, \dividend
+ clz \result, \divisor
+ sub \curbit, \result, \curbit
+ rsbs \curbit, \curbit, #31
+ addne \curbit, \curbit, \curbit, lsl #1
+ mov \result, #0
+ addne pc, pc, \curbit, lsl #2
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp \dividend, \divisor, lsl #shift
+ adc \result, \result, \result
+ subcs \dividend, \dividend, \divisor, lsl #shift
+ .endr
+#endif
+
+#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+#if __ARM_ARCH__ >= 5
+
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
+#else /* __ARM_ARCH__ < 5 */
+
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4-bit nibble in the division loop
+ @ to save one loop iteration in most cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
+
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif /* __ARM_ARCH__ < 5 */
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ do_it ne, t
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+
+.endm
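[Editor's note, not part of the patch] The macro above implements a classic shift-and-subtract (restoring) division: align the divisor under the dividend's leading bit, then retire up to four quotient bits per loop. A one-bit-per-step C equivalent, illustrative only; division by zero is diverted to the Ldiv0/__aeabi_idiv0 path elsewhere, so the sketch assumes a nonzero divisor:

    #include <stdint.h>

    static uint32_t udiv_sketch (uint32_t dividend, uint32_t divisor)
    {
      uint32_t result = 0, curbit = 1;

      /* Line the divisor up under the dividend's leading bit.  */
      while (divisor < dividend && !(divisor & 0x80000000u))
        {
          divisor <<= 1;
          curbit <<= 1;
        }
      /* Subtract it back out, one quotient bit at a time.  */
      while (curbit)
        {
          if (dividend >= divisor)
            {
              dividend -= divisor;
              result |= curbit;
            }
          divisor >>= 1;
          curbit >>= 1;
        }
      return result;
    }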
+/* ------------------------------------------------------------------------ */
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ rsbs \order, \order, #31
+ addne pc, pc, \order, lsl #3
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp \dividend, \divisor, lsl #shift
+ subcs \dividend, \dividend, \divisor, lsl #shift
+ .endr
+
+#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+#if __ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else /* __ARM_ARCH__ < 5 */
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif /* __ARM_ARCH__ < 5 */
+
+ @ Perform all needed subtractions to keep only the remainder.
+ @ Do comparisons in batches of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+ @ Either 1, 2 or 3 comparisons/subtractions are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
+
+#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+
+.endm
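+
+@ Worked example for the shift-up/batched path above (informal): with
+@ dividend = 100 and divisor = 7, the shift-up loops scale the divisor to
+@ 112 (\order = 4); the batched pass then subtracts 56, 28 and 14, and the
+@ final single comparisons leave 100 mod 7 = 2 in \dividend.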
+/* ------------------------------------------------------------------------ */
+.macro THUMB_DIV_MOD_BODY modulo
+ @ Load the constant 0x10000000 into our work register.
+ mov work, #1
+ lsl work, #28
+LSYM(Loop1):
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, work
+ bhs LSYM(Lbignum)
+ cmp divisor, dividend
+ bhs LSYM(Lbignum)
+ lsl divisor, #4
+ lsl curbit, #4
+ b LSYM(Loop1)
+LSYM(Lbignum):
+ @ Set work to 0x80000000
+ lsl work, #3
+LSYM(Loop2):
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, work
+ bhs LSYM(Loop3)
+ cmp divisor, dividend
+ bhs LSYM(Loop3)
+ lsl divisor, #1
+ lsl curbit, #1
+ b LSYM(Loop2)
+LSYM(Loop3):
+ @ Test for possible subtractions ...
+ .if \modulo
+ @ ... On the final pass, this may subtract too much from the dividend,
+ @ so keep track of which subtractions are done, so that we can fix
+ @ them up afterwards.
+ mov overdone, #0
+ cmp dividend, divisor
+ blo LSYM(Lover1)
+ sub dividend, dividend, divisor
+LSYM(Lover1):
+ lsr work, divisor, #1
+ cmp dividend, work
+ blo LSYM(Lover2)
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #1
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+LSYM(Lover2):
+ lsr work, divisor, #2
+ cmp dividend, work
+ blo LSYM(Lover3)
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #2
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+LSYM(Lover3):
+ lsr work, divisor, #3
+ cmp dividend, work
+ blo LSYM(Lover4)
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #3
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+LSYM(Lover4):
+ mov ip, curbit
+ .else
+ @ ... and note which bits are done in the result. On the final pass,
+ @ this may subtract too much from the dividend, but the result will be ok,
+ @ since the "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ blo LSYM(Lover1)
+ sub dividend, dividend, divisor
+ orr result, result, curbit
+LSYM(Lover1):
+ lsr work, divisor, #1
+ cmp dividend, work
+ blo LSYM(Lover2)
+ sub dividend, dividend, work
+ lsr work, curbit, #1
+ orr result, work
+LSYM(Lover2):
+ lsr work, divisor, #2
+ cmp dividend, work
+ blo LSYM(Lover3)
+ sub dividend, dividend, work
+ lsr work, curbit, #2
+ orr result, work
+LSYM(Lover3):
+ lsr work, divisor, #3
+ cmp dividend, work
+ blo LSYM(Lover4)
+ sub dividend, dividend, work
+ lsr work, curbit, #3
+ orr result, work
+LSYM(Lover4):
+ .endif
+
+ cmp dividend, #0 @ Early termination?
+ beq LSYM(Lover5)
+ lsr curbit, #4 @ No, any more bits to do?
+ beq LSYM(Lover5)
+ lsr divisor, #4
+ b LSYM(Loop3)
+LSYM(Lover5):
+ .if \modulo
+ @ Any subtractions that we should not have done will be recorded in
+ @ the top three bits of "overdone". Exactly which were not needed
+ @ are governed by the position of the bit, stored in ip.
+ mov work, #0xe
+ lsl work, #28
+ and overdone, work
+ beq LSYM(Lgot_result)
+
+ @ If we terminated early, because dividend became zero, then the
+ @ bit in ip will not be in the bottom nibble, and we should not
+ @ perform the additions below. We must test for this though
+ @ (rather than relying upon the TSTs to prevent the additions) since
+ @ the bit in ip could be in the top two bits which might then match
+ @ with one of the smaller RORs.
+ mov curbit, ip
+ mov work, #0x7
+ tst curbit, work
+ beq LSYM(Lgot_result)
+
+ mov curbit, ip
+ mov work, #3
+ ror curbit, work
+ tst overdone, curbit
+ beq LSYM(Lover6)
+ lsr work, divisor, #3
+ add dividend, work
+LSYM(Lover6):
+ mov curbit, ip
+ mov work, #2
+ ror curbit, work
+ tst overdone, curbit
+ beq LSYM(Lover7)
+ lsr work, divisor, #2
+ add dividend, work
+LSYM(Lover7):
+ mov curbit, ip
+ mov work, #1
+ ror curbit, work
+ tst overdone, curbit
+ beq LSYM(Lgot_result)
+ lsr work, divisor, #1
+ add dividend, work
+ .endif
+LSYM(Lgot_result):
+.endm
+/* ------------------------------------------------------------------------ */
+/* Start of the Real Functions */
+/* ------------------------------------------------------------------------ */
+#ifdef L_udivsi3
+
+#if defined(__prefer_thumb__)
+
+ FUNC_START udivsi3
+ FUNC_ALIAS aeabi_uidiv udivsi3
+
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+LSYM(udivsi3_skip_div0_test):
+ mov curbit, #1
+ mov result, #0
+
+ push { work }
+ cmp dividend, divisor
+ blo LSYM(Lgot_result)
+
+ THUMB_DIV_MOD_BODY 0
+
+ mov r0, result
+ pop { work }
+ RET
+
+#else /* ARM version/Thumb-2. */
+
+ ARM_FUNC_START udivsi3
+ ARM_FUNC_ALIAS aeabi_uidiv udivsi3
+
+ /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
+ check for division-by-zero a second time. */
+LSYM(udivsi3_skip_div0_test):
+ subs r2, r1, #1
+ do_it eq
+ RETc(eq)
+ bcc LSYM(Ldiv0)
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
+
+ ARM_DIV_BODY r0, r1, r2, r3
+
+ mov r0, r2
+ RET
+
+11: do_it eq, e
+ moveq r0, #1
+ movne r0, #0
+ RET
+
+12: ARM_DIV2_ORDER r1, r2
+
+ mov r0, r0, lsr r2
+ RET
+
+#endif /* ARM version */
+
+ DIV_FUNC_END udivsi3 unsigned
+
+#if defined(__prefer_thumb__)
+FUNC_START aeabi_uidivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ push {r0, r1, lr}
+ bl LSYM(udivsi3_skip_div0_test)
+ POP {r1, r2, r3}
+ mul r2, r0
+ sub r1, r1, r2
+ bx r3
+#else
+ARM_FUNC_START aeabi_uidivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ stmfd sp!, { r0, r1, lr }
+ bl LSYM(udivsi3_skip_div0_test)
+ ldmfd sp!, { r1, r2, lr }
+ mul r3, r2, r0
+ sub r1, r1, r3
+ RET
+#endif
+ FUNC_END aeabi_uidivmod
+
+#endif /* L_udivsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_umodsi3
+
+ FUNC_START umodsi3
+
+#ifdef __thumb__
+
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ mov curbit, #1
+ cmp dividend, divisor
+ bhs LSYM(Lover10)
+ RET
+
+LSYM(Lover10):
+ push { work }
+
+ THUMB_DIV_MOD_BODY 1
+
+ pop { work }
+ RET
+
+#else /* ARM version. */
+
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc LSYM(Ldiv0)
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ RETc(ls)
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+ RET
+
+#endif /* ARM version. */
+
+ DIV_FUNC_END umodsi3 unsigned
+
+#endif /* L_umodsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_divsi3
+
+#if defined(__prefer_thumb__)
+
+ FUNC_START divsi3
+ FUNC_ALIAS aeabi_idiv divsi3
+
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+LSYM(divsi3_skip_div0_test):
+ push { work }
+ mov work, dividend
+ eor work, divisor @ Save the sign of the result.
+ mov ip, work
+ mov curbit, #1
+ mov result, #0
+ cmp divisor, #0
+ bpl LSYM(Lover10)
+ neg divisor, divisor @ Loops below use unsigned.
+LSYM(Lover10):
+ cmp dividend, #0
+ bpl LSYM(Lover11)
+ neg dividend, dividend
+LSYM(Lover11):
+ cmp dividend, divisor
+ blo LSYM(Lgot_result)
+
+ THUMB_DIV_MOD_BODY 0
+
+ mov r0, result
+ mov work, ip
+ cmp work, #0
+ bpl LSYM(Lover12)
+ neg r0, r0
+LSYM(Lover12):
+ pop { work }
+ RET
+
+#else /* ARM/Thumb-2 version. */
+
+ ARM_FUNC_START divsi3
+ ARM_FUNC_ALIAS aeabi_idiv divsi3
+
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+LSYM(divsi3_skip_div0_test):
+ eor ip, r0, r1 @ save the sign of the result.
+ do_it mi
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ do_it mi
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
+
+ cmp ip, #0
+ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+10: teq ip, r0 @ same sign ?
+ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+11: do_it lo
+ movlo r0, #0
+ do_it eq,t
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ RET
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+#endif /* ARM version */
+
+ DIV_FUNC_END divsi3 signed
+
+#if defined(__prefer_thumb__)
+FUNC_START aeabi_idivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ push {r0, r1, lr}
+ bl LSYM(divsi3_skip_div0_test)
+ POP {r1, r2, r3}
+ mul r2, r0
+ sub r1, r1, r2
+ bx r3
+#else
+ARM_FUNC_START aeabi_idivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ stmfd sp!, { r0, r1, lr }
+ bl LSYM(divsi3_skip_div0_test)
+ ldmfd sp!, { r1, r2, lr }
+ mul r3, r2, r0
+ sub r1, r1, r3
+ RET
+#endif
+ FUNC_END aeabi_idivmod
+
+#endif /* L_divsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_modsi3
+
+ FUNC_START modsi3
+
+#ifdef __thumb__
+
+ mov curbit, #1
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ bpl LSYM(Lover10)
+ neg divisor, divisor @ Loops below use unsigned.
+LSYM(Lover10):
+ push { work }
+ @ Need to save the sign of the dividend; unfortunately, we need the
+ @ work register later on. Must do this after saving the original value
+ @ of the work register, because we will pop this value off first.
+ push { dividend }
+ cmp dividend, #0
+ bpl LSYM(Lover11)
+ neg dividend, dividend
+LSYM(Lover11):
+ cmp dividend, divisor
+ blo LSYM(Lgot_result)
+
+ THUMB_DIV_MOD_BODY 1
+
+ pop { work }
+ cmp work, #0
+ bpl LSYM(Lover12)
+ neg dividend, dividend
+LSYM(Lover12):
+ pop { work }
+ RET
+
+#else /* ARM version. */
+
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ RET
+
+#endif /* ARM version */
+
+ DIV_FUNC_END modsi3 signed
+
+#endif /* L_modsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_dvmd_tls
+
+#ifdef __ARM_EABI__
+ WEAK aeabi_idiv0
+ WEAK aeabi_ldiv0
+ FUNC_START aeabi_idiv0
+ FUNC_START aeabi_ldiv0
+ RET
+ FUNC_END aeabi_ldiv0
+ FUNC_END aeabi_idiv0
+#else
+ FUNC_START div0
+ RET
+ FUNC_END div0
+#endif
+
+#endif /* L_divmodsi_tools */
+/* ------------------------------------------------------------------------ */
+#ifdef L_dvmd_lnx
+@ GNU/Linux division-by-zero handler. Used in place of L_dvmd_tls
+
+/* Constant taken from <asm/signal.h>. */
+#define SIGFPE 8
+
+#ifdef __ARM_EABI__
+ WEAK aeabi_idiv0
+ WEAK aeabi_ldiv0
+ ARM_FUNC_START aeabi_idiv0
+ ARM_FUNC_START aeabi_ldiv0
+#else
+ ARM_FUNC_START div0
+#endif
+
+ do_push {r1, lr}
+ mov r0, #SIGFPE
+ bl SYM(raise) __PLT__
+ RETLDM r1
+
+#ifdef __ARM_EABI__
+ FUNC_END aeabi_ldiv0
+ FUNC_END aeabi_idiv0
+#else
+ FUNC_END div0
+#endif
+
+#endif /* L_dvmd_lnx */
+#ifdef L_clear_cache
+#if defined __ARM_EABI__ && defined __linux__
+@ EABI GNU/Linux call to cacheflush syscall.
+ ARM_FUNC_START clear_cache
+ do_push {r7}
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
+ movw r7, #2
+ movt r7, #0xf
+#else
+ mov r7, #0xf0000
+ add r7, r7, #2
+#endif
+ mov r2, #0
+ swi 0
+ do_pop {r7}
+ RET
+ FUNC_END clear_cache
+#else
+#error "This is only for ARM EABI GNU/Linux"
+#endif
+#endif /* L_clear_cache */
+/* ------------------------------------------------------------------------ */
+/* Dword shift operations. */
+/* All the following Dword shift variants rely on the fact that
+ shft xxx, Reg
+ is in fact done as
+ shft xxx, (Reg & 255)
+ so for Reg value in (32...63) and (-1...-31) we will get zero (in the
+ case of logical shifts) or the sign (for asr). */
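+
+/* For instance, a logical right shift of a 64-bit value by 40 reduces to
+   al = ah >> 8 with ah = 0, because the register-specified shift of ah by
+   40 contributes zero, exactly as described above.  */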
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
+#ifndef __symbian__
+
+#ifdef L_lshrdi3
+
+ FUNC_START lshrdi3
+ FUNC_ALIAS aeabi_llsr lshrdi3
+
+#ifdef __thumb__
+ lsr al, r2
+ mov r3, ah
+ lsr ah, r2
+ mov ip, r3
+ sub r2, #32
+ lsr r3, r2
+ orr al, r3
+ neg r2, r2
+ mov r3, ip
+ lsl r3, r2
+ orr al, r3
+ RET
+#else
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, lsr r3
+ orrmi al, al, ah, lsl ip
+ mov ah, ah, lsr r2
+ RET
+#endif
+ FUNC_END aeabi_llsr
+ FUNC_END lshrdi3
+
+#endif
+
+#ifdef L_ashrdi3
+
+ FUNC_START ashrdi3
+ FUNC_ALIAS aeabi_lasr ashrdi3
+
+#ifdef __thumb__
+ lsr al, r2
+ mov r3, ah
+ asr ah, r2
+ sub r2, #32
+ @ If r2 is negative at this point the following step would OR
+ @ the sign bit into all of AL. That's not what we want...
+ bmi 1f
+ mov ip, r3
+ asr r3, r2
+ orr al, r3
+ mov r3, ip
+1:
+ neg r2, r2
+ lsl r3, r2
+ orr al, r3
+ RET
+#else
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, asr r3
+ orrmi al, al, ah, lsl ip
+ mov ah, ah, asr r2
+ RET
+#endif
+
+ FUNC_END aeabi_lasr
+ FUNC_END ashrdi3
+
+#endif
+
+#ifdef L_ashldi3
+
+ FUNC_START ashldi3
+ FUNC_ALIAS aeabi_llsl ashldi3
+
+#ifdef __thumb__
+ lsl ah, r2
+ mov r3, al
+ lsl al, r2
+ mov ip, r3
+ sub r2, #32
+ lsl r3, r2
+ orr ah, r3
+ neg r2, r2
+ mov r3, ip
+ lsr r3, r2
+ orr ah, r3
+ RET
+#else
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi ah, ah, lsl r2
+ movpl ah, al, lsl r3
+ orrmi ah, ah, al, lsr ip
+ mov al, al, lsl r2
+ RET
+#endif
+ FUNC_END aeabi_llsl
+ FUNC_END ashldi3
+
+#endif
+
+#endif /* __symbian__ */
+
+#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
+ || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+#define HAVE_ARM_CLZ 1
+#endif
+
+#ifdef L_clzsi2
+#if defined(__ARM_ARCH_6M__)
+FUNC_START clzsi2
+ mov r1, #28
+ mov r3, #1
+ lsl r3, r3, #16
+ cmp r0, r3 /* 0x10000 */
+ bcc 2f
+ lsr r0, r0, #16
+ sub r1, r1, #16
+2: lsr r3, r3, #8
+ cmp r0, r3 /* #0x100 */
+ bcc 2f
+ lsr r0, r0, #8
+ sub r1, r1, #8
+2: lsr r3, r3, #4
+ cmp r0, r3 /* #0x10 */
+ bcc 2f
+ lsr r0, r0, #4
+ sub r1, r1, #4
+2: adr r2, 1f
+ ldrb r0, [r2, r0]
+ add r0, r0, r1
+ bx lr
+.align 2
+1:
+.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
+ FUNC_END clzsi2
+#else
+ARM_FUNC_START clzsi2
+# if defined(HAVE_ARM_CLZ)
+ clz r0, r0
+ RET
+# else
+ mov r1, #28
+ cmp r0, #0x10000
+ do_it cs, t
+ movcs r0, r0, lsr #16
+ subcs r1, r1, #16
+ cmp r0, #0x100
+ do_it cs, t
+ movcs r0, r0, lsr #8
+ subcs r1, r1, #8
+ cmp r0, #0x10
+ do_it cs, t
+ movcs r0, r0, lsr #4
+ subcs r1, r1, #4
+ adr r2, 1f
+ ldrb r0, [r2, r0]
+ add r0, r0, r1
+ RET
+.align 2
+1:
+.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
+# endif /* !HAVE_ARM_CLZ */
+ FUNC_END clzsi2
+#endif
+#endif /* L_clzsi2 */
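+
+/* Informal worked example for the table-driven __clzsi2 variants above:
+   for r0 = 0x1000 the three range checks shift the value down to 1 and
+   reduce the bias in r1 from 28 to 16; the byte table then supplies the
+   leading-zero count of the remaining nibble (3), giving 16 + 3 = 19,
+   which is indeed __builtin_clz (0x1000).  */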
+
+#ifdef L_clzdi2
+#if !defined(HAVE_ARM_CLZ)
+
+# if defined(__ARM_ARCH_6M__)
+FUNC_START clzdi2
+ push {r4, lr}
+# else
+ARM_FUNC_START clzdi2
+ do_push {r4, lr}
+# endif
+ cmp xxh, #0
+ bne 1f
+# ifdef __ARMEB__
+ mov r0, xxl
+ bl __clzsi2
+ add r0, r0, #32
+ b 2f
+1:
+ bl __clzsi2
+# else
+ bl __clzsi2
+ add r0, r0, #32
+ b 2f
+1:
+ mov r0, xxh
+ bl __clzsi2
+# endif
+2:
+# if defined(__ARM_ARCH_6M__)
+ pop {r4, pc}
+# else
+ RETLDM r4
+# endif
+ FUNC_END clzdi2
+
+#else /* HAVE_ARM_CLZ */
+
+ARM_FUNC_START clzdi2
+ cmp xxh, #0
+ do_it eq, et
+ clzeq r0, xxl
+ clzne r0, xxh
+ addeq r0, r0, #32
+ RET
+ FUNC_END clzdi2
+
+#endif
+#endif /* L_clzdi2 */
+
+/* ------------------------------------------------------------------------ */
+/* These next two sections are here despite the fact that they contain Thumb
+ assembler because their presence allows interworked code to be linked even
+ when the GCC library is this one. */
+
+/* Do not build the interworking functions when the target architecture does
+ not support Thumb instructions. (This can be a multilib option). */
+#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
+ || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
+ || __ARM_ARCH__ >= 6
+
+#if defined L_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code.
+ The address of function to be called is loaded into a register and then
+ one of these labels is called via a BL instruction. This puts the
+ return address into the link register with the bottom bit set, and the
+ code here switches to the correct mode before executing the function. */
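+
+/* For example (illustrative only): a caller holding the target address in
+   r3 issues "bl _call_via_r3"; the "bx r3" in the stub then branches to
+   the target in the correct instruction set, while lr still returns to
+   the original Thumb caller.  */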
+
+ .text
+ .align 0
+ .force_thumb
+
+.macro call_via register
+ THUMB_FUNC_START _call_via_\register
+
+ bx \register
+ nop
+
+ SIZE (_call_via_\register)
+.endm
+
+ call_via r0
+ call_via r1
+ call_via r2
+ call_via r3
+ call_via r4
+ call_via r5
+ call_via r6
+ call_via r7
+ call_via r8
+ call_via r9
+ call_via sl
+ call_via fp
+ call_via ip
+ call_via sp
+ call_via lr
+
+#endif /* L_call_via_rX */
+
+/* Don't bother with the old interworking routines for Thumb-2. */
+/* ??? Maybe only omit these on "m" variants. */
+#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
+
+#if defined L_interwork_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code,
+ when the target address is in an unknown instruction set. The address
+ of function to be called is loaded into a register and then one of these
+ labels is called via a BL instruction. This puts the return address
+ into the link register with the bottom bit set, and the code here
+ switches to the correct mode before executing the function. Unfortunately
+ the target code cannot be relied upon to return via a BX instruction, so
+ instead we have to store the return address on the stack and allow the
+ called function to return here instead. Upon return we recover the real
+ return address and use a BX to get back to Thumb mode.
+
+ There are three variations of this code. The first,
+ _interwork_call_via_rN(), will push the return address onto the
+ stack and pop it in _arm_return(). It should only be used if all
+ arguments are passed in registers.
+
+ The second, _interwork_r7_call_via_rN(), instead stores the return
+ address at [r7, #-4]. It is the caller's responsibility to ensure
+ that this address is valid and contains no useful data.
+
+ The third, _interwork_r11_call_via_rN(), works in the same way but
+ uses r11 instead of r7. It is useful if the caller does not really
+ need a frame pointer. */
+
+ .text
+ .align 0
+
+ .code 32
+ .globl _arm_return
+LSYM(Lstart_arm_return):
+ cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
+ cfi_push 0, 0xe, -0x8, 0x8
+ nop @ This nop is for the benefit of debuggers, so that
+ @ backtraces will use the correct unwind information.
+_arm_return:
+ RETLDM unwind=LSYM(Lstart_arm_return)
+ cfi_end LSYM(Lend_arm_return)
+
+ .globl _arm_return_r7
+_arm_return_r7:
+ ldr lr, [r7, #-4]
+ bx lr
+
+ .globl _arm_return_r11
+_arm_return_r11:
+ ldr lr, [r11, #-4]
+ bx lr
+
+.macro interwork_with_frame frame, register, name, return
+ .code 16
+
+ THUMB_FUNC_START \name
+
+ bx pc
+ nop
+
+ .code 32
+ tst \register, #1
+ streq lr, [\frame, #-4]
+ adreq lr, _arm_return_\frame
+ bx \register
+
+ SIZE (\name)
+.endm
+
+.macro interwork register
+ .code 16
+
+ THUMB_FUNC_START _interwork_call_via_\register
+
+ bx pc
+ nop
+
+ .code 32
+ .globl LSYM(Lchange_\register)
+LSYM(Lchange_\register):
+ tst \register, #1
+ streq lr, [sp, #-8]!
+ adreq lr, _arm_return
+ bx \register
+
+ SIZE (_interwork_call_via_\register)
+
+ interwork_with_frame r7,\register,_interwork_r7_call_via_\register
+ interwork_with_frame r11,\register,_interwork_r11_call_via_\register
+.endm
+
+ interwork r0
+ interwork r1
+ interwork r2
+ interwork r3
+ interwork r4
+ interwork r5
+ interwork r6
+ interwork r7
+ interwork r8
+ interwork r9
+ interwork sl
+ interwork fp
+ interwork ip
+ interwork sp
+
+ /* The LR case has to be handled a little differently... */
+ .code 16
+
+ THUMB_FUNC_START _interwork_call_via_lr
+
+ bx pc
+ nop
+
+ .code 32
+ .globl .Lchange_lr
+.Lchange_lr:
+ tst lr, #1
+ stmeqdb r13!, {lr, pc}
+ mov ip, lr
+ adreq lr, _arm_return
+ bx ip
+
+ SIZE (_interwork_call_via_lr)
+
+#endif /* L_interwork_call_via_rX */
+#endif /* !__thumb2__ */
+
+/* Functions to support compact pic switch tables in thumb1 state.
+ All these routines take an index into the table in r0. The
+ table is at LR & ~1 (but this must be rounded up in the case
+ of 32-bit entries). They are only permitted to clobber r12
+ and r14, and r0 must be preserved on exit. */
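+
+/* Informal sketch of the byte- and halfword-table helpers below: the table
+   starts right after the "bl" that reached the helper (at LR with bit 0
+   cleared); the selected entry is doubled and added to LR, so each entry
+   holds half the distance from the table start to its case label.  The
+   word-table variant instead stores the full offset from the aligned
+   table base.  */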
+#ifdef L_thumb1_case_sqi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_sqi
+ push {r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r1, r1, #1
+ ldrsb r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_sqi)
+#endif
+
+#ifdef L_thumb1_case_uqi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_uqi
+ push {r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r1, r1, #1
+ ldrb r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_uqi)
+#endif
+
+#ifdef L_thumb1_case_shi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_shi
+ push {r0, r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r0, r0, #1
+ lsls r1, r1, #1
+ ldrsh r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r0, r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_shi)
+#endif
+
+#ifdef L_thumb1_case_uhi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_uhi
+ push {r0, r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r0, r0, #1
+ lsls r1, r1, #1
+ ldrh r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r0, r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_uhi)
+#endif
+
+#ifdef L_thumb1_case_si
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_si
+ push {r0, r1}
+ mov r1, lr
+ adds.n r1, r1, #2 /* Align to word. */
+ lsrs r1, r1, #2
+ lsls r0, r0, #2
+ lsls r1, r1, #2
+ ldr r0, [r1, r0]
+ adds r0, r0, r1
+ mov lr, r0
+ pop {r0, r1}
+ mov pc, lr /* We know we were called from thumb code. */
+ SIZE (__gnu_thumb1_case_si)
+#endif
+
+#endif /* Arch supports thumb. */
+
+#ifndef __symbian__
+#ifndef __ARM_ARCH_6M__
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
+#include "bpabi.S"
+#else /* __ARM_ARCH_6M__ */
+#include "bpabi-v6m.S"
+#endif /* __ARM_ARCH_6M__ */
+#endif /* !__symbian__ */
diff --git a/libgcc/config/arm/libunwind.S b/libgcc/config/arm/libunwind.S
index a3a19daab4b..8166cd86e47 100644
--- a/libgcc/config/arm/libunwind.S
+++ b/libgcc/config/arm/libunwind.S
@@ -40,7 +40,7 @@
#ifndef __symbian__
-#include "config/arm/lib1funcs.asm"
+#include "lib1funcs.S"
.macro UNPREFIX name
.global SYM (\name)
diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
new file mode 100644
index 00000000000..4e17e99b4a5
--- /dev/null
+++ b/libgcc/config/arm/t-arm
@@ -0,0 +1,3 @@
+LIB1ASMSRC = arm/lib1funcs.S
+LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
+ _thumb1_case_uhi _thumb1_case_si
diff --git a/libgcc/config/arm/t-bpabi b/libgcc/config/arm/t-bpabi
index ebb2f9fd85d..8787285ab1f 100644
--- a/libgcc/config/arm/t-bpabi
+++ b/libgcc/config/arm/t-bpabi
@@ -1,3 +1,6 @@
+# Add the bpabi.S functions.
+LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
+
LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \
$(srcdir)/config/arm/libunwind.S \
$(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c
diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf
new file mode 100644
index 00000000000..fab32e445be
--- /dev/null
+++ b/libgcc/config/arm/t-elf
@@ -0,0 +1,13 @@
+# For most CPUs we have an assembly soft-float implementation.
+# However this is not true for ARMv6M. Here we want to use the soft-fp C
+# implementation. The soft-fp code is only built for ARMv6M. This pulls
+# in the asm implementation for other CPUs.
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
+ _call_via_rX _interwork_call_via_rX \
+ _lshrdi3 _ashrdi3 _ashldi3 \
+ _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
+ _arm_fixdfsi _arm_fixunsdfsi \
+ _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
+ _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
+ _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
+ _clzsi2 _clzdi2
diff --git a/libgcc/config/arm/t-linux b/libgcc/config/arm/t-linux
new file mode 100644
index 00000000000..a154f775a0f
--- /dev/null
+++ b/libgcc/config/arm/t-linux
@@ -0,0 +1,3 @@
+LIB1ASMSRC = arm/lib1funcs.S
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
+ _arm_addsubdf3 _arm_addsubsf3
diff --git a/libgcc/config/arm/t-linux-eabi b/libgcc/config/arm/t-linux-eabi
new file mode 100644
index 00000000000..dfc9197ea45
--- /dev/null
+++ b/libgcc/config/arm/t-linux-eabi
@@ -0,0 +1,2 @@
+# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
+LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
diff --git a/libgcc/config/arm/t-strongarm-elf b/libgcc/config/arm/t-strongarm-elf
new file mode 100644
index 00000000000..cd9f9667ddf
--- /dev/null
+++ b/libgcc/config/arm/t-strongarm-elf
@@ -0,0 +1 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2
diff --git a/libgcc/config/arm/t-symbian b/libgcc/config/arm/t-symbian
index 6788d5f40b3..1989696c8a3 100644
--- a/libgcc/config/arm/t-symbian
+++ b/libgcc/config/arm/t-symbian
@@ -1,2 +1,16 @@
+LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+
+# These functions have __aeabi equivalents and will never be called by GCC.
+# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
+# used -- and we make sure that definitions are not available in lib1funcs.S,
+# either, so they end up undefined.
+LIB1ASMFUNCS += \
+ _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \
+ _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \
+ _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \
+ _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
+ _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
+ _fixsfsi _fixunssfsi
+
# Include the gcc personality routine
LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c
diff --git a/libgcc/config/arm/t-vxworks b/libgcc/config/arm/t-vxworks
new file mode 100644
index 00000000000..70ccdc1556a
--- /dev/null
+++ b/libgcc/config/arm/t-vxworks
@@ -0,0 +1 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
diff --git a/libgcc/config/arm/t-wince-pe b/libgcc/config/arm/t-wince-pe
new file mode 100644
index 00000000000..33ea969ccf4
--- /dev/null
+++ b/libgcc/config/arm/t-wince-pe
@@ -0,0 +1 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
new file mode 100644
index 00000000000..8c369c96a77
--- /dev/null
+++ b/libgcc/config/avr/lib1funcs.S
@@ -0,0 +1,1533 @@
+/* -*- Mode: Asm -*- */
+/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+ Contributed by Denis Chertykov <chertykov@gmail.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define __zero_reg__ r1
+#define __tmp_reg__ r0
+#define __SREG__ 0x3f
+#define __SP_H__ 0x3e
+#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
+#define __EIND__ 0x3C
+
+/* Most of the functions here are called directly from avr.md
+ patterns, instead of using the standard libcall mechanisms.
+ This can make better code because GCC knows exactly which
+ of the call-used registers (not all of them) are clobbered. */
+
+/* FIXME: At present, there is no SORT directive in the linker
+ script, so we must not assume that different modules
+ in the same input section like .libgcc.text.mul will be
+ located close together. Therefore, we cannot use
+ RCALL/RJMP to call a function like __udivmodhi4 from
+ __divmodhi4 and have to use lengthy XCALL/XJMP even
+ though they are in the same input section and all same
+ input sections together are small enough to reach every
+ location with a RCALL/RJMP instruction. */
+
+ .macro mov_l r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+ .macro mov_h r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ ; empty
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
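+
+;; Usage example (informal): copying the 16-bit pair r23:r22 into r25:r24 is
+;; written as "mov_l r24, r22" followed by "mov_h r25, r23"; on MOVW devices
+;; the first macro expands to a single MOVW and the second to nothing,
+;; otherwise each expands to a plain MOV.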
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+#define XCALL call
+#define XJMP jmp
+#else
+#define XCALL rcall
+#define XJMP rjmp
+#endif
+
+.macro DEFUN name
+.global \name
+.func \name
+\name:
+.endm
+
+.macro ENDF name
+.size \name, .-\name
+.endfunc
+.endm
+
+
+.section .text.libgcc.mul, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
+#if !defined (__AVR_HAVE_MUL__)
+/*******************************************************
+ Multiplication 8 x 8 without MUL
+*******************************************************/
+#if defined (L_mulqi3)
+
+#define r_arg2 r22 /* multiplicand */
+#define r_arg1 r24 /* multiplier */
+#define r_res __tmp_reg__ /* result */
+
+DEFUN __mulqi3
+ clr r_res ; clear result
+__mulqi3_loop:
+ sbrc r_arg1,0
+ add r_res,r_arg2
+ add r_arg2,r_arg2 ; shift multiplicand
+ breq __mulqi3_exit ; while multiplicand != 0
+ lsr r_arg1 ;
+ brne __mulqi3_loop ; exit if multiplier = 0
+__mulqi3_exit:
+ mov r_arg1,r_res ; result to return register
+ ret
+ENDF __mulqi3
+
+#undef r_arg2
+#undef r_arg1
+#undef r_res
+
+#endif /* defined (L_mulqi3) */
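+
+;; Worked example for the shift-and-add loop above (informal): 5 * 3 starts
+;; with r_res = 0; bit 0 of the multiplier (5) is set, so 3 is added; the
+;; multiplicand doubles to 6 and the multiplier becomes 2; bit 0 is now
+;; clear, so only the shifts happen (12 and 1); finally bit 0 is set again
+;; and 12 is added, giving 3 + 12 = 15.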
+
+#if defined (L_mulqihi3)
+DEFUN __mulqihi3
+ clr r25
+ sbrc r24, 7
+ dec r25
+ clr r23
+ sbrc r22, 7
+ dec r23 ; complete sign extension of r22 into r23
+ XJMP __mulhi3
+ENDF __mulqihi3
+#endif /* defined (L_mulqihi3) */
+
+#if defined (L_umulqihi3)
+DEFUN __umulqihi3
+ clr r25
+ clr r23
+ XJMP __mulhi3
+ENDF __umulqihi3
+#endif /* defined (L_umulqihi3) */
+
+/*******************************************************
+ Multiplication 16 x 16 without MUL
+*******************************************************/
+#if defined (L_mulhi3)
+#define r_arg1L r24 /* multiplier Low */
+#define r_arg1H r25 /* multiplier High */
+#define r_arg2L r22 /* multiplicand Low */
+#define r_arg2H r23 /* multiplicand High */
+#define r_resL __tmp_reg__ /* result Low */
+#define r_resH r21 /* result High */
+
+DEFUN __mulhi3
+ clr r_resH ; clear result
+ clr r_resL ; clear result
+__mulhi3_loop:
+ sbrs r_arg1L,0
+ rjmp __mulhi3_skip1
+ add r_resL,r_arg2L ; result + multiplicand
+ adc r_resH,r_arg2H
+__mulhi3_skip1:
+ add r_arg2L,r_arg2L ; shift multiplicand
+ adc r_arg2H,r_arg2H
+
+ cp r_arg2L,__zero_reg__
+ cpc r_arg2H,__zero_reg__
+ breq __mulhi3_exit ; while multiplicand != 0
+
+ lsr r_arg1H ; gets LSB of multiplier
+ ror r_arg1L
+ sbiw r_arg1L,0
+ brne __mulhi3_loop ; exit if multiplier = 0
+__mulhi3_exit:
+ mov r_arg1H,r_resH ; result to return register
+ mov r_arg1L,r_resL
+ ret
+ENDF __mulhi3
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg2L
+#undef r_arg2H
+#undef r_resL
+#undef r_resH
+
+#endif /* defined (L_mulhi3) */
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 without MUL
+*******************************************************/
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+;;; FIXME: This is dead code (no one calls it)
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ sbrc r23, 7
+ dec r24
+ mov r25, r24
+ clr r20
+ sbrc r19, 7
+ dec r20
+ mov r21, r20
+ XJMP __mulsi3
+ENDF __mulhisi3
+#endif /* defined (L_mulhisi3) */
+
+#if defined (L_umulhisi3)
+DEFUN __umulhisi3
+;;; FIXME: This is dead code (no one calls it)
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ clr r25
+ mov_l r20, r24
+ mov_h r21, r25
+ XJMP __mulsi3
+ENDF __umulhisi3
+#endif /* defined (L_umulhisi3) */
+
+#if defined (L_mulsi3)
+/*******************************************************
+ Multiplication 32 x 32 without MUL
+*******************************************************/
+#define r_arg1L r22 /* multiplier Low */
+#define r_arg1H r23
+#define r_arg1HL r24
+#define r_arg1HH r25 /* multiplier High */
+
+#define r_arg2L r18 /* multiplicand Low */
+#define r_arg2H r19
+#define r_arg2HL r20
+#define r_arg2HH r21 /* multiplicand High */
+
+#define r_resL r26 /* result Low */
+#define r_resH r27
+#define r_resHL r30
+#define r_resHH r31 /* result High */
+
+DEFUN __mulsi3
+ clr r_resHH ; clear result
+ clr r_resHL ; clear result
+ clr r_resH ; clear result
+ clr r_resL ; clear result
+__mulsi3_loop:
+ sbrs r_arg1L,0
+ rjmp __mulsi3_skip1
+ add r_resL,r_arg2L ; result + multiplicand
+ adc r_resH,r_arg2H
+ adc r_resHL,r_arg2HL
+ adc r_resHH,r_arg2HH
+__mulsi3_skip1:
+ add r_arg2L,r_arg2L ; shift multiplicand
+ adc r_arg2H,r_arg2H
+ adc r_arg2HL,r_arg2HL
+ adc r_arg2HH,r_arg2HH
+
+ lsr r_arg1HH ; gets LSB of multiplier
+ ror r_arg1HL
+ ror r_arg1H
+ ror r_arg1L
+ brne __mulsi3_loop
+ sbiw r_arg1HL,0
+ cpc r_arg1H,r_arg1L
+ brne __mulsi3_loop ; exit if multiplier = 0
+__mulsi3_exit:
+ mov_h r_arg1HH,r_resHH ; result to return register
+ mov_l r_arg1HL,r_resHL
+ mov_h r_arg1H,r_resH
+ mov_l r_arg1L,r_resL
+ ret
+ENDF __mulsi3
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+
+#undef r_arg2L
+#undef r_arg2H
+#undef r_arg2HL
+#undef r_arg2HH
+
+#undef r_resL
+#undef r_resH
+#undef r_resHL
+#undef r_resHH
+
+#endif /* defined (L_mulsi3) */
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16
+*******************************************************/
+
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+ XCALL __umulhisi3
+ ;; Sign-extend B
+ tst B1
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: ;; Sign-extend A
+ XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+ XCALL __umulhisi3
+ ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+ ;; Sign-extend A
+ sbrs A1, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+ mul A0, B0
+ movw C0, r0
+ mul A1, B1
+ movw C2, r0
+ mul A0, B1
+ rcall 1f
+ mul A1, B0
+1: add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
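+
+;; The 16 x 16 -> 32 multiply above is the usual schoolbook decomposition
+;; (informal sketch):
+;;   (A1*256 + A0) * (B1*256 + B0)
+;;     = A1*B1*65536 + (A0*B1 + A1*B0)*256 + A0*B0,
+;; with the two cross products accumulated at byte offset 1 of the result.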
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 32
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have problem skipping 2-word instruction
+ tst A1
+ brmi __mulohisi3
+#else
+ sbrs A1, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __muluhisi3
+ ;; FALLTHRU
+ENDF __mulshisi3
+
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+ XCALL __muluhisi3
+ ;; One-extend R27:R26 (A1:A0)
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+ XCALL __umulhisi3
+ mul A0, B3
+ add C3, r0
+ mul A1, B2
+ add C3, r0
+ mul A0, B2
+ add C2, r0
+ adc C3, r1
+ clr __zero_reg__
+ ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+ Multiplication 32 x 32
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0 * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+ movw A0, C0
+ push C2
+ push C3
+ XCALL __muluhisi3
+ pop A1
+ pop A0
+ ;; A1:A0 now contains the high word of A
+ mul A0, B0
+ add C2, r0
+ adc C3, r1
+ mul A0, B1
+ add C3, r0
+ mul A1, B0
+ add C3, r0
+ clr __zero_reg__
+ ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+.section .text.libgcc.div, "ax", @progbits
+
+/*******************************************************
+ Division 8 / 8 => (result + remainder)
+*******************************************************/
+#define r_rem r25 /* remainder */
+#define r_arg1 r24 /* dividend, quotient */
+#define r_arg2 r22 /* divisor */
+#define r_cnt r23 /* loop count */
+
+#if defined (L_udivmodqi4)
+DEFUN __udivmodqi4
+ sub r_rem,r_rem ; clear remainder and carry
+ ldi r_cnt,9 ; init loop counter
+ rjmp __udivmodqi4_ep ; jump to entry point
+__udivmodqi4_loop:
+ rol r_rem ; shift dividend into remainder
+ cp r_rem,r_arg2 ; compare remainder & divisor
+ brcs __udivmodqi4_ep ; remainder <= divisor
+ sub r_rem,r_arg2 ; restore remainder
+__udivmodqi4_ep:
+ rol r_arg1 ; shift dividend (with CARRY)
+ dec r_cnt ; decrement loop counter
+ brne __udivmodqi4_loop
+ com r_arg1 ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __udivmodqi4
+#endif /* defined (L_udivmodqi4) */
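+
+;; Note on the loop above (informal): each step's quotient bit is the
+;; inverted borrow of the compare/subtract, shifted into r_arg1 through the
+;; carry by "rol"; the final "com" flips all bits back, which is why no
+;; explicit bit-set is needed inside the loop.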
+
+#if defined (L_divmodqi4)
+DEFUN __divmodqi4
+ bst r_arg1,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1
+ eor __tmp_reg__,r_arg2; r0.7 is sign of result
+ sbrc r_arg1,7
+ neg r_arg1 ; dividend negative : negate
+ sbrc r_arg2,7
+ neg r_arg2 ; divisor negative : negate
+ XCALL __udivmodqi4 ; do the unsigned div/mod
+ brtc __divmodqi4_1
+ neg r_rem ; correct remainder sign
+__divmodqi4_1:
+ sbrc __tmp_reg__,7
+ neg r_arg1 ; correct result sign
+__divmodqi4_exit:
+ ret
+ENDF __divmodqi4
+#endif /* defined (L_divmodqi4) */
+
+#undef r_rem
+#undef r_arg1
+#undef r_arg2
+#undef r_cnt
+
+
+/*******************************************************
+ Division 16 / 16 => (result + remainder)
+*******************************************************/
+#define r_remL r26 /* remainder Low */
+#define r_remH r27 /* remainder High */
+
+/* return: remainder */
+#define r_arg1L r24 /* dividend Low */
+#define r_arg1H r25 /* dividend High */
+
+/* return: quotient */
+#define r_arg2L r22 /* divisor Low */
+#define r_arg2H r23 /* divisor High */
+
+#define r_cnt r21 /* loop count */
+
+#if defined (L_udivmodhi4)
+DEFUN __udivmodhi4
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ ldi r_cnt,17 ; init loop counter
+ rjmp __udivmodhi4_ep ; jump to entry point
+__udivmodhi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ brcs __udivmodhi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+__udivmodhi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ dec r_cnt ; decrement loop counter
+ brne __udivmodhi4_loop
+ com r_arg1L
+ com r_arg1H
+; div/mod results to return registers, as for the div() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ ret
+ENDF __udivmodhi4
+#endif /* defined (L_udivmodhi4) */
+
+#if defined (L_divmodhi4)
+DEFUN __divmodhi4
+ .global _div
+_div:
+ bst r_arg1H,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1H
+ eor __tmp_reg__,r_arg2H ; r0.7 is sign of result
+ rcall __divmodhi4_neg1 ; dividend negative : negate
+ sbrc r_arg2H,7
+ rcall __divmodhi4_neg2 ; divisor negative : negate
+ XCALL __udivmodhi4 ; do the unsigned div/mod
+ rcall __divmodhi4_neg1 ; correct remainder sign
+ tst __tmp_reg__
+ brpl __divmodhi4_exit
+__divmodhi4_neg2:
+ com r_arg2H
+ neg r_arg2L ; correct divisor/result sign
+ sbci r_arg2H,0xff
+__divmodhi4_exit:
+ ret
+__divmodhi4_neg1:
+ brtc __divmodhi4_exit
+ com r_arg1H
+ neg r_arg1L ; correct dividend/remainder sign
+ sbci r_arg1H,0xff
+ ret
+ENDF __divmodhi4
+#endif /* defined (L_divmodhi4) */
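+
+;; Sign convention used above (informal): the quotient is negated when the
+;; operand signs differ (the eor into __tmp_reg__), while the remainder
+;; always takes the sign of the dividend (the T flag saved by "bst").
+;; For example, -7 / 2 gives quotient -3 and remainder -1.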
+
+#undef r_remH
+#undef r_remL
+
+#undef r_arg1H
+#undef r_arg1L
+
+#undef r_arg2H
+#undef r_arg2L
+
+#undef r_cnt
+
+/*******************************************************
+ Division 32 / 32 => (result + remainder)
+*******************************************************/
+#define r_remHH r31 /* remainder High */
+#define r_remHL r30
+#define r_remH r27
+#define r_remL r26 /* remainder Low */
+
+/* return: remainder */
+#define r_arg1HH r25 /* dividend High */
+#define r_arg1HL r24
+#define r_arg1H r23
+#define r_arg1L r22 /* dividend Low */
+
+/* return: quotient */
+#define r_arg2HH r21 /* divisor High */
+#define r_arg2HL r20
+#define r_arg2H r19
+#define r_arg2L r18 /* divisor Low */
+
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_udivmodsi4)
+DEFUN __udivmodsi4
+ ldi r_remL, 33 ; init loop counter
+ mov r_cnt, r_remL
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ mov_l r_remHL, r_remL
+ mov_h r_remHH, r_remH
+ rjmp __udivmodsi4_ep ; jump to entry point
+__udivmodsi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ rol r_remHL
+ rol r_remHH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ cpc r_remHL,r_arg2HL
+ cpc r_remHH,r_arg2HH
+ brcs __udivmodsi4_ep ; remainder <= divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+ sbc r_remHL,r_arg2HL
+ sbc r_remHH,r_arg2HH
+__udivmodsi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ rol r_arg1HL
+ rol r_arg1HH
+ dec r_cnt ; decrement loop counter
+ brne __udivmodsi4_loop
+ ; __zero_reg__ now restored (r_cnt == 0)
+ com r_arg1L
+ com r_arg1H
+ com r_arg1HL
+ com r_arg1HH
+; div/mod results to return registers, as for the ldiv() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg2HL, r_arg1HL
+ mov_h r_arg2HH, r_arg1HH
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ mov_l r_arg1HL, r_remHL
+ mov_h r_arg1HH, r_remHH
+ ret
+ENDF __udivmodsi4
+#endif /* defined (L_udivmodsi4) */
+
+#if defined (L_divmodsi4)
+DEFUN __divmodsi4
+ bst r_arg1HH,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1HH
+ eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result
+ rcall __divmodsi4_neg1 ; dividend negative : negate
+ sbrc r_arg2HH,7
+ rcall __divmodsi4_neg2 ; divisor negative : negate
+ XCALL __udivmodsi4 ; do the unsigned div/mod
+ rcall __divmodsi4_neg1 ; correct remainder sign
+ rol __tmp_reg__
+ brcc __divmodsi4_exit
+__divmodsi4_neg2:
+ com r_arg2HH
+ com r_arg2HL
+ com r_arg2H
+ neg r_arg2L ; correct divisor/quotient sign
+ sbci r_arg2H,0xff
+ sbci r_arg2HL,0xff
+ sbci r_arg2HH,0xff
+__divmodsi4_exit:
+ ret
+__divmodsi4_neg1:
+ brtc __divmodsi4_exit
+ com r_arg1HH
+ com r_arg1HL
+ com r_arg1H
+ neg r_arg1L ; correct dividend/remainder sign
+ sbci r_arg1H, 0xff
+ sbci r_arg1HL,0xff
+ sbci r_arg1HH,0xff
+ ret
+ENDF __divmodsi4
+#endif /* defined (L_divmodsi4) */
+
+
+.section .text.libgcc.prologue, "ax", @progbits
+
+/**********************************
+ * This is a prologue subroutine
+ **********************************/
+#if defined (L_prologue)
+
+DEFUN __prologue_saves__
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+ENDF __prologue_saves__
+#endif /* defined (L_prologue) */
+
+/*
+ * This is an epilogue subroutine
+ */
+#if defined (L_epilogue)
+
+DEFUN __epilogue_restores__
+ ldd r2,Y+18
+ ldd r3,Y+17
+ ldd r4,Y+16
+ ldd r5,Y+15
+ ldd r6,Y+14
+ ldd r7,Y+13
+ ldd r8,Y+12
+ ldd r9,Y+11
+ ldd r10,Y+10
+ ldd r11,Y+9
+ ldd r12,Y+8
+ ldd r13,Y+7
+ ldd r14,Y+6
+ ldd r15,Y+5
+ ldd r16,Y+4
+ ldd r17,Y+3
+ ldd r26,Y+2
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+ mov_l r28, r26
+ mov_h r29, r27
+ ret
+ENDF __epilogue_restores__
+#endif /* defined (L_epilogue) */
+
+#ifdef L_exit
+ .section .fini9,"ax",@progbits
+DEFUN _exit
+ .weak exit
+exit:
+ENDF _exit
+
+ /* Code from .fini8 ... .fini1 sections inserted by ld script. */
+
+ .section .fini0,"ax",@progbits
+ cli
+__stop_program:
+ rjmp __stop_program
+#endif /* defined (L_exit) */
+
+#ifdef L_cleanup
+ .weak _cleanup
+ .func _cleanup
+_cleanup:
+ ret
+.endfunc
+#endif /* defined (L_cleanup) */
+
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump
+DEFUN __tablejump2__
+ lsl r30
+ rol r31
+ ;; FALLTHRU
+ENDF __tablejump2__
+
+DEFUN __tablejump__
+#if defined (__AVR_HAVE_LPMX__)
+ lpm __tmp_reg__, Z+
+ lpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else /* !HAVE_LPMX */
+ lpm
+ adiw r30, 1
+ push r0
+ lpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif /* !HAVE_LPMX */
+ENDF __tablejump__
+#endif /* defined (L_tablejump) */
+
+#ifdef L_copy_data
+ .section .init4,"ax",@progbits
+DEFUN __do_copy_data
+#if defined(__AVR_HAVE_ELPMX__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start)
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm r0, Z+
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+ inc r16
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm
+ st X+, r0
+ adiw r30, 1
+ brcs .L__do_copy_data_carry
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+#if defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+ENDF __do_copy_data
+#endif /* L_copy_data */
+
+/* __do_clear_bss is only necessary if there is anything in .bss section. */
+
+#ifdef L_clear_bss
+ .section .init4,"ax",@progbits
+DEFUN __do_clear_bss
+ ldi r17, hi8(__bss_end)
+ ldi r26, lo8(__bss_start)
+ ldi r27, hi8(__bss_start)
+ rjmp .do_clear_bss_start
+.do_clear_bss_loop:
+ st X+, __zero_reg__
+.do_clear_bss_start:
+ cpi r26, lo8(__bss_end)
+ cpc r27, r17
+ brne .do_clear_bss_loop
+ENDF __do_clear_bss
+#endif /* L_clear_bss */
+
+/* __do_global_ctors and __do_global_dtors are only necessary
+ if there are any constructors/destructors. */
+
+#ifdef L_ctors
+ .section .init6,"ax",@progbits
+DEFUN __do_global_ctors
+#if defined(__AVR_HAVE_RAMPZ__)
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ ldi r16, hh8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ ldi r24, hh8(__ctors_start)
+ cpc r16, r24
+ brne .L__do_global_ctors_loop
+#else
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ brne .L__do_global_ctors_loop
+#endif /* defined(__AVR_HAVE_RAMPZ__) */
+ENDF __do_global_ctors
+#endif /* L_ctors */
+
+#ifdef L_dtors
+ .section .fini6,"ax",@progbits
+DEFUN __do_global_dtors
+#if defined(__AVR_HAVE_RAMPZ__)
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ ldi r16, hh8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ ldi r24, hh8(__dtors_end)
+ cpc r16, r24
+ brne .L__do_global_dtors_loop
+#else
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+ adiw r28, 2
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ brne .L__do_global_dtors_loop
+#endif /* defined(__AVR_HAVE_RAMPZ__) */
+ENDF __do_global_dtors
+#endif /* L_dtors */
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump_elpm
+DEFUN __tablejump_elpm__
+#if defined (__AVR_HAVE_ELPM__)
+#if defined (__AVR_HAVE_LPMX__)
+ elpm __tmp_reg__, Z+
+ elpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else
+ elpm
+ adiw r30, 1
+ push r0
+ elpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif
+#endif /* defined (__AVR_HAVE_ELPM__) */
+ENDF __tablejump_elpm__
+#endif /* defined (L_tablejump_elpm) */
+
+
+.section .text.libgcc.builtins, "ax", @progbits
+
+/**********************************
+ * Find first set Bit (ffs)
+ **********************************/
+
+#if defined (L_ffssi2)
+;; find first set bit
+;; r25:r24 = ffs32 (r25:r22)
+;; clobbers: r22, r26
+DEFUN __ffssi2
+ clr r26
+ tst r22
+ brne 1f
+ subi r26, -8
+ or r22, r23
+ brne 1f
+ subi r26, -8
+ or r22, r24
+ brne 1f
+ subi r26, -8
+ or r22, r25
+ brne 1f
+ ret
+1: mov r24, r22
+ XJMP __loop_ffsqi2
+ENDF __ffssi2
+#endif /* defined (L_ffssi2) */
+
+#if defined (L_ffshi2)
+;; find first set bit
+;; r25:r24 = ffs16 (r25:r24)
+;; clobbers: r26
+DEFUN __ffshi2
+ clr r26
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have problem skipping 2-word instruction
+ tst r24
+ breq 2f
+#else
+ cpse r24, __zero_reg__
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+1: XJMP __loop_ffsqi2
+2: ldi r26, 8
+ or r24, r25
+ brne 1b
+ ret
+ENDF __ffshi2
+#endif /* defined (L_ffshi2) */
+
+#if defined (L_loop_ffsqi2)
+;; Helper for ffshi2, ffssi2
+;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
+;; r24 must be != 0
+;; clobbers: r26
+DEFUN __loop_ffsqi2
+ inc r26
+ lsr r24
+ brcc __loop_ffsqi2
+ mov r24, r26
+ clr r25
+ ret
+ENDF __loop_ffsqi2
+#endif /* defined (L_loop_ffsqi2) */
+
+
+/**********************************
+ * Count trailing Zeros (ctz)
+ **********************************/
+
+#if defined (L_ctzsi2)
+;; count trailing zeros
+;; r25:r24 = ctz32 (r25:r22)
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzsi2
+ XCALL __ffssi2
+ dec r24
+ ret
+ENDF __ctzsi2
+#endif /* defined (L_ctzsi2) */
+
+#if defined (L_ctzhi2)
+;; count trailing zeros
+;; r25:r24 = ctz16 (r25:r24)
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzhi2
+ XCALL __ffshi2
+ dec r24
+ ret
+ENDF __ctzhi2
+#endif /* defined (L_ctzhi2) */
+
+
+/**********************************
+ * Count leading Zeros (clz)
+ **********************************/
+
+#if defined (L_clzdi2)
+;; count leading zeros
+;; r25:r24 = clz64 (r25:r18)
+;; clobbers: r22, r23, r26
+DEFUN __clzdi2
+ XCALL __clzsi2
+ sbrs r24, 5
+ ret
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __clzsi2
+ subi r24, -32
+ ret
+ENDF __clzdi2
+#endif /* defined (L_clzdi2) */
+
+#if defined (L_clzsi2)
+;; count leading zeros
+;; r25:r24 = clz32 (r25:r22)
+;; clobbers: r26
+DEFUN __clzsi2
+ XCALL __clzhi2
+ sbrs r24, 4
+ ret
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __clzhi2
+ subi r24, -16
+ ret
+ENDF __clzsi2
+#endif /* defined (L_clzsi2) */
+
+#if defined (L_clzhi2)
+;; count leading zeros
+;; r25:r24 = clz16 (r25:r24)
+;; clobbers: r26
+DEFUN __clzhi2
+ clr r26
+ tst r25
+ brne 1f
+ subi r26, -8
+ or r25, r24
+ brne 1f
+ ldi r24, 16
+ ret
+1: cpi r25, 16
+ brsh 3f
+ subi r26, -3
+ swap r25
+2: inc r26
+3: lsl r25
+ brcc 2b
+ mov r24, r26
+ clr r25
+ ret
+ENDF __clzhi2
+#endif /* defined (L_clzhi2) */
+
+
+/**********************************
+ * Parity
+ **********************************/
+
+#if defined (L_paritydi2)
+;; r25:r24 = parity64 (r25:r18)
+;; clobbers: __tmp_reg__
+DEFUN __paritydi2
+ eor r24, r18
+ eor r24, r19
+ eor r24, r20
+ eor r24, r21
+ XJMP __paritysi2
+ENDF __paritydi2
+#endif /* defined (L_paritydi2) */
+
+#if defined (L_paritysi2)
+;; r25:r24 = parity32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __paritysi2
+ eor r24, r22
+ eor r24, r23
+ XJMP __parityhi2
+ENDF __paritysi2
+#endif /* defined (L_paritysi2) */
+
+#if defined (L_parityhi2)
+;; r25:r24 = parity16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityhi2
+ eor r24, r25
+;; FALLTHRU
+ENDF __parityhi2
+
+;; r25:r24 = parity8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityqi2
+ ;; parity is in r24[0..7]
+ mov __tmp_reg__, r24
+ swap __tmp_reg__
+ eor r24, __tmp_reg__
+ ;; parity is in r24[0..3]
+ subi r24, -4
+ andi r24, -5
+ subi r24, -6
+ ;; parity is in r24[0,3]
+ sbrc r24, 3
+ inc r24
+ ;; parity is in r24[0]
+ andi r24, 1
+ clr r25
+ ret
+ENDF __parityqi2
+#endif /* defined (L_parityhi2) */
+
+
+/**********************************
+ * Population Count
+ **********************************/
+
+#if defined (L_popcounthi2)
+;; population count
+;; r25:r24 = popcount16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcounthi2
+ XCALL __popcountqi2
+ push r24
+ mov r24, r25
+ XCALL __popcountqi2
+ clr r25
+ ;; FALLTHRU
+ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
+#endif /* defined (L_popcounthi2) */
+
+#if defined (L_popcountsi2)
+;; population count
+;; r25:r24 = popcount32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __popcountsi2
+ XCALL __popcounthi2
+ push r24
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __popcounthi2
+ XJMP __popcounthi2_tail
+ENDF __popcountsi2
+#endif /* defined (L_popcountsi2) */
+
+#if defined (L_popcountdi2)
+;; population count
+;; r25:r24 = popcount64 (r25:r18)
+;; clobbers: r22, r23, __tmp_reg__
+DEFUN __popcountdi2
+ XCALL __popcountsi2
+ push r24
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __popcountsi2
+ XJMP __popcounthi2_tail
+ENDF __popcountdi2
+#endif /* defined (L_popcountdi2) */
+
+#if defined (L_popcountqi2)
+;; population count
+;; r24 = popcount8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcountqi2
+ mov __tmp_reg__, r24
+ andi r24, 1
+ lsr __tmp_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __tmp_reg__
+ ret
+ENDF __popcountqi2
+#endif /* defined (L_popcountqi2) */
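+
+;; Note on __popcountqi2 above (informal): bit 0 is kept by the "andi",
+;; bits 1..5 are added one at a time through the carry of successive "lsr",
+;; and the final "adc r24, __tmp_reg__" folds in bit 6 (the last carry) and
+;; bit 7 (the only bit left in __tmp_reg__).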
+
+
+/**********************************
+ * Swap bytes
+ **********************************/
+
+;; swap two registers with different register number
+.macro bswap a, b
+ eor \a, \b
+ eor \b, \a
+ eor \a, \b
+.endm
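The macro above is the classic three-XOR swap; in C it would look roughly like the sketch below (only valid when the two objects are distinct, which the "different register numbers" condition guarantees):

/* Swap two bytes without a temporary; a == b would zero both. */
static void xor_swap (unsigned char *a, unsigned char *b)
{
  *a ^= *b;
  *b ^= *a;
  *a ^= *b;
}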
+
+#if defined (L_bswapsi2)
+;; swap bytes
+;; r25:r22 = bswap32 (r25:r22)
+DEFUN __bswapsi2
+ bswap r22, r25
+ bswap r23, r24
+ ret
+ENDF __bswapsi2
+#endif /* defined (L_bswapsi2) */
+
+#if defined (L_bswapdi2)
+;; swap bytes
+;; r25:r18 = bswap64 (r25:r18)
+DEFUN __bswapdi2
+ bswap r18, r25
+ bswap r19, r24
+ bswap r20, r23
+ bswap r21, r22
+ ret
+ENDF __bswapdi2
+#endif /* defined (L_bswapdi2) */
+
+
+/**********************************
+ * 64-bit shifts
+ **********************************/
+
+#if defined (L_ashrdi3)
+;; Arithmetic shift right
+;; r25:r18 = ashr64 (r25:r18, r17:r16)
+DEFUN __ashrdi3
+ push r16
+ andi r16, 63
+ breq 2f
+1: asr r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ ror r20
+ ror r19
+ ror r18
+ dec r16
+ brne 1b
+2: pop r16
+ ret
+ENDF __ashrdi3
+#endif /* defined (L_ashrdi3) */
+
+#if defined (L_lshrdi3)
+;; Logical shift right
+;; r25:r18 = lshr64 (r25:r18, r17:r16)
+DEFUN __lshrdi3
+ push r16
+ andi r16, 63
+ breq 2f
+1: lsr r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ ror r20
+ ror r19
+ ror r18
+ dec r16
+ brne 1b
+2: pop r16
+ ret
+ENDF __lshrdi3
+#endif /* defined (L_lshrdi3) */
+
+#if defined (L_ashldi3)
+;; Shift left
+;; r25:r18 = ashl64 (r25:r18, r17:r16)
+DEFUN __ashldi3
+ push r16
+ andi r16, 63
+ breq 2f
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ dec r16
+ brne 1b
+2: pop r16
+ ret
+ENDF __ashldi3
+#endif /* defined (L_ashldi3) */
+
+
+.section .text.libgcc.fmul, "ax", @progbits
+
+/***********************************************************/
+;;; Softmul versions of FMUL, FMULS and FMULSU to implement
+;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
+/***********************************************************/
+
+#define A1 24
+#define B1 25
+#define C0 22
+#define C1 23
+#define A0 __tmp_reg__
+
+#ifdef L_fmuls
+;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmuls
+ ;; A0.7 = negate result?
+ mov A0, A1
+ eor A0, B1
+ ;; B1 = |B1|
+ sbrc B1, 7
+ neg B1
+ XJMP __fmulsu_exit
+ENDF __fmuls
+#endif /* L_fmuls */
+
+#ifdef L_fmulsu
+;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmulsu
+ ;; A0.7 = negate result?
+ mov A0, A1
+;; FALLTHRU
+ENDF __fmulsu
+
+;; Helper for __fmuls and __fmulsu
+DEFUN __fmulsu_exit
+ ;; A1 = |A1|
+ sbrc A1, 7
+ neg A1
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping a 2-word instruction
+ tst A0
+ brmi 1f
+#else
+ sbrs A0, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __fmul
+1: XCALL __fmul
+ ;; C = -C iff A0.7 = 1
+ com C1
+ neg C0
+ sbci C1, -1
+ ret
+ENDF __fmulsu_exit
+#endif /* L_fmulsu */
+
+
+#ifdef L_fmul
+;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmul
+ ; clear result
+ clr C0
+ clr C1
+ clr A0
+1: tst B1
+ ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
+2: brpl 3f
+ ;; C += A
+ add C0, A0
+ adc C1, A1
+3: ;; A >>= 1
+ lsr A1
+ ror A0
+ ;; B <<= 1
+ lsl B1
+ brne 2b
+ ret
+ENDF __fmul
+#endif /* L_fmul */
+
+#undef A0
+#undef A1
+#undef B1
+#undef C0
+#undef C1
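A hedged C model of the shift-and-add loop in __fmul above: FMUL produces a 1.7 x 1.7 fractional product, i.e. the low 16 bits of (a * b) << 1 (fmul_model is a hypothetical name, not part of the library):

#include <stdint.h>

static uint16_t fmul_model (uint8_t a, uint8_t b)
{
  uint16_t acc = 0;
  uint16_t shifted_a = (uint16_t) a << 8;   /* A1:A0 with A0 cleared */

  while (b)
    {
      if (b & 0x80)              /* test bit 7 of B, as "brpl" does */
        acc += shifted_a;        /* C += A */
      shifted_a >>= 1;           /* A >>= 1 */
      b = (uint8_t) (b << 1);    /* B <<= 1, loop while nonzero */
    }
  return acc;                    /* low 16 bits of (a * b) << 1 */
}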
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr
index 78829c76af4..f1c114a6dd6 100644
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -1,3 +1,51 @@
+LIB1ASMSRC = avr/lib1funcs.S
+LIB1ASMFUNCS = \
+ _mulqi3 \
+ _mulhi3 \
+ _mulhisi3 \
+ _umulhisi3 \
+ _usmulhisi3 \
+ _muluhisi3 \
+ _mulshisi3 \
+ _mulsi3 \
+ _udivmodqi4 \
+ _divmodqi4 \
+ _udivmodhi4 \
+ _divmodhi4 \
+ _udivmodsi4 \
+ _divmodsi4 \
+ _prologue \
+ _epilogue \
+ _exit \
+ _cleanup \
+ _tablejump \
+ _tablejump_elpm \
+ _copy_data \
+ _clear_bss \
+ _ctors \
+ _dtors \
+ _ffssi2 \
+ _ffshi2 \
+ _loop_ffsqi2 \
+ _ctzsi2 \
+ _ctzhi2 \
+ _clzdi2 \
+ _clzsi2 \
+ _clzhi2 \
+ _paritydi2 \
+ _paritysi2 \
+ _parityhi2 \
+ _popcounthi2 \
+ _popcountsi2 \
+ _popcountdi2 \
+ _popcountqi2 \
+ _bswapsi2 \
+ _bswapdi2 \
+ _ashldi3 \
+ _ashrdi3 \
+ _lshrdi3 \
+ _fmul _fmuls _fmulsu
+
# Extra 16-bit integer functions.
intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2
diff --git a/libgcc/config/bfin/lib1funcs.S b/libgcc/config/bfin/lib1funcs.S
new file mode 100644
index 00000000000..c7bf4f3f05c
--- /dev/null
+++ b/libgcc/config/bfin/lib1funcs.S
@@ -0,0 +1,211 @@
+/* libgcc functions for Blackfin.
+ Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef L_divsi3
+.text
+.align 2
+.global ___divsi3;
+.type ___divsi3, STT_FUNC;
+
+___divsi3:
+ [--SP]= RETS;
+ [--SP] = R7;
+
+ R2 = -R0;
+ CC = R0 < 0;
+ IF CC R0 = R2;
+ R7 = CC;
+
+ R2 = -R1;
+ CC = R1 < 0;
+ IF CC R1 = R2;
+ R2 = CC;
+ R7 = R7 ^ R2;
+
+ CALL ___udivsi3;
+
+ CC = R7;
+ R1 = -R0;
+ IF CC R0 = R1;
+
+ R7 = [SP++];
+ RETS = [SP++];
+ RTS;
+#endif
+
+#ifdef L_modsi3
+.align 2
+.global ___modsi3;
+.type ___modsi3, STT_FUNC;
+
+___modsi3:
+ [--SP] = RETS;
+ [--SP] = R0;
+ [--SP] = R1;
+ CALL ___divsi3;
+ R2 = [SP++];
+ R1 = [SP++];
+ R2 *= R0;
+ R0 = R1 - R2;
+ RETS = [SP++];
+ RTS;
+#endif
+
+#ifdef L_udivsi3
+.align 2
+.global ___udivsi3;
+.type ___udivsi3, STT_FUNC;
+
+___udivsi3:
+ P0 = 32;
+ LSETUP (0f, 1f) LC0 = P0;
+ /* upper half of dividend */
+ R3 = 0;
+0:
+ /* The first time round in the loop we shift in garbage, but since we
+ perform 33 shifts, it doesn't matter. */
+ R0 = ROT R0 BY 1;
+ R3 = ROT R3 BY 1;
+ R2 = R3 - R1;
+ CC = R3 < R1 (IU);
+1:
+ /* Last instruction of the loop. */
+ IF ! CC R3 = R2;
+
+ /* Shift in the last bit. */
+ R0 = ROT R0 BY 1;
+ /* R0 is the result, R3 contains the remainder. */
+ R0 = ~ R0;
+ RTS;
+#endif
+
+#ifdef L_umodsi3
+.align 2
+.global ___umodsi3;
+.type ___umodsi3, STT_FUNC;
+
+___umodsi3:
+ [--SP] = RETS;
+ CALL ___udivsi3;
+ R0 = R3;
+ RETS = [SP++];
+ RTS;
+#endif
+
+#ifdef L_umulsi3_highpart
+.align 2
+.global ___umulsi3_highpart;
+.type ___umulsi3_highpart, STT_FUNC;
+
+___umulsi3_highpart:
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
+ A1 += R0.L * R1.H (FU);
+ A1 = A1 >> 16;
+ A0 += A1;
+ R0 = A0 (FU);
+ RTS;
+#endif
+
+#ifdef L_smulsi3_highpart
+.align 2
+.global ___smulsi3_highpart;
+.type ___smulsi3_highpart, STT_FUNC;
+
+___smulsi3_highpart:
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
+ A1 += R1.H * R0.L (IS,M);
+ A1 = A1 >>> 16;
+ R0 = (A0 += A1);
+ RTS;
+#endif
+
+#ifdef L_muldi3
+.align 2
+.global ___muldi3;
+.type ___muldi3, STT_FUNC;
+
+/*
+ R1:R0 * R3:R2
+ = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
+[X] = (R1.h * R3.h) * 2^96
+[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
+[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
+[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
+[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
+[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
+[T4] + (R0.l * R2.l)
+
+ We can discard the first three lines marked "X" since we produce
+ only a 64 bit result. So, we need ten 16-bit multiplies.
+
+ Individual mul-acc results:
+[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
+[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
+[E3] = R0.l * R2.h + R2.l * R0.h
+[E4] = R0.l * R2.l
+
+ We also need to add high parts from lower-level results to higher ones:
+ E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
+
+ One interesting property is that all parts of the result that depend
+ on the sign of the multiplication are discarded. Those would be the
+ multiplications involving R1.h and R3.h, but only the top 16 bit of
+ the 32 bit result depend on the sign, and since R1.h and R3.h only
+ occur in E1, the top half of these results is cut off.
+ So, we can just use FU mode for all of the 16-bit multiplies, and
+ ignore questions of when to use mixed mode. */
+
+___muldi3:
+ /* [SP] technically is part of the caller's frame, but we can
+ use it as scratch space. */
+ A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
+ A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
+ A0 += A1; /* E1 */
+ R4 = A0.w;
+ A0 = R0.l * R3.l (FU); /* E2 */
+ A0 += R2.l * R1.l (FU); /* E2 */
+
+ A1 = R2.L * R0.L (FU); /* E4 */
+ R3 = A1.w;
+ A1 = A1 >> 16; /* E3c */
+ A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
+ A1 += R0.L * R2.H (FU); /* E3c */
+ R0 = A1.w;
+ A1 = A1 >> 16; /* E2c */
+ A0 += A1; /* E2c */
+ R1 = A0.w;
+
+ /* low(result) = low(E3c):low(E4) */
+ R0 = PACK (R0.l, R3.l);
+ /* high(result) = E2c + (E1 << 16) */
+ R1.h = R1.h + R4.l (NS) || R4 = [SP];
+ RTS;
+
+.size ___muldi3, .-___muldi3
+#endif
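The decomposition comment above drops every partial product that lands entirely above bit 63. The same idea expressed with 32-bit halves, as a rough C sketch (muldi3_model is illustrative only; the actual Blackfin code works in 16-bit pieces):

#include <stdint.h>

static uint64_t muldi3_model (uint64_t a, uint64_t b)
{
  uint32_t a_lo = (uint32_t) a, a_hi = (uint32_t) (a >> 32);
  uint32_t b_lo = (uint32_t) b, b_hi = (uint32_t) (b >> 32);

  uint64_t low   = (uint64_t) a_lo * b_lo;   /* contributes to bits 0..63  */
  uint64_t cross = (uint64_t) a_hi * b_lo
                 + (uint64_t) a_lo * b_hi;   /* contributes to bits 32..95 */
  /* a_hi * b_hi only affects bits 64..127, so it is never computed. */
  return low + (cross << 32);
}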
diff --git a/libgcc/config/bfin/t-bfin b/libgcc/config/bfin/t-bfin
new file mode 100644
index 00000000000..bc2b088ffc1
--- /dev/null
+++ b/libgcc/config/bfin/t-bfin
@@ -0,0 +1,3 @@
+LIB1ASMSRC = bfin/lib1funcs.S
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart
+LIB1ASMFUNCS += _smulsi3_highpart
diff --git a/libgcc/config/c6x/lib1funcs.S b/libgcc/config/c6x/lib1funcs.S
new file mode 100644
index 00000000000..5bf34474bbd
--- /dev/null
+++ b/libgcc/config/c6x/lib1funcs.S
@@ -0,0 +1,438 @@
+/* Copyright 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Bernd Schmidt <bernds@codesourcery.com>.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ ;; ABI considerations for the divide functions
+ ;; The following registers are call-used:
+ ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
+ ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
+ ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
+ ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
+ ;;
+ ;; In our implementation, divu and remu are leaf functions,
+ ;; while both divi and remi call into divu.
+ ;; A0 is not clobbered by any of the functions.
+ ;; divu does not clobber B2 either, which is taken advantage of
+ ;; in remi.
+ ;; divi uses B5 to hold the original return address during
+ ;; the call to divu.
+ ;; remi uses B2 and A5 to hold the input values during the
+ ;; call to divu. It stores B3 on the stack.
+
+#ifdef L_divsi3
+.text
+.align 2
+.global __c6xabi_divi
+.hidden __c6xabi_divi
+.type __c6xabi_divi, STT_FUNC
+
+__c6xabi_divi:
+ call .s2 __c6xabi_divu
+|| mv .d2 B3, B5
+|| cmpgt .l1 0, A4, A1
+|| cmpgt .l2 0, B4, B1
+
+ [A1] neg .l1 A4, A4
+|| [B1] neg .l2 B4, B4
+|| xor .s1x A1, B1, A1
+
+#ifdef _TMS320C6400
+ [A1] addkpc .s2 1f, B3, 4
+#else
+ [A1] mvkl .s2 1f, B3
+ [A1] mvkh .s2 1f, B3
+ nop 2
+#endif
+1:
+ neg .l1 A4, A4
+|| mv .l2 B3,B5
+|| ret .s2 B5
+ nop 5
+#endif
+
+#if defined L_modsi3 || defined L_divmodsi4
+.align 2
+#ifdef L_modsi3
+#define MOD_OUTPUT_REG A4
+.global __c6xabi_remi
+.hidden __c6xabi_remi
+.type __c6xabi_remi, STT_FUNC
+#else
+#define MOD_OUTPUT_REG A5
+.global __c6xabi_divremi
+.hidden __c6xabi_divremi
+.type __c6xabi_divremi, STT_FUNC
+__c6xabi_divremi:
+#endif
+
+__c6xabi_remi:
+ stw .d2t2 B3, *B15--[2]
+|| cmpgt .l1 0, A4, A1
+|| cmpgt .l2 0, B4, B2
+|| mv .s1 A4, A5
+|| call .s2 __c6xabi_divu
+
+ [A1] neg .l1 A4, A4
+|| [B2] neg .l2 B4, B4
+|| xor .s2x B2, A1, B0
+|| mv .d2 B4, B2
+
+#ifdef _TMS320C6400
+ [B0] addkpc .s2 1f, B3, 1
+ [!B0] addkpc .s2 2f, B3, 1
+ nop 2
+#else
+ [B0] mvkl .s2 1f,B3
+ [!B0] mvkl .s2 2f,B3
+
+ [B0] mvkh .s2 1f,B3
+ [!B0] mvkh .s2 2f,B3
+#endif
+1:
+ neg .l1 A4, A4
+2:
+ ldw .d2t2 *++B15[2], B3
+
+#ifdef _TMS320C6400_PLUS
+ mpy32 .m1x A4, B2, A6
+ nop 3
+ ret .s2 B3
+ sub .l1 A5, A6, MOD_OUTPUT_REG
+ nop 4
+#else
+ mpyu .m1x A4, B2, A1
+ nop 1
+ mpylhu .m1x A4, B2, A6
+|| mpylhu .m2x B2, A4, B2
+ nop 1
+ add .l1x A6, B2, A6
+|| ret .s2 B3
+ shl .s1 A6, 16, A6
+ add .d1 A6, A1, A6
+ sub .l1 A5, A6, MOD_OUTPUT_REG
+ nop 2
+#endif
+
+#endif
+
+#if defined L_udivsi3 || defined L_udivmodsi4
+.align 2
+#ifdef L_udivsi3
+.global __c6xabi_divu
+.hidden __c6xabi_divu
+.type __c6xabi_divu, STT_FUNC
+__c6xabi_divu:
+#else
+.global __c6xabi_divremu
+.hidden __c6xabi_divremu
+.type __c6xabi_divremu, STT_FUNC
+__c6xabi_divremu:
+#endif
+ ;; We use a series of up to 31 subc instructions. First, we find
+ ;; out how many leading zero bits there are in the divisor. This
+ ;; gives us both a shift count for aligning (shifting) the divisor
+ ;; to the dividend, and the number of times we have to execute subc.
+
+ ;; At the end, we have both the remainder and most of the quotient
+ ;; in A4. The top bit of the quotient is computed first and is
+ ;; placed in A2.
+
+ ;; Return immediately if the dividend is zero. Setting B4 to 1
+ ;; is a trick to allow us to leave the following insns in the jump
+ ;; delay slot without affecting the result.
+ mv .s2x A4, B1
+
+#ifndef _TMS320C6400
+[!b1] mvk .s2 1, B4
+#endif
+[b1] lmbd .l2 1, B4, B1
+||[!b1] b .s2 B3 ; RETURN A
+#ifdef _TMS320C6400
+||[!b1] mvk .d2 1, B4
+#endif
+#ifdef L_udivmodsi4
+||[!b1] zero .s1 A5
+#endif
+ mv .l1x B1, A6
+|| shl .s2 B4, B1, B4
+
+ ;; The loop performs a maximum of 28 steps, so we do the
+ ;; first 3 here.
+ cmpltu .l1x A4, B4, A2
+[!A2] sub .l1x A4, B4, A4
+|| shru .s2 B4, 1, B4
+|| xor .s1 1, A2, A2
+
+ shl .s1 A2, 31, A2
+|| [b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+
+ ;; RETURN A may happen here (note: must happen before the next branch)
+0:
+ cmpgt .l2 B1, 7, B0
+|| [b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+|| [b0] b .s1 0b
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+ ;; loop backwards branch happens here
+
+ ret .s2 B3
+|| mvk .s1 32, A1
+ sub .l1 A1, A6, A6
+#ifdef L_udivmodsi4
+|| extu .s1 A4, A6, A5
+#endif
+ shl .s1 A4, A6, A4
+ shru .s1 A4, 1, A4
+|| sub .l1 A6, 1, A6
+ or .l1 A2, A4, A4
+ shru .s1 A4, A6, A4
+ nop
+
+#endif
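A minimal C model of the shift-and-subtract scheme the subc sequence above implements, assuming a nonzero divisor (udiv_model and its use of __builtin_clz are illustrative only):

#include <stdint.h>

static uint32_t udiv_model (uint32_t n, uint32_t d)
{
  uint32_t q = 0;

  if (n == 0)                    /* the asm also returns early here */
    return 0;
  int steps = __builtin_clz (d) - __builtin_clz (n);
  if (steps < 0)                 /* divisor already larger than dividend */
    return 0;

  d <<= steps;                   /* align the divisor under the dividend */
  for (int i = steps; i >= 0; i--)
    {
      q <<= 1;
      if (n >= d)                /* one "subc" step per quotient bit */
        {
          n -= d;
          q |= 1;
        }
      d >>= 1;
    }
  return q;                      /* n now holds the remainder */
}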
+
+#ifdef L_umodsi3
+.align 2
+.global __c6xabi_remu
+.hidden __c6xabi_remu
+.type __c6xabi_remu, STT_FUNC
+__c6xabi_remu:
+ ;; The ABI seems designed to prevent these functions calling each other,
+ ;; so we duplicate most of the divsi3 code here.
+ mv .s2x A4, B1
+#ifndef _TMS320C6400
+[!b1] mvk .s2 1, B4
+#endif
+ lmbd .l2 1, B4, B1
+||[!b1] b .s2 B3 ; RETURN A
+#ifdef _TMS320C6400
+||[!b1] mvk .d2 1, B4
+#endif
+
+ mv .l1x B1, A7
+|| shl .s2 B4, B1, B4
+
+ cmpltu .l1x A4, B4, A1
+[!a1] sub .l1x A4, B4, A4
+ shru .s2 B4, 1, B4
+
+0:
+ cmpgt .l2 B1, 7, B0
+|| [b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+ ;; RETURN A may happen here (note: must happen before the next branch)
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+|| [b0] b .s1 0b
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+ ;; loop backwards branch happens here
+
+ ret .s2 B3
+[b1] subc .l1x A4,B4,A4
+|| [b1] add .s2 -1, B1, B1
+[b1] subc .l1x A4,B4,A4
+
+ extu .s1 A4, A7, A4
+ nop 2
+#endif
+
+#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS
+
+.align 2
+.global __c6xabi_strasgi_64plus
+.hidden __c6xabi_strasgi_64plus
+.type __c6xabi_strasgi_64plus, STT_FUNC
+__c6xabi_strasgi_64plus:
+ shru .s2x a6, 2, b31
+|| mv .s1 a4, a30
+|| mv .d2 b4, b30
+
+ add .s2 -4, b31, b31
+
+ sploopd 1
+|| mvc .s2 b31, ilc
+ ldw .d2t2 *b30++, b31
+ nop 4
+ mv .s1x b31,a31
+ spkernel 6, 0
+|| stw .d1t1 a31, *a30++
+
+ ret .s2 b3
+ nop 5
+#endif
+
+#ifdef L_strasgi
+.global __c6xabi_strasgi
+.type __c6xabi_strasgi, STT_FUNC
+__c6xabi_strasgi:
+ ;; This is essentially memcpy, with alignment known to be at least
+ ;; 4, and the size a multiple of 4 greater than or equal to 28.
+ ldw .d2t1 *B4++, A0
+|| mvk .s2 16, B1
+ ldw .d2t1 *B4++, A1
+|| mvk .s2 20, B2
+|| sub .d1 A6, 24, A6
+ ldw .d2t1 *B4++, A5
+ ldw .d2t1 *B4++, A7
+|| mv .l2x A6, B7
+ ldw .d2t1 *B4++, A8
+ ldw .d2t1 *B4++, A9
+|| mv .s2x A0, B5
+|| cmpltu .l2 B2, B7, B0
+
+0:
+ stw .d1t2 B5, *A4++
+||[b0] ldw .d2t1 *B4++, A0
+|| mv .s2x A1, B5
+|| mv .l2 B7, B6
+
+[b0] sub .d2 B6, 24, B7
+||[b0] b .s2 0b
+|| cmpltu .l2 B1, B6, B0
+
+[b0] ldw .d2t1 *B4++, A1
+|| stw .d1t2 B5, *A4++
+|| mv .s2x A5, B5
+|| cmpltu .l2 12, B6, B0
+
+[b0] ldw .d2t1 *B4++, A5
+|| stw .d1t2 B5, *A4++
+|| mv .s2x A7, B5
+|| cmpltu .l2 8, B6, B0
+
+[b0] ldw .d2t1 *B4++, A7
+|| stw .d1t2 B5, *A4++
+|| mv .s2x A8, B5
+|| cmpltu .l2 4, B6, B0
+
+[b0] ldw .d2t1 *B4++, A8
+|| stw .d1t2 B5, *A4++
+|| mv .s2x A9, B5
+|| cmpltu .l2 0, B6, B0
+
+[b0] ldw .d2t1 *B4++, A9
+|| stw .d1t2 B5, *A4++
+|| mv .s2x A0, B5
+|| cmpltu .l2 B2, B7, B0
+
+ ;; loop back branch happens here
+
+ cmpltu .l2 B1, B6, B0
+|| ret .s2 b3
+
+[b0] stw .d1t1 A1, *A4++
+|| cmpltu .l2 12, B6, B0
+[b0] stw .d1t1 A5, *A4++
+|| cmpltu .l2 8, B6, B0
+[b0] stw .d1t1 A7, *A4++
+|| cmpltu .l2 4, B6, B0
+[b0] stw .d1t1 A8, *A4++
+|| cmpltu .l2 0, B6, B0
+[b0] stw .d1t1 A9, *A4++
+
+ ;; return happens here
+
+#endif
+
+#ifdef _TMS320C6400_PLUS
+#ifdef L_push_rts
+.align 2
+.global __c6xabi_push_rts
+.hidden __c6xabi_push_rts
+.type __c6xabi_push_rts, STT_FUNC
+__c6xabi_push_rts:
+ stw .d2t2 B14, *B15--[2]
+ stdw .d2t1 A15:A14, *B15--
+|| b .s2x A3
+ stdw .d2t2 B13:B12, *B15--
+ stdw .d2t1 A13:A12, *B15--
+ stdw .d2t2 B11:B10, *B15--
+ stdw .d2t1 A11:A10, *B15--
+ stdw .d2t2 B3:B2, *B15--
+#endif
+
+#ifdef L_pop_rts
+.align 2
+.global __c6xabi_pop_rts
+.hidden __c6xabi_pop_rts
+.type __c6xabi_pop_rts, STT_FUNC
+__c6xabi_pop_rts:
+ lddw .d2t2 *++B15, B3:B2
+ lddw .d2t1 *++B15, A11:A10
+ lddw .d2t2 *++B15, B11:B10
+ lddw .d2t1 *++B15, A13:A12
+ lddw .d2t2 *++B15, B13:B12
+ lddw .d2t1 *++B15, A15:A14
+|| b .s2 B3
+ ldw .d2t2 *++B15[2], B14
+ nop 4
+#endif
+
+#ifdef L_call_stub
+.align 2
+.global __c6xabi_call_stub
+.type __c6xabi_call_stub, STT_FUNC
+__c6xabi_call_stub:
+ stw .d2t1 A2, *B15--[2]
+ stdw .d2t1 A7:A6, *B15--
+|| call .s2 B31
+ stdw .d2t1 A1:A0, *B15--
+ stdw .d2t2 B7:B6, *B15--
+ stdw .d2t2 B5:B4, *B15--
+ stdw .d2t2 B1:B0, *B15--
+ stdw .d2t2 B3:B2, *B15--
+|| addkpc .s2 1f, B3, 0
+1:
+ lddw .d2t2 *++B15, B3:B2
+ lddw .d2t2 *++B15, B1:B0
+ lddw .d2t2 *++B15, B5:B4
+ lddw .d2t2 *++B15, B7:B6
+ lddw .d2t1 *++B15, A1:A0
+ lddw .d2t1 *++B15, A7:A6
+|| b .s2 B3
+ ldw .d2t1 *++B15[2], A2
+ nop 4
+#endif
+
+#endif
+
diff --git a/libgcc/config/c6x/t-elf b/libgcc/config/c6x/t-elf
index 99d0cd2d5ca..e01c4109e52 100644
--- a/libgcc/config/c6x/t-elf
+++ b/libgcc/config/c6x/t-elf
@@ -1,6 +1,11 @@
# Cannot use default rules due to $(CRTSTUFF_T_CFLAGS).
CUSTOM_CRTIN = yes
+LIB1ASMSRC = c6x/lib1funcs.S
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4
+LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz
+LIB1ASMFUNCS += _push_rts _pop_rts _call_stub
+
# Assemble startup files.
crti.o: $(srcdir)/config/c6x/crti.S
$(crt_compile) -c $(CRTSTUFF_T_CFLAGS) $<
diff --git a/libgcc/config/fr30/lib1funcs.S b/libgcc/config/fr30/lib1funcs.S
new file mode 100644
index 00000000000..7c63453123a
--- /dev/null
+++ b/libgcc/config/fr30/lib1funcs.S
@@ -0,0 +1,115 @@
+/* libgcc routines for the FR30.
+ Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .macro FUNC_START name
+ .text
+ .globl __\name
+ .type __\name, @function
+__\name:
+ .endm
+
+ .macro FUNC_END name
+ .size __\name, . - __\name
+ .endm
+
+ .macro DIV_BODY reg number
+ .if \number
+ DIV_BODY \reg, "\number - 1"
+ div1 \reg
+ .endif
+ .endm
+
+#ifdef L_udivsi3
+FUNC_START udivsi3
+ ;; Perform an unsigned division of r4 / r5 and place the result in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0u r5
+ DIV_BODY r5 32
+ mov mdl, r4
+ ret
+FUNC_END udivsi3
+#endif /* L_udivsi3 */
+
+#ifdef L_divsi3
+FUNC_START divsi3
+ ;; Perform a signed division of r4 / r5 and place the result in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0s r5
+ DIV_BODY r5 32
+ div2 r5
+ div3
+ div4s
+ mov mdl, r4
+ ret
+FUNC_END divsi3
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+FUNC_START umodsi3
+ ;; Perform an unsigned division of r4 / r5 and place the remainder in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0u r5
+ DIV_BODY r5 32
+ mov mdh, r4
+ ret
+FUNC_END umodsi3
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+FUNC_START modsi3
+ ;; Perform a signed division of r4 / r5 and place the remainder in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0s r5
+ DIV_BODY r5 32
+ div2 r5
+ div3
+ div4s
+ mov mdh, r4
+ ret
+FUNC_END modsi3
+#endif /* L_modsi3 */
+
+#ifdef L_negsi2
+FUNC_START negsi2
+ ldi:8 #0, r0
+ sub r4, r0
+ mov r0, r4
+ ret
+FUNC_END negsi2
+#endif /* L_negsi2 */
+
+#ifdef L_one_cmplsi2
+FUNC_START one_cmplsi2
+ ldi:8 #0xff, r0
+ extsb r0
+ eor r0, r4
+ ret
+FUNC_END one_cmplsi2
+#endif /* L_one_cmplsi2 */
+
+
diff --git a/libgcc/config/fr30/t-fr30 b/libgcc/config/fr30/t-fr30
new file mode 100644
index 00000000000..ee5ed9a127e
--- /dev/null
+++ b/libgcc/config/fr30/t-fr30
@@ -0,0 +1,2 @@
+LIB1ASMSRC = fr30/lib1funcs.S
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3
diff --git a/libgcc/config/frv/lib1funcs.S b/libgcc/config/frv/lib1funcs.S
new file mode 100644
index 00000000000..d1ffcab6133
--- /dev/null
+++ b/libgcc/config/frv/lib1funcs.S
@@ -0,0 +1,269 @@
+/* Library functions.
+ Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software ; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <frv-asm.h>
+
+
+#ifdef L_cmpll
+/* icc0 = __cmpll (long long a, long long b) */
+
+ .globl EXT(__cmpll)
+ .type EXT(__cmpll),@function
+ .text
+ .p2align 4
+EXT(__cmpll):
+ cmp gr8, gr10, icc0
+ ckeq icc0, cc4
+ P(ccmp) gr9, gr11, cc4, 1
+ ret
+.Lend:
+ .size EXT(__cmpll),.Lend-EXT(__cmpll)
+#endif /* L_cmpll */
+
+#ifdef L_cmpf
+/* icc0 = __cmpf (float a, float b) */
+/* Note, because this function returns the result in ICC0, it means it can't
+ handle NaNs. */
+
+ .globl EXT(__cmpf)
+ .type EXT(__cmpf),@function
+ .text
+ .p2align 4
+EXT(__cmpf):
+#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */
+ movgf gr8, fr0
+ P(movgf) gr9, fr1
+ setlos #1, gr8
+ fcmps fr0, fr1, fcc0
+ P(fcklt) fcc0, cc0
+ fckeq fcc0, cc1
+ csub gr0, gr8, gr8, cc0, 1
+ cmov gr0, gr8, cc1, 1
+ cmpi gr8, 0, icc0
+ ret
+#else /* no floating point instructions available */
+ movsg lr, gr4
+ addi sp, #-16, sp
+ sti gr4, @(sp, 8)
+ st fp, @(sp, gr0)
+ mov sp, fp
+ call EXT(__cmpsf2)
+ cmpi gr8, #0, icc0
+ ldi @(sp, 8), gr4
+ movgs gr4, lr
+ ld @(sp,gr0), fp
+ addi sp, #16, sp
+ ret
+#endif
+.Lend:
+ .size EXT(__cmpf),.Lend-EXT(__cmpf)
+#endif
+
+#ifdef L_cmpd
+/* icc0 = __cmpd (double a, double b) */
+/* Note, because this function returns the result in ICC0, it means it can't
+ handle NaNs. */
+
+ .globl EXT(__cmpd)
+ .type EXT(__cmpd),@function
+ .text
+ .p2align 4
+EXT(__cmpd):
+ movsg lr, gr4
+ addi sp, #-16, sp
+ sti gr4, @(sp, 8)
+ st fp, @(sp, gr0)
+ mov sp, fp
+ call EXT(__cmpdf2)
+ cmpi gr8, #0, icc0
+ ldi @(sp, 8), gr4
+ movgs gr4, lr
+ ld @(sp,gr0), fp
+ addi sp, #16, sp
+ ret
+.Lend:
+ .size EXT(__cmpd),.Lend-EXT(__cmpd)
+#endif
+
+#ifdef L_addll
+/* gr8,gr9 = __addll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__addll)
+ .type EXT(__addll),@function
+ .text
+ .p2align 4
+EXT(__addll):
+ addcc gr9, gr11, gr9, icc0
+ addx gr8, gr10, gr8, icc0
+ ret
+.Lend:
+ .size EXT(__addll),.Lend-EXT(__addll)
+#endif
+
+#ifdef L_subll
+/* gr8,gr9 = __subll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__subll)
+ .type EXT(__subll),@function
+ .text
+ .p2align 4
+EXT(__subll):
+ subcc gr9, gr11, gr9, icc0
+ subx gr8, gr10, gr8, icc0
+ ret
+.Lend:
+ .size EXT(__subll),.Lend-EXT(__subll)
+#endif
+
+#ifdef L_andll
+/* gr8,gr9 = __andll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__andll)
+ .type EXT(__andll),@function
+ .text
+ .p2align 4
+EXT(__andll):
+ P(and) gr9, gr11, gr9
+ P2(and) gr8, gr10, gr8
+ ret
+.Lend:
+ .size EXT(__andll),.Lend-EXT(__andll)
+#endif
+
+#ifdef L_orll
+/* gr8,gr9 = __orll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__orll)
+ .type EXT(__orll),@function
+ .text
+ .p2align 4
+EXT(__orll):
+ P(or) gr9, gr11, gr9
+ P2(or) gr8, gr10, gr8
+ ret
+.Lend:
+ .size EXT(__orll),.Lend-EXT(__orll)
+#endif
+
+#ifdef L_xorll
+/* gr8,gr9 = __xorll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__xorll)
+ .type EXT(__xorll),@function
+ .text
+ .p2align 4
+EXT(__xorll):
+ P(xor) gr9, gr11, gr9
+ P2(xor) gr8, gr10, gr8
+ ret
+.Lend:
+ .size EXT(__xorll),.Lend-EXT(__xorll)
+#endif
+
+#ifdef L_notll
+/* gr8,gr9 = __notll (long long a) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__notll)
+ .type EXT(__notll),@function
+ .text
+ .p2align 4
+EXT(__notll):
+ P(not) gr9, gr9
+ P2(not) gr8, gr8
+ ret
+.Lend:
+ .size EXT(__notll),.Lend-EXT(__notll)
+#endif
+
+#ifdef L_cmov
+/* (void) __cmov (char *dest, const char *src, size_t len) */
+/*
+ * void __cmov (char *dest, const char *src, size_t len)
+ * {
+ * size_t i;
+ *
+ * if (dest < src || dest > src+len)
+ * {
+ * for (i = 0; i < len; i++)
+ * dest[i] = src[i];
+ * }
+ * else
+ * {
+ * while (len-- > 0)
+ * dest[len] = src[len];
+ * }
+ * }
+ */
+
+ .globl EXT(__cmov)
+ .type EXT(__cmov),@function
+ .text
+ .p2align 4
+EXT(__cmov):
+ P(cmp) gr8, gr9, icc0
+ add gr9, gr10, gr4
+ P(cmp) gr8, gr4, icc1
+ bc icc0, 0, .Lfwd
+ bls icc1, 0, .Lback
+.Lfwd:
+ /* move bytes in a forward direction */
+ P(setlos) #0, gr5
+ cmp gr0, gr10, icc0
+ P(subi) gr9, #1, gr9
+ P2(subi) gr8, #1, gr8
+ bnc icc0, 0, .Lret
+.Lfloop:
+ /* forward byte move loop */
+ addi gr5, #1, gr5
+ P(ldsb) @(gr9, gr5), gr4
+ cmp gr5, gr10, icc0
+ P(stb) gr4, @(gr8, gr5)
+ bc icc0, 0, .Lfloop
+ ret
+.Lbloop:
+ /* backward byte move loop body */
+ ldsb @(gr9,gr10),gr4
+ stb gr4,@(gr8,gr10)
+.Lback:
+ P(cmpi) gr10, #0, icc0
+ addi gr10, #-1, gr10
+ bne icc0, 0, .Lbloop
+.Lret:
+ ret
+.Lend:
+ .size EXT(__cmov),.Lend-EXT(__cmov)
+#endif
diff --git a/libgcc/config/frv/t-frv b/libgcc/config/frv/t-frv
index b364a5a25b9..9773722d8e7 100644
--- a/libgcc/config/frv/t-frv
+++ b/libgcc/config/frv/t-frv
@@ -1,3 +1,6 @@
+LIB1ASMSRC = frv/lib1funcs.S
+LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov
+
# Compile two additional files that are linked with every program
# linked using GCC on systems using COFF or ELF, for the sake of C++
# constructors.
diff --git a/libgcc/config/h8300/lib1funcs.S b/libgcc/config/h8300/lib1funcs.S
new file mode 100644
index 00000000000..1b75b73269d
--- /dev/null
+++ b/libgcc/config/h8300/lib1funcs.S
@@ -0,0 +1,838 @@
+;; libgcc routines for the Renesas H8/300 CPU.
+;; Contributed by Steve Chamberlain <sac@cygnus.com>
+;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
+
+/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Assembler register definitions. */
+
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+
+#ifdef __H8300__
+#define PUSHP push
+#define POPP pop
+
+#define A0P r0
+#define A1P r1
+#define A2P r2
+#define A3P r3
+#define S0P r4
+#define S1P r5
+#define S2P r6
+#endif
+
+#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
+#define PUSHP push.l
+#define POPP pop.l
+
+#define A0P er0
+#define A1P er1
+#define A2P er2
+#define A3P er3
+#define S0P er4
+#define S1P er5
+#define S2P er6
+
+#define A0E e0
+#define A1E e1
+#define A2E e2
+#define A3E e3
+#endif
+
+#ifdef __H8300H__
+#ifdef __NORMAL_MODE__
+ .h8300hn
+#else
+ .h8300h
+#endif
+#endif
+
+#ifdef __H8300S__
+#ifdef __NORMAL_MODE__
+ .h8300sn
+#else
+ .h8300s
+#endif
+#endif
+#ifdef __H8300SX__
+#ifdef __NORMAL_MODE__
+ .h8300sxn
+#else
+ .h8300sx
+#endif
+#endif
+
+#ifdef L_cmpsi2
+#ifdef __H8300__
+ .section .text
+ .align 2
+ .global ___cmpsi2
+___cmpsi2:
+ cmp.w A0,A2
+ bne .L2
+ cmp.w A1,A3
+ bne .L4
+ mov.w #1,A0
+ rts
+.L2:
+ bgt .L5
+.L3:
+ mov.w #2,A0
+ rts
+.L4:
+ bls .L3
+.L5:
+ sub.w A0,A0
+ rts
+ .end
+#endif
+#endif /* L_cmpsi2 */
+
+#ifdef L_ucmpsi2
+#ifdef __H8300__
+ .section .text
+ .align 2
+ .global ___ucmpsi2
+___ucmpsi2:
+ cmp.w A0,A2
+ bne .L2
+ cmp.w A1,A3
+ bne .L4
+ mov.w #1,A0
+ rts
+.L2:
+ bhi .L5
+.L3:
+ mov.w #2,A0
+ rts
+.L4:
+ bls .L3
+.L5:
+ sub.w A0,A0
+ rts
+ .end
+#endif
+#endif /* L_ucmpsi2 */
+
+#ifdef L_divhi3
+
+;; HImode divides for the H8/300.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+; general purpose normalize routine
+;
+; divisor in A0
+; dividend in A1
+; turns both into +ve numbers, and leaves what the answer sign
+; should be in A2L
+
+#ifdef __H8300__
+ .section .text
+ .align 2
+divnorm:
+ or A0H,A0H ; is divisor > 0
+ stc ccr,A2L
+ bge _lab1
+ not A0H ; no - then make it +ve
+ not A0L
+ adds #1,A0
+_lab1: or A1H,A1H ; look at dividend
+ bge _lab2
+ not A1H ; it is -ve, make it positive
+ not A1L
+ adds #1,A1
+ xor #0x8,A2L; and toggle sign of result
+_lab2: rts
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+ or A0H,A0H ; is divisor > 0
+ stc ccr,A2L
+ bge _lab7
+ not A0H ; no - then make it +ve
+ not A0L
+ adds #1,A0
+_lab7: or A1H,A1H ; look at dividend
+ bge _lab8
+ not A1H ; it is -ve, make it positive
+ not A1L
+ adds #1,A1
+_lab8: rts
+
+; A0=A0/A1 signed
+
+ .global ___divhi3
+___divhi3:
+ bsr divnorm
+ bsr ___udivhi3
+negans: btst #3,A2L ; should answer be negative ?
+ beq _lab4
+ not A0H ; yes, so make it so
+ not A0L
+ adds #1,A0
+_lab4: rts
+
+; A0=A0%A1 signed
+
+ .global ___modhi3
+___modhi3:
+ bsr modnorm
+ bsr ___udivhi3
+ mov A3,A0
+ bra negans
+
+; A0=A0%A1 unsigned
+
+ .global ___umodhi3
+___umodhi3:
+ bsr ___udivhi3
+ mov A3,A0
+ rts
+
+; A0=A0/A1 unsigned
+; A3=A0%A1 unsigned
+; A2H trashed
+; D high 8 bits of denom
+; d low 8 bits of denom
+; N high 8 bits of num
+; n low 8 bits of num
+; M high 8 bits of mod
+; m low 8 bits of mod
+; Q high 8 bits of quot
+; q low 8 bits of quot
+; P preserve
+
+; The H8/300 only has a 16/8 bit divide, so we look at the incoming operands and
+; see how to partition up the expression.
+
+ .global ___udivhi3
+___udivhi3:
+ ; A0 A1 A2 A3
+ ; Nn Dd P
+ sub.w A3,A3 ; Nn Dd xP 00
+ or A1H,A1H
+ bne divlongway
+ or A0H,A0H
+ beq _lab6
+
+; we know that D == 0 and N is != 0
+ mov.b A0H,A3L ; Nn Dd xP 0N
+ divxu A1L,A3 ; MQ
+ mov.b A3L,A0H ; Q
+; dealt with N, do n
+_lab6: mov.b A0L,A3L ; n
+ divxu A1L,A3 ; mq
+ mov.b A3L,A0L ; Qq
+ mov.b A3H,A3L ; m
+ mov.b #0x0,A3H ; Qq 0m
+ rts
+
+; D != 0 - the denominator does not fit in 8 bits, so we have to
+; loop around to get the result.
+
+divlongway:
+ mov.b A0H,A3L ; Nn Dd xP 0N
+ mov.b #0x0,A0H ; high byte of answer has to be zero
+ mov.b #0x8,A2H ; 8
+div8: add.b A0L,A0L ; n*=2
+ rotxl A3L ; Make remainder bigger
+ rotxl A3H
+ sub.w A1,A3 ; Q-=N
+ bhs setbit ; set a bit ?
+ add.w A1,A3 ; no : too far , Q+=N
+
+ dec A2H
+ bne div8 ; next bit
+ rts
+
+setbit: inc A0L ; do insert bit
+ dec A2H
+ bne div8 ; next bit
+ rts
+
+#endif /* __H8300__ */
+#endif /* L_divhi3 */
+
+#ifdef L_divsi3
+
+;; 4 byte integer divides for the H8/300.
+;;
+;; We have one routine which does all the work and lots of
+;; little ones which prepare the args and massage the sign.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+ .section .text
+ .align 2
+
+; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in bit 3 of r6l.
+; This function is here to keep branch displacements small.
+
+#ifdef __H8300__
+
+divnorm:
+ mov.b A0H,A0H ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge postive
+
+ ; negate arg
+ not A0H
+ not A1H
+ not A0L
+ not A1L
+
+ add #1,A1L
+ addx #0,A1H
+ addx #0,A0L
+ addx #0,A0H
+postive:
+ mov.b A2H,A2H ; is the denominator -ve
+ bge postive2
+ not A2L
+ not A2H
+ not A3L
+ not A3H
+ add.b #1,A3L
+ addx #0,A3H
+ addx #0,A2L
+ addx #0,A2H
+ xor.b #0x08,S2L ; toggle the result sign
+postive2:
+ rts
+
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+ mov.b A0H,A0H ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge mpostive
+
+ ; negate arg
+ not A0H
+ not A1H
+ not A0L
+ not A1L
+
+ add #1,A1L
+ addx #0,A1H
+ addx #0,A0L
+ addx #0,A0H
+mpostive:
+ mov.b A2H,A2H ; is the denominator -ve
+ bge mpostive2
+ not A2L
+ not A2H
+ not A3L
+ not A3H
+ add.b #1,A3L
+ addx #0,A3H
+ addx #0,A2L
+ addx #0,A2H
+mpostive2:
+ rts
+
+#else /* __H8300H__ */
+
+divnorm:
+ mov.l A0P,A0P ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge postive
+
+ neg.l A0P ; negate arg
+
+postive:
+ mov.l A1P,A1P ; is the denominator -ve
+ bge postive2
+
+ neg.l A1P ; negate arg
+ xor.b #0x08,S2L ; toggle the result sign
+
+postive2:
+ rts
+
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+ mov.l A0P,A0P ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge mpostive
+
+ neg.l A0P ; negate arg
+
+mpostive:
+ mov.l A1P,A1P ; is the denominator -ve
+ bge mpostive2
+
+ neg.l A1P ; negate arg
+
+mpostive2:
+ rts
+
+#endif
+
+; numerator in A0/A1
+; denominator in A2/A3
+ .global ___modsi3
+___modsi3:
+#ifdef __H8300__
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ bsr modnorm
+ bsr divmodsi4
+ mov S0,A0
+ mov S1,A1
+ bra exitdiv
+#else
+ PUSHP S2P
+ bsr modnorm
+ bsr ___udivsi3
+ mov.l er3,er0
+ bra exitdiv
+#endif
+
+ ;; H8/300H and H8S version of ___udivsi3 is defined later in
+ ;; the file.
+#ifdef __H8300__
+ .global ___udivsi3
+___udivsi3:
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ bsr divmodsi4
+ bra reti
+#endif
+
+ .global ___umodsi3
+___umodsi3:
+#ifdef __H8300__
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ bsr divmodsi4
+ mov S0,A0
+ mov S1,A1
+ bra reti
+#else
+ bsr ___udivsi3
+ mov.l er3,er0
+ rts
+#endif
+
+ .global ___divsi3
+___divsi3:
+#ifdef __H8300__
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ jsr divnorm
+ jsr divmodsi4
+#else
+ PUSHP S2P
+ jsr divnorm
+ bsr ___udivsi3
+#endif
+
+ ; examine what the sign should be
+exitdiv:
+ btst #3,S2L
+ beq reti
+
+ ; should be -ve
+#ifdef __H8300__
+ not A0H
+ not A1H
+ not A0L
+ not A1L
+
+ add #1,A1L
+ addx #0,A1H
+ addx #0,A0L
+ addx #0,A0H
+#else /* __H8300H__ */
+ neg.l A0P
+#endif
+
+reti:
+#ifdef __H8300__
+ POPP S1P
+ POPP S0P
+#endif
+ POPP S2P
+ rts
+
+ ; takes A0/A1 numerator (A0P for H8/300H)
+ ; A2/A3 denominator (A1P for H8/300H)
+ ; returns A0/A1 quotient (A0P for H8/300H)
+ ; S0/S1 remainder (S0P for H8/300H)
+ ; trashes S2H
+
+#ifdef __H8300__
+
+divmodsi4:
+ sub.w S0,S0 ; zero play area
+ mov.w S0,S1
+ mov.b A2H,S2H
+ or A2L,S2H
+ or A3H,S2H
+ bne DenHighNonZero
+ mov.b A0H,A0H
+ bne NumByte0Zero
+ mov.b A0L,A0L
+ bne NumByte1Zero
+ mov.b A1H,A1H
+ bne NumByte2Zero
+ bra NumByte3Zero
+NumByte0Zero:
+ mov.b A0H,S1L
+ divxu A3L,S1
+ mov.b S1L,A0H
+NumByte1Zero:
+ mov.b A0L,S1L
+ divxu A3L,S1
+ mov.b S1L,A0L
+NumByte2Zero:
+ mov.b A1H,S1L
+ divxu A3L,S1
+ mov.b S1L,A1H
+NumByte3Zero:
+ mov.b A1L,S1L
+ divxu A3L,S1
+ mov.b S1L,A1L
+
+ mov.b S1H,S1L
+ mov.b #0x0,S1H
+ rts
+
+; have to do the divide by shift and test
+DenHighNonZero:
+ mov.b A0H,S1L
+ mov.b A0L,A0H
+ mov.b A1H,A0L
+ mov.b A1L,A1H
+
+ mov.b #0,A1L
+ mov.b #24,S2H ; only do 24 iterations
+
+nextbit:
+ add.w A1,A1 ; double the answer guess
+ rotxl A0L
+ rotxl A0H
+
+ rotxl S1L ; double remainder
+ rotxl S1H
+ rotxl S0L
+ rotxl S0H
+ sub.w A3,S1 ; does it all fit
+ subx A2L,S0L
+ subx A2H,S0H
+ bhs setone
+
+ add.w A3,S1 ; no, restore mistake
+ addx A2L,S0L
+ addx A2H,S0H
+
+ dec S2H
+ bne nextbit
+ rts
+
+setone:
+ inc A1L
+ dec S2H
+ bne nextbit
+ rts
+
+#else /* __H8300H__ */
+
+ ;; This function also computes the remainder and stores it in er3.
+ .global ___udivsi3
+___udivsi3:
+ mov.w A1E,A1E ; denominator top word 0?
+ bne DenHighNonZero
+
+ ; do it the easy way, see page 107 in manual
+ mov.w A0E,A2
+ extu.l A2P
+ divxu.w A1,A2P
+ mov.w A2E,A0E
+ divxu.w A1,A0P
+ mov.w A0E,A3
+ mov.w A2,A0E
+ extu.l A3P
+ rts
+
+ ; er0 = er0 / er1
+ ; er3 = er0 % er1
+ ; trashes er1 er2
+ ; expects er1 >= 2^16
+DenHighNonZero:
+ mov.l er0,er3
+ mov.l er1,er2
+#ifdef __H8300H__
+divmod_L21:
+ shlr.l er0
+ shlr.l er2 ; make divisor < 2^16
+ mov.w e2,e2
+ bne divmod_L21
+#else
+ shlr.l #2,er2 ; make divisor < 2^16
+ mov.w e2,e2
+ beq divmod_L22A
+divmod_L21:
+ shlr.l #2,er0
+divmod_L22:
+ shlr.l #2,er2 ; make divisor < 2^16
+ mov.w e2,e2
+ bne divmod_L21
+divmod_L22A:
+ rotxl.w r2
+ bcs divmod_L23
+ shlr.l er0
+ bra divmod_L24
+divmod_L23:
+ rotxr.w r2
+ shlr.l #2,er0
+divmod_L24:
+#endif
+ ;; At this point,
+ ;; er0 contains shifted dividend
+ ;; er1 contains divisor
+ ;; er2 contains shifted divisor
+ ;; er3 contains dividend, later remainder
+ divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
+ extu.l er0
+ beq divmod_L25
+ subs #1,er0 ; er0 = AQ - 1
+ mov.w e1,r2
+ mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
+ sub.w r2,e3 ; dividend - 65536 * er2
+ mov.w r1,r2
+ mulxu.w r0,er2 ; compute er3 = remainder (tentative)
+ sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
+divmod_L25:
+ cmp.l er1,er3 ; is divisor < remainder?
+ blo divmod_L26
+ adds #1,er0
+ sub.l er1,er3 ; correct the remainder
+divmod_L26:
+ rts
+
+#endif
+#endif /* L_divsi3 */
+
+#ifdef L_mulhi3
+
+;; HImode multiply.
+; The H8/300 only has an 8*8->16 multiply.
+; The answer is the same as:
+;
+; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
+; (we can ignore A1.h * A0.h cause that will all off the top)
+; A0 in
+; A1 in
+; A0 answer
+
+#ifdef __H8300__
+ .section .text
+ .align 2
+ .global ___mulhi3
+___mulhi3:
+ mov.b A1L,A2L ; A2l gets srcb.l
+ mulxu A0L,A2 ; A2 gets first sub product
+
+ mov.b A0H,A3L ; prepare for
+ mulxu A1L,A3 ; second sub product
+
+ add.b A3L,A2H ; sum first two terms
+
+ mov.b A1H,A3L ; third sub product
+ mulxu A0L,A3
+
+ add.b A3L,A2H ; almost there
+ mov.w A2,A0 ; that is
+ rts
+
+#endif
+#endif /* L_mulhi3 */
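In C, the formula in the comment above amounts to the sketch below (mulhi3_model is a hypothetical name; only the partial products that survive in the low 16 bits are computed):

#include <stdint.h>

static uint16_t mulhi3_model (uint16_t a, uint16_t b)
{
  uint8_t al = a & 0xff, ah = a >> 8;
  uint8_t bl = b & 0xff, bh = b >> 8;

  uint16_t prod = (uint16_t) (al * bl);       /* srca.l * srcb.l         */
  prod += (uint16_t) ((ah * bl) << 8);        /* (srca.h * srcb.l) * 256 */
  prod += (uint16_t) ((bh * al) << 8);        /* (srcb.h * srca.l) * 256 */
  return prod;                                /* srca.h * srcb.h is lost */
}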
+
+#ifdef L_mulsi3
+
+;; SImode multiply.
+;;
+;; I think that shift and add may be sufficient for this. Using the
+;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
+;; the inner loop uses maybe 20 cycles + overhead, but terminates
+;; quickly on small args.
+;;
+;; A0/A1 src_a
+;; A2/A3 src_b
+;;
+;; while (a)
+;; {
+;; if (a & 1)
+;; r += b;
+;; a >>= 1;
+;; b <<= 1;
+;; }
+
+ .section .text
+ .align 2
+
+#ifdef __H8300__
+
+ .global ___mulsi3
+___mulsi3:
+ PUSHP S0P
+ PUSHP S1P
+
+ sub.w S0,S0
+ sub.w S1,S1
+
+ ; while (a)
+_top: mov.w A0,A0
+ bne _more
+ mov.w A1,A1
+ beq _done
+_more: ; if (a & 1)
+ bld #0,A1L
+ bcc _nobit
+ ; r += b
+ add.w A3,S1
+ addx A2L,S0L
+ addx A2H,S0H
+_nobit:
+ ; a >>= 1
+ shlr A0H
+ rotxr A0L
+ rotxr A1H
+ rotxr A1L
+
+ ; b <<= 1
+ add.w A3,A3
+ addx A2L,A2L
+ addx A2H,A2H
+ bra _top
+
+_done:
+ mov.w S0,A0
+ mov.w S1,A1
+ POPP S1P
+ POPP S0P
+ rts
+
+#else /* __H8300H__ */
+
+;
+; mulsi3 for H8/300H - based on Renesas SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b = 48 states
+; 16b * 32b = 72 states
+; 32b * 32b = 92 states
+;
+
+ .global ___mulsi3
+___mulsi3:
+ mov.w r1,r2 ; ( 2 states) b * d
+ mulxu r0,er2 ; (22 states)
+
+ mov.w e0,r3 ; ( 2 states) a * d
+ beq L_skip1 ; ( 4 states)
+ mulxu r1,er3 ; (22 states)
+ add.w r3,e2 ; ( 2 states)
+
+L_skip1:
+ mov.w e1,r3 ; ( 2 states) c * b
+ beq L_skip2 ; ( 4 states)
+ mulxu r0,er3 ; (22 states)
+ add.w r3,e2 ; ( 2 states)
+
+L_skip2:
+ mov.l er2,er0 ; ( 2 states)
+ rts ; (10 states)
+
+#endif
+#endif /* L_mulsi3 */
+#ifdef L_fixunssfsi_asm
+/* For the h8300 we use asm to save some bytes, to
+ allow more programs to fit into the tiny address
+ space. For the H8/300H and H8S, the C version is good enough. */
+#ifdef __H8300__
+/* We still treat NaNs differently than libgcc2.c does, but then, the
+ behavior is undefined anyway. */
+ .global ___fixunssfsi
+___fixunssfsi:
+ cmp.b #0x4f,r0h
+ bge Large_num
+ jmp @___fixsfsi
+Large_num:
+ bhi L_huge_num
+ xor.b #0x80,A0L
+ bmi L_shift8
+L_huge_num:
+ mov.w #65535,A0
+ mov.w A0,A1
+ rts
+L_shift8:
+ mov.b A0L,A0H
+ mov.b A1H,A0L
+ mov.b A1L,A1H
+ mov.b #0,A1L
+ rts
+#endif
+#endif /* L_fixunssfsi_asm */
diff --git a/libgcc/config/h8300/t-h8300 b/libgcc/config/h8300/t-h8300
new file mode 100644
index 00000000000..4602ff8b9ef
--- /dev/null
+++ b/libgcc/config/h8300/t-h8300
@@ -0,0 +1,3 @@
+LIB1ASMSRC = h8300/lib1funcs.S
+LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \
+ _fixunssfsi_asm
diff --git a/libgcc/config/i386/cygwin.S b/libgcc/config/i386/cygwin.S
new file mode 100644
index 00000000000..8f9c486850e
--- /dev/null
+++ b/libgcc/config/i386/cygwin.S
@@ -0,0 +1,188 @@
+/* stuff needed for libgcc on win32.
+ *
+ * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010
+ * Free Software Foundation, Inc.
+ * Written By Steve Chamberlain
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "auto-host.h"
+
+#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE
+ .cfi_sections .debug_frame
+# define cfi_startproc() .cfi_startproc
+# define cfi_endproc() .cfi_endproc
+# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X
+# define cfi_def_cfa_register(X) .cfi_def_cfa_register X
+# define cfi_register(D,S) .cfi_register D, S
+# ifdef _WIN64
+# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0
+# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X
+# else
+# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0
+# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X
+# endif
+#else
+# define cfi_startproc()
+# define cfi_endproc()
+# define cfi_adjust_cfa_offset(X)
+# define cfi_def_cfa_register(X)
+# define cfi_register(D,S)
+# define cfi_push(X)
+# define cfi_pop(X)
+#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */
+
+#ifdef L_chkstk
+/* Function prologue calls __chkstk to probe the stack when allocating more
+ than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K
+ increments is necessary to ensure that the guard pages used
+ by the OS virtual memory manager are allocated in the correct sequence. */
+
+ .global ___chkstk
+ .global __alloca
+#ifdef _WIN64
+/* __alloca is a normal function call, which uses %rcx as the argument. */
+ cfi_startproc()
+__alloca:
+ movq %rcx, %rax
+ /* FALLTHRU */
+
+/* ___chkstk is a *special* function call, which uses %rax as the argument.
+ We avoid clobbering the 4 integer argument registers, %rcx, %rdx,
+ %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */
+ .align 4
+___chkstk:
+ popq %r11 /* pop return address */
+ cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */
+ cfi_register(%rip, %r11)
+ movq %rsp, %r10
+ cmpq $0x1000, %rax /* > 4k ?*/
+ jb 2f
+
+1: subq $0x1000, %r10 /* yes, move pointer down 4k*/
+ orl $0x0, (%r10) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja 1b /* and do it again */
+
+2: subq %rax, %r10
+ movq %rsp, %rax /* hold CFA until return */
+ cfi_def_cfa_register(%rax)
+ orl $0x0, (%r10) /* less than 4k, just peek here */
+ movq %r10, %rsp /* decrement stack */
+
+ /* Push the return value back. Doing this instead of just
+ jumping to %r11 preserves the cached call-return stack
+ used by most modern processors. */
+ pushq %r11
+ ret
+ cfi_endproc()
+#else
+ cfi_startproc()
+___chkstk:
+__alloca:
+ pushl %ecx /* save temp */
+ cfi_push(%ecx)
+ leal 8(%esp), %ecx /* point past return addr */
+ cmpl $0x1000, %eax /* > 4k ?*/
+ jb 2f
+
+1: subl $0x1000, %ecx /* yes, move pointer down 4k*/
+ orl $0x0, (%ecx) /* probe there */
+ subl $0x1000, %eax /* decrement count */
+ cmpl $0x1000, %eax
+ ja 1b /* and do it again */
+
+2: subl %eax, %ecx
+ orl $0x0, (%ecx) /* less than 4k, just peek here */
+ movl %esp, %eax /* save current stack pointer */
+ cfi_def_cfa_register(%eax)
+ movl %ecx, %esp /* decrement stack */
+ movl (%eax), %ecx /* recover saved temp */
+
+ /* Copy the return register. Doing this instead of just jumping to
+ the address preserves the cached call-return stack used by most
+ modern processors. */
+ pushl 4(%eax)
+ ret
+ cfi_endproc()
+#endif /* _WIN64 */
+#endif /* L_chkstk */
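The probing strategy described above, sketched in C under the assumption that the stack grows downward and the OS commits guard pages one 4 KiB page at a time (probe_stack_model is illustrative; the real code must stay in assembly so it can control the stack pointer directly):

static void probe_stack_model (char *sp, unsigned long size)
{
  while (size > 0x1000)
    {
      sp -= 0x1000;
      *(volatile char *) sp |= 0;   /* "orl $0x0, (%ecx)": touch the page */
      size -= 0x1000;
    }
  sp -= size;
  *(volatile char *) sp |= 0;       /* final touch at the new stack top */
}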
+
+#ifdef L_chkstk_ms
+/* ___chkstk_ms is a *special* function call, which uses %rax as the argument.
+ We avoid clobbering any registers. Unlike ___chkstk, it just probes the
+ stack and does no stack allocation. */
+ .global ___chkstk_ms
+#ifdef _WIN64
+ cfi_startproc()
+___chkstk_ms:
+ pushq %rcx /* save temps */
+ cfi_push(%rcx)
+ pushq %rax
+ cfi_push(%rax)
+ cmpq $0x1000, %rax /* > 4k ?*/
+ leaq 24(%rsp), %rcx /* point past return addr */
+ jb 2f
+
+1: subq $0x1000, %rcx /* yes, move pointer down 4k */
+ orq $0x0, (%rcx) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja 1b /* and do it again */
+
+2: subq %rax, %rcx
+ orq $0x0, (%rcx) /* less than 4k, just peek here */
+
+ popq %rax
+ cfi_pop(%rax)
+ popq %rcx
+ cfi_pop(%rcx)
+ ret
+ cfi_endproc()
+#else
+ cfi_startproc()
+___chkstk_ms:
+ pushl %ecx /* save temp */
+ cfi_push(%ecx)
+ pushl %eax
+ cfi_push(%eax)
+ cmpl $0x1000, %eax /* > 4k ?*/
+ leal 12(%esp), %ecx /* point past return addr */
+ jb 2f
+
+1: subl $0x1000, %ecx /* yes, move pointer down 4k*/
+ orl $0x0, (%ecx) /* probe there */
+ subl $0x1000, %eax /* decrement count */
+ cmpl $0x1000, %eax
+ ja 1b /* and do it again */
+
+2: subl %eax, %ecx
+ orl $0x0, (%ecx) /* less than 4k, just peek here */
+
+ popl %eax
+ cfi_pop(%eax)
+ popl %ecx
+ cfi_pop(%ecx)
+ ret
+ cfi_endproc()
+#endif /* _WIN64 */
+#endif /* L_chkstk_ms */
diff --git a/libgcc/config/i386/t-chkstk b/libgcc/config/i386/t-chkstk
new file mode 100644
index 00000000000..822981faab8
--- /dev/null
+++ b/libgcc/config/i386/t-chkstk
@@ -0,0 +1,2 @@
+LIB1ASMSRC = i386/cygwin.S
+LIB1ASMFUNCS = _chkstk _chkstk_ms
diff --git a/libgcc/config/ia64/__divxf3.asm b/libgcc/config/ia64/__divxf3.S
index f741bdaf9bc..9cba8f59423 100644
--- a/libgcc/config/ia64/__divxf3.asm
+++ b/libgcc/config/ia64/__divxf3.S
@@ -3,7 +3,7 @@
#endif
#define L__divxf3
-#include "config/ia64/lib1funcs.asm"
+#include "config/ia64/lib1funcs.S"
#ifdef SHARED
#undef __divtf3
diff --git a/libgcc/config/ia64/_fixtfdi.asm b/libgcc/config/ia64/_fixtfdi.S
index 4d13c808c51..863b70f7edc 100644
--- a/libgcc/config/ia64/_fixtfdi.asm
+++ b/libgcc/config/ia64/_fixtfdi.S
@@ -3,7 +3,7 @@
#endif
#define L_fixtfdi
-#include "config/ia64/lib1funcs.asm"
+#include "config/ia64/lib1funcs.S"
#ifdef SHARED
#undef __fixtfti
diff --git a/libgcc/config/ia64/_fixunstfdi.asm b/libgcc/config/ia64/_fixunstfdi.S
index b722d9e90dc..aac6a284eaa 100644
--- a/libgcc/config/ia64/_fixunstfdi.asm
+++ b/libgcc/config/ia64/_fixunstfdi.S
@@ -3,7 +3,7 @@
#endif
#define L_fixunstfdi
-#include "config/ia64/lib1funcs.asm"
+#include "config/ia64/lib1funcs.S"
#ifdef SHARED
#undef __fixunstfti
diff --git a/libgcc/config/ia64/_floatditf.asm b/libgcc/config/ia64/_floatditf.S
index 21d77028176..e37404d26d5 100644
--- a/libgcc/config/ia64/_floatditf.asm
+++ b/libgcc/config/ia64/_floatditf.S
@@ -3,7 +3,7 @@
#endif
#define L_floatditf
-#include "config/ia64/lib1funcs.asm"
+#include "config/ia64/lib1funcs.S"
#ifdef SHARED
#undef __floattitf
diff --git a/libgcc/config/ia64/lib1funcs.S b/libgcc/config/ia64/lib1funcs.S
new file mode 100644
index 00000000000..b7eaa6eca3c
--- /dev/null
+++ b/libgcc/config/ia64/lib1funcs.S
@@ -0,0 +1,795 @@
+/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by James E. Wilson <wilson@cygnus.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef L__divxf3
+// Compute a 80-bit IEEE double-extended quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend. farg1 holds the divisor.
+//
+// __divtf3 is an alternate symbol name for backward compatibility.
+
+ .text
+ .align 16
+ .global __divxf3
+ .proc __divxf3
+__divxf3:
+#ifdef SHARED
+ .global __divtf3
+__divtf3:
+#endif
+ cmp.eq p7, p0 = r0, r0
+ frcpa.s0 f10, p6 = farg0, farg1
+ ;;
+(p6) cmp.ne p7, p0 = r0, r0
+ .pred.rel.mutex p6, p7
+(p6) fnma.s1 f11 = farg1, f10, f1
+(p6) fma.s1 f12 = farg0, f10, f0
+ ;;
+(p6) fma.s1 f13 = f11, f11, f0
+(p6) fma.s1 f14 = f11, f11, f11
+ ;;
+(p6) fma.s1 f11 = f13, f13, f11
+(p6) fma.s1 f13 = f14, f10, f10
+ ;;
+(p6) fma.s1 f10 = f13, f11, f10
+(p6) fnma.s1 f11 = farg1, f12, farg0
+ ;;
+(p6) fma.s1 f11 = f11, f10, f12
+(p6) fnma.s1 f12 = farg1, f10, f1
+ ;;
+(p6) fma.s1 f10 = f12, f10, f10
+(p6) fnma.s1 f12 = farg1, f11, farg0
+ ;;
+(p6) fma.s0 fret0 = f12, f10, f11
+(p7) mov fret0 = f10
+ br.ret.sptk rp
+ .endp __divxf3
+#endif
+
+#ifdef L__divdf3
+// Compute a 64-bit IEEE double quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend. farg1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divdf3
+ .proc __divdf3
+__divdf3:
+ cmp.eq p7, p0 = r0, r0
+ frcpa.s0 f10, p6 = farg0, farg1
+ ;;
+(p6) cmp.ne p7, p0 = r0, r0
+ .pred.rel.mutex p6, p7
+(p6) fmpy.s1 f11 = farg0, f10
+(p6) fnma.s1 f12 = farg1, f10, f1
+ ;;
+(p6) fma.s1 f11 = f12, f11, f11
+(p6) fmpy.s1 f13 = f12, f12
+ ;;
+(p6) fma.s1 f10 = f12, f10, f10
+(p6) fma.s1 f11 = f13, f11, f11
+ ;;
+(p6) fmpy.s1 f12 = f13, f13
+(p6) fma.s1 f10 = f13, f10, f10
+ ;;
+(p6) fma.d.s1 f11 = f12, f11, f11
+(p6) fma.s1 f10 = f12, f10, f10
+ ;;
+(p6) fnma.d.s1 f8 = farg1, f11, farg0
+ ;;
+(p6) fma.d fret0 = f8, f10, f11
+(p7) mov fret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divdf3
+#endif
+
+#ifdef L__divsf3
+// Compute a 32-bit IEEE float quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend. farg1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divsf3
+ .proc __divsf3
+__divsf3:
+ cmp.eq p7, p0 = r0, r0
+ frcpa.s0 f10, p6 = farg0, farg1
+ ;;
+(p6) cmp.ne p7, p0 = r0, r0
+ .pred.rel.mutex p6, p7
+(p6) fmpy.s1 f8 = farg0, f10
+(p6) fnma.s1 f9 = farg1, f10, f1
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fmpy.s1 f9 = f9, f9
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fmpy.s1 f9 = f9, f9
+ ;;
+(p6) fma.d.s1 f10 = f9, f8, f8
+ ;;
+(p6) fnorm.s.s0 fret0 = f10
+(p7) mov fret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divsf3
+#endif
+
+#ifdef L__divdi3
+// Compute a 64-bit integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divdi3
+ .proc __divdi3
+__divdi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, so that they won't be treated as unsigned.
+ fcvt.xf f8 = f8
+ fcvt.xf f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fnma.s1 f11 = f9, f10, f1
+(p6) fmpy.s1 f12 = f8, f10
+ ;;
+(p6) fmpy.s1 f13 = f11, f11
+(p6) fma.s1 f12 = f11, f12, f12
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ // Round quotient to an integer.
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divdi3
+#endif
+
+#ifdef L__moddi3
+// Compute a 64-bit integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend (a). in1 holds the divisor (b).
+
+ .text
+ .align 16
+ .global __moddi3
+ .proc __moddi3
+__moddi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f14 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, so that they won't be treated as unsigned.
+ fcvt.xf f8 = f14
+ fcvt.xf f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f11 = f9, f10, f1
+ ;;
+(p6) fma.s1 f12 = f11, f12, f12
+(p6) fmpy.s1 f13 = f11, f11
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+ sub in1 = r0, in1
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ // r = q * (-b) + a
+ xma.l f10 = f10, f9, f14
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __moddi3
+#endif
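
The remainder is recovered without a second division: in1 is negated and reloaded into f9 so that the single xma.l above can form r = q*(-b) + a once the quotient q has been truncated. A minimal C sketch of the same identity (helper name invented; q is assumed to be the already-truncated quotient):

    /* Remainder from a known quotient, mirroring "xma.l f10 = f10, f9, f14"
       with f9 holding -b:  r = q*(-b) + a  ==  a - q*b. */
    long long remainder_from_quotient(long long a, long long b, long long q)
    {
        return a - q * b;
    }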
+
+#ifdef L__udivdi3
+// Compute a 64-bit unsigned integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __udivdi3
+ .proc __udivdi3
+__udivdi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, to avoid FP software-assist faults.
+ fcvt.xuf.s1 f8 = f8
+ fcvt.xuf.s1 f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fnma.s1 f11 = f9, f10, f1
+(p6) fmpy.s1 f12 = f8, f10
+ ;;
+(p6) fmpy.s1 f13 = f11, f11
+(p6) fma.s1 f12 = f11, f12, f12
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ // Round quotient to an unsigned integer.
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __udivdi3
+#endif
+
+#ifdef L__umoddi3
+// Compute a 64-bit unsigned integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend (a). in1 holds the divisor (b).
+
+ .text
+ .align 16
+ .global __umoddi3
+ .proc __umoddi3
+__umoddi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f14 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, to avoid FP software assist faults.
+ fcvt.xuf.s1 f8 = f14
+ fcvt.xuf.s1 f9 = f9
+(p7) break 1;
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f11 = f9, f10, f1
+ ;;
+(p6) fma.s1 f12 = f11, f12, f12
+(p6) fmpy.s1 f13 = f11, f11
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+ sub in1 = r0, in1
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ // Round quotient to an unsigned integer.
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ // r = q * (-b) + a
+ xma.l f10 = f10, f9, f14
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __umoddi3
+#endif
+
+#ifdef L__divsi3
+// Compute a 32-bit integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divsi3
+ .proc __divsi3
+__divsi3:
+ .regstk 2,0,0,0
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ sxt4 in0 = in0
+ sxt4 in1 = in1
+ ;;
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+(p7) break 1
+ ;;
+ mov r2 = 0x0ffdd
+ fcvt.xf f8 = f8
+ fcvt.xf f9 = f9
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+(p6) fmpy.s1 f8 = f8, f10
+(p6) fnma.s1 f9 = f9, f10, f1
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fma.s1 f9 = f9, f9, f11
+ ;;
+(p6) fma.s1 f10 = f9, f8, f8
+ ;;
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divsi3
+#endif
+
+#ifdef L__modsi3
+// Compute a 32-bit integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __modsi3
+ .proc __modsi3
+__modsi3:
+ .regstk 2,0,0,0
+ mov r2 = 0x0ffdd
+ sxt4 in0 = in0
+ sxt4 in1 = in1
+ ;;
+ setf.sig f13 = r32
+ setf.sig f9 = r33
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ sub in1 = r0, in1
+ fcvt.xf f8 = f13
+ fcvt.xf f9 = f9
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+(p7) break 1
+ ;;
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f10 = f9, f10, f1
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f12 = f10, f12, f12
+(p6) fma.s1 f10 = f10, f10, f11
+ ;;
+(p6) fma.s1 f10 = f10, f12, f12
+ ;;
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ xma.l f10 = f10, f9, f13
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __modsi3
+#endif
+
+#ifdef L__udivsi3
+// Compute a 32-bit unsigned integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __udivsi3
+ .proc __udivsi3
+__udivsi3:
+ .regstk 2,0,0,0
+ mov r2 = 0x0ffdd
+ zxt4 in0 = in0
+ zxt4 in1 = in1
+ ;;
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ fcvt.xf f8 = f8
+ fcvt.xf f9 = f9
+(p7) break 1
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+(p6) fmpy.s1 f8 = f8, f10
+(p6) fnma.s1 f9 = f9, f10, f1
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fma.s1 f9 = f9, f9, f11
+ ;;
+(p6) fma.s1 f10 = f9, f8, f8
+ ;;
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __udivsi3
+#endif
+
+#ifdef L__umodsi3
+// Compute a 32-bit unsigned integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __umodsi3
+ .proc __umodsi3
+__umodsi3:
+ .regstk 2,0,0,0
+ mov r2 = 0x0ffdd
+ zxt4 in0 = in0
+ zxt4 in1 = in1
+ ;;
+ setf.sig f13 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ sub in1 = r0, in1
+ fcvt.xf f8 = f13
+ fcvt.xf f9 = f9
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+(p7) break 1;
+ ;;
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f10 = f9, f10, f1
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f12 = f10, f12, f12
+(p6) fma.s1 f10 = f10, f10, f11
+ ;;
+(p6) fma.s1 f10 = f10, f12, f12
+ ;;
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ xma.l f10 = f10, f9, f13
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __umodsi3
+#endif
+
+#ifdef L__save_stack_nonlocal
+// Notes on save/restore stack nonlocal: We read ar.bsp but write
+// ar.bspstore. This is because ar.bsp can be read at all times
+// (independent of the RSE mode) but since it's read-only we need to
+// restore the value via ar.bspstore. This is OK because
+// ar.bsp==ar.bspstore after executing "flushrs".
+
+// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
+
+ .text
+ .align 16
+ .global __ia64_save_stack_nonlocal
+ .proc __ia64_save_stack_nonlocal
+__ia64_save_stack_nonlocal:
+ { .mmf
+ alloc r18 = ar.pfs, 2, 0, 0, 0
+ mov r19 = ar.rsc
+ ;;
+ }
+ { .mmi
+ flushrs
+ st8 [in0] = in1, 24
+ and r19 = 0x1c, r19
+ ;;
+ }
+ { .mmi
+ st8 [in0] = r18, -16
+ mov ar.rsc = r19
+ or r19 = 0x3, r19
+ ;;
+ }
+ { .mmi
+ mov r16 = ar.bsp
+ mov r17 = ar.rnat
+ adds r2 = 8, in0
+ ;;
+ }
+ { .mmi
+ st8 [in0] = r16
+ st8 [r2] = r17
+ }
+ { .mib
+ mov ar.rsc = r19
+ br.ret.sptk.few rp
+ ;;
+ }
+ .endp __ia64_save_stack_nonlocal
+#endif
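
The sequence above fills a four-slot save area: the stack pointer at offset 0, ar.bsp at offset 8, ar.rnat at offset 16 and ar.pfs at offset 24, which is the order __ia64_nonlocal_goto reads them back in. A C-style picture of that layout (struct and field names are invented for illustration):

    /* Save area written by __ia64_save_stack_nonlocal; offsets follow the
       st8 post-increments in the assembly above. */
    struct ia64_nonlocal_save_area {
        void *stack_pointer;          /* offset  0: incoming in1            */
        void *backing_store_pointer;  /* offset  8: ar.bsp, restored later
                                         through ar.bspstore                */
        unsigned long rnat;           /* offset 16: ar.rnat                 */
        unsigned long pfs;            /* offset 24: ar.pfs                  */
    };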
+
+#ifdef L__nonlocal_goto
+// void __ia64_nonlocal_goto(void *target_label, void *save_area,
+// void *static_chain);
+
+ .text
+ .align 16
+ .global __ia64_nonlocal_goto
+ .proc __ia64_nonlocal_goto
+__ia64_nonlocal_goto:
+ { .mmi
+ alloc r20 = ar.pfs, 3, 0, 0, 0
+ ld8 r12 = [in1], 8
+ mov.ret.sptk rp = in0, .L0
+ ;;
+ }
+ { .mmf
+ ld8 r16 = [in1], 8
+ mov r19 = ar.rsc
+ ;;
+ }
+ { .mmi
+ flushrs
+ ld8 r17 = [in1], 8
+ and r19 = 0x1c, r19
+ ;;
+ }
+ { .mmi
+ ld8 r18 = [in1]
+ mov ar.rsc = r19
+ or r19 = 0x3, r19
+ ;;
+ }
+ { .mmi
+ mov ar.bspstore = r16
+ ;;
+ mov ar.rnat = r17
+ ;;
+ }
+ { .mmi
+ loadrs
+ invala
+ mov r15 = in2
+ ;;
+ }
+.L0: { .mib
+ mov ar.rsc = r19
+ mov ar.pfs = r18
+ br.ret.sptk.few rp
+ ;;
+ }
+ .endp __ia64_nonlocal_goto
+#endif
+
+#ifdef L__restore_stack_nonlocal
+// This is mostly the same as nonlocal_goto above.
+// ??? This has not been tested yet.
+
+// void __ia64_restore_stack_nonlocal(void *save_area)
+
+ .text
+ .align 16
+ .global __ia64_restore_stack_nonlocal
+ .proc __ia64_restore_stack_nonlocal
+__ia64_restore_stack_nonlocal:
+ { .mmf
+ alloc r20 = ar.pfs, 4, 0, 0, 0
+ ld8 r12 = [in0], 8
+ ;;
+ }
+ { .mmb
+ ld8 r16=[in0], 8
+ mov r19 = ar.rsc
+ ;;
+ }
+ { .mmi
+ flushrs
+ ld8 r17 = [in0], 8
+ and r19 = 0x1c, r19
+ ;;
+ }
+ { .mmf
+ ld8 r18 = [in0]
+ mov ar.rsc = r19
+ ;;
+ }
+ { .mmi
+ mov ar.bspstore = r16
+ ;;
+ mov ar.rnat = r17
+ or r19 = 0x3, r19
+ ;;
+ }
+ { .mmf
+ loadrs
+ invala
+ ;;
+ }
+.L0: { .mib
+ mov ar.rsc = r19
+ mov ar.pfs = r18
+ br.ret.sptk.few rp
+ ;;
+ }
+ .endp __ia64_restore_stack_nonlocal
+#endif
+
+#ifdef L__trampoline
+// Implement the nested function trampoline. This is out of line
+// so that we don't have to bother with flushing the icache, and
+// so that the on-stack trampoline can be smaller.
+//
+// The trampoline has the following form:
+//
+// +-------------------+ >
+// TRAMP: | __ia64_trampoline | |
+// +-------------------+ > fake function descriptor
+// | TRAMP+16 | |
+// +-------------------+ >
+// | target descriptor |
+// +-------------------+
+// | static link |
+// +-------------------+
+
+ .text
+ .align 16
+ .global __ia64_trampoline
+ .proc __ia64_trampoline
+__ia64_trampoline:
+ { .mmi
+ ld8 r2 = [r1], 8
+ ;;
+ ld8 r15 = [r1]
+ }
+ { .mmi
+ ld8 r3 = [r2], 8
+ ;;
+ ld8 r1 = [r2]
+ mov b6 = r3
+ }
+ { .bbb
+ br.sptk.many b6
+ ;;
+ }
+ .endp __ia64_trampoline
+#endif
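
The diagram above describes a 32-byte on-stack object: a fake function descriptor whose entry point is __ia64_trampoline and whose gp slot points just past itself, followed by a pointer to the real target descriptor and the static chain. A C-style sketch of that layout (struct and field names are invented for illustration):

    /* On-stack trampoline as drawn above.  An indirect call through the fake
       descriptor loads gp = TRAMP+16 into r1, so __ia64_trampoline can fetch
       the real descriptor (into r2/b6) and the static chain (into r15). */
    struct ia64_trampoline_layout {
        void *entry;          /* address of __ia64_trampoline           */
        void *fake_gp;        /* TRAMP + 16, i.e. &target_fdesc below   */
        void *target_fdesc;   /* descriptor of the nested function      */
        void *static_link;    /* static chain value, ends up in r15     */
    };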
+
+#ifdef SHARED
+// Thunks for backward compatibility.
+#ifdef L_fixtfdi
+ .text
+ .align 16
+ .global __fixtfti
+ .proc __fixtfti
+__fixtfti:
+ { .bbb
+ br.sptk.many __fixxfti
+ ;;
+ }
+ .endp __fixtfti
+#endif
+#ifdef L_fixunstfdi
+ .align 16
+ .global __fixunstfti
+ .proc __fixunstfti
+__fixunstfti:
+ { .bbb
+ br.sptk.many __fixunsxfti
+ ;;
+ }
+ .endp __fixunstfti
+#endif
+#ifdef L_floatditf
+ .align 16
+ .global __floattitf
+ .proc __floattitf
+__floattitf:
+ { .bbb
+ br.sptk.many __floattixf
+ ;;
+ }
+ .endp __floattitf
+#endif
+#endif
diff --git a/libgcc/config/ia64/t-hpux b/libgcc/config/ia64/t-hpux
index ef3387e7a61..1fee41385c0 100644
--- a/libgcc/config/ia64/t-hpux
+++ b/libgcc/config/ia64/t-hpux
@@ -1 +1,6 @@
+# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from
+# LIB1ASMSRC. These functions map the 128-bit conversion function names
+# to 80-bit conversions and exist for Linux backward compatibility.
+LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS))
+
LIB2ADDEH = $(srcdir)/unwind-c.c
diff --git a/libgcc/config/ia64/t-ia64 b/libgcc/config/ia64/t-ia64
index 59cf3aa75f4..80445d8a2a8 100644
--- a/libgcc/config/ia64/t-ia64
+++ b/libgcc/config/ia64/t-ia64
@@ -1,3 +1,16 @@
+LIB1ASMSRC = ia64/lib1funcs.S
+
+# We use different names for the DImode div/mod files so that they won't
+# conflict with libgcc2.c files. We used to use __ia64 as a prefix; now
+# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines
+# a TImode divide function, so there is no actual overlap here between
+# libgcc2.c and lib1funcs.S.
+LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \
+ __divdi3 __moddi3 __udivdi3 __umoddi3 \
+ __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \
+ __nonlocal_goto __restore_stack_nonlocal __trampoline \
+ _fixtfdi _fixunstfdi _floatditf
+
CUSTOM_CRTSTUFF = yes
# Assemble startup files.
diff --git a/libgcc/config/ia64/t-softfp-compat b/libgcc/config/ia64/t-softfp-compat
index d3dad68c48f..00f45d51cd0 100644
--- a/libgcc/config/ia64/t-softfp-compat
+++ b/libgcc/config/ia64/t-softfp-compat
@@ -3,5 +3,5 @@
 # Replace __divxf3 _fixtfdi _fixunstfdi _floatditf
libgcc1-tf-functions = __divxf3 _fixtfdi _fixunstfdi _floatditf
LIB1ASMFUNCS := $(filter-out $(libgcc1-tf-functions), $(LIB1ASMFUNCS))
-libgcc1-tf-compats = $(addsuffix .asm, $(libgcc1-tf-functions))
+libgcc1-tf-compats = $(addsuffix .S, $(libgcc1-tf-functions))
LIB2ADD += $(addprefix $(srcdir)/config/ia64/, $(libgcc1-tf-compats))
diff --git a/libgcc/config/m32c/lib1funcs.S b/libgcc/config/m32c/lib1funcs.S
new file mode 100644
index 00000000000..9b657787187
--- /dev/null
+++ b/libgcc/config/m32c/lib1funcs.S
@@ -0,0 +1,231 @@
+/* libgcc routines for R8C/M16C/M32C
+ Copyright (C) 2005, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined(__r8c_cpu__) || defined(__m16c_cpu__)
+#define A16
+#define A(n,w) n
+#define W w
+#else
+#define A24
+#define A(n,w) w
+#define W l
+#endif
+
+
+#ifdef L__m32c_memregs
+
+/* Warning: these memory locations are used as a register bank. They
+ *must* end up consecutive in any final executable, so you may *not*
+ use the otherwise obvious ".comm" directive to allocate space for
+ them. */
+
+ .bss
+ .global mem0
+mem0: .space 1
+ .global mem1
+mem1: .space 1
+ .global mem2
+mem2: .space 1
+ .global mem3
+mem3: .space 1
+ .global mem4
+mem4: .space 1
+ .global mem5
+mem5: .space 1
+ .global mem6
+mem6: .space 1
+ .global mem7
+mem7: .space 1
+ .global mem8
+mem8: .space 1
+ .global mem9
+mem9: .space 1
+ .global mem10
+mem10: .space 1
+ .global mem11
+mem11: .space 1
+ .global mem12
+mem12: .space 1
+ .global mem13
+mem13: .space 1
+ .global mem14
+mem14: .space 1
+ .global mem15
+mem15: .space 1
+
+#endif
+
+#ifdef L__m32c_eh_return
+ .text
+ .global __m32c_eh_return
+__m32c_eh_return:
+
+ /* At this point, r0 has the stack adjustment, r1r3 has the
+ address to return to. The stack looks like this:
+
+ old_ra
+ old_fp
+ <- unwound sp
+ ...
+ fb
+ through
+ r0
+ <- sp
+
+ What we need to do is restore all the registers, update the
+ stack, and return to the right place.
+ */
+
+ stc sp,a0
+
+ add.W A(#16,#24),a0
+ /* a0 points to the current stack, just above the register
+ save areas */
+
+ mov.w a0,a1
+ exts.w r0
+ sub.W A(r0,r2r0),a1
+ sub.W A(#3,#4),a1
+ /* a1 points to the new stack. */
+
+ /* This is for the "rts" below. */
+ mov.w r1,[a1]
+#ifdef A16
+ mov.w r2,r1
+ mov.b r1l,2[a1]
+#else
+ mov.w r2,2[a1]
+#endif
+
+ /* This is for the "popc sp" below. */
+ mov.W a1,[a0]
+
+ popm r0,r1,r2,r3,a0,a1,sb,fb
+ popc sp
+ rts
+#endif
+
+/* SImode arguments for SI foo(SI,SI) functions. */
+#ifdef A16
+#define SAL 5[fb]
+#define SAH 7[fb]
+#define SBL 9[fb]
+#define SBH 11[fb]
+#else
+#define SAL 8[fb]
+#define SAH 10[fb]
+#define SBL 12[fb]
+#define SBH 14[fb]
+#endif
+
+#ifdef L__m32c_mulsi3
+ .text
+ .global ___mulsi3
+___mulsi3:
+ enter #0
+ push.w r2
+ mov.w SAL,r0
+ mulu.w SBL,r0 /* writes to r2r0 */
+ mov.w r0,mem0
+ mov.w r2,mem2
+ mov.w SAL,r0
+ mulu.w SBH,r0 /* writes to r2r0 */
+ add.w r0,mem2
+ mov.w SAH,r0
+ mulu.w SBL,r0 /* writes to r2r0 */
+ add.w r0,mem2
+ pop.w r2
+ exitd
+#endif
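
___mulsi3 builds the 32-bit product from three 16x16 partial products: the low product goes to mem0/mem2 and the two cross products are added into the high word; a_hi*b_hi is omitted because it only affects bits above 31. A minimal C model of that computation (helper name invented for illustration):

    #include <stdint.h>

    /* 32x32 -> 32 multiply from 16-bit partial products, as in ___mulsi3. */
    uint32_t mulsi3_model(uint32_t a, uint32_t b)
    {
        uint32_t a_lo = a & 0xffff, a_hi = a >> 16;
        uint32_t b_lo = b & 0xffff, b_hi = b >> 16;
        uint32_t low   = a_lo * b_lo;                 /* mem0:mem2       */
        uint32_t cross = a_lo * b_hi + a_hi * b_lo;   /* added into mem2 */
        return low + (cross << 16);
    }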
+
+#ifdef L__m32c_cmpsi2
+ .text
+ .global ___cmpsi2
+___cmpsi2:
+ enter #0
+ cmp.w SBH,SAH
+ jgt cmpsi_gt
+ jlt cmpsi_lt
+ cmp.w SBL,SAL
+ jgt cmpsi_gt
+ jlt cmpsi_lt
+ mov.w #1,r0
+ exitd
+cmpsi_gt:
+ mov.w #2,r0
+ exitd
+cmpsi_lt:
+ mov.w #0,r0
+ exitd
+#endif
+
+#ifdef L__m32c_ucmpsi2
+ .text
+ .global ___ucmpsi2
+___ucmpsi2:
+ enter #0
+ cmp.w SBH,SAH
+ jgtu cmpsi_gt
+ jltu cmpsi_lt
+ cmp.w SBL,SAL
+ jgtu cmpsi_gt
+ jltu cmpsi_lt
+ mov.w #1,r0
+ exitd
+cmpsi_gt:
+ mov.w #2,r0
+ exitd
+cmpsi_lt:
+ mov.w #0,r0
+ exitd
+#endif
+
+#ifdef L__m32c_jsri16
+ .text
+#ifdef A16
+ .global m32c_jsri16
+m32c_jsri16:
+ add.w #-1, sp
+
+ /* Read the address (16 bits) and return address (24 bits) off
+ the stack. */
+ mov.w 4[sp], r0
+ mov.w 1[sp], r3
+ mov.b 3[sp], a0 /* This zero-extends, so the high byte has
+ zero in it. */
+
+ /* Write the return address, then new address, to the stack. */
+ mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */
+ mov.w r0, 0[sp]
+ mov.w r3, 3[sp]
+ mov.b a0, 5[sp]
+
+ /* This "returns" to the target address, leaving the pending
+ return address on the stack. */
+ rts
+#endif
+
+#endif
diff --git a/libgcc/config/m32c/t-m32c b/libgcc/config/m32c/t-m32c
new file mode 100644
index 00000000000..d21483750fd
--- /dev/null
+++ b/libgcc/config/m32c/t-m32c
@@ -0,0 +1,9 @@
+LIB1ASMSRC = m32c/lib1funcs.S
+
+LIB1ASMFUNCS = \
+ __m32c_memregs \
+ __m32c_eh_return \
+ __m32c_mulsi3 \
+ __m32c_cmpsi2 \
+ __m32c_ucmpsi2 \
+ __m32c_jsri16
diff --git a/libgcc/config/m32r/initfini.c b/libgcc/config/m32r/initfini.c
index 6e7d58614c7..56332459223 100644
--- a/libgcc/config/m32r/initfini.c
+++ b/libgcc/config/m32r/initfini.c
@@ -1,5 +1,5 @@
/* .init/.fini section handling + C++ global constructor/destructor handling.
- This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm.
+ This file is based on crtstuff.c, sol2-crti.S, sol2-crtn.S.
Copyright (C) 1996, 1997, 1998, 2006, 2009 Free Software Foundation, Inc.
diff --git a/libgcc/config/m68k/lb1sf68.S b/libgcc/config/m68k/lb1sf68.S
new file mode 100644
index 00000000000..0339a092c4f
--- /dev/null
+++ b/libgcc/config/m68k/lb1sf68.S
@@ -0,0 +1,4116 @@
+/* libgcc routines for 68000 w/o floating-point hardware.
+ Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Use this one for any 680x0; assumes no floating point hardware.
+ The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk.
+ Some of this code comes from MINIX, via the folks at ericsson.
+ D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
+*/
+
+/* These are predefined by new versions of GNU cpp. */
+
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__ _
+#endif
+
+#ifndef __REGISTER_PREFIX__
+#define __REGISTER_PREFIX__
+#endif
+
+#ifndef __IMMEDIATE_PREFIX__
+#define __IMMEDIATE_PREFIX__ #
+#endif
+
+/* ANSI concatenation macros. */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+/* Note that X is a function. */
+
+#ifdef __ELF__
+#define FUNC(x) .type SYM(x),function
+#else
+/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just
+ define this to the empty string for non-ELF systems, but defining it
+ to .proc means that the information is available to the assembler if
+ the need arises. */
+#define FUNC(x) .proc
+#endif
+
+/* Use the right prefix for registers. */
+
+#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x)
+
+/* Use the right prefix for immediate values. */
+
+#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x)
+
+#define d0 REG (d0)
+#define d1 REG (d1)
+#define d2 REG (d2)
+#define d3 REG (d3)
+#define d4 REG (d4)
+#define d5 REG (d5)
+#define d6 REG (d6)
+#define d7 REG (d7)
+#define a0 REG (a0)
+#define a1 REG (a1)
+#define a2 REG (a2)
+#define a3 REG (a3)
+#define a4 REG (a4)
+#define a5 REG (a5)
+#define a6 REG (a6)
+#define fp REG (fp)
+#define sp REG (sp)
+#define pc REG (pc)
+
+/* Provide a few macros to allow for PIC code support.
+ * With PIC, data is stored A5 relative so we've got to take a bit of special
+ * care to ensure that all loads of global data are via A5. PIC also requires
+ * jumps and subroutine calls to be PC relative rather than absolute. We cheat
+ * a little on this and in the PIC case, we use short offset branches and
+ * hope that the final object code is within range (which it should be).
+ */
+#ifndef __PIC__
+
+ /* Non PIC (absolute/relocatable) versions */
+
+ .macro PICCALL addr
+ jbsr \addr
+ .endm
+
+ .macro PICJUMP addr
+ jmp \addr
+ .endm
+
+ .macro PICLEA sym, reg
+ lea \sym, \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ pea \sym
+ .endm
+
+#else /* __PIC__ */
+
+# if defined (__uClinux__)
+
+ /* Versions for uClinux */
+
+# if defined(__ID_SHARED_LIBRARY__)
+
+ /* -mid-shared-library versions */
+
+ .macro PICLEA sym, reg
+ movel a5@(_current_shared_library_a5_offset_), \reg
+ movel \sym@GOT(\reg), \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ movel a5@(_current_shared_library_a5_offset_), \areg
+ movel \sym@GOT(\areg), sp@-
+ .endm
+
+ .macro PICCALL addr
+ PICLEA \addr,a0
+ jsr a0@
+ .endm
+
+ .macro PICJUMP addr
+ PICLEA \addr,a0
+ jmp a0@
+ .endm
+
+# else /* !__ID_SHARED_LIBRARY__ */
+
+ /* Versions for -msep-data */
+
+ .macro PICLEA sym, reg
+ movel \sym@GOT(a5), \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ movel \sym@GOT(a5), sp@-
+ .endm
+
+ .macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+ lea \addr-.-8,a0
+ jsr pc@(a0)
+#else
+ jbsr \addr
+#endif
+ .endm
+
+ .macro PICJUMP addr
+ /* ISA C has no bra.l instruction, and since this assembly file
+ gets assembled into multiple object files, we avoid the
+ bra instruction entirely. */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+ lea \addr-.-8,a0
+ jmp pc@(a0)
+#else
+ bra \addr
+#endif
+ .endm
+
+# endif
+
+# else /* !__uClinux__ */
+
+ /* Versions for Linux */
+
+ .macro PICLEA sym, reg
+ movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg
+ lea (-6, pc, \reg), \reg
+ movel \sym@GOT(\reg), \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg
+ lea (-6, pc, \areg), \areg
+ movel \sym@GOT(\areg), sp@-
+ .endm
+
+ .macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+ lea \addr-.-8,a0
+ jsr pc@(a0)
+#else
+ jbsr \addr
+#endif
+ .endm
+
+ .macro PICJUMP addr
+ /* ISA C has no bra.l instruction, and since this assembly file
+ gets assembled into multiple object files, we avoid the
+ bra instruction entirely. */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+ lea \addr-.-8,a0
+ jmp pc@(a0)
+#else
+ bra \addr
+#endif
+ .endm
+
+# endif
+#endif /* __PIC__ */
+
+
+#ifdef L_floatex
+
+| This is an attempt at a decent floating point (single, double and
+| extended double) code for the GNU C compiler. It should be easy to
+| adapt to other compilers (but beware of the local labels!).
+
+| Starting date: 21 October, 1990
+
+| It is convenient to introduce the notation (s,e,f) for a floating point
+| number, where s=sign, e=exponent, f=fraction. We will call a floating
+| point number fpn to abbreviate, independently of the precision.
+| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023
+| for doubles and 16383 for long doubles). We then have the following
+| different cases:
+| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to
+| (-1)^s x 1.f x 2^(e-bias-1).
+| 2. Denormalized fpns have e=0. They correspond to numbers of the form
+| (-1)^s x 0.f x 2^(-bias).
+| 3. +/-INFINITY have e=MAX_EXP, f=0.
+| 4. Quiet NaN (Not a Number) have all bits set.
+| 5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1.
+
+|=============================================================================
+| exceptions
+|=============================================================================
+
+| This is the floating point condition code register (_fpCCR):
+|
+| struct {
+| short _exception_bits;
+| short _trap_enable_bits;
+| short _sticky_bits;
+| short _rounding_mode;
+| short _format;
+| short _last_operation;
+| union {
+| float sf;
+| double df;
+| } _operand1;
+| union {
+| float sf;
+| double df;
+| } _operand2;
+| } _fpCCR;
+
+ .data
+ .even
+
+ .globl SYM (_fpCCR)
+
+SYM (_fpCCR):
+__exception_bits:
+ .word 0
+__trap_enable_bits:
+ .word 0
+__sticky_bits:
+ .word 0
+__rounding_mode:
+ .word ROUND_TO_NEAREST
+__format:
+ .word NIL
+__last_operation:
+ .word NOOP
+__operand1:
+ .long 0
+ .long 0
+__operand2:
+ .long 0
+ .long 0
+
+| Offsets:
+EBITS = __exception_bits - SYM (_fpCCR)
+TRAPE = __trap_enable_bits - SYM (_fpCCR)
+STICK = __sticky_bits - SYM (_fpCCR)
+ROUND = __rounding_mode - SYM (_fpCCR)
+FORMT = __format - SYM (_fpCCR)
+LASTO = __last_operation - SYM (_fpCCR)
+OPER1 = __operand1 - SYM (_fpCCR)
+OPER2 = __operand2 - SYM (_fpCCR)
+
+| The following exception types are supported:
+INEXACT_RESULT = 0x0001
+UNDERFLOW = 0x0002
+OVERFLOW = 0x0004
+DIVIDE_BY_ZERO = 0x0008
+INVALID_OPERATION = 0x0010
+
+| The allowed rounding modes are:
+UNKNOWN = -1
+ROUND_TO_NEAREST = 0 | round result to nearest representable value
+ROUND_TO_ZERO = 1 | round result towards zero
+ROUND_TO_PLUS = 2 | round result towards plus infinity
+ROUND_TO_MINUS = 3 | round result towards minus infinity
+
+| The allowed values of format are:
+NIL = 0
+SINGLE_FLOAT = 1
+DOUBLE_FLOAT = 2
+LONG_FLOAT = 3
+
+| The allowed values for the last operation are:
+NOOP = 0
+ADD = 1
+MULTIPLY = 2
+DIVIDE = 3
+NEGATE = 4
+COMPARE = 5
+EXTENDSFDF = 6
+TRUNCDFSF = 7
+
+|=============================================================================
+| __clear_sticky_bits
+|=============================================================================
+
+| The sticky bits are normally not cleared (thus the name), whereas the
+| exception type and exception value reflect the last computation.
+| This routine is provided to clear them (you can also write to _fpCCR,
+| since it is globally visible).
+
+ .globl SYM (__clear_sticky_bit)
+
+ .text
+ .even
+
+| void __clear_sticky_bits(void);
+SYM (__clear_sticky_bit):
+ PICLEA SYM (_fpCCR),a0
+#ifndef __mcoldfire__
+ movew IMM (0),a0@(STICK)
+#else
+ clr.w a0@(STICK)
+#endif
+ rts
+
+|=============================================================================
+| $_exception_handler
+|=============================================================================
+
+ .globl $_exception_handler
+
+ .text
+ .even
+
+| This is the common exit point if an exception occurs.
+| NOTE: it is NOT callable from C!
+| It expects the exception type in d7, the format (SINGLE_FLOAT,
+| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5.
+| It sets the corresponding exception and sticky bits, and the format.
+| Depending on the format it fills the corresponding slots for the
+| operands which produced the exception (all this information is provided
+| so if you write your own exception handlers you have enough information
+| to deal with the problem).
+| It then checks whether the corresponding exception is trap-enabled,
+| in which case it pushes the address of _fpCCR and traps through
+| trap FPTRAP (15 for the moment).
+
+FPTRAP = 15
+
+$_exception_handler:
+ PICLEA SYM (_fpCCR),a0
+ movew d7,a0@(EBITS) | set __exception_bits
+#ifndef __mcoldfire__
+ orw d7,a0@(STICK) | and __sticky_bits
+#else
+ movew a0@(STICK),d4
+ orl d7,d4
+ movew d4,a0@(STICK)
+#endif
+ movew d6,a0@(FORMT) | and __format
+ movew d5,a0@(LASTO) | and __last_operation
+
+| Now put the operands in place:
+#ifndef __mcoldfire__
+ cmpw IMM (SINGLE_FLOAT),d6
+#else
+ cmpl IMM (SINGLE_FLOAT),d6
+#endif
+ beq 1f
+ movel a6@(8),a0@(OPER1)
+ movel a6@(12),a0@(OPER1+4)
+ movel a6@(16),a0@(OPER2)
+ movel a6@(20),a0@(OPER2+4)
+ bra 2f
+1: movel a6@(8),a0@(OPER1)
+ movel a6@(12),a0@(OPER2)
+2:
+| And check whether the exception is trap-enabled:
+#ifndef __mcoldfire__
+ andw a0@(TRAPE),d7 | is exception trap-enabled?
+#else
+ clrl d6
+ movew a0@(TRAPE),d6
+ andl d6,d7
+#endif
+ beq 1f | no, exit
+ PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR
+ trap IMM (FPTRAP) | and trap
+#ifndef __mcoldfire__
+1: moveml sp@+,d2-d7 | restore data registers
+#else
+1: moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+#endif /* L_floatex */
+
+#ifdef L_mulsi3
+ .text
+ FUNC(__mulsi3)
+ .globl SYM (__mulsi3)
+SYM (__mulsi3):
+ movew sp@(4), d0 /* x0 -> d0 */
+ muluw sp@(10), d0 /* x0*y1 */
+ movew sp@(6), d1 /* x1 -> d1 */
+ muluw sp@(8), d1 /* x1*y0 */
+#ifndef __mcoldfire__
+ addw d1, d0
+#else
+ addl d1, d0
+#endif
+ swap d0
+ clrw d0
+ movew sp@(6), d1 /* x1 -> d1 */
+ muluw sp@(10), d1 /* x1*y1 */
+ addl d1, d0
+
+ rts
+#endif /* L_mulsi3 */
+
+#ifdef L_udivsi3
+ .text
+ FUNC(__udivsi3)
+ .globl SYM (__udivsi3)
+SYM (__udivsi3):
+#ifndef __mcoldfire__
+ movel d2, sp@-
+ movel sp@(12), d1 /* d1 = divisor */
+ movel sp@(8), d0 /* d0 = dividend */
+
+ cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */
+ jcc L3 /* then try next algorithm */
+ movel d0, d2
+ clrw d2
+ swap d2
+ divu d1, d2 /* high quotient in lower word */
+ movew d2, d0 /* save high quotient */
+ swap d0
+ movew sp@(10), d2 /* get low dividend + high rest */
+ divu d1, d2 /* low quotient */
+ movew d2, d0
+ jra L6
+
+L3: movel d1, d2 /* use d2 as divisor backup */
+L4: lsrl IMM (1), d1 /* shift divisor */
+ lsrl IMM (1), d0 /* shift dividend */
+ cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */
+ jcc L4
+ divu d1, d0 /* now we have 16-bit divisor */
+	andl	IMM (0xffff), d0	/* keep the 16-bit quotient, discard the remainder */
+
+/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of
+ the operand ranges, this might give a 33-bit product. If this product is
+ greater than the dividend, the tentative quotient was too large. */
+ movel d2, d1
+ mulu d0, d1 /* low part, 32 bits */
+ swap d2
+ mulu d0, d2 /* high part, at most 17 bits */
+ swap d2 /* align high part with low part */
+ tstw d2 /* high part 17 bits? */
+ jne L5 /* if 17 bits, quotient was too large */
+ addl d2, d1 /* add parts */
+ jcs L5 /* if sum is 33 bits, quotient was too large */
+ cmpl sp@(8), d1 /* compare the sum with the dividend */
+ jls L6 /* if sum > dividend, quotient was too large */
+L5: subql IMM (1), d0 /* adjust quotient */
+
+L6: movel sp@+, d2
+ rts
+
+#else /* __mcoldfire__ */
+
+/* ColdFire implementation of non-restoring division algorithm from
+ Hennessy & Patterson, Appendix A. */
+ link a6,IMM (-12)
+ moveml d2-d4,sp@
+ movel a6@(8),d0
+ movel a6@(12),d1
+ clrl d2 | clear p
+ moveq IMM (31),d4
+L1: addl d0,d0 | shift reg pair (p,a) one bit left
+ addxl d2,d2
+ movl d2,d3 | subtract b from p, store in tmp.
+ subl d1,d3
+ jcs L2 | if no carry,
+ bset IMM (0),d0 | set the low order bit of a to 1,
+ movl d3,d2 | and store tmp in p.
+L2: subql IMM (1),d4
+ jcc L1
+ moveml sp@,d2-d4 | restore data registers
+ unlk a6 | and return
+ rts
+#endif /* __mcoldfire__ */
+
+#endif /* L_udivsi3 */
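
For divisors of 2^16 or more, the plain 68000 path shifts dividend and divisor right together until the divisor fits in 16 bits, does a single divu, and then lowers the tentative quotient by at most one after the 33-bit product check described above. A minimal C model of that path (helper name invented; assumes b != 0 and a 64-bit type for the product check):

    #include <stdint.h>

    /* Model of the large-divisor path of __udivsi3: shift both operands,
       divide, then correct the tentative quotient, which is never too small
       and at most one too large. */
    uint32_t udivsi3_model(uint32_t a, uint32_t b)
    {
        if (b < 0x10000u)
            return a / b;                /* handled by two divu steps */
        uint32_t a1 = a, b1 = b;
        while (b1 >= 0x10000u) {         /* the L4 loop: shift both right */
            a1 >>= 1;
            b1 >>= 1;
        }
        uint32_t q = a1 / b1;            /* 16-bit tentative quotient */
        if ((uint64_t)q * b > a)         /* 33-bit product comparison */
            q--;
        return q;
    }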
+
+#ifdef L_divsi3
+ .text
+ FUNC(__divsi3)
+ .globl SYM (__divsi3)
+SYM (__divsi3):
+ movel d2, sp@-
+
+ moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */
+ movel sp@(12), d1 /* d1 = divisor */
+ jpl L1
+ negl d1
+#ifndef __mcoldfire__
+ negb d2 /* change sign because divisor <0 */
+#else
+ negl d2 /* change sign because divisor <0 */
+#endif
+L1: movel sp@(8), d0 /* d0 = dividend */
+ jpl L2
+ negl d0
+#ifndef __mcoldfire__
+ negb d2
+#else
+ negl d2
+#endif
+
+L2: movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */
+ addql IMM (8), sp
+
+ tstb d2
+ jpl L3
+ negl d0
+
+L3: movel sp@+, d2
+ rts
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+ .text
+ FUNC(__umodsi3)
+ .globl SYM (__umodsi3)
+SYM (__umodsi3):
+ movel sp@(8), d1 /* d1 = divisor */
+ movel sp@(4), d0 /* d0 = dividend */
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__udivsi3)
+ addql IMM (8), sp
+ movel sp@(8), d1 /* d1 = divisor */
+#ifndef __mcoldfire__
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */
+ addql IMM (8), sp
+#else
+ mulsl d1,d0
+#endif
+ movel sp@(4), d1 /* d1 = dividend */
+ subl d0, d1 /* d1 = a - (a/b)*b */
+ movel d1, d0
+ rts
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+ .text
+ FUNC(__modsi3)
+ .globl SYM (__modsi3)
+SYM (__modsi3):
+ movel sp@(8), d1 /* d1 = divisor */
+ movel sp@(4), d0 /* d0 = dividend */
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__divsi3)
+ addql IMM (8), sp
+ movel sp@(8), d1 /* d1 = divisor */
+#ifndef __mcoldfire__
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */
+ addql IMM (8), sp
+#else
+ mulsl d1,d0
+#endif
+ movel sp@(4), d1 /* d1 = dividend */
+ subl d0, d1 /* d1 = a - (a/b)*b */
+ movel d1, d0
+ rts
+#endif /* L_modsi3 */
+
+
+#ifdef L_double
+
+ .globl SYM (_fpCCR)
+ .globl $_exception_handler
+
+QUIET_NaN = 0xffffffff
+
+D_MAX_EXP = 0x07ff
+D_BIAS = 1022
+DBL_MAX_EXP = D_MAX_EXP - D_BIAS
+DBL_MIN_EXP = 1 - D_BIAS
+DBL_MANT_DIG = 53
+
+INEXACT_RESULT = 0x0001
+UNDERFLOW = 0x0002
+OVERFLOW = 0x0004
+DIVIDE_BY_ZERO = 0x0008
+INVALID_OPERATION = 0x0010
+
+DOUBLE_FLOAT = 2
+
+NOOP = 0
+ADD = 1
+MULTIPLY = 2
+DIVIDE = 3
+NEGATE = 4
+COMPARE = 5
+EXTENDSFDF = 6
+TRUNCDFSF = 7
+
+UNKNOWN = -1
+ROUND_TO_NEAREST = 0 | round result to nearest representable value
+ROUND_TO_ZERO = 1 | round result towards zero
+ROUND_TO_PLUS = 2 | round result towards plus infinity
+ROUND_TO_MINUS = 3 | round result towards minus infinity
+
+| Entry points:
+
+ .globl SYM (__adddf3)
+ .globl SYM (__subdf3)
+ .globl SYM (__muldf3)
+ .globl SYM (__divdf3)
+ .globl SYM (__negdf2)
+ .globl SYM (__cmpdf2)
+ .globl SYM (__cmpdf2_internal)
+ .hidden SYM (__cmpdf2_internal)
+
+ .text
+ .even
+
+| These are common routines to return and signal exceptions.
+
+Ld$den:
+| Return and signal a denormalized number
+ orl d7,d0
+ movew IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$infty:
+Ld$overflow:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (0x7ff00000),d0
+ movel IMM (0),d1
+ orl d7,d0
+ movew IMM (INEXACT_RESULT+OVERFLOW),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$underflow:
+| Return 0 and set the exception flags
+ movel IMM (0),d0
+ movel d0,d1
+ movew IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$inop:
+| Return a quiet NaN and set the exception flags
+ movel IMM (QUIET_NaN),d0
+ movel d0,d1
+ movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$div$0:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (0x7ff00000),d0
+ movel IMM (0),d1
+ orl d7,d0
+ movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+|=============================================================================
+|=============================================================================
+| double precision routines
+|=============================================================================
+|=============================================================================
+
+| A double precision floating point number (double) has the format:
+|
+| struct _double {
+| unsigned int sign : 1; /* sign bit */
+|      unsigned int exponent : 11;  /* exponent, biased by 1022 */
+| unsigned int fraction : 52; /* fraction */
+| } double;
+|
+| Thus sizeof(double) = 8 (64 bits).
+|
+| All the routines are callable from C programs, and return the result
+| in the register pair d0-d1. They also preserve all registers except
+| d0-d1 and a0-a1.
+
+|=============================================================================
+| __subdf3
+|=============================================================================
+
+| double __subdf3(double, double);
+ FUNC(__subdf3)
+SYM (__subdf3):
+ bchg IMM (31),sp@(12) | change sign of second operand
+ | and fall through, so we always add
+|=============================================================================
+| __adddf3
+|=============================================================================
+
+| double __adddf3(double, double);
+ FUNC(__adddf3)
+SYM (__adddf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0) | everything will be done in registers
+ moveml d2-d7,sp@- | save all data registers and a2 (but d0-d1)
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | get second operand
+ movel a6@(20),d3 |
+
+ movel d0,d7 | get d0's sign bit in d7 '
+ addl d1,d1 | check and clear sign bit of a, and gain one
+ addxl d0,d0 | bit of extra precision
+ beq Ladddf$b | if zero return second operand
+
+ movel d2,d6 | save sign in d6
+ addl d3,d3 | get rid of sign bit and gain one bit of
+ addxl d2,d2 | extra precision
+ beq Ladddf$a | if zero return first operand
+
+ andl IMM (0x80000000),d7 | isolate a's sign bit '
+ swap d6 | and also b's sign bit '
+#ifndef __mcoldfire__
+ andw IMM (0x8000),d6 |
+ orw d6,d7 | and combine them into d7, so that a's sign '
+ | bit is in the high word and b's is in the '
+ | low word, so d6 is free to be used
+#else
+ andl IMM (0x8000),d6
+ orl d6,d7
+#endif
+ movel d7,a0 | now save d7 into a0, so d7 is free to
+ | be used also
+
+| Get the exponents and check for denormalized and/or infinity.
+
+ movel IMM (0x001fffff),d6 | mask for the fraction
+ movel IMM (0x00200000),d7 | mask to put hidden bit back
+
+ movel d0,d4 |
+ andl d6,d0 | get fraction in d0
+ notl d6 | make d6 into mask for the exponent
+ andl d6,d4 | get exponent in d4
+ beq Ladddf$a$den | branch if a is denormalized
+ cmpl d6,d4 | check for INFINITY or NaN
+ beq Ladddf$nf |
+ orl d7,d0 | and put hidden bit back
+Ladddf$1:
+ swap d4 | shift right exponent so that it starts
+#ifndef __mcoldfire__
+ lsrw IMM (5),d4 | in bit 0 and not bit 20
+#else
+ lsrl IMM (5),d4 | in bit 0 and not bit 20
+#endif
+| Now we have a's exponent in d4 and fraction in d0-d1 '
+ movel d2,d5 | save b to get exponent
+ andl d6,d5 | get exponent in d5
+ beq Ladddf$b$den | branch if b is denormalized
+ cmpl d6,d5 | check for INFINITY or NaN
+ beq Ladddf$nf
+ notl d6 | make d6 into mask for the fraction again
+ andl d6,d2 | and get fraction in d2
+ orl d7,d2 | and put hidden bit back
+Ladddf$2:
+ swap d5 | shift right exponent so that it starts
+#ifndef __mcoldfire__
+ lsrw IMM (5),d5 | in bit 0 and not bit 20
+#else
+ lsrl IMM (5),d5 | in bit 0 and not bit 20
+#endif
+
+| Now we have b's exponent in d5 and fraction in d2-d3. '
+
+| The situation now is as follows: the signs are combined in a0, the
+| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a)
+| and d5 (b). To do the rounding correctly we need to keep all the
+| bits until the end, so we need to use d0-d1-d2-d3 for the first number
+| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the
+| exponents in a2-a3.
+
+#ifndef __mcoldfire__
+ moveml a2-a3,sp@- | save the address registers
+#else
+ movel a2,sp@-
+ movel a3,sp@-
+ movel a4,sp@-
+#endif
+
+ movel d4,a2 | save the exponents
+ movel d5,a3 |
+
+ movel IMM (0),d7 | and move the numbers around
+ movel d7,d6 |
+ movel d3,d5 |
+ movel d2,d4 |
+ movel d7,d3 |
+ movel d7,d2 |
+
+| Here we shift the numbers until the exponents are the same, and put
+| the largest exponent in a2.
+#ifndef __mcoldfire__
+ exg d4,a2 | get exponents back
+ exg d5,a3 |
+ cmpw d4,d5 | compare the exponents
+#else
+ movel d4,a4 | get exponents back
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+ cmpl d4,d5 | compare the exponents
+#endif
+ beq Ladddf$3 | if equal don't shift '
+ bhi 9f | branch if second exponent is higher
+
+| Here we have a's exponent larger than b's, so we have to shift b. We do
+| this using d2 as a counter:
+1: movew d4,d2 | move largest exponent to d2
+#ifndef __mcoldfire__
+ subw d5,d2 | and subtract second exponent
+ exg d4,a2 | get back the longs we saved
+ exg d5,a3 |
+#else
+ subl d5,d2 | and subtract second exponent
+ movel d4,a4 | get back the longs we saved
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+ cmpw IMM (DBL_MANT_DIG+2),d2
+#else
+ cmpl IMM (DBL_MANT_DIG+2),d2
+#endif
+ bge Ladddf$b$small
+#ifndef __mcoldfire__
+ cmpw IMM (32),d2 | if difference >= 32, shift by longs
+#else
+ cmpl IMM (32),d2 | if difference >= 32, shift by longs
+#endif
+ bge 5f
+2:
+#ifndef __mcoldfire__
+ cmpw IMM (16),d2 | if difference >= 16, shift by words
+#else
+ cmpl IMM (16),d2 | if difference >= 16, shift by words
+#endif
+ bge 6f
+ bra 3f | enter dbra loop
+
+4:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d4
+ roxrl IMM (1),d5
+ roxrl IMM (1),d6
+ roxrl IMM (1),d7
+#else
+ lsrl IMM (1),d7
+ btst IMM (0),d6
+ beq 10f
+ bset IMM (31),d7
+10: lsrl IMM (1),d6
+ btst IMM (0),d5
+ beq 11f
+ bset IMM (31),d6
+11: lsrl IMM (1),d5
+ btst IMM (0),d4
+ beq 12f
+ bset IMM (31),d5
+12: lsrl IMM (1),d4
+#endif
+3:
+#ifndef __mcoldfire__
+ dbra d2,4b
+#else
+ subql IMM (1),d2
+ bpl 4b
+#endif
+ movel IMM (0),d2
+ movel d2,d3
+ bra Ladddf$4
+5:
+ movel d6,d7
+ movel d5,d6
+ movel d4,d5
+ movel IMM (0),d4
+#ifndef __mcoldfire__
+ subw IMM (32),d2
+#else
+ subl IMM (32),d2
+#endif
+ bra 2b
+6:
+ movew d6,d7
+ swap d7
+ movew d5,d6
+ swap d6
+ movew d4,d5
+ swap d5
+ movew IMM (0),d4
+ swap d4
+#ifndef __mcoldfire__
+ subw IMM (16),d2
+#else
+ subl IMM (16),d2
+#endif
+ bra 3b
+
+9:
+#ifndef __mcoldfire__
+ exg d4,d5
+ movew d4,d6
+ subw d5,d6 | keep d5 (largest exponent) in d4
+ exg d4,a2
+ exg d5,a3
+#else
+ movel d5,d6
+ movel d4,d5
+ movel d6,d4
+ subl d5,d6
+ movel d4,a4
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+ cmpw IMM (DBL_MANT_DIG+2),d6
+#else
+ cmpl IMM (DBL_MANT_DIG+2),d6
+#endif
+ bge Ladddf$a$small
+#ifndef __mcoldfire__
+ cmpw IMM (32),d6 | if difference >= 32, shift by longs
+#else
+ cmpl IMM (32),d6 | if difference >= 32, shift by longs
+#endif
+ bge 5f
+2:
+#ifndef __mcoldfire__
+ cmpw IMM (16),d6 | if difference >= 16, shift by words
+#else
+ cmpl IMM (16),d6 | if difference >= 16, shift by words
+#endif
+ bge 6f
+ bra 3f | enter dbra loop
+
+4:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+#endif
+3:
+#ifndef __mcoldfire__
+ dbra d6,4b
+#else
+ subql IMM (1),d6
+ bpl 4b
+#endif
+ movel IMM (0),d7
+ movel d7,d6
+ bra Ladddf$4
+5:
+ movel d2,d3
+ movel d1,d2
+ movel d0,d1
+ movel IMM (0),d0
+#ifndef __mcoldfire__
+ subw IMM (32),d6
+#else
+ subl IMM (32),d6
+#endif
+ bra 2b
+6:
+ movew d2,d3
+ swap d3
+ movew d1,d2
+ swap d2
+ movew d0,d1
+ swap d1
+ movew IMM (0),d0
+ swap d0
+#ifndef __mcoldfire__
+ subw IMM (16),d6
+#else
+ subl IMM (16),d6
+#endif
+ bra 3b
+Ladddf$3:
+#ifndef __mcoldfire__
+ exg d4,a2
+ exg d5,a3
+#else
+ movel d4,a4
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+#endif
+Ladddf$4:
+| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and
+| the signs in a0.
+
+| Here we have to decide whether to add or subtract the numbers:
+#ifndef __mcoldfire__
+ exg d7,a0 | get the signs
+ exg d6,a3 | a3 is free to be used
+#else
+ movel d7,a4
+ movel a0,d7
+ movel a4,a0
+ movel d6,a4
+ movel a3,d6
+ movel a4,a3
+#endif
+ movel d7,d6 |
+ movew IMM (0),d7 | get a's sign in d7 '
+ swap d6 |
+ movew IMM (0),d6 | and b's sign in d6 '
+ eorl d7,d6 | compare the signs
+ bmi Lsubdf$0 | if the signs are different we have
+ | to subtract
+#ifndef __mcoldfire__
+ exg d7,a0 | else we add the numbers
+ exg d6,a3 |
+#else
+ movel d7,a4
+ movel a0,d7
+ movel a4,a0
+ movel d6,a4
+ movel a3,d6
+ movel a4,a3
+#endif
+ addl d7,d3 |
+ addxl d6,d2 |
+ addxl d5,d1 |
+ addxl d4,d0 |
+
+ movel a2,d4 | return exponent to d4
+ movel a0,d7 |
+ andl IMM (0x80000000),d7 | d7 now has the sign
+
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| As in the addition (not in the subtraction!) we could have set
+| one more bit, so we check for this:
+ btst IMM (DBL_MANT_DIG+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+ lea pc@(Ladddf$5),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Ladddf$5:
+| Put back the exponent and check for overflow
+#ifndef __mcoldfire__
+ cmpw IMM (0x7ff),d4 | is the exponent big?
+#else
+ cmpl IMM (0x7ff),d4 | is the exponent big?
+#endif
+ bge 1f
+ bclr IMM (DBL_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (4),d4 | put exponent back into position
+#else
+ lsll IMM (4),d4 | put exponent back into position
+#endif
+ swap d0 |
+#ifndef __mcoldfire__
+ orw d4,d0 |
+#else
+ orl d4,d0 |
+#endif
+ swap d0 |
+ bra Ladddf$ret
+1:
+ moveq IMM (ADD),d5
+ bra Ld$overflow
+
+Lsubdf$0:
+| Here we do the subtraction.
+#ifndef __mcoldfire__
+ exg d7,a0 | put sign back in a0
+ exg d6,a3 |
+#else
+ movel d7,a4
+ movel a0,d7
+ movel a4,a0
+ movel d6,a4
+ movel a3,d6
+ movel a4,a3
+#endif
+ subl d7,d3 |
+ subxl d6,d2 |
+ subxl d5,d1 |
+ subxl d4,d0 |
+ beq Ladddf$ret$1 | if zero just exit
+ bpl 1f | if positive skip the following
+ movel a0,d7 |
+ bchg IMM (31),d7 | change sign bit in d7
+ movel d7,a0 |
+ negl d3 |
+ negxl d2 |
+ negxl d1 | and negate result
+ negxl d0 |
+1:
+ movel a2,d4 | return exponent to d4
+ movel a0,d7
+ andl IMM (0x80000000),d7 | isolate sign bit
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3 |
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| As in the addition (not in the subtraction!) we could have set
+| one more bit, so we check for this:
+ btst IMM (DBL_MANT_DIG+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+ lea pc@(Lsubdf$1),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lsubdf$1:
+| Put back the exponent and sign (we don't have overflow). '
+ bclr IMM (DBL_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (4),d4 | put exponent back into position
+#else
+ lsll IMM (4),d4 | put exponent back into position
+#endif
+ swap d0 |
+#ifndef __mcoldfire__
+ orw d4,d0 |
+#else
+ orl d4,d0 |
+#endif
+ swap d0 |
+ bra Ladddf$ret
+
+| If one of the numbers was too small (difference of exponents >=
+| DBL_MANT_DIG+1) we return the other (and now we don't have to '
+| check for finiteness or zero).
+Ladddf$a$small:
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+ movel a6@(16),d0
+ movel a6@(20),d1
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Ladddf$b$small:
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+ movel a6@(8),d0
+ movel a6@(12),d1
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Ladddf$a$den:
+ movel d7,d4 | d7 contains 0x00200000
+ bra Ladddf$1
+
+Ladddf$b$den:
+ movel d7,d5 | d7 contains 0x00200000
+ notl d6
+ bra Ladddf$2
+
+Ladddf$b:
+| Return b (if a is zero)
+ movel d2,d0
+ movel d3,d1
+ bne 1f | Check if b is -0
+ cmpl IMM (0x80000000),d0
+ bne 1f
+ andl IMM (0x80000000),d7 | Use the sign of a
+ clrl d0
+ bra Ladddf$ret
+Ladddf$a:
+ movel a6@(8),d0
+ movel a6@(12),d1
+1:
+ moveq IMM (ADD),d5
+| Check for NaN and +/-INFINITY.
+ movel d0,d7 |
+ andl IMM (0x80000000),d7 |
+ bclr IMM (31),d0 |
+ cmpl IMM (0x7ff00000),d0 |
+ bge 2f |
+ movel d0,d0 | check for zero, since we don't '
+ bne Ladddf$ret | want to return -0 by mistake
+ bclr IMM (31),d7 |
+ bra Ladddf$ret |
+2:
+ andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction)
+ orl d1,d0 |
+ bne Ld$inop |
+ bra Ld$infty |
+
+Ladddf$ret$1:
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3 | restore regs and exit
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+Ladddf$ret:
+| Normal exit.
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+ orl d7,d0 | put sign bit back
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+Ladddf$ret$den:
+| Return a denormalized number.
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0 | shift right once more
+ roxrl IMM (1),d1 |
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+#endif
+ bra Ladddf$ret
+
+Ladddf$nf:
+ moveq IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+ movel a6@(8),d0 | get the numbers back (remember that we
+ movel a6@(12),d1 | did some processing already)
+ movel a6@(16),d2 |
+ movel a6@(20),d3 |
+ movel IMM (0x7ff00000),d4 | useful constant (INFINITY)
+ movel d0,d7 | save sign bits
+ movel d2,d6 |
+ bclr IMM (31),d0 | clear sign bits
+ bclr IMM (31),d2 |
+| We know that one of them is either NaN or +/-INFINITY
+| Check for NaN (if either one is NaN return NaN)
+ cmpl d4,d0 | check first a (d0)
+ bhi Ld$inop | if d0 > 0x7ff00000 or equal and
+ bne 2f
+ tstl d1 | d1 > 0, a is NaN
+ bne Ld$inop |
+2: cmpl d4,d2 | check now b (d1)
+ bhi Ld$inop |
+ bne 3f
+ tstl d3 |
+ bne Ld$inop |
+3:
+| Now comes the check for +/-INFINITY. We know that both are (maybe not
+| finite) numbers, but if both are infinite we have to check whether we
+| are adding or subtracting them.
+ eorl d7,d6 | to check sign bits
+ bmi 1f
+ andl IMM (0x80000000),d7 | get (common) sign bit
+ bra Ld$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they must both be infinite, so we
+| return NaN).
+ cmpl d2,d0 | are both infinite?
+ bne 1f | if d0 <> d2 they are not equal
+ cmpl d3,d1 | if d0 == d2 test d3 and d1
+ beq Ld$inop | if equal return NaN
+1:
+ andl IMM (0x80000000),d7 | get a's sign bit '
+ cmpl d4,d0 | test now for infinity
+ beq Ld$infty | if a is INFINITY return with this sign
+ bchg IMM (31),d7 | else we know b is INFINITY and has
+ bra Ld$infty | the opposite sign
+
+|=============================================================================
+| __muldf3
+|=============================================================================
+
+| double __muldf3(double, double);
+ FUNC(__muldf3)
+SYM (__muldf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0-d1
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | and b into d2-d3
+ movel a6@(20),d3 |
+ movel d0,d7 | d7 will hold the sign of the product
+ eorl d2,d7 |
+ andl IMM (0x80000000),d7 |
+ movel d7,a0 | save sign bit into a0
+ movel IMM (0x7ff00000),d7 | useful constant (+INFINITY)
+ movel d7,d6 | another (mask for fraction)
+ notl d6 |
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d4 |
+ orl d1,d4 |
+ beq Lmuldf$a$0 | branch if a is zero
+ movel d0,d4 |
+ bclr IMM (31),d2 | get rid of b's sign bit '
+ movel d2,d5 |
+ orl d3,d5 |
+ beq Lmuldf$b$0 | branch if b is zero
+ movel d2,d5 |
+ cmpl d7,d0 | is a big?
+ bhi Lmuldf$inop | if a is NaN return NaN
+ beq Lmuldf$a$nf | we still have to check d1 and b ...
+ cmpl d7,d2 | now compare b with INFINITY
+ bhi Lmuldf$inop | is b NaN?
+ beq Lmuldf$b$nf | we still have to check d3 ...
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5.
+ andl d7,d4 | isolate exponent in d4
+ beq Lmuldf$a$den | if exponent zero, have denormalized
+ andl d6,d0 | isolate fraction
+ orl IMM (0x00100000),d0 | and put hidden bit back
+ swap d4 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (4),d4 |
+#else
+ lsrl IMM (4),d4 |
+#endif
+Lmuldf$1:
+ andl d7,d5 |
+ beq Lmuldf$b$den |
+ andl d6,d2 |
+ orl IMM (0x00100000),d2 | and put hidden bit back
+ swap d5 |
+#ifndef __mcoldfire__
+ lsrw IMM (4),d5 |
+#else
+ lsrl IMM (4),d5 |
+#endif
+Lmuldf$2: |
+#ifndef __mcoldfire__
+ addw d5,d4 | add exponents
+ subw IMM (D_BIAS+1),d4 | and subtract bias (plus one)
+#else
+ addl d5,d4 | add exponents
+ subl IMM (D_BIAS+1),d4 | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were
+| denormalized to start with!), which means that in the product bit 104
+| (which will correspond to bit 8 of the fourth long) is set.
+
+| Here we have to do the product.
+| To do it we have to juggle the registers back and forth, as there are not
+| enough to keep everything in them. So we use the address registers to keep
+| some intermediate data.
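+|
+| The loop below amounts to the following (an illustrative, C-like pseudo-code
+| sketch only; the register layout is set up just below):
+|
+|   for (i = 0; i < DBL_MANT_DIG; i++) {
+|     sum <<= 1;              /* shift the partial product (d0-d1-d2-d3) */
+|     b   <<= 1;              /* shift b (d6-d7); its top bit sets carry */
+|     if (carry) sum += a;    /* add a (d4-d5) when that bit was set     */
+|   }
+|
+| a2 is kept at zero so it can be exchanged into a data register and used
+| to propagate the carry into the upper longs of the sum.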
+
+#ifndef __mcoldfire__
+ moveml a2-a3,sp@- | save a2 and a3 for temporary use
+#else
+ movel a2,sp@-
+ movel a3,sp@-
+ movel a4,sp@-
+#endif
+ movel IMM (0),a2 | a2 is a null register
+ movel d4,a3 | and a3 will preserve the exponent
+
+| First, shift d2-d3 so bit 20 becomes bit 31:
+#ifndef __mcoldfire__
+ rorl IMM (5),d2 | rotate d2 5 places right
+ swap d2 | and swap it
+ rorl IMM (5),d3 | do the same thing with d3
+ swap d3 |
+ movew d3,d6 | get the rightmost 11 bits of d3
+ andw IMM (0x07ff),d6 |
+ orw d6,d2 | and put them into d2
+ andw IMM (0xf800),d3 | clear those bits in d3
+#else
+ moveq IMM (11),d7 | left shift d2 11 bits
+ lsll d7,d2
+ movel d3,d6 | get a copy of d3
+ lsll d7,d3 | left shift d3 11 bits
+ andl IMM (0xffe00000),d6 | get the top 11 bits of d3
+ moveq IMM (21),d7 | right shift them 21 bits
+ lsrl d7,d6
+ orl d6,d2 | stick them at the end of d2
+#endif
+
+ movel d2,d6 | move b into d6-d7
+ movel d3,d7 | move a into d4-d5
+ movel d0,d4 | and clear d0-d1-d2-d3 (to put result)
+ movel d1,d5 |
+ movel IMM (0),d3 |
+ movel d3,d2 |
+ movel d3,d1 |
+ movel d3,d0 |
+
+| We use a1 as counter:
+ movel IMM (DBL_MANT_DIG-1),a1
+#ifndef __mcoldfire__
+ exg d7,a1
+#else
+ movel d7,a4
+ movel a1,d7
+ movel a4,a1
+#endif
+
+1:
+#ifndef __mcoldfire__
+ exg d7,a1 | put counter back in a1
+#else
+ movel d7,a4
+ movel a1,d7
+ movel a4,a1
+#endif
+ addl d3,d3 | shift sum once left
+ addxl d2,d2 |
+ addxl d1,d1 |
+ addxl d0,d0 |
+ addl d7,d7 |
+ addxl d6,d6 |
+ bcc 2f | if bit clear skip the following
+#ifndef __mcoldfire__
+ exg d7,a2 |
+#else
+ movel d7,a4
+ movel a2,d7
+ movel a4,a2
+#endif
+ addl d5,d3 | else add a to the sum
+ addxl d4,d2 |
+ addxl d7,d1 |
+ addxl d7,d0 |
+#ifndef __mcoldfire__
+ exg d7,a2 |
+#else
+ movel d7,a4
+ movel a2,d7
+ movel a4,a2
+#endif
+2:
+#ifndef __mcoldfire__
+ exg d7,a1 | put counter in d7
+ dbf d7,1b | decrement and branch
+#else
+ movel d7,a4
+ movel a1,d7
+ movel a4,a1
+ subql IMM (1),d7
+ bpl 1b
+#endif
+
+ movel a3,d4 | restore exponent
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The
+| first thing to do now is to normalize it so bit 8 becomes bit
+| DBL_MANT_DIG-32 (to do the rounding); later we will shift right.
+ swap d0
+ swap d1
+ movew d1,d0
+ swap d2
+ movew d2,d1
+ swap d3
+ movew d3,d2
+ movew IMM (0),d3
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+#else
+ moveq IMM (29),d6
+ lsrl IMM (3),d3
+ movel d2,d7
+ lsll d6,d7
+ orl d7,d3
+ lsrl IMM (3),d2
+ movel d1,d7
+ lsll d6,d7
+ orl d7,d2
+ lsrl IMM (3),d1
+ movel d0,d7
+ lsll d6,d7
+ orl d7,d1
+ lsrl IMM (3),d0
+#endif
+
+| Now round, check for over- and underflow, and exit.
+ movel a0,d7 | get sign bit back into d7
+ moveq IMM (MULTIPLY),d5
+
+ btst IMM (DBL_MANT_DIG+1-32),d0
+ beq Lround$exit
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+ bra Lround$exit
+
+Lmuldf$inop:
+ moveq IMM (MULTIPLY),d5
+ bra Ld$inop
+
+Lmuldf$b$nf:
+ moveq IMM (MULTIPLY),d5
+ movel a0,d7 | get sign bit back into d7
+ tstl d3 | we know d2 == 0x7ff00000, so check d3
+ bne Ld$inop | if d3 <> 0 b is NaN
+ bra Ld$overflow | else we have overflow (since a is finite)
+
+Lmuldf$a$nf:
+ moveq IMM (MULTIPLY),d5
+ movel a0,d7 | get sign bit back into d7
+ tstl d1 | we know d0 == 0x7ff00000, so check d1
+ bne Ld$inop | if d1 <> 0 a is NaN
+ bra Ld$overflow | else signal overflow
+
+| If either number is zero return zero, unless the other is +/-INFINITY or
+| NaN, in which case we return NaN.
+Lmuldf$b$0:
+ moveq IMM (MULTIPLY),d5
+#ifndef __mcoldfire__
+ exg d2,d0 | put b (==0) into d0-d1
+ exg d3,d1 | and a (with sign bit cleared) into d2-d3
+ movel a0,d0 | set result sign
+#else
+ movel d0,d2 | put a into d2-d3
+ movel d1,d3
+ movel a0,d0 | put result zero into d0-d1
+ movq IMM(0),d1
+#endif
+ bra 1f
+Lmuldf$a$0:
+ movel a0,d0 | set result sign
+ movel a6@(16),d2 | put b into d2-d3 again
+ movel a6@(20),d3 |
+ bclr IMM (31),d2 | clear sign bit
+1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness
+ bge Ld$inop | in case NaN or +/-INFINITY return NaN
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| hidden bit back into the fraction; instead we shift left until bit 20
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
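+| For instance (illustrative only): if the normalization takes k left shifts,
+| the exponent register ends up holding 1 - k, so the sum of the exponents
+| computed at Lmuldf$2 still comes out right.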
+Lmuldf$a$den:
+ movel IMM (1),d4
+ andl d6,d0
+1: addl d1,d1 | shift a left until bit 20 is set
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ subw IMM (1),d4 | and adjust exponent
+#else
+ subl IMM (1),d4 | and adjust exponent
+#endif
+ btst IMM (20),d0 |
+ bne Lmuldf$1 |
+ bra 1b
+
+Lmuldf$b$den:
+ movel IMM (1),d5
+ andl d6,d2
+1: addl d3,d3 | shift b left until bit 20 is set
+ addxl d2,d2 |
+#ifndef __mcoldfire__
+ subw IMM (1),d5 | and adjust exponent
+#else
+ subql IMM (1),d5 | and adjust exponent
+#endif
+ btst IMM (20),d2 |
+ bne Lmuldf$2 |
+ bra 1b
+
+
+|=============================================================================
+| __divdf3
+|=============================================================================
+
+| double __divdf3(double, double);
+ FUNC(__divdf3)
+SYM (__divdf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0-d1
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | and b into d2-d3
+ movel a6@(20),d3 |
+ movel d0,d7 | d7 will hold the sign of the result
+ eorl d2,d7 |
+ andl IMM (0x80000000),d7
+ movel d7,a0 | save sign into a0
+ movel IMM (0x7ff00000),d7 | useful constant (+INFINITY)
+ movel d7,d6 | another (mask for fraction)
+ notl d6 |
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d4 |
+ orl d1,d4 |
+ beq Ldivdf$a$0 | branch if a is zero
+ movel d0,d4 |
+ bclr IMM (31),d2 | get rid of b's sign bit '
+ movel d2,d5 |
+ orl d3,d5 |
+ beq Ldivdf$b$0 | branch if b is zero
+ movel d2,d5
+ cmpl d7,d0 | is a big?
+ bhi Ldivdf$inop | if a is NaN return NaN
+ beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1
+ cmpl d7,d2 | now compare b with INFINITY
+ bhi Ldivdf$inop | if b is NaN return NaN
+ beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5 and normalize the numbers to
+| ensure that the ratio of the fractions is around 1. We do this by
+| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit)
+| set, even if they were denormalized to start with.
+| Thus, the result will satisfy: 2 > result > 1/2.
+ andl d7,d4 | and isolate exponent in d4
+ beq Ldivdf$a$den | if exponent is zero we have a denormalized
+ andl d6,d0 | and isolate fraction
+ orl IMM (0x00100000),d0 | and put hidden bit back
+ swap d4 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (4),d4 |
+#else
+ lsrl IMM (4),d4 |
+#endif
+Ldivdf$1: |
+ andl d7,d5 |
+ beq Ldivdf$b$den |
+ andl d6,d2 |
+ orl IMM (0x00100000),d2
+ swap d5 |
+#ifndef __mcoldfire__
+ lsrw IMM (4),d5 |
+#else
+ lsrl IMM (4),d5 |
+#endif
+Ldivdf$2: |
+#ifndef __mcoldfire__
+ subw d5,d4 | subtract exponents
+ addw IMM (D_BIAS),d4 | and add bias
+#else
+ subl d5,d4 | subtract exponents
+ addl IMM (D_BIAS),d4 | and add bias
+#endif
+
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit
+| DBL_MANT_DIG-1-32=1)
+| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1)
+| d4 holds the difference of the exponents, corrected by the bias
+| a0 holds the sign of the ratio
+
+| To do the rounding correctly we need to keep information about the
+| nonsignificant bits. One way to do this would be to do the division
+| using four registers; another is to use two registers (as originally
+| I did), but use a sticky bit to preserve information about the
+| fractional part. Note that we can keep that info in a1, which is not
+| used.
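+|
+| As an illustrative sketch (pseudo-code only) of the restoring division
+| performed by the three loops below:
+|
+|   for each quotient bit, most significant first:
+|     if (a >= b) { a = a - b; set this bit of the quotient; }
+|     a = a << 1;
+|
+| and whatever is left of a afterwards is recorded as a sticky bit in the
+| proper position, so the rounding code can distinguish an exact result
+| from an inexact one.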
+ movel IMM (0),d6 | d6-d7 will hold the result
+ movel d6,d7 |
+ movel IMM (0),a1 | and a1 will hold the sticky bit
+
+ movel IMM (DBL_MANT_DIG-32+1),d5
+
+1: cmpl d0,d2 | is a < b?
+ bhi 3f | if b > a skip the following
+ beq 4f | if d0==d2 check d1 and d3
+2: subl d3,d1 |
+ subxl d2,d0 | a <-- a - b
+ bset d5,d6 | set the corresponding bit in d6
+3: addl d1,d1 | shift a by 1
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d5,1b | and branch back
+#else
+ subql IMM (1), d5
+ bpl 1b
+#endif
+ bra 5f
+4: cmpl d1,d3 | here d0==d2, so check d1 and d3
+ bhi 3b | if d3 > d1 skip the subtraction
+ bra 2b | else go do it
+5:
+| Here we have to start setting the bits in the second long.
+ movel IMM (31),d5 | again d5 is counter
+
+1: cmpl d0,d2 | is a < b?
+ bhi 3f | if b > a skip the following
+ beq 4f | if d0==d2 check d1 and d3
+2: subl d3,d1 |
+ subxl d2,d0 | a <-- a - b
+ bset d5,d7 | set the corresponding bit in d7
+3: addl d1,d1 | shift a by 1
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d5,1b | and branch back
+#else
+ subql IMM (1), d5
+ bpl 1b
+#endif
+ bra 5f
+4: cmpl d1,d3 | here d0==d2, so check d1 and d3
+ bhi 3b | if d3 > d1 skip the subtraction
+ bra 2b | else go do it
+5:
+| Now go ahead checking until we hit a one, which we store in d2.
+ movel IMM (DBL_MANT_DIG),d5
+1: cmpl d2,d0 | is a < b?
+ bhi 4f | if b < a, exit
+ beq 3f | if d0==d2 check d1 and d3
+2: addl d1,d1 | shift a by 1
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d5,1b | and branch back
+#else
+ subql IMM (1), d5
+ bpl 1b
+#endif
+ movel IMM (0),d2 | here no sticky bit was found
+ movel d2,d3
+ bra 5f
+3: cmpl d1,d3 | here d0==d2, so check d1 and d3
+ bhi 2b | if d3 > d1 go back
+4:
+| Here put the sticky bit in d2-d3 (in the position which actually corresponds
+| to it; if you don't do this the algorithm loses in some cases). '
+ movel IMM (0),d2
+ movel d2,d3
+#ifndef __mcoldfire__
+ subw IMM (DBL_MANT_DIG),d5
+ addw IMM (63),d5
+ cmpw IMM (31),d5
+#else
+ subl IMM (DBL_MANT_DIG),d5
+ addl IMM (63),d5
+ cmpl IMM (31),d5
+#endif
+ bhi 2f
+1: bset d5,d3
+ bra 5f
+#ifndef __mcoldfire__
+ subw IMM (32),d5
+#else
+ subl IMM (32),d5
+#endif
+2: bset d5,d2
+5:
+| Finally we are finished! Move the quotient longs (now in d6-d7) to
+| their final destination:
+ movel d6,d0
+ movel d7,d1
+ movel IMM (0),d3
+
+| Here we have finished the division, with the result in d0-d1-d2-d3, with
+| 2^21 <= d0 < 2^23. Thus bit 23 is not set, but bit 22 could be set.
+| If it is not, then definitely bit 21 is set. Normalize so bit 22 is
+| not set:
+ btst IMM (DBL_MANT_DIG-32+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+ movel a0,d7 | restore sign bit to d7
+ moveq IMM (DIVIDE),d5
+ bra Lround$exit
+
+Ldivdf$inop:
+ moveq IMM (DIVIDE),d5
+ bra Ld$inop
+
+Ldivdf$a$0:
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+ moveq IMM (DIVIDE),d5
+ bclr IMM (31),d2 |
+ movel d2,d4 |
+ orl d3,d4 |
+ beq Ld$inop | if b is also zero return NaN
+ cmpl IMM (0x7ff00000),d2 | check for NaN
+ bhi Ld$inop |
+ blt 1f |
+ tstl d3 |
+ bne Ld$inop |
+1: movel a0,d0 | else return signed zero
+ moveq IMM(0),d1 |
+ PICLEA SYM (_fpCCR),a0 | clear exception flags
+ movew IMM (0),a0@ |
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 |
+#else
+ moveml sp@,d2-d7 |
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 |
+ rts |
+
+Ldivdf$b$0:
+ moveq IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit
+| cleared already.
+ movel a0,d7 | put a's sign bit back in d7 '
+ cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY
+ bhi Ld$inop | if larger it is NaN
+ tstl d1 |
+ bne Ld$inop |
+ bra Ld$div$0 | else signal DIVIDE_BY_ZERO
+
+Ldivdf$b$nf:
+ moveq IMM (DIVIDE),d5
+| If d2 == 0x7ff00000 we have to check d3.
+ tstl d3 |
+ bne Ld$inop | if d3 <> 0, b is NaN
+ bra Ld$underflow | else b is +/-INFINITY, so signal underflow
+
+Ldivdf$a$nf:
+ moveq IMM (DIVIDE),d5
+| If d0 == 0x7ff00000 we have to check d1.
+ tstl d1 |
+ bne Ld$inop | if d1 <> 0, a is NaN
+| If a is INFINITY we have to check b
+ cmpl d7,d2 | compare b with INFINITY
+ bge Ld$inop | if b is NaN or INFINITY return NaN
+ tstl d3 |
+ bne Ld$inop |
+ bra Ld$overflow | else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| bit back into the fraction.
+Ldivdf$a$den:
+ movel IMM (1),d4
+ andl d6,d0
+1: addl d1,d1 | shift a left until bit 20 is set
+ addxl d0,d0
+#ifndef __mcoldfire__
+ subw IMM (1),d4 | and adjust exponent
+#else
+ subl IMM (1),d4 | and adjust exponent
+#endif
+ btst IMM (DBL_MANT_DIG-32-1),d0
+ bne Ldivdf$1
+ bra 1b
+
+Ldivdf$b$den:
+ movel IMM (1),d5
+ andl d6,d2
+1: addl d3,d3 | shift b left until bit 20 is set
+ addxl d2,d2
+#ifndef __mcoldfire__
+ subw IMM (1),d5 | and adjust exponent
+#else
+ subql IMM (1),d5 | and adjust exponent
+#endif
+ btst IMM (DBL_MANT_DIG-32-1),d2
+ bne Ldivdf$2
+ bra 1b
+
+Lround$exit:
+| This is a common exit point for __muldf3 and __divdf3. When they enter
+| this point the sign of the result is in d7, the result in d0-d1, normalized
+| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4.
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+ cmpw IMM (-DBL_MANT_DIG-1),d4
+#else
+ cmpl IMM (-DBL_MANT_DIG-1),d4
+#endif
+ blt Ld$underflow
+| It could happen that the exponent is less than 1, in which case the
+| number is denormalized. In this case we shift right and adjust the
+| exponent until it becomes 1 or the fraction is zero (in the latter case
+| we signal underflow and return zero).
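+| For example (illustrative only): if the exponent comes out as -2 the
+| fraction is shifted right three times while the exponent is stepped up
+| to 1, and the bits shifted out are collected in d6-d7 so they still
+| take part in the rounding below.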
+ movel d7,a0 |
+ movel IMM (0),d6 | use d6-d7 to collect bits flushed right
+ movel d6,d7 | use d6-d7 to collect bits flushed right
+#ifndef __mcoldfire__
+ cmpw IMM (1),d4 | if the exponent is less than 1 we
+#else
+ cmpl IMM (1),d4 | if the exponent is less than 1 we
+#endif
+ bge 2f | have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+ addw IMM (1),d4 | adjust the exponent
+ lsrl IMM (1),d0 | shift right once
+ roxrl IMM (1),d1 |
+ roxrl IMM (1),d2 |
+ roxrl IMM (1),d3 |
+ roxrl IMM (1),d6 |
+ roxrl IMM (1),d7 |
+ cmpw IMM (1),d4 | is the exponent 1 already?
+#else
+ addl IMM (1),d4 | adjust the exponent
+ lsrl IMM (1),d7
+ btst IMM (0),d6
+ beq 13f
+ bset IMM (31),d7
+13: lsrl IMM (1),d6
+ btst IMM (0),d3
+ beq 14f
+ bset IMM (31),d6
+14: lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ cmpl IMM (1),d4 | is the exponent 1 already?
+#endif
+ beq 2f | if so, exit the loop
+ bra 1b | else loop back
+ bra Ld$underflow | safety check, shouldn't execute '
+2: orl d6,d2 | this is a trick so we don't lose '
+ orl d7,d3 | the bits which were flushed right
+ movel a0,d7 | get back sign bit into d7
+| Now call the rounding routine (which takes care of denormalized numbers):
+ lea pc@(Lround$0),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to '
+| check again for underflow!). We have to check for overflow or for a
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 0x7ff).
+#ifndef __mcoldfire__
+ cmpw IMM (0x07ff),d4
+#else
+ cmpl IMM (0x07ff),d4
+#endif
+ bge Ld$overflow
+| Now check for a denormalized number (exponent==0):
+ movew d4,d4
+ beq Ld$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+ lslw IMM (4),d4 | exponent back to fourth byte
+#else
+ lsll IMM (4),d4 | exponent back to fourth byte
+#endif
+ bclr IMM (DBL_MANT_DIG-32-1),d0
+ swap d0 | and put back exponent
+#ifndef __mcoldfire__
+ orw d4,d0 |
+#else
+ orl d4,d0 |
+#endif
+ swap d0 |
+ orl d7,d0 | and sign also
+
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+|=============================================================================
+| __negdf2
+|=============================================================================
+
+| double __negdf2(double, double);
+ FUNC(__negdf2)
+SYM (__negdf2):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (NEGATE),d5
+ movel a6@(8),d0 | get number to negate in d0-d1
+ movel a6@(12),d1 |
+ bchg IMM (31),d0 | negate
+ movel d0,d2 | make a positive copy (for the tests)
+ bclr IMM (31),d2 |
+ movel d2,d4 | check for zero
+ orl d1,d4 |
+ beq 2f | if zero (either sign) return +zero
+ cmpl IMM (0x7ff00000),d2 | compare to +INFINITY
+ blt 1f | if finite, return
+ bhi Ld$inop | if larger (fraction not zero) is NaN
+ tstl d1 | if d2 == 0x7ff00000 check d1
+ bne Ld$inop |
+ movel d0,d7 | else get sign and return INFINITY
+ andl IMM (0x80000000),d7
+ bra Ld$infty
+1: PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+2: bclr IMM (31),d0
+ bra 1b
+
+|=============================================================================
+| __cmpdf2
+|=============================================================================
+
+GREATER = 1
+LESS = -1
+EQUAL = 0
+
+| int __cmpdf2_internal(double, double, int);
+SYM (__cmpdf2_internal):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@- | save registers
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (COMPARE),d5
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | get second operand
+ movel a6@(20),d3 |
+| First check if a and/or b are (+/-) zero and in that case clear
+| the sign bit.
+ movel d0,d6 | copy signs into d6 (a) and d7(b)
+ bclr IMM (31),d0 | and clear signs in d0 and d2
+ movel d2,d7 |
+ bclr IMM (31),d2 |
+ cmpl IMM (0x7ff00000),d0 | check for a == NaN
+ bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN
+ beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1
+ movel d0,d4 | copy into d4 to test for zero
+ orl d1,d4 |
+ beq Lcmpdf$a$0 |
+Lcmpdf$0:
+ cmpl IMM (0x7ff00000),d2 | check for b == NaN
+ bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN
+ beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3
+ movel d2,d4 |
+ orl d3,d4 |
+ beq Lcmpdf$b$0 |
+Lcmpdf$1:
+| Check the signs
+ eorl d6,d7
+ bpl 1f
+| If the signs are not equal check if a >= 0
+ tstl d6
+ bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b
+ bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+ tstl d6
+ bpl 1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+ exg d0,d2
+ exg d1,d3
+#else
+ movel d0,d7
+ movel d2,d0
+ movel d7,d2
+ movel d1,d7
+ movel d3,d1
+ movel d7,d3
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+ cmpl d0,d2
+ bhi Lcmpdf$b$gt$a | |b| > |a|
+ bne Lcmpdf$a$gt$b | |b| < |a|
+| If we got here d0 == d2, so we compare d1 and d3.
+ cmpl d1,d3
+ bhi Lcmpdf$b$gt$a | |b| > |a|
+ bne Lcmpdf$a$gt$b | |b| < |a|
+| If we got here a == b.
+ movel IMM (EQUAL),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+Lcmpdf$a$gt$b:
+ movel IMM (GREATER),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+Lcmpdf$b$gt$a:
+ movel IMM (LESS),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+Lcmpdf$a$0:
+ bclr IMM (31),d6
+ bra Lcmpdf$0
+Lcmpdf$b$0:
+ bclr IMM (31),d7
+ bra Lcmpdf$1
+
+Lcmpdf$a$nf:
+ tstl d1
+ bne Ld$inop
+ bra Lcmpdf$0
+
+Lcmpdf$b$nf:
+ tstl d3
+ bne Ld$inop
+ bra Lcmpdf$1
+
+Lcmpd$inop:
+ movl a6@(24),d0
+ moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+| int __cmpdf2(double, double);
+ FUNC(__cmpdf2)
+SYM (__cmpdf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+
+|=============================================================================
+| rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1-d2-d3, with the exponent in register d4. They assume that the
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d4.
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1: btst IMM (DBL_MANT_DIG-32),d0
+ bne 2f | if set the number is normalized
+| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent
+| is one (remember that a denormalized number corresponds to an
+| exponent of -D_BIAS+1).
+#ifndef __mcoldfire__
+ cmpw IMM (1),d4 | remember that the exponent is at least one
+#else
+ cmpl IMM (1),d4 | remember that the exponent is at least one
+#endif
+ beq 2f | an exponent of one means denormalized
+ addl d3,d3 | else shift and adjust the exponent
+ addxl d2,d2 |
+ addxl d1,d1 |
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d4,1b |
+#else
+ subql IMM (1), d4
+ bpl 1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f.
+| If delta == 1, we make sure the rounded number will be even
+| (after shifting).
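+| Concretely (round-half-to-even): delta < 1 means truncate, delta > 1 means
+| add 1, and delta == 1 means add 1 only when the last significant bit of f
+| is set, so that the rounded result is even.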
+ btst IMM (0),d1 | is delta < 1?
+ beq 2f | if so, do not do anything
+ orl d2,d3 | is delta == 1?
+ bne 1f | if delta > 1 round up (add 1) below
+ movel d1,d3 | here delta == 1: round to even
+ andl IMM (2),d3 | bit 1 is the last significant bit
+ movel IMM (0),d2 |
+ addl d3,d1 |
+ addxl d2,d0 |
+ bra 2f |
+1: movel IMM (1),d3 | else add 1
+ movel IMM (0),d2 |
+ addl d3,d1 |
+ addxl d2,d0
+| Shift right once (because we used bit #DBL_MANT_DIG-32!).
+2:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+#endif
+
+| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a
+| 'fraction overflow' ...).
+ btst IMM (DBL_MANT_DIG-32),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we
+| have to put the exponent to zero and return a denormalized number.
+ btst IMM (DBL_MANT_DIG-32-1),d0
+ beq 1f
+ jmp a0@
+1: movel IMM (0),d4
+ jmp a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+ jmp a0@
+#endif /* L_double */
+
+#ifdef L_float
+
+ .globl SYM (_fpCCR)
+ .globl $_exception_handler
+
+QUIET_NaN = 0xffffffff
+SIGNL_NaN = 0x7f800001
+INFINITY = 0x7f800000
+
+F_MAX_EXP = 0xff
+F_BIAS = 126
+FLT_MAX_EXP = F_MAX_EXP - F_BIAS
+FLT_MIN_EXP = 1 - F_BIAS
+FLT_MANT_DIG = 24
+
+INEXACT_RESULT = 0x0001
+UNDERFLOW = 0x0002
+OVERFLOW = 0x0004
+DIVIDE_BY_ZERO = 0x0008
+INVALID_OPERATION = 0x0010
+
+SINGLE_FLOAT = 1
+
+NOOP = 0
+ADD = 1
+MULTIPLY = 2
+DIVIDE = 3
+NEGATE = 4
+COMPARE = 5
+EXTENDSFDF = 6
+TRUNCDFSF = 7
+
+UNKNOWN = -1
+ROUND_TO_NEAREST = 0 | round result to nearest representable value
+ROUND_TO_ZERO = 1 | round result towards zero
+ROUND_TO_PLUS = 2 | round result towards plus infinity
+ROUND_TO_MINUS = 3 | round result towards minus infinity
+
+| Entry points:
+
+ .globl SYM (__addsf3)
+ .globl SYM (__subsf3)
+ .globl SYM (__mulsf3)
+ .globl SYM (__divsf3)
+ .globl SYM (__negsf2)
+ .globl SYM (__cmpsf2)
+ .globl SYM (__cmpsf2_internal)
+ .hidden SYM (__cmpsf2_internal)
+
+| These are common routines to return and signal exceptions.
+
+ .text
+ .even
+
+Lf$den:
+| Return and signal a denormalized number
+ orl d7,d0
+ moveq IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$infty:
+Lf$overflow:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (INFINITY),d0
+ orl d7,d0
+ moveq IMM (INEXACT_RESULT+OVERFLOW),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$underflow:
+| Return 0 and set the exception flags
+ moveq IMM (0),d0
+ moveq IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$inop:
+| Return a quiet NaN and set the exception flags
+ movel IMM (QUIET_NaN),d0
+ moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$div$0:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (INFINITY),d0
+ orl d7,d0
+ moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+|=============================================================================
+|=============================================================================
+| single precision routines
+|=============================================================================
+|=============================================================================
+
+| A single precision floating point number (float) has the format:
+|
+| struct _float {
+| unsigned int sign : 1; /* sign bit */
+| unsigned int exponent : 8; /* exponent, shifted by 126 */
+| unsigned int fraction : 23; /* fraction */
+| } float;
+|
+| Thus sizeof(float) = 4 (32 bits).
+|
+| All the routines are callable from C programs, and return the result
+| in the single register d0. They also preserve all registers except
+| d0-d1 and a0-a1.
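+|
+| For example, 1.0f is the bit pattern 0x3f800000 (sign 0, exponent field
+| 0x7f, fraction 0); exponent field 0xff with a zero fraction is +/-INFINITY
+| (0x7f800000 above) and with a nonzero fraction is a NaN.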
+
+|=============================================================================
+| __subsf3
+|=============================================================================
+
+| float __subsf3(float, float);
+ FUNC(__subsf3)
+SYM (__subsf3):
+ bchg IMM (31),sp@(8) | change sign of second operand
+ | and fall through
+|=============================================================================
+| __addsf3
+|=============================================================================
+
+| float __addsf3(float, float);
+ FUNC(__addsf3)
+SYM (__addsf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0) | everything will be done in registers
+ moveml d2-d7,sp@- | save all data registers but d0-d1
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 | get second operand
+ movel d0,a0 | get d0's sign bit '
+ addl d0,d0 | check and clear sign bit of a
+ beq Laddsf$b | if zero return second operand
+ movel d1,a1 | save b's sign bit '
+ addl d1,d1 | get rid of sign bit
+ beq Laddsf$a | if zero return first operand
+
+| Get the exponents and check for denormalized and/or infinity.
+
+ movel IMM (0x00ffffff),d4 | mask to get fraction
+ movel IMM (0x01000000),d5 | mask to put hidden bit back
+
+ movel d0,d6 | save a to get exponent
+ andl d4,d0 | get fraction in d0
+ notl d4 | make d4 into a mask for the exponent
+ andl d4,d6 | get exponent in d6
+ beq Laddsf$a$den | branch if a is denormalized
+ cmpl d4,d6 | check for INFINITY or NaN
+ beq Laddsf$nf
+ swap d6 | put exponent into first word
+ orl d5,d0 | and put hidden bit back
+Laddsf$1:
+| Now we have a's exponent in d6 (second byte) and the mantissa in d0. '
+ movel d1,d7 | get exponent in d7
+ andl d4,d7 |
+ beq Laddsf$b$den | branch if b is denormalized
+ cmpl d4,d7 | check for INFINITY or NaN
+ beq Laddsf$nf
+ swap d7 | put exponent into first word
+ notl d4 | make d4 into a mask for the fraction
+ andl d4,d1 | get fraction in d1
+ orl d5,d1 | and put hidden bit back
+Laddsf$2:
+| Now we have b's exponent in d7 (second byte) and the mantissa in d1. '
+
+| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we
+| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra
+| bit).
+
+ movel d1,d2 | move b to d2, since we want to use
+ | two registers to do the sum
+ movel IMM (0),d1 | and clear the new ones
+ movel d1,d3 |
+
+| Here we shift the numbers (a in d0-d1, b in d2-d3) so the exponents are the
+| same, and put the largest exponent in d6. Note that we are using two
+| registers for each number (see the discussion by D. Knuth in "Seminumerical
+| Algorithms").
+#ifndef __mcoldfire__
+ cmpw d6,d7 | compare exponents
+#else
+ cmpl d6,d7 | compare exponents
+#endif
+ beq Laddsf$3 | if equal don't shift '
+ bhi 5f | branch if second exponent largest
+1:
+ subl d6,d7 | keep the largest exponent
+ negl d7
+#ifndef __mcoldfire__
+ lsrw IMM (8),d7 | put difference in lower byte
+#else
+ lsrl IMM (8),d7 | put difference in lower byte
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+ cmpw IMM (FLT_MANT_DIG+2),d7
+#else
+ cmpl IMM (FLT_MANT_DIG+2),d7
+#endif
+ bge Laddsf$b$small
+#ifndef __mcoldfire__
+ cmpw IMM (16),d7 | if difference >= 16 swap
+#else
+ cmpl IMM (16),d7 | if difference >= 16 swap
+#endif
+ bge 4f
+2:
+#ifndef __mcoldfire__
+ subw IMM (1),d7
+#else
+ subql IMM (1), d7
+#endif
+3:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d2 | shift right second operand
+ roxrl IMM (1),d3
+ dbra d7,3b
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ subql IMM (1), d7
+ bpl 3b
+#endif
+ bra Laddsf$3
+4:
+ movew d2,d3
+ swap d3
+ movew d3,d2
+ swap d2
+#ifndef __mcoldfire__
+ subw IMM (16),d7
+#else
+ subl IMM (16),d7
+#endif
+ bne 2b | if still more bits, go back to normal case
+ bra Laddsf$3
+5:
+#ifndef __mcoldfire__
+ exg d6,d7 | exchange the exponents
+#else
+ eorl d6,d7
+ eorl d7,d6
+ eorl d6,d7
+#endif
+ subl d6,d7 | keep the largest exponent
+ negl d7 |
+#ifndef __mcoldfire__
+ lsrw IMM (8),d7 | put difference in lower byte
+#else
+ lsrl IMM (8),d7 | put difference in lower byte
+#endif
+| if difference is too large we don't shift (and exit!) '
+#ifndef __mcoldfire__
+ cmpw IMM (FLT_MANT_DIG+2),d7
+#else
+ cmpl IMM (FLT_MANT_DIG+2),d7
+#endif
+ bge Laddsf$a$small
+#ifndef __mcoldfire__
+ cmpw IMM (16),d7 | if difference >= 16 swap
+#else
+ cmpl IMM (16),d7 | if difference >= 16 swap
+#endif
+ bge 8f
+6:
+#ifndef __mcoldfire__
+ subw IMM (1),d7
+#else
+ subl IMM (1),d7
+#endif
+7:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0 | shift right first operand
+ roxrl IMM (1),d1
+ dbra d7,7b
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ subql IMM (1),d7
+ bpl 7b
+#endif
+ bra Laddsf$3
+8:
+ movew d0,d1
+ swap d1
+ movew d1,d0
+ swap d0
+#ifndef __mcoldfire__
+ subw IMM (16),d7
+#else
+ subl IMM (16),d7
+#endif
+ bne 6b | if still more bits, go back to normal case
+ | otherwise we fall through
+
+| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the
+| signs are stored in a0 and a1).
+
+Laddsf$3:
+| Here we have to decide whether to add or subtract the numbers
+#ifndef __mcoldfire__
+ exg d6,a0 | get signs back
+ exg d7,a1 | and save the exponents
+#else
+ movel d6,d4
+ movel a0,d6
+ movel d4,a0
+ movel d7,d4
+ movel a1,d7
+ movel d4,a1
+#endif
+ eorl d6,d7 | combine sign bits
+ bmi Lsubsf$0 | if negative a and b have opposite
+ | sign so we actually subtract the
+ | numbers
+
+| Here we have both positive or both negative
+#ifndef __mcoldfire__
+ exg d6,a0 | now we have the exponent in d6
+#else
+ movel d6,d4
+ movel a0,d6
+ movel d4,a0
+#endif
+ movel a0,d7 | and sign in d7
+ andl IMM (0x80000000),d7
+| Here we do the addition.
+ addl d3,d1
+ addxl d2,d0
+| Note: now we have d2, d3, d4 and d5 to play with!
+
+| Put the exponent, in the first byte, in d2, to use the "standard" rounding
+| routines:
+ movel d6,d2
+#ifndef __mcoldfire__
+ lsrw IMM (8),d2
+#else
+ lsrl IMM (8),d2
+#endif
+
+| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| Since this is the addition (not the subtraction!) we could have set
+| one more bit, so we check for this:
+ btst IMM (FLT_MANT_DIG+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+#endif
+ addl IMM (1),d2
+1:
+ lea pc@(Laddsf$4),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Laddsf$4:
+| Put back the exponent, but check for overflow.
+#ifndef __mcoldfire__
+ cmpw IMM (0xff),d2
+#else
+ cmpl IMM (0xff),d2
+#endif
+ bhi 1f
+ bclr IMM (FLT_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (7),d2
+#else
+ lsll IMM (7),d2
+#endif
+ swap d2
+ orl d2,d0
+ bra Laddsf$ret
+1:
+ moveq IMM (ADD),d5
+ bra Lf$overflow
+
+Lsubsf$0:
+| We are here if a > 0 and b < 0 (sign bits cleared).
+| Here we do the subtraction.
+ movel d6,d7 | put sign in d7
+ andl IMM (0x80000000),d7
+
+ subl d3,d1 | result in d0-d1
+ subxl d2,d0 |
+ beq Laddsf$ret | if zero just exit
+ bpl 1f | if positive skip the following
+ bchg IMM (31),d7 | change sign bit in d7
+ negl d1
+ negxl d0
+1:
+#ifndef __mcoldfire__
+ exg d2,a0 | now we have the exponent in d2
+ lsrw IMM (8),d2 | put it in the first byte
+#else
+ movel d2,d4
+ movel a0,d2
+ movel d4,a0
+ lsrl IMM (8),d2 | put it in the first byte
+#endif
+
+| Now d0-d1 is positive and the sign bit is in d7.
+
+| Note that we do not have to normalize, since in the subtraction bit
+| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by
+| the rounding routines themselves.
+ lea pc@(Lsubsf$1),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lsubsf$1:
+| Put back the exponent (we can't have overflow!). '
+ bclr IMM (FLT_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (7),d2
+#else
+ lsll IMM (7),d2
+#endif
+ swap d2
+ orl d2,d0
+ bra Laddsf$ret
+
+| If one of the numbers was too small (difference of exponents >=
+| FLT_MANT_DIG+2) we return the other (and now we don't have to '
+| check for finiteness or zero).
+Laddsf$a$small:
+ movel a6@(12),d0
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Laddsf$b$small:
+ movel a6@(8),d0
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+| If the numbers are denormalized remember to put exponent equal to 1.
+
+Laddsf$a$den:
+ movel d5,d6 | d5 contains 0x01000000
+ swap d6
+ bra Laddsf$1
+
+Laddsf$b$den:
+ movel d5,d7
+ swap d7
+ notl d4 | make d4 into a mask for the fraction
+ | (this was not executed after the jump)
+ bra Laddsf$2
+
+| The rest is mainly code for the different results which can be
+| returned (checking always for +/-INFINITY and NaN).
+
+Laddsf$b:
+| Return b (if a is zero).
+ movel a6@(12),d0
+ cmpl IMM (0x80000000),d0 | Check if b is -0
+ bne 1f
+ movel a0,d7
+ andl IMM (0x80000000),d7 | Use the sign of a
+ clrl d0
+ bra Laddsf$ret
+Laddsf$a:
+| Return a (if b is zero).
+ movel a6@(8),d0
+1:
+ moveq IMM (ADD),d5
+| We have to check for NaN and +/-infty.
+ movel d0,d7
+ andl IMM (0x80000000),d7 | put sign in d7
+ bclr IMM (31),d0 | clear sign
+ cmpl IMM (INFINITY),d0 | check for infty or NaN
+ bge 2f
+ movel d0,d0 | check for zero (we do this because we don't '
+ bne Laddsf$ret | want to return -0 by mistake)
+ bclr IMM (31),d7 | if zero be sure to clear sign
+ bra Laddsf$ret | if everything OK just return
+2:
+| The value to be returned is either +/-infty or NaN
+ andl IMM (0x007fffff),d0 | check for NaN
+ bne Lf$inop | if mantissa not zero is NaN
+ bra Lf$infty
+
+Laddsf$ret:
+| Normal exit (a and b nonzero, result is not NaN nor +/-infty).
+| We have to clear the exception flags (just the exception type).
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+ orl d7,d0 | put sign bit
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Laddsf$ret$den:
+| Return a denormalized number (for addition we don't signal underflow) '
+ lsrl IMM (1),d0 | remember to shift right back once
+ bra Laddsf$ret | and return
+
+| Note: when adding two floats of the same sign if either one is
+| NaN we return NaN without regard to whether the other is finite or
+| not. When subtracting them (i.e., when adding two numbers of
+| opposite signs) things are more complicated: if both are INFINITY
+| we return NaN, if only one is INFINITY and the other is NaN we return
+| NaN, but if it is finite we return INFINITY with the corresponding sign.
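+| For example: INFINITY + INFINITY (same sign) = INFINITY with that sign,
+| (+INFINITY) + (-INFINITY) = NaN, INFINITY + finite = INFINITY, and
+| NaN + anything = NaN.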
+
+Laddsf$nf:
+ moveq IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+ movel a6@(8),d0 | get the numbers back (remember that we
+ movel a6@(12),d1 | did some processing already)
+ movel IMM (INFINITY),d4 | useful constant (INFINITY)
+ movel d0,d2 | save sign bits
+ movel d1,d3
+ bclr IMM (31),d0 | clear sign bits
+ bclr IMM (31),d1
+| We know that one of them is either NaN or +/-INFINITY
+| Check for NaN (if either one is NaN return NaN)
+ cmpl d4,d0 | check first a (d0)
+ bhi Lf$inop
+ cmpl d4,d1 | check now b (d1)
+ bhi Lf$inop
+| Now comes the check for +/-INFINITY. We know that both are (maybe not
+| finite) numbers, but if both are infinite we have to check whether we
+| are adding or subtracting them.
+ eorl d3,d2 | to check sign bits
+ bmi 1f
+ movel d0,d7
+ andl IMM (0x80000000),d7 | get (common) sign bit
+ bra Lf$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they have to be infinite both, so we
+| return NaN).
+ cmpl d1,d0 | are both infinite?
+ beq Lf$inop | if so return NaN
+
+ movel d0,d7
+ andl IMM (0x80000000),d7 | get a's sign bit '
+ cmpl d4,d0 | test now for infinity
+ beq Lf$infty | if a is INFINITY return with this sign
+ bchg IMM (31),d7 | else we know b is INFINITY and has
+ bra Lf$infty | the opposite sign
+
+|=============================================================================
+| __mulsf3
+|=============================================================================
+
+| float __mulsf3(float, float);
+ FUNC(__mulsf3)
+SYM (__mulsf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0
+ movel a6@(12),d1 | and b into d1
+ movel d0,d7 | d7 will hold the sign of the product
+ eorl d1,d7 |
+ andl IMM (0x80000000),d7
+ movel IMM (INFINITY),d6 | useful constant (+INFINITY)
+ movel d6,d5 | another (mask for fraction)
+ notl d5 |
+ movel IMM (0x00800000),d4 | this is to put hidden bit back
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d2 |
+ beq Lmulsf$a$0 | branch if a is zero
+ bclr IMM (31),d1 | get rid of b's sign bit '
+ movel d1,d3 |
+ beq Lmulsf$b$0 | branch if b is zero
+ cmpl d6,d0 | is a big?
+ bhi Lmulsf$inop | if a is NaN return NaN
+ beq Lmulsf$inf | if a is INFINITY we have to check b
+ cmpl d6,d1 | now compare b with INFINITY
+ bhi Lmulsf$inop | is b NaN?
+ beq Lmulsf$overflow | is b INFINITY?
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3.
+ andl d6,d2 | and isolate exponent in d2
+ beq Lmulsf$a$den | if exponent is zero we have a denormalized
+ andl d5,d0 | and isolate fraction
+ orl d4,d0 | and put hidden bit back
+ swap d2 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (7),d2 |
+#else
+ lsrl IMM (7),d2 |
+#endif
+Lmulsf$1: | number
+ andl d6,d3 |
+ beq Lmulsf$b$den |
+ andl d5,d1 |
+ orl d4,d1 |
+ swap d3 |
+#ifndef __mcoldfire__
+ lsrw IMM (7),d3 |
+#else
+ lsrl IMM (7),d3 |
+#endif
+Lmulsf$2: |
+#ifndef __mcoldfire__
+ addw d3,d2 | add exponents
+ subw IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#else
+ addl d3,d2 | add exponents
+ subl IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit FLT_MANT_DIG-1 set (even if they were
+| denormalized to start with!), which means that in the product
+| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the
+| high long) is set.
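+| (With FLT_MANT_DIG = 24, bit 2*(FLT_MANT_DIG-1) is bit 46 of the product,
+| i.e. bit 14 of the high long d0.)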
+
+| To do the multiplication let us move the number a little bit around ...
+ movel d1,d6 | second operand in d6
+ movel d0,d5 | first operand in d4-d5
+ movel IMM (0),d4
+ movel d4,d1 | the sums will go in d0-d1
+ movel d4,d0
+
+| now bit FLT_MANT_DIG-1 becomes bit 31:
+ lsll IMM (31-FLT_MANT_DIG+1),d6
+
+| Start the loop (we loop #FLT_MANT_DIG times):
+ moveq IMM (FLT_MANT_DIG-1),d3
+1: addl d1,d1 | shift sum
+ addxl d0,d0
+ lsll IMM (1),d6 | get bit bn
+ bcc 2f | if not set skip sum
+ addl d5,d1 | add a
+ addxl d4,d0
+2:
+#ifndef __mcoldfire__
+ dbf d3,1b | loop back
+#else
+ subql IMM (1),d3
+ bpl 1b
+#endif
+
+| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG
+| (mod 32) of d0 set. The first thing to do now is to normalize it so bit
+| FLT_MANT_DIG is set (to do the rounding).
+#ifndef __mcoldfire__
+ rorl IMM (6),d1
+ swap d1
+ movew d1,d3
+ andw IMM (0x03ff),d3
+ andw IMM (0xfd00),d1
+#else
+ movel d1,d3
+ lsll IMM (8),d1
+ addl d1,d1
+ addl d1,d1
+ moveq IMM (22),d5
+ lsrl d5,d3
+ orl d3,d1
+ andl IMM (0xfffffd00),d1
+#endif
+ lsll IMM (8),d0
+ addl d0,d0
+ addl d0,d0
+#ifndef __mcoldfire__
+ orw d3,d0
+#else
+ orl d3,d0
+#endif
+
+ moveq IMM (MULTIPLY),d5
+
+ btst IMM (FLT_MANT_DIG+1),d0
+ beq Lround$exit
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ addw IMM (1),d2
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ addql IMM (1),d2
+#endif
+ bra Lround$exit
+
+Lmulsf$inop:
+ moveq IMM (MULTIPLY),d5
+ bra Lf$inop
+
+Lmulsf$overflow:
+ moveq IMM (MULTIPLY),d5
+ bra Lf$overflow
+
+Lmulsf$inf:
+ moveq IMM (MULTIPLY),d5
+| If either is NaN return NaN; else both are (maybe infinite) numbers, so
+| return INFINITY with the correct sign (which is in d7).
+ cmpl d6,d1 | is b NaN?
+ bhi Lf$inop | if so return NaN
+ bra Lf$overflow | else return +/-INFINITY
+
+| If either number is zero return zero, unless the other is +/-INFINITY
+| or NaN, in which case we return NaN.
+Lmulsf$b$0:
+| Here d1 (==b) is zero.
+ movel a6@(8),d1 | get a again to check for non-finiteness
+ bra 1f
+Lmulsf$a$0:
+ movel a6@(12),d1 | get b again to check for non-finiteness
+1: bclr IMM (31),d1 | clear sign bit
+ cmpl IMM (INFINITY),d1 | and check for a large exponent
+ bge Lf$inop | if b is +/-INFINITY or NaN return NaN
+ movel d7,d0 | else return signed zero
+ PICLEA SYM (_fpCCR),a0 |
+ movew IMM (0),a0@ |
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 |
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 |
+ rts |
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| hidden bit back into the fraction; instead we shift left until bit 23
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
+Lmulsf$a$den:
+ movel IMM (1),d2
+ andl d5,d0
+1: addl d0,d0 | shift a left (until bit 23 is set)
+#ifndef __mcoldfire__
+ subw IMM (1),d2 | and adjust exponent
+#else
+ subql IMM (1),d2 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d0
+ bne Lmulsf$1 |
+ bra 1b | else loop back
+
+Lmulsf$b$den:
+ movel IMM (1),d3
+ andl d5,d1
+1: addl d1,d1 | shift b left until bit 23 is set
+#ifndef __mcoldfire__
+ subw IMM (1),d3 | and adjust exponent
+#else
+ subql IMM (1),d3 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d1
+ bne Lmulsf$2 |
+ bra 1b | else loop back
+
+|=============================================================================
+| __divsf3
+|=============================================================================
+
+| float __divsf3(float, float);
+ FUNC(__divsf3)
+SYM (__divsf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0
+ movel a6@(12),d1 | and b into d1
+ movel d0,d7 | d7 will hold the sign of the result
+ eorl d1,d7 |
+ andl IMM (0x80000000),d7 |
+ movel IMM (INFINITY),d6 | useful constant (+INFINITY)
+ movel d6,d5 | another (mask for fraction)
+ notl d5 |
+ movel IMM (0x00800000),d4 | this is to put hidden bit back
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d2 |
+ beq Ldivsf$a$0 | branch if a is zero
+ bclr IMM (31),d1 | get rid of b's sign bit '
+ movel d1,d3 |
+ beq Ldivsf$b$0 | branch if b is zero
+ cmpl d6,d0 | is a big?
+ bhi Ldivsf$inop | if a is NaN return NaN
+ beq Ldivsf$inf | if a is INFINITY we have to check b
+ cmpl d6,d1 | now compare b with INFINITY
+ bhi Ldivsf$inop | if b is NaN return NaN
+ beq Ldivsf$underflow
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3 and normalize the numbers to
+| ensure that the ratio of the fractions is close to 1. We do this by
+| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set.
+ andl d6,d2 | and isolate exponent in d2
+ beq Ldivsf$a$den | if exponent is zero we have a denormalized
+ andl d5,d0 | and isolate fraction
+ orl d4,d0 | and put hidden bit back
+ swap d2 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (7),d2 |
+#else
+ lsrl IMM (7),d2 |
+#endif
+Ldivsf$1: |
+ andl d6,d3 |
+ beq Ldivsf$b$den |
+ andl d5,d1 |
+ orl d4,d1 |
+ swap d3 |
+#ifndef __mcoldfire__
+ lsrw IMM (7),d3 |
+#else
+ lsrl IMM (7),d3 |
+#endif
+Ldivsf$2: |
+#ifndef __mcoldfire__
+ subw d3,d2 | subtract exponents
+ addw IMM (F_BIAS),d2 | and add bias
+#else
+ subl d3,d2 | subtract exponents
+ addl IMM (F_BIAS),d2 | and add bias
+#endif
+
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1)
+| d1 holds b (second operand, bit FLT_MANT_DIG=1)
+| d2 holds the difference of the exponents, corrected by the bias
+| d7 holds the sign of the ratio
+| d4, d5, d6 hold some constants
+ movel d7,a0 | d6-d7 will hold the ratio of the fractions
+ movel IMM (0),d6 |
+ movel d6,d7
+
+ moveq IMM (FLT_MANT_DIG+1),d3
+1: cmpl d0,d1 | is a < b?
+ bhi 2f |
+ bset d3,d6 | set a bit in d6
+ subl d1,d0 | if a >= b a <-- a-b
+ beq 3f | if a is zero, exit
+2: addl d0,d0 | multiply a by 2
+#ifndef __mcoldfire__
+ dbra d3,1b
+#else
+ subql IMM (1),d3
+ bpl 1b
+#endif
+
+| Now we keep going to set the sticky bit ...
+ moveq IMM (FLT_MANT_DIG),d3
+1: cmpl d0,d1
+ ble 2f
+ addl d0,d0
+#ifndef __mcoldfire__
+ dbra d3,1b
+#else
+ subql IMM(1),d3
+ bpl 1b
+#endif
+ movel IMM (0),d1
+ bra 3f
+2: movel IMM (0),d1
+#ifndef __mcoldfire__
+ subw IMM (FLT_MANT_DIG),d3
+ addw IMM (31),d3
+#else
+ subl IMM (FLT_MANT_DIG),d3
+ addl IMM (31),d3
+#endif
+ bset d3,d1
+3:
+ movel d6,d0 | put the ratio in d0-d1
+ movel a0,d7 | get sign back
+
+| Because of the normalization we did before we are guaranteed that
+| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set,
+| bit 25 could be set, and if it is not set then bit 24 is necessarily set.
+ btst IMM (FLT_MANT_DIG+1),d0
+ beq 1f | if it is not set, then bit 24 is set
+ lsrl IMM (1),d0 |
+#ifndef __mcoldfire__
+ addw IMM (1),d2 |
+#else
+ addl IMM (1),d2 |
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+ moveq IMM (DIVIDE),d5
+ bra Lround$exit
+
+Ldivsf$inop:
+ moveq IMM (DIVIDE),d5
+ bra Lf$inop
+
+Ldivsf$overflow:
+ moveq IMM (DIVIDE),d5
+ bra Lf$overflow
+
+Ldivsf$underflow:
+ moveq IMM (DIVIDE),d5
+ bra Lf$underflow
+
+Ldivsf$a$0:
+ moveq IMM (DIVIDE),d5
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+ andl IMM (0x7fffffff),d1 | clear sign bit and test b
+ beq Lf$inop | if b is also zero return NaN
+ cmpl IMM (INFINITY),d1 | check for NaN
+ bhi Lf$inop |
+ movel d7,d0 | else return signed zero
+ PICLEA SYM (_fpCCR),a0 |
+ movew IMM (0),a0@ |
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 |
+#else
+ moveml sp@,d2-d7 |
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 |
+ rts |
+
+Ldivsf$b$0:
+ moveq IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit
+| cleared already.
+ cmpl IMM (INFINITY),d0 | compare d0 with INFINITY
+ bhi Lf$inop | if larger it is NaN
+ bra Lf$div$0 | else signal DIVIDE_BY_ZERO
+
+Ldivsf$inf:
+ moveq IMM (DIVIDE),d5
+| If a is INFINITY we have to check b
+ cmpl IMM (INFINITY),d1 | compare b with INFINITY
+ bge Lf$inop | if b is NaN or INFINITY return NaN
+ bra Lf$overflow | else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| bit back into the fraction.
+Ldivsf$a$den:
+ movel IMM (1),d2
+ andl d5,d0
+1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set
+#ifndef __mcoldfire__
+ subw IMM (1),d2 | and adjust exponent
+#else
+ subl IMM (1),d2 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d0
+ bne Ldivsf$1
+ bra 1b
+
+Ldivsf$b$den:
+ movel IMM (1),d3
+ andl d5,d1
+1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set
+#ifndef __mcoldfire__
+ subw IMM (1),d3 | and adjust exponent
+#else
+ subl IMM (1),d3 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d1
+ bne Ldivsf$2
+ bra 1b
+
+Lround$exit:
+| This is a common exit point for __mulsf3 and __divsf3.
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+ cmpw IMM (-FLT_MANT_DIG-1),d2
+#else
+ cmpl IMM (-FLT_MANT_DIG-1),d2
+#endif
+ blt Lf$underflow
+| It could happen that the exponent is less than 1, in which case the
+| number is denormalized. In this case we shift right and adjust the
+| exponent until it becomes 1 or the fraction is zero (in the latter case
+| we signal underflow and return zero).
+ movel IMM (0),d6 | d6 is used temporarily
+#ifndef __mcoldfire__
+ cmpw IMM (1),d2 | if the exponent is less than 1 we
+#else
+ cmpl IMM (1),d2 | if the exponent is less than 1 we
+#endif
+ bge 2f | have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+ addw IMM (1),d2 | adjust the exponent
+ lsrl IMM (1),d0 | shift right once
+ roxrl IMM (1),d1 |
+ roxrl IMM (1),d6 | d6 collect bits we would lose otherwise
+ cmpw IMM (1),d2 | is the exponent 1 already?
+#else
+ addql IMM (1),d2 | adjust the exponent
+ lsrl IMM (1),d6
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d6
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ cmpl IMM (1),d2 | is the exponent 1 already?
+#endif
+ beq 2f | if so, exit the loop
+ bra 1b | else loop back
+ bra Lf$underflow | safety check, shouldn't execute '
+2: orl d6,d1 | this is a trick so we don't lose '
+ | the extra bits which were flushed right
+| Now call the rounding routine (which takes care of denormalized numbers):
+ lea pc@(Lround$0),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to '
+| check again for underflow!). We have to check for overflow or for a
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 255).
+#ifndef __mcoldfire__
+ cmpw IMM (0x00ff),d2
+#else
+ cmpl IMM (0x00ff),d2
+#endif
+ bge Lf$overflow
+| Now check for a denormalized number (exponent==0).
+ movew d2,d2
+ beq Lf$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+ lslw IMM (7),d2 | exponent back to fourth byte
+#else
+ lsll IMM (7),d2 | exponent back to fourth byte
+#endif
+ bclr IMM (FLT_MANT_DIG-1),d0
+ swap d0 | and put back exponent
+#ifndef __mcoldfire__
+ orw d2,d0 |
+#else
+ orl d2,d0
+#endif
+ swap d0 |
+ orl d7,d0 | and sign also
+
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+|=============================================================================
+| __negsf2
+|=============================================================================
+
+| This is trivial and could be shorter if we didn't bother checking for NaN '
+| and +/-INFINITY.
+
+| float __negsf2(float);
+ FUNC(__negsf2)
+SYM (__negsf2):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (NEGATE),d5
+ movel a6@(8),d0 | get number to negate in d0
+ bchg IMM (31),d0 | negate
+ movel d0,d1 | make a positive copy
+ bclr IMM (31),d1 |
+ tstl d1 | check for zero
+ beq 2f | if zero (either sign) return +zero
+ cmpl IMM (INFINITY),d1 | compare to +INFINITY
+ blt 1f |
+ bhi Lf$inop | if larger (fraction not zero) is NaN
+ movel d0,d7 | else get sign and return INFINITY
+ andl IMM (0x80000000),d7
+ bra Lf$infty
+1: PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+2: bclr IMM (31),d0
+ bra 1b
+
+|=============================================================================
+| __cmpsf2
+|=============================================================================
+
+GREATER = 1
+LESS = -1
+EQUAL = 0
+
+| int __cmpsf2_internal(float, float, int);
+SYM (__cmpsf2_internal):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@- | save registers
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (COMPARE),d5
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 | get second operand
+| Check if either is NaN, and in that case return garbage and signal
+| INVALID_OPERATION. Check also if either is zero, and clear the signs
+| if necessary.
+ movel d0,d6
+ andl IMM (0x7fffffff),d0
+ beq Lcmpsf$a$0
+ cmpl IMM (0x7f800000),d0
+ bhi Lcmpf$inop
+Lcmpsf$1:
+ movel d1,d7
+ andl IMM (0x7fffffff),d1
+ beq Lcmpsf$b$0
+ cmpl IMM (0x7f800000),d1
+ bhi Lcmpf$inop
+Lcmpsf$2:
+| Check the signs
+ eorl d6,d7
+ bpl 1f
+| If the signs are not equal check if a >= 0
+ tstl d6
+ bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b
+ bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+ tstl d6
+ bpl 1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+ exg d0,d1
+#else
+ movel d0,d7
+ movel d1,d0
+ movel d7,d1
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+ cmpl d0,d1
+ bhi Lcmpsf$b$gt$a | |b| > |a|
+ bne Lcmpsf$a$gt$b | |b| < |a|
+| If we got here a == b.
+ movel IMM (EQUAL),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+#endif
+ unlk a6
+ rts
+Lcmpsf$a$gt$b:
+ movel IMM (GREATER),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+Lcmpsf$b$gt$a:
+ movel IMM (LESS),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+Lcmpsf$a$0:
+ bclr IMM (31),d6
+ bra Lcmpsf$1
+Lcmpsf$b$0:
+ bclr IMM (31),d7
+ bra Lcmpsf$2
+
+Lcmpf$inop:
+ movl a6@(16),d0
+ moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+| int __cmpsf2(float, float);
+ FUNC(__cmpsf2)
+SYM (__cmpsf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+
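+| For reference, a hedged C sketch of the ordering __cmpsf2_internal
+| implements once NaNs have been excluded (a and b are the raw IEEE
+| single-precision bit patterns; GREATER, LESS and EQUAL as above):
+|
+|   static int cmp_bits (unsigned a, unsigned b)
+|   {
+|     if ((a & 0x7fffffff) == 0) a = 0;      /* treat -0 as +0       */
+|     if ((b & 0x7fffffff) == 0) b = 0;
+|     if ((a ^ b) & 0x80000000)              /* signs differ         */
+|       return (a & 0x80000000) ? LESS : GREATER;
+|     if (a & 0x80000000)                    /* both negative: swap  */
+|       { unsigned t = a; a = b; b = t; }
+|     if (a == b) return EQUAL;
+|     return (a > b) ? GREATER : LESS;
+|   }
+|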
+|=============================================================================
+| rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1, with the exponent in register d2. They assume that the
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d2.
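+|
+| As a hedged illustration only, the round-to-nearest path below acts
+| like this C sketch, where frac holds the fraction with the round bit
+| in bit 0 and sticky holds the bits already shifted out (the names
+| are illustrative; overflow and underflow are handled elsewhere):
+|
+|   static unsigned round_nearest_even (unsigned frac, unsigned sticky)
+|   {
+|     if (frac & 1)                  /* delta >= 1                     */
+|       {
+|         if (sticky != 0)           /* delta > 1: round up            */
+|           frac += 1;
+|         else                       /* delta == 1: tie, round to even */
+|           frac += frac & 2;
+|       }
+|     return frac >> 1;              /* drop the round bit             */
+|   }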
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1: btst IMM (FLT_MANT_DIG),d0
+ bne 2f | if set the number is normalized
+| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent
+| is one (remember that a denormalized number corresponds to an
+| exponent of -F_BIAS+1).
+#ifndef __mcoldfire__
+ cmpw IMM (1),d2 | remember that the exponent is at least one
+#else
+ cmpl IMM (1),d2 | remember that the exponent is at least one
+#endif
+ beq 2f | an exponent of one means denormalized
+ addl d1,d1 | else shift and adjust the exponent
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d2,1b |
+#else
+ subql IMM (1),d2
+ bpl 1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f.
+| If delta == 1, we make sure the rounded number will be even (odd?)
+| (after shifting).
+ btst IMM (0),d0 | is delta < 1?
+ beq 2f | if so, do not do anything
+	tstl	d1		| is delta exactly 1 (no sticky bits left)?
+	bne	1f		| if not (delta > 1), round up
+ movel d0,d1 |
+ andl IMM (2),d1 | bit 1 is the last significant bit
+ addl d1,d0 |
+ bra 2f |
+1: movel IMM (1),d1 | else add 1
+ addl d1,d0 |
+| Shift right once (because we used bit #FLT_MANT_DIG!).
+2: lsrl IMM (1),d0
+| Now check again bit #FLT_MANT_DIG (rounding could have produced a
+| 'fraction overflow' ...).
+ btst IMM (FLT_MANT_DIG),d0
+ beq 1f
+ lsrl IMM (1),d0
+#ifndef __mcoldfire__
+ addw IMM (1),d2
+#else
+ addql IMM (1),d2
+#endif
+1:
+| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we
+| have to put the exponent to zero and return a denormalized number.
+ btst IMM (FLT_MANT_DIG-1),d0
+ beq 1f
+ jmp a0@
+1: movel IMM (0),d2
+ jmp a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+ jmp a0@
+#endif /* L_float */
+
+| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2,
+| __ledf2, __ltdf2 to all return the same value as a direct call to
+| __cmpdf2 would. In this implementation, each of these routines
+| simply calls __cmpdf2. It would be more efficient to give the
+| __cmpdf2 routine several names, but separating them out will make it
+| easier to write efficient versions of these routines someday.
+| If the operands compare unordered, __gtdf2 and __gedf2 return -1.
+| The other routines return 1.
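+|
+| In C terms each stub below is just (a hedged rendering; the third
+| argument selects the value __cmpdf2_internal returns when the
+| operands are unordered, i.e. when either is a NaN):
+|
+|   int __eqdf2 (double a, double b) { return __cmpdf2_internal (a, b, 1); }
+|   int __gtdf2 (double a, double b) { return __cmpdf2_internal (a, b, -1); }
+|
+| and likewise for the remaining routines.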
+
+#ifdef L_eqdf2
+ .text
+ FUNC(__eqdf2)
+ .globl SYM (__eqdf2)
+SYM (__eqdf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_eqdf2 */
+
+#ifdef L_nedf2
+ .text
+ FUNC(__nedf2)
+ .globl SYM (__nedf2)
+SYM (__nedf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_nedf2 */
+
+#ifdef L_gtdf2
+ .text
+ FUNC(__gtdf2)
+ .globl SYM (__gtdf2)
+SYM (__gtdf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_gtdf2 */
+
+#ifdef L_gedf2
+ .text
+ FUNC(__gedf2)
+ .globl SYM (__gedf2)
+SYM (__gedf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_gedf2 */
+
+#ifdef L_ltdf2
+ .text
+ FUNC(__ltdf2)
+ .globl SYM (__ltdf2)
+SYM (__ltdf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_ltdf2 */
+
+#ifdef L_ledf2
+ .text
+ FUNC(__ledf2)
+ .globl SYM (__ledf2)
+SYM (__ledf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_ledf2 */
+
+| The comments above about __eqdf2, et al., also apply to __eqsf2,
+| et al., except that the latter call __cmpsf2 rather than __cmpdf2.
+
+#ifdef L_eqsf2
+ .text
+ FUNC(__eqsf2)
+ .globl SYM (__eqsf2)
+SYM (__eqsf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_eqsf2 */
+
+#ifdef L_nesf2
+ .text
+ FUNC(__nesf2)
+ .globl SYM (__nesf2)
+SYM (__nesf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_nesf2 */
+
+#ifdef L_gtsf2
+ .text
+ FUNC(__gtsf2)
+ .globl SYM (__gtsf2)
+SYM (__gtsf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_gtsf2 */
+
+#ifdef L_gesf2
+ .text
+ FUNC(__gesf2)
+ .globl SYM (__gesf2)
+SYM (__gesf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_gesf2 */
+
+#ifdef L_ltsf2
+ .text
+ FUNC(__ltsf2)
+ .globl SYM (__ltsf2)
+SYM (__ltsf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_ltsf2 */
+
+#ifdef L_lesf2
+ .text
+ FUNC(__lesf2)
+ .globl SYM (__lesf2)
+SYM (__lesf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_lesf2 */
+
+#if defined (__ELF__) && defined (__linux__)
+ /* Make stack non-executable for ELF linux targets. */
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/libgcc/config/m68k/t-floatlib b/libgcc/config/m68k/t-floatlib
new file mode 100644
index 00000000000..4160eb9f537
--- /dev/null
+++ b/libgcc/config/m68k/t-floatlib
@@ -0,0 +1,5 @@
+LIB1ASMSRC = m68k/lb1sf68.S
+LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \
+ _double _float _floatex \
+ _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \
+ _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2
diff --git a/libgcc/config/mcore/lib1funcs.S b/libgcc/config/mcore/lib1funcs.S
new file mode 100644
index 00000000000..701762f2a3c
--- /dev/null
+++ b/libgcc/config/mcore/lib1funcs.S
@@ -0,0 +1,303 @@
+/* libgcc routines for the MCore.
+ Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+
+#define SYM(x) CONCAT1 (__, x)
+
+#ifdef __ELF__
+#define TYPE(x) .type SYM (x),@function
+#define SIZE(x) .size SYM (x), . - SYM (x)
+#else
+#define TYPE(x)
+#define SIZE(x)
+#endif
+
+.macro FUNC_START name
+ .text
+ .globl SYM (\name)
+ TYPE (\name)
+SYM (\name):
+.endm
+
+.macro FUNC_END name
+ SIZE (\name)
+.endm
+
+#ifdef L_udivsi3
+FUNC_START udiv32
+FUNC_START udivsi32
+
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+// appears to be wrong...
+// tested out incorrectly in our OS work...
+// mov r7,r3 // looking at divisor
+// ff1 r7 // I can move 32-r7 more bits to left.
+// addi r7,1 // ok, one short of that...
+// mov r1,r2
+// lsr r1,r7 // bits that came from low order...
+// rsubi r7,31 // r7 == "32-n" == LEFT distance
+// addi r7,1 // this is (32-n)
+// lsl r4,r7 // fixes the high 32 (quotient)
+// lsl r2,r7
+// cmpnei r4,0
+// bf 4f // the sentinel went away...
+
+ // run the remaining bits
+
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+
+4: mov r2,r4 // return quotient
+ mov r3,r1 // and piggyback the remainder
+ jmp r15
+FUNC_END udiv32
+FUNC_END udivsi32
+#endif
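+
+// A hedged C model of the shift/subtract loop above.  The assembly
+// stops when the sentinel bit placed in r4 falls out; this model just
+// counts the significant bits explicitly, which comes to the same
+// thing.  It assumes a non-zero divisor (the real routine traps).
+//
+//   unsigned udiv32_model (unsigned num, unsigned den, unsigned *rem)
+//   {
+//     unsigned hi = 0, lo, quo = 0;
+//     int steps;
+//     if (num == 0) { *rem = 0; return 0; }
+//     steps = 32 - __builtin_clz (num);   // bits kept after the ff1 skip
+//     lo = num << __builtin_clz (num);
+//     while (steps--)
+//       {
+//         hi = (hi << 1) | (lo >> 31);    // lslc r2 / addc r1
+//         lo <<= 1;
+//         quo <<= 1;
+//         if (hi >= den)                  // cmphs r1,r3
+//           { hi -= den; quo |= 1; }      // sub r1,r3 / addc r4,r4
+//       }
+//     *rem = hi;
+//     return quo;
+//   }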
+
+#ifdef L_umodsi3
+FUNC_START urem32
+FUNC_START umodsi3
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+ mov r2,r1 // return remainder
+ jmp r15
+FUNC_END urem32
+FUNC_END umodsi3
+#endif
+
+#ifdef L_divsi3
+FUNC_START div32
+FUNC_START divsi3
+ mov r5,r2 // calc sign of quotient
+ xor r5,r3
+ abs r2 // do unsigned divide
+ abs r3
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+// tested out incorrectly in our OS work...
+// mov r7,r3 // looking at divisor
+// ff1 r7 // I can move 32-r7 more bits to left.
+// addi r7,1 // ok, one short of that...
+// mov r1,r2
+// lsr r1,r7 // bits that came from low order...
+// rsubi r7,31 // r7 == "32-n" == LEFT distance
+// addi r7,1 // this is (32-n)
+// lsl r4,r7 // fixes the high 32 (quotient)
+// lsl r2,r7
+// cmpnei r4,0
+// bf 4f // the sentinel went away...
+
+ // run the remaining bits
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+
+4: mov r2,r4 // return quotient
+ mov r3,r1 // piggyback the remainder
+ btsti r5,31 // after adjusting for sign
+ bf 3f
+ rsubi r2,0
+ rsubi r3,0
+3: jmp r15
+FUNC_END div32
+FUNC_END divsi3
+#endif
+
+#ifdef L_modsi3
+FUNC_START rem32
+FUNC_START modsi3
+ mov r5,r2 // calc sign of remainder
+ abs r2 // do unsigned divide
+ abs r3
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+ mov r2,r1 // return remainder
+ btsti r5,31 // after adjusting for sign
+ bf 3f
+ rsubi r2,0
+3: jmp r15
+FUNC_END rem32
+FUNC_END modsi3
+#endif
+
+
+/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2}
+ will behave as __cmpdf2. So, we stub the implementations to
+ jump on to __cmpdf2 and __cmpsf2.
+
+ All of these shortcircuit the return path so that __cmp{sd}f2
+ will go directly back to the caller. */
+
+.macro COMPARE_DF_JUMP name
+ .import SYM (cmpdf2)
+FUNC_START \name
+ jmpi SYM (cmpdf2)
+FUNC_END \name
+.endm
+
+#ifdef L_eqdf2
+COMPARE_DF_JUMP eqdf2
+#endif /* L_eqdf2 */
+
+#ifdef L_nedf2
+COMPARE_DF_JUMP nedf2
+#endif /* L_nedf2 */
+
+#ifdef L_gtdf2
+COMPARE_DF_JUMP gtdf2
+#endif /* L_gtdf2 */
+
+#ifdef L_gedf2
+COMPARE_DF_JUMP gedf2
+#endif /* L_gedf2 */
+
+#ifdef L_ltdf2
+COMPARE_DF_JUMP ltdf2
+#endif /* L_ltdf2 */
+
+#ifdef L_ledf2
+COMPARE_DF_JUMP ledf2
+#endif /* L_ledf2 */
+
+/* SINGLE PRECISION FLOATING POINT STUBS */
+
+.macro COMPARE_SF_JUMP name
+ .import SYM (cmpsf2)
+FUNC_START \name
+ jmpi SYM (cmpsf2)
+FUNC_END \name
+.endm
+
+#ifdef L_eqsf2
+COMPARE_SF_JUMP eqsf2
+#endif /* L_eqsf2 */
+
+#ifdef L_nesf2
+COMPARE_SF_JUMP nesf2
+#endif /* L_nesf2 */
+
+#ifdef L_gtsf2
+COMPARE_SF_JUMP gtsf2
+#endif /* L_gtsf2 */
+
+#ifdef L_gesf2
+COMPARE_SF_JUMP gesf2
+#endif /* L_gesf2 */
+
+#ifdef L_ltsf2
+COMPARE_SF_JUMP ltsf2
+#endif /* L_ltsf2 */
+
+#ifdef L_lesf2
+COMPARE_SF_JUMP lesf2
+#endif /* L_lesf2 */
diff --git a/libgcc/config/mcore/t-mcore b/libgcc/config/mcore/t-mcore
new file mode 100644
index 00000000000..19c4c15cd0b
--- /dev/null
+++ b/libgcc/config/mcore/t-mcore
@@ -0,0 +1,2 @@
+LIB1ASMSRC = mcore/lib1funcs.S
+LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3
diff --git a/libgcc/config/mep/lib1funcs.S b/libgcc/config/mep/lib1funcs.S
new file mode 100644
index 00000000000..0a18913f927
--- /dev/null
+++ b/libgcc/config/mep/lib1funcs.S
@@ -0,0 +1,125 @@
+/* libgcc routines for Toshiba Media Processor.
+ Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define SAVEALL \
+ add3 $sp, $sp, -16*4 ; \
+ sw $0, ($sp) ; \
+ sw $1, 4($sp) ; \
+ sw $2, 8($sp) ; \
+ sw $3, 12($sp) ; \
+ sw $4, 16($sp) ; \
+ sw $5, 20($sp) ; \
+ sw $6, 24($sp) ; \
+ sw $7, 28($sp) ; \
+ sw $8, 32($sp) ; \
+ sw $9, 36($sp) ; \
+ sw $10, 40($sp) ; \
+ sw $11, 44($sp) ; \
+ sw $12, 48($sp) ; \
+ sw $13, 52($sp) ; \
+ sw $14, 56($sp) ; \
+ ldc $5, $lp ; \
+ add $5, 3 ; \
+ mov $6, -4 ; \
+ and $5, $6
+
+#define RESTOREALL \
+ stc $5, $lp ; \
+ lw $14, 56($sp) ; \
+ lw $13, 52($sp) ; \
+ lw $12, 48($sp) ; \
+ lw $11, 44($sp) ; \
+ lw $10, 40($sp) ; \
+ lw $9, 36($sp) ; \
+ lw $8, 32($sp) ; \
+ lw $7, 28($sp) ; \
+ lw $6, 24($sp) ; \
+ lw $5, 20($sp) ; \
+ lw $4, 16($sp) ; \
+ lw $3, 12($sp) ; \
+ lw $2, 8($sp) ; \
+ lw $1, 4($sp) ; \
+ lw $0, ($sp) ; \
+ add3 $sp, $sp, 16*4 ; \
+ ret
+
+#ifdef L_mep_profile
+ .text
+ .global __mep_mcount
+__mep_mcount:
+ SAVEALL
+ ldc $1, $lp
+ mov $2, $0
+ bsr __mep_mcount_2
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_init_trace
+ .text
+ .global __mep_bb_init_trace_func
+__mep_bb_init_trace_func:
+ SAVEALL
+ lw $1, ($5)
+ lw $2, 4($5)
+ add $5, 8
+ bsr __bb_init_trace_func
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_init
+ .text
+ .global __mep_bb_init_func
+__mep_bb_init_func:
+ SAVEALL
+ lw $1, ($5)
+ add $5, 4
+ bsr __bb_init_func
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_trace
+ .text
+ .global __mep_bb_trace_func
+__mep_bb_trace_func:
+ SAVEALL
+ movu $3, __bb
+ lw $1, ($5)
+ sw $1, ($3)
+ lw $2, 4($5)
+ sw $2, 4($3)
+ add $5, 8
+ bsr __bb_trace_func
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_increment
+ .text
+ .global __mep_bb_increment_func
+__mep_bb_increment_func:
+ SAVEALL
+ lw $1, ($5)
+ lw $0, ($1)
+ add $0, 1
+ sw $0, ($1)
+ add $5, 4
+ RESTOREALL
+#endif
diff --git a/libgcc/config/mep/t-mep b/libgcc/config/mep/t-mep
index 36e6f5dc771..d1fb094a41e 100644
--- a/libgcc/config/mep/t-mep
+++ b/libgcc/config/mep/t-mep
@@ -1,2 +1,11 @@
+# profiling support
+LIB1ASMSRC = mep/lib1funcs.S
+
+LIB1ASMFUNCS = _mep_profile \
+ _mep_bb_init_trace \
+ _mep_bb_init \
+ _mep_bb_trace \
+ _mep_bb_increment
+
# Use -O0 instead of -O2 so we don't get complex relocations
CRTSTUFF_CFLAGS += -O0
diff --git a/libgcc/config/mips/mips16.S b/libgcc/config/mips/mips16.S
new file mode 100644
index 00000000000..ec331b5f65e
--- /dev/null
+++ b/libgcc/config/mips/mips16.S
@@ -0,0 +1,712 @@
+/* mips16 floating point support code
+ Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Cygnus Support
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file contains mips16 floating point support functions. These
+ functions are called by mips16 code to handle floating point when
+ -msoft-float is not used. They accept the arguments and return
+ values using the soft-float calling convention, but do the actual
+ operation using the hard floating point instructions. */
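+
+/* As a purely illustrative, hedged example, the single-precision add
+   helper below is behaviorally equivalent to
+
+     float __mips16_addsf3 (float a, float b) { return a + b; }
+
+   compiled for the soft-float calling convention but executed with the
+   hardware add.s instruction; the macros that follow generate that
+   pattern for each operation.  */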
+
+#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
+
+/* This file contains 32-bit assembly code. */
+ .set nomips16
+
+/* Start a function. */
+
+#define STARTFN(NAME) .globl NAME; .ent NAME; NAME:
+
+/* Finish a function. */
+
+#define ENDFN(NAME) .end NAME
+
+/* ARG1
+ The FPR that holds the first floating-point argument.
+
+ ARG2
+ The FPR that holds the second floating-point argument.
+
+ RET
+ The FPR that holds a floating-point return value. */
+
+#define RET $f0
+#define ARG1 $f12
+#ifdef __mips64
+#define ARG2 $f13
+#else
+#define ARG2 $f14
+#endif
+
+/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR
+ and so that its low 32 bits contain LOW_FPR. */
+#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \
+ .set noat; \
+ mfc1 $1, LOW_FPR; \
+ mfc1 GPR, HIGH_FPR; \
+ dsll $1, $1, 32; \
+ dsll GPR, GPR, 32; \
+ dsrl $1, $1, 32; \
+ or GPR, GPR, $1; \
+ .set at
+
+/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of
+ GPR to LOW_FPR. */
+#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \
+ .set noat; \
+ dsrl $1, GPR, 32; \
+ mtc1 GPR, LOW_FPR; \
+ mtc1 $1, HIGH_FPR; \
+ .set at
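+
+/* In C terms the merge performed by MERGE_GPRf is roughly (a hedged
+   sketch; MERGE_GPRt performs the inverse split):
+
+     unsigned long long merge (unsigned high, unsigned low)
+     {
+       return ((unsigned long long) high << 32) | low;
+     }
+*/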
+
+/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */
+#define DELAYt(T, OPCODE, OP2) \
+ .set noreorder; \
+ jr T; \
+ OPCODE, OP2; \
+ .set reorder
+
+/* Use "OPCODE. OP2" and jump to T. */
+#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T
+
+/* MOVE_SF_BYTE0(D)
+ Move the first single-precision floating-point argument between
+ GPRs and FPRs.
+
+ MOVE_SI_BYTE0(D)
+ Likewise the first single-precision integer argument.
+
+ MOVE_SF_BYTE4(D)
+ Move the second single-precision floating-point argument between
+ GPRs and FPRs, given that the first argument occupies 4 bytes.
+
+ MOVE_SF_BYTE8(D)
+ Move the second single-precision floating-point argument between
+ GPRs and FPRs, given that the first argument occupies 8 bytes.
+
+ MOVE_DF_BYTE0(D)
+ Move the first double-precision floating-point argument between
+ GPRs and FPRs.
+
+ MOVE_DF_BYTE8(D)
+ Likewise the second double-precision floating-point argument.
+
+ MOVE_SF_RET(D, T)
+ Likewise a single-precision floating-point return value,
+ then jump to T.
+
+ MOVE_SC_RET(D, T)
+ Likewise a complex single-precision floating-point return value.
+
+ MOVE_DF_RET(D, T)
+ Likewise a double-precision floating-point return value.
+
+ MOVE_DC_RET(D, T)
+ Likewise a complex double-precision floating-point return value.
+
+ MOVE_SI_RET(D, T)
+ Likewise a single-precision integer return value.
+
+ The D argument is "t" to move to FPRs and "f" to move from FPRs.
+ The return macros may assume that the target of the jump does not
+ use a floating-point register. */
+
+#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
+#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
+
+#if defined(__mips64) && defined(__MIPSEB__)
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T
+#elif defined(__mips64)
+/* The high 32 bits of $2 correspond to the second word in memory;
+ i.e. the imaginary part. */
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T
+#elif __mips_fpr == 64
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#else
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2)
+#endif
+
+#if defined(__mips64)
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13
+#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13
+#else
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14
+#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14
+#endif
+#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D)
+
+#if defined(__mips64)
+#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12
+#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13
+#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0)
+#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64 && defined(__MIPSEB__)
+#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T)
+#elif defined(__MIPSEB__)
+/* FPRs are little-endian. */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T)
+#else
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T)
+#endif
+
+/* Single-precision math. */
+
+/* Define a function NAME that loads two single-precision values,
+ performs FPU operation OPCODE on them, and returns the single-
+ precision result. */
+
+#define OPSF3(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ MOVE_SF_BYTE4 (t); \
+ OPCODE RET,ARG1,ARG2; \
+ MOVE_SF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16addsf3
+OPSF3 (__mips16_addsf3, add.s)
+#endif
+#ifdef L_m16subsf3
+OPSF3 (__mips16_subsf3, sub.s)
+#endif
+#ifdef L_m16mulsf3
+OPSF3 (__mips16_mulsf3, mul.s)
+#endif
+#ifdef L_m16divsf3
+OPSF3 (__mips16_divsf3, div.s)
+#endif
+
+/* Define a function NAME that loads a single-precision value,
+ performs FPU operation OPCODE on it, and returns the single-
+ precision result. */
+
+#define OPSF2(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ OPCODE RET,ARG1; \
+ MOVE_SF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16negsf2
+OPSF2 (__mips16_negsf2, neg.s)
+#endif
+#ifdef L_m16abssf2
+OPSF2 (__mips16_abssf2, abs.s)
+#endif
+
+/* Single-precision comparisons. */
+
+/* Define a function NAME that loads two single-precision values,
+ performs floating point comparison OPCODE, and returns TRUE or
+ FALSE depending on the result. */
+
+#define CMPSF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ MOVE_SF_BYTE4 (t); \
+ OPCODE ARG1,ARG2; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+/* Like CMPSF, but reverse the comparison operands. */
+
+#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ MOVE_SF_BYTE4 (t); \
+ OPCODE ARG2,ARG1; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+#ifdef L_m16eqsf2
+CMPSF (__mips16_eqsf2, c.eq.s, 0, 1)
+#endif
+#ifdef L_m16nesf2
+CMPSF (__mips16_nesf2, c.eq.s, 0, 1)
+#endif
+#ifdef L_m16gtsf2
+REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0)
+#endif
+#ifdef L_m16gesf2
+REVCMPSF (__mips16_gesf2, c.le.s, 0, -1)
+#endif
+#ifdef L_m16lesf2
+CMPSF (__mips16_lesf2, c.le.s, 0, 1)
+#endif
+#ifdef L_m16ltsf2
+CMPSF (__mips16_ltsf2, c.lt.s, -1, 0)
+#endif
+#ifdef L_m16unordsf2
+CMPSF(__mips16_unordsf2, c.un.s, 1, 0)
+#endif
+
+
+/* Single-precision conversions. */
+
+#ifdef L_m16fltsisf
+STARTFN (__mips16_floatsisf)
+ MOVE_SF_BYTE0 (t)
+ cvt.s.w RET,ARG1
+ MOVE_SF_RET (f, $31)
+ ENDFN (__mips16_floatsisf)
+#endif
+
+#ifdef L_m16fltunsisf
+STARTFN (__mips16_floatunsisf)
+ .set noreorder
+ bltz $4,1f
+ MOVE_SF_BYTE0 (t)
+ .set reorder
+ cvt.s.w RET,ARG1
+ MOVE_SF_RET (f, $31)
+1:
+ and $2,$4,1
+ srl $3,$4,1
+ or $2,$2,$3
+ mtc1 $2,RET
+ cvt.s.w RET,RET
+ add.s RET,RET,RET
+ MOVE_SF_RET (f, $31)
+ ENDFN (__mips16_floatunsisf)
+#endif
+
+#ifdef L_m16fix_truncsfsi
+STARTFN (__mips16_fix_truncsfsi)
+ MOVE_SF_BYTE0 (t)
+ trunc.w.s RET,ARG1,$4
+ MOVE_SI_RET (f, $31)
+ ENDFN (__mips16_fix_truncsfsi)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+
+/* Double-precision math. */
+
+/* Define a function NAME that loads two double-precision values,
+ performs FPU operation OPCODE on them, and returns the double-
+ precision result. */
+
+#define OPDF3(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ MOVE_DF_BYTE8 (t); \
+ OPCODE RET,ARG1,ARG2; \
+ MOVE_DF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16adddf3
+OPDF3 (__mips16_adddf3, add.d)
+#endif
+#ifdef L_m16subdf3
+OPDF3 (__mips16_subdf3, sub.d)
+#endif
+#ifdef L_m16muldf3
+OPDF3 (__mips16_muldf3, mul.d)
+#endif
+#ifdef L_m16divdf3
+OPDF3 (__mips16_divdf3, div.d)
+#endif
+
+/* Define a function NAME that loads a double-precision value,
+ performs FPU operation OPCODE on it, and returns the double-
+ precision result. */
+
+#define OPDF2(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ OPCODE RET,ARG1; \
+ MOVE_DF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16negdf2
+OPDF2 (__mips16_negdf2, neg.d)
+#endif
+#ifdef L_m16absdf2
+OPDF2 (__mips16_absdf2, abs.d)
+#endif
+
+/* Conversions between single and double precision. */
+
+#ifdef L_m16extsfdf2
+STARTFN (__mips16_extendsfdf2)
+ MOVE_SF_BYTE0 (t)
+ cvt.d.s RET,ARG1
+ MOVE_DF_RET (f, $31)
+ ENDFN (__mips16_extendsfdf2)
+#endif
+
+#ifdef L_m16trdfsf2
+STARTFN (__mips16_truncdfsf2)
+ MOVE_DF_BYTE0 (t)
+ cvt.s.d RET,ARG1
+ MOVE_SF_RET (f, $31)
+ ENDFN (__mips16_truncdfsf2)
+#endif
+
+/* Double-precision comparisons. */
+
+/* Define a function NAME that loads two double-precision values,
+ performs floating point comparison OPCODE, and returns TRUE or
+ FALSE depending on the result. */
+
+#define CMPDF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ MOVE_DF_BYTE8 (t); \
+ OPCODE ARG1,ARG2; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+/* Like CMPDF, but reverse the comparison operands. */
+
+#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ MOVE_DF_BYTE8 (t); \
+ OPCODE ARG2,ARG1; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+#ifdef L_m16eqdf2
+CMPDF (__mips16_eqdf2, c.eq.d, 0, 1)
+#endif
+#ifdef L_m16nedf2
+CMPDF (__mips16_nedf2, c.eq.d, 0, 1)
+#endif
+#ifdef L_m16gtdf2
+REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0)
+#endif
+#ifdef L_m16gedf2
+REVCMPDF (__mips16_gedf2, c.le.d, 0, -1)
+#endif
+#ifdef L_m16ledf2
+CMPDF (__mips16_ledf2, c.le.d, 0, 1)
+#endif
+#ifdef L_m16ltdf2
+CMPDF (__mips16_ltdf2, c.lt.d, -1, 0)
+#endif
+#ifdef L_m16unorddf2
+CMPDF(__mips16_unorddf2, c.un.d, 1, 0)
+#endif
+
+/* Double-precision conversions. */
+
+#ifdef L_m16fltsidf
+STARTFN (__mips16_floatsidf)
+ MOVE_SI_BYTE0 (t)
+ cvt.d.w RET,ARG1
+ MOVE_DF_RET (f, $31)
+ ENDFN (__mips16_floatsidf)
+#endif
+
+#ifdef L_m16fltunsidf
+STARTFN (__mips16_floatunsidf)
+ MOVE_SI_BYTE0 (t)
+ cvt.d.w RET,ARG1
+ bgez $4,1f
+ li.d ARG1, 4.294967296e+9
+ add.d RET, RET, ARG1
+1: MOVE_DF_RET (f, $31)
+ ENDFN (__mips16_floatunsidf)
+#endif
+
+#ifdef L_m16fix_truncdfsi
+STARTFN (__mips16_fix_truncdfsi)
+ MOVE_DF_BYTE0 (t)
+ trunc.w.d RET,ARG1,$4
+ MOVE_SI_RET (f, $31)
+ ENDFN (__mips16_fix_truncdfsi)
+#endif
+#endif /* !__mips_single_float */
+
+/* Define a function NAME that moves a return value of mode MODE from
+ FPRs to GPRs. */
+
+#define RET_FUNCTION(NAME, MODE) \
+STARTFN (NAME); \
+ MOVE_##MODE##_RET (t, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16retsf
+RET_FUNCTION (__mips16_ret_sf, SF)
+#endif
+
+#ifdef L_m16retsc
+RET_FUNCTION (__mips16_ret_sc, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16retdf
+RET_FUNCTION (__mips16_ret_df, DF)
+#endif
+
+#ifdef L_m16retdc
+RET_FUNCTION (__mips16_ret_dc, DC)
+#endif
+#endif /* !__mips_single_float */
+
+/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument
+ code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2
+ classify the first and second arguments as follows:
+
+ 1: a single-precision argument
+ 2: a double-precision argument
+ 0: no argument, or not one of the above. */
+
+#define STUB_ARGS_0 /* () */
+#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */
+#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */
+#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */
+#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */
+#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */
+#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */
+
+/* These functions are used by 16-bit code when calling via a function
+ pointer. They must copy the floating point arguments from the GPRs
+ to FPRs and then call function $2. */
+
+#define CALL_STUB_NO_RET(NAME, CODE) \
+STARTFN (NAME); \
+ STUB_ARGS_##CODE; \
+ .set noreorder; \
+ jr $2; \
+ move $25,$2; \
+ .set reorder; \
+ ENDFN (NAME)
+
+#ifdef L_m16stub1
+CALL_STUB_NO_RET (__mips16_call_stub_1, 1)
+#endif
+
+#ifdef L_m16stub5
+CALL_STUB_NO_RET (__mips16_call_stub_5, 5)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+
+#ifdef L_m16stub2
+CALL_STUB_NO_RET (__mips16_call_stub_2, 2)
+#endif
+
+#ifdef L_m16stub6
+CALL_STUB_NO_RET (__mips16_call_stub_6, 6)
+#endif
+
+#ifdef L_m16stub9
+CALL_STUB_NO_RET (__mips16_call_stub_9, 9)
+#endif
+
+#ifdef L_m16stub10
+CALL_STUB_NO_RET (__mips16_call_stub_10, 10)
+#endif
+#endif /* !__mips_single_float */
+
+/* Now we have the same set of functions, except that this time the
+ function being called returns an SFmode, SCmode, DFmode or DCmode
+ value; we need to instantiate a set for each case. The calling
+ function will arrange to preserve $18, so these functions are free
+ to use it to hold the return address.
+
+ Note that we do not know whether the function we are calling is 16
+ bit or 32 bit. However, it does not matter, because 16-bit
+ functions always return floating point values in both the gp and
+ the fp regs. It would be possible to check whether the function
+ being called is 16 bits, in which case the copy is unnecessary;
+ however, it's faster to always do the copy. */
+
+#define CALL_STUB_RET(NAME, CODE, MODE) \
+STARTFN (NAME); \
+ move $18,$31; \
+ STUB_ARGS_##CODE; \
+ .set noreorder; \
+ jalr $2; \
+ move $25,$2; \
+ .set reorder; \
+ MOVE_##MODE##_RET (f, $18); \
+ ENDFN (NAME)
+
+/* First, instantiate the single-float set. */
+
+#ifdef L_m16stubsf0
+CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF)
+#endif
+
+#ifdef L_m16stubsf1
+CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF)
+#endif
+
+#ifdef L_m16stubsf5
+CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsf2
+CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF)
+#endif
+
+#ifdef L_m16stubsf6
+CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF)
+#endif
+
+#ifdef L_m16stubsf9
+CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF)
+#endif
+
+#ifdef L_m16stubsf10
+CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Now we have the same set of functions again, except that this time
+   the function being called returns a DFmode value. */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdf0
+CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF)
+#endif
+
+#ifdef L_m16stubdf1
+CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF)
+#endif
+
+#ifdef L_m16stubdf5
+CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF)
+#endif
+
+#ifdef L_m16stubdf2
+CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF)
+#endif
+
+#ifdef L_m16stubdf6
+CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF)
+#endif
+
+#ifdef L_m16stubdf9
+CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF)
+#endif
+
+#ifdef L_m16stubdf10
+CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Ho hum. Here we have the same set of functions again, this time
+ for when the function being called returns an SCmode value. */
+
+#ifdef L_m16stubsc0
+CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC)
+#endif
+
+#ifdef L_m16stubsc1
+CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC)
+#endif
+
+#ifdef L_m16stubsc5
+CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsc2
+CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC)
+#endif
+
+#ifdef L_m16stubsc6
+CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC)
+#endif
+
+#ifdef L_m16stubsc9
+CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC)
+#endif
+
+#ifdef L_m16stubsc10
+CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Finally, another set of functions for DCmode. */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdc0
+CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC)
+#endif
+
+#ifdef L_m16stubdc1
+CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC)
+#endif
+
+#ifdef L_m16stubdc5
+CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC)
+#endif
+
+#ifdef L_m16stubdc2
+CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC)
+#endif
+
+#ifdef L_m16stubdc6
+CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC)
+#endif
+
+#ifdef L_m16stubdc9
+CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC)
+#endif
+
+#ifdef L_m16stubdc10
+CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC)
+#endif
+#endif /* !__mips_single_float */
+#endif
diff --git a/libgcc/config/mips/t-mips16 b/libgcc/config/mips/t-mips16
index 46c7472f5f6..5553ed76e2d 100644
--- a/libgcc/config/mips/t-mips16
+++ b/libgcc/config/mips/t-mips16
@@ -1,3 +1,43 @@
+# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = mips/mips16.S
+LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \
+ _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \
+ _m16unordsf2 \
+ _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \
+ _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \
+ _m16extsfdf2 _m16trdfsf2 \
+ _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \
+ _m16unorddf2 \
+ _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \
+ _m16retsf _m16retdf \
+ _m16retsc _m16retdc \
+ _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \
+ _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \
+ _m16stubsf9 _m16stubsf10 \
+ _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \
+ _m16stubdf9 _m16stubdf10 \
+ _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \
+ _m16stubsc9 _m16stubsc10 \
+ _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \
+ _m16stubdc9 _m16stubdc10
+
SYNC = yes
SYNC_CFLAGS = -mno-mips16
diff --git a/libgcc/config/pa/milli64.S b/libgcc/config/pa/milli64.S
new file mode 100644
index 00000000000..2e9c4f741b6
--- /dev/null
+++ b/libgcc/config/pa/milli64.S
@@ -0,0 +1,2134 @@
+/* 32 and 64-bit millicode, original author Hewlett-Packard
+ adapted for gcc by Paul Bame <bame@debian.org>
+ and Alan Modra <alan@linuxcare.com.au>.
+
+ Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef pa64
+ .level 2.0w
+#endif
+
+/* Hardware General Registers. */
+r0: .reg %r0
+r1: .reg %r1
+r2: .reg %r2
+r3: .reg %r3
+r4: .reg %r4
+r5: .reg %r5
+r6: .reg %r6
+r7: .reg %r7
+r8: .reg %r8
+r9: .reg %r9
+r10: .reg %r10
+r11: .reg %r11
+r12: .reg %r12
+r13: .reg %r13
+r14: .reg %r14
+r15: .reg %r15
+r16: .reg %r16
+r17: .reg %r17
+r18: .reg %r18
+r19: .reg %r19
+r20: .reg %r20
+r21: .reg %r21
+r22: .reg %r22
+r23: .reg %r23
+r24: .reg %r24
+r25: .reg %r25
+r26: .reg %r26
+r27: .reg %r27
+r28: .reg %r28
+r29: .reg %r29
+r30: .reg %r30
+r31: .reg %r31
+
+/* Hardware Space Registers. */
+sr0: .reg %sr0
+sr1: .reg %sr1
+sr2: .reg %sr2
+sr3: .reg %sr3
+sr4: .reg %sr4
+sr5: .reg %sr5
+sr6: .reg %sr6
+sr7: .reg %sr7
+
+/* Hardware Floating Point Registers. */
+fr0: .reg %fr0
+fr1: .reg %fr1
+fr2: .reg %fr2
+fr3: .reg %fr3
+fr4: .reg %fr4
+fr5: .reg %fr5
+fr6: .reg %fr6
+fr7: .reg %fr7
+fr8: .reg %fr8
+fr9: .reg %fr9
+fr10: .reg %fr10
+fr11: .reg %fr11
+fr12: .reg %fr12
+fr13: .reg %fr13
+fr14: .reg %fr14
+fr15: .reg %fr15
+
+/* Hardware Control Registers. */
+cr11: .reg %cr11
+sar: .reg %cr11 /* Shift Amount Register */
+
+/* Software Architecture General Registers. */
+rp: .reg r2 /* return pointer */
+#ifdef pa64
+mrp: .reg r2 /* millicode return pointer */
+#else
+mrp: .reg r31 /* millicode return pointer */
+#endif
+ret0: .reg r28 /* return value */
+ret1: .reg r29 /* return value (high part of double) */
+sp: .reg r30 /* stack pointer */
+dp: .reg r27 /* data pointer */
+arg0: .reg r26 /* argument */
+arg1: .reg r25 /* argument or high part of double argument */
+arg2: .reg r24 /* argument */
+arg3: .reg r23 /* argument or high part of double argument */
+
+/* Software Architecture Space Registers. */
+/* sr0 ; return link from BLE */
+sret: .reg sr1 /* return value */
+sarg: .reg sr1 /* argument */
+/* sr4 ; PC SPACE tracker */
+/* sr5 ; process private data */
+
+/* Frame Offsets (millicode convention!) Used when calling other
+ millicode routines. Stack unwinding is dependent upon these
+ definitions. */
+r31_slot: .equ -20 /* "current RP" slot */
+sr0_slot: .equ -16 /* "static link" slot */
+#if defined(pa64)
+mrp_slot: .equ -16 /* "current RP" slot */
+psp_slot: .equ -8 /* "previous SP" slot */
+#else
+mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
+#endif
+
+
+#define DEFINE(name,value)name: .EQU value
+#define RDEFINE(name,value)name: .REG value
+#ifdef milliext
+#define MILLI_BE(lbl) BE lbl(sr7,r0)
+#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
+#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
+#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
+#define MILLIRETN BE,n 0(sr0,mrp)
+#define MILLIRET BE 0(sr0,mrp)
+#define MILLI_RETN BE,n 0(sr0,mrp)
+#define MILLI_RET BE 0(sr0,mrp)
+#else
+#define MILLI_BE(lbl) B lbl
+#define MILLI_BEN(lbl) B,n lbl
+#define MILLI_BLE(lbl) BL lbl,mrp
+#define MILLI_BLEN(lbl) BL,n lbl,mrp
+#define MILLIRETN BV,n 0(mrp)
+#define MILLIRET BV 0(mrp)
+#define MILLI_RETN BV,n 0(mrp)
+#define MILLI_RET BV 0(mrp)
+#endif
+
+#ifdef __STDC__
+#define CAT(a,b) a##b
+#else
+#define CAT(a,b) a/**/b
+#endif
+
+#ifdef ELF
+#define SUBSPA_MILLI .section .text
+#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
+#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
+#define ATTR_MILLI
+#define SUBSPA_DATA .section .data
+#define ATTR_DATA
+#define GLOBAL $global$
+#define GSYM(sym) !sym:
+#define LSYM(sym) !CAT(.L,sym:)
+#define LREF(sym) CAT(.L,sym)
+
+#else
+
+#ifdef coff
+/* This used to be .milli but since link32 places different named
+ sections in different segments millicode ends up a long ways away
+ from .text (1meg?). This way they will be a lot closer.
+
+ The SUBSPA_MILLI_* specify locality sets for certain millicode
+ modules in order to ensure that modules that call one another are
+ placed close together. Without locality sets this is unlikely to
+ happen because of the Dynamite linker library search algorithm. We
+ want these modules close together so that short calls always reach
+ (we don't want to require long calls or use long call stubs). */
+
+#define SUBSPA_MILLI .subspa .text
+#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
+#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
+#define ATTR_MILLI .attr code,read,execute
+#define SUBSPA_DATA .subspa .data
+#define ATTR_DATA .attr init_data,read,write
+#define GLOBAL _gp
+#else
+#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
+#define SUBSPA_MILLI_DIV SUBSPA_MILLI
+#define SUBSPA_MILLI_MUL SUBSPA_MILLI
+#define ATTR_MILLI
+#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
+#define ATTR_DATA
+#define GLOBAL $global$
+#endif
+#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16
+
+#define GSYM(sym) !sym
+#define LSYM(sym) !CAT(L$,sym)
+#define LREF(sym) CAT(L$,sym)
+#endif
+
+#ifdef L_dyncall
+ SUBSPA_MILLI
+ ATTR_DATA
+GSYM($$dyncall)
+ .export $$dyncall,millicode
+ .proc
+ .callinfo millicode
+ .entry
+ bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
+ depi 0,31,2,%r22 ; clear the two least significant bits
+ ldw 4(%r22),%r19 ; load new LTP value
+ ldw 0(%r22),%r22 ; load address of target
+LSYM(1)
+#ifdef LINUX
+ bv %r0(%r22) ; branch to the real target
+#else
+ ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22
+ mtsp %r1,%sr0 ; move that space identifier into sr0
+ be 0(%sr0,%r22) ; branch to the real target
+#endif
+ stw %r2,-24(%r30) ; save return address into frame marker
+ .exit
+ .procend
+#endif
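+
+/* A hedged C model of the plabel handling done by $$dyncall above: a
+   function "pointer" with bit 1 set actually points at a two-word
+   descriptor holding the real target and the callee's linkage table
+   pointer, and the low two bits must be masked off before loading:
+
+     struct plabel { unsigned target; unsigned ltp; };
+
+     unsigned resolve (unsigned fptr, unsigned *ltp)
+     {
+       if (fptr & 2)                         // tested by bb,>=,n %r22,30
+         {
+           struct plabel *p = (struct plabel *) (fptr & ~3U);   // depi
+           *ltp = p->ltp;                    // ldw 4(%r22),%r19
+           return p->target;                 // ldw 0(%r22),%r22
+         }
+       return fptr;                          // already a code address
+     }
+*/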
+
+#ifdef L_divI
+/* ROUTINES: $$divI, $$divoI
+
+ Single precision divide for signed binary integers.
+
+ The quotient is truncated towards zero.
+ The sign of the quotient is the XOR of the signs of the dividend and
+ divisor.
+ Divide by zero is trapped.
+ Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = quotient
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions:
+ . divisor is zero (traps with ADDIT,= 0,25,0)
+ . dividend==-2**31 and divisor==-1 and routine is $$divoI
+ . (traps with ADDO 26,25,0)
+ . Changes memory at the following places:
+ . NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ .      Branches to other millicode routines using BE
+ . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
+ .
+ . For selected divisors, calls a divide by constant routine written by
+ . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
+ .
+ . The only overflow case is -2**31 divided by -1.
+ . Both routines return -2**31 but only $$divoI traps. */
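+
+/* A hedged C sketch of the power-of-two fast path entered at
+   LSYM(pow2) below: for a positive divisor d == 1 << k the quotient
+   truncated toward zero is obtained by biasing negative dividends by
+   d - 1 and then shifting right arithmetically k times (the assembly
+   performs the k shifts with the 0xffff0000/0xff00/0xf0/0xcc/0xaa
+   mask tests rather than a loop):
+
+     int div_pow2 (int n, int d)
+     {
+       if (n < 0)
+         n += d - 1;       // add (denominator - 1), as below
+       while (d > 1)       // shift right once per factor of two
+         {
+           n >>= 1;
+           d >>= 1;
+         }
+       return n;
+     }
+*/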
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1) /* r29 */
+RDEFINE(temp1,arg0)
+ SUBSPA_MILLI_DIV
+ ATTR_MILLI
+ .import $$divI_2,millicode
+ .import $$divI_3,millicode
+ .import $$divI_4,millicode
+ .import $$divI_5,millicode
+ .import $$divI_6,millicode
+ .import $$divI_7,millicode
+ .import $$divI_8,millicode
+ .import $$divI_9,millicode
+ .import $$divI_10,millicode
+ .import $$divI_12,millicode
+ .import $$divI_14,millicode
+ .import $$divI_15,millicode
+ .export $$divI,millicode
+ .export $$divoI,millicode
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$divoI)
+ comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
+GSYM($$divI)
+ ldo -1(arg1),temp /* is there at most one bit set ? */
+ and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
+ addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
+ b,n LREF(neg_denom)
+LSYM(pow2)
+ addi,>= 0,arg0,retreg /* if numerator is negative, add the */
+	add	arg0,temp,retreg	/* (denominator - 1) to correct for shifts */
+ extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
+ extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
+ ldi 0xcc,temp1 /* setup 0xcc in temp1 */
+ extru,= arg1,23,8,temp /* test denominator with 0xff00 */
+ extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
+ ldi 0xaa,temp /* setup 0xaa in temp */
+ extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
+ extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
+ and,= arg1,temp1,r0 /* test denominator with 0xcc */
+ extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
+ and,= arg1,temp,r0 /* test denominator with 0xaa */
+ extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
+ MILLIRETN
+LSYM(neg_denom)
+ addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
+ b,n LREF(regular_seq)
+ sub r0,arg1,temp /* make denominator positive */
+ comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
+ ldo -1(temp),retreg /* is there at most one bit set ? */
+ and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
+ b,n LREF(regular_seq)
+ sub r0,arg0,retreg /* negate numerator */
+ comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
+ copy retreg,arg0 /* set up arg0, arg1 and temp */
+ copy temp,arg1 /* before branching to pow2 */
+ b LREF(pow2)
+ ldo -1(arg1),temp
+LSYM(regular_seq)
+ comib,>>=,n 15,arg1,LREF(small_divisor)
+ add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
+LSYM(normal)
+ subi 0,retreg,retreg /* make it positive */
+ sub 0,arg1,temp /* clear carry, */
+ /* negate the divisor */
+ ds 0,temp,0 /* set V-bit to the comple- */
+ /* ment of the divisor sign */
+ add retreg,retreg,retreg /* shift msb bit into carry */
+ ds r0,arg1,temp /* 1st divide step, if no carry */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 2nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 3rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 4th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 5th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 6th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 7th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 8th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 9th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 10th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 11th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 12th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 13th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 14th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 15th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 16th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 17th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 18th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 19th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 20th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 21st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 22nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 23rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 24th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 25th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 26th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 27th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 28th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 29th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 30th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 31st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 32nd divide step, */
+ addc retreg,retreg,retreg /* shift last retreg bit into retreg */
+ xor,>= arg0,arg1,0 /* get correct sign of quotient */
+ sub 0,retreg,retreg /* based on operand signs */
+ MILLIRETN
+ nop
+
+LSYM(small_divisor)
+
+#if defined(pa64)
+/* Clear the upper 32 bits of the arg1 register. We are working with */
+/* small divisors (and 32-bit integers). We must not be misled */
+/* by "1" bits left in the upper 32 bits. */
+ depd %r0,31,32,%r25
+#endif
+ blr,n arg1,r0
+ nop
+/* table for divisor == 0,1, ... ,15 */
+ addit,= 0,arg1,r0 /* trap if divisor == 0 */
+ nop
+ MILLIRET /* divisor == 1 */
+ copy arg0,retreg
+ MILLI_BEN($$divI_2) /* divisor == 2 */
+ nop
+ MILLI_BEN($$divI_3) /* divisor == 3 */
+ nop
+ MILLI_BEN($$divI_4) /* divisor == 4 */
+ nop
+ MILLI_BEN($$divI_5) /* divisor == 5 */
+ nop
+ MILLI_BEN($$divI_6) /* divisor == 6 */
+ nop
+ MILLI_BEN($$divI_7) /* divisor == 7 */
+ nop
+ MILLI_BEN($$divI_8) /* divisor == 8 */
+ nop
+ MILLI_BEN($$divI_9) /* divisor == 9 */
+ nop
+ MILLI_BEN($$divI_10) /* divisor == 10 */
+ nop
+ b LREF(normal) /* divisor == 11 */
+ add,>= 0,arg0,retreg
+ MILLI_BEN($$divI_12) /* divisor == 12 */
+ nop
+ b LREF(normal) /* divisor == 13 */
+ add,>= 0,arg0,retreg
+ MILLI_BEN($$divI_14) /* divisor == 14 */
+ nop
+ MILLI_BEN($$divI_15) /* divisor == 15 */
+ nop
+
+LSYM(negative1)
+ sub 0,arg0,retreg /* result is negation of dividend */
+ MILLIRET
+ addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_divU
+/* ROUTINE: $$divU
+ .
+ . Single precision divide for unsigned integers.
+ .
+ . Quotient is truncated towards zero.
+ . Traps on divide by zero.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = quotient
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions:
+ . divisor is zero
+ . Changes memory at the following places:
+ . NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Does not create a stack frame.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Branches to other millicode routines using BE:
+ . $$divU_# for 3,5,6,7,9,10,12,14,15
+ .
+ . For selected small divisors calls the special divide by constant
+ . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1) /* r29 */
+RDEFINE(temp1,arg0)
+ SUBSPA_MILLI_DIV
+ ATTR_MILLI
+ .export $$divU,millicode
+ .import $$divU_3,millicode
+ .import $$divU_5,millicode
+ .import $$divU_6,millicode
+ .import $$divU_7,millicode
+ .import $$divU_9,millicode
+ .import $$divU_10,millicode
+ .import $$divU_12,millicode
+ .import $$divU_14,millicode
+ .import $$divU_15,millicode
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$divU)
+/* The subtract is not nullified since it does no harm and can be used
+ by the two cases that branch back to "normal". */
+ ldo -1(arg1),temp /* is there at most one bit set ? */
+ and,= arg1,temp,r0 /* if so, denominator is power of 2 */
+ b LREF(regular_seq)
+ addit,= 0,arg1,0 /* trap for zero dvr */
+ copy arg0,retreg
+ extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
+ extru retreg,15,16,retreg /* retreg = retreg >> 16 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
+ ldi 0xcc,temp1 /* setup 0xcc in temp1 */
+ extru,= arg1,23,8,temp /* test denominator with 0xff00 */
+ extru retreg,23,24,retreg /* retreg = retreg >> 8 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
+ ldi 0xaa,temp /* setup 0xaa in temp */
+ extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
+ extru retreg,27,28,retreg /* retreg = retreg >> 4 */
+ and,= arg1,temp1,r0 /* test denominator with 0xcc */
+ extru retreg,29,30,retreg /* retreg = retreg >> 2 */
+ and,= arg1,temp,r0 /* test denominator with 0xaa */
+ extru retreg,30,31,retreg /* retreg = retreg >> 1 */
+ MILLIRETN
+ nop
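+
+/* As a hedged illustration only (not part of the millicode, and the
+   function name is made up): the power-of-2 path above amounts to
+   shifting the dividend right by log2(divisor), with the shift count
+   found by a binary search on the divisor's single set bit.  The
+   assembly folds arg1 downward so it can reuse small mask constants;
+   a C rendering with replicated masks is
+
+   . unsigned divU_pow2_sketch (unsigned x, unsigned d)
+   . {
+   .   if (d & 0xffff0000) x >>= 16;
+   .   if (d & 0xff00ff00) x >>= 8;
+   .   if (d & 0xf0f0f0f0) x >>= 4;
+   .   if (d & 0xcccccccc) x >>= 2;
+   .   if (d & 0xaaaaaaaa) x >>= 1;
+   .   return x;
+   . }
+
+   assuming d has exactly one bit set, which the ldo/and test above has
+   already established (a zero divisor traps instead).  */
+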
+LSYM(regular_seq)
+ comib,>= 15,arg1,LREF(special_divisor)
+ subi 0,arg1,temp /* clear carry, negate the divisor */
+ ds r0,temp,r0 /* set V-bit to 1 */
+LSYM(normal)
+ add arg0,arg0,retreg /* shift msb bit into carry */
+ ds r0,arg1,temp /* 1st divide step, if no carry */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 2nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 3rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 4th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 5th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 6th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 7th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 8th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 9th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 10th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 11th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 12th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 13th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 14th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 15th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 16th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 17th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 18th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 19th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 20th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 21st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 22nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 23rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 24th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 25th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 26th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 27th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 28th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 29th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 30th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 31st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 32nd divide step, */
+ MILLIRET
+ addc retreg,retreg,retreg /* shift last retreg bit into retreg */
+
+/* Handle the cases where divisor is a small constant or has high bit on. */
+LSYM(special_divisor)
+/* blr arg1,r0 */
+/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
+
+/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
+ generating such a blr, comib sequence. A problem in nullification. So I
+ rewrote this code. */
+
+#if defined(pa64)
+/* Clear the upper 32 bits of the arg1 register. We are working with
+ small divisors (and 32-bit unsigned integers). We must not be misled
+ by "1" bits left in the upper 32 bits. */
+ depd %r0,31,32,%r25
+#endif
+ comib,> 0,arg1,LREF(big_divisor)
+ nop
+ blr arg1,r0
+ nop
+
+LSYM(zero_divisor) /* this label is here to provide external visibility */
+ addit,= 0,arg1,0 /* trap for zero dvr */
+ nop
+ MILLIRET /* divisor == 1 */
+ copy arg0,retreg
+ MILLIRET /* divisor == 2 */
+ extru arg0,30,31,retreg
+ MILLI_BEN($$divU_3) /* divisor == 3 */
+ nop
+ MILLIRET /* divisor == 4 */
+ extru arg0,29,30,retreg
+ MILLI_BEN($$divU_5) /* divisor == 5 */
+ nop
+ MILLI_BEN($$divU_6) /* divisor == 6 */
+ nop
+ MILLI_BEN($$divU_7) /* divisor == 7 */
+ nop
+ MILLIRET /* divisor == 8 */
+ extru arg0,28,29,retreg
+ MILLI_BEN($$divU_9) /* divisor == 9 */
+ nop
+ MILLI_BEN($$divU_10) /* divisor == 10 */
+ nop
+ b LREF(normal) /* divisor == 11 */
+ ds r0,temp,r0 /* set V-bit to 1 */
+ MILLI_BEN($$divU_12) /* divisor == 12 */
+ nop
+ b LREF(normal) /* divisor == 13 */
+ ds r0,temp,r0 /* set V-bit to 1 */
+ MILLI_BEN($$divU_14) /* divisor == 14 */
+ nop
+ MILLI_BEN($$divU_15) /* divisor == 15 */
+ nop
+
+/* Handle the case where the high bit is on in the divisor.
+ Compute: if( dividend>=divisor) quotient=1; else quotient=0;
+ Note: dividend>=divisor iff dividend-divisor does not borrow
+ and not borrow iff carry. */
+LSYM(big_divisor)
+ sub arg0,arg1,r0
+ MILLIRET
+ addc r0,r0,retreg
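+
+/* A hedged C sketch of the sequence above (illustrative name only): when
+   the divisor has its most significant bit set, the quotient can only be
+   0 or 1, and the carry out of dividend - divisor is exactly the test
+   dividend >= divisor.
+
+   . unsigned divU_big_divisor_sketch (unsigned x, unsigned d)
+   . {
+   .   return x >= d ? 1u : 0u;
+   . }
+
+   which is what the sub/addc pair computes through the carry bit.  */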
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_remI
+/* ROUTINE: $$remI
+
+ DESCRIPTION:
+ . $$remI returns the remainder of the division of two signed 32-bit
+ . integers. The sign of the remainder is the same as the sign of
+ . the dividend.
+
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = destroyed
+ . arg1 = destroyed
+ . ret1 = remainder
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions: DIVIDE BY ZERO
+ . Changes memory at the following places: NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable
+ . Does not create a stack frame
+ . Is usable for internal or external millicode
+
+ DISCUSSION:
+ . Calls other millicode routines via mrp: NONE
+ . Calls other millicode routines: NONE */
+
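+/* For reference, a hedged C sketch (not the millicode itself): the sign
+   rule above is the same truncating convention as the C99 % operator,
+   so e.g. the remainder of -7 by 3 is -1 and of 7 by -3 is 1.
+
+   . int remI_reference_sketch (int x, int d)
+   . {
+   .   return x % d;
+   . }
+
+   The millicode additionally traps on a zero divisor rather than
+   relying on undefined behaviour.  */
+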
+RDEFINE(tmp,r1)
+RDEFINE(retreg,ret1)
+
+ SUBSPA_MILLI
+ ATTR_MILLI
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$remI)
+GSYM($$remoI)
+ .export $$remI,MILLICODE
+ .export $$remoI,MILLICODE
+ ldo -1(arg1),tmp /* is there at most one bit set ? */
+ and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
+ addi,> 0,arg1,r0 /* if denominator > 0, use power */
+ /* of 2 */
+ b,n LREF(neg_denom)
+LSYM(pow2)
+ comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
+ and arg0,tmp,retreg /* get the result */
+ MILLIRETN
+LSYM(neg_num)
+ subi 0,arg0,arg0 /* negate numerator */
+ and arg0,tmp,retreg /* get the result */
+ subi 0,retreg,retreg /* negate result */
+ MILLIRETN
+LSYM(neg_denom)
+ addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
+ /* of 2 */
+ b,n LREF(regular_seq)
+ sub r0,arg1,tmp /* make denominator positive */
+ comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
+ ldo -1(tmp),retreg /* is there at most one bit set ? */
+ and,= tmp,retreg,r0 /* if not, go to regular_seq */
+ b,n LREF(regular_seq)
+ comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
+ and arg0,retreg,retreg
+ MILLIRETN
+LSYM(neg_num_2)
+ subi 0,arg0,tmp /* test against 0x80000000 */
+ and tmp,retreg,retreg
+ subi 0,retreg,retreg
+ MILLIRETN
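+
+/* The shortcut above is the usual single-bit test and mask; a hedged C
+   sketch (illustrative name, INT_MIN corner case ignored) of the case
+   handled at "pow2", where the divisor is a positive power of two:
+
+   . int remI_pow2_sketch (int x, int d)
+   . {
+   .   int mask = d - 1;
+   .   return x >= 0 ? (x & mask) : -((-x) & mask);
+   . }
+
+   The neg_denom path reduces a divisor that is the negative of a power
+   of two to the same masking, since the remainder's sign depends only
+   on the dividend.  */
+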
+LSYM(regular_seq)
+ addit,= 0,arg1,0 /* trap if div by zero */
+ add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
+ sub 0,retreg,retreg /* make it positive */
+ sub 0,arg1, tmp /* clear carry, */
+ /* negate the divisor */
+ ds 0, tmp,0 /* set V-bit to the comple- */
+ /* ment of the divisor sign */
+ or 0,0, tmp /* clear tmp */
+ add retreg,retreg,retreg /* shift msb bit into carry */
+ ds tmp,arg1, tmp /* 1st divide step, if no carry */
+ /* out, msb of quotient = 0 */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+LSYM(t1)
+ ds tmp,arg1, tmp /* 2nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 3rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 4th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 5th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 6th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 7th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 8th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 9th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 10th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 11th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 12th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 13th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 14th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 15th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 16th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 17th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 18th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 19th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 20th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 21st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 22nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 23rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 24th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 25th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 26th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 27th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 28th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 29th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 30th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 31st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 32nd divide step, */
+ addc retreg,retreg,retreg /* shift last bit into retreg */
+ movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
+ add,< arg1,0,0 /* if arg1 > 0, add arg1 */
+ add,tr tmp,arg1,retreg /* for correcting remainder tmp */
+ sub tmp,arg1,retreg /* else add absolute value arg1 */
+LSYM(finish)
+ add,>= arg0,0,0 /* set sign of remainder */
+ sub 0,retreg,retreg /* to sign of dividend */
+ MILLIRET
+ nop
+ .exit
+ .procend
+#ifdef milliext
+ .origin 0x00000200
+#endif
+ .end
+#endif
+
+#ifdef L_remU
+/* ROUTINE: $$remU
+ . Single precision divide for remainder with unsigned binary integers.
+ .
+ . The remainder must be dividend-(dividend/divisor)*divisor.
+ . Divide by zero is trapped.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = remainder
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions: DIVIDE BY ZERO
+ . Changes memory at the following places: NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Does not create a stack frame.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Calls other millicode routines using mrp: NONE
+ . Calls other millicode routines: NONE */
+
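+/* Stated in C terms (a hedged reference sketch only, not the millicode):
+   for unsigned operands the definition above is exactly the % operator,
+
+   . unsigned remU_reference_sketch (unsigned x, unsigned d)
+   . {
+   .   return x - (x / d) * d;
+   . }
+
+   which returns the same value as x % d; the millicode traps on d == 0
+   instead of relying on undefined behaviour.  */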
+
+RDEFINE(temp,r1)
+RDEFINE(rmndr,ret1) /* r29 */
+ SUBSPA_MILLI
+ ATTR_MILLI
+ .export $$remU,millicode
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$remU)
+ ldo -1(arg1),temp /* is there at most one bit set ? */
+ and,= arg1,temp,r0 /* if not, don't use power of 2 */
+ b LREF(regular_seq)
+ addit,= 0,arg1,r0 /* trap on div by zero */
+ and arg0,temp,rmndr /* get the result for power of 2 */
+ MILLIRETN
+LSYM(regular_seq)
+ comib,>=,n 0,arg1,LREF(special_case)
+ subi 0,arg1,rmndr /* clear carry, negate the divisor */
+ ds r0,rmndr,r0 /* set V-bit to 1 */
+ add arg0,arg0,temp /* shift msb bit into carry */
+ ds r0,arg1,rmndr /* 1st divide step, if no carry */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 2nd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 3rd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 4th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 5th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 6th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 7th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 8th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 9th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 10th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 11th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 12th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 13th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 14th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 15th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 16th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 17th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 18th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 19th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 20th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 21st divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 22nd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 23rd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 24th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 25th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 26th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 27th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 28th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 29th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 30th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 31st divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 32nd divide step, */
+ comiclr,<= 0,rmndr,r0
+ add rmndr,arg1,rmndr /* correction */
+ MILLIRETN
+ nop
+
+/* Putting >= on the last DS and deleting COMICLR does not work! */
+LSYM(special_case)
+ sub,>>= arg0,arg1,rmndr
+ copy arg0,rmndr
+ MILLIRETN
+ nop
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_div_const
+/* ROUTINE: $$divI_2
+ . $$divI_3 $$divU_3
+ . $$divI_4
+ . $$divI_5 $$divU_5
+ . $$divI_6 $$divU_6
+ . $$divI_7 $$divU_7
+ . $$divI_8
+ . $$divI_9 $$divU_9
+ . $$divI_10 $$divU_10
+ .
+ . $$divI_12 $$divU_12
+ .
+ . $$divI_14 $$divU_14
+ . $$divI_15 $$divU_15
+ . $$divI_16
+ . $$divI_17 $$divU_17
+ .
+ . Divide by selected constants for single precision binary integers.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = quotient
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions: NONE
+ . Changes memory at the following places: NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Does not create a stack frame.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Calls other millicode routines using mrp: NONE
+ . Calls other millicode routines: NONE */
+
+
+/* TRUNCATED DIVISION BY SMALL INTEGERS
+
+ We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
+ (with y fixed).
+
+ Let a = floor(z/y), for some choice of z. Note that z will be
+ chosen so that division by z is cheap.
+
+ Let r be the remainder(z/y). In other words, r = z - ay.
+
+ Now, our method is to choose a value for b such that
+
+ q'(x) = floor((ax+b)/z)
+
+ is equal to q(x) over as large a range of x as possible. If the
+ two are equal over a sufficiently large range, and if it is easy to
+ form the product (ax), and it is easy to divide by z, then we can
+ perform the division much faster than the general division algorithm.
+
+ So, we want the following to be true:
+
+ . For x in the following range:
+ .
+ . ky <= x < (k+1)y
+ .
+ . implies that
+ .
+ . k <= (ax+b)/z < (k+1)
+
+ We want to determine b such that this is true for all k in the
+ range {0..K} for some maximum K.
+
+ Since (ax+b) is an increasing function of x, we can take each
+ bound separately to determine the "best" value for b.
+
+ (ax+b)/z < (k+1) implies
+
+ a((k+1)y-1)+b < (k+1)z implies
+
+ b < a + (k+1)(z-ay) implies
+
+ b < a + (k+1)r
+
+ This needs to be true for all k in the range {0..K}. In
+ particular, it is true for k = 0 and this leads to a maximum
+ acceptable value for b.
+
+ b < a+r or b <= a+r-1
+
+ Taking the other bound, we have
+
+ k <= (ax+b)/z implies
+
+ k <= (aky+b)/z implies
+
+ k(z-ay) <= b implies
+
+ kr <= b
+
+ Clearly, the largest range for k will be achieved by maximizing b,
+ when r is not zero. When r is zero, then the simplest choice for b
+ is 0. When r is not 0, set
+
+ . b = a+r-1
+
+ Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
+ for all x in the range:
+
+ . 0 <= x < (K+1)y
+
+ We need to determine what K is. Of our two bounds,
+
+ . b < a+(k+1)r is satisfied for all k >= 0, by construction.
+
+ The other bound is
+
+ . kr <= b
+
+ This is always true if r = 0. If r is not 0 (the usual case), then
+ K = floor((a+r-1)/r) is the maximum value for k.
+
+ Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
+ answer for q(x) = floor(x/y) when x is in the range
+
+ (0,(K+1)y-1) K = floor((a+r-1)/r)
+
+ To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
+ the formula for q'(x) yields the correct value of q(x) for all x
+ representable by a single word in HPPA.
+
+ We are also constrained in that computing the product (ax), adding
+ b, and dividing by z must all be done quickly, otherwise we will be
+ better off going through the general algorithm using the DS
+ instruction, which uses approximately 70 cycles.
+
+ For each y, there is a choice of z which satisfies the constraints
+ for (K+1)y >= 2**32. We may not, however, be able to satisfy the
+ timing constraints for arbitrary y. It seems that z being equal to
+ a power of 2 or a power of 2 minus 1 is as good as we can do, since
+ it minimizes the time to do division by z. We want the choice of z
+ to also result in a value for (a) that minimizes the computation of
+ the product (ax). This is best achieved if (a) has a regular bit
+ pattern (so the multiplication can be done with shifts and adds).
+ The value of (a) also needs to be less than 2**32 so the product is
+ always guaranteed to fit in 2 words.
+
+ In actual practice, the following should be done:
+
+ 1) For negative x, you should take the absolute value and remember
+ . the fact so that the result can be negated. This obviously does
+ . not apply in the unsigned case.
+ 2) For even y, you should factor out the power of 2 that divides y
+ . and divide x by it. You can then proceed by dividing by the
+ . odd factor of y.
+
+ Here is a table of some odd values of y, and corresponding choices
+ for z which are "good".
+
+ y z r a (hex) max x (hex)
+
+ 3 2**32 1 55555555 100000001
+ 5 2**32 1 33333333 100000003
+ 7 2**24-1 0 249249 (infinite)
+ 9 2**24-1 0 1c71c7 (infinite)
+ 11 2**20-1 0 1745d (infinite)
+ 13 2**24-1 0 13b13b (infinite)
+ 15 2**32 1 11111111 10000000d
+ 17 2**32 1 f0f0f0f 10000000f
+
+ If r is 1, then b = a+r-1 = a. This simplifies the computation
+ of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
+ then b = 0 is ok to use which simplifies (ax+b).
+
+ The bit patterns for 55555555, 33333333, and 11111111 are obviously
+ very regular. The bit patterns for the other values of a above are:
+
+ y (hex) (binary)
+
+ 7 249249 001001001001001001001001 << regular >>
+ 9 1c71c7 000111000111000111000111 << regular >>
+ 11 1745d 000000010111010001011101 << irregular >>
+ 13 13b13b 000100111011000100111011 << irregular >>
+
+ The bit patterns for (a) corresponding to (y) of 11 and 13 may be
+ too irregular to warrant using this method.
+
+ When z is a power of 2 minus 1, then the division by z is slightly
+ more complicated, involving an iterative solution.
+
+ The code presented here solves division by 1 through 17, except for
+ 11 and 13. There are algorithms for both signed and unsigned
+ quantities given.
+
+ TIMINGS (cycles)
+
+ divisor positive negative unsigned
+
+ . 1 2 2 2
+ . 2 4 4 2
+ . 3 19 21 19
+ . 4 4 4 2
+ . 5 18 22 19
+ . 6 19 22 19
+ . 8 4 4 2
+ . 10 18 19 17
+ . 12 18 20 18
+ . 15 16 18 16
+ . 16 4 4 2
+ . 17 16 18 16
+
+ Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
+ a loop body is executed until the tentative quotient is 0. The
+ number of times the loop body is executed varies depending on the
+ dividend, but is never more than two times. If the dividend is
+ less than the divisor, then the loop body is not executed at all.
+ Each iteration adds 4 cycles to the timings.
+
+ divisor positive negative unsigned
+
+ . 7 19+4n 20+4n 20+4n n = number of iterations
+ . 9 21+4n 22+4n 21+4n
+ . 14 21+4n 22+4n 20+4n
+
+ To give an idea of how the number of iterations varies, here is a
+ table of dividend versus number of iterations when dividing by 7.
+
+ smallest largest required
+ dividend dividend iterations
+
+ . 0 6 0
+ . 7 0x6ffffff 1
+ 0x1000006 0xffffffff 2
+
+ There is some overlap in the range of numbers requiring 1 and 2
+ iterations. */
+
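+/* A worked instance of the scheme above, as a hedged C sketch (the
+   function name is made up): for y = 3 the table gives z = 2**32,
+   a = 0x55555555 and r = 1, so b = a+r-1 = a and
+   q(x) = floor((a*x + a)/2**32), i.e. the high word of (x+1)*a.
+
+   . unsigned divU3_sketch (unsigned x)
+   . {
+   .   unsigned long long p = (x + 1ULL) * 0x55555555ULL;
+   .   return (unsigned) (p >> 32);
+   . }
+
+   For example x = 100 gives p >> 32 = 33 = 100/3, and x = 0xffffffff
+   gives 0x55555555; the "max x" of 0x100000001 in the table means the
+   formula is valid for every 32-bit x.  */
+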
+RDEFINE(t2,r1)
+RDEFINE(x2,arg0) /* r26 */
+RDEFINE(t1,arg1) /* r25 */
+RDEFINE(x1,ret1) /* r29 */
+
+ SUBSPA_MILLI_DIV
+ ATTR_MILLI
+
+ .proc
+ .callinfo millicode
+ .entry
+/* NONE of these routines require a stack frame
+ ALL of these routines are unwindable from millicode */
+
+GSYM($$divide_by_constant)
+ .export $$divide_by_constant,millicode
+/* Provides a "nice" label for the code covered by the unwind descriptor
+ for things like gprof. */
+
+/* DIVISION BY 2 (shift by 1) */
+GSYM($$divI_2)
+ .export $$divI_2,millicode
+ comclr,>= arg0,0,0
+ addi 1,arg0,arg0
+ MILLIRET
+ extrs arg0,30,31,ret1
+
+
+/* DIVISION BY 4 (shift by 2) */
+GSYM($$divI_4)
+ .export $$divI_4,millicode
+ comclr,>= arg0,0,0
+ addi 3,arg0,arg0
+ MILLIRET
+ extrs arg0,29,30,ret1
+
+
+/* DIVISION BY 8 (shift by 3) */
+GSYM($$divI_8)
+ .export $$divI_8,millicode
+ comclr,>= arg0,0,0
+ addi 7,arg0,arg0
+ MILLIRET
+ extrs arg0,28,29,ret1
+
+/* DIVISION BY 16 (shift by 4) */
+GSYM($$divI_16)
+ .export $$divI_16,millicode
+ comclr,>= arg0,0,0
+ addi 15,arg0,arg0
+ MILLIRET
+ extrs arg0,27,28,ret1
+
+/****************************************************************************
+*
+* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
+*
+* includes 3,5,15,17 and also 6,10,12
+*
+****************************************************************************/
+
+/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
+
+GSYM($$divI_3)
+ .export $$divI_3,millicode
+ comb,<,N x2,0,LREF(neg3)
+
+ addi 1,x2,x2 /* this cannot overflow */
+ extru x2,1,2,x1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(pos)
+ addc x1,0,x1
+
+LSYM(neg3)
+ subi 1,x2,x2 /* this cannot overflow */
+ extru x2,1,2,x1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(neg)
+ addc x1,0,x1
+
+GSYM($$divU_3)
+ .export $$divU_3,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ addc 0,0,x1
+ shd x1,x2,30,t1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(pos)
+ addc x1,t1,x1
+
+/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
+
+GSYM($$divI_5)
+ .export $$divI_5,millicode
+ comb,<,N x2,0,LREF(neg5)
+
+ addi 3,x2,t1 /* this cannot overflow */
+ sh1add x2,t1,x2 /* multiply by 3 to get started */
+ b LREF(pos)
+ addc 0,0,x1
+
+LSYM(neg5)
+ sub 0,x2,x2 /* negate x2 */
+ addi 1,x2,x2 /* this cannot overflow */
+ shd 0,x2,31,x1 /* get top bit (can be 1) */
+ sh1add x2,x2,x2 /* multiply by 3 to get started */
+ b LREF(neg)
+ addc x1,0,x1
+
+GSYM($$divU_5)
+ .export $$divU_5,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ addc 0,0,x1
+ shd x1,x2,31,t1 /* multiply by 3 to get started */
+ sh1add x2,x2,x2
+ b LREF(pos)
+ addc t1,x1,x1
+
+/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
+GSYM($$divI_6)
+ .export $$divI_6,millicode
+ comb,<,N x2,0,LREF(neg6)
+ extru x2,30,31,x2 /* divide by 2 */
+ addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
+ sh2add x2,t1,x2 /* multiply by 5 to get started */
+ b LREF(pos)
+ addc 0,0,x1
+
+LSYM(neg6)
+ subi 2,x2,x2 /* negate, divide by 2, and add 1 */
+ /* negation and adding 1 are done */
+ /* at the same time by the SUBI */
+ extru x2,30,31,x2
+ shd 0,x2,30,x1
+ sh2add x2,x2,x2 /* multiply by 5 to get started */
+ b LREF(neg)
+ addc x1,0,x1
+
+GSYM($$divU_6)
+ .export $$divU_6,millicode
+ extru x2,30,31,x2 /* divide by 2 */
+ addi 1,x2,x2 /* cannot carry */
+ shd 0,x2,30,x1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(pos)
+ addc x1,0,x1
+
+/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
+GSYM($$divU_10)
+ .export $$divU_10,millicode
+ extru x2,30,31,x2 /* divide by 2 */
+ addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
+ sh1add x2,t1,x2 /* multiply by 3 to get started */
+ addc 0,0,x1
+LSYM(pos)
+ shd x1,x2,28,t1 /* multiply by 0x11 */
+ shd x2,0,28,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+LSYM(pos_for_17)
+ shd x1,x2,24,t1 /* multiply by 0x101 */
+ shd x2,0,24,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,16,t1 /* multiply by 0x10001 */
+ shd x2,0,16,t2
+ add x2,t2,x2
+ MILLIRET
+ addc x1,t1,x1
+
+GSYM($$divI_10)
+ .export $$divI_10,millicode
+ comb,< x2,0,LREF(neg10)
+ copy 0,x1
+ extru x2,30,31,x2 /* divide by 2 */
+ addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
+ sh1add x2,x2,x2 /* multiply by 3 to get started */
+
+LSYM(neg10)
+ subi 2,x2,x2 /* negate, divide by 2, and add 1 */
+ /* negation and adding 1 are done */
+ /* at the same time by the SUBI */
+ extru x2,30,31,x2
+ sh1add x2,x2,x2 /* multiply by 3 to get started */
+LSYM(neg)
+ shd x1,x2,28,t1 /* multiply by 0x11 */
+ shd x2,0,28,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+LSYM(neg_for_17)
+ shd x1,x2,24,t1 /* multiply by 0x101 */
+ shd x2,0,24,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,16,t1 /* multiply by 0x10001 */
+ shd x2,0,16,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+ MILLIRET
+ sub 0,x1,x1
+
+/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
+GSYM($$divI_12)
+ .export $$divI_12,millicode
+ comb,< x2,0,LREF(neg12)
+ copy 0,x1
+ extru x2,29,30,x2 /* divide by 4 */
+ addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
+ sh2add x2,x2,x2 /* multiply by 5 to get started */
+
+LSYM(neg12)
+ subi 4,x2,x2 /* negate, divide by 4, and add 1 */
+ /* negation and adding 1 are done */
+ /* at the same time by the SUBI */
+ extru x2,29,30,x2
+ b LREF(neg)
+ sh2add x2,x2,x2 /* multiply by 5 to get started */
+
+GSYM($$divU_12)
+ .export $$divU_12,millicode
+ extru x2,29,30,x2 /* divide by 4 */
+ addi 5,x2,t1 /* cannot carry */
+ sh2add x2,t1,x2 /* multiply by 5 to get started */
+ b LREF(pos)
+ addc 0,0,x1
+
+/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
+GSYM($$divI_15)
+ .export $$divI_15,millicode
+ comb,< x2,0,LREF(neg15)
+ copy 0,x1
+ addib,tr 1,x2,LREF(pos)+4
+ shd x1,x2,28,t1
+
+LSYM(neg15)
+ b LREF(neg)
+ subi 1,x2,x2
+
+GSYM($$divU_15)
+ .export $$divU_15,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ b LREF(pos)
+ addc 0,0,x1
+
+/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
+GSYM($$divI_17)
+ .export $$divI_17,millicode
+ comb,<,n x2,0,LREF(neg17)
+ addi 1,x2,x2 /* this cannot overflow */
+ shd 0,x2,28,t1 /* multiply by 0xf to get started */
+ shd x2,0,28,t2
+ sub t2,x2,x2
+ b LREF(pos_for_17)
+ subb t1,0,x1
+
+LSYM(neg17)
+ subi 1,x2,x2 /* this cannot overflow */
+ shd 0,x2,28,t1 /* multiply by 0xf to get started */
+ shd x2,0,28,t2
+ sub t2,x2,x2
+ b LREF(neg_for_17)
+ subb t1,0,x1
+
+GSYM($$divU_17)
+ .export $$divU_17,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ addc 0,0,x1
+ shd x1,x2,28,t1 /* multiply by 0xf to get started */
+LSYM(u17)
+ shd x2,0,28,t2
+ sub t2,x2,x2
+ b LREF(pos_for_17)
+ subb t1,x1,x1
+
+
+/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
+ includes 7,9 and also 14
+
+
+ z = 2**24-1
+ r = z mod x = 0
+
+ so choose b = 0
+
+ Also, in order to divide by z = 2**24-1, we approximate by dividing
+ by (z+1) = 2**24 (which is easy), and then correcting.
+
+ (ax) = (z+1)q' + r
+ . = zq' + (q'+r)
+
+ So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
+ Then the true remainder of (ax)/z is (q'+r). Repeat the process
+ with this new remainder, adding the tentative quotients together,
+ until a tentative quotient is 0 (and then we are done). There is
+ one last correction to be done. It is possible that (q'+r) = z.
+ If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
+ in fact, we need to add 1 more to the quotient. Now, it turns
+ out that this happens if and only if the original value x is
+ an exact multiple of y. So, to avoid a three instruction test at
+ the end, instead use 1 instruction to add 1 to x at the beginning. */
+
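+/* The same procedure as a hedged C sketch (illustrative name only) for
+   y = 7, where a = 0x249249 and z = 2**24-1, including the initial
+   "add 1 to x" and the iterative correction described above.  Each pass
+   adds the tentative quotient p/(z+1) and replaces p by that quotient
+   plus the remainder p mod (z+1):
+
+   . unsigned divU7_sketch (unsigned x)
+   . {
+   .   unsigned long long p = (x + 1ULL) * 0x249249ULL;
+   .   unsigned q = 0, t;
+   .   while ((t = (unsigned) (p >> 24)) != 0)
+   .     {
+   .       q += t;
+   .       p = t + (p & 0xffffff);
+   .     }
+   .   return q;
+   . }
+
+   For x = 0..6 the loop body never runs and the result is 0; x = 7
+   takes one correction and returns 1; x = 0xffffffff takes two and
+   returns 0x24924924, matching the iteration table above.  */
+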
+/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
+GSYM($$divI_7)
+ .export $$divI_7,millicode
+ comb,<,n x2,0,LREF(neg7)
+LSYM(7)
+ addi 1,x2,x2 /* cannot overflow */
+ shd 0,x2,29,x1
+ sh3add x2,x2,x2
+ addc x1,0,x1
+LSYM(pos7)
+ shd x1,x2,26,t1
+ shd x2,0,26,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,20,t1
+ shd x2,0,20,t2
+ add x2,t2,x2
+ addc x1,t1,t1
+
+ /* computed <t1,x2>. Now divide it by (2**24 - 1) */
+
+ copy 0,x1
+ shd,= t1,x2,24,t1 /* tentative quotient */
+LSYM(1)
+ addb,tr t1,x1,LREF(2) /* add to previous quotient */
+ extru x2,31,24,x2 /* new remainder (unadjusted) */
+
+ MILLIRETN
+
+LSYM(2)
+ addb,tr t1,x2,LREF(1) /* adjust remainder */
+ extru,= x2,7,8,t1 /* new quotient */
+
+LSYM(neg7)
+ subi 1,x2,x2 /* negate x2 and add 1 */
+LSYM(8)
+ shd 0,x2,29,x1
+ sh3add x2,x2,x2
+ addc x1,0,x1
+
+LSYM(neg7_shift)
+ shd x1,x2,26,t1
+ shd x2,0,26,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,20,t1
+ shd x2,0,20,t2
+ add x2,t2,x2
+ addc x1,t1,t1
+
+ /* computed <t1,x2>. Now divide it by (2**24 - 1) */
+
+ copy 0,x1
+ shd,= t1,x2,24,t1 /* tentative quotient */
+LSYM(3)
+ addb,tr t1,x1,LREF(4) /* add to previous quotient */
+ extru x2,31,24,x2 /* new remainder (unadjusted) */
+
+ MILLIRET
+ sub 0,x1,x1 /* negate result */
+
+LSYM(4)
+ addb,tr t1,x2,LREF(3) /* adjust remainder */
+ extru,= x2,7,8,t1 /* new quotient */
+
+GSYM($$divU_7)
+ .export $$divU_7,millicode
+ addi 1,x2,x2 /* can carry */
+ addc 0,0,x1
+ shd x1,x2,29,t1
+ sh3add x2,x2,x2
+ b LREF(pos7)
+ addc t1,x1,x1
+
+/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
+GSYM($$divI_9)
+ .export $$divI_9,millicode
+ comb,<,n x2,0,LREF(neg9)
+ addi 1,x2,x2 /* cannot overflow */
+ shd 0,x2,29,t1
+ shd x2,0,29,t2
+ sub t2,x2,x2
+ b LREF(pos7)
+ subb t1,0,x1
+
+LSYM(neg9)
+ subi 1,x2,x2 /* negate and add 1 */
+ shd 0,x2,29,t1
+ shd x2,0,29,t2
+ sub t2,x2,x2
+ b LREF(neg7_shift)
+ subb t1,0,x1
+
+GSYM($$divU_9)
+ .export $$divU_9,millicode
+ addi 1,x2,x2 /* can carry */
+ addc 0,0,x1
+ shd x1,x2,29,t1
+ shd x2,0,29,t2
+ sub t2,x2,x2
+ b LREF(pos7)
+ subb t1,x1,x1
+
+/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
+GSYM($$divI_14)
+ .export $$divI_14,millicode
+ comb,<,n x2,0,LREF(neg14)
+GSYM($$divU_14)
+ .export $$divU_14,millicode
+ b LREF(7) /* go to 7 case */
+ extru x2,30,31,x2 /* divide by 2 */
+
+LSYM(neg14)
+ subi 2,x2,x2 /* negate (and add 2) */
+ b LREF(8)
+ extru x2,30,31,x2 /* divide by 2 */
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_mulI
+/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
+/******************************************************************************
+This routine is used on PA2.0 processors when gcc -mno-fpregs is used
+
+ROUTINE: $$mulI
+
+
+DESCRIPTION:
+
+ $$mulI multiplies two single word integers, giving a single
+ word result.
+
+
+INPUT REGISTERS:
+
+ arg0 = Operand 1
+ arg1 = Operand 2
+ r31 == return pc
+ sr0 == return space when called externally
+
+
+OUTPUT REGISTERS:
+
+ arg0 = undefined
+ arg1 = undefined
+ ret1 = result
+
+OTHER REGISTERS AFFECTED:
+
+ r1 = undefined
+
+SIDE EFFECTS:
+
+ Causes a trap under the following conditions: NONE
+ Changes memory at the following places: NONE
+
+PERMISSIBLE CONTEXT:
+
+ Unwindable
+ Does not create a stack frame
+ Is usable for internal or external millicode
+
+DISCUSSION:
+
+ Calls other millicode routines via mrp: NONE
+ Calls other millicode routines: NONE
+
+***************************************************************************/
+
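+/* A hedged C sketch of the overall structure (illustrative name; the
+   sign handling and operand swap at the entry point are omitted, and a
+   plain multiply stands in for the 256-way shift-and-add dispatch on
+   each 8-bit chunk of the multiplier):
+
+   . unsigned mulI_structure_sketch (unsigned a0, unsigned a1)
+   . {
+   .   unsigned r = 0;
+   .   while (a1 != 0)
+   .     {
+   .       r += a0 * (a1 & 0xff);
+   .       a0 <<= 8;
+   .       a1 >>= 8;
+   .     }
+   .   return r;
+   . }
+
+   The blr dispatch below (cases x0..x255) performs the per-chunk
+   multiply with shifts and adds; the l0..l3 loop accumulates, shifts
+   a0 left by 8 and fetches the next chunk.  */
+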
+
+#define a0 %arg0
+#define a1 %arg1
+#define t0 %r1
+#define r %ret1
+
+#define a0__128a0 zdep a0,24,25,a0
+#define a0__256a0 zdep a0,23,24,a0
+#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
+#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
+#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
+#define b_n_ret_t0 b,n LREF(ret_t0)
+#define b_e_shift b LREF(e_shift)
+#define b_e_t0ma0 b LREF(e_t0ma0)
+#define b_e_t0 b LREF(e_t0)
+#define b_e_t0a0 b LREF(e_t0a0)
+#define b_e_t02a0 b LREF(e_t02a0)
+#define b_e_t04a0 b LREF(e_t04a0)
+#define b_e_2t0 b LREF(e_2t0)
+#define b_e_2t0a0 b LREF(e_2t0a0)
+#define b_e_2t04a0 b LREF(e2t04a0)
+#define b_e_3t0 b LREF(e_3t0)
+#define b_e_4t0 b LREF(e_4t0)
+#define b_e_4t0a0 b LREF(e_4t0a0)
+#define b_e_4t08a0 b LREF(e4t08a0)
+#define b_e_5t0 b LREF(e_5t0)
+#define b_e_8t0 b LREF(e_8t0)
+#define b_e_8t0a0 b LREF(e_8t0a0)
+#define r__r_a0 add r,a0,r
+#define r__r_2a0 sh1add a0,r,r
+#define r__r_4a0 sh2add a0,r,r
+#define r__r_8a0 sh3add a0,r,r
+#define r__r_t0 add r,t0,r
+#define r__r_2t0 sh1add t0,r,r
+#define r__r_4t0 sh2add t0,r,r
+#define r__r_8t0 sh3add t0,r,r
+#define t0__3a0 sh1add a0,a0,t0
+#define t0__4a0 sh2add a0,0,t0
+#define t0__5a0 sh2add a0,a0,t0
+#define t0__8a0 sh3add a0,0,t0
+#define t0__9a0 sh3add a0,a0,t0
+#define t0__16a0 zdep a0,27,28,t0
+#define t0__32a0 zdep a0,26,27,t0
+#define t0__64a0 zdep a0,25,26,t0
+#define t0__128a0 zdep a0,24,25,t0
+#define t0__t0ma0 sub t0,a0,t0
+#define t0__t0_a0 add t0,a0,t0
+#define t0__t0_2a0 sh1add a0,t0,t0
+#define t0__t0_4a0 sh2add a0,t0,t0
+#define t0__t0_8a0 sh3add a0,t0,t0
+#define t0__2t0_a0 sh1add t0,a0,t0
+#define t0__3t0 sh1add t0,t0,t0
+#define t0__4t0 sh2add t0,0,t0
+#define t0__4t0_a0 sh2add t0,a0,t0
+#define t0__5t0 sh2add t0,t0,t0
+#define t0__8t0 sh3add t0,0,t0
+#define t0__8t0_a0 sh3add t0,a0,t0
+#define t0__9t0 sh3add t0,t0,t0
+#define t0__16t0 zdep t0,27,28,t0
+#define t0__32t0 zdep t0,26,27,t0
+#define t0__256a0 zdep a0,23,24,t0
+
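+/* Reading aid for the table below: each xN entry is four instructions
+   built from the macros above whose combined effect is r += N*a0.  For
+   example x7, "t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0", forms
+   t0 = 3*a0, adds 4*a0 and then t0 to r (7*a0 in total), and either
+   returns through ret_t0 when no multiplier bits remain in a1 or loops
+   back through l0/l1/l2 to shift a0 left by 8 and dispatch on the next
+   byte.  */
+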
+
+ SUBSPA_MILLI
+ ATTR_MILLI
+ .align 16
+ .proc
+ .callinfo millicode
+ .export $$mulI,millicode
+GSYM($$mulI)
+ combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
+ copy 0,r /* zero out the result */
+ xor a0,a1,a0 /* swap a0 & a1 using the */
+ xor a0,a1,a1 /* old xor trick */
+ xor a0,a1,a0
+LSYM(l4)
+ combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
+ zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
+ sub,> 0,a1,t0 /* otherwise negate both and */
+ combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
+ sub 0,a0,a1
+ movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
+
+LSYM(l0) r__r_t0 /* add in this partial product */
+LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
+LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
+LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
+ extru a1,23,24,a1 /* a1 >>= 8 ****************** */
+
+/*16 insts before this. */
+/* a0 <<= 8 ************************** */
+LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
+LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
+LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
+LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
+LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
+LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
+LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
+LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
+LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
+LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
+LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
+LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
+LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
+LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
+LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
+LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
+LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
+LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
+LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
+LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
+LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
+LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
+LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
+LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
+LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
+LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
+LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
+LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
+LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
+LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
+LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
+LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
+LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
+LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
+LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
+LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
+LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
+LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
+LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
+LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
+LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
+LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
+LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
+LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
+LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
+LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
+LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
+LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
+LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
+LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
+LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
+LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
+LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
+LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
+LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
+LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
+LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
+LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
+LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
+LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
+LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
+LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
+LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
+LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
+LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
+LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
+LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
+LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
+LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
+LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
+LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
+LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
+LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
+LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
+LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
+LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
+LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
+LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
+LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
+LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
+LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
+LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
+LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
+LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
+LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
+LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
+LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
+LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
+LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
+LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
+LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
+LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
+LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
+LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
+LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
+LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
+LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
+LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
+LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
+LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
+LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
+LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
+LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
+LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
+LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
+LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
+LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
+LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
+LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
+LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
+LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
+LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
+LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
+LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
+LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
+LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
+LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
+LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
+LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
+LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
+LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
+LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
+LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
+LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
+LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
+LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
+LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
+LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
+LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
+LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
+LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
+LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
+LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
+LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
+LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
+LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
+LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
+LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
+LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
+LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
+LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
+LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
+LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
+LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
+LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
+LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
+LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
+LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+/*1040 insts before this. */
+LSYM(ret_t0) MILLIRET
+LSYM(e_t0) r__r_t0
+LSYM(e_shift) a1_ne_0_b_l2
+ a0__256a0 /* a0 <<= 8 *********** */
+ MILLIRETN
+LSYM(e_t0ma0) a1_ne_0_b_l0
+ t0__t0ma0
+ MILLIRET
+ r__r_t0
+LSYM(e_t0a0) a1_ne_0_b_l0
+ t0__t0_a0
+ MILLIRET
+ r__r_t0
+LSYM(e_t02a0) a1_ne_0_b_l0
+ t0__t0_2a0
+ MILLIRET
+ r__r_t0
+LSYM(e_t04a0) a1_ne_0_b_l0
+ t0__t0_4a0
+ MILLIRET
+ r__r_t0
+LSYM(e_2t0) a1_ne_0_b_l1
+ r__r_2t0
+ MILLIRETN
+LSYM(e_2t0a0) a1_ne_0_b_l0
+ t0__2t0_a0
+ MILLIRET
+ r__r_t0
+LSYM(e2t04a0) t0__t0_2a0
+ a1_ne_0_b_l1
+ r__r_2t0
+ MILLIRETN
+LSYM(e_3t0) a1_ne_0_b_l0
+ t0__3t0
+ MILLIRET
+ r__r_t0
+LSYM(e_4t0) a1_ne_0_b_l1
+ r__r_4t0
+ MILLIRETN
+LSYM(e_4t0a0) a1_ne_0_b_l0
+ t0__4t0_a0
+ MILLIRET
+ r__r_t0
+LSYM(e4t08a0) t0__t0_2a0
+ a1_ne_0_b_l1
+ r__r_4t0
+ MILLIRETN
+LSYM(e_5t0) a1_ne_0_b_l0
+ t0__5t0
+ MILLIRET
+ r__r_t0
+LSYM(e_8t0) a1_ne_0_b_l1
+ r__r_8t0
+ MILLIRETN
+LSYM(e_8t0a0) a1_ne_0_b_l0
+ t0__8t0_a0
+ MILLIRET
+ r__r_t0
+
+ .procend
+ .end
+#endif
diff --git a/libgcc/config/pa/t-linux b/libgcc/config/pa/t-linux
new file mode 100644
index 00000000000..d396bf7705a
--- /dev/null
+++ b/libgcc/config/pa/t-linux
@@ -0,0 +1,6 @@
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.  We use the "64-bit" routines because the "32-bit" code
+# is broken for certain corner cases.
+
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
diff --git a/libgcc/config/pa/t-linux64 b/libgcc/config/pa/t-linux64
new file mode 100644
index 00000000000..6cb9806ff2e
--- /dev/null
+++ b/libgcc/config/pa/t-linux64
@@ -0,0 +1,4 @@
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.
+# FIXME: Explain.
+LIB1ASMFUNCS := $(filter-out _dyncall, $(LIB1ASMFUNCS))
diff --git a/libgcc/config/picochip/lib1funcs.S b/libgcc/config/picochip/lib1funcs.S
new file mode 100644
index 00000000000..d344170d248
--- /dev/null
+++ b/libgcc/config/picochip/lib1funcs.S
@@ -0,0 +1,4 @@
+// picoChip ASM file
+// Fake libgcc asm file. This contains nothing, but is used to prevent gcc
+// getting upset about the lack of a lib1funcs.S file when LIB1ASMFUNCS is
+// defined to switch off the compilation of parts of libgcc.
diff --git a/libgcc/config/picochip/t-picochip b/libgcc/config/picochip/t-picochip
index 5135d500cbb..a596ec98947 100644
--- a/libgcc/config/picochip/t-picochip
+++ b/libgcc/config/picochip/t-picochip
@@ -1,2 +1,9 @@
+# Prevent some of the more complicated libgcc functions from being
+# compiled. This is because they are generally too big to fit into an
+# AE anyway, so there is no point in having them. Also, some don't
+# compile properly so we'll ignore them for the moment.
+LIB1ASMSRC = picochip/lib1funcs.S
+LIB1ASMFUNCS = _mulsc3 _divsc3
+
# Turn off the building of exception handling libraries.
LIB2ADDEH =
diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S
new file mode 100644
index 00000000000..2f0ca16cd91
--- /dev/null
+++ b/libgcc/config/sh/lib1funcs.S
@@ -0,0 +1,3933 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2009
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+!! libgcc routines for the Renesas / SuperH SH CPUs.
+!! Contributed by Steve Chamberlain.
+!! sac@cygnus.com
+
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
+ ELF local label prefixes by Jörn Rennecke
+ amylaar@cygnus.com */
+
+#include "lib1funcs.h"
+
+/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
+ so it is more convenient to define NO_FPSCR_VALUES here than to
+ define it on the command line. */
+#if defined __vxworks && defined __PIC__
+#define NO_FPSCR_VALUES
+#endif
+
+#if ! __SH5__
+#ifdef L_ashiftrt
+ .global GLOBAL(ashiftrt_r4_0)
+ .global GLOBAL(ashiftrt_r4_1)
+ .global GLOBAL(ashiftrt_r4_2)
+ .global GLOBAL(ashiftrt_r4_3)
+ .global GLOBAL(ashiftrt_r4_4)
+ .global GLOBAL(ashiftrt_r4_5)
+ .global GLOBAL(ashiftrt_r4_6)
+ .global GLOBAL(ashiftrt_r4_7)
+ .global GLOBAL(ashiftrt_r4_8)
+ .global GLOBAL(ashiftrt_r4_9)
+ .global GLOBAL(ashiftrt_r4_10)
+ .global GLOBAL(ashiftrt_r4_11)
+ .global GLOBAL(ashiftrt_r4_12)
+ .global GLOBAL(ashiftrt_r4_13)
+ .global GLOBAL(ashiftrt_r4_14)
+ .global GLOBAL(ashiftrt_r4_15)
+ .global GLOBAL(ashiftrt_r4_16)
+ .global GLOBAL(ashiftrt_r4_17)
+ .global GLOBAL(ashiftrt_r4_18)
+ .global GLOBAL(ashiftrt_r4_19)
+ .global GLOBAL(ashiftrt_r4_20)
+ .global GLOBAL(ashiftrt_r4_21)
+ .global GLOBAL(ashiftrt_r4_22)
+ .global GLOBAL(ashiftrt_r4_23)
+ .global GLOBAL(ashiftrt_r4_24)
+ .global GLOBAL(ashiftrt_r4_25)
+ .global GLOBAL(ashiftrt_r4_26)
+ .global GLOBAL(ashiftrt_r4_27)
+ .global GLOBAL(ashiftrt_r4_28)
+ .global GLOBAL(ashiftrt_r4_29)
+ .global GLOBAL(ashiftrt_r4_30)
+ .global GLOBAL(ashiftrt_r4_31)
+ .global GLOBAL(ashiftrt_r4_32)
+
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
+
+ .align 1
+GLOBAL(ashiftrt_r4_32):
+GLOBAL(ashiftrt_r4_31):
+ rotcl r4
+ rts
+ subc r4,r4
+
+GLOBAL(ashiftrt_r4_30):
+ shar r4
+GLOBAL(ashiftrt_r4_29):
+ shar r4
+GLOBAL(ashiftrt_r4_28):
+ shar r4
+GLOBAL(ashiftrt_r4_27):
+ shar r4
+GLOBAL(ashiftrt_r4_26):
+ shar r4
+GLOBAL(ashiftrt_r4_25):
+ shar r4
+GLOBAL(ashiftrt_r4_24):
+ shlr16 r4
+ shlr8 r4
+ rts
+ exts.b r4,r4
+
+GLOBAL(ashiftrt_r4_23):
+ shar r4
+GLOBAL(ashiftrt_r4_22):
+ shar r4
+GLOBAL(ashiftrt_r4_21):
+ shar r4
+GLOBAL(ashiftrt_r4_20):
+ shar r4
+GLOBAL(ashiftrt_r4_19):
+ shar r4
+GLOBAL(ashiftrt_r4_18):
+ shar r4
+GLOBAL(ashiftrt_r4_17):
+ shar r4
+GLOBAL(ashiftrt_r4_16):
+ shlr16 r4
+ rts
+ exts.w r4,r4
+
+GLOBAL(ashiftrt_r4_15):
+ shar r4
+GLOBAL(ashiftrt_r4_14):
+ shar r4
+GLOBAL(ashiftrt_r4_13):
+ shar r4
+GLOBAL(ashiftrt_r4_12):
+ shar r4
+GLOBAL(ashiftrt_r4_11):
+ shar r4
+GLOBAL(ashiftrt_r4_10):
+ shar r4
+GLOBAL(ashiftrt_r4_9):
+ shar r4
+GLOBAL(ashiftrt_r4_8):
+ shar r4
+GLOBAL(ashiftrt_r4_7):
+ shar r4
+GLOBAL(ashiftrt_r4_6):
+ shar r4
+GLOBAL(ashiftrt_r4_5):
+ shar r4
+GLOBAL(ashiftrt_r4_4):
+ shar r4
+GLOBAL(ashiftrt_r4_3):
+ shar r4
+GLOBAL(ashiftrt_r4_2):
+ shar r4
+GLOBAL(ashiftrt_r4_1):
+ rts
+ shar r4
+
+GLOBAL(ashiftrt_r4_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(ashiftrt_r4_0))
+ ENDFUNC(GLOBAL(ashiftrt_r4_1))
+ ENDFUNC(GLOBAL(ashiftrt_r4_2))
+ ENDFUNC(GLOBAL(ashiftrt_r4_3))
+ ENDFUNC(GLOBAL(ashiftrt_r4_4))
+ ENDFUNC(GLOBAL(ashiftrt_r4_5))
+ ENDFUNC(GLOBAL(ashiftrt_r4_6))
+ ENDFUNC(GLOBAL(ashiftrt_r4_7))
+ ENDFUNC(GLOBAL(ashiftrt_r4_8))
+ ENDFUNC(GLOBAL(ashiftrt_r4_9))
+ ENDFUNC(GLOBAL(ashiftrt_r4_10))
+ ENDFUNC(GLOBAL(ashiftrt_r4_11))
+ ENDFUNC(GLOBAL(ashiftrt_r4_12))
+ ENDFUNC(GLOBAL(ashiftrt_r4_13))
+ ENDFUNC(GLOBAL(ashiftrt_r4_14))
+ ENDFUNC(GLOBAL(ashiftrt_r4_15))
+ ENDFUNC(GLOBAL(ashiftrt_r4_16))
+ ENDFUNC(GLOBAL(ashiftrt_r4_17))
+ ENDFUNC(GLOBAL(ashiftrt_r4_18))
+ ENDFUNC(GLOBAL(ashiftrt_r4_19))
+ ENDFUNC(GLOBAL(ashiftrt_r4_20))
+ ENDFUNC(GLOBAL(ashiftrt_r4_21))
+ ENDFUNC(GLOBAL(ashiftrt_r4_22))
+ ENDFUNC(GLOBAL(ashiftrt_r4_23))
+ ENDFUNC(GLOBAL(ashiftrt_r4_24))
+ ENDFUNC(GLOBAL(ashiftrt_r4_25))
+ ENDFUNC(GLOBAL(ashiftrt_r4_26))
+ ENDFUNC(GLOBAL(ashiftrt_r4_27))
+ ENDFUNC(GLOBAL(ashiftrt_r4_28))
+ ENDFUNC(GLOBAL(ashiftrt_r4_29))
+ ENDFUNC(GLOBAL(ashiftrt_r4_30))
+ ENDFUNC(GLOBAL(ashiftrt_r4_31))
+ ENDFUNC(GLOBAL(ashiftrt_r4_32))
+#endif
+
+#ifdef L_ashiftrt_n
+
+!
+! GLOBAL(ashrsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
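+!
+! (Hedged sketch, not part of the original source: assuming the usual
+! underscore-prefixed symbol name and an arithmetic right shift of
+! signed values, the routine behaves like this C equivalent, with the
+! count masked to 0..31 just as the code below does.)
+!
+!   int __ashrsi3 (int x, int n) { return x >> (n & 31); }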
+
+ .global GLOBAL(ashrsi3)
+ HIDDEN_FUNC(GLOBAL(ashrsi3))
+ .align 2
+GLOBAL(ashrsi3):
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(ashrsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(ashrsi3_table):
+ .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
+
+LOCAL(ashrsi3_31):
+ rotcl r0
+ rts
+ subc r0,r0
+
+LOCAL(ashrsi3_30):
+ shar r0
+LOCAL(ashrsi3_29):
+ shar r0
+LOCAL(ashrsi3_28):
+ shar r0
+LOCAL(ashrsi3_27):
+ shar r0
+LOCAL(ashrsi3_26):
+ shar r0
+LOCAL(ashrsi3_25):
+ shar r0
+LOCAL(ashrsi3_24):
+ shlr16 r0
+ shlr8 r0
+ rts
+ exts.b r0,r0
+
+LOCAL(ashrsi3_23):
+ shar r0
+LOCAL(ashrsi3_22):
+ shar r0
+LOCAL(ashrsi3_21):
+ shar r0
+LOCAL(ashrsi3_20):
+ shar r0
+LOCAL(ashrsi3_19):
+ shar r0
+LOCAL(ashrsi3_18):
+ shar r0
+LOCAL(ashrsi3_17):
+ shar r0
+LOCAL(ashrsi3_16):
+ shlr16 r0
+ rts
+ exts.w r0,r0
+
+LOCAL(ashrsi3_15):
+ shar r0
+LOCAL(ashrsi3_14):
+ shar r0
+LOCAL(ashrsi3_13):
+ shar r0
+LOCAL(ashrsi3_12):
+ shar r0
+LOCAL(ashrsi3_11):
+ shar r0
+LOCAL(ashrsi3_10):
+ shar r0
+LOCAL(ashrsi3_9):
+ shar r0
+LOCAL(ashrsi3_8):
+ shar r0
+LOCAL(ashrsi3_7):
+ shar r0
+LOCAL(ashrsi3_6):
+ shar r0
+LOCAL(ashrsi3_5):
+ shar r0
+LOCAL(ashrsi3_4):
+ shar r0
+LOCAL(ashrsi3_3):
+ shar r0
+LOCAL(ashrsi3_2):
+ shar r0
+LOCAL(ashrsi3_1):
+ rts
+ shar r0
+
+LOCAL(ashrsi3_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(ashrsi3))
+#endif
+
+#ifdef L_ashiftlt
+
+!
+! GLOBAL(ashlsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
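+!
+! (Hedged sketch, not part of the original source: with the count
+! masked to 0..31 as the code below does, the routine is equivalent
+! to this C one-liner.)
+!
+!   unsigned __ashlsi3 (unsigned x, unsigned n) { return x << (n & 31); }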
+ .global GLOBAL(ashlsi3)
+ HIDDEN_FUNC(GLOBAL(ashlsi3))
+ .align 2
+GLOBAL(ashlsi3):
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(ashlsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(ashlsi3_table):
+ .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
+
+LOCAL(ashlsi3_6):
+ shll2 r0
+LOCAL(ashlsi3_4):
+ shll2 r0
+LOCAL(ashlsi3_2):
+ rts
+ shll2 r0
+
+LOCAL(ashlsi3_7):
+ shll2 r0
+LOCAL(ashlsi3_5):
+ shll2 r0
+LOCAL(ashlsi3_3):
+ shll2 r0
+LOCAL(ashlsi3_1):
+ rts
+ shll r0
+
+LOCAL(ashlsi3_14):
+ shll2 r0
+LOCAL(ashlsi3_12):
+ shll2 r0
+LOCAL(ashlsi3_10):
+ shll2 r0
+LOCAL(ashlsi3_8):
+ rts
+ shll8 r0
+
+LOCAL(ashlsi3_15):
+ shll2 r0
+LOCAL(ashlsi3_13):
+ shll2 r0
+LOCAL(ashlsi3_11):
+ shll2 r0
+LOCAL(ashlsi3_9):
+ shll8 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_22):
+ shll2 r0
+LOCAL(ashlsi3_20):
+ shll2 r0
+LOCAL(ashlsi3_18):
+ shll2 r0
+LOCAL(ashlsi3_16):
+ rts
+ shll16 r0
+
+LOCAL(ashlsi3_23):
+ shll2 r0
+LOCAL(ashlsi3_21):
+ shll2 r0
+LOCAL(ashlsi3_19):
+ shll2 r0
+LOCAL(ashlsi3_17):
+ shll16 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_30):
+ shll2 r0
+LOCAL(ashlsi3_28):
+ shll2 r0
+LOCAL(ashlsi3_26):
+ shll2 r0
+LOCAL(ashlsi3_24):
+ shll16 r0
+ rts
+ shll8 r0
+
+LOCAL(ashlsi3_31):
+ shll2 r0
+LOCAL(ashlsi3_29):
+ shll2 r0
+LOCAL(ashlsi3_27):
+ shll2 r0
+LOCAL(ashlsi3_25):
+ shll16 r0
+ shll8 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(ashlsi3))
+#endif
+
+#ifdef L_lshiftrt
+
+!
+! GLOBAL(lshrsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
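+!
+! (Hedged sketch, not part of the original source: a logical right
+! shift with the count masked to 0..31, i.e. equivalent to this C
+! one-liner on unsigned values.)
+!
+!   unsigned __lshrsi3 (unsigned x, unsigned n) { return x >> (n & 31); }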
+ .global GLOBAL(lshrsi3)
+ HIDDEN_FUNC(GLOBAL(lshrsi3))
+ .align 2
+GLOBAL(lshrsi3):
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(lshrsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(lshrsi3_table):
+ .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
+
+LOCAL(lshrsi3_6):
+ shlr2 r0
+LOCAL(lshrsi3_4):
+ shlr2 r0
+LOCAL(lshrsi3_2):
+ rts
+ shlr2 r0
+
+LOCAL(lshrsi3_7):
+ shlr2 r0
+LOCAL(lshrsi3_5):
+ shlr2 r0
+LOCAL(lshrsi3_3):
+ shlr2 r0
+LOCAL(lshrsi3_1):
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_14):
+ shlr2 r0
+LOCAL(lshrsi3_12):
+ shlr2 r0
+LOCAL(lshrsi3_10):
+ shlr2 r0
+LOCAL(lshrsi3_8):
+ rts
+ shlr8 r0
+
+LOCAL(lshrsi3_15):
+ shlr2 r0
+LOCAL(lshrsi3_13):
+ shlr2 r0
+LOCAL(lshrsi3_11):
+ shlr2 r0
+LOCAL(lshrsi3_9):
+ shlr8 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_22):
+ shlr2 r0
+LOCAL(lshrsi3_20):
+ shlr2 r0
+LOCAL(lshrsi3_18):
+ shlr2 r0
+LOCAL(lshrsi3_16):
+ rts
+ shlr16 r0
+
+LOCAL(lshrsi3_23):
+ shlr2 r0
+LOCAL(lshrsi3_21):
+ shlr2 r0
+LOCAL(lshrsi3_19):
+ shlr2 r0
+LOCAL(lshrsi3_17):
+ shlr16 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_30):
+ shlr2 r0
+LOCAL(lshrsi3_28):
+ shlr2 r0
+LOCAL(lshrsi3_26):
+ shlr2 r0
+LOCAL(lshrsi3_24):
+ shlr16 r0
+ rts
+ shlr8 r0
+
+LOCAL(lshrsi3_31):
+ shlr2 r0
+LOCAL(lshrsi3_29):
+ shlr2 r0
+LOCAL(lshrsi3_27):
+ shlr2 r0
+LOCAL(lshrsi3_25):
+ shlr16 r0
+ shlr8 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(lshrsi3))
+#endif
+
+#ifdef L_movmem
+ .text
+ .balign 4
+ .global GLOBAL(movmem)
+ HIDDEN_FUNC(GLOBAL(movmem))
+ HIDDEN_ALIAS(movstr,movmem)
+ /* This would be a lot simpler if r6 contained the byte count
+ minus 64, and we wouldn't be called here for a byte count of 64. */
+GLOBAL(movmem):
+ sts.l pr,@-r15
+ shll2 r6
+ bsr GLOBAL(movmemSI52+2)
+ mov.l @(48,r5),r0
+ .balign 4
+LOCAL(movmem_loop): /* Reached with rts */
+ mov.l @(60,r5),r0
+ add #-64,r6
+ mov.l r0,@(60,r4)
+ tst r6,r6
+ mov.l @(56,r5),r0
+ bt LOCAL(movmem_done)
+ mov.l r0,@(56,r4)
+ cmp/pl r6
+ mov.l @(52,r5),r0
+ add #64,r5
+ mov.l r0,@(52,r4)
+ add #64,r4
+ bt GLOBAL(movmemSI52)
+! done all the large groups, do the remainder
+! jump to movmem+
+ mova GLOBAL(movmemSI4)+4,r0
+ add r6,r0
+ jmp @r0
+LOCAL(movmem_done): ! share slot insn, works out aligned.
+ lds.l @r15+,pr
+ mov.l r0,@(56,r4)
+ mov.l @(52,r5),r0
+ rts
+ mov.l r0,@(52,r4)
+ .balign 4
+! ??? We need aliases movstr* for movmem* for the older libraries. These
+! aliases will be removed at some point in the future.
+ .global GLOBAL(movmemSI64)
+ HIDDEN_FUNC(GLOBAL(movmemSI64))
+ HIDDEN_ALIAS(movstrSI64,movmemSI64)
+GLOBAL(movmemSI64):
+ mov.l @(60,r5),r0
+ mov.l r0,@(60,r4)
+ .global GLOBAL(movmemSI60)
+ HIDDEN_FUNC(GLOBAL(movmemSI60))
+ HIDDEN_ALIAS(movstrSI60,movmemSI60)
+GLOBAL(movmemSI60):
+ mov.l @(56,r5),r0
+ mov.l r0,@(56,r4)
+ .global GLOBAL(movmemSI56)
+ HIDDEN_FUNC(GLOBAL(movmemSI56))
+ HIDDEN_ALIAS(movstrSI56,movmemSI56)
+GLOBAL(movmemSI56):
+ mov.l @(52,r5),r0
+ mov.l r0,@(52,r4)
+ .global GLOBAL(movmemSI52)
+ HIDDEN_FUNC(GLOBAL(movmemSI52))
+ HIDDEN_ALIAS(movstrSI52,movmemSI52)
+GLOBAL(movmemSI52):
+ mov.l @(48,r5),r0
+ mov.l r0,@(48,r4)
+ .global GLOBAL(movmemSI48)
+ HIDDEN_FUNC(GLOBAL(movmemSI48))
+ HIDDEN_ALIAS(movstrSI48,movmemSI48)
+GLOBAL(movmemSI48):
+ mov.l @(44,r5),r0
+ mov.l r0,@(44,r4)
+ .global GLOBAL(movmemSI44)
+ HIDDEN_FUNC(GLOBAL(movmemSI44))
+ HIDDEN_ALIAS(movstrSI44,movmemSI44)
+GLOBAL(movmemSI44):
+ mov.l @(40,r5),r0
+ mov.l r0,@(40,r4)
+ .global GLOBAL(movmemSI40)
+ HIDDEN_FUNC(GLOBAL(movmemSI40))
+ HIDDEN_ALIAS(movstrSI40,movmemSI40)
+GLOBAL(movmemSI40):
+ mov.l @(36,r5),r0
+ mov.l r0,@(36,r4)
+ .global GLOBAL(movmemSI36)
+ HIDDEN_FUNC(GLOBAL(movmemSI36))
+ HIDDEN_ALIAS(movstrSI36,movmemSI36)
+GLOBAL(movmemSI36):
+ mov.l @(32,r5),r0
+ mov.l r0,@(32,r4)
+ .global GLOBAL(movmemSI32)
+ HIDDEN_FUNC(GLOBAL(movmemSI32))
+ HIDDEN_ALIAS(movstrSI32,movmemSI32)
+GLOBAL(movmemSI32):
+ mov.l @(28,r5),r0
+ mov.l r0,@(28,r4)
+ .global GLOBAL(movmemSI28)
+ HIDDEN_FUNC(GLOBAL(movmemSI28))
+ HIDDEN_ALIAS(movstrSI28,movmemSI28)
+GLOBAL(movmemSI28):
+ mov.l @(24,r5),r0
+ mov.l r0,@(24,r4)
+ .global GLOBAL(movmemSI24)
+ HIDDEN_FUNC(GLOBAL(movmemSI24))
+ HIDDEN_ALIAS(movstrSI24,movmemSI24)
+GLOBAL(movmemSI24):
+ mov.l @(20,r5),r0
+ mov.l r0,@(20,r4)
+ .global GLOBAL(movmemSI20)
+ HIDDEN_FUNC(GLOBAL(movmemSI20))
+ HIDDEN_ALIAS(movstrSI20,movmemSI20)
+GLOBAL(movmemSI20):
+ mov.l @(16,r5),r0
+ mov.l r0,@(16,r4)
+ .global GLOBAL(movmemSI16)
+ HIDDEN_FUNC(GLOBAL(movmemSI16))
+ HIDDEN_ALIAS(movstrSI16,movmemSI16)
+GLOBAL(movmemSI16):
+ mov.l @(12,r5),r0
+ mov.l r0,@(12,r4)
+ .global GLOBAL(movmemSI12)
+ HIDDEN_FUNC(GLOBAL(movmemSI12))
+ HIDDEN_ALIAS(movstrSI12,movmemSI12)
+GLOBAL(movmemSI12):
+ mov.l @(8,r5),r0
+ mov.l r0,@(8,r4)
+ .global GLOBAL(movmemSI8)
+ HIDDEN_FUNC(GLOBAL(movmemSI8))
+ HIDDEN_ALIAS(movstrSI8,movmemSI8)
+GLOBAL(movmemSI8):
+ mov.l @(4,r5),r0
+ mov.l r0,@(4,r4)
+ .global GLOBAL(movmemSI4)
+ HIDDEN_FUNC(GLOBAL(movmemSI4))
+ HIDDEN_ALIAS(movstrSI4,movmemSI4)
+GLOBAL(movmemSI4):
+ mov.l @(0,r5),r0
+ rts
+ mov.l r0,@(0,r4)
+
+ ENDFUNC(GLOBAL(movmemSI64))
+ ENDFUNC(GLOBAL(movmemSI60))
+ ENDFUNC(GLOBAL(movmemSI56))
+ ENDFUNC(GLOBAL(movmemSI52))
+ ENDFUNC(GLOBAL(movmemSI48))
+ ENDFUNC(GLOBAL(movmemSI44))
+ ENDFUNC(GLOBAL(movmemSI40))
+ ENDFUNC(GLOBAL(movmemSI36))
+ ENDFUNC(GLOBAL(movmemSI32))
+ ENDFUNC(GLOBAL(movmemSI28))
+ ENDFUNC(GLOBAL(movmemSI24))
+ ENDFUNC(GLOBAL(movmemSI20))
+ ENDFUNC(GLOBAL(movmemSI16))
+ ENDFUNC(GLOBAL(movmemSI12))
+ ENDFUNC(GLOBAL(movmemSI8))
+ ENDFUNC(GLOBAL(movmemSI4))
+ ENDFUNC(GLOBAL(movmem))
+#endif
+
+#ifdef L_movmem_i4
+ .text
+ .global GLOBAL(movmem_i4_even)
+ .global GLOBAL(movmem_i4_odd)
+ .global GLOBAL(movmemSI12_i4)
+
+ HIDDEN_FUNC(GLOBAL(movmem_i4_even))
+ HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
+ HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
+
+ HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
+ HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
+ HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
+
+ .p2align 5
+L_movmem_2mod4_end:
+ mov.l r0,@(16,r4)
+ rts
+ mov.l r1,@(20,r4)
+
+ .p2align 2
+
+GLOBAL(movmem_i4_even):
+ mov.l @r5+,r0
+ bra L_movmem_start_even
+ mov.l @r5+,r1
+
+GLOBAL(movmem_i4_odd):
+ mov.l @r5+,r1
+ add #-4,r4
+ mov.l @r5+,r2
+ mov.l @r5+,r3
+ mov.l r1,@(4,r4)
+ mov.l r2,@(8,r4)
+
+L_movmem_loop:
+ mov.l r3,@(12,r4)
+ dt r6
+ mov.l @r5+,r0
+ bt/s L_movmem_2mod4_end
+ mov.l @r5+,r1
+ add #16,r4
+L_movmem_start_even:
+ mov.l @r5+,r2
+ mov.l @r5+,r3
+ mov.l r0,@r4
+ dt r6
+ mov.l r1,@(4,r4)
+ bf/s L_movmem_loop
+ mov.l r2,@(8,r4)
+ rts
+ mov.l r3,@(12,r4)
+
+ ENDFUNC(GLOBAL(movmem_i4_even))
+ ENDFUNC(GLOBAL(movmem_i4_odd))
+
+ .p2align 4
+GLOBAL(movmemSI12_i4):
+ mov.l @r5,r0
+ mov.l @(4,r5),r1
+ mov.l @(8,r5),r2
+ mov.l r0,@r4
+ mov.l r1,@(4,r4)
+ rts
+ mov.l r2,@(8,r4)
+
+ ENDFUNC(GLOBAL(movmemSI12_i4))
+#endif
+
+#ifdef L_mulsi3
+
+
+ .global GLOBAL(mulsi3)
+ HIDDEN_FUNC(GLOBAL(mulsi3))
+
+! r4 = aabb
+! r5 = ccdd
+! r0 = aabb*ccdd via partial products
+!
+! if aa == 0 and cc == 0
+! r0 = bb*dd
+!
+! else
+! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
+!
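+!
+! (Hedged sketch, not part of the original source: the partial-product
+! scheme above corresponds to this C equivalent, where aa/cc are the
+! high halfwords and bb/dd the low halfwords of the operands; the
+! aa*cc term is dropped because it only affects bits above bit 31.)
+!
+!   unsigned __mulsi3 (unsigned x, unsigned y)
+!   {
+!     unsigned bb = x & 0xffff, aa = x >> 16;
+!     unsigned dd = y & 0xffff, cc = y >> 16;
+!     return bb * dd + ((aa * dd + cc * bb) << 16);
+!   }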
+
+GLOBAL(mulsi3):
+ mulu.w r4,r5 ! multiply the lsws macl=bb*dd
+ mov r5,r3 ! r3 = ccdd
+ swap.w r4,r2 ! r2 = bbaa
+ xtrct r2,r3 ! r3 = aacc
+ tst r3,r3 ! msws zero ?
+ bf hiset
+ rts ! yes - then we have the answer
+ sts macl,r0
+
+hiset: sts macl,r0 ! r0 = bb*dd
+ mulu.w r2,r5 ! brewing macl = aa*dd
+ sts macl,r1
+ mulu.w r3,r4 ! brewing macl = cc*bb
+ sts macl,r2
+ add r1,r2
+ shll16 r2
+ rts
+ add r2,r0
+
+ ENDFUNC(GLOBAL(mulsi3))
+#endif
+#endif /* ! __SH5__ */
+#ifdef L_sdivsi3_i4
+ .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Renesas SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber dr0, dr2
+
+ .global GLOBAL(sdivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+ lds r4,fpul
+ float fpul,dr0
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+ rts
+ ftrc dr0,fpul
+
+ ENDFUNC(GLOBAL(sdivsi3_i4))
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
+!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
+
+#if ! __SH5__ || __SH5__ == 32
+#if __SH5__
+ .mode SHcompact
+#endif
+ .global GLOBAL(sdivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+ sts.l fpscr,@-r15
+ mov #8,r2
+ swap.w r2,r2
+ lds r2,fpscr
+ lds r4,fpul
+ float fpul,dr0
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+ ftrc dr0,fpul
+ rts
+ lds.l @r15+,fpscr
+
+ ENDFUNC(GLOBAL(sdivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#endif /* ! __SH4__ */
+#endif
+
+#ifdef L_sdivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+ sh2e/sh3e code. */
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+!!
+!! Steve Chamberlain
+!! sac@cygnus.com
+!!
+!!
+
+!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
+
+ .global GLOBAL(sdivsi3)
+#if __SHMEDIA__
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+ .align 2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+ code that follows. Note that the registers that are modified are
+ exactly those listed as clobbered in the patterns divsi3_i1 and
+ divsi3_i1_media.
+
+int __sdivsi3 (i, j)
+ int i, j;
+{
+ register unsigned long long r18 asm ("r18");
+ register unsigned long long r19 asm ("r19");
+ register unsigned long long r0 asm ("r0") = 0;
+ register unsigned long long r1 asm ("r1") = 1;
+ register int r2 asm ("r2") = i >> 31;
+ register int r3 asm ("r3") = j >> 31;
+
+ r2 = r2 ? r2 : r1;
+ r3 = r3 ? r3 : r1;
+ r18 = i * r2;
+ r19 = j * r3;
+ r2 *= r3;
+
+ r19 <<= 31;
+ r1 <<= 31;
+ do
+ if (r18 >= r19)
+ r0 |= r1, r18 -= r19;
+ while (r19 >>= 1, r1 >>= 1);
+
+ return r2 * (int)r0;
+}
+*/
+GLOBAL(sdivsi3):
+ pt/l LOCAL(sdivsi3_dontadd), tr2
+ pt/l LOCAL(sdivsi3_loop), tr1
+ ptabs/l r18, tr0
+ movi 0, r0
+ movi 1, r1
+ shari.l r4, 31, r2
+ shari.l r5, 31, r3
+ cmveq r2, r1, r2
+ cmveq r3, r1, r3
+ muls.l r4, r2, r18
+ muls.l r5, r3, r19
+ muls.l r2, r3, r2
+ shlli r19, 31, r19
+ shlli r1, 31, r1
+LOCAL(sdivsi3_loop):
+ bgtu r19, r18, tr2
+ or r0, r1, r0
+ sub r18, r19, r18
+LOCAL(sdivsi3_dontadd):
+ shlri r1, 1, r1
+ shlri r19, 1, r19
+ bnei r1, 0, tr1
+ muls.l r0, r2, r0
+ add.l r0, r63, r0
+ blink tr0, r63
+#elif 0 /* ! 0 */
+ // inputs: r4,r5
+ // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
+ // result in r0
+GLOBAL(sdivsi3):
+ // can create absolute value without extra latency,
+ // but dependent on proper sign extension of inputs:
+ // shari.l r5,31,r2
+ // xor r5,r2,r20
+ // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ shari.l r5,31,r2
+ ori r2,1,r2
+ muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ movi 0xffffffffffffbb0c,r19 // shift count equiv 76
+ shari.l r4,31,r3
+ nsb r20,r0
+ shlld r20,r0,r25
+ shlri r25,48,r25
+ sub r19,r25,r1
+ mmulfx.w r1,r1,r2
+ mshflo.w r1,r63,r1
+ // If r4 was to be used in-place instead of r21, could use this sequence
+ // to compute absolute:
+ // sub r63,r4,r19 // compute absolute value of r4
+ // shlri r4,32,r3 // into lower 32 bit of r4, keeping
+ // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
+ ori r3,1,r3
+ mmulfx.w r25,r2,r2
+ sub r19,r0,r0
+ muls.l r4,r3,r21
+ msub.w r1,r2,r2
+ addi r2,-2,r1
+ mulu.l r21,r1,r19
+ mmulfx.w r2,r2,r2
+ shlli r1,15,r1
+ shlrd r19,r0,r19
+ mulu.l r19,r20,r3
+ mmacnfx.wl r25,r2,r1
+ ptabs r18,tr0
+ sub r21,r3,r25
+
+ mulu.l r25,r1,r2
+ addi r0,14,r0
+ xor r4,r5,r18
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ add r19,r2,r19
+ shari.l r18,31,r18
+ sub r25,r3,r25
+
+ mulu.l r25,r1,r2
+ sub r25,r20,r25
+ add r19,r18,r19
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ addi r25,1,r25
+ add r19,r2,r19
+
+ cmpgt r25,r3,r25
+ add.l r19,r25,r0
+ xor r0,r18,r0
+ blink tr0,r63
+#else /* ! 0 && ! 0 */
+
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ HIDDEN_FUNC(GLOBAL(sdivsi3_2))
+#ifndef __pic__
+ FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3): /* this is the shcompact entry point */
+ // The special SHmedia entry point sdivsi3_1 prevents accidental linking
+ // with the SHcompact implementation, which clobbers tr1 / tr2.
+ .global GLOBAL(sdivsi3_1)
+GLOBAL(sdivsi3_1):
+ .global GLOBAL(div_table_internal)
+ movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
+ shori GLOBAL(div_table_internal) & 65535, r20
+#endif
+ .global GLOBAL(sdivsi3_2)
+ // div_table in r20
+ // clobbered: r1,r18,r19,r21,r25,tr0
+GLOBAL(sdivsi3_2):
+ nsb r5, r1
+ shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25 // normalize to s2.30
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ ptabs r18, tr0
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+ sub r63, r1, r1
+ addi r1, 92, r1
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19 // multiply by two and convert to s2.58
+ /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18 // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0 // s2.60
+ muls.l r18, r4, r25 // s32.30
+ /* bubble */
+ shari r0, 16, r19 // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+ shari r25, 63, r0
+ shari r4, 14, r18 // s19.-14
+ shari r19, 30, r19 // s-16.44
+ muls.l r19, r18, r19 // s15.30
+ xor r21, r0, r21 // You could also use the constant 1 << 27.
+ add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63
+#ifndef __pic__
+ ENDFUNC(GLOBAL(sdivsi3))
+#endif
+ ENDFUNC(GLOBAL(sdivsi3_2))
+#endif
+#elif defined __SHMEDIA__
+/* m5compact-nofpu */
+ // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3):
+ pt/l LOCAL(sdivsi3_dontsub), tr0
+ pt/l LOCAL(sdivsi3_loop), tr1
+ ptabs/l r18,tr2
+ shari.l r4,31,r18
+ shari.l r5,31,r19
+ xor r4,r18,r20
+ xor r5,r19,r21
+ sub.l r20,r18,r20
+ sub.l r21,r19,r21
+ xor r18,r19,r19
+ shlli r21,32,r25
+ addi r25,-1,r21
+ addz.l r20,r63,r20
+LOCAL(sdivsi3_loop):
+ shlli r20,1,r20
+ bgeu/u r21,r20,tr0
+ sub r20,r21,r20
+LOCAL(sdivsi3_dontsub):
+ addi.l r25,-1,r25
+ bnei r25,-32,tr1
+ xor r20,r19,r20
+ sub.l r20,r19,r0
+ blink tr2,r63
+ ENDFUNC(GLOBAL(sdivsi3))
+#else /* ! __SHMEDIA__ */
+ FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3):
+ mov r4,r1
+ mov r5,r0
+
+ tst r0,r0
+ bt div0
+ mov #0,r2
+ div0s r2,r1
+ subc r3,r3
+ subc r2,r1
+ div0s r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ addc r2,r1
+ rts
+ mov r1,r0
+
+
+div0: rts
+ mov #0,r0
+
+ ENDFUNC(GLOBAL(sdivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* ! __SH4__ */
+#endif
+#ifdef L_udivsi3_i4
+
+ .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Renesas SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
+!! and t bit
+
+ .global GLOBAL(udivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+ mov #1,r1
+ cmp/hi r1,r5
+ bf trivial
+ rotr r1
+ xor r1,r4
+ lds r4,fpul
+ mova L1,r0
+#ifdef FMOVD_WORKS
+ fmov.d @r0+,dr4
+#else
+ fmov.s @r0+,DR40
+ fmov.s @r0,DR41
+#endif
+ float fpul,dr0
+ xor r1,r5
+ lds r5,fpul
+ float fpul,dr2
+ fadd dr4,dr0
+ fadd dr4,dr2
+ fdiv dr2,dr0
+ rts
+ ftrc dr0,fpul
+
+trivial:
+ rts
+ lds r4,fpul
+
+ .align 2
+#ifdef FMOVD_WORKS
+ .align 3 ! make double below 8 byte aligned.
+#endif
+L1:
+ .double 2147483648
+
+ ENDFUNC(GLOBAL(udivsi3_i4))
+#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
+#if ! __SH5__ || __SH5__ == 32
+!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
+ .mode SHmedia
+ .global GLOBAL(udivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+ addz.l r4,r63,r20
+ addz.l r5,r63,r21
+ fmov.qd r20,dr0
+ fmov.qd r21,dr32
+ ptabs r18,tr0
+ float.qd dr0,dr0
+ float.qd dr32,dr32
+ fdiv.d dr0,dr32,dr0
+ ftrc.dq dr0,dr32
+ fmov.s fr33,fr32
+ blink tr0,r63
+
+ ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
+
+ .global GLOBAL(udivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+ mov #1,r1
+ cmp/hi r1,r5
+ bf trivial
+ sts.l fpscr,@-r15
+ mova L1,r0
+ lds.l @r0+,fpscr
+ rotr r1
+ xor r1,r4
+ lds r4,fpul
+#ifdef FMOVD_WORKS
+ fmov.d @r0+,dr4
+#else
+ fmov.s @r0+,DR40
+ fmov.s @r0,DR41
+#endif
+ float fpul,dr0
+ xor r1,r5
+ lds r5,fpul
+ float fpul,dr2
+ fadd dr4,dr0
+ fadd dr4,dr2
+ fdiv dr2,dr0
+ ftrc dr0,fpul
+ rts
+ lds.l @r15+,fpscr
+
+#ifdef FMOVD_WORKS
+ .align 3 ! make double below 8 byte aligned.
+#endif
+trivial:
+ rts
+ lds r4,fpul
+
+ .align 2
+L1:
+#ifndef FMOVD_WORKS
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+ .double 2147483648
+
+ ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH4__ */
+#endif
+
+#ifdef L_udivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+ sh2e/sh3e code. */
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+
+!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
+ .global GLOBAL(udivsi3)
+ HIDDEN_FUNC(GLOBAL(udivsi3))
+
+#if __SHMEDIA__
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+ .align 2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+ code that follows. Note that the registers that are modified are
+ exactly those listed as clobbered in the patterns udivsi3_i1 and
+ udivsi3_i1_media.
+
+unsigned
+__udivsi3 (i, j)
+ unsigned i, j;
+{
+ register unsigned long long r0 asm ("r0") = 0;
+ register unsigned long long r18 asm ("r18") = 1;
+ register unsigned long long r4 asm ("r4") = i;
+ register unsigned long long r19 asm ("r19") = j;
+
+ r19 <<= 31;
+ r18 <<= 31;
+ do
+ if (r4 >= r19)
+ r0 |= r18, r4 -= r19;
+ while (r19 >>= 1, r18 >>= 1);
+
+ return r0;
+}
+*/
+GLOBAL(udivsi3):
+ pt/l LOCAL(udivsi3_dontadd), tr2
+ pt/l LOCAL(udivsi3_loop), tr1
+ ptabs/l r18, tr0
+ movi 0, r0
+ movi 1, r18
+ addz.l r5, r63, r19
+ addz.l r4, r63, r4
+ shlli r19, 31, r19
+ shlli r18, 31, r18
+LOCAL(udivsi3_loop):
+ bgtu r19, r4, tr2
+ or r0, r18, r0
+ sub r4, r19, r4
+LOCAL(udivsi3_dontadd):
+ shlri r18, 1, r18
+ shlri r19, 1, r19
+ bnei r18, 0, tr1
+ blink tr0, r63
+#else
+GLOBAL(udivsi3):
+ // inputs: r4,r5
+ // clobbered: r18,r19,r20,r21,r22,r25,tr0
+ // result in r0.
+ addz.l r5,r63,r22
+ nsb r22,r0
+ shlld r22,r0,r25
+ shlri r25,48,r25
+ movi 0xffffffffffffbb0c,r20 // shift count equiv 76
+ sub r20,r25,r21
+ mmulfx.w r21,r21,r19
+ mshflo.w r21,r63,r21
+ ptabs r18,tr0
+ mmulfx.w r25,r19,r19
+ sub r20,r0,r0
+ /* bubble */
+ msub.w r21,r19,r19
+ addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
+ before the msub.w, but we need a different value for
+ r19 to keep errors under control. */
+ mulu.l r4,r21,r18
+ mmulfx.w r19,r19,r19
+ shlli r21,15,r21
+ shlrd r18,r0,r18
+ mulu.l r18,r22,r20
+ mmacnfx.wl r25,r19,r21
+ /* bubble */
+ sub r4,r20,r25
+
+ mulu.l r25,r21,r19
+ addi r0,14,r0
+ /* bubble */
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ add r18,r19,r18
+ /* bubble */
+ sub.l r25,r20,r25
+
+ mulu.l r25,r21,r19
+ addz.l r25,r63,r25
+ sub r25,r22,r25
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ addi r25,1,r25
+ add r18,r19,r18
+
+ cmpgt r25,r20,r25
+ add.l r18,r25,r0
+ blink tr0,r63
+#endif
+#elif defined (__SHMEDIA__)
+/* m5compact-nofpu - more emphasis on code size than on speed, but don't
+ ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
+ So use a short shmedia loop. */
+ // clobbered: r20,r21,r25,tr0,tr1,tr2
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+GLOBAL(udivsi3):
+ pt/l LOCAL(udivsi3_dontsub), tr0
+ pt/l LOCAL(udivsi3_loop), tr1
+ ptabs/l r18,tr2
+ shlli r5,32,r25
+ addi r25,-1,r21
+ addz.l r4,r63,r20
+LOCAL(udivsi3_loop):
+ shlli r20,1,r20
+ bgeu/u r21,r20,tr0
+ sub r20,r21,r20
+LOCAL(udivsi3_dontsub):
+ addi.l r25,-1,r25
+ bnei r25,-32,tr1
+ add.l r20,r63,r0
+ blink tr2,r63
+#else /* ! defined (__SHMEDIA__) */
+LOCAL(div8):
+ div1 r5,r4
+LOCAL(div7):
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+LOCAL(divx4):
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ rts; div1 r5,r4
+
+GLOBAL(udivsi3):
+ sts.l pr,@-r15
+ extu.w r5,r0
+ cmp/eq r5,r0
+#ifdef __sh1__
+ bf LOCAL(large_divisor)
+#else
+ bf/s LOCAL(large_divisor)
+#endif
+ div0u
+ swap.w r4,r0
+ shlr16 r4
+ bsr LOCAL(div8)
+ shll16 r5
+ bsr LOCAL(div7)
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(div8)
+ swap.w r4,r4
+ bsr LOCAL(div7)
+ div1 r5,r4
+ lds.l @r15+,pr
+ xtrct r4,r0
+ swap.w r0,r0
+ rotcl r0
+ rts
+ shlr16 r5
+
+LOCAL(large_divisor):
+#ifdef __sh1__
+ div0u
+#endif
+ mov #0,r0
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ lds.l @r15+,pr
+ rts
+ rotcl r0
+
+ ENDFUNC(GLOBAL(udivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* __SH4__ */
+#endif /* L_udivsi3 */
+
+#ifdef L_udivdi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(udivdi3)
+ FUNC(GLOBAL(udivdi3))
+GLOBAL(udivdi3):
+ HIDDEN_ALIAS(udivdi3_internal,udivdi3)
+ shlri r3,1,r4
+ nsb r4,r22
+ shlld r3,r22,r6
+ shlri r6,49,r5
+ movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
+ sub r21,r5,r1
+ mmulfx.w r1,r1,r4
+ mshflo.w r1,r63,r1
+ sub r63,r22,r20 // r63 == 64 % 64
+ mmulfx.w r5,r4,r4
+ pta LOCAL(large_divisor),tr0
+ addi r20,32,r9
+ msub.w r1,r4,r1
+ madd.w r1,r1,r1
+ mmulfx.w r1,r1,r4
+ shlri r6,32,r7
+ bgt/u r9,r63,tr0 // large_divisor
+ mmulfx.w r5,r4,r4
+ shlri r2,32+14,r19
+ addi r22,-31,r0
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r19,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ mulu.l r5,r3,r8
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ shlld r8,r0,r8
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r2,r8,r2
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
+
+ shlri r2,22,r21
+ mulu.l r21,r1,r21
+ shlld r5,r0,r8
+ addi r20,30-22,r0
+ shlrd r21,r0,r21
+ mulu.l r21,r3,r5
+ add r8,r21,r8
+ mcmpgt.l r21,r63,r21 // See Note 1
+ addi r20,30,r0
+ mshfhi.l r63,r21,r21
+ sub r2,r5,r2
+ andc r2,r21,r2
+
+ /* small divisor: need a third divide step */
+ mulu.l r2,r1,r7
+ ptabs r18,tr0
+ addi r2,1,r2
+ shlrd r7,r0,r7
+ mulu.l r7,r3,r5
+ add r8,r7,r8
+ sub r2,r3,r2
+ cmpgt r2,r5,r5
+ add r8,r5,r2
+ /* could test r3 here to check for divide by zero. */
+ blink tr0,r63
+
+LOCAL(large_divisor):
+ mmulfx.w r5,r4,r4
+ shlrd r2,r9,r25
+ shlri r25,32,r8
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r8,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ shlri r5,14-1,r8
+ mulu.l r8,r7,r5
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r25,r5,r25
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
+
+ shlri r25,22,r21
+ mulu.l r21,r1,r21
+ pta LOCAL(no_lo_adj),tr0
+ addi r22,32,r0
+ shlri r21,40,r21
+ mulu.l r21,r7,r5
+ add r8,r21,r8
+ shlld r2,r0,r2
+ sub r25,r5,r25
+ bgtu/u r7,r25,tr0 // no_lo_adj
+ addi r8,1,r8
+ sub r25,r7,r25
+LOCAL(no_lo_adj):
+ mextr4 r2,r25,r2
+
+ /* large_divisor: only needs a few adjustments. */
+ mulu.l r8,r6,r5
+ ptabs r18,tr0
+ /* bubble */
+ cmpgtu r5,r2,r5
+ sub r8,r5,r2
+ blink tr0,r63
+ ENDFUNC(GLOBAL(udivdi3))
+/* Note 1: To shift the result of the second divide stage so that the result
+ always fits into 32 bits, yet we still reduce the rest sufficiently
+ would require a lot of instructions to do the shifts just right. Using
+ the full 64 bit shift result to multiply with the divisor would require
+ four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+ Fortunately, if the upper 32 bits of the shift result are nonzero, we
+ know that the rest after taking this partial result into account will
+ fit into 32 bits. So we just clear the upper 32 bits of the rest if the
+ upper 32 bits of the partial result are nonzero. */
+#endif /* __SHMEDIA__ */
+#endif /* L_udivdi3 */
+
+#ifdef L_divdi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(divdi3)
+ FUNC(GLOBAL(divdi3))
+GLOBAL(divdi3):
+ pta GLOBAL(udivdi3_internal),tr0
+ shari r2,63,r22
+ shari r3,63,r23
+ xor r2,r22,r2
+ xor r3,r23,r3
+ sub r2,r22,r2
+ sub r3,r23,r3
+ beq/u r22,r23,tr0
+ ptabs r18,tr1
+ blink tr0,r18
+ sub r63,r2,r2
+ blink tr1,r63
+ ENDFUNC(GLOBAL(divdi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_divdi3 */
+
+#ifdef L_umoddi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(umoddi3)
+ FUNC(GLOBAL(umoddi3))
+GLOBAL(umoddi3):
+ HIDDEN_ALIAS(umoddi3_internal,umoddi3)
+ shlri r3,1,r4
+ nsb r4,r22
+ shlld r3,r22,r6
+ shlri r6,49,r5
+ movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
+ sub r21,r5,r1
+ mmulfx.w r1,r1,r4
+ mshflo.w r1,r63,r1
+ sub r63,r22,r20 // r63 == 64 % 64
+ mmulfx.w r5,r4,r4
+ pta LOCAL(large_divisor),tr0
+ addi r20,32,r9
+ msub.w r1,r4,r1
+ madd.w r1,r1,r1
+ mmulfx.w r1,r1,r4
+ shlri r6,32,r7
+ bgt/u r9,r63,tr0 // large_divisor
+ mmulfx.w r5,r4,r4
+ shlri r2,32+14,r19
+ addi r22,-31,r0
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r19,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ mulu.l r5,r3,r5
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ shlld r5,r0,r5
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r2,r5,r2
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
+
+ shlri r2,22,r21
+ mulu.l r21,r1,r21
+ addi r20,30-22,r0
+ /* bubble */ /* could test r3 here to check for divide by zero. */
+ shlrd r21,r0,r21
+ mulu.l r21,r3,r5
+ mcmpgt.l r21,r63,r21 // See Note 1
+ addi r20,30,r0
+ mshfhi.l r63,r21,r21
+ sub r2,r5,r2
+ andc r2,r21,r2
+
+ /* small divisor: need a third divide step */
+ mulu.l r2,r1,r7
+ ptabs r18,tr0
+ sub r2,r3,r8 /* re-use r8 here for rest - r3 */
+ shlrd r7,r0,r7
+ mulu.l r7,r3,r5
+ /* bubble */
+ addi r8,1,r7
+ cmpgt r7,r5,r7
+ cmvne r7,r8,r2
+ sub r2,r5,r2
+ blink tr0,r63
+
+LOCAL(large_divisor):
+ mmulfx.w r5,r4,r4
+ shlrd r2,r9,r25
+ shlri r25,32,r8
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r8,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ shlri r5,14-1,r8
+ mulu.l r8,r7,r5
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r25,r5,r25
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
+
+ shlri r25,22,r21
+ mulu.l r21,r1,r21
+ pta LOCAL(no_lo_adj),tr0
+ addi r22,32,r0
+ shlri r21,40,r21
+ mulu.l r21,r7,r5
+ add r8,r21,r8
+ shlld r2,r0,r2
+ sub r25,r5,r25
+ bgtu/u r7,r25,tr0 // no_lo_adj
+ addi r8,1,r8
+ sub r25,r7,r25
+LOCAL(no_lo_adj):
+ mextr4 r2,r25,r2
+
+ /* large_divisor: only needs a few adjustments. */
+ mulu.l r8,r6,r5
+ ptabs r18,tr0
+ add r2,r6,r7
+ cmpgtu r5,r2,r8
+ cmvne r8,r7,r2
+ sub r2,r5,r2
+ shlrd r2,r22,r2
+ blink tr0,r63
+ ENDFUNC(GLOBAL(umoddi3))
+/* Note 1: To shift the result of the second divide stage so that the result
+ always fits into 32 bits, yet we still reduce the rest sufficiently
+ would require a lot of instructions to do the shifts just right. Using
+ the full 64 bit shift result to multiply with the divisor would require
+ four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+ Fortunately, if the upper 32 bits of the shift result are nonzero, we
+ know that the rest after taking this partial result into account will
+ fit into 32 bits. So we just clear the upper 32 bits of the rest if the
+ upper 32 bits of the partial result are nonzero. */
+#endif /* __SHMEDIA__ */
+#endif /* L_umoddi3 */
+
+#ifdef L_moddi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(moddi3)
+ FUNC(GLOBAL(moddi3))
+GLOBAL(moddi3):
+ pta GLOBAL(umoddi3_internal),tr0
+ shari r2,63,r22
+ shari r3,63,r23
+ xor r2,r22,r2
+ xor r3,r23,r3
+ sub r2,r22,r2
+ sub r3,r23,r3
+ beq/u r22,r63,tr0
+ ptabs r18,tr1
+ blink tr0,r18
+ sub r63,r2,r2
+ blink tr1,r63
+ ENDFUNC(GLOBAL(moddi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_moddi3 */
+
+#ifdef L_set_fpscr
+#if !defined (__SH2A_NOFPU__)
+#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
+#ifdef __SH5__
+ .mode SHcompact
+#endif
+ .global GLOBAL(set_fpscr)
+ HIDDEN_FUNC(GLOBAL(set_fpscr))
+GLOBAL(set_fpscr):
+ lds r4,fpscr
+#ifdef __PIC__
+ mov.l r12,@-r15
+#ifdef __vxworks
+ mov.l LOCAL(set_fpscr_L0_base),r12
+ mov.l LOCAL(set_fpscr_L0_index),r0
+ mov.l @r12,r12
+ mov.l @(r0,r12),r12
+#else
+ mova LOCAL(set_fpscr_L0),r0
+ mov.l LOCAL(set_fpscr_L0),r12
+ add r0,r12
+#endif
+ mov.l LOCAL(set_fpscr_L1),r0
+ mov.l @(r0,r12),r1
+ mov.l @r15+,r12
+#else
+ mov.l LOCAL(set_fpscr_L1),r1
+#endif
+ swap.w r4,r0
+ or #24,r0
+#ifndef FMOVD_WORKS
+ xor #16,r0
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+ swap.w r0,r3
+ mov.l r3,@(4,r1)
+#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+ swap.w r0,r2
+ mov.l r2,@r1
+#endif
+#ifndef FMOVD_WORKS
+ xor #8,r0
+#else
+ xor #24,r0
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+ swap.w r0,r2
+ rts
+ mov.l r2,@r1
+#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+ swap.w r0,r3
+ rts
+ mov.l r3,@(4,r1)
+#endif
+ .align 2
+#ifdef __PIC__
+#ifdef __vxworks
+LOCAL(set_fpscr_L0_base):
+ .long ___GOTT_BASE__
+LOCAL(set_fpscr_L0_index):
+ .long ___GOTT_INDEX__
+#else
+LOCAL(set_fpscr_L0):
+ .long _GLOBAL_OFFSET_TABLE_
+#endif
+LOCAL(set_fpscr_L1):
+ .long GLOBAL(fpscr_values@GOT)
+#else
+LOCAL(set_fpscr_L1):
+ .long GLOBAL(fpscr_values)
+#endif
+
+ ENDFUNC(GLOBAL(set_fpscr))
+#ifndef NO_FPSCR_VALUES
+#ifdef __ELF__
+ .comm GLOBAL(fpscr_values),8,4
+#else
+ .comm GLOBAL(fpscr_values),8
+#endif /* ELF */
+#endif /* NO_FPSCR_VALUES */
+#endif /* SH2E / SH3E / SH4 */
+#endif /* __SH2A_NOFPU__ */
+#endif /* L_set_fpscr */
+#ifdef L_ic_invalidate
+#if __SH5__ == 32
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(init_trampoline)
+ HIDDEN_FUNC(GLOBAL(init_trampoline))
+GLOBAL(init_trampoline):
+ st.l r0,8,r2
+#ifdef __LITTLE_ENDIAN__
+ movi 9,r20
+ shori 0x402b,r20
+ shori 0xd101,r20
+ shori 0xd002,r20
+#else
+ movi 0xffffffffffffd002,r20
+ shori 0xd101,r20
+ shori 0x402b,r20
+ shori 9,r20
+#endif
+ st.q r0,0,r20
+ st.l r0,12,r3
+ ENDFUNC(GLOBAL(init_trampoline))
+ .global GLOBAL(ic_invalidate)
+ HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+ ocbwb r0,0
+ synco
+ icbi r0, 0
+ ptabs r18, tr0
+ synci
+ blink tr0, r63
+ ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4A__)
+ .global GLOBAL(ic_invalidate)
+ HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+ ocbwb @r4
+ synco
+ icbi @r4
+ rts
+ nop
+ ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+ /* For system code, we use ic_invalidate_line_i, but user code
+ needs a different mechanism. A kernel call is generally not
+ available, and it would also be slow. Different SH4 variants use
+ different sizes and associativities of the Icache. We use a small
+ bit of dispatch code that can be put hidden in every shared object,
+ which calls the actual processor-specific invalidation code in a
+ separate module.
+ Or if you have operating system support, the OS could mmap the
+ processor-specific code from a single page, since it is highly
+ repetitive. */
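+   /* (Hedged usage sketch, not part of the original source: user code
+      that has just written instructions, e.g. a trampoline, would call
+      this routine with the affected address; the prototype and the
+      write_trampoline helper below are assumptions for illustration
+      only, and the exact C-level symbol name depends on the GLOBAL()
+      prefix in lib1funcs.h.)
+
+        extern void __ic_invalidate (void *addr);
+
+        write_trampoline (tramp);        // hypothetical helper
+        __ic_invalidate (tramp);         // write back D-cache line, invalidate I-cache line
+   */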
+ .global GLOBAL(ic_invalidate)
+ HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+#ifdef __pic__
+#ifdef __vxworks
+ mov.l 1f,r1
+ mov.l 2f,r0
+ mov.l @r1,r1
+ mov.l 0f,r2
+ mov.l @(r0,r1),r0
+#else
+ mov.l 1f,r1
+ mova 1f,r0
+ mov.l 0f,r2
+ add r1,r0
+#endif
+ mov.l @(r0,r2),r1
+#else
+ mov.l 0f,r1
+#endif
+ ocbwb @r4
+ mov.l @(8,r1),r0
+ sub r1,r4
+ and r4,r0
+ add r1,r0
+ jmp @r0
+ mov.l @(4,r1),r0
+ .align 2
+#ifndef __pic__
+0: .long GLOBAL(ic_invalidate_array)
+#else /* __pic__ */
+ .global GLOBAL(ic_invalidate_array)
+0: .long GLOBAL(ic_invalidate_array)@GOT
+#ifdef __vxworks
+1: .long ___GOTT_BASE__
+2: .long ___GOTT_INDEX__
+#else
+1: .long _GLOBAL_OFFSET_TABLE_
+#endif
+ ENDFUNC(GLOBAL(ic_invalidate))
+#endif /* __pic__ */
+#endif /* SH4 */
+#endif /* L_ic_invalidate */
+
+#ifdef L_ic_invalidate_array
+#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
+ .global GLOBAL(ic_invalidate_array)
+ /* This is needed when an SH4 dso with trampolines is used on SH4A. */
+ .global GLOBAL(ic_invalidate_array)
+ FUNC(GLOBAL(ic_invalidate_array))
+GLOBAL(ic_invalidate_array):
+ add r1,r4
+ synco
+ icbi @r4
+ rts
+ nop
+ .align 2
+ .long 0
+ ENDFUNC(GLOBAL(ic_invalidate_array))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+ .global GLOBAL(ic_invalidate_array)
+ .p2align 5
+ FUNC(GLOBAL(ic_invalidate_array))
+/* This must be aligned to the beginning of a cache line. */
+GLOBAL(ic_invalidate_array):
+#ifndef WAYS
+#define WAYS 4
+#define WAY_SIZE 0x4000
+#endif
+#if WAYS == 1
+ .rept WAY_SIZE * WAYS / 32
+ rts
+ nop
+ .rept 7
+ .long WAY_SIZE - 32
+ .endr
+ .endr
+#elif WAYS <= 6
+ .rept WAY_SIZE * WAYS / 32
+ braf r0
+ add #-8,r0
+ .long WAY_SIZE + 8
+ .long WAY_SIZE - 32
+ .rept WAYS-2
+ braf r0
+ nop
+ .endr
+ .rept 7 - WAYS
+ rts
+ nop
+ .endr
+ .endr
+#else /* WAYS > 6 */
+ /* This variant needs two different pages for mmap-ing. */
+ .rept WAYS-1
+ .rept WAY_SIZE / 32
+ braf r0
+ nop
+ .long WAY_SIZE
+ .rept 6
+ .long WAY_SIZE - 32
+ .endr
+ .endr
+ .endr
+ .rept WAY_SIZE / 32
+ rts
+ .rept 15
+ nop
+ .endr
+ .endr
+#endif /* WAYS */
+ ENDFUNC(GLOBAL(ic_invalidate_array))
+#endif /* SH4 */
+#endif /* L_ic_invalidate_array */
+
+#if defined (__SH5__) && __SH5__ == 32
+#ifdef L_shcompact_call_trampoline
+ .section .rodata
+ .align 1
+LOCAL(ct_main_table):
+.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
+ .mode SHmedia
+ .section .text..SHmedia32, "ax"
+ .align 2
+
+ /* This function loads 64-bit general-purpose registers from the
+ stack, from a memory address contained in them or from an FP
+ register, according to a cookie passed in r1. Its execution
+ time is linear in the number of registers that actually have
+ to be copied. See sh.h for details on the actual bit pattern.
+
+ The function to be called is passed in r0. If a 32-bit return
+ value is expected, the actual function will be tail-called,
+ otherwise the return address will be stored in r10 (that the
+ caller should expect to be clobbered) and the return value
+ will be expanded into r2/r3 upon return. */
+
+ .global GLOBAL(GCC_shcompact_call_trampoline)
+ FUNC(GLOBAL(GCC_shcompact_call_trampoline))
+GLOBAL(GCC_shcompact_call_trampoline):
+ ptabs/l r0, tr0 /* Prepare to call the actual function. */
+ movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
+ pt/l LOCAL(ct_loop), tr1
+ addz.l r1, r63, r1
+ shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
+LOCAL(ct_loop):
+ nsb r1, r28
+ shlli r28, 1, r29
+ ldx.w r0, r29, r30
+LOCAL(ct_main_label):
+ ptrel/l r30, tr2
+ blink tr2, r63
+LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
+ /* It must be dr0, so just do it. */
+ fmov.dq dr0, r2
+ movi 7, r30
+ shlli r30, 29, r31
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
+ /* It is either dr0 or dr2. */
+ movi 7, r30
+ shlri r1, 26, r32
+ shlli r30, 26, r31
+ andc r1, r31, r1
+ fmov.dq dr0, r3
+ beqi/l r32, 4, tr1
+ fmov.dq dr2, r3
+ blink tr1, r63
+LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
+ shlri r1, 23 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
+LOCAL(ct_r4_fp_base):
+ ptrel/l r32, tr2
+ movi 7, r30
+ shlli r30, 23, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r4_fp_copy):
+ fmov.dq dr0, r4
+ blink tr1, r63
+ fmov.dq dr2, r4
+ blink tr1, r63
+ fmov.dq dr4, r4
+ blink tr1, r63
+LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
+ shlri r1, 20 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
+LOCAL(ct_r5_fp_base):
+ ptrel/l r32, tr2
+ movi 7, r30
+ shlli r30, 20, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r5_fp_copy):
+ fmov.dq dr0, r5
+ blink tr1, r63
+ fmov.dq dr2, r5
+ blink tr1, r63
+ fmov.dq dr4, r5
+ blink tr1, r63
+ fmov.dq dr6, r5
+ blink tr1, r63
+LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
+ /* It must be dr8. */
+ fmov.dq dr8, r6
+ movi 15, r30
+ shlli r30, 16, r31
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
+ shlri r1, 16 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
+LOCAL(ct_r6_fp_base):
+ ptrel/l r32, tr2
+ movi 7, r30
+ shlli r30, 16, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r6_fp_copy):
+ fmov.dq dr0, r6
+ blink tr1, r63
+ fmov.dq dr2, r6
+ blink tr1, r63
+ fmov.dq dr4, r6
+ blink tr1, r63
+ fmov.dq dr6, r6
+ blink tr1, r63
+LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
+ /* It is either dr8 or dr10. */
+ movi 15 << 12, r31
+ shlri r1, 12, r32
+ andc r1, r31, r1
+ fmov.dq dr8, r7
+ beqi/l r32, 8, tr1
+ fmov.dq dr10, r7
+ blink tr1, r63
+LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
+ shlri r1, 12 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
+LOCAL(ct_r7_fp_base):
+ ptrel/l r32, tr2
+ movi 7 << 12, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r7_fp_copy):
+ fmov.dq dr0, r7
+ blink tr1, r63
+ fmov.dq dr2, r7
+ blink tr1, r63
+ fmov.dq dr4, r7
+ blink tr1, r63
+ fmov.dq dr6, r7
+ blink tr1, r63
+LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
+ /* It is either dr8 or dr10. */
+ movi 15 << 8, r31
+ andi r1, 1 << 8, r32
+ andc r1, r31, r1
+ fmov.dq dr8, r8
+ beq/l r32, r63, tr1
+ fmov.dq dr10, r8
+ blink tr1, r63
+LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
+ shlri r1, 8 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
+LOCAL(ct_r8_fp_base):
+ ptrel/l r32, tr2
+ movi 7 << 8, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r8_fp_copy):
+ fmov.dq dr0, r8
+ blink tr1, r63
+ fmov.dq dr2, r8
+ blink tr1, r63
+ fmov.dq dr4, r8
+ blink tr1, r63
+ fmov.dq dr6, r8
+ blink tr1, r63
+LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
+ /* It is either dr8 or dr10. */
+ movi 15 << 4, r31
+ andi r1, 1 << 4, r32
+ andc r1, r31, r1
+ fmov.dq dr8, r9
+ beq/l r32, r63, tr1
+ fmov.dq dr10, r9
+ blink tr1, r63
+LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
+ shlri r1, 4 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
+LOCAL(ct_r9_fp_base):
+ ptrel/l r32, tr2
+ movi 7 << 4, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r9_fp_copy):
+ fmov.dq dr0, r9
+ blink tr1, r63
+ fmov.dq dr2, r9
+ blink tr1, r63
+ fmov.dq dr4, r9
+ blink tr1, r63
+ fmov.dq dr6, r9
+ blink tr1, r63
+LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
+ pt/l LOCAL(ct_r2_load), tr2
+ movi 3, r30
+ shlli r30, 29, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r2, 8, r3
+ ldx.q r2, r63, r2
+ /* Fall through. */
+LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
+ pt/l LOCAL(ct_r3_load), tr2
+ movi 3, r30
+ shlli r30, 26, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r3, 8, r4
+ ldx.q r3, r63, r3
+LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
+ pt/l LOCAL(ct_r4_load), tr2
+ movi 3, r30
+ shlli r30, 23, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r4, 8, r5
+ ldx.q r4, r63, r4
+LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
+ pt/l LOCAL(ct_r5_load), tr2
+ movi 3, r30
+ shlli r30, 20, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r5, 8, r6
+ ldx.q r5, r63, r5
+LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
+ pt/l LOCAL(ct_r6_load), tr2
+ movi 3 << 16, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r6, 8, r7
+ ldx.q r6, r63, r6
+LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
+ pt/l LOCAL(ct_r7_load), tr2
+ movi 3 << 12, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r7, 8, r8
+ ldx.q r7, r63, r7
+LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
+ pt/l LOCAL(ct_r8_load), tr2
+ movi 3 << 8, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r8, 8, r9
+ ldx.q r8, r63, r8
+LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
+ pt/l LOCAL(ct_check_tramp), tr2
+ ldx.q r9, r63, r9
+ blink tr2, r63
+LOCAL(ct_r2_load):
+ ldx.q r2, r63, r2
+ blink tr1, r63
+LOCAL(ct_r3_load):
+ ldx.q r3, r63, r3
+ blink tr1, r63
+LOCAL(ct_r4_load):
+ ldx.q r4, r63, r4
+ blink tr1, r63
+LOCAL(ct_r5_load):
+ ldx.q r5, r63, r5
+ blink tr1, r63
+LOCAL(ct_r6_load):
+ ldx.q r6, r63, r6
+ blink tr1, r63
+LOCAL(ct_r7_load):
+ ldx.q r7, r63, r7
+ blink tr1, r63
+LOCAL(ct_r8_load):
+ ldx.q r8, r63, r8
+ blink tr1, r63
+LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r2
+ shlli r30, 29, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r3
+ shlli r30, 26, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r4
+ shlli r30, 23, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r5
+ shlli r30, 20, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r6
+ shlli r30, 16, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
+ ldx.q r15, r63, r7
+ movi 1 << 12, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
+ ldx.q r15, r63, r8
+ movi 1 << 8, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
+ andi r1, 7 << 1, r30
+ movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
+ shlli r30, 2, r31
+ shori LOCAL(ct_end_of_pop_seq) & 65535, r32
+ sub.l r32, r31, r33
+ ptabs/l r33, tr2
+ blink tr2, r63
+LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
+ ldx.q r15, r63, r3
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r4
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r5
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r6
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r7
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r8
+ addi.l r15, 8, r15
+LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
+ ldx.q r15, r63, r9
+ addi.l r15, 8, r15
+LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
+LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
+ pt/u LOCAL(ct_ret_wide), tr2
+ andi r1, 1, r1
+ bne/u r1, r63, tr2
+LOCAL(ct_call_func): /* Just branch to the function. */
+ blink tr0, r63
+LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
+ 64-bit return value. */
+ add.l r18, r63, r10
+ blink tr0, r18
+ ptabs r10, tr0
+#if __LITTLE_ENDIAN__
+ shari r2, 32, r3
+ add.l r2, r63, r2
+#else
+ add.l r2, r63, r3
+ shari r2, 32, r2
+#endif
+ blink tr0, r63
+
+ ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
+#endif /* L_shcompact_call_trampoline */
+
+#ifdef L_shcompact_return_trampoline
+ /* This function does the converse of the code in `ret_wide'
+ above. It is tail-called by SHcompact functions returning
+ 64-bit non-floating-point values, to pack the 32-bit values in
+ r2 and r3 into r2. */
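+
+ /* Editorial sketch, not part of the original sources: the packing done
+ below is equivalent to this C, using the same __LITTLE_ENDIAN__ test as
+ the assembly (illustration only):
+
+      unsigned long long
+      pack_r2_r3 (unsigned int r2, unsigned int r3)
+      {
+      #if __LITTLE_ENDIAN__
+        return (unsigned long long) r2 | ((unsigned long long) r3 << 32);
+      #else
+        return (unsigned long long) r3 | ((unsigned long long) r2 << 32);
+      #endif
+      }
+ */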
+
+ .mode SHmedia
+ .section .text..SHmedia32, "ax"
+ .align 2
+ .global GLOBAL(GCC_shcompact_return_trampoline)
+ HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
+GLOBAL(GCC_shcompact_return_trampoline):
+ ptabs/l r18, tr0
+#if __LITTLE_ENDIAN__
+ addz.l r2, r63, r2
+ shlli r3, 32, r3
+#else
+ addz.l r3, r63, r3
+ shlli r2, 32, r2
+#endif
+ or r3, r2, r2
+ blink tr0, r63
+
+ ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
+#endif /* L_shcompact_return_trampoline */
+
+#ifdef L_shcompact_incoming_args
+ .section .rodata
+ .align 1
+LOCAL(ia_main_table):
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
+ .mode SHmedia
+ .section .text..SHmedia32, "ax"
+ .align 2
+
+ /* This function stores 64-bit general-purpose registers back on
+ the stack, and loads into each register the address at which it
+ was stored.  The lower 32 bits of r17 hold the address at which
+ to begin storing, and the upper 32 bits of r17 hold the cookie.
+ Its execution time is linear in the
+ number of registers that actually have to be copied, and it is
+ optimized for structures larger than 64 bits, as opposed to
+ individual `long long' arguments.  See sh.h for details on the
+ actual bit pattern. */
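+
+ /* Editorial sketch, not part of the original sources: for each register
+ handled by an "ld" entry, the effect is roughly the following C, with
+ `slot' standing for the store address carried in the low half of r17
+ (illustration only; the "push" entries store without replacing):
+
+      #include <stdint.h>
+      static uint64_t *slot;          // advanced by 8 bytes per register
+
+      static uint64_t
+      spill_one (uint64_t reg)
+      {
+        *slot = reg;                              // store the register back ...
+        return (uint64_t) (uintptr_t) slot++;     // ... and hand back its address
+      }
+ */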
+
+ .global GLOBAL(GCC_shcompact_incoming_args)
+ FUNC(GLOBAL(GCC_shcompact_incoming_args))
+GLOBAL(GCC_shcompact_incoming_args):
+ ptabs/l r18, tr0 /* Prepare to return. */
+ shlri r17, 32, r0 /* Load the cookie. */
+ movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
+ pt/l LOCAL(ia_loop), tr1
+ add.l r17, r63, r17
+ shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
+LOCAL(ia_loop):
+ nsb r0, r36
+ shlli r36, 1, r37
+ ldx.w r43, r37, r38
+LOCAL(ia_main_label):
+ ptrel/l r38, tr2
+ blink tr2, r63
+LOCAL(ia_r2_ld): /* Store r2 and load its address. */
+ movi 3, r38
+ shlli r38, 29, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r2
+ add.l r17, r63, r2
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r3_ld): /* Store r3 and load its address. */
+ movi 3, r38
+ shlli r38, 26, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r3
+ add.l r17, r63, r3
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r4_ld): /* Store r4 and load its address. */
+ movi 3, r38
+ shlli r38, 23, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r4
+ add.l r17, r63, r4
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r5_ld): /* Store r5 and load its address. */
+ movi 3, r38
+ shlli r38, 20, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r5
+ add.l r17, r63, r5
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r6_ld): /* Store r6 and load its address. */
+ movi 3, r38
+ shlli r38, 16, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r6
+ add.l r17, r63, r6
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r7_ld): /* Store r7 and load its address. */
+ movi 3 << 12, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r7
+ add.l r17, r63, r7
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r8_ld): /* Store r8 and load its address. */
+ movi 3 << 8, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r8
+ add.l r17, r63, r8
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r9_ld): /* Store r9 and load its address. */
+ stx.q r17, r63, r9
+ add.l r17, r63, r9
+ blink tr0, r63
+LOCAL(ia_r2_push): /* Push r2 onto the stack. */
+ movi 1, r38
+ shlli r38, 29, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r2
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r3_push): /* Push r3 onto the stack. */
+ movi 1, r38
+ shlli r38, 26, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r3
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r4_push): /* Push r4 onto the stack. */
+ movi 1, r38
+ shlli r38, 23, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r4
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r5_push): /* Push r5 onto the stack. */
+ movi 1, r38
+ shlli r38, 20, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r5
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r6_push): /* Push r6 onto the stack. */
+ movi 1, r38
+ shlli r38, 16, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r6
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r7_push): /* Push r7 onto the stack. */
+ movi 1 << 12, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r7
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r8_push): /* Push r8 onto the stack. */
+ movi 1 << 8, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r8
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
+ andi r0, 7 << 1, r38
+ movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
+ shlli r38, 2, r39
+ shori LOCAL(ia_end_of_push_seq) & 65535, r40
+ sub.l r40, r39, r41
+ ptabs/l r41, tr2
+ blink tr2, r63
+LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
+ stx.q r17, r63, r3
+ addi.l r17, 8, r17
+ stx.q r17, r63, r4
+ addi.l r17, 8, r17
+ stx.q r17, r63, r5
+ addi.l r17, 8, r17
+ stx.q r17, r63, r6
+ addi.l r17, 8, r17
+ stx.q r17, r63, r7
+ addi.l r17, 8, r17
+ stx.q r17, r63, r8
+ addi.l r17, 8, r17
+LOCAL(ia_r9_push): /* Push r9 onto the stack. */
+ stx.q r17, r63, r9
+LOCAL(ia_return): /* Return. */
+ blink tr0, r63
+LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
+ ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
+#endif /* L_shcompact_incoming_args */
+#endif
+#if __SH5__
+#ifdef L_nested_trampoline
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+ .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
+ .global GLOBAL(GCC_nested_trampoline)
+ HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
+GLOBAL(GCC_nested_trampoline):
+ .mode SHmedia
+ ptrel/u r63, tr0
+ gettr tr0, r0
+#if __SH5__ == 64
+ ld.q r0, 24, r1
+#else
+ ld.l r0, 24, r1
+#endif
+ ptabs/l r1, tr1
+#if __SH5__ == 64
+ ld.q r0, 32, r1
+#else
+ ld.l r0, 28, r1
+#endif
+ blink tr1, r63
+
+ ENDFUNC(GLOBAL(GCC_nested_trampoline))
+#endif /* L_nested_trampoline */
+#endif /* __SH5__ */
+#if __SH5__ == 32
+#ifdef L_push_pop_shmedia_regs
+ .section .text..SHmedia32,"ax"
+ .mode SHmedia
+ .align 2
+#ifndef __SH4_NOFPU__
+ .global GLOBAL(GCC_push_shmedia_regs)
+ FUNC(GLOBAL(GCC_push_shmedia_regs))
+GLOBAL(GCC_push_shmedia_regs):
+ addi.l r15, -14*8, r15
+ fst.d r15, 13*8, dr62
+ fst.d r15, 12*8, dr60
+ fst.d r15, 11*8, dr58
+ fst.d r15, 10*8, dr56
+ fst.d r15, 9*8, dr54
+ fst.d r15, 8*8, dr52
+ fst.d r15, 7*8, dr50
+ fst.d r15, 6*8, dr48
+ fst.d r15, 5*8, dr46
+ fst.d r15, 4*8, dr44
+ fst.d r15, 3*8, dr42
+ fst.d r15, 2*8, dr40
+ fst.d r15, 1*8, dr38
+ fst.d r15, 0*8, dr36
+#else /* ! __SH4_NOFPU__ */
+ .global GLOBAL(GCC_push_shmedia_regs_nofpu)
+ FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+GLOBAL(GCC_push_shmedia_regs_nofpu):
+#endif /* ! __SH4_NOFPU__ */
+ ptabs/l r18, tr0
+ addi.l r15, -27*8, r15
+ gettr tr7, r62
+ gettr tr6, r61
+ gettr tr5, r60
+ st.q r15, 26*8, r62
+ st.q r15, 25*8, r61
+ st.q r15, 24*8, r60
+ st.q r15, 23*8, r59
+ st.q r15, 22*8, r58
+ st.q r15, 21*8, r57
+ st.q r15, 20*8, r56
+ st.q r15, 19*8, r55
+ st.q r15, 18*8, r54
+ st.q r15, 17*8, r53
+ st.q r15, 16*8, r52
+ st.q r15, 15*8, r51
+ st.q r15, 14*8, r50
+ st.q r15, 13*8, r49
+ st.q r15, 12*8, r48
+ st.q r15, 11*8, r47
+ st.q r15, 10*8, r46
+ st.q r15, 9*8, r45
+ st.q r15, 8*8, r44
+ st.q r15, 7*8, r35
+ st.q r15, 6*8, r34
+ st.q r15, 5*8, r33
+ st.q r15, 4*8, r32
+ st.q r15, 3*8, r31
+ st.q r15, 2*8, r30
+ st.q r15, 1*8, r29
+ st.q r15, 0*8, r28
+ blink tr0, r63
+#ifndef __SH4_NOFPU__
+ ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
+#else
+ ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+#endif
+#ifndef __SH4_NOFPU__
+ .global GLOBAL(GCC_pop_shmedia_regs)
+ FUNC(GLOBAL(GCC_pop_shmedia_regs))
+GLOBAL(GCC_pop_shmedia_regs):
+ pt .L0, tr1
+ movi 41*8, r0
+ fld.d r15, 40*8, dr62
+ fld.d r15, 39*8, dr60
+ fld.d r15, 38*8, dr58
+ fld.d r15, 37*8, dr56
+ fld.d r15, 36*8, dr54
+ fld.d r15, 35*8, dr52
+ fld.d r15, 34*8, dr50
+ fld.d r15, 33*8, dr48
+ fld.d r15, 32*8, dr46
+ fld.d r15, 31*8, dr44
+ fld.d r15, 30*8, dr42
+ fld.d r15, 29*8, dr40
+ fld.d r15, 28*8, dr38
+ fld.d r15, 27*8, dr36
+ blink tr1, r63
+#else /* ! __SH4_NOFPU__ */
+ .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
+ FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+GLOBAL(GCC_pop_shmedia_regs_nofpu):
+#endif /* ! __SH4_NOFPU__ */
+ movi 27*8, r0
+.L0:
+ ptabs r18, tr0
+ ld.q r15, 26*8, r62
+ ld.q r15, 25*8, r61
+ ld.q r15, 24*8, r60
+ ptabs r62, tr7
+ ptabs r61, tr6
+ ptabs r60, tr5
+ ld.q r15, 23*8, r59
+ ld.q r15, 22*8, r58
+ ld.q r15, 21*8, r57
+ ld.q r15, 20*8, r56
+ ld.q r15, 19*8, r55
+ ld.q r15, 18*8, r54
+ ld.q r15, 17*8, r53
+ ld.q r15, 16*8, r52
+ ld.q r15, 15*8, r51
+ ld.q r15, 14*8, r50
+ ld.q r15, 13*8, r49
+ ld.q r15, 12*8, r48
+ ld.q r15, 11*8, r47
+ ld.q r15, 10*8, r46
+ ld.q r15, 9*8, r45
+ ld.q r15, 8*8, r44
+ ld.q r15, 7*8, r35
+ ld.q r15, 6*8, r34
+ ld.q r15, 5*8, r33
+ ld.q r15, 4*8, r32
+ ld.q r15, 3*8, r31
+ ld.q r15, 2*8, r30
+ ld.q r15, 1*8, r29
+ ld.q r15, 0*8, r28
+ add.l r15, r0, r15
+ blink tr0, r63
+
+#ifndef __SH4_NOFPU__
+ ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
+#else
+ ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+#endif
+#endif /* __SH5__ == 32 */
+#endif /* L_push_pop_shmedia_regs */
+
+#ifdef L_div_table
+#if __SH5__
+#if defined(__pic__) && defined(__SHMEDIA__)
+ .global GLOBAL(sdivsi3)
+ FUNC(GLOBAL(sdivsi3))
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+#if 0
+/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
+ in a text section does not work (at least for shared libraries):
+ the linker sets the LSB of the address as if this was SHmedia code. */
+#define TEXT_DATA_BUG
+#endif
+ .align 2
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ .global GLOBAL(sdivsi3)
+GLOBAL(sdivsi3):
+#ifdef TEXT_DATA_BUG
+ ptb datalabel Local_div_table,tr0
+#else
+ ptb GLOBAL(div_table_internal),tr0
+#endif
+ nsb r5, r1
+ shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
+ /* bubble */
+ gettr tr0,r20
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25 // normalize to s2.30
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ ptabs r18, tr0
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+ sub r63, r1, r1
+ addi r1, 92, r1
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19 // multiply by two and convert to s2.58
+ /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18 // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0 // s2.60
+ muls.l r18, r4, r25 // s32.30
+ /* bubble */
+ shari r0, 16, r19 // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+ shari r25, 63, r0
+ shari r4, 14, r18 // s19.-14
+ shari r19, 30, r19 // s-16.44
+ muls.l r19, r18, r19 // s15.30
+ xor r21, r0, r21 // You could also use the constant 1 << 27.
+ add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63
+ ENDFUNC(GLOBAL(sdivsi3))
+/* This table has been generated by divtab.c .
+Defects for bias -330:
+ Max defect: 6.081536e-07 at -1.000000e+00
+ Min defect: 2.849516e-08 at 1.030651e+00
+ Max 2nd step defect: 9.606539e-12 at -1.000000e+00
+ Min 2nd step defect: 0.000000e+00 at 0.000000e+00
+ Defect at 1: 1.238659e-07
+ Defect at -2: 1.061708e-07 */
+#else /* ! __pic__ || ! __SHMEDIA__ */
+ .section .rodata
+#endif /* __pic__ */
+#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
+ .balign 2
+ .type Local_div_table,@object
+ .size Local_div_table,128
+/* negative division constants */
+ .word -16638
+ .word -17135
+ .word -17737
+ .word -18433
+ .word -19103
+ .word -19751
+ .word -20583
+ .word -21383
+ .word -22343
+ .word -23353
+ .word -24407
+ .word -25582
+ .word -26863
+ .word -28382
+ .word -29965
+ .word -31800
+/* negative division factors */
+ .byte 66
+ .byte 70
+ .byte 75
+ .byte 81
+ .byte 87
+ .byte 93
+ .byte 101
+ .byte 109
+ .byte 119
+ .byte 130
+ .byte 142
+ .byte 156
+ .byte 172
+ .byte 192
+ .byte 214
+ .byte 241
+ .skip 16
+Local_div_table:
+ .skip 16
+/* positive division factors */
+ .byte 241
+ .byte 214
+ .byte 192
+ .byte 172
+ .byte 156
+ .byte 142
+ .byte 130
+ .byte 119
+ .byte 109
+ .byte 101
+ .byte 93
+ .byte 87
+ .byte 81
+ .byte 75
+ .byte 70
+ .byte 66
+/* positive division constants */
+ .word 31801
+ .word 29966
+ .word 28383
+ .word 26864
+ .word 25583
+ .word 24408
+ .word 23354
+ .word 22344
+ .word 21384
+ .word 20584
+ .word 19752
+ .word 19104
+ .word 18434
+ .word 17738
+ .word 17136
+ .word 16639
+ .section .rodata
+#endif /* TEXT_DATA_BUG */
+ .balign 2
+ .type GLOBAL(div_table),@object
+ .size GLOBAL(div_table),128
+/* negative division constants */
+ .word -16638
+ .word -17135
+ .word -17737
+ .word -18433
+ .word -19103
+ .word -19751
+ .word -20583
+ .word -21383
+ .word -22343
+ .word -23353
+ .word -24407
+ .word -25582
+ .word -26863
+ .word -28382
+ .word -29965
+ .word -31800
+/* negative division factors */
+ .byte 66
+ .byte 70
+ .byte 75
+ .byte 81
+ .byte 87
+ .byte 93
+ .byte 101
+ .byte 109
+ .byte 119
+ .byte 130
+ .byte 142
+ .byte 156
+ .byte 172
+ .byte 192
+ .byte 214
+ .byte 241
+ .skip 16
+ .global GLOBAL(div_table)
+GLOBAL(div_table):
+ HIDDEN_ALIAS(div_table_internal,div_table)
+ .skip 16
+/* positive division factors */
+ .byte 241
+ .byte 214
+ .byte 192
+ .byte 172
+ .byte 156
+ .byte 142
+ .byte 130
+ .byte 119
+ .byte 109
+ .byte 101
+ .byte 93
+ .byte 87
+ .byte 81
+ .byte 75
+ .byte 70
+ .byte 66
+/* positive division constants */
+ .word 31801
+ .word 29966
+ .word 28383
+ .word 26864
+ .word 25583
+ .word 24408
+ .word 23354
+ .word 22344
+ .word 21384
+ .word 20584
+ .word 19752
+ .word 19104
+ .word 18434
+ .word 17738
+ .word 17136
+ .word 16639
+
+#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld, and thus is not suitable for SH1 / SH2. */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4.
+ Uses a lookup table for divisors in the range -128 .. +128, and
+ div1 with case distinction for larger divisors in three more ranges.
+ The code is lumped together with the table to allow the use of mova. */
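+
+/* Editorial sketch, not part of the original sources: for divisors up to
+   128 the routine below multiplies by a table-driven inverse instead of
+   iterating div1.  A generic C sketch of that reciprocal-multiply idea,
+   with the leading 1 of the 33-bit inverse kept implicit just as in the
+   dmulu.l/addc/shld sequence (this is not a transcription of the table
+   encoding used here):
+
+      #include <stdint.h>
+      static uint32_t udiv_small (uint32_t n, uint32_t d)   // 1 <= d <= 128
+      {
+        unsigned l = 0;
+        while ((1u << l) < d)
+          l++;                                    // l = ceil(log2(d))
+        uint64_t m = ((1ULL << (32 + l)) + d - 1) / d;   // ceil(2^(32+l)/d)
+        uint32_t m_lo = (uint32_t) m;             // low 32 bits; bit 32 implicit
+        uint64_t hi = ((uint64_t) n * m_lo) >> 32;
+        return (uint32_t) (((uint64_t) n + hi) >> l);    // == n / d
+      }
+*/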
+#ifdef __LITTLE_ENDIAN__
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+ .balign 4
+ .global GLOBAL(udivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+ mov.w LOCAL(c128_w), r1
+ div0u
+ mov r4,r0
+ shlr8 r0
+ cmp/hi r1,r5
+ extu.w r5,r1
+ bf LOCAL(udiv_le128)
+ cmp/eq r5,r1
+ bf LOCAL(udiv_ge64k)
+ shlr r0
+ mov r5,r1
+ shll16 r5
+ mov.l r4,@-r15
+ div1 r5,r0
+ mov.l r1,@-r15
+ div1 r5,r0
+ div1 r5,r0
+ bra LOCAL(udiv_25)
+ div1 r5,r0
+
+LOCAL(div_le128):
+ mova LOCAL(div_table_ix),r0
+ bra LOCAL(div_le128_2)
+ mov.b @(r0,r5),r1
+LOCAL(udiv_le128):
+ mov.l r4,@-r15
+ mova LOCAL(div_table_ix),r0
+ mov.b @(r0,r5),r1
+ mov.l r5,@-r15
+LOCAL(div_le128_2):
+ mova LOCAL(div_table_inv),r0
+ mov.l @(r0,r1),r1
+ mov r5,r0
+ tst #0xfe,r0
+ mova LOCAL(div_table_clz),r0
+ dmulu.l r1,r4
+ mov.b @(r0,r5),r1
+ bt/s LOCAL(div_by_1)
+ mov r4,r0
+ mov.l @r15+,r5
+ sts mach,r0
+ /* clrt */
+ addc r4,r0
+ mov.l @r15+,r4
+ rotcr r0
+ rts
+ shld r1,r0
+
+LOCAL(div_by_1_neg):
+ neg r4,r0
+LOCAL(div_by_1):
+ mov.l @r15+,r5
+ rts
+ mov.l @r15+,r4
+
+LOCAL(div_ge64k):
+ bt/s LOCAL(div_r8)
+ div0u
+ shll8 r5
+ bra LOCAL(div_ge64k_2)
+ div1 r5,r0
+LOCAL(udiv_ge64k):
+ cmp/hi r0,r5
+ mov r5,r1
+ bt LOCAL(udiv_r8)
+ shll8 r5
+ mov.l r4,@-r15
+ div1 r5,r0
+ mov.l r1,@-r15
+LOCAL(div_ge64k_2):
+ div1 r5,r0
+ mov.l LOCAL(zero_l),r1
+ .rept 4
+ div1 r5,r0
+ .endr
+ mov.l r1,@-r15
+ div1 r5,r0
+ mov.w LOCAL(m256_w),r1
+ div1 r5,r0
+ mov.b r0,@(L_LSWMSB,r15)
+ xor r4,r0
+ and r1,r0
+ bra LOCAL(div_ge64k_end)
+ xor r4,r0
+
+LOCAL(div_r8):
+ shll16 r4
+ bra LOCAL(div_r8_2)
+ shll8 r4
+LOCAL(udiv_r8):
+ mov.l r4,@-r15
+ shll16 r4
+ clrt
+ shll8 r4
+ mov.l r5,@-r15
+LOCAL(div_r8_2):
+ rotcl r4
+ mov r0,r1
+ div1 r5,r1
+ mov r4,r0
+ rotcl r0
+ mov r5,r4
+ div1 r5,r1
+ .rept 5
+ rotcl r0; div1 r5,r1
+ .endr
+ rotcl r0
+ mov.l @r15+,r5
+ div1 r4,r1
+ mov.l @r15+,r4
+ rts
+ rotcl r0
+
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+
+ .global GLOBAL(sdivsi3_i4i)
+ FUNC(GLOBAL(sdivsi3_i4i))
+ /* This is link-compatible with a GLOBAL(sdivsi3) call,
+ but we effectively clobber only r1. */
+GLOBAL(sdivsi3_i4i):
+ mov.l r4,@-r15
+ cmp/pz r5
+ mov.w LOCAL(c128_w), r1
+ bt/s LOCAL(pos_divisor)
+ cmp/pz r4
+ mov.l r5,@-r15
+ neg r5,r5
+ bt/s LOCAL(neg_result)
+ cmp/hi r1,r5
+ neg r4,r4
+LOCAL(pos_result):
+ extu.w r5,r0
+ bf LOCAL(div_le128)
+ cmp/eq r5,r0
+ mov r4,r0
+ shlr8 r0
+ bf/s LOCAL(div_ge64k)
+ cmp/hi r0,r5
+ div0u
+ shll16 r5
+ div1 r5,r0
+ div1 r5,r0
+ div1 r5,r0
+LOCAL(udiv_25):
+ mov.l LOCAL(zero_l),r1
+ div1 r5,r0
+ div1 r5,r0
+ mov.l r1,@-r15
+ .rept 3
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_MSWLSB,r15)
+ xtrct r4,r0
+ swap.w r0,r0
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_LSWMSB,r15)
+LOCAL(div_ge64k_end):
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+ extu.b r0,r0
+ mov.l @r15+,r5
+ or r4,r0
+ mov.l @r15+,r4
+ rts
+ rotcl r0
+
+LOCAL(div_le128_neg):
+ tst #0xfe,r0
+ mova LOCAL(div_table_ix),r0
+ mov.b @(r0,r5),r1
+ mova LOCAL(div_table_inv),r0
+ bt/s LOCAL(div_by_1_neg)
+ mov.l @(r0,r1),r1
+ mova LOCAL(div_table_clz),r0
+ dmulu.l r1,r4
+ mov.b @(r0,r5),r1
+ mov.l @r15+,r5
+ sts mach,r0
+ /* clrt */
+ addc r4,r0
+ mov.l @r15+,r4
+ rotcr r0
+ shld r1,r0
+ rts
+ neg r0,r0
+
+LOCAL(pos_divisor):
+ mov.l r5,@-r15
+ bt/s LOCAL(pos_result)
+ cmp/hi r1,r5
+ neg r4,r4
+LOCAL(neg_result):
+ extu.w r5,r0
+ bf LOCAL(div_le128_neg)
+ cmp/eq r5,r0
+ mov r4,r0
+ shlr8 r0
+ bf/s LOCAL(div_ge64k_neg)
+ cmp/hi r0,r5
+ div0u
+ mov.l LOCAL(zero_l),r1
+ shll16 r5
+ div1 r5,r0
+ mov.l r1,@-r15
+ .rept 7
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_MSWLSB,r15)
+ xtrct r4,r0
+ swap.w r0,r0
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_LSWMSB,r15)
+LOCAL(div_ge64k_neg_end):
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+ extu.b r0,r1
+ mov.l @r15+,r5
+ or r4,r1
+LOCAL(div_r8_neg_end):
+ mov.l @r15+,r4
+ rotcl r1
+ rts
+ neg r1,r0
+
+LOCAL(div_ge64k_neg):
+ bt/s LOCAL(div_r8_neg)
+ div0u
+ shll8 r5
+ mov.l LOCAL(zero_l),r1
+ .rept 6
+ div1 r5,r0
+ .endr
+ mov.l r1,@-r15
+ div1 r5,r0
+ mov.w LOCAL(m256_w),r1
+ div1 r5,r0
+ mov.b r0,@(L_LSWMSB,r15)
+ xor r4,r0
+ and r1,r0
+ bra LOCAL(div_ge64k_neg_end)
+ xor r4,r0
+
+LOCAL(c128_w):
+ .word 128
+
+LOCAL(div_r8_neg):
+ clrt
+ shll16 r4
+ mov r4,r1
+ shll8 r1
+ mov r5,r4
+ .rept 7
+ rotcl r1; div1 r5,r0
+ .endr
+ mov.l @r15+,r5
+ rotcl r1
+ bra LOCAL(div_r8_neg_end)
+ div1 r4,r0
+
+LOCAL(m256_w):
+ .word 0xff00
+/* This table has been generated by divtab-sh4.c. */
+ .balign 4
+LOCAL(div_table_clz):
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte -1
+ .byte -1
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+/* Lookup table translating positive divisor to index into table of
+ normalized inverse. N.B. the '0' entry is also the last entry of the
+ previous table, and causes an unaligned access for division by zero. */
+LOCAL(div_table_ix):
+ .byte -6
+ .byte -128
+ .byte -128
+ .byte 0
+ .byte -128
+ .byte -64
+ .byte 0
+ .byte 64
+ .byte -128
+ .byte -96
+ .byte -64
+ .byte -32
+ .byte 0
+ .byte 32
+ .byte 64
+ .byte 96
+ .byte -128
+ .byte -112
+ .byte -96
+ .byte -80
+ .byte -64
+ .byte -48
+ .byte -32
+ .byte -16
+ .byte 0
+ .byte 16
+ .byte 32
+ .byte 48
+ .byte 64
+ .byte 80
+ .byte 96
+ .byte 112
+ .byte -128
+ .byte -120
+ .byte -112
+ .byte -104
+ .byte -96
+ .byte -88
+ .byte -80
+ .byte -72
+ .byte -64
+ .byte -56
+ .byte -48
+ .byte -40
+ .byte -32
+ .byte -24
+ .byte -16
+ .byte -8
+ .byte 0
+ .byte 8
+ .byte 16
+ .byte 24
+ .byte 32
+ .byte 40
+ .byte 48
+ .byte 56
+ .byte 64
+ .byte 72
+ .byte 80
+ .byte 88
+ .byte 96
+ .byte 104
+ .byte 112
+ .byte 120
+ .byte -128
+ .byte -124
+ .byte -120
+ .byte -116
+ .byte -112
+ .byte -108
+ .byte -104
+ .byte -100
+ .byte -96
+ .byte -92
+ .byte -88
+ .byte -84
+ .byte -80
+ .byte -76
+ .byte -72
+ .byte -68
+ .byte -64
+ .byte -60
+ .byte -56
+ .byte -52
+ .byte -48
+ .byte -44
+ .byte -40
+ .byte -36
+ .byte -32
+ .byte -28
+ .byte -24
+ .byte -20
+ .byte -16
+ .byte -12
+ .byte -8
+ .byte -4
+ .byte 0
+ .byte 4
+ .byte 8
+ .byte 12
+ .byte 16
+ .byte 20
+ .byte 24
+ .byte 28
+ .byte 32
+ .byte 36
+ .byte 40
+ .byte 44
+ .byte 48
+ .byte 52
+ .byte 56
+ .byte 60
+ .byte 64
+ .byte 68
+ .byte 72
+ .byte 76
+ .byte 80
+ .byte 84
+ .byte 88
+ .byte 92
+ .byte 96
+ .byte 100
+ .byte 104
+ .byte 108
+ .byte 112
+ .byte 116
+ .byte 120
+ .byte 124
+ .byte -128
+/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
+ .balign 4
+LOCAL(zero_l):
+ .long 0x0
+ .long 0xF81F81F9
+ .long 0xF07C1F08
+ .long 0xE9131AC0
+ .long 0xE1E1E1E2
+ .long 0xDAE6076C
+ .long 0xD41D41D5
+ .long 0xCD856891
+ .long 0xC71C71C8
+ .long 0xC0E07039
+ .long 0xBACF914D
+ .long 0xB4E81B4F
+ .long 0xAF286BCB
+ .long 0xA98EF607
+ .long 0xA41A41A5
+ .long 0x9EC8E952
+ .long 0x9999999A
+ .long 0x948B0FCE
+ .long 0x8F9C18FA
+ .long 0x8ACB90F7
+ .long 0x86186187
+ .long 0x81818182
+ .long 0x7D05F418
+ .long 0x78A4C818
+ .long 0x745D1746
+ .long 0x702E05C1
+ .long 0x6C16C16D
+ .long 0x68168169
+ .long 0x642C8591
+ .long 0x60581606
+ .long 0x5C9882BA
+ .long 0x58ED2309
+LOCAL(div_table_inv):
+ .long 0x55555556
+ .long 0x51D07EAF
+ .long 0x4E5E0A73
+ .long 0x4AFD6A06
+ .long 0x47AE147B
+ .long 0x446F8657
+ .long 0x41414142
+ .long 0x3E22CBCF
+ .long 0x3B13B13C
+ .long 0x38138139
+ .long 0x3521CFB3
+ .long 0x323E34A3
+ .long 0x2F684BDB
+ .long 0x2C9FB4D9
+ .long 0x29E4129F
+ .long 0x27350B89
+ .long 0x24924925
+ .long 0x21FB7813
+ .long 0x1F7047DD
+ .long 0x1CF06ADB
+ .long 0x1A7B9612
+ .long 0x18118119
+ .long 0x15B1E5F8
+ .long 0x135C8114
+ .long 0x11111112
+ .long 0xECF56BF
+ .long 0xC9714FC
+ .long 0xA6810A7
+ .long 0x8421085
+ .long 0x624DD30
+ .long 0x4104105
+ .long 0x2040811
+ /* maximum error: 0.987342 scaled: 0.921875 */
+
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
+
+#ifdef L_udiv_qrnnd_16
+#if !__SHMEDIA__
+ HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
+ /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
+ /* n1 < d, but n1 might be larger than d1. */
+ .global GLOBAL(udiv_qrnnd_16)
+ .balign 8
+GLOBAL(udiv_qrnnd_16):
+ div0u
+ cmp/hi r6,r0
+ bt .Lots
+ .rept 16
+ div1 r6,r0
+ .endr
+ extu.w r0,r1
+ bt 0f
+ add r6,r0
+0: rotcl r1
+ mulu.w r1,r5
+ xtrct r4,r0
+ swap.w r0,r0
+ sts macl,r2
+ cmp/hs r2,r0
+ sub r2,r0
+ bt 0f
+ addc r5,r0
+ add #-1,r1
+ bt 0f
+1: add #-1,r1
+ rts
+ add r5,r0
+ .balign 8
+.Lots:
+ sub r5,r0
+ swap.w r4,r1
+ xtrct r0,r1
+ clrt
+ mov r1,r0
+ addc r5,r0
+ mov #-1,r1
+ SL1(bf, 1b,
+ shlr16 r1)
+0: rts
+ nop
+ ENDFUNC(GLOBAL(udiv_qrnnd_16))
+#endif /* !__SHMEDIA__ */
+#endif /* L_udiv_qrnnd_16 */
diff --git a/libgcc/config/sh/lib1funcs.h b/libgcc/config/sh/lib1funcs.h
new file mode 100644
index 00000000000..af4b41cc314
--- /dev/null
+++ b/libgcc/config/sh/lib1funcs.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2009
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef __ELF__
+#define LOCAL(X) .L_##X
+#define FUNC(X) .type X,@function
+#define HIDDEN_FUNC(X) FUNC(X); .hidden X
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
+#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
+#define ENDFUNC(X) ENDFUNC0(X)
+#else
+#define LOCAL(X) L_##X
+#define FUNC(X)
+#define HIDDEN_FUNC(X)
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
+#define ENDFUNC(X)
+#endif
+
+#define CONCAT(A,B) A##B
+#define GLOBAL0(U,X) CONCAT(U,__##X)
+#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
+
+#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
+
+#if defined __SH2A__ && defined __FMOVD_ENABLED__
+#undef FMOVD_WORKS
+#define FMOVD_WORKS
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define DR00 fr1
+#define DR01 fr0
+#define DR20 fr3
+#define DR21 fr2
+#define DR40 fr5
+#define DR41 fr4
+#else /* !__LITTLE_ENDIAN__ */
+#define DR00 fr0
+#define DR01 fr1
+#define DR20 fr2
+#define DR21 fr3
+#define DR40 fr4
+#define DR41 fr5
+#endif /* !__LITTLE_ENDIAN__ */
+
+#ifdef __sh1__
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+ in_slot, in_slot_arg2; branch dest
+#define SL1(branch, dest, in_slot) \
+ in_slot; branch dest
+#else /* ! __sh1__ */
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+ branch##.s dest; in_slot, in_slot_arg2
+#define SL1(branch, dest, in_slot) \
+ branch##/s dest; in_slot
+#endif /* !__sh1__ */
diff --git a/libgcc/config/sh/t-linux b/libgcc/config/sh/t-linux
index af618e260c6..9b1feacd1f3 100644
--- a/libgcc/config/sh/t-linux
+++ b/libgcc/config/sh/t-linux
@@ -1,3 +1,5 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
HOST_LIBGCC2_CFLAGS = -fpic -mieee -DNO_FPSCR_VALUES
# Override t-slibgcc-elf-ver to export some libgcc symbols with
diff --git a/libgcc/config/sh/t-netbsd b/libgcc/config/sh/t-netbsd
new file mode 100644
index 00000000000..663edbf4187
--- /dev/null
+++ b/libgcc/config/sh/t-netbsd
@@ -0,0 +1 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate
diff --git a/libgcc/config/sh/t-sh b/libgcc/config/sh/t-sh
index ab4d98089b1..2319adbef1d 100644
--- a/libgcc/config/sh/t-sh
+++ b/libgcc/config/sh/t-sh
@@ -17,26 +17,33 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
+LIB1ASMSRC = sh/lib1funcs.S
+LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \
+ _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ _div_table _udiv_qrnnd_16 \
+ $(LIB1ASMFUNCS_CACHE)
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
crt1.o: $(srcdir)/config/sh/crt1.S
$(gcc_compile) -c $<
-ic_invalidate_array_4-100.o: $(gcc_srcdir)/config/sh/lib1funcs.asm
+ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.S
$(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 $<
libic_invalidate_array_4-100.a: ic_invalidate_array_4-100.o
$(AR_CREATE_FOR_TARGET) $@ $<
-ic_invalidate_array_4-200.o: $(gcc_srcdir)/config/sh/lib1funcs.asm
+ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.S
$(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 $<
libic_invalidate_array_4-200.a: ic_invalidate_array_4-200.o
$(AR_CREATE_FOR_TARGET) $@ $<
-ic_invalidate_array_4a.o: $(gcc_srcdir)/config/sh/lib1funcs.asm
+ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.S
$(gcc_compile) -c -DL_ic_invalidate_array -D__FORCE_SH4A__ $<
libic_invalidate_array_4a.a: ic_invalidate_array_4a.o
$(AR_CREATE_FOR_TARGET) $@ $<
sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S
- $(gcc_compile) -c -DL_sdivsi3_i4i $<
+ $(compile) -c -DL_sdivsi3_i4i $<
udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S
$(gcc_compile) -c -DL_udivsi3_i4i $<
unwind-dw2-Os-4-200.o: $(gcc_srcdir)/unwind-dw2.c
diff --git a/libgcc/config/sh/t-sh64 b/libgcc/config/sh/t-sh64
new file mode 100644
index 00000000000..fa9950e03b2
--- /dev/null
+++ b/libgcc/config/sh/t-sh64
@@ -0,0 +1,6 @@
+LIB1ASMFUNCS = \
+ _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ _shcompact_call_trampoline _shcompact_return_trampoline \
+ _shcompact_incoming_args _ic_invalidate _nested_trampoline \
+ _push_pop_shmedia_regs \
+ _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
diff --git a/libgcc/config/sparc/lb1spc.S b/libgcc/config/sparc/lb1spc.S
new file mode 100644
index 00000000000..b60bd5740e7
--- /dev/null
+++ b/libgcc/config/sparc/lb1spc.S
@@ -0,0 +1,784 @@
+/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
+ for the sparc processor.
+
+ These routines are derived from the SPARC Architecture Manual, version 8,
+ slightly edited to match the desired calling convention, and also to
+ optimize them for our purposes. */
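+
+/* Editorial sketch, not part of the original sources: .umul below is a
+   classic shift-and-add multiply driven by the Y register; each mulscc
+   step conditionally adds the multiplicand and shifts the accumulated
+   product.  The low 32 bits of the result match this C illustration:
+
+      #include <stdint.h>
+      static uint32_t umul_lo (uint32_t a, uint32_t b)
+      {
+        uint32_t prod = 0;
+        for (int i = 0; i < 32; i++)
+          if (b & (1u << i))
+            prod += a << i;     // wraps mod 2^32, like the hardware result
+        return prod;
+      }
+*/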
+
+#ifdef L_mulsi3
+.text
+ .align 4
+ .global .umul
+ .proc 4
+.umul:
+ or %o0, %o1, %o4 ! logical or of multiplier and multiplicand
+ mov %o0, %y ! multiplier to Y register
+ andncc %o4, 0xfff, %o5 ! mask out lower 12 bits
+ be mul_shortway ! can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc
+ !
+ ! long multiply
+ !
+ mulscc %o4, %o1, %o4 ! first iteration of 33
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4 ! 32nd iteration
+ mulscc %o4, %g0, %o4 ! last iteration only shifts
+ ! the upper 32 bits of product are wrong, but we do not care
+ retl
+ rd %y, %o0
+ !
+ ! short multiply
+ !
+mul_shortway:
+ mulscc %o4, %o1, %o4 ! first iteration of 13
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4 ! 12th iteration
+ mulscc %o4, %g0, %o4 ! last iteration only shifts
+ rd %y, %o5
+ sll %o4, 12, %o4 ! left shift partial product by 12 bits
+ srl %o5, 20, %o5 ! right shift partial product by 20 bits
+ retl
+ or %o5, %o4, %o0 ! merge for true product
+#endif
+
+#ifdef L_divsi3
+/*
+ * Division and remainder, from Appendix E of the SPARC Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .div name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
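+
+/* Editorial sketch, not part of the original sources: the code below is a
+ * non-restoring divide that develops N=4 quotient bits per iteration via
+ * the unrolled L1.x/L2.x/L3.x/L4.x tree.  A one-bit-at-a-time C sketch of
+ * the same idea (illustration only; the large-dividend path is handled
+ * separately in the assembly):
+ *
+ *    #include <stdint.h>
+ *    static uint32_t udiv_sketch (uint32_t n, uint32_t d, uint32_t *rem)
+ *    {
+ *      if (d == 0)                        // the real code raises ST_DIV0
+ *        { *rem = n; return 0; }
+ *      int k = 0;
+ *      while (((uint64_t) d << k) <= n)   // scale the comparand above n
+ *        k++;
+ *      int64_t r = n, q = 0;
+ *      for (int i = k - 1; i >= 0; i--)   // one quotient bit per step
+ *        {
+ *          int64_t v = (int64_t) d << i;
+ *          if (r >= 0) { r -= v; q = 2 * q + 1; }
+ *          else        { r += v; q = 2 * q - 1; }
+ *        }
+ *      if (r < 0)                         // non-restoring fixup
+ *        { q -= 1; r += d; }
+ *      *rem = (uint32_t) r;
+ *      return (uint32_t) q;
+ *    }
+ */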
+ .global .udiv
+ .align 4
+ .proc 4
+ .text
+.udiv:
+ b ready_to_divide
+ mov 0, %g3 ! result is always positive
+
+ .global .div
+ .align 4
+ .proc 4
+ .text
+.div:
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge ready_to_divide ! no, go do the divide
+ xor %o1, %o0, %g3 ! compute sign in any case
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge ready_to_divide ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+
+
+ready_to_divide:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x2 ! ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu got_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu not_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: we know we need to shift V into
+ ! the top decade, so we do not even bother to compare it to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc not_too_big
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b do_single_div
+ sub %g2, 1, %g2
+
+ not_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+ /* NB: these are commented out in the V8-SPARC manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ do_single_div:
+ subcc %g2, 1, %g2
+ bl end_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b end_single_divloop
+ nop
+ single_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ end_single_divloop:
+ subcc %g2, 1, %g2
+ bge single_divloop
+ tst %o3
+ b,a end_regular_divide
+
+not_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be got_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+divloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+ 9:
+end_regular_divide:
+ subcc %o4, 1, %o4
+ bge divloop
+ tst %o3
+ bl,a got_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+got_result:
+ ! check to see if answer should be < 0
+ tst %g3
+ bl,a 1f
+ sub %g0, %o2, %o2
+1:
+ retl
+ mov %o2, %o0
+#endif
+
+#ifdef L_modsi3
+/* This implementation was taken from glibc:
+ *
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
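+
+/* Editorial sketch, not part of the original sources: .urem/.rem below run
+ * the same non-restoring loop as .udiv/.div, but return the remainder; the
+ * fixup adds the divisor back and the sign of the result follows the
+ * dividend.  Using the udiv_sketch() helper from the note before .udiv
+ * (also editorial), the signed remainder is roughly:
+ *
+ *    static int32_t rem_sketch (int32_t n, int32_t d)
+ *    {
+ *      uint32_t r;
+ *      uint32_t un = n < 0 ? -(uint32_t) n : (uint32_t) n;
+ *      uint32_t ud = d < 0 ? -(uint32_t) d : (uint32_t) d;
+ *      udiv_sketch (un, ud, &r);
+ *      return n < 0 ? -(int32_t) r : (int32_t) r;
+ *    }
+ */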
+.text
+ .align 4
+ .global .urem
+ .proc 4
+.urem:
+ b divide
+ mov 0, %g3 ! result always positive
+
+ .align 4
+ .global .rem
+ .proc 4
+.rem:
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ mov %o0, %g3 ! sign of remainder matches %o0
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+divide:
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x2 !ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu got_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu not_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: we know we need to shift V into
+ ! the top decade, so we do not even bother to compare it to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc not_too_big
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b do_single_div
+ sub %g2, 1, %g2
+
+ not_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+ /* NB: these are commented out in the V8-SPARC manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ do_single_div:
+ subcc %g2, 1, %g2
+ bl end_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b end_single_divloop
+ nop
+ single_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ end_single_divloop:
+ subcc %g2, 1, %g2
+ bge single_divloop
+ tst %o3
+ b,a end_regular_divide
+
+not_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be got_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+divloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+ 9:
+end_regular_divide:
+ subcc %o4, 1, %o4
+ bge divloop
+ tst %o3
+ bl,a got_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+got_result:
+ ! check to see if answer should be < 0
+ tst %g3
+ bl,a 1f
+ sub %g0, %o3, %o3
+1:
+ retl
+ mov %o3, %o0
+
+#endif
+
diff --git a/libgcc/config/sparc/t-softmul b/libgcc/config/sparc/t-softmul
index 49faae47c53..7142200600f 100644
--- a/libgcc/config/sparc/t-softmul
+++ b/libgcc/config/sparc/t-softmul
@@ -1,2 +1,2 @@
-LIB1ASMSRC = sparc/lb1spc.asm
+LIB1ASMSRC = sparc/lb1spc.S
LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
diff --git a/libgcc/config/v850/lib1funcs.S b/libgcc/config/v850/lib1funcs.S
new file mode 100644
index 00000000000..04e9b1e0ad4
--- /dev/null
+++ b/libgcc/config/v850/lib1funcs.S
@@ -0,0 +1,2330 @@
+/* libgcc routines for NEC V850.
+ Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef L_mulsi3
+ .text
+ .globl ___mulsi3
+ .type ___mulsi3,@function
+___mulsi3:
+#ifdef __v850__
+/*
+ #define SHIFT 12
+ #define MASK ((1 << SHIFT) - 1)
+
+ #define STEP(i, j) \
+ ({ \
+ short a_part = (a >> (i)) & MASK; \
+ short b_part = (b >> (j)) & MASK; \
+ int res = (((int) a_part) * ((int) b_part)); \
+ res; \
+ })
+
+ int
+ __mulsi3 (unsigned a, unsigned b)
+ {
+ return STEP (0, 0) +
+ ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+ ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+ << (2 * SHIFT));
+ }
+*/
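+ /* Reader's note (not part of the original patch): the assembly below splits
+    the operands into 15-bit chunks (mask 32767, shifts of 15) rather than the
+    12-bit SHIFT of the sketch above, presumably so that every mulh operand is
+    a non-negative 16-bit signed value; the decomposition is otherwise the
+    same. */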
+ mov r6, r14
+ movea lo(32767), r0, r10
+ and r10, r14
+ mov r7, r15
+ and r10, r15
+ shr 15, r6
+ mov r6, r13
+ and r10, r13
+ shr 15, r7
+ mov r7, r12
+ and r10, r12
+ shr 15, r6
+ shr 15, r7
+ mov r14, r10
+ mulh r15, r10
+ mov r14, r11
+ mulh r12, r11
+ mov r13, r16
+ mulh r15, r16
+ mulh r14, r7
+ mulh r15, r6
+ add r16, r11
+ mulh r13, r12
+ shl 15, r11
+ add r11, r10
+ add r12, r7
+ add r6, r7
+ shl 30, r7
+ add r7, r10
+ jmp [r31]
+#endif /* __v850__ */
+#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__)
+ /* This routine is almost unnecessary because gcc
+ generates the MUL instruction for the RTX mulsi3.
+ But if someone wants to link their application with
+ previously compiled v850 objects then they will
+ need this function. */
+
+ /* It isn't good to use the instruction sequence below:
+ mul r7, r6, r0
+ mov r6, r10
+ because there is a RAW hazard between them.
+ The MUL instruction takes 2 cycles in the EX stage, so the
+ following MOV must wait 1 cycle. */
+ mov r7, r10
+ mul r6, r10, r0
+ jmp [r31]
+#endif /* __v850e__ */
+ .size ___mulsi3,.-___mulsi3
+#endif /* L_mulsi3 */
+
+
+#ifdef L_udivsi3
+ .text
+ .global ___udivsi3
+ .type ___udivsi3,@function
+___udivsi3:
+#ifdef __v850__
+ mov 1,r12
+ mov 0,r10
+ cmp r6,r7
+ bnl .L12
+ movhi hi(-2147483648),r0,r13
+ cmp r0,r7
+ blt .L12
+.L4:
+ shl 1,r7
+ shl 1,r12
+ cmp r6,r7
+ bnl .L12
+ cmp r0,r12
+ be .L8
+ mov r7,r19
+ and r13,r19
+ be .L4
+ br .L12
+.L9:
+ cmp r7,r6
+ bl .L10
+ sub r7,r6
+ or r12,r10
+.L10:
+ shr 1,r12
+ shr 1,r7
+.L12:
+ cmp r0,r12
+ bne .L9
+.L8:
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ mov r6, r10
+ divu r7, r10, r0
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___udivsi3,.-___udivsi3
+#endif
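+
+/* Reader's note: a rough C sketch of the shift-and-subtract loop above
+   (illustrative only, not part of the original patch):
+
+       q = 0; bit = 1;
+       while (den < num && !(den & 0x80000000)) { den <<= 1; bit <<= 1; }
+       while (bit) {
+         if (num >= den) { num -= den; q |= bit; }
+         bit >>= 1; den >>= 1;
+       }
+       return q;
+*/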
+
+#ifdef L_divsi3
+ .text
+ .globl ___divsi3
+ .type ___divsi3,@function
+___divsi3:
+#ifdef __v850__
+ add -8,sp
+ st.w r31,4[sp]
+ st.w r22,0[sp]
+ mov 1,r22
+ tst r7,r7
+ bp .L3
+ subr r0,r7
+ subr r0,r22
+.L3:
+ tst r6,r6
+ bp .L4
+ subr r0,r6
+ subr r0,r22
+.L4:
+ jarl ___udivsi3,r31
+ cmp r0,r22
+ bp .L7
+ subr r0,r10
+.L7:
+ ld.w 0[sp],r22
+ ld.w 4[sp],r31
+ add 8,sp
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ mov r6, r10
+ div r7, r10, r0
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___divsi3,.-___divsi3
+#endif
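+
+/* Reader's note: ___divsi3 above reduces signed division to ___udivsi3
+   (an illustrative sketch, not part of the original patch):
+
+       sign = 1;
+       if (b < 0) { b = -b; sign = -sign; }
+       if (a < 0) { a = -a; sign = -sign; }
+       q = __udivsi3 (a, b);
+       return sign < 0 ? -q : q;
+*/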
+
+#ifdef L_umodsi3
+ .text
+ .globl ___umodsi3
+ .type ___umodsi3,@function
+___umodsi3:
+#ifdef __v850__
+ add -12,sp
+ st.w r31,8[sp]
+ st.w r7,4[sp]
+ st.w r6,0[sp]
+ jarl ___udivsi3,r31
+ ld.w 4[sp],r7
+ mov r10,r6
+ jarl ___mulsi3,r31
+ ld.w 0[sp],r6
+ subr r6,r10
+ ld.w 8[sp],r31
+ add 12,sp
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ divu r7, r6, r10
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___umodsi3,.-___umodsi3
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+ .text
+ .globl ___modsi3
+ .type ___modsi3,@function
+___modsi3:
+#ifdef __v850__
+ add -12,sp
+ st.w r31,8[sp]
+ st.w r7,4[sp]
+ st.w r6,0[sp]
+ jarl ___divsi3,r31
+ ld.w 4[sp],r7
+ mov r10,r6
+ jarl ___mulsi3,r31
+ ld.w 0[sp],r6
+ subr r6,r10
+ ld.w 8[sp],r31
+ add 12,sp
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ div r7, r6, r10
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___modsi3,.-___modsi3
+#endif /* L_modsi3 */
+
+#ifdef L_save_2
+ .text
+ .align 2
+ .globl __save_r2_r29
+ .type __save_r2_r29,@function
+ /* Allocate space and save registers 2, 20 .. 29 on the stack. */
+ /* Called via: jalr __save_r2_r29,r10. */
+__save_r2_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -44,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ sst.w r2,40[ep]
+ mov r1,ep
+#else
+ addi -44,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+ st.w r2,40[sp]
+#endif
+ jmp [r10]
+ .size __save_r2_r29,.-__save_r2_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r2_r29. */
+ .align 2
+ .globl __return_r2_r29
+ .type __return_r2_r29,@function
+__return_r2_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ sld.w 40[ep],r2
+ addi 44,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ ld.w 40[sp],r2
+ addi 44,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r2_r29,.-__return_r2_r29
+#endif /* L_save_2 */
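+
+/* Reader's note (illustrative only, not part of the original patch): every
+   __save_* / __return_* pair in this file follows the same pattern, and a
+   function built with out-of-line prologues/epilogues is expected to use
+   them roughly like this (a sketch inferred from the "Called via:" comments
+   above, not something stated in this patch):
+
+       _foo:
+           jalr  __save_r20_r29, r10    # prologue: allocate and save
+           ...                          # function body
+           jr    __return_r20_r29       # epilogue: restore and return
+*/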
+
+#ifdef L_save_20
+ .text
+ .align 2
+ .globl __save_r20_r29
+ .type __save_r20_r29,@function
+ /* Allocate space and save registers 20 .. 29 on the stack. */
+ /* Called via: jalr __save_r20_r29,r10. */
+__save_r20_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -40,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ mov r1,ep
+#else
+ addi -40,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+#endif
+ jmp [r10]
+ .size __save_r20_r29,.-__save_r20_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r20_r29. */
+ .align 2
+ .globl __return_r20_r29
+ .type __return_r20_r29,@function
+__return_r20_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ addi 40,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ addi 40,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r20_r29,.-__return_r20_r29
+#endif /* L_save_20 */
+
+#ifdef L_save_21
+ .text
+ .align 2
+ .globl __save_r21_r29
+ .type __save_r21_r29,@function
+ /* Allocate space and save registers 21 .. 29 on the stack. */
+ /* Called via: jalr __save_r21_r29,r10. */
+__save_r21_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -36,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ mov r1,ep
+#else
+ addi -36,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+#endif
+ jmp [r10]
+ .size __save_r21_r29,.-__save_r21_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r21_r29. */
+ .align 2
+ .globl __return_r21_r29
+ .type __return_r21_r29,@function
+__return_r21_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ addi 36,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ addi 36,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r21_r29,.-__return_r21_r29
+#endif /* L_save_21 */
+
+#ifdef L_save_22
+ .text
+ .align 2
+ .globl __save_r22_r29
+ .type __save_r22_r29,@function
+ /* Allocate space and save registers 22 .. 29 on the stack. */
+ /* Called via: jalr __save_r22_r29,r10. */
+__save_r22_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -32,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ mov r1,ep
+#else
+ addi -32,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+#endif
+ jmp [r10]
+ .size __save_r22_r29,.-__save_r22_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r22_r29. */
+ .align 2
+ .globl __return_r22_r29
+ .type __return_r22_r29,@function
+__return_r22_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ addi 32,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ addi 32,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r22_r29,.-__return_r22_r29
+#endif /* L_save_22 */
+
+#ifdef L_save_23
+ .text
+ .align 2
+ .globl __save_r23_r29
+ .type __save_r23_r29,@function
+ /* Allocate space and save registers 23 .. 29 on the stack. */
+ /* Called via: jalr __save_r23_r29,r10. */
+__save_r23_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -28,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ mov r1,ep
+#else
+ addi -28,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+#endif
+ jmp [r10]
+ .size __save_r23_r29,.-__save_r23_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r23_r29. */
+ .align 2
+ .globl __return_r23_r29
+ .type __return_r23_r29,@function
+__return_r23_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ addi 28,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ addi 28,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r23_r29,.-__return_r23_r29
+#endif /* L_save_23 */
+
+#ifdef L_save_24
+ .text
+ .align 2
+ .globl __save_r24_r29
+ .type __save_r24_r29,@function
+ /* Allocate space and save registers 24 .. 29 on the stack. */
+ /* Called via: jalr __save_r24_r29,r10. */
+__save_r24_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -24,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ mov r1,ep
+#else
+ addi -24,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+#endif
+ jmp [r10]
+ .size __save_r24_r29,.-__save_r24_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r24_r29. */
+ .align 2
+ .globl __return_r24_r29
+ .type __return_r24_r29,@function
+__return_r24_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ addi 24,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ addi 24,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r24_r29,.-__return_r24_r29
+#endif /* L_save_24 */
+
+#ifdef L_save_25
+ .text
+ .align 2
+ .globl __save_r25_r29
+ .type __save_r25_r29,@function
+ /* Allocate space and save registers 25 .. 29 on the stack. */
+ /* Called via: jalr __save_r25_r29,r10. */
+__save_r25_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -20,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ mov r1,ep
+#else
+ addi -20,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+#endif
+ jmp [r10]
+ .size __save_r25_r29,.-__save_r25_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r25_r29. */
+ .align 2
+ .globl __return_r25_r29
+ .type __return_r25_r29,@function
+__return_r25_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ addi 20,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ addi 20,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r25_r29,.-__return_r25_r29
+#endif /* L_save_25 */
+
+#ifdef L_save_26
+ .text
+ .align 2
+ .globl __save_r26_r29
+ .type __save_r26_r29,@function
+ /* Allocate space and save registers 26 .. 29 on the stack. */
+ /* Called via: jalr __save_r26_r29,r10. */
+__save_r26_r29:
+#ifdef __EP__
+ mov ep,r1
+ add -16,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ mov r1,ep
+#else
+ add -16,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+#endif
+ jmp [r10]
+ .size __save_r26_r29,.-__save_r26_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r26_r29. */
+ .align 2
+ .globl __return_r26_r29
+ .type __return_r26_r29,@function
+__return_r26_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ addi 16,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ addi 16,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r26_r29,.-__return_r26_r29
+#endif /* L_save_26 */
+
+#ifdef L_save_27
+ .text
+ .align 2
+ .globl __save_r27_r29
+ .type __save_r27_r29,@function
+ /* Allocate space and save registers 27 .. 29 on the stack. */
+ /* Called via: jalr __save_r27_r29,r10. */
+__save_r27_r29:
+ add -12,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ jmp [r10]
+ .size __save_r27_r29,.-__save_r27_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r27_r29. */
+ .align 2
+ .globl __return_r27_r29
+ .type __return_r27_r29,@function
+__return_r27_r29:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ add 12,sp
+ jmp [r31]
+ .size __return_r27_r29,.-__return_r27_r29
+#endif /* L_save_27 */
+
+#ifdef L_save_28
+ .text
+ .align 2
+ .globl __save_r28_r29
+ .type __save_r28_r29,@function
+ /* Allocate space and save registers 28,29 on the stack. */
+ /* Called via: jalr __save_r28_r29,r10. */
+__save_r28_r29:
+ add -8,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ jmp [r10]
+ .size __save_r28_r29,.-__save_r28_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r28_r29. */
+ .align 2
+ .globl __return_r28_r29
+ .type __return_r28_r29,@function
+__return_r28_r29:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ add 8,sp
+ jmp [r31]
+ .size __return_r28_r29,.-__return_r28_r29
+#endif /* L_save_28 */
+
+#ifdef L_save_29
+ .text
+ .align 2
+ .globl __save_r29
+ .type __save_r29,@function
+ /* Allocate space and save register 29 on the stack. */
+ /* Called via: jalr __save_r29,r10. */
+__save_r29:
+ add -4,sp
+ st.w r29,0[sp]
+ jmp [r10]
+ .size __save_r29,.-__save_r29
+
+ /* Restore saved register 29, deallocate stack and return to the user. */
+ /* Called via: jr __return_r29. */
+ .align 2
+ .globl __return_r29
+ .type __return_r29,@function
+__return_r29:
+ ld.w 0[sp],r29
+ add 4,sp
+ jmp [r31]
+ .size __return_r29,.-__return_r29
+#endif /* L_save_29 */
+
+#ifdef L_save_2c
+ .text
+ .align 2
+ .globl __save_r2_r31
+ .type __save_r2_r31,@function
+ /* Allocate space and save registers 2, 20 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r2_r31,r10. */
+__save_r2_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -48,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ sst.w r2,40[ep]
+ sst.w r31,44[ep]
+ mov r1,ep
+#else
+ addi -48,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+ st.w r2,40[sp]
+ st.w r31,44[sp]
+#endif
+ jmp [r10]
+ .size __save_r2_r31,.-__save_r2_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r2_r31. */
+ .align 2
+ .globl __return_r2_r31
+ .type __return_r2_r31,@function
+__return_r2_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ sld.w 40[ep],r2
+ sld.w 44[ep],r31
+ addi 48,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ ld.w 40[sp],r2
+ ld.w 44[sp],r31
+ addi 48,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r2_r31,.-__return_r2_r31
+#endif /* L_save_2c */
+
+#ifdef L_save_20c
+ .text
+ .align 2
+ .globl __save_r20_r31
+ .type __save_r20_r31,@function
+ /* Allocate space and save registers 20 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r20_r31,r10. */
+__save_r20_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -44,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ sst.w r31,40[ep]
+ mov r1,ep
+#else
+ addi -44,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+ st.w r31,40[sp]
+#endif
+ jmp [r10]
+ .size __save_r20_r31,.-__save_r20_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r20_r31. */
+ .align 2
+ .globl __return_r20_r31
+ .type __return_r20_r31,@function
+__return_r20_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ sld.w 40[ep],r31
+ addi 44,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ ld.w 40[sp],r31
+ addi 44,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r20_r31,.-__return_r20_r31
+#endif /* L_save_20c */
+
+#ifdef L_save_21c
+ .text
+ .align 2
+ .globl __save_r21_r31
+ .type __save_r21_r31,@function
+ /* Allocate space and save registers 21 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r21_r31,r10. */
+__save_r21_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -40,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r31,36[ep]
+ mov r1,ep
+ jmp [r10]
+#else
+ addi -40,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r31,36[sp]
+ jmp [r10]
+#endif
+ .size __save_r21_r31,.-__save_r21_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r21_r31. */
+ .align 2
+ .globl __return_r21_r31
+ .type __return_r21_r31,@function
+__return_r21_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r31
+ addi 40,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r31
+ addi 40,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r21_r31,.-__return_r21_r31
+#endif /* L_save_21c */
+
+#ifdef L_save_22c
+ .text
+ .align 2
+ .globl __save_r22_r31
+ .type __save_r22_r31,@function
+ /* Allocate space and save registers 22 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r22_r31,r10. */
+__save_r22_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -36,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r31,32[ep]
+ mov r1,ep
+#else
+ addi -36,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r31,32[sp]
+#endif
+ jmp [r10]
+ .size __save_r22_r31,.-__save_r22_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r22_r31. */
+ .align 2
+ .globl __return_r22_r31
+ .type __return_r22_r31,@function
+__return_r22_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r31
+ addi 36,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r31
+ addi 36,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r22_r31,.-__return_r22_r31
+#endif /* L_save_22c */
+
+#ifdef L_save_23c
+ .text
+ .align 2
+ .globl __save_r23_r31
+ .type __save_r23_r31,@function
+ /* Allocate space and save registers 23 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r23_r31,r10. */
+__save_r23_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -32,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r31,28[ep]
+ mov r1,ep
+#else
+ addi -32,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r31,28[sp]
+#endif
+ jmp [r10]
+ .size __save_r23_r31,.-__save_r23_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r23_r31. */
+ .align 2
+ .globl __return_r23_r31
+ .type __return_r23_r31,@function
+__return_r23_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r31
+ addi 32,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r31
+ addi 32,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r23_r31,.-__return_r23_r31
+#endif /* L_save_23c */
+
+#ifdef L_save_24c
+ .text
+ .align 2
+ .globl __save_r24_r31
+ .type __save_r24_r31,@function
+ /* Allocate space and save registers 24 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r24_r31,r10. */
+__save_r24_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -28,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r31,24[ep]
+ mov r1,ep
+#else
+ addi -28,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r31,24[sp]
+#endif
+ jmp [r10]
+ .size __save_r24_r31,.-__save_r24_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r24_r31. */
+ .align 2
+ .globl __return_r24_r31
+ .type __return_r24_r31,@function
+__return_r24_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r31
+ addi 28,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r31
+ addi 28,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r24_r31,.-__return_r24_r31
+#endif /* L_save_24c */
+
+#ifdef L_save_25c
+ .text
+ .align 2
+ .globl __save_r25_r31
+ .type __save_r25_r31,@function
+ /* Allocate space and save registers 25 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r25_r31,r10. */
+__save_r25_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -24,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r31,20[ep]
+ mov r1,ep
+#else
+ addi -24,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r31,20[sp]
+#endif
+ jmp [r10]
+ .size __save_r25_r31,.-__save_r25_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r25_r31. */
+ .align 2
+ .globl __return_r25_r31
+ .type __return_r25_r31,@function
+__return_r25_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r31
+ addi 24,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r31
+ addi 24,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r25_r31,.-__return_r25_r31
+#endif /* L_save_25c */
+
+#ifdef L_save_26c
+ .text
+ .align 2
+ .globl __save_r26_r31
+ .type __save_r26_r31,@function
+ /* Allocate space and save registers 26 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r26_r31,r10. */
+__save_r26_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -20,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r31,16[ep]
+ mov r1,ep
+#else
+ addi -20,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r31,16[sp]
+#endif
+ jmp [r10]
+ .size __save_r26_r31,.-__save_r26_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r26_r31. */
+ .align 2
+ .globl __return_r26_r31
+ .type __return_r26_r31,@function
+__return_r26_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r31
+ addi 20,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r31
+ addi 20,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r26_r31,.-__return_r26_r31
+#endif /* L_save_26c */
+
+#ifdef L_save_27c
+ .text
+ .align 2
+ .globl __save_r27_r31
+ .type __save_r27_r31,@function
+ /* Allocate space and save registers 27 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r27_r31,r10. */
+__save_r27_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -16,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r31,12[ep]
+ mov r1,ep
+#else
+ addi -16,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r31,12[sp]
+#endif
+ jmp [r10]
+ .size __save_r27_r31,.-__save_r27_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r27_r31. */
+ .align 2
+ .globl __return_r27_r31
+ .type __return_r27_r31,@function
+__return_r27_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r31
+ addi 16,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r31
+ addi 16,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r27_r31,.-__return_r27_r31
+#endif /* L_save_27c */
+
+#ifdef L_save_28c
+ .text
+ .align 2
+ .globl __save_r28_r31
+ .type __save_r28_r31,@function
+ /* Allocate space and save registers 28 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r28_r31,r10. */
+__save_r28_r31:
+ addi -12,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r31,8[sp]
+ jmp [r10]
+ .size __save_r28_r31,.-__save_r28_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r28_r31. */
+ .align 2
+ .globl __return_r28_r31
+ .type __return_r28_r31,@function
+__return_r28_r31:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r31
+ addi 12,sp,sp
+ jmp [r31]
+ .size __return_r28_r31,.-__return_r28_r31
+#endif /* L_save_28c */
+
+#ifdef L_save_29c
+ .text
+ .align 2
+ .globl __save_r29_r31
+ .type __save_r29_r31,@function
+ /* Allocate space and save registers 29 & 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r29_r31,r10. */
+__save_r29_r31:
+ addi -8,sp,sp
+ st.w r29,0[sp]
+ st.w r31,4[sp]
+ jmp [r10]
+ .size __save_r29_r31,.-__save_r29_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r29_r31. */
+ .align 2
+ .globl __return_r29_r31
+ .type __return_r29_r31,@function
+__return_r29_r31:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r31
+ addi 8,sp,sp
+ jmp [r31]
+ .size __return_r29_r31,.-__return_r29_r31
+#endif /* L_save_29c */
+
+#ifdef L_save_31c
+ .text
+ .align 2
+ .globl __save_r31
+ .type __save_r31,@function
+ /* Allocate space and save register 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jalr __save_r31,r10. */
+__save_r31:
+ addi -4,sp,sp
+ st.w r31,0[sp]
+ jmp [r10]
+ .size __save_r31,.-__save_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r31. */
+ .align 2
+ .globl __return_r31
+ .type __return_r31,@function
+__return_r31:
+ ld.w 0[sp],r31
+ addi 4,sp,sp
+ jmp [r31]
+ .size __return_r31,.-__return_r31
+#endif /* L_save_31c */
+
+#ifdef L_save_interrupt
+ .text
+ .align 2
+ .globl __save_interrupt
+ .type __save_interrupt,@function
+ /* Save registers ep, gp and r1 on the stack and load ep and gp with their expected values. */
+ /* Note, 20 bytes of stack have already been allocated. */
+ /* Called via: jalr __save_interrupt,r10. */
+__save_interrupt:
+ /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */
+ st.w ep,0[sp]
+ st.w gp,4[sp]
+ st.w r1,8[sp]
+ movhi hi(__ep),r0,ep
+ movea lo(__ep),ep,ep
+ movhi hi(__gp),r0,gp
+ movea lo(__gp),gp,gp
+ jmp [r10]
+ .size __save_interrupt,.-__save_interrupt
+
+ /* Restore saved registers, deallocate stack and return from the interrupt. */
+ /* Called via: jr __return_interrupt. */
+ .align 2
+ .globl __return_interrupt
+ .type __return_interrupt,@function
+__return_interrupt:
+ ld.w 0[sp],ep
+ ld.w 4[sp],gp
+ ld.w 8[sp],r1
+ ld.w 12[sp],r10
+ ld.w 16[sp],r11
+ addi 20,sp,sp
+ reti
+ .size __return_interrupt,.-__return_interrupt
+#endif /* L_save_interrupt */
+
+#ifdef L_save_all_interrupt
+ .text
+ .align 2
+ .globl __save_all_interrupt
+ .type __save_all_interrupt,@function
+ /* Save all registers except for those saved in __save_interrupt. */
+ /* Allocate enough stack for all of the registers & 16 bytes of space. */
+ /* Called via: jalr __save_all_interrupt,r10. */
+__save_all_interrupt:
+ addi -104,sp,sp
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sst.w r31,100[ep]
+ sst.w r2,96[ep]
+ sst.w gp,92[ep]
+ sst.w r6,88[ep]
+ sst.w r7,84[ep]
+ sst.w r8,80[ep]
+ sst.w r9,76[ep]
+ sst.w r11,72[ep]
+ sst.w r12,68[ep]
+ sst.w r13,64[ep]
+ sst.w r14,60[ep]
+ sst.w r15,56[ep]
+ sst.w r16,52[ep]
+ sst.w r17,48[ep]
+ sst.w r18,44[ep]
+ sst.w r19,40[ep]
+ sst.w r20,36[ep]
+ sst.w r21,32[ep]
+ sst.w r22,28[ep]
+ sst.w r23,24[ep]
+ sst.w r24,20[ep]
+ sst.w r25,16[ep]
+ sst.w r26,12[ep]
+ sst.w r27,8[ep]
+ sst.w r28,4[ep]
+ sst.w r29,0[ep]
+ mov r1,ep
+#else
+ st.w r31,100[sp]
+ st.w r2,96[sp]
+ st.w gp,92[sp]
+ st.w r6,88[sp]
+ st.w r7,84[sp]
+ st.w r8,80[sp]
+ st.w r9,76[sp]
+ st.w r11,72[sp]
+ st.w r12,68[sp]
+ st.w r13,64[sp]
+ st.w r14,60[sp]
+ st.w r15,56[sp]
+ st.w r16,52[sp]
+ st.w r17,48[sp]
+ st.w r18,44[sp]
+ st.w r19,40[sp]
+ st.w r20,36[sp]
+ st.w r21,32[sp]
+ st.w r22,28[sp]
+ st.w r23,24[sp]
+ st.w r24,20[sp]
+ st.w r25,16[sp]
+ st.w r26,12[sp]
+ st.w r27,8[sp]
+ st.w r28,4[sp]
+ st.w r29,0[sp]
+#endif
+ jmp [r10]
+ .size __save_all_interrupt,.-__save_all_interrupt
+
+ .globl __restore_all_interrupt
+ .type __restore_all_interrupt,@function
+ /* Restore all registers saved in __save_all_interrupt and
+ deallocate the stack space. */
+ /* Called via: jalr __restore_all_interrupt,r10. */
+__restore_all_interrupt:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 100[ep],r31
+ sld.w 96[ep],r2
+ sld.w 92[ep],gp
+ sld.w 88[ep],r6
+ sld.w 84[ep],r7
+ sld.w 80[ep],r8
+ sld.w 76[ep],r9
+ sld.w 72[ep],r11
+ sld.w 68[ep],r12
+ sld.w 64[ep],r13
+ sld.w 60[ep],r14
+ sld.w 56[ep],r15
+ sld.w 52[ep],r16
+ sld.w 48[ep],r17
+ sld.w 44[ep],r18
+ sld.w 40[ep],r19
+ sld.w 36[ep],r20
+ sld.w 32[ep],r21
+ sld.w 28[ep],r22
+ sld.w 24[ep],r23
+ sld.w 20[ep],r24
+ sld.w 16[ep],r25
+ sld.w 12[ep],r26
+ sld.w 8[ep],r27
+ sld.w 4[ep],r28
+ sld.w 0[ep],r29
+ mov r1,ep
+#else
+ ld.w 100[sp],r31
+ ld.w 96[sp],r2
+ ld.w 92[sp],gp
+ ld.w 88[sp],r6
+ ld.w 84[sp],r7
+ ld.w 80[sp],r8
+ ld.w 76[sp],r9
+ ld.w 72[sp],r11
+ ld.w 68[sp],r12
+ ld.w 64[sp],r13
+ ld.w 60[sp],r14
+ ld.w 56[sp],r15
+ ld.w 52[sp],r16
+ ld.w 48[sp],r17
+ ld.w 44[sp],r18
+ ld.w 40[sp],r19
+ ld.w 36[sp],r20
+ ld.w 32[sp],r21
+ ld.w 28[sp],r22
+ ld.w 24[sp],r23
+ ld.w 20[sp],r24
+ ld.w 16[sp],r25
+ ld.w 12[sp],r26
+ ld.w 8[sp],r27
+ ld.w 4[sp],r28
+ ld.w 0[sp],r29
+#endif
+ addi 104,sp,sp
+ jmp [r10]
+ .size __restore_all_interrupt,.-__restore_all_interrupt
+#endif /* L_save_all_interrupt */
+
+#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__)
+#ifdef L_callt_save_r2_r29
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Allocate space and save registers 2, 20 .. 29 on the stack. */
+ /* Called via: callt ctoff(__callt_save_r2_r29). */
+ .align 2
+.L_save_r2_r29:
+ add -4, sp
+ st.w r2, 0[sp]
+ prepare {r20 - r29}, 0
+ ctret
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: callt ctoff(__callt_return_r2_r29). */
+ .align 2
+.L_return_r2_r29:
+ dispose 0, {r20-r29}
+ ld.w 0[sp], r2
+ add 4, sp
+ jmp [r31]
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_r2_r29
+ .type __callt_save_r2_r29,@function
+__callt_save_r2_r29: .short ctoff(.L_save_r2_r29)
+
+ .global __callt_return_r2_r29
+ .type __callt_return_r2_r29,@function
+__callt_return_r2_r29: .short ctoff(.L_return_r2_r29)
+
+#endif /* L_callt_save_r2_r29. */
+
+#ifdef L_callt_save_r2_r31
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: callt ctoff(__callt_save_r2_r31). */
+ .align 2
+.L_save_r2_r31:
+ add -4, sp
+ st.w r2, 0[sp]
+ prepare {r20 - r29, r31}, 0
+ ctret
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: callt ctoff(__callt_return_r2_r31). */
+ .align 2
+.L_return_r2_r31:
+ dispose 0, {r20 - r29, r31}
+ ld.w 0[sp], r2
+ addi 4, sp, sp
+ jmp [r31]
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_r2_r31
+ .type __callt_save_r2_r31,@function
+__callt_save_r2_r31: .short ctoff(.L_save_r2_r31)
+
+ .global __callt_return_r2_r31
+ .type __callt_return_r2_r31,@function
+__callt_return_r2_r31: .short ctoff(.L_return_r2_r31)
+
+#endif /* L_callt_save_r2_r31 */
+
+#ifdef L_callt_save_interrupt
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Save registers r1, ep, gp, r10 on stack and load up with expected values. */
+ /* Called via: callt ctoff(__callt_save_interrupt). */
+ .align 2
+.L_save_interrupt:
+ /* SP has already been moved before callt ctoff(_save_interrupt). */
+ /* R1, R10, R11, ctpc and ctpsw have already been saved before callt ctoff(_save_interrupt). */
+ /* addi -28, sp, sp */
+ /* st.w r1, 24[sp] */
+ /* st.w r10, 12[sp] */
+ /* st.w r11, 16[sp] */
+ /* stsr ctpc, r10 */
+ /* st.w r10, 20[sp] */
+ /* stsr ctpsw, r10 */
+ /* st.w r10, 24[sp] */
+ st.w ep, 0[sp]
+ st.w gp, 4[sp]
+ st.w r1, 8[sp]
+ mov hilo(__ep),ep
+ mov hilo(__gp),gp
+ ctret
+
+ .call_table_text
+ /* Restore saved registers, deallocate stack and return from the interrupt. */
+ /* Called via: callt ctoff(__callt_restore_interrupt). */
+ .align 2
+ .globl __return_interrupt
+ .type __return_interrupt,@function
+.L_return_interrupt:
+ ld.w 24[sp], r1
+ ldsr r1, ctpsw
+ ld.w 20[sp], r1
+ ldsr r1, ctpc
+ ld.w 16[sp], r11
+ ld.w 12[sp], r10
+ ld.w 8[sp], r1
+ ld.w 4[sp], gp
+ ld.w 0[sp], ep
+ addi 28, sp, sp
+ reti
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_interrupt
+ .type __callt_save_interrupt,@function
+__callt_save_interrupt: .short ctoff(.L_save_interrupt)
+
+ .global __callt_return_interrupt
+ .type __callt_return_interrupt,@function
+__callt_return_interrupt: .short ctoff(.L_return_interrupt)
+
+#endif /* L_callt_save_interrupt */
+
+#ifdef L_callt_save_all_interrupt
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Save all registers except for those saved in __save_interrupt. */
+ /* Allocate enough stack for all of the registers & 16 bytes of space. */
+ /* Called via: callt ctoff(__callt_save_all_interrupt). */
+ .align 2
+.L_save_all_interrupt:
+ addi -60, sp, sp
+#ifdef __EP__
+ mov ep, r1
+ mov sp, ep
+ sst.w r2, 56[ep]
+ sst.w r5, 52[ep]
+ sst.w r6, 48[ep]
+ sst.w r7, 44[ep]
+ sst.w r8, 40[ep]
+ sst.w r9, 36[ep]
+ sst.w r11, 32[ep]
+ sst.w r12, 28[ep]
+ sst.w r13, 24[ep]
+ sst.w r14, 20[ep]
+ sst.w r15, 16[ep]
+ sst.w r16, 12[ep]
+ sst.w r17, 8[ep]
+ sst.w r18, 4[ep]
+ sst.w r19, 0[ep]
+ mov r1, ep
+#else
+ st.w r2, 56[sp]
+ st.w r5, 52[sp]
+ st.w r6, 48[sp]
+ st.w r7, 44[sp]
+ st.w r8, 40[sp]
+ st.w r9, 36[sp]
+ st.w r11, 32[sp]
+ st.w r12, 28[sp]
+ st.w r13, 24[sp]
+ st.w r14, 20[sp]
+ st.w r15, 16[sp]
+ st.w r16, 12[sp]
+ st.w r17, 8[sp]
+ st.w r18, 4[sp]
+ st.w r19, 0[sp]
+#endif
+ prepare {r20 - r29, r31}, 0
+ ctret
+
+ /* Restore all registers saved in __save_all_interrupt and
+ deallocate the stack space. */
+ /* Called via: callt ctoff(__callt_restore_all_interrupt). */
+ .align 2
+.L_restore_all_interrupt:
+ dispose 0, {r20 - r29, r31}
+#ifdef __EP__
+ mov ep, r1
+ mov sp, ep
+ sld.w 0 [ep], r19
+ sld.w 4 [ep], r18
+ sld.w 8 [ep], r17
+ sld.w 12[ep], r16
+ sld.w 16[ep], r15
+ sld.w 20[ep], r14
+ sld.w 24[ep], r13
+ sld.w 28[ep], r12
+ sld.w 32[ep], r11
+ sld.w 36[ep], r9
+ sld.w 40[ep], r8
+ sld.w 44[ep], r7
+ sld.w 48[ep], r6
+ sld.w 52[ep], r5
+ sld.w 56[ep], r2
+ mov r1, ep
+#else
+ ld.w 0 [sp], r19
+ ld.w 4 [sp], r18
+ ld.w 8 [sp], r17
+ ld.w 12[sp], r16
+ ld.w 16[sp], r15
+ ld.w 20[sp], r14
+ ld.w 24[sp], r13
+ ld.w 28[sp], r12
+ ld.w 32[sp], r11
+ ld.w 36[sp], r9
+ ld.w 40[sp], r8
+ ld.w 44[sp], r7
+ ld.w 48[sp], r6
+ ld.w 52[sp], r5
+ ld.w 56[sp], r2
+#endif
+ addi 60, sp, sp
+ ctret
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_all_interrupt
+ .type __callt_save_all_interrupt,@function
+__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt)
+
+ .global __callt_restore_all_interrupt
+ .type __callt_restore_all_interrupt,@function
+__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt)
+
+#endif /* L_callt_save_all_interrupt */
+
+
+#define MAKE_CALLT_FUNCS( START ) \
+ .call_table_text ;\
+ .align 2 ;\
+ /* Allocate space and save registers START .. r29 on the stack. */ ;\
+ /* Called via: callt ctoff(__callt_save_START_r29). */ ;\
+.L_save_##START##_r29: ;\
+ prepare { START - r29 }, 0 ;\
+ ctret ;\
+ ;\
+ /* Restore saved registers, deallocate stack and return. */ ;\
+ /* Called via: callt ctoff(__return_START_r29). */ ;\
+ .align 2 ;\
+.L_return_##START##_r29: ;\
+ dispose 0, { START - r29 }, r31 ;\
+ ;\
+ /* Place the offsets of the start of these funcs into the call table. */;\
+ .call_table_data ;\
+ ;\
+ .global __callt_save_##START##_r29 ;\
+ .type __callt_save_##START##_r29,@function ;\
+__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\
+ ;\
+ .global __callt_return_##START##_r29 ;\
+ .type __callt_return_##START##_r29,@function ;\
+__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 )
+
+
+#define MAKE_CALLT_CFUNCS( START ) \
+ .call_table_text ;\
+ .align 2 ;\
+ /* Allocate space and save registers START .. r31 on the stack. */ ;\
+ /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\
+.L_save_##START##_r31c: ;\
+ prepare { START - r29, r31}, 0 ;\
+ ctret ;\
+ ;\
+ /* Restore saved registers, deallocate stack and return. */ ;\
+ /* Called via: callt ctoff(__return_START_r31c). */ ;\
+ .align 2 ;\
+.L_return_##START##_r31c: ;\
+ dispose 0, { START - r29, r31}, r31 ;\
+ ;\
+ /* Place the offsets of the start of these funcs into the call table. */;\
+ .call_table_data ;\
+ ;\
+ .global __callt_save_##START##_r31c ;\
+ .type __callt_save_##START##_r31c,@function ;\
+__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\
+ ;\
+ .global __callt_return_##START##_r31c ;\
+ .type __callt_return_##START##_r31c,@function ;\
+__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c )
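+
+/* Reader's note (not part of the original patch): the macro expansions below
+   rely on the V850E PREPARE/DISPOSE instructions.  PREPARE pushes the listed
+   registers and adjusts sp in a single instruction; DISPOSE pops them, and
+   its trailing ", r31" form also branches to the just-restored return
+   address, which is why the .L_return_* bodies need no explicit jmp.  */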
+
+
+#ifdef L_callt_save_20
+ MAKE_CALLT_FUNCS (r20)
+#endif
+#ifdef L_callt_save_21
+ MAKE_CALLT_FUNCS (r21)
+#endif
+#ifdef L_callt_save_22
+ MAKE_CALLT_FUNCS (r22)
+#endif
+#ifdef L_callt_save_23
+ MAKE_CALLT_FUNCS (r23)
+#endif
+#ifdef L_callt_save_24
+ MAKE_CALLT_FUNCS (r24)
+#endif
+#ifdef L_callt_save_25
+ MAKE_CALLT_FUNCS (r25)
+#endif
+#ifdef L_callt_save_26
+ MAKE_CALLT_FUNCS (r26)
+#endif
+#ifdef L_callt_save_27
+ MAKE_CALLT_FUNCS (r27)
+#endif
+#ifdef L_callt_save_28
+ MAKE_CALLT_FUNCS (r28)
+#endif
+#ifdef L_callt_save_29
+ MAKE_CALLT_FUNCS (r29)
+#endif
+
+#ifdef L_callt_save_20c
+ MAKE_CALLT_CFUNCS (r20)
+#endif
+#ifdef L_callt_save_21c
+ MAKE_CALLT_CFUNCS (r21)
+#endif
+#ifdef L_callt_save_22c
+ MAKE_CALLT_CFUNCS (r22)
+#endif
+#ifdef L_callt_save_23c
+ MAKE_CALLT_CFUNCS (r23)
+#endif
+#ifdef L_callt_save_24c
+ MAKE_CALLT_CFUNCS (r24)
+#endif
+#ifdef L_callt_save_25c
+ MAKE_CALLT_CFUNCS (r25)
+#endif
+#ifdef L_callt_save_26c
+ MAKE_CALLT_CFUNCS (r26)
+#endif
+#ifdef L_callt_save_27c
+ MAKE_CALLT_CFUNCS (r27)
+#endif
+#ifdef L_callt_save_28c
+ MAKE_CALLT_CFUNCS (r28)
+#endif
+#ifdef L_callt_save_29c
+ MAKE_CALLT_CFUNCS (r29)
+#endif
+
+
+#ifdef L_callt_save_31c
+ .call_table_text
+ .align 2
+ /* Allocate space and save register r31 on the stack. */
+ /* Called via: callt ctoff(__callt_save_r31c). */
+.L_callt_save_r31c:
+ prepare {r31}, 0
+ ctret
+
+ /* Restore saved registers, deallocate stack and return. */
+ /* Called via: callt ctoff(__return_r31c). */
+ .align 2
+.L_callt_return_r31c:
+ dispose 0, {r31}, r31
+
+ /* Place the offsets of the start of these funcs into the call table. */
+ .call_table_data
+
+ .global __callt_save_r31c
+ .type __callt_save_r31c,@function
+__callt_save_r31c: .short ctoff(.L_callt_save_r31c)
+
+ .global __callt_return_r31c
+ .type __callt_return_r31c,@function
+__callt_return_r31c: .short ctoff(.L_callt_return_r31c)
+#endif
+
+#endif /* __v850e__ */
+
+/* libgcc2 routines for NEC V850. */
+/* Double-integer arithmetic operations. */
+
+#ifdef L_negdi2
+ .text
+ .global ___negdi2
+ .type ___negdi2, @function
+___negdi2:
+ not r6, r10
+ add 1, r10
+ setf l, r6
+ not r7, r11
+ add r6, r11
+ jmp [lp]
+
+ .size ___negdi2,.-___negdi2
+#endif
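+
+/* Reader's note: a C sketch of the two-word negation above (an illustrative
+   aid, not part of the original patch).  -x is ~x + 1, with the carry out of
+   the low word propagated into the high word:
+
+       unsigned long lo_r = ~lo + 1;
+       unsigned long hi_r = ~hi + (lo_r == 0);    carry only when lo was 0
+*/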
+
+#ifdef L_cmpdi2
+ .text
+ .global ___cmpdi2
+ .type ___cmpdi2,@function
+___cmpdi2:
+ # Signed comparison between the high words.
+ cmp r9, r7
+ be .L_cmpdi_cmp_low
+ setf ge, r10
+ setf gt, r6
+ add r6, r10
+ jmp [lp]
+.L_cmpdi_cmp_low:
+ # Unsigned comparison between the low words.
+ cmp r8, r6
+ setf nl, r10
+ setf h, r6
+ add r6, r10
+ jmp [lp]
+ .size ___cmpdi2, . - ___cmpdi2
+#endif
+
+#ifdef L_ucmpdi2
+ .text
+ .global ___ucmpdi2
+ .type ___ucmpdi2,@function
+___ucmpdi2:
+ cmp r9, r7 # Check if the high words are the same.
+ bne .L_ucmpdi_check_psw
+ cmp r8, r6 # Compare the low words.
+.L_ucmpdi_check_psw:
+ setf nl, r10 #
+ setf h, r6 #
+ add r6, r10 # Add the results of the NL and H comparisons.
+ jmp [lp]
+ .size ___ucmpdi2, . - ___ucmpdi2
+#endif
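+
+/* Reader's note (illustrative only, not part of the original patch): both
+   ___cmpdi2 and ___ucmpdi2 follow the usual libgcc convention of returning
+   0, 1 or 2 for a < b, a == b and a > b.  The setf/setf/add idiom above
+   computes exactly that, roughly:
+
+       return (a >= b) + (a > b);    0, 1 or 2
+*/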
+
+#ifdef L_muldi3
+ .text
+ .global ___muldi3
+ .type ___muldi3,@function
+___muldi3:
+#ifdef __v850__
+ jarl __save_r26_r31, r10
+ addi 16, sp, sp
+ mov r6, r28
+ shr 15, r28
+ movea lo(32767), r0, r14
+ and r14, r28
+ mov r8, r10
+ shr 15, r10
+ and r14, r10
+ mov r6, r19
+ shr 30, r19
+ mov r7, r12
+ shl 2, r12
+ or r12, r19
+ and r14, r19
+ mov r8, r13
+ shr 30, r13
+ mov r9, r12
+ shl 2, r12
+ or r12, r13
+ and r14, r13
+ mov r7, r11
+ shr 13, r11
+ and r14, r11
+ mov r9, r31
+ shr 13, r31
+ and r14, r31
+ mov r7, r29
+ shr 28, r29
+ and r14, r29
+ mov r9, r12
+ shr 28, r12
+ and r14, r12
+ and r14, r6
+ and r14, r8
+ mov r6, r14
+ mulh r8, r14
+ mov r6, r16
+ mulh r10, r16
+ mov r6, r18
+ mulh r13, r18
+ mov r6, r15
+ mulh r31, r15
+ mulh r12, r6
+ mov r28, r17
+ mulh r10, r17
+ add -16, sp
+ mov r28, r12
+ mulh r8, r12
+ add r17, r18
+ mov r28, r17
+ mulh r31, r17
+ add r12, r16
+ mov r28, r12
+ mulh r13, r12
+ add r17, r6
+ mov r19, r17
+ add r12, r15
+ mov r19, r12
+ mulh r8, r12
+ mulh r10, r17
+ add r12, r18
+ mov r19, r12
+ mulh r13, r12
+ add r17, r15
+ mov r11, r13
+ mulh r8, r13
+ add r12, r6
+ mov r11, r12
+ mulh r10, r12
+ add r13, r15
+ mulh r29, r8
+ add r12, r6
+ mov r16, r13
+ shl 15, r13
+ add r14, r13
+ mov r18, r12
+ shl 30, r12
+ mov r13, r26
+ add r12, r26
+ shr 15, r14
+ movhi hi(131071), r0, r12
+ movea lo(131071), r12, r13
+ and r13, r14
+ mov r16, r12
+ and r13, r12
+ add r12, r14
+ mov r18, r12
+ shl 15, r12
+ and r13, r12
+ add r12, r14
+ shr 17, r14
+ shr 17, r16
+ add r14, r16
+ shl 13, r15
+ shr 2, r18
+ add r18, r15
+ add r15, r16
+ mov r16, r27
+ add r8, r6
+ shl 28, r6
+ add r6, r27
+ mov r26, r10
+ mov r27, r11
+ jr __return_r26_r31
+#else /* defined(__v850e__) */
+ /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */
+ /* r7 r6 r9 r8 */
+ mov r8, r10
+ mulu r7, r8, r0 /* Ahi * Blo */
+ mulu r6, r9, r0 /* Alo * Bhi */
+ mulu r6, r10, r11 /* Alo * Blo */
+ add r8, r11
+ add r9, r11
+ jmp [r31]
+#endif /* defined(__v850e__) */
+ .size ___muldi3, . - ___muldi3
+#endif
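+
+/* Reader's note: a C sketch of the V850E ___muldi3 path above (illustrative
+   only, not part of the original patch).  Only the low 64 bits of the
+   product are needed, so the Ahi*Bhi term is dropped:
+
+       unsigned long long lo = (unsigned long long) alo * blo;
+       unsigned long hi = (unsigned long) (lo >> 32) + ahi * blo + alo * bhi;
+       result = ((unsigned long long) hi << 32) | (unsigned long) lo;
+*/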
+
diff --git a/libgcc/config/v850/t-v850 b/libgcc/config/v850/t-v850
new file mode 100644
index 00000000000..b61703ace09
--- /dev/null
+++ b/libgcc/config/v850/t-v850
@@ -0,0 +1,60 @@
+LIB1ASMSRC = v850/lib1funcs.S
+LIB1ASMFUNCS = _mulsi3 \
+ _divsi3 \
+ _udivsi3 \
+ _modsi3 \
+ _umodsi3 \
+ _save_2 \
+ _save_20 \
+ _save_21 \
+ _save_22 \
+ _save_23 \
+ _save_24 \
+ _save_25 \
+ _save_26 \
+ _save_27 \
+ _save_28 \
+ _save_29 \
+ _save_2c \
+ _save_20c \
+ _save_21c \
+ _save_22c \
+ _save_23c \
+ _save_24c \
+ _save_25c \
+ _save_26c \
+ _save_27c \
+ _save_28c \
+ _save_29c \
+ _save_31c \
+ _save_interrupt \
+ _save_all_interrupt \
+ _callt_save_20 \
+ _callt_save_21 \
+ _callt_save_22 \
+ _callt_save_23 \
+ _callt_save_24 \
+ _callt_save_25 \
+ _callt_save_26 \
+ _callt_save_27 \
+ _callt_save_28 \
+ _callt_save_29 \
+ _callt_save_20c \
+ _callt_save_21c \
+ _callt_save_22c \
+ _callt_save_23c \
+ _callt_save_24c \
+ _callt_save_25c \
+ _callt_save_26c \
+ _callt_save_27c \
+ _callt_save_28c \
+ _callt_save_29c \
+ _callt_save_31c \
+ _callt_save_interrupt \
+ _callt_save_all_interrupt \
+ _callt_save_r2_r29 \
+ _callt_save_r2_r31 \
+ _negdi2 \
+ _cmpdi2 \
+ _ucmpdi2 \
+ _muldi3
diff --git a/libgcc/config/vax/lib1funcs.S b/libgcc/config/vax/lib1funcs.S
new file mode 100644
index 00000000000..1d57b56dad9
--- /dev/null
+++ b/libgcc/config/vax/lib1funcs.S
@@ -0,0 +1,92 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of GCC.
+ Contributed by Maciej W. Rozycki <macro@linux-mips.org>.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef L_udivsi3
+ .text
+ .globl __udivsi3
+ .type __udivsi3, @function
+__udivsi3:
+ .word 0
+ movl 8(%ap), %r1
+ blss 0f /* Check bit #31 of divisor. */
+ movl 4(%ap), %r2
+ blss 1f /* Check bit #31 of dividend. */
+
+ /* Both zero, do a standard division. */
+
+ divl3 %r1, %r2, %r0
+ ret
+
+ /* MSB of divisor set, only 1 or 0 may result. */
+0:
+ decl %r1
+ clrl %r0
+ cmpl %r1, 4(%ap)
+ adwc $0, %r0
+ ret
+
+ /* MSB of dividend set, do an extended division. */
+1:
+ clrl %r3
+ ediv %r1, %r2, %r0, %r3
+ ret
+ .size __udivsi3, . - __udivsi3
+ .previous
+#endif
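+
+/* Reader's note: a rough C equivalent of the strategy above (illustrative
+   only, not part of the original patch).  DIVL is a signed 32-bit divide,
+   so the two "bit #31 set" cases are handled separately:
+
+       if (den & 0x80000000)                  quotient can only be 0 or 1
+         return num >= den;
+       if (num & 0x80000000)                  widen and use EDIV (64 / 32)
+         return (unsigned long long) num / den;
+       return (int) num / (int) den;          both fit the signed range
+*/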
+
+#ifdef L_umodsi3
+ .text
+ .globl __umodsi3
+ .type __umodsi3, @function
+__umodsi3:
+ .word 0
+ movl 8(%ap), %r1
+ blss 0f /* Check bit #31 of divisor. */
+ movl 4(%ap), %r2
+ blss 1f /* Check bit #31 of dividend. */
+
+ /* Both zero, do a standard division. */
+
+ divl3 %r1, %r2, %r0
+ mull2 %r0, %r1
+ subl3 %r1, %r2, %r0
+ ret
+
+ /* MSB of divisor set, subtract the divisor at most once. */
+0:
+ movl 4(%ap), %r2
+ clrl %r0
+ cmpl %r2, %r1
+ sbwc $0, %r0
+ bicl2 %r0, %r1
+ subl3 %r1, %r2, %r0
+ ret
+
+ /* MSB of dividend set, do an extended division. */
+1:
+ clrl %r3
+ ediv %r1, %r2, %r3, %r0
+ ret
+ .size __umodsi3, . - __umodsi3
+ .previous
+#endif
diff --git a/libgcc/config/vax/t-linux b/libgcc/config/vax/t-linux
new file mode 100644
index 00000000000..17929c8717c
--- /dev/null
+++ b/libgcc/config/vax/t-linux
@@ -0,0 +1,2 @@
+LIB1ASMSRC = vax/lib1funcs.S
+LIB1ASMFUNCS = _udivsi3 _umodsi3
diff --git a/libgcc/config/xtensa/ieee754-df.S b/libgcc/config/xtensa/ieee754-df.S
new file mode 100644
index 00000000000..9b46889bdc2
--- /dev/null
+++ b/libgcc/config/xtensa/ieee754-df.S
@@ -0,0 +1,2388 @@
+/* IEEE-754 double-precision functions for Xtensa
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negdf2
+
+ .align 4
+ .global __negdf2
+ .type __negdf2, @function
+__negdf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor xh, xh, a4
+ leaf_return
+
+#endif /* L_negdf2 */
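+
+/* Reader's note (illustrative, not part of the original patch): negating an
+   IEEE-754 double only toggles the sign bit, so __negdf2 reduces to
+   "xh ^= 0x80000000" on the word that holds the sign and exponent.  */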
+
+#ifdef L_addsubdf3
+
+ /* Addition */
+__adddf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __adddf3
+ .type __adddf3, @function
+__adddf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Ladd_xnan_or_inf
+ ball yh, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, xh, 20, 12
+ extui a8, yh, 20, 12
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Ladd_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Ladd_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Ladd_addy:
+ /* Do the 64-bit addition. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1: leaf_return
+
+.Ladd_bigshifty:
+ /* Exponent difference >= 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Ladd_addy
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Ladd_addy
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone xh, a6, .Ladd_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_bigshiftx
+
+ ssr a10
+ sll a9, xl
+ src xl, xh, xl
+ srl xh, xh
+
+.Ladd_addx:
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_bigshiftx:
+ /* Mostly the same thing as "bigshifty".... */
+ bgeui a10, 64, .Ladd_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ movi xh, 0
+ beqz a11, .Ladd_addx
+ or a9, a9, a10
+ j .Ladd_addx
+
+.Ladd_returny:
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x100000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+ or:
+ shifted mantissa + ((x + 1) << 19)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
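+ /* [Editorial note] To see the simplification above:
+      shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+    and since -(x << 19) + (x << 20) = x << 19, this equals
+      shifted mantissa + (x << 19) + (1 << 19)
+      = shifted mantissa + ((x + 1) << 19),
+    which is what the addi/slli/add sequence below computes, with the
+    original exponent x taken from a8. */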
+
+ /* Shift xh/xl right by one bit. Save the lsb of xl. */
+ mov a10, xl
+ ssai 1
+ src xl, xh, xl
+ srl xh, xh
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 19
+ add xh, xh, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball xh, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ movi xl, 0
+ srli xh, xh, 20
+ slli xh, xh, 20
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or xh, xh, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Ladd_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
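+
+ /* [Editorial note] The .Ladd_round / .Ladd_exactlyhalf /
+    .Ladd_roundcarry sequence above (and the matching labels in the
+    other routines in this file) implements round-to-nearest-even.
+    A rough, hypothetical C sketch of the same policy, with 'mant'
+    standing for the packed xh/xl result and 'guard' for the leftover
+    fraction word in a9 (illustration only):
+
+      if (guard & 0x80000000u) {      // leftover fraction >= 1/2
+          mant += 1;                  // round up (carry handled separately)
+          if ((guard << 1) == 0)      // leftover was exactly 1/2: a tie
+              mant &= ~(uint64_t) 1;  // round the tie to the even value
+      }
+ */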
+
+
+ /* Subtraction */
+__subdf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subdf3
+ .type __subdf3, @function
+__subdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Lsub_xnan_or_inf
+ ball yh, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, xh, 20, 11
+ extui a8, yh, 20, 11
+ bltu xh, yh, .Lsub_xsmaller
+ beq xh, yh, .Lsub_compare_low
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Lsub_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Lsub_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Lsub_suby:
+ /* Do the 64-bit subtraction. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_compare_low:
+ /* The high words are equal. Compare the low words. */
+ bltu xl, yl, .Lsub_xsmaller
+ bltu yl, xl, .Lsub_ysmaller
+ /* The operands are equal. Return 0.0. */
+ movi xh, 0
+ movi xl, 0
+1: leaf_return
+
+.Lsub_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_bigshifty:
+ /* Exponent difference >= 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Lsub_suby
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Lsub_suby
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone xh, a6, .Lsub_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_bigshiftx
+
+ ssr a10
+ movi a9, 0
+ src a9, xl, a9
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Negate y. */
+ slli a11, a6, 11
+ xor yh, yh, a11
+
+.Lsub_subx:
+ sub xl, yl, xl
+ sub xh, yh, xh
+ bgeu yl, xl, 1f
+ addi xh, xh, -1
+1:
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Lsub_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ bnone yh, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_bigshiftx:
+ /* Mostly the same thing as "bigshifty", but with the sign bit of the
+ shifted value set so that the subsequent subtraction flips the
+ sign of y. */
+ bgeui a10, 64, .Lsub_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ slli xh, a6, 11 /* set sign bit of xh */
+ beqz a11, .Lsub_subx
+ or a9, a9, a10
+ j .Lsub_subx
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, xh, 12
+ beqz a8, .Lsub_xhzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 12
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_shift_lt32:
+ /* Shift the mantissa (a8/xl/a9) left by a6. */
+ ssl a6
+ src a8, a8, xl
+ src xl, xl, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lsub_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+.Lsub_xhzero:
+ /* When normalizing the result, all the mantissa bits in the high
+ word are zero. Shift by "20 + (leading zero count of xl) + 1". */
+ do_nsau a6, xl, a7, a11
+ addi a6, a6, 21
+ blt a10, a6, .Lsub_subnormal
+
+.Lsub_normalize_shift:
+ bltui a6, 32, .Lsub_shift_lt32
+
+ ssl a6
+ src a8, xl, a9
+ sll xl, a9
+ movi a9, 0
+
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_muldf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__muldf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Lmul_xh_zero
+ do_nsau a10, xh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+.Lmul_xh_zero:
+ do_nsau a10, xl, a11, a12
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Lmul_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Lmul_xnormalized
+.Lmul_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* If y is zero, return zero. */
+ or a10, yh, yl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Lmul_yh_zero
+ do_nsau a10, yh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+.Lmul_yh_zero:
+ do_nsau a10, yl, a11, a12
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Lmul_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Lmul_ynormalized
+.Lmul_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ bnez yl, 1f
+ slli a8, yh, 1
+ bnez a8, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall yh, a6, .Lmul_returnx
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov xh, yh
+ mov xl, yl
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli xh, xh, 1
+ ssai 1
+ src xh, a7, xh
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ bnez xl, .Lmul_returny
+ slli a8, xh, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x80000 /* make it a quiet NaN */
+ or xh, yh, a7
+ j .Lmul_done
+
+ .align 4
+ .global __muldf3
+ .type __muldf3, @function
+__muldf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Lmul_xnan_or_inf
+ ball yh, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
+ The least-significant word of the result is thrown away except
+ that if it is nonzero, the lsb of a6 is set to 1. */
+#if XCHAL_HAVE_MUL32_HIGH
+
+ /* Compute a6 with any carry-outs in a10. */
+ movi a10, 0
+ mull a6, xl, yh
+ mull a11, xh, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ muluh a11, xl, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* If the low word of the result is nonzero, set the lsb of a6. */
+ mull a11, xl, yl
+ beqz a11, 1f
+ movi a9, 1
+ or a6, a6, a9
+1:
+ /* Compute xl with any carry-outs in a9. */
+ movi a9, 0
+ mull a11, xh, yh
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh a11, xh, yl
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh xl, xl, yh
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Compute xh. */
+ muluh xh, xh, yh
+ add xh, xh, a9
+
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
+ products. These partial products are:
+
+ 0 xll * yll
+
+ 1 xll * ylh
+ 2 xlh * yll
+
+ 3 xll * yhl
+ 4 xlh * ylh
+ 5 xhl * yll
+
+ 6 xll * yhh
+ 7 xlh * yhl
+ 8 xhl * ylh
+ 9 xhh * yll
+
+ 10 xlh * yhh
+ 11 xhl * yhl
+ 12 xhh * ylh
+
+ 13 xhl * yhh
+ 14 xhh * yhl
+
+ 15 xhh * yhh
+
+ where the input chunks are (hh, hl, lh, ll). If using the Mul16
+ or Mul32 multiplier options, these input chunks must be stored in
+ separate registers. For Mac16, the UMUL.AA.* opcodes can specify
+ that the inputs come from either half of the registers, so there
+ is no need to shift them out ahead of time. If there is no
+ multiply hardware, the 16-bit chunks can be extracted when setting
+ up the arguments to the separate multiply function. */
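+
+ /* [Editorial note] For reference, the same 128-bit product can be
+    written in portable C using 64-bit intermediates; the assembly
+    below performs the equivalent computation out of 16-bit pieces
+    because the configured core may only provide 16x16 or 32x32->32
+    multiplies.  Hypothetical helper, for illustration only:
+
+      void mul64x64 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
+      {
+        uint64_t xlo = x & 0xffffffffu, xhi = x >> 32;
+        uint64_t ylo = y & 0xffffffffu, yhi = y >> 32;
+        uint64_t ll = xlo * ylo, lh = xlo * yhi;
+        uint64_t hl = xhi * ylo, hh = xhi * yhi;
+        uint64_t mid = (ll >> 32) + (lh & 0xffffffffu) + (hl & 0xffffffffu);
+        *lo = (mid << 32) | (ll & 0xffffffffu);
+        *hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
+      }
+ */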
+
+ /* Save a7 since it is needed to hold a temporary value. */
+ s32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 8
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define xlh a12
+#define ylh a13
+#define xhh a14
+#define yhh a15
+
+ /* Get the high halves of the inputs into registers. */
+ srli xlh, xl, 16
+ srli ylh, yl, 16
+ srli xhh, xh, 16
+ srli yhh, yh, 16
+
+#define xll xl
+#define yll yl
+#define xhl xh
+#define yhl yh
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui xl, xl, 0, 16
+ extui xh, xh, 0, 16
+ extui yl, yl, 0, 16
+ extui yh, yh, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a10 with carry-out in a9. */
+ do_mul(a10, xl, l, yl, h) /* pp 1 */
+ do_mul(a11, xl, h, yl, l) /* pp 2 */
+ movi a9, 0
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Initialize a6 with a9/a10 shifted into position. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a6, a9, a10
+
+ /* Compute the low word into a10. */
+ do_mul(a11, xl, l, yl, l) /* pp 0 */
+ sll a10, a10
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a6, a6, 1
+1:
+ /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
+ This is good enough to determine the low half of a6, so that any
+ nonzero bits from the low word of the result can be collapsed
+ into a6, freeing up a register. */
+ movi a9, 0
+ do_mul(a11, xl, l, yh, l) /* pp 3 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xl, h, yl, h) /* pp 4 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xh, l, yl, l) /* pp 5 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Collapse any nonzero bits from the low word into a6. */
+ beqz a10, 1f
+ movi a11, 1
+ or a6, a6, a11
+1:
+ /* Add pp6-9 into a11 with carry-outs in a10. */
+ do_mul(a7, xl, l, yh, h) /* pp 6 */
+ do_mul(a11, xh, h, yl, l) /* pp 9 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xl, h, yh, l) /* pp 7 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xh, l, yl, h) /* pp 8 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Add pp10-12 into xl with carry-outs in a9. */
+ movi a9, 0
+ do_mul(xl, xl, h, yh, h) /* pp 10 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, l, yh, l) /* pp 11 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, h, yl, h) /* pp 12 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Add pp13-14 into a11 with carry-outs in a10. */
+ do_mul(a11, xh, l, yh, h) /* pp 13 */
+ do_mul(a7, xh, h, yh, l) /* pp 14 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to xl. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add xl, xl, a11
+ bgeu xl, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Compute xh. */
+ do_mul(xh, xh, h, yh, h) /* pp 15 */
+ add xh, xh, a10
+
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ l32i a0, sp, 0
+ l32i a8, sp, 8
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 12 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 11 bits and increment the
+ exponent. Note: It is convenient to use the constant 0x3ff
+ instead of 0x400 when removing the extra exponent bias (so that
+ it is easy to construct 0x7fe for the overflow check). Reverse
+ the logic here to decrement the exponent sum by one unless there
+ was a carry-out. */
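+ /* [Editorial note] Each mantissa above was set up with its explicit
+    "1.0" at bit 52, i.e. as an integer in [2^52, 2^53), so the full
+    product lies in [2^104, 2^106) and its leading 1 falls on bit 104
+    or 105 -- bit 8 or 9 of xh once the low 64 bits are dropped.  The
+    "srli a5, xh, 21 - 12" test below therefore detects the carry-out
+    case (leading 1 at bit 9), and the left shift by 12 or 11 bits
+    returns the leading 1 to bit 20 of xh. */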
+ movi a4, 11
+ srli a5, xh, 21 - 12
+ bnez a5, 1f
+ addi a4, a4, 1
+ addi a8, a8, -1
+1: ssl a4
+ src xh, xh, xl
+ src xl, xl, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x3ff
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ slli a4, a4, 1 /* 0x7fe */
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi xl, xl, 1
+ beqz xl, .Lmul_roundcarry
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, xh, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ j .Lmul_rounded
+
+.Lmul_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow is OK -- it will be added to the exponent. */
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_bigshift
+
+ /* Shift xh/xl right. Any bits that are shifted out of xl are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+ j 1f
+
+.Lmul_bigshift:
+ bgeui a8, 64, .Lmul_flush_to_zero
+ sll a10, xl /* lost bits shifted out of xl */
+ src a6, xh, xl
+ srl xl, xh
+ movi xh, 0
+ or a9, a9, a10
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
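+
+ /* [Editorial note] The mul_mulsi3_body macro above is a shift-and-add
+    multiplier that consumes four bits of the first operand per
+    iteration.  A rough, hypothetical C equivalent (illustration only):
+
+      uint32_t mulsi3_sketch (uint32_t a, uint32_t b)
+      {
+        uint32_t r = 0;
+        while (a) {
+          if (a & 1) r += b;
+          if (a & 2) r += b << 1;
+          if (a & 4) r += b << 2;
+          if (a & 8) r += b << 3;
+          a >>= 4;
+          b <<= 4;
+        }
+        return r;
+      }
+ */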
+#endif /* L_muldf3 */
+
+#ifdef L_divdf3
+
+ /* Division */
+__divdf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* Check for division by zero. */
+ or a10, yh, yl
+ beqz a10, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Ldiv_yh_zero
+ do_nsau a10, yh, a11, a9
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+.Ldiv_yh_zero:
+ do_nsau a10, yl, a11, a9
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Ldiv_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Ldiv_ynormalized
+.Ldiv_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+ or xl, xl, xh
+ srli xh, a7, 31
+ slli xh, xh, 31
+ or xh, xh, a6
+ bnez xl, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: movi xl, 0
+ leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Ldiv_xh_zero
+ do_nsau a10, xh, a11, a8
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+.Ldiv_xh_zero:
+ do_nsau a10, xl, a11, a8
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Ldiv_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Ldiv_xnormalized
+.Ldiv_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, yh, 31
+ slli a7, a7, 31
+ xor xh, xh, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall yh, a6, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ldiv_highequal1:
+ bltu xl, yl, 2f
+ j 3f
+
+ .align 4
+ .global __divdf3
+ .type __divdf3, @function
+__divdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Ldiv_xnan_or_inf
+ ball yh, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Set SAR for left shift by one. */
+ ssai (32 - 1)
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu yh, xh, 3f
+ beq yh, xh, .Ldiv_highequal1
+2: src xh, xh, xl
+ sll xl, xl
+ addi a8, a8, -1
+3:
+ /* Do the first subtraction and shift. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+ src xh, xh, xl
+ sll xl, xl
+
+ /* Put the quotient into a10/a11. */
+ movi a10, 0
+ movi a11, 1
+
+ /* Divide one bit at a time for 52 bits. */
+ movi a9, 52
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ src a10, a10, a11
+ sll a11, a11
+
+ /* Is this digit a 0 or 1? */
+ bltu xh, yh, 3f
+ beq xh, yh, .Ldiv_highequal2
+
+ /* Output a 1 and subtract. */
+2: addi a11, a11, 1
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Shift the dividend << 1. */
+3: src xh, xh, xl
+ sll xl, xl
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
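+
+ /* [Editorial note] A rough, hypothetical C sketch of the restoring
+    division loop above ('x' is the running remainder, 'y' the divisor
+    mantissa, 'q' the quotient; the first quotient bit was produced
+    before the loop and the operands were pre-aligned so that bit is 1):
+
+      uint64_t q = 1;
+      for (int i = 0; i < 52; i++) {
+        q <<= 1;
+        if (x >= y) { q |= 1; x -= y; }
+        x <<= 1;
+      }
+      // on exit, x holds twice the remainder and is used for rounding
+ */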
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ movi a9, 0x3fe
+ add a8, a8, a9
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ addmi a9, a9, 0x400 /* 0x7fe */
+ bgeu a8, a9, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in xh/xl. */
+ bltu xh, yh, .Ldiv_rounded
+ beq xh, yh, .Ldiv_highequal3
+.Ldiv_roundup:
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+
+.Ldiv_rounded:
+ mov xl, a11
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Ldiv_highequal2:
+ bgeu xl, yl, 2b
+ j 3b
+
+.Ldiv_highequal3:
+ bltu xl, yl, .Ldiv_rounded
+ bne xl, yl, .Ldiv_roundup
+
+ /* Remainder is exactly half the divisor. Round even. */
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a9, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Ldiv_addsign
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_bigshift
+
+ /* Shift a10/a11 right. Any bits that are shifted out of a11 are
+ saved in a6 for rounding the result. */
+ sll a6, a11
+ src a11, a10, a11
+ srl a10, a10
+ j 1f
+
+.Ldiv_bigshift:
+ bgeui a8, 64, .Ldiv_flush_to_zero
+ sll a9, a11 /* lost bits shifted out of a11 */
+ src a6, a10, a11
+ srl a11, a10
+ movi a10, 0
+ or xl, xl, a9
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero remainder (in xh/xl) into a6. */
+ or xh, xh, xl
+ beqz xh, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10/a11 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_roundcarry:
+ /* a11 is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi a10, a10, 1
+ /* Overflow to the exponent field is OK. */
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+#endif /* L_divdf3 */
+
+#ifdef L_cmpdf2
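+
+ /* [Editorial note] These comparison helpers follow the usual libgcc
+    soft-float convention: the caller compares the returned integer
+    against zero using the same relation.  In this file,
+    __eqdf2/__nedf2 return zero iff x == y, __gtdf2 returns a positive
+    value iff x > y, __gedf2 returns a value >= 0 iff x >= y, __ltdf2
+    returns a negative value iff x < y, __ledf2 returns a value <= 0
+    iff x <= y, and __unorddf2 returns nonzero iff either operand is a
+    NaN.  Under that convention every ordered comparison involving a
+    NaN is reported as false. */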
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqdf2
+ .global __nedf2
+ .set __nedf2, __eqdf2
+ .type __eqdf2, @function
+__eqdf2:
+ leaf_entry sp, 16
+ bne xl, yl, 2f
+ bne xh, yh, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, xh, 12
+ or a7, a7, xl
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl /* xl == yl here */
+
+ /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+ of x when exponent(x) = 0x7ff and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtdf2
+ .type __gtdf2, @function
+__gtdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __ledf2
+ .type __ledf2, @function
+__ledf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bltu yl, xl, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bgeu xl, yl, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz xh, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gedf2
+ .type __gedf2, @function
+__gedf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltdf2
+ .type __ltdf2, @function
+__ltdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Llt_xneg
+
+ /* Check if x < y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bgeu xl, yl, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bltu yl, xl, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez xh, 5b
+
+ /* Check if both x and y are zero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unorddf2
+ .type __unorddf2, @function
+__unorddf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+1: ball yh, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpdf2 */
+
+#ifdef L_fixdfsi
+
+ .align 4
+ .global __fixdfsi
+ .type __fixdfsi, @function
+__fixdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixdfsi_maxint
+ blti a4, 1, .Lfixdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
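+
+ /* [Editorial note] Worked example: for x = 5.0 (xh = 0x40140000,
+    xl = 0) the exponent field is 0x401, so a4 = 0x401 - 0x3fe = 3.
+    ORing in a6 and shifting left by 11 gives a5 = 0xa0000000 (the
+    leading 1 plus mantissa at the top of the word), and shifting right
+    by 32 - 3 = 29 leaves 0b101 = 5. */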
+
+.Lfixdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixdfsi */
+
+#ifdef L_fixdfdi
+
+ .align 4
+ .global __fixdfdi
+ .type __fixdfdi, @function
+__fixdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixdfdi_maxint
+ blti a4, 1, .Lfixdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixdfdi_shifted
+
+.Lfixdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfdi_maxint:
+ slli a7, a6, 11 /* 0x80000000 */
+ bgez xh, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixdfdi */
+
+#ifdef L_fixunsdfsi
+
+ .align 4
+ .global __fixunsdfsi
+ .type __fixunsdfsi, @function
+__fixunsdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixunsdfsi_maxint
+ bltz a4, .Lfixunsdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunsdfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunsdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunsdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixunsdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunsdfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz xh, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 11
+ leaf_return
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_fixunsdfdi
+
+ .align 4
+ .global __fixunsdfdi
+ .type __fixunsdfdi, @function
+__fixunsdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixunsdfdi_maxint
+ bltz a4, .Lfixunsdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunsdfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunsdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunsdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunsdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunsdfdi_shifted
+
+.Lfixunsdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunsdfdi_maxint:
+ bgez xh, 1b
+2: slli xh, a6, 11 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunsdfdi */
+
+#ifdef L_floatsidf
+
+ .align 4
+ .global __floatunsidf
+ .type __floatunsidf, @function
+__floatunsidf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Set the sign to zero and jump to the floatsidf code. */
+ movi a7, 0
+ j .Lfloatsidf_normalize
+
+ .align 4
+ .global __floatsidf
+ .type __floatsidf, @function
+__floatsidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsidf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position. */
+ srli xh, a5, 11
+ slli xl, a5, (32 - 11)
+
+ /* Set the exponent. */
+ movi a5, 0x41d /* 0x3fe + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign and return. */
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
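+
+ /* [Editorial note] Worked example: for the input 1, do_nsau yields 31,
+    so the normalized value is a5 = 0x80000000 and the mantissa shift
+    puts the leading 1 at bit 20 of xh.  The stored constant is
+    0x3fe + 31 rather than 0x3ff + 31 because that leading 1, which is
+    left in place, adds one more to the exponent field: the final xh is
+    0x00100000 + ((0x41d - 31) << 20) = 0x3ff00000, i.e. 1.0. */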
+
+.Lfloatsidf_return_zero:
+ movi a3, 0
+ leaf_return
+
+#endif /* L_floatsidf */
+
+#ifdef L_floatdidf
+
+ .align 4
+ .global __floatundidf
+ .type __floatundidf, @function
+__floatundidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdidf code. */
+ movi a7, 0
+ j .Lfloatdidf_normalize
+
+ .align 4
+ .global __floatdidf
+ .type __floatdidf, @function
+__floatdidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdidf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdidf_normalize
+ addi xh, xh, -1
+
+.Lfloatdidf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdidf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdidf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 11
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Set the exponent. */
+ movi a5, 0x43d /* 0x3fe + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or xh, xh, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi xl, xl, 1
+ beqz xl, .Lfloatdidf_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdidf_exactlyhalf
+2: leaf_return
+
+.Lfloatdidf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdidf_shifted
+
+.Lfloatdidf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lfloatdidf_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+#endif /* L_floatdidf */
+
+#ifdef L_truncdfsf2
+
+ .align 4
+ .global __truncdfsf2
+ .type __truncdfsf2, @function
+__truncdfsf2:
+ leaf_entry sp, 16
+
+ /* Adjust the exponent bias. */
+ movi a4, (0x3ff - 0x7f) << 20
+ sub a5, xh, a4
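+
+ /* [Editorial note] The double-precision exponent bias is 0x3ff (1023)
+    and the single-precision bias is 0x7f (127), so re-biasing the
+    exponent means subtracting 0x380 (896).  Doing it on the packed
+    high word as above leaves the sign and mantissa bits untouched as
+    long as the exponent field does not underflow, which is checked
+    next. */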
+
+ /* Check for underflow. */
+ xor a6, xh, a5
+ bltz a6, .Ltrunc_underflow
+ extui a6, a5, 20, 11
+ beqz a6, .Ltrunc_underflow
+
+ /* Check for overflow. */
+ movi a4, 255
+ bge a6, a4, .Ltrunc_overflow
+
+ /* Shift a5/xl << 3 into a5/a4. */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+.Ltrunc_addsign:
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ slli a6, a6, 31
+ or a2, a6, a5
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a4, 1f
+ addi a2, a2, 1
+ /* Overflow to the exponent is OK. The answer will be correct. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a4, a4, 1
+ beqz a4, .Ltrunc_exactlyhalf
+1: leaf_return
+
+.Ltrunc_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Ltrunc_overflow:
+ /* Check if exponent == 0x7ff. */
+ movi a4, 0x7ff00000
+ bnall xh, a4, 1f
+
+ /* Check if mantissa is nonzero. */
+ slli a5, xh, 12
+ or a5, a5, xl
+ beqz a5, 1f
+
+ /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
+ srli a4, a4, 1
+
+1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ ssai 1
+ src a2, a6, a4
+ leaf_return
+
+.Ltrunc_underflow:
+ /* Find shift count for a subnormal. Flush to zero if >= 32. */
+ extui a6, xh, 20, 11
+ movi a5, 0x3ff - 0x7f
+ sub a6, a5, a6
+ addi a6, a6, 1
+ bgeui a6, 32, 1f
+
+ /* Replace the exponent with an explicit "1.0". */
+ slli a5, a5, 13 /* 0x700000 */
+ or a5, a5, xh
+ slli a5, a5, 11
+ srli a5, a5, 11
+
+ /* Shift the mantissa left by 3 bits (into a5/a4). */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+ /* Shift right by a6. */
+ ssr a6
+ sll a7, a4
+ src a4, a5, a4
+ srl a5, a5
+ beqz a7, .Ltrunc_addsign
+ or a4, a4, a6 /* any positive, nonzero value will work */
+ j .Ltrunc_addsign
+
+ /* Return +/- zero. */
+1: extui a2, xh, 31, 1
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_truncdfsf2 */
+
+#ifdef L_extendsfdf2
+
+ .align 4
+ .global __extendsfdf2
+ .type __extendsfdf2, @function
+__extendsfdf2:
+ leaf_entry sp, 16
+
+ /* Save the sign bit and then shift it off. */
+ extui a5, a2, 31, 1
+ slli a5, a5, 31
+ slli a4, a2, 1
+
+ /* Extract and check the exponent. */
+ extui a6, a2, 23, 8
+ beqz a6, .Lextend_expzero
+ addi a6, a6, 1
+ beqi a6, 256, .Lextend_nan_or_inf
+
+ /* Shift >> 3 into a4/xl. */
+ srli a4, a4, 4
+ slli xl, a2, (32 - 3)
+
+ /* Adjust the exponent bias. */
+ movi a6, (0x3ff - 0x7f) << 20
+ add a4, a4, a6
+
+ /* Add the sign bit. */
+ or xh, a4, a5
+ leaf_return
+
+.Lextend_nan_or_inf:
+ movi a4, 0x7ff00000
+
+ /* Check for NaN. */
+ slli a7, a2, 9
+ beqz a7, 1f
+
+ slli a6, a6, 11 /* 0x80000 */
+ or a4, a4, a6
+
+ /* Add the sign and return. */
+1: or xh, a4, a5
+ movi xl, 0
+ leaf_return
+
+.Lextend_expzero:
+ beqz a4, 1b
+
+ /* Normalize it to have 8 zero bits before the first 1 bit. */
+ do_nsau a7, a4, a2, a3
+ addi a7, a7, -8
+ ssl a7
+ sll a4, a4
+
+ /* Shift >> 3 into a4/xl. */
+ slli xl, a4, (32 - 3)
+ srli a4, a4, 3
+
+ /* Set the exponent. */
+ movi a6, 0x3fe - 0x7f
+ sub a6, a6, a7
+ slli a6, a6, 20
+ add a4, a4, a6
+
+ /* Add the sign and return. */
+ or xh, a4, a5
+ leaf_return
+
+#endif /* L_extendsfdf2 */
+
+
diff --git a/libgcc/config/xtensa/ieee754-sf.S b/libgcc/config/xtensa/ieee754-sf.S
new file mode 100644
index 00000000000..d75be0e5ae5
--- /dev/null
+++ b/libgcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
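+
+/* [Editorial note] Although single-precision values fit in one
+   register, the xh/xl and yh/yl pairs above are presumably for the
+   routines later in this file that handle 64-bit integers (the DImode
+   conversions), where the word order of a register pair depends on
+   the target endianness. */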
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negsf2
+
+ .align 4
+ .global __negsf2
+ .type __negsf2, @function
+__negsf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor a2, a2, a4
+ leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+ /* Addition */
+__addsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, a2, 9
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov a2, a3
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __addsf3
+ .type __addsf3, @function
+__addsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Ladd_xnan_or_inf
+ ball a3, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, a2, 23, 9
+ extui a8, a3, 23, 9
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Ladd_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+ /* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ /* Do the addition. */
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add a2, a2, a3
+1: leaf_return
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli a2, a2, 9
+ srli a2, a2, 9
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone a2, a6, .Ladd_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_returny
+
+ ssr a10
+ sll a9, a2
+ srl a2, a2
+
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_returny:
+ mov a2, a3
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x800000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+ or:
+ shifted mantissa + ((x + 1) << 22)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
+
+ /* Shift x right by one bit. Save the lsb. */
+ mov a10, a2
+ srli a2, a2, 1
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 22
+ add a2, a2, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball a2, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi a2, a2, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ srli a2, a2, 23
+ slli a2, a2, 23
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or a2, a2, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+
+ /* Subtraction */
+__subsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subsf3
+ .type __subsf3, @function
+__subsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Lsub_xnan_or_inf
+ ball a3, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, a2, 23, 8
+ extui a8, a3, 23, 8
+ bltu a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Lsub_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+ /* Exponent difference >= 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ sub a2, a2, a3
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from a2. */
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_yexpzero:
+ /* Return zero if the inputs are equal. (For the non-subnormal
+ case, subtracting the "1.0" will cause a borrow from the exponent
+ and this case can be detected when handling the borrow.) */
+ beq a2, a3, .Lsub_return_zero
+
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+1: leaf_return
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone a2, a6, .Lsub_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_returny
+
+ ssr a10
+ movi a9, 0
+ src a9, a2, a9
+ srl a2, a2
+
+ /* Negate y. */
+ slli a11, a6, 8
+ xor a3, a3, a11
+
+ sub a2, a3, a2
+
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ beq a2, a3, .Lsub_return_zero
+ slli a2, a2, 9
+ srli a2, a2, 9
+ bnone a3, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_return_zero:
+ movi a2, 0
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, a2, 9
+ beqz a8, .Lsub_xzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 9
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_normalize_shift:
+ /* Shift the mantissa (a8/a9) left by a6. */
+ ssl a6
+ src a8, a8, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli a2, a2, 23
+ sub a2, a2, a6
+ slli a2, a2, 23
+ add a2, a2, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Lsub_xzero:
+ /* If there was a borrow from the exponent, and the mantissa and
+ guard digits are all zero, then the inputs were equal and the
+ result should be zero. */
+ beqz a9, .Lsub_return_zero
+
+ /* Only the guard digit is nonzero. Shift by min(24, a10). */
+ addi a11, a10, -24
+ movi a6, 24
+ movltz a6, a10, a11
+ j .Lsub_normalize_shift
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_mulsf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* If y is zero, return zero. */
+ beqz a3, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ slli a8, a3, 1
+ bnez a8, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall a3, a6, .Lmul_returnx
+ slli a8, a3, 9
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov a2, a3
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli a2, a2, 1
+ ssai 1
+ src a2, a7, a2
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ slli a8, a2, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x400000 /* make it a quiet NaN */
+ or a2, a3, a7
+ j .Lmul_done
+
+ .align 4
+ .global __mulsf3
+ .type __mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Lmul_xnan_or_inf
+ ball a3, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+ mull a6, a2, a3
+ muluh a2, a2, a3
+
+#else
+
+ /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+ products. These partial products are:
+
+ 0 xl * yl
+
+ 1 xl * yh
+ 2 xh * yl
+
+ 3 xh * yh
+
+ If using the Mul16 or Mul32 multiplier options, these input
+ chunks must be stored in separate registers. For Mac16, the
+ UMUL.AA.* opcodes can specify that the inputs come from either
+ half of the registers, so there is no need to shift them out
+ ahead of time. If there is no multiply hardware, the 16-bit
+ chunks can be extracted when setting up the arguments to the
+ separate multiply function. */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into a2. */
+ do_mul(a2, a2, h, a3, h) /* pp 3 */
+ add a2, a2, a9
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a0, sp, 0
+ l32i a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
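The partial-product scheme above corresponds to the following C sketch of a 32x32->64 multiply; the helper name is hypothetical and the sketch only illustrates the pp0..pp3 bookkeeping and carry handling:

    #include <stdint.h>

    static void
    mul32x32_64 (uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
    {
      uint32_t xl = x & 0xffff, xh = x >> 16;
      uint32_t yl = y & 0xffff, yh = y >> 16;

      uint32_t pp0 = xl * yl;                 /* low  x low  */
      uint32_t pp1 = xl * yh;                 /* low  x high */
      uint32_t pp2 = xh * yl;                 /* high x low  */
      uint32_t pp3 = xh * yh;                 /* high x high */

      uint32_t mid = pp1 + pp2;               /* may carry out of 32 bits */
      uint32_t mid_carry = (mid < pp1) ? (uint32_t) 1 << 16 : 0;

      uint32_t low = pp0 + (mid << 16);       /* may also carry out */
      uint32_t low_carry = (low < pp0) ? 1 : 0;

      *lo = low;
      *hi = pp3 + (mid >> 16) + mid_carry + low_carry;
    }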
+
+ /* Shift left by 9 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 8 bits and increment the
+ exponent. */
+ movi a4, 9
+ srli a5, a2, 24 - 9
+ beqz a5, 1f
+ addi a4, a4, -1
+ addi a8, a8, 1
+1: ssl a4
+ src a2, a2, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x80
+ sub a8, a8, a4
+
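A quick check of the bias arithmetic, with e_x and e_y denoting the biased input exponents (illustrative note, not part of the patch):

    desired biased result exponent = e_x + e_y - 0x7f
    a8 after this subtraction      = e_x + e_y - 0x80  =  desired - 1

The missing +1 is restored when the mantissa, whose explicit "1.0" sits in bit 23, is later added on top of the shifted exponent field.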
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..0xfd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi a2, a2, 1
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a2, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ movi a8, 0xff
+ slli a2, a8, 23
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_flush_to_zero
+
+ /* Shift a2 right. Any bits that are shifted out of a2 are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, a2
+ srl a2, a2
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
+
+#ifdef L_divsf3
+
+ /* Division */
+__divsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* Check for division by zero. */
+ beqz a3, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli a4, a2, 1
+ srli a4, a4, 1
+ srli a2, a7, 31
+ slli a2, a2, 31
+ or a2, a2, a6
+ bnez a4, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, a3, 31
+ slli a7, a7, 31
+ xor a2, a2, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall a3, a6, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, a3, 9
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov a2, a3
+ leaf_return
+
+ .align 4
+ .global __divsf3
+ .type __divsf3, @function
+__divsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Ldiv_xnan_or_inf
+ ball a3, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu a3, a2, 1f
+ slli a2, a2, 1
+ addi a8, a8, -1
+1:
+ /* Do the first subtraction and shift. */
+ sub a2, a2, a3
+ slli a2, a2, 1
+
+ /* Put the quotient into a10. */
+ movi a10, 1
+
+ /* Divide one bit at a time for 23 bits. */
+ movi a9, 23
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ slli a10, a10, 1
+
+ /* Is this digit a 0 or 1? */
+ bltu a2, a3, 1f
+
+ /* Output a 1 and subtract. */
+ addi a10, a10, 1
+ sub a2, a2, a3
+
+ /* Shift the dividend << 1. */
+1: slli a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ addi a8, a8, 0x7e
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..0xfd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in a2. */
+ bltu a2, a3, .Ldiv_rounded
+ addi a10, a10, 1
+ beq a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+ leaf_return
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0xff */
+ slli a2, a8, 23
+ j .Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+ /* Remainder is exactly half the divisor. Round even. */
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_flush_to_zero
+
+ /* Shift a10 right. Any bits that are shifted out of a10 are
+ saved in a6 for rounding the result. */
+ sll a6, a10
+ srl a10, a10
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero remainder (in a2) into a6. */
+ beqz a2, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a10, a10, 1
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_divsf3 */
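The mantissa quotient loop in __divsf3 above follows the classic restoring-division pattern; a rough C equivalent is sketched below (hypothetical helper; the operands are 24-bit mantissas with the explicit "1.0", and x is assumed to be pre-shifted so the first quotient digit is 1):

    #include <stdint.h>

    static uint32_t
    mant_divide (uint32_t x, uint32_t y, uint32_t *rem)
    {
      uint32_t q = 1;                 /* the first digit is known to be 1 */
      x = (x - y) << 1;               /* first subtraction and shift */

      for (int i = 0; i < 23; i++)
        {
          q <<= 1;
          if (x >= y)                 /* output a 1 and subtract */
            {
              q += 1;
              x -= y;
            }
          x <<= 1;                    /* shift the dividend */
        }
      *rem = x;                       /* remainder << 1, used for rounding */
      return q;                       /* 24-bit quotient with explicit "1.0" */
    }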
+
+#ifdef L_cmpsf2
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqsf2
+ .global __nesf2
+ .set __nesf2, __eqsf2
+ .type __eqsf2, @function
+__eqsf2:
+ leaf_entry sp, 16
+ bne a2, a3, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, a2, 9
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, a2, a3
+ slli a7, a7, 1
+
+ /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+ of x when exponent(x) = 0x7f8 and x == y. */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtsf2
+ .type __gtsf2, @function
+__gtsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __lesf2
+ .type __lesf2, @function
+__lesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu a3, a2, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bgeu a2, a3, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz a2, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gesf2
+ .type __gesf2, @function
+__gesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltsf2
+ .type __ltsf2, @function
+__ltsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Llt_xneg
+
+ /* Check if x < y. */
+ bgeu a2, a3, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu a3, a2, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez a2, 5b
+
+ /* Check if both x and y are nonzero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unordsf2
+ .type __unordsf2, @function
+__unordsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+1: ball a3, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, a3, 9
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpsf2 */
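The NaN and signed-zero conventions implemented by the comparison routines above can be summarized by this C sketch of the __ltsf2 behaviour (hypothetical helpers that work directly on the raw IEEE-754 bit patterns, which is how registers a2/a3 are treated here):

    #include <stdint.h>

    static int
    is_nan (uint32_t f)
    {
      return (f & 0x7f800000u) == 0x7f800000u && (f << 9) != 0;
    }

    /* Negative iff a < b; NaN operands and +0/-0 pairs yield 0 (not less).  */
    static int
    soft_ltsf2 (uint32_t a, uint32_t b)
    {
      if (is_nan (a) || is_nan (b))
        return 0;                              /* unordered */
      if ((a ^ b) & 0x80000000u)               /* different signs */
        {
          if (((a | b) << 1) == 0)
            return 0;                          /* +0 and -0 compare equal */
          return (a & 0x80000000u) ? -1 : 0;   /* the negative one is smaller */
        }
      if (a & 0x80000000u)                     /* both negative: order reverses */
        return (b < a) ? -1 : 0;
      return (a < b) ? -1 : 0;                 /* both positive */
    }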
+
+#ifdef L_fixsfsi
+
+ .align 4
+ .global __fixsfsi
+ .type __fixsfsi, @function
+__fixsfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 32, .Lfixsfsi_maxint
+ blti a4, 1, .Lfixsfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixsfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixsfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixsfsi */
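As a cross-check, __fixsfsi above amounts to the following C sketch (hypothetical helper taking the raw bit pattern; INT32_MAX/INT32_MIN stand in for the saturation values built with shifts in the assembly):

    #include <stdint.h>

    static int32_t
    soft_fixsfsi (uint32_t f)
    {
      uint32_t exp = (f >> 23) & 0xff;

      if (exp == 0xff)                          /* NaN or Infinity */
        {
          if ((f << 9) != 0)                    /* NaN -> +maxint */
            return INT32_MAX;
          return (f & 0x80000000u) ? INT32_MIN : INT32_MAX;
        }

      int shift = (int) exp - 0x7e;             /* 1..31 means an in-range value */
      if (shift >= 32)
        return (f & 0x80000000u) ? INT32_MIN : INT32_MAX;
      if (shift < 1)
        return 0;                               /* |value| < 1 truncates to 0 */

      uint32_t mant = (f | 0x00800000u) << 8;   /* explicit "1.0" lands in bit 31 */
      int32_t result = (int32_t) (mant >> (32 - shift));
      return (f & 0x80000000u) ? -result : result;
    }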
+
+#ifdef L_fixsfdi
+
+ .align 4
+ .global __fixsfdi
+ .type __fixsfdi, @function
+__fixsfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 64, .Lfixsfdi_maxint
+ blti a4, 1, .Lfixsfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixsfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixsfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixsfdi_smallshift:
+ movi xl, 0
+ sll xl, xh
+ srl xh, xh
+ j .Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfdi_maxint:
+ slli a7, a6, 8 /* 0x80000000 */
+ bgez a2, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixsfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+
+ .align 4
+ .global __fixunssfsi
+ .type __fixunssfsi, @function
+__fixunssfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 32, .Lfixunssfsi_maxint
+ bltz a4, .Lfixunssfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunssfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunssfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixunssfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunssfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a2, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 8
+ leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+
+ .align 4
+ .global __fixunssfdi
+ .type __fixunssfdi, @function
+__fixunssfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 64, .Lfixunssfdi_maxint
+ bltz a4, .Lfixunssfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunssfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunssfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunssfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunssfdi_smallshift:
+ movi xl, 0
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunssfdi_maxint:
+ bgez a2, 1b
+2: slli xh, a6, 8 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ movi xl, 0
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+
+ .align 4
+ .global __floatunsisf
+ .type __floatunsisf, @function
+__floatunsisf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsisf_return
+
+ /* Set the sign to zero and jump to the floatsisf code. */
+ movi a7, 0
+ j .Lfloatsisf_normalize
+
+ .align 4
+ .global __floatsisf
+ .type __floatsisf, @function
+__floatsisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsisf_return
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsisf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ srli a2, a5, 8
+ slli a6, a5, (32 - 8)
+
+ /* Set the exponent. */
+ movi a5, 0x9d /* 0x7e + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, a2, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, .Lfloatsisf_return
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+ leaf_return
+
+.Lfloatsisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatsisf */
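__floatsisf above can be read as the following C sketch (hypothetical helper returning the raw bit pattern; the leading-zero loop stands in for NSAU):

    #include <stdint.h>

    static uint32_t
    soft_floatsisf (int32_t v)
    {
      if (v == 0)
        return 0;

      uint32_t sign = (v < 0) ? 0x80000000u : 0;
      uint32_t mag = (v < 0) ? 0u - (uint32_t) v : (uint32_t) v;

      int nsa = 0;                              /* leading-zero count, like NSAU */
      while (!(mag & 0x80000000u))
        {
          mag <<= 1;
          nsa++;
        }

      uint32_t mant = mag >> 8;                 /* leading 1 lands in bit 23 */
      uint32_t rest = mag << 24;                /* the 8 bits shifted out */
      uint32_t exp  = (uint32_t) (0x9d - nsa);  /* 0x7e + 31 - nsa */
      uint32_t result = sign | ((exp << 23) + mant);  /* mant's bit 23 adds the +1 */

      if (rest & 0x80000000u)                   /* leftover fraction >= 1/2 */
        {
          result += 1;                          /* carry into the exponent is OK */
          if ((rest << 1) == 0)                 /* exactly 1/2: a tie */
            result &= ~(uint32_t) 1;            /* round down to even */
        }
      return result;
    }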
+
+#ifdef L_floatdisf
+
+ .align 4
+ .global __floatundisf
+ .type __floatundisf, @function
+__floatundisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdisf code. */
+ movi a7, 0
+ j .Lfloatdisf_normalize
+
+ .align 4
+ .global __floatdisf
+ .type __floatdisf, @function
+__floatdisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdisf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdisf_normalize
+ addi xh, xh, -1
+
+.Lfloatdisf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdisf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdisf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 8
+ sll a5, xl
+ src a6, xh, xl
+ srl xh, xh
+ beqz a5, 1f
+ movi a5, 1
+ or a6, a6, a5
+1:
+ /* Set the exponent. */
+ movi a5, 0xbd /* 0x7e + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdisf_exactlyhalf
+2: leaf_return
+
+.Lfloatdisf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatdisf */
diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S
new file mode 100644
index 00000000000..071b9171177
--- /dev/null
+++ b/libgcc/config/xtensa/lib1funcs.S
@@ -0,0 +1,845 @@
+/* Assembly functions for the Xtensa version of libgcc1.
+ Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* Define macros for the ABS and ADDX* instructions to handle cases
+ where they are not included in the Xtensa processor configuration. */
+
+ .macro do_abs dst, src, tmp
+#if XCHAL_HAVE_ABS
+ abs \dst, \src
+#else
+ neg \tmp, \src
+ movgez \tmp, \src, \src
+ mov \dst, \tmp
+#endif
+ .endm
+
+ .macro do_addx2 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx2 \dst, \as, \at
+#else
+ slli \tmp, \as, 1
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx4 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx4 \dst, \as, \at
+#else
+ slli \tmp, \as, 2
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx8 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx8 \dst, \as, \at
+#else
+ slli \tmp, \as, 3
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+/* Define macros for leaf function entry and return, supporting either the
+ standard register windowed ABI or the non-windowed call0 ABI. These
+ macros do not allocate any extra stack space, so they only work for
+ leaf functions that do not need to spill anything to the stack. */
+
+ .macro leaf_entry reg, size
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry \reg, \size
+#else
+ /* do nothing */
+#endif
+ .endm
+
+ .macro leaf_return
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ ret
+#endif
+ .endm
+
+
+#ifdef L_mulsi3
+ .align 4
+ .global __mulsi3
+ .type __mulsi3, @function
+__mulsi3:
+ leaf_entry sp, 16
+
+#if XCHAL_HAVE_MUL32
+ mull a2, a2, a3
+
+#elif XCHAL_HAVE_MUL16
+ or a4, a2, a3
+ srai a4, a4, 16
+ bnez a4, .LMUL16
+ mul16u a2, a2, a3
+ leaf_return
+.LMUL16:
+ srai a4, a2, 16
+ srai a5, a3, 16
+ mul16u a7, a4, a3
+ mul16u a6, a5, a2
+ mul16u a4, a2, a3
+ add a7, a7, a6
+ slli a7, a7, 16
+ add a2, a7, a4
+
+#elif XCHAL_HAVE_MAC16
+ mul.aa.hl a2, a3
+ mula.aa.lh a2, a3
+ rsr a5, ACCLO
+ umul.aa.ll a2, a3
+ rsr a4, ACCLO
+ slli a5, a5, 16
+ add a2, a4, a5
+
+#else /* !MUL32 && !MUL16 && !MAC16 */
+
+ /* Multiply one bit at a time, but unroll the loop 4x to better
+ exploit the addx instructions and avoid overhead.
+ Peel the first iteration to save a cycle on init. */
+
+ /* Avoid negative numbers. */
+ xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
+ do_abs a3, a3, a6
+ do_abs a2, a2, a6
+
+ /* Swap so the second argument is smaller. */
+ sub a7, a2, a3
+ mov a4, a3
+ movgez a4, a2, a7 /* a4 = max (a2, a3) */
+ movltz a3, a2, a7 /* a3 = min (a2, a3) */
+
+ movi a2, 0
+ extui a6, a3, 0, 1
+ movnez a2, a4, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+ neg a3, a2
+ movltz a2, a3, a5
+ leaf_return
+
+ .align 4
+.Lmult_main_loop:
+ srli a3, a3, 4
+ slli a4, a4, 4
+
+ add a7, a4, a2
+ extui a6, a3, 0, 1
+ movnez a2, a7, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+
+ neg a3, a2
+ movltz a2, a3, a5
+
+#endif /* !MUL32 && !MUL16 && !MAC16 */
+
+ leaf_return
+ .size __mulsi3, . - __mulsi3
+
+#endif /* L_mulsi3 */
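For the no-hardware path above, the unrolled shift-and-add scheme boils down to this C sketch (hypothetical helper; the 4x unrolling and the addx instructions are an optimization over the plain loop shown here):

    #include <stdint.h>

    static int32_t
    soft_mulsi3 (int32_t a, int32_t b)
    {
      uint32_t sign = ((uint32_t) a ^ (uint32_t) b) & 0x80000000u;
      uint32_t ua = (a < 0) ? 0u - (uint32_t) a : (uint32_t) a;
      uint32_t ub = (b < 0) ? 0u - (uint32_t) b : (uint32_t) b;

      /* Keep the smaller value in 'ub' so the loop terminates sooner.  */
      if (ub > ua)
        {
          uint32_t t = ua;
          ua = ub;
          ub = t;
        }

      uint32_t result = 0;
      while (ub != 0)
        {
          if (ub & 1)
            result += ua;             /* add the shifted multiplicand */
          ua <<= 1;
          ub >>= 1;
        }
      return (int32_t) (sign ? 0u - result : result);
    }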
+
+
+#ifdef L_umulsidi3
+
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+ .align 4
+ .global __umulsidi3
+ .type __umulsidi3, @function
+__umulsidi3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 48
+#else
+ leaf_entry sp, 16
+#endif
+
+#ifdef __XTENSA_EB__
+#define wh a2
+#define wl a3
+#else
+#define wh a3
+#define wl a2
+#endif /* __XTENSA_EB__ */
+
+ /* This code is taken from the mulsf3 routine in ieee754-sf.S.
+ See more comments there. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+ mull a6, a2, a3
+ muluh wh, a2, a3
+ mov wl, a6
+
+#else /* ! MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* a0 and a8 will be clobbered by calling the multiply function
+ but a8 is not used here and need not be saved. */
+ s32i a0, sp, 0
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into wh. */
+ do_mul(wh, a2, h, a3, h) /* pp 3 */
+ add wh, wh, a9
+ mov wl, a6
+
+#endif /* !MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore the original return address. */
+ l32i a0, sp, 0
+#endif
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the operands. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+
+ .size __umulsidi3, . - __umulsidi3
+
+#endif /* L_umulsidi3 */
+
+
+/* Define a macro for the NSAU (unsigned normalize shift amount)
+ instruction, which computes the number of leading zero bits,
+ to handle cases where it is not included in the Xtensa processor
+ configuration. */
+
+ .macro do_nsau cnt, val, tmp, a
+#if XCHAL_HAVE_NSA
+ nsau \cnt, \val
+#else
+ mov \a, \val
+ movi \cnt, 0
+ extui \tmp, \a, 16, 16
+ bnez \tmp, 0f
+ movi \cnt, 16
+ slli \a, \a, 16
+0:
+ extui \tmp, \a, 24, 8
+ bnez \tmp, 1f
+ addi \cnt, \cnt, 8
+ slli \a, \a, 8
+1:
+ movi \tmp, __nsau_data
+ extui \a, \a, 24, 8
+ add \tmp, \tmp, \a
+ l8ui \tmp, \tmp, 0
+ add \cnt, \cnt, \tmp
+#endif /* !XCHAL_HAVE_NSA */
+ .endm
+
+#ifdef L_clz
+ .section .rodata
+ .align 4
+ .global __nsau_data
+ .type __nsau_data, @object
+__nsau_data:
+#if !XCHAL_HAVE_NSA
+ .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
+ .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif /* !XCHAL_HAVE_NSA */
+ .size __nsau_data, . - __nsau_data
+ .hidden __nsau_data
+#endif /* L_clz */
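The do_nsau fallback plus the __nsau_data table above amount to the following C sketch (hypothetical helpers; init_nsau_data builds the same 256-entry table that the .byte directives spell out):

    #include <stdint.h>

    static unsigned char nsau_data[256];

    static void
    init_nsau_data (void)
    {
      for (int i = 0; i < 256; i++)
        {
          unsigned n = 8;                  /* 8 leading zeros for i == 0 */
          for (int b = 7; b >= 0; b--)
            if (i & (1 << b))
              {
                n = 7 - b;
                break;
              }
          nsau_data[i] = (unsigned char) n;
        }
    }

    /* NSAU semantics: number of leading zero bits, 32 when val == 0.
       Call init_nsau_data once before using this.  */
    static unsigned
    soft_clzsi2 (uint32_t val)
    {
      unsigned cnt = 0;
      if ((val >> 16) == 0)
        {
          cnt = 16;
          val <<= 16;
        }
      if ((val >> 24) == 0)
        {
          cnt += 8;
          val <<= 8;
        }
      return cnt + nsau_data[val >> 24];
    }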
+
+
+#ifdef L_clzsi2
+ .align 4
+ .global __clzsi2
+ .type __clzsi2, @function
+__clzsi2:
+ leaf_entry sp, 16
+ do_nsau a2, a2, a3, a4
+ leaf_return
+ .size __clzsi2, . - __clzsi2
+
+#endif /* L_clzsi2 */
+
+
+#ifdef L_ctzsi2
+ .align 4
+ .global __ctzsi2
+ .type __ctzsi2, @function
+__ctzsi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 31
+ leaf_return
+ .size __ctzsi2, . - __ctzsi2
+
+#endif /* L_ctzsi2 */
+
+
+#ifdef L_ffssi2
+ .align 4
+ .global __ffssi2
+ .type __ffssi2, @function
+__ffssi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 32
+ leaf_return
+ .size __ffssi2, . - __ffssi2
+
+#endif /* L_ffssi2 */
+
+
+#ifdef L_udivsi3
+ .align 4
+ .global __udivsi3
+ .type __udivsi3, @function
+__udivsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quou a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
+
+ mov a6, a2 /* keep dividend in a6 */
+ do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment quotient if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+ beqz a3, .Lerror /* divisor == 0: raise the error below */
+ leaf_return /* divisor == 1: return the dividend unchanged */
+
+.Lspecial:
+ /* return dividend >= divisor */
+ bltu a6, a3, .Lreturn0
+ movi a2, 1
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __udivsi3, . - __udivsi3
+
+#endif /* L_udivsi3 */
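The __udivsi3 fallback above is a textbook shift-and-subtract divider; a rough C equivalent follows (hypothetical helpers; clz32 plays the role of do_nsau, and division by zero is left to the caller rather than trapping):

    #include <stdint.h>

    static unsigned
    clz32 (uint32_t v)
    {
      unsigned n = 0;
      if (v == 0)
        return 32;
      while (!(v & 0x80000000u))
        {
          v <<= 1;
          n++;
        }
      return n;
    }

    static uint32_t
    soft_udivsi3 (uint32_t dividend, uint32_t divisor)
    {
      if (divisor <= 1)
        return dividend;                       /* divisor == 0 traps in the real code */

      unsigned dividend_shift = clz32 (dividend);
      unsigned divisor_shift = clz32 (divisor);
      if (dividend_shift >= divisor_shift)     /* dividend has no more bits than divisor */
        return (dividend >= divisor) ? 1 : 0;

      unsigned count = divisor_shift - dividend_shift;
      divisor <<= count;                       /* align the divisor with the dividend */

      uint32_t quotient = 0;
      for (unsigned i = 0; i < count; i++)     /* one quotient bit per iteration */
        {
          if (dividend >= divisor)
            {
              dividend -= divisor;
              quotient += 1;
            }
          quotient <<= 1;
          divisor >>= 1;
        }
      if (dividend >= divisor)
        quotient += 1;                         /* final quotient bit */
      return quotient;
    }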
+
+
+#ifdef L_divsi3
+ .align 4
+ .global __divsi3
+ .type __divsi3, @function
+__divsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quos a2, a2, a3
+#else
+ xor a7, a2, a3 /* sign = dividend ^ divisor */
+ do_abs a6, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment if udividend >= udivisor */
+.Lreturn:
+ neg a5, a2
+ movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
+ leaf_return
+
+.Lle_one:
+ beqz a3, .Lerror
+ neg a2, a6 /* if udivisor == 1, then return... */
+ movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
+ leaf_return
+
+.Lspecial:
+ bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
+ movi a2, 1
+ movi a4, -1
+ movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __divsi3, . - __divsi3
+
+#endif /* L_divsi3 */
+
+
+#ifdef L_umodsi3
+ .align 4
+ .global __umodsi3
+ .type __umodsi3, @function
+__umodsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ remu a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
+
+ do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract once more if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __umodsi3, . - __umodsi3
+
+#endif /* L_umodsi3 */
+
+
+#ifdef L_modsi3
+ .align 4
+ .global __modsi3
+ .type __modsi3, @function
+__modsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ rems a2, a2, a3
+#else
+ mov a7, a2 /* save original (signed) dividend */
+ do_abs a2, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract again if udividend >= udivisor */
+.Lreturn:
+ bgez a7, .Lpositive
+ neg a2, a2 /* if (dividend < 0), return -udividend */
+.Lpositive:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __modsi3, . - __modsi3
+
+#endif /* L_modsi3 */
+
+
+#ifdef __XTENSA_EB__
+#define uh a2
+#define ul a3
+#else
+#define uh a3
+#define ul a2
+#endif /* __XTENSA_EB__ */
+
+
+#ifdef L_ashldi3
+ .align 4
+ .global __ashldi3
+ .type __ashldi3, @function
+__ashldi3:
+ leaf_entry sp, 16
+ ssl a4
+ bgei a4, 32, .Llow_only
+ src uh, uh, ul
+ sll ul, ul
+ leaf_return
+
+.Llow_only:
+ sll uh, ul
+ movi ul, 0
+ leaf_return
+ .size __ashldi3, . - __ashldi3
+
+#endif /* L_ashldi3 */
+
+
+#ifdef L_ashrdi3
+ .align 4
+ .global __ashrdi3
+ .type __ashrdi3, @function
+__ashrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only
+ src ul, uh, ul
+ sra uh, uh
+ leaf_return
+
+.Lhigh_only:
+ sra ul, uh
+ srai uh, uh, 31
+ leaf_return
+ .size __ashrdi3, . - __ashrdi3
+
+#endif /* L_ashrdi3 */
+
+
+#ifdef L_lshrdi3
+ .align 4
+ .global __lshrdi3
+ .type __lshrdi3, @function
+__lshrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only1
+ src ul, uh, ul
+ srl uh, uh
+ leaf_return
+
+.Lhigh_only1:
+ srl ul, uh
+ movi uh, 0
+ leaf_return
+ .size __lshrdi3, . - __lshrdi3
+
+#endif /* L_lshrdi3 */
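The three double-word shifts above all split the work across the two 32-bit halves; __lshrdi3, for instance, behaves like this C sketch (hypothetical helper, shift amounts assumed to be below 64):

    #include <stdint.h>

    static void
    soft_lshrdi3 (uint32_t *hi, uint32_t *lo, unsigned amount)
    {
      if (amount == 0)
        return;
      if (amount >= 32)                        /* only the high half contributes */
        {
          *lo = *hi >> (amount - 32);
          *hi = 0;
          return;
        }
      /* Funnel shift, like the SRC instruction: low bits of *hi fill *lo.  */
      *lo = (*lo >> amount) | (*hi << (32 - amount));
      *hi >>= amount;
    }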
+
+
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa
index 7d9e9db0487..5bcc0946243 100644
--- a/libgcc/config/xtensa/t-xtensa
+++ b/libgcc/config/xtensa/t-xtensa
@@ -1,2 +1,14 @@
+LIB1ASMSRC = xtensa/lib1funcs.S
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
+ _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
+ _ashldi3 _ashrdi3 _lshrdi3 \
+ _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
+ _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+ _floatdisf _floatundisf \
+ _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
+ _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
+ _floatdidf _floatundidf \
+ _truncdfsf2 _extendsfdf2
+
LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
$(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c