/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/*
 * NOTE(review): the two header names below were missing from the text under
 * review and have been restored from the macros this file relies on
 * (ENTRY/ENDPROC and ret/reteq/retls) -- confirm against the tree.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
 * do not support stack unwinding, and define CONFIG_AEABI to make all
 * of the functions available without diverging from Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.  On exit \result holds the
 * quotient; \dividend, \divisor and \curbit are clobbered, as are the flags.
 *
 * Every user in this file only expands it after having filtered out a zero
 * divisor, dividend <= divisor and power-of-two divisors.
 *
 * The divisor is first aligned with the dividend (one step with CLZ on
 * ARMv5+, by looping shifts otherwise) while \curbit tracks the quotient bit
 * that corresponds to the current divisor position.  The main loop then
 * resolves four quotient bits per iteration.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result	@ shift needed to align divisor
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Sets \order to the bit index of the single set bit of \divisor, i.e. the
 * right-shift amount that divides by it.  Users in this file only expand it
 * for power-of-two divisors; the pre-ARMv5 path depends on that in its last
 * step and also clobbers \divisor.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: 16, 8, then 4 bits at a time.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ \divisor is now 1, 2, 4 or 8: 8 adds 3, otherwise add divisor >> 1.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned modulo core.  On exit \dividend holds the remainder; \divisor and
 * \order are clobbered, as are the flags.  \spare is only written on the
 * ARMv5+ (CLZ) path.
 *
 * \order counts how many bit positions the divisor was shifted left, which
 * is the number of compare/subtract steps still to perform after the first.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Uses:  r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ Flags still come from "cmp r0, r1": equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection


/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder
 * Uses:  r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection


/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Uses:  r1, r2, r3, ip, flags
 *
 * ip holds dividend ^ divisor, whose top bit is the sign of the result.
 * The magnitude is computed with the unsigned core and negated at the end
 * when ip is negative.  A zero divisor branches to Ldiv0.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ |divisor| == 1: ip ^ r0 has the sign of the original divisor.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from "cmp r3, r1": lower gives 0, equal gives +1/-1.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection


/*
 * __modsi3: signed 32-bit modulo.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder, negated when the dividend was negative
 * Uses:  r1, r2, r3, ip, flags
 *
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection


#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Uses:  r2, r3 (plus whatever __aeabi_uidiv uses)
 *
 * The arguments are saved across the call to __aeabi_uidiv and reloaded as
 * r1 = dividend, r2 = divisor, so the remainder is
 * dividend - quotient * divisor.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection

/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Uses:  r2, r3 (plus whatever __aeabi_idiv uses)
 *
 * Same scheme as __aeabi_uidivmod, built on __aeabi_idiv.  The routine lives
 * in its own .text.__aeabi_idivmod section, like every other routine in
 * this file.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection

#endif


/*
 * Ldiv0: common division-by-zero path.
 *
 * Saves lr (moving sp down by 8 bytes), calls __div0, then returns 0 in r0
 * to the caller of the division routine.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection


/* Thumb-1 specialities */
#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)

/*
 * __gnu_thumb1_case_sqi: table dispatch with signed byte entries.
 *
 * In:    r0 = index, lr = return address; the table is read starting at
 *        lr with bit 0 cleared
 * Out:   returns to lr + 2 * table[r0]; r0 and r1 are preserved
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
	ldrsb	r1, [r1, r0]		@ signed byte entry
	lsls	r1, r1, #1		@ entry is stored halved
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection

/*
 * __gnu_thumb1_case_uqi: table dispatch with unsigned byte entries.
 *
 * In:    r0 = index, lr = return address; the table is read starting at
 *        lr with bit 0 cleared
 * Out:   returns to lr + 2 * table[r0]; r0 and r1 are preserved
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
	ldrb	r1, [r1, r0]		@ unsigned byte entry
	lsls	r1, r1, #1		@ entry is stored halved
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection

/*
 * __gnu_thumb1_case_shi: table dispatch with signed halfword entries.
 *
 * In:    r0 = index, lr = return address; the table is read starting at
 *        lr with bit 0 cleared
 * Out:   returns to lr + 2 * table[r0]; r0 and r1 are preserved
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		@ index -> byte offset of halfword
	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
	ldrsh	r1, [r1, r0]		@ signed halfword entry
	lsls	r1, r1, #1		@ entry is stored halved
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection

/*
 * __gnu_thumb1_case_uhi: table dispatch with unsigned halfword entries.
 *
 * In:    r0 = index, lr = return address; the table is read starting at
 *        lr with bit 0 cleared
 * Out:   returns to lr + 2 * table[r0]; r0 and r1 are preserved
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		@ index -> byte offset of halfword
	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
	ldrh	r1, [r1, r0]		@ unsigned halfword entry
	lsls	r1, r1, #1		@ entry is stored halved
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
#endif