diff options
Diffstat (limited to 'src/ssx/ppc32/div64.S')
-rwxr-xr-x | src/ssx/ppc32/div64.S | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/src/ssx/ppc32/div64.S b/src/ssx/ppc32/div64.S new file mode 100755 index 0000000..04ee008 --- /dev/null +++ b/src/ssx/ppc32/div64.S @@ -0,0 +1,255 @@ +// $Id: div64.S,v 1.1.1.1 2013/12/11 21:03:25 bcbrock Exp $ +// $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/working/procedures/ssx/ppc32/div64.S,v $ +//----------------------------------------------------------------------------- +// *! (C) Copyright International Business Machines Corp. 2013 +// *! All Rights Reserved -- Property of IBM +// *! *** IBM Confidential *** +//----------------------------------------------------------------------------- + +/// \file div64.S +/// \brief Unsigned 64/64 bit division +/// +/// This is IBM code, originally part of OS Open. The code has been slightly +/// modified from its original form, both to be compatible with SSX and to +/// change the function prototype slightly. +/// +/// The code was provided by Matt Tyrlik in Raleigh. + +/* @#START#@ +** +** PSCN (Power Service and Control Network) +** Cage Controller OS Open Code +** +** (C) Copyright International Business Machines Corporation 2002 +** All Rights Reserved +** Licensed Material - Program Property of I B M +** Refer to copyright instructions: Form G120-2083 +** +** Module: +** div64.s +** +** Description: +** Divide 64 bit unsigned values on 32 bit CPU +** div64(uint64_t dividen, uint64_t divisor, +** uint64_t *quotient, uint64_t *remainder) +** +** Original source from: +** "The PowerPC Compiler Writer's Guide", pp62-65 by +** Steve Hoxey, Faraydon Karim, Bill Hay, Hank Warray, +** published by Warthman Associates, 240 Hamilton Avenue, +** Palo Alto, CA 94301, USA, 1996 for IBM. +** ISBN 0-9649654-0-2. +** +** This version checks for divisor equal to zero. +** +** Environment: +** OS Open (XCOFF) +** +** Linkage: +** AIX 4.3.3 +** +** @author +** Thomas Richter +** +** History: +** Date Author Description +** ----------------------------------------------------------------------------- +** 23-Sep-02 Richter Created +** +** @#END#@*/ + + .nolist +#include "ssx.h" + .list + + .set r0, 0 + .set r1, 1 + .set r2, 2 + .set r3, 3 + .set r4, 4 + .set r5, 5 + .set r6, 6 + .set r7, 7 + .set r8, 8 + .set r9, 9 + .set r10, 10 + .set r11, 11 + .set r12, 12 + + .global_function __ppc32_udiv64 + + /* + ** Code comment notation: + ** msw = most-significant (high-order) word, i.e. bits 0..31 + ** lsw = least-significant (low-order) word, i.e. bits 32..63 + ** LZ = Leading Zeroes + ** SD = Significant Digits + ** + ** R3:R4 = Input parameter, dividend. + ** R5:R6 = Input parameter, divisor. + ** R7 = Output parameter, pointer to quotient. + ** R8 = Output parameter, pointer to remainder. + ** + ** Pointer arguments point to a uint64_t. + ** + ** Division is achieved using a shift/rotate/substract algorithsm + ** described above. + ** The registers are used as follows: + ** R3:R4 = dividend (upper 32bits:lower 32bits) + ** R5:R6 = divisor (upper 32bits:lower 32bits) + ** + ** R7:R8 = temporary 64 bit register (upper 32bits:lower 32bits) + ** count the number of leading 0s in the dividend + ** + ** Here is the description from the book. The dividend is placed + ** in the low order part of a 4 (32bit) register sequence named + ** tmp-high:tmp-low:dividend-high:dividend:low or tmp:dvd for short. + ** + ** Each iteration includes the following steps: + ** 1. Shift tmp:dvd by one bit to the left. + ** 2. Subtract the divisor from tmp. This is a 64 bit operation. + ** 3. If result is greater than or equal, place result in tmp and + ** set the low order bit of dividend + ** 4. If result is negative, do not modify tmp and + ** clear the low order bit of dividend + ** 5. If the number of iterations is less than the width of the + ** dividend, goto step 1. + ** + ** Now the algorithm can be improved by reducing the number of + ** iterations to be executed. + ** 1. Calculate the leading zeroes of the dividend. + ** 2. Calculate the leading zeroes of the divisor. + ** 3. Calculate the significant ones of the dividend. + ** 4. Calculate the significant ones of the divisor. + ** + ** Initial tmp := dvd >> (dvd.SD - dvs.SD) + ** Initial dvd := dvd << (dvd.LZ + dvs.SD) + ** Loops: dvd.SD - dvs.SD. + ** + ** Warning: Special care must be taken if dvd.LZ == dvs.LZ. The code + ** below does so by reducing the number of dvs.SD by one. This leads + ** to the loop being executed 1 more time than really necessary, + ** but avoids to check for the case when dvd.LZ == dvs.LZ. + ** This case (dvd.LZ == dvs.LZ) only checks for the number of leading + ** zeroes, but does not check if dividend is really greater than the + ** divisor. + ** Consider 16/17, both have an LZ value of 59. The code sets dvs.LZ + ** 60. This resutls in dvs.SD to 4, thus one iteration after which + ** tmp is the remainder 16. + */ + +__ppc32_udiv64: // SSX + /* Save result pointers on volatile spare registers */ + ori r12, r8, 0 /* Save remainder address */ + ori r11, r7, 0 /* Save quotient address */ + + /* count the number of leading 0s in the dividend */ + cmpwi cr0, r3, 0 /* dvd.msw == 0? */ + cntlzw r0, r3 /* R0 = dvd.msw.LZ */ + cntlzw r9, r4 /* R9 = dvd.lsw.LZ */ + bne cr0, lab1 /* if(dvd.msw == 0) dvd.LZ = dvd.msw.LZ */ + addi r0, r9, 32 /* dvd.LZ = dvd.lsw.LZ + 32 */ +lab1: + /* count the number of leading 0s in the divisor */ + cmpwi cr0, r5, 0 /* dvd.msw == 0? */ + cmpwi cr1, r6, 0 /* dvd.lsw == 0? */ + cntlzw r9, r5 /* R9 = dvs.msw.LZ */ + cntlzw r10, r6 /* R10 = dvs.lsw.LZ */ + bne cr0, lab2 /* if(dvs.msw == 0) dvs.LZ = dvs.msw.LZ */ + beq cr1, lab10 /* dvs.msw == 0 */ + addi r9, r10, 32 /* dvs.LZ = dvs.lsw.LZ + 32 */ + +lab2: + /* Determine shift amounts to minimize the number of iterations */ + cmpw cr0, r0, r9 /* Compare dvd.LZ to dvs.LZ */ + subfic r10, r0, 64 /* R10 = dvd.SD */ + bgt cr0, lab9 /* if(dvs > dvd) quotient = 0 */ + addi r9, r9, 1 /* See comment above. ++dvs.LZ (or --dvs.SD) */ + subfic r9, r9, 64 /* R9 = dvs.SD */ + add r0, r0, r9 /* (dvd.LZ + dvs.SD) = left shift of dvd for */ + /* initial dvd */ + subf r9, r9, r10 /* (dvd.SD - dvs.SD) = right shift of dvd for */ + /* initial tmp */ + mtctr r9 /* Number of iterations = dvd.SD - dvs.SD */ + + /* R7:R8 = R3:R4 >> R9 */ + cmpwi cr0, r9, 32 /* compare R9 to 32 */ + addi r7, r9, -32 + blt cr0, lab3 /* if(R9 < 32) jump to lab3 */ + srw r8, r3, r7 /* tmp.lsw = dvd.msw >> (R9 - 32) */ + addi r7, r0, 0 /* tmp.msw = 0 */ + b lab4 + +lab3: + srw r8, r4, r9 /* R8 = dvd.lsw >> R9 */ + subfic r7, r9, 32 + slw r7,r3,r7 /* R7 = dvd.msw << 32 - R9 */ + or r8, r8,r7 /* tmp.lsw = R8 | R7 */ + srw r7,r3,r9 /* tmp.msw = dvd.msw >> R9 */ +lab4: + /* R3:R4 = R3:R4 << R0 */ + cmpwi cr0, r0, 32 /* Compare R0 to 32 */ + addic r9, r0, -32 + blt cr0, lab5 /* if(R0 < 32) jump to lab5 */ + slw r3, r4, r9 /* dvd.msw = dvd.lsw << R9 */ + addi r4, r0, 0 /* dvd.lsw = 0 */ + b lab6 + +lab5: + slw r3, r3, r0 /* r3 = dvd.msw << r0 */ + subfic r9, r0, 32 + srw r9, r4, r9 /* r9 = dvd.lsw >> 32 - r0 */ + or r3, r3, r9 /* dvd.msw = r3 | r9 */ + slw r4, r4, r0 /* dvd.lsw = dvd.lsw << r0 */ +lab6: + /* Restoring division shift and subtract loop */ + addi r10, r0, -1 /* r10 = -1 */ + addic r7, r7, 0 /* Clear carry bit before loop starts */ +lab7: + /* + ** tmp:dvd is considered one large register + ** each portion is shifted left 1 bit by adding it to itself + ** adde sums the carry from the previous and creates a new carry + */ + adde r4, r4, r4 /* Shift dvd.lsw left 1 bit */ + adde r3, r3, r3 /* Shift dvd.msw to left 1 bit */ + adde r8, r8, r8 /* Shift tmp.lsw to left 1 bit */ + adde r7, r7, r7 /* Shift tmp.msw to left 1 bit */ + subfc r0, r6, r8 /* tmp.lsw - dvs.lsw */ + subfe. r9, r5, r7 /* tmp.msw - dvs.msw */ + blt cr0, lab8 /* if(result < 0) clear carry bit */ + or r8, r0, r0 /* Move lsw */ + or r7, r9, r9 /* Move msw */ + addic r0, r10, 1 /* Set carry bit */ + +lab8: + bdnz lab7 + + /* Write quotient and remainder */ + adde r4, r4, r4 /* quo.lsw (lsb = CA) */ + adde r3, r3, r3 /* quo.msw (lsb from lsw) */ + stw r4, 4(r11) + stw r3, 0(r11) + stw r8, 4(r12) /* rem.lsw */ + stw r7, 0(r12) /* rem.msw */ + blr + +lab9: + /* Qoutient is 0, divisor > dividend */ + addi r0, r0, 0 + stw r3, 0(r12) /* Store remainder */ + stw r4, 4(r12) + stw r0, 0(r11) /* Set quotient to zero */ + stw r0, 4(r11) + blr + +lab10: + /* Divisor is 0 */ + addi r0, r0, -1 + stw r0, 0(r12) /* Set remainder to zero */ + stw r0, 4(r12) + stw r0, 0(r11) /* Set quotient to zero */ + stw r0, 4(r11) + blr + + .epilogue __ppc32_udiv64 |