summaryrefslogtreecommitdiffstats
path: root/pk/ppe42/div64.S
diff options
context:
space:
mode:
Diffstat (limited to 'pk/ppe42/div64.S')
-rw-r--r--pk/ppe42/div64.S248
1 files changed, 0 insertions, 248 deletions
diff --git a/pk/ppe42/div64.S b/pk/ppe42/div64.S
deleted file mode 100644
index 7d4b1ffa..00000000
--- a/pk/ppe42/div64.S
+++ /dev/null
@@ -1,248 +0,0 @@
-//-----------------------------------------------------------------------------
-// *! (C) Copyright International Business Machines Corp. 2014
-// *! All Rights Reserved -- Property of IBM
-// *! *** IBM Confidential ***
-//-----------------------------------------------------------------------------
-
-/// \file div64.S
-/// \brief Unsigned 64/64 bit division
-///
-/// This is IBM code, originally part of OS Open. The code has been slightly
-/// modified from its original form, both to be compatible with PK and to
-/// change the function prototype slightly.
-///
-/// The code was provided by Matt Tyrlik in Raleigh.
-
-/* @#START#@
-**
-** PSCN (Power Service and Control Network)
-** Cage Controller OS Open Code
-**
-** (C) Copyright International Business Machines Corporation 2002
-** All Rights Reserved
-** Licensed Material - Program Property of I B M
-** Refer to copyright instructions: Form G120-2083
-**
-** Module:
-** div64.s
-**
-** Description:
-** Divide 64 bit unsigned values on 32 bit CPU
-** div64(uint64_t dividen, uint64_t divisor,
-** uint64_t *quotient, uint64_t *remainder)
-**
-** Original source from:
-** "The PowerPC Compiler Writer's Guide", pp62-65 by
-** Steve Hoxey, Faraydon Karim, Bill Hay, Hank Warray,
-** published by Warthman Associates, 240 Hamilton Avenue,
-** Palo Alto, CA 94301, USA, 1996 for IBM.
-** ISBN 0-9649654-0-2.
-**
-** This version checks for divisor equal to zero.
-**
-** Environment:
-** OS Open (XCOFF)
-**
-** Linkage:
-** AIX 4.3.3
-**
-** @author
-** Thomas Richter
-**
-** History:
-** Date Author Description
-** -----------------------------------------------------------------------------
-** 23-Sep-02 Richter Created
-**
-** @#END#@*/
-
- .nolist
-#include "pk.h"
- .list
-
- .global_function __ppe42_udiv64
-
- /*
- ** Code comment notation:
- ** msw = most-significant (high-order) word, i.e. bits 0..31
- ** lsw = least-significant (low-order) word, i.e. bits 32..63
- ** LZ = Leading Zeroes
- ** SD = Significant Digits
- **
- ** R3:R4 = Input parameter, dividend.
- ** R5:R6 = Input parameter, divisor.
- ** R7 = Output parameter, pointer to quotient.
- ** R8 = Output parameter, pointer to remainder.
- **
- ** Pointer arguments point to a uint64_t.
- **
- ** Division is achieved using a shift/rotate/substract algorithsm
- ** described above.
- ** The registers are used as follows:
- ** R3:R4 = dividend (upper 32bits:lower 32bits)
- ** R5:R6 = divisor (upper 32bits:lower 32bits)
- **
- ** R7:R8 = temporary 64 bit register (upper 32bits:lower 32bits)
- ** count the number of leading 0s in the dividend
- **
- ** Here is the description from the book. The dividend is placed
- ** in the low order part of a 4 (32bit) register sequence named
- ** tmp-high:tmp-low:dividend-high:dividend:low or tmp:dvd for short.
- **
- ** Each iteration includes the following steps:
- ** 1. Shift tmp:dvd by one bit to the left.
- ** 2. Subtract the divisor from tmp. This is a 64 bit operation.
- ** 3. If result is greater than or equal, place result in tmp and
- ** set the low order bit of dividend
- ** 4. If result is negative, do not modify tmp and
- ** clear the low order bit of dividend
- ** 5. If the number of iterations is less than the width of the
- ** dividend, goto step 1.
- **
- ** Now the algorithm can be improved by reducing the number of
- ** iterations to be executed.
- ** 1. Calculate the leading zeroes of the dividend.
- ** 2. Calculate the leading zeroes of the divisor.
- ** 3. Calculate the significant ones of the dividend.
- ** 4. Calculate the significant ones of the divisor.
- **
- ** Initial tmp := dvd >> (dvd.SD - dvs.SD)
- ** Initial dvd := dvd << (dvd.LZ + dvs.SD)
- ** Loops: dvd.SD - dvs.SD.
- **
- ** Warning: Special care must be taken if dvd.LZ == dvs.LZ. The code
- ** below does so by reducing the number of dvs.SD by one. This leads
- ** to the loop being executed 1 more time than really necessary,
- ** but avoids to check for the case when dvd.LZ == dvs.LZ.
- ** This case (dvd.LZ == dvs.LZ) only checks for the number of leading
- ** zeroes, but does not check if dividend is really greater than the
- ** divisor.
- ** Consider 16/17, both have an LZ value of 59. The code sets dvs.LZ
- ** 60. This resutls in dvs.SD to 4, thus one iteration after which
- ** tmp is the remainder 16.
- */
-
-__ppe42_udiv64: // PK
-
- /* push R30 & R31 onto the stack */
- stwu r1, -16(r1)
- stvd r30, 8(r1)
-
- /* Save result pointers on volatile spare registers */
- ori r31, r8, 0 /* Save remainder address */
- ori r30, r7, 0 /* Save quotient address */
-
- /* count the number of leading 0s in the dividend */
- cmpwi cr0, r3, 0 /* dvd.msw == 0? */
- cntlzw r0, r3 /* R0 = dvd.msw.LZ */
- cntlzw r9, r4 /* R9 = dvd.lsw.LZ */
- bne cr0, lab1 /* if(dvd.msw == 0) dvd.LZ = dvd.msw.LZ */
- addi r0, r9, 32 /* dvd.LZ = dvd.lsw.LZ + 32 */
-lab1:
- /* count the number of leading 0s in the divisor */
- cmpwi cr0, r5, 0 /* dvd.msw == 0? */
- cntlzw r9, r5 /* R9 = dvs.msw.LZ */
- cntlzw r10, r6 /* R10 = dvs.lsw.LZ */
- bne cr0, lab2 /* if(dvs.msw == 0) dvs.LZ = dvs.msw.LZ */
- cmpwi cr0, r6, 0 /* dvd.lsw == 0? */
- beq cr0, lab10 /* dvs.msw == 0 */
- addi r9, r10, 32 /* dvs.LZ = dvs.lsw.LZ + 32 */
-
-lab2:
- /* Determine shift amounts to minimize the number of iterations */
- cmpw cr0, r0, r9 /* Compare dvd.LZ to dvs.LZ */
- subfic r10, r0, 64 /* R10 = dvd.SD */
- bgt cr0, lab9 /* if(dvs > dvd) quotient = 0 */
- addi r9, r9, 1 /* See comment above. ++dvs.LZ (or --dvs.SD) */
- subfic r9, r9, 64 /* R9 = dvs.SD */
- add r0, r0, r9 /* (dvd.LZ + dvs.SD) = left shift of dvd for */
- /* initial dvd */
- subf r9, r9, r10 /* (dvd.SD - dvs.SD) = right shift of dvd for */
- /* initial tmp */
- mtctr r9 /* Number of iterations = dvd.SD - dvs.SD */
-
- /* R7:R8 = R3:R4 >> R9 */
- cmpwi cr0, r9, 32 /* compare R9 to 32 */
- addi r7, r9, -32
- blt cr0, lab3 /* if(R9 < 32) jump to lab3 */
- srw r8, r3, r7 /* tmp.lsw = dvd.msw >> (R9 - 32) */
- addi r7, r0, 0 /* tmp.msw = 0 */
- b lab4
-
-lab3:
- srw r8, r4, r9 /* R8 = dvd.lsw >> R9 */
- subfic r7, r9, 32
- slw r7,r3,r7 /* R7 = dvd.msw << 32 - R9 */
- or r8, r8,r7 /* tmp.lsw = R8 | R7 */
- srw r7,r3,r9 /* tmp.msw = dvd.msw >> R9 */
-lab4:
- /* R3:R4 = R3:R4 << R0 */
- cmpwi cr0, r0, 32 /* Compare R0 to 32 */
- addic r9, r0, -32
- blt cr0, lab5 /* if(R0 < 32) jump to lab5 */
- slw r3, r4, r9 /* dvd.msw = dvd.lsw << R9 */
- addi r4, r0, 0 /* dvd.lsw = 0 */
- b lab6
-
-lab5:
- slw r3, r3, r0 /* r3 = dvd.msw << r0 */
- subfic r9, r0, 32
- srw r9, r4, r9 /* r9 = dvd.lsw >> 32 - r0 */
- or r3, r3, r9 /* dvd.msw = r3 | r9 */
- slw r4, r4, r0 /* dvd.lsw = dvd.lsw << r0 */
-lab6:
- /* Restoring division shift and subtract loop */
- addi r10, r0, -1 /* r10 = -1 */
- addic r7, r7, 0 /* Clear carry bit before loop starts */
-lab7:
- /*
- ** tmp:dvd is considered one large register
- ** each portion is shifted left 1 bit by adding it to itself
- ** adde sums the carry from the previous and creates a new carry
- */
- adde r4, r4, r4 /* Shift dvd.lsw left 1 bit */
- adde r3, r3, r3 /* Shift dvd.msw to left 1 bit */
- adde r8, r8, r8 /* Shift tmp.lsw to left 1 bit */
- adde r7, r7, r7 /* Shift tmp.msw to left 1 bit */
- subfc r0, r6, r8 /* tmp.lsw - dvs.lsw */
- subfe. r9, r5, r7 /* tmp.msw - dvs.msw */
- blt cr0, lab8 /* if(result < 0) clear carry bit */
- or r8, r0, r0 /* Move lsw */
- or r7, r9, r9 /* Move msw */
- addic r0, r10, 1 /* Set carry bit */
-
-lab8:
- bdnz lab7
-
- /* Write quotient and remainder */
- adde r4, r4, r4 /* quo.lsw (lsb = CA) */
- adde r3, r3, r3 /* quo.msw (lsb from lsw) */
- stw r4, 4(r30)
- stw r3, 0(r30)
- stw r8, 4(r31) /* rem.lsw */
- stw r7, 0(r31) /* rem.msw */
- b lab11
-
-lab9:
- /* Qoutient is 0, divisor > dividend */
- addi r0, r0, 0
- stw r3, 0(r31) /* Store remainder */
- stw r4, 4(r31)
- stw r0, 0(r30) /* Set quotient to zero */
- stw r0, 4(r30)
- b lab11
-
-lab10:
- /* Divisor is 0 */
- addi r0, r0, -1
- stw r0, 0(r31) /* Set remainder to zero */
- stw r0, 4(r31)
- stw r0, 0(r30) /* Set quotient to zero */
- stw r0, 4(r30)
-
-lab11:
- //pop r30 & r31 from stack
- lvd r30, 8(r1)
- lwz r1, 0(r1)
- blr
- .epilogue __ppe42_udiv64
OpenPOWER on IntegriCloud