summaryrefslogtreecommitdiffstats
path: root/tools/PowerPCtoPPE/ppe42_divwu.S
diff options
context:
space:
mode:
Diffstat (limited to 'tools/PowerPCtoPPE/ppe42_divwu.S')
-rw-r--r--tools/PowerPCtoPPE/ppe42_divwu.S184
1 files changed, 184 insertions, 0 deletions
diff --git a/tools/PowerPCtoPPE/ppe42_divwu.S b/tools/PowerPCtoPPE/ppe42_divwu.S
new file mode 100644
index 00000000..a208df65
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppe42_divwu.S
@@ -0,0 +1,184 @@
+/// \file ppe42_divwu.S
+/// \brief PPC405 word division instructions implemented by PPE ISA
+///
+/// This file includes implementation for the following PPC405 instructions
+/// divwu RT, RA, RB
+///
+/// Note: PPE ISA specific "fused compare and branch" instructions are used
+///
+/// Revision History:
+/// 09-22-2014: Initial Version by daviddu
+///
+
+ .file "ppe42_divwu.S"
+ .section ".text"
+
+ /*
+ ** Code comment notation:
+ **
+ ** msw = most-significant (high-order) word, i.e. bits 0..31
+ ** lsw = least-significant (low-order) word, i.e. bits 32..63
+ ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+ ** lsh = least-significant (low-order) halfword, i.e. bits 16..63
+ **
+ ** LZ = Leading Zeroes
+ ** SD = Significant Digits
+ ** OW = Register is overwritten, previous value is lost,
+ ** correct if previous value is no longer needed.
+ ** FU = Register is not overwritten, but its value is no longer needed,
+ ** in another word, the register is "free for use".
+ **
+ ** PPE GPR Registers are: R0-R10, R13, R28-R31
+ ** Volatile Registers are: R0, R3-R10
+ ** Non-volatile registers are R28-R31
+ */
+
+ /*
+ ** Caling Convention
+ **
+ ** R2 and R13 are never saved or restored. In ABI or EABI application
+ ** these registers are constant. The other touched volatile registers
+ ** will be saved and restored by the subroutines. Note the caller
+ ** wont be saving those registers because these subroutines will be
+ ** instrumented into caller's body without compiler knowledge.
+ **
+ ** Note R3 is not saved and restored because it will be changed for
+ ** return value anyways, the p2p script will make sure to restore it.
+ ** Also CR is hanlded because of compare and branch, but XER/CTR/LR
+ ** are not hanlded because they are untouched by the instructions used.
+ **
+ ** Stack layout:
+ **
+ ** 0x00 -- R1, Dedicated for Stack Pointer
+ ** 0x04 -- slot reserved for LR
+ ** 0x08 -- R4, Volatile, Private
+ ** 0x0c -- R5, Volatile, Private
+ ** 0x10 -- R6, Volatile, Private
+ ** 0x14 -- R7, Volatile, Private
+ ** 0x18 -- R8, Volatile, Private
+ ** 0x1c -- R9, Volatile, Private
+ ** 0x20 -- CR, Condition Register
+ ** 0x24 --
+ **
+ ** 0x28 -- Stack Size, Must be 8-byte aligned
+ */
+
+ /*
+ ** Division Procedures:
+ **
+ ** __ppe42_divwu(dividend, divisor)
+ ** __ppe42_divw(dividend, divisor)
+ **
+ ** R3 = Input parameter, dividend. then Return value, quotient.
+ ** R4 = Input parameter, divisor.
+ ** R5 = Output parameter, quotient.
+ ** R6 = Output parameter, remainder.
+ ** R7 = Temporary register, counter.
+ **
+ ** General Algorithm
+ **
+ ** Using standard shift and subtract method to emulate
+ ** Note: dividend,divisor,quotient,remainder are all 32-bit integers
+ **
+ ** Precondition Check:
+ **
+ ** if (divisor == dividend) {
+ ** quotient = 1;
+ ** remainder = 0;
+ ** }
+ **
+ ** if (divisor == 0) {
+ ** quotient = 0;
+ ** remainder = 0;
+ ** }
+ **
+ ** if (divisor > dividend) {
+ ** quotient = 0;
+ ** remainder = dividend;
+ ** }
+ */
+
+/*****************************************************************************/
+
+ /*
+ ** Divide Word Unsigned (__ppe42_divwu)
+ **
+ ** The implementation uses standard shift and subtract approach.
+ ** The following is an example in C. Note the implementation doesnt
+ ** exactly follow the C example.
+ **
+ ** num_of_bits = 32;
+ ** while(num_bits) {
+ ** dbit = (dividend & 0x80000000) >> 31;
+ ** remainder = (remainder << 1) | dbit;
+ ** dividend = dividend << 1;
+ ** quotient = quotient << 1;
+ ** num_of_bits--;
+ ** if(remainder < divisor)
+ ** continue;
+ ** temp = remainder - divisor;
+ ** qbit = !((temp & 0x80000000) >> 31);
+ ** quotient = quotient | qbit;
+ ** remainder = temp;
+ ** }
+ */
+
+ .align 2
+ .global __ppe42_divwu
+ .type __ppe42_divwu, @function
+
+__ppe42_divwu:
+
+ stwu %r1, -0x28(%r1) // allocate stack frame
+
+ stvd %d4, 0x08(%r1) // save off r4 & r5 in stack
+ stvd %d6, 0x10(%r1) // save off r6 & r7 in stack
+ stvd %d8, 0x18(%r1) // save off r8 & r9 in stack
+
+ mfcr %r5 // save off cr
+ stw %r5, 0x20(%r1) // store cr in stack
+
+ li %r5, 1 // quotient = 1
+ li %r6, 0 // remainder = 0
+ cmplwbc 1, 2, %r3, %r4, __ppe42_divwu_ret // ret(divisor == dividend)
+
+ li %r5, 0 // quotient = 0
+ li %r6, 0 // remainder = 0
+ cmpwibc 1, 2, %r4, 0, __ppe42_divwu_ret // ret(divisor == 0)
+
+ li %r5, 0 // quotient = 0
+ mr %r6, %r3 // remainder = dividend
+ cmplwbc 1, 0, %r3, %r4, __ppe42_divwu_ret // ret(divisor > dividend)
+
+ li %r7, 32 // num_of_bits = 32
+
+__ppe42_divwu_sas: // <<shift and subtract loop>>
+
+ slwi %r6, %r6, 1 // remainder <<= 1
+ inslwi %r6, %r3, 1, 31 // remainder[31] = dividend[0]
+ slwi %r3, %r3, 1 // dividend <<= 1
+ slwi %r5, %r5, 1 // quotient <<= 1
+ subi %r7, %r7, 1 // num_of_bits--
+ cmplwbc 1, 0, %r6, %r4, __ppe42_divwu_sas // continue(remainder<divisor)
+
+ sub %r6, %r6, %r4 // reminder -= divisor
+ addi %r5, %r5, 1 // quotient++
+ cmpwibc 0, 2, %r7, 0, __ppe42_divwu_sas // while(num_of_bits)
+
+__ppe42_divwu_ret: // <<return subroutine>>
+
+ mr %r3, %r5 // r3 is the default return
+ lwz %r5, 0x20(%r1) // load cr from stack
+ mtcr0 %r5 // restore cr
+
+ lvd %d4, 0x08(%r1) // restore r4 & r5 from stack
+ lvd %d6, 0x10(%r1) // restore r6 & r7 from stack
+ lvd %d8, 0x18(%r1) // restore r8 & r9 from stack
+
+ lwz %r1, 0(%r1) // restore stack pointer
+
+ blr // branch back
+
+ .size __ppe42_divwu, .-__ppe42_divwu
+
+
OpenPOWER on IntegriCloud