summaryrefslogtreecommitdiffstats
path: root/tools/PowerPCtoPPE/ppe42_mulhw.S
diff options
context:
space:
mode:
Diffstat (limited to 'tools/PowerPCtoPPE/ppe42_mulhw.S')
-rw-r--r--tools/PowerPCtoPPE/ppe42_mulhw.S193
1 files changed, 0 insertions, 193 deletions
diff --git a/tools/PowerPCtoPPE/ppe42_mulhw.S b/tools/PowerPCtoPPE/ppe42_mulhw.S
deleted file mode 100644
index d229121b..00000000
--- a/tools/PowerPCtoPPE/ppe42_mulhw.S
+++ /dev/null
@@ -1,193 +0,0 @@
-/// \file ppe42_mulhw.S
-/// \brief PPC405 word multiplication instructions implemented by PPE ISA
-///
-/// This file includes implementation for the following PPC405 instructions
-/// mulhw
-///
-/// Note: PPE ISA specific "fused compare and branch" instructions are used
-///
-/// Revision History:
-/// 09-15-2014: Initial Version by daviddu
-///
-
- .file "ppe42_mulhw.S"
- .section ".text"
-
- /*
- ** Code comment notation:
- **
- ** msw = most-significant (high-order) word, i.e. bits 0..31
- ** lsw = least-significant (low-order) word, i.e. bits 32..63
- ** msh = most-significant (high-order) halfword, i.e. bits 0..15
- ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
- ** LZ = Leading Zeroes
- ** SD = Significant Digits
- ** OW = Register is overwritten, previous value is lost,
- ** correct if previous value is no longer needed.
- ** FU = Register is not overwritten, but its value is no longer needed,
- ** in another word, the register is "free for use".
- **
- ** PPE GPR Registers are: R0-R10, R13, R28-R31
- ** Volatile Registers are: R0, R3-R10
- ** Non-volatile registers are R28-R31
- */
-
- /*
- ** Calling Convention
- **
- ** R2 and R13 are never saved or restored. In ABI or EABI application
- ** these registers are constant. The other touched volatile registers
- ** will be saved and restored by the subroutines. Note the caller
- ** won't be saving those registers because these subroutines will be
- ** instrumented into caller's body without compiler knowledge.
- **
- ** Note R3 is not saved and restored because it will be overwritten with
- ** the return value anyway; the p2p script will make sure to restore it.
- ** Also CR is handled because of compare and branch, but XER/CTR/LR
- ** are not handled because they are untouched by the instructions used.
- **
- ** Stack layout:
- **
- ** 0x00 -- R1, Dedicated for Stack Pointer
- ** 0x04 -- slot reserved for LR
- ** 0x08 -- R4, Volatile, Private
- ** 0x0c -- R5, Volatile, Private
- ** 0x10 -- R6, Volatile, Private
- ** 0x14 -- R7, Volatile, Private
- ** 0x18 -- R8, Volatile, Private
- ** 0x1c -- R9, Volatile, Private
- ** 0x20 -- CR, Condition Register
- ** 0x24 --
- **
- ** 0x28 -- Stack Size, Must be 8-byte aligned
- */
-
- /*
- ** Multiplication Procedures:
- **
- ** __ppe42_mulhwu(U,V)
- ** __ppe42_mulhw(U,V)
- ** __ppe42_mullw(U,V)
- **
- ** R3:R4 = Input parameter, multipliers: U, V.
- ** R3 = Output parameter, either product.msw or product.lsw.
- ** R5-R9 = Temporary registers
- **
- ** General Algorithm
- **
- ** Using PPC405 ISA instruction 'mullhw' to emulate
- ** Note: U,V,A,B,C,D are all 32-bit integers (with msh and lsh);
- ** Product is a 64-bit integer (with msw and lsw)
- **
- ** U.msh U.lsh
- ** X V.msh V.lsh
- ** ------------------------
- ** A.msh A.lsh
- ** B.msh B.lsh
- ** C.msh C.lsh
- ** D.msh D.lsh
- ** ------------------------
- ** Product.msw Product.lsw
- **
- ** __ppe42_mulhwu: Return Product.msw (unsigned)
- ** __ppe42_mulhw: Return Product.msw (signed)
- ** __ppe42_mullw: Return Product.lsw
- **
- ** Precondition Check:
- **
- ** if( U == 0 || V == 0 ) return P=0;
- */
-
-/*****************************************************************************/
-
- /*
- ** Multiply High Word Signed (__ppe42_mulhw)
- **
- ** In:  R3 = U, R4 = V (signed 32-bit operands)
- ** Out: R3 = most-significant word (bits 0..31) of the signed 64-bit
- **      product U * V
- **
- ** R4-R9 are saved to the stack frame on entry and reloaded on exit.
- ** CR is saved with mfcr and put back with mtcr0.
- **
- ** Algorithm:
- **
- **   The high word of the unsigned product of the raw bit patterns is
- **   built from four 16x16 partial products (mullhwu), then turned into
- **   the signed result with the two's-complement identity
- **
- **     p = mulhwu(U, V);
- **     if( U < 0 ) p -= V;
- **     if( V < 0 ) p -= U;          (all arithmetic modulo 2^32)
- **
- **   Taking absolute values and negating only the high word of the
- **   unsigned product is NOT equivalent: whenever the low word of the
- **   product is nonzero the negated high word is ~p, not -p
- **   (e.g. -1 * 1 must give 0xFFFFFFFF, not 0).
- **
- ** Register roles in the multiply section:
- **
- **   R3 = U.msh, then U.msh * V.msh
- **   R4 = V.msh
- **   R5 = U.lsh, then msh of (U.lsh * V.msh), finally the result
- **   R6 = V.lsh, then msh of (U.msh * V.lsh)
- **   R7 = running sum of the middle column / carry into the high word
- **   R8 = scratch (lsh of a cross product)
- **   R9 = signed correction term (U<0 ? V : 0) + (V<0 ? U : 0)
- */
-
- .align 2
- .global __ppe42_mulhw
- .type __ppe42_mulhw, @function
-
-__ppe42_mulhw:
-
- stwu %r1, -0x28(%r1) // allocate stack frame
-
- stvd %d4, 0x08(%r1) // save off r4 & r5 in stack
- stvd %d6, 0x10(%r1) // save off r6 & r7 in stack
- stvd %d8, 0x18(%r1) // save off r8 & r9 in stack
-
- mfcr %r5 // save off cr
- stw %r5, 0x20(%r1) // store cr in stack
-
- li %r5, 0 // r5 = 0, result if either operand is 0
- cmpwibc 1, 2, %r3, 0, __ppe42_mulhw_ret // U=0 -> ret
- cmpwibc 1, 2, %r4, 0, __ppe42_mulhw_ret // V=0 -> ret
-
- li %r9, 0 // correction = 0
- cmpwibc 1, 1, %r3, 0, __ppe42_mulhw_csc // U>0 -> csc (U != 0 here)
- mr %r9, %r4 // U<0 -> correction = V
-
-__ppe42_mulhw_csc: // <<continue sign check>>
-
- cmpwibc 1, 1, %r4, 0, __ppe42_mulhw_uns // V>0 -> uns (V != 0 here)
- add %r9, %r9, %r3 // V<0 -> correction += U
-
-__ppe42_mulhw_uns: // <<unsigned multiplication>>
-
- extrwi %r5, %r3, 16, 16 // r5 = U.lsh
- srwi %r3, %r3, 16 // r3 = U.msh
- extrwi %r6, %r4, 16, 16 // r6 = V.lsh
- srwi %r4, %r4, 16 // r4 = V.msh
-
- mullhwu %r7, %r5, %r6 // U.lsh * V.lsh
- srwi %r7, %r7, 16 // keep only its msh (middle column)
-
- mullhwu %r6, %r3, %r6 // U.msh * V.lsh
- extrwi %r8, %r6, 16, 16 // its lsh goes to the middle column
- srwi %r6, %r6, 16 // its msh goes to the high word
- add %r7, %r8, %r7
-
- mullhwu %r5, %r5, %r4 // U.lsh * V.msh
- extrwi %r8, %r5, 16, 16 // its lsh goes to the middle column
- srwi %r5, %r5, 16 // its msh goes to the high word
- add %r7, %r8, %r7
-
- srwi %r7, %r7, 16 // carry out of the middle column
- add %r7, %r7, %r6
- add %r7, %r7, %r5
-
- mullhwu %r3, %r3, %r4 // U.msh * V.msh
- add %r5, %r3, %r7 // r5 = unsigned high word
-
- subf %r5, %r9, %r5 // r5 = r5 - correction -> signed high word
-
-__ppe42_mulhw_ret:
-
- mr %r3, %r5 // put return value to r3
-
- lwz %r5, 0x20(%r1) // load cr from stack
- mtcr0 %r5 // restore cr
-
- lvd %d4, 0x08(%r1) // restore r4 & r5 from stack
- lvd %d6, 0x10(%r1) // restore r6 & r7 from stack
- lvd %d8, 0x18(%r1) // restore r8 & r9 from stack
-
- lwz %r1, 0(%r1) // restore stack pointer
-
- blr
-
- .size __ppe42_mulhw, .-__ppe42_mulhw
-
-
OpenPOWER on IntegriCloud