diff options
Diffstat (limited to 'tools/PowerPCtoPPE/ppe42_mulhw.S')
-rw-r--r-- | tools/PowerPCtoPPE/ppe42_mulhw.S | 193 |
1 files changed, 0 insertions, 193 deletions
/// \file ppe42_mulhw.S
/// \brief PPC405 word multiplication instructions implemented by PPE ISA
///
/// This file includes implementation for the following PPC405 instructions
///     mulhw
///
/// Note: PPE ISA specific "fused compare and branch" instructions are used
///
/// Revision History:
///     09-15-2014: Initial Version by daviddu
///

        .file   "ppe42_mulhw.S"
        .section ".text"

        /*
        ** Code comment notation:
        **
        ** msw = most-significant (high-order) word, i.e. bits 0..31
        ** lsw = least-significant (low-order) word, i.e. bits 32..63
        ** msh = most-significant (high-order) halfword, i.e. bits 0..15
        ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
        ** LZ  = Leading Zeroes
        ** SD  = Significant Digits
        ** OW  = Register is overwritten, previous value is lost,
        **       correct if previous value is no longer needed.
        ** FU  = Register is not overwritten, but its value is no longer
        **       needed, in other words, the register is "free for use".
        **
        ** PPE GPR Registers are: R0-R10, R13, R28-R31
        ** Volatile Registers are: R0, R3-R10
        ** Non-volatile registers are R28-R31
        */

        /*
        ** Calling Convention
        **
        ** R2 and R13 are never saved or restored. In ABI or EABI application
        ** these registers are constant. The other touched volatile registers
        ** will be saved and restored by the subroutines. Note the caller
        ** won't be saving those registers because these subroutines will be
        ** instrumented into caller's body without compiler knowledge.
        **
        ** Note R3 is not saved and restored because it will be changed for
        ** return value anyway, the p2p script will make sure to restore it.
        ** Also CR is handled because of compare and branch, but XER/CTR/LR
        ** are not handled because they are untouched by the instructions used.
        **
        ** Stack layout:
        **
        ** 0x00 -- R1, Dedicated for Stack Pointer
        ** 0x04 -- slot reserved for LR
        ** 0x08 -- R4, Volatile, Private
        ** 0x0c -- R5, Volatile, Private
        ** 0x10 -- R6, Volatile, Private
        ** 0x14 -- R7, Volatile, Private
        ** 0x18 -- R8, Volatile, Private
        ** 0x1c -- R9, Volatile, Private
        ** 0x20 -- CR, Condition Register
        ** 0x24 --
        **
        ** 0x28 -- Stack Size, Must be 8-byte aligned
        */

        /*
        ** Multiplication Procedures:
        **
        ** __ppe42_mulhwu(U,V)
        ** __ppe42_mulhw(U,V)
        ** __ppe42_mullw(U,V)
        **
        ** R3:R4 = Input parameter, multipliers: U, V.
        ** R3    = Output parameter, either Product.msw or Product.lsw.
        ** R5-R9 = Temporary registers
        **
        ** General Algorithm
        **
        ** Using PPC405 ISA instruction 'mullhwu' to emulate
        ** Note: U,V,A,B,C,D are all 32-bit integers (with msh and lsh),
        **       Product is the 64-bit result (with msw and lsw).
        **
        **                          U.msh   U.lsh
        **                  X       V.msh   V.lsh
        **          ------------------------------
        **                          A.msh   A.lsh       A = U.lsh * V.lsh
        **                  B.msh   B.lsh               B = U.msh * V.lsh
        **                  C.msh   C.lsh               C = U.lsh * V.msh
        **          D.msh   D.lsh                       D = U.msh * V.msh
        **          ------------------------------
        **          Product.msw     Product.lsw
        **
        ** __ppe42_mulhwu: Return Product.msw (unsigned)
        ** __ppe42_mulhw:  Return Product.msw (signed)
        ** __ppe42_mullw:  Return Product.lsw
        **
        ** Precondition Check:
        **
        ** if( U == 0 || V == 0 ) return P=0;
        */

/*****************************************************************************/

        /*
        ** Multiply High Word Signed (__ppe42_mulhw)
        **
        ** In:  R3 = U, R4 = V (signed 32-bit)
        ** Out: R3 = msw of the signed 64-bit product U * V
        ** R4-R9 and CR are saved on entry and restored on exit.
        **
        ** The unsigned high word of the raw 32-bit operands is computed
        ** from four 16x16 partial products, then corrected for sign:
        **
        **     p = mulhwu(U, V);
        **     if( U < 0 ) p = p - V;
        **     if( V < 0 ) p = p - U;
        **
        ** Reading a negative U as unsigned adds 2^32 to it, which adds
        ** V * 2^32 to the product, i.e. V to the msw (likewise for V).
        ** All arithmetic is modulo 2^32.
        **
        ** Note: negating mulhwu(|U|, |V|) is NOT equivalent. The msw of a
        ** negated 64-bit product is ~msw + (lsw == 0), so plain negation
        ** is off by one whenever lsw != 0, e.g. mulhw(-1, 1) must be -1.
        */

        .align  2
        .global __ppe42_mulhw
        .type   __ppe42_mulhw, @function

__ppe42_mulhw:

        stwu    %r1, -0x28(%r1)                 // allocate stack frame

        stvd    %d4, 0x08(%r1)                  // save off r4 & r5 in stack
        stvd    %d6, 0x10(%r1)                  // save off r6 & r7 in stack
        stvd    %d8, 0x18(%r1)                  // save off r8 & r9 in stack

        mfcr    %r5                             // save off cr
        stw     %r5, 0x20(%r1)                  // store cr in stack

        li      %r5, 0                          // r5 = 0, result if U or V is 0
        cmpwibc 1, 2, %r3, 0, __ppe42_mulhw_ret // U=0 -> ret
        cmpwibc 1, 2, %r4, 0, __ppe42_mulhw_ret // V=0 -> ret

        // U and V are both non-zero from here on, so "not > 0" means "< 0".

        li      %r9, 0                          // r9 = sign correction term
        cmpwibc 1, 1, %r3, 0, __ppe42_mulhw_csc // U>0 -> csc
        mr      %r9, %r4                        // U<0 -> r9 = V

__ppe42_mulhw_csc:                              // <<continue sign check>>

        cmpwibc 1, 1, %r4, 0, __ppe42_mulhw_uns // V>0 -> uns
        add     %r9, %r9, %r3                   // V<0 -> r9 = r9 + U

__ppe42_mulhw_uns:                              // <<unsigned multiplication>>

        extrwi  %r5, %r3, 16, 16                // r5 = U.lsh
        srwi    %r3, %r3, 16                    // r3 = U.msh
        extrwi  %r6, %r4, 16, 16                // r6 = V.lsh
        srwi    %r4, %r4, 16                    // r4 = V.msh

        mullhwu %r7, %r5, %r6                   // r7 = A = U.lsh * V.lsh
        srwi    %r7, %r7, 16                    // r7 = A.msh

        mullhwu %r6, %r3, %r6                   // r6 = B = U.msh * V.lsh
        extrwi  %r8, %r6, 16, 16                // r8 = B.lsh
        srwi    %r6, %r6, 16                    // r6 = B.msh
        add     %r7, %r8, %r7                   // r7 = A.msh + B.lsh

        mullhwu %r5, %r5, %r4                   // r5 = C = U.lsh * V.msh
        extrwi  %r8, %r5, 16, 16                // r8 = C.lsh
        srwi    %r5, %r5, 16                    // r5 = C.msh
        add     %r7, %r8, %r7                   // r7 = A.msh + B.lsh + C.lsh

        srwi    %r7, %r7, 16                    // r7 = carry into Product.msw
        add     %r7, %r7, %r6                   // r7 = carry + B.msh
        add     %r7, %r7, %r5                   // r7 = carry + B.msh + C.msh

        mullhwu %r3, %r3, %r4                   // r3 = D = U.msh * V.msh
        add     %r5, %r3, %r7                   // r5 = unsigned Product.msw

        subf    %r5, %r9, %r5                   // r5 = r5 - r9, signed msw

__ppe42_mulhw_ret:

        mr      %r3, %r5                        // put return value to r3

        lwz     %r5, 0x20(%r1)                  // load cr from stack
        mtcr0   %r5                             // restore cr

        lvd     %d4, 0x08(%r1)                  // restore r4 & r5 from stack
        lvd     %d6, 0x10(%r1)                  // restore r6 & r7 from stack
        lvd     %d8, 0x18(%r1)                  // restore r8 & r9 from stack

        lwz     %r1, 0(%r1)                     // restore stack pointer

        blr

        .size   __ppe42_mulhw, .-__ppe42_mulhw