summaryrefslogtreecommitdiffstats
path: root/tools/PowerPCtoPPE/ppc-ppe-pcp.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/PowerPCtoPPE/ppc-ppe-pcp.py')
-rwxr-xr-xtools/PowerPCtoPPE/ppc-ppe-pcp.py980
1 files changed, 980 insertions, 0 deletions
diff --git a/tools/PowerPCtoPPE/ppc-ppe-pcp.py b/tools/PowerPCtoPPE/ppc-ppe-pcp.py
new file mode 100755
index 00000000..53c2757f
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppc-ppe-pcp.py
@@ -0,0 +1,980 @@
+#!/usr/bin/python2.6
+
+# \file ppc-ppe-pcp.py
+# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
+#
+# ---------------------------------------------------------------
+# Revision History
+# ---------------------------------------------------------------
+# 10-07-2014: project completed
+# daviddu added optimization profile support
+#
+# 10-06-2014: added fused compare and branch support
+# daviddu added support for combining two ld/st into one double word
+# added support to insert branch upon .p2align directive
+#
+# 09-27-2014: added subroutine support for mul* and div*
+# daviddu added virtual double word replacing multiple word support
+#
+# 09-13-2014: initial version
+# daviddu only instruction inline replacement is supported
+# ---------------------------------------------------------------
+
+# Script-wide constants.
+# P2P_PPC_EXT/P2P_PPE_EXT are swapped with str.replace() to derive the output
+# file name from the input file name; P2P_PPE_PRE is prepended to the PPE
+# opcode to form the "bl" target of the multiply/divide subroutines (rule 's').
+P2P_VERSION = "10-07-2014" # version number as last modified date
+P2P_PPC_EXT = '.s' # PPC Assembly filename extension
+P2P_PPE_EXT = '.es' # PPE Assembly filename extension
+P2P_PPE_PRE = '__ppe42_' # PPE Assembly subroutine prefix
+
+import sys
+import os
+import re
+import fnmatch
+import fileinput
+
+# ---------------------------------------------------------------
+# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
+# ---------------------------------------------------------------
+#
+# Description:
+#
+# This post-compiler processor will take PPC405 assembly file(s) produced
+# by powerpc-linux-gcc or hand coded and replace some of the instructions
+# supported by PPC405 ISA but not PPE42 ISA with a set of instructions
+# supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
+#
+# Assumptions:
+#
+# - Input/Output File Name Extension:
+#
+# PPC405 assembly file generated by powerpc-linux-gcc has filename extension
+# defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file
+# consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT"
+# global variable. Both should be consistent with Makefile rules.
+#
+# - Registers:
+#
+# Instructions in input file should only use registers supported by PPE,
+# that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE
+# only has CR0 instead of CR0-7).
+#
+# GCC flag -ffixed can be used to enforce compiler to not use certain
+# registers if compiler generates input files to this script. Note certain
+# optimization level, such as -Os, of GCC will still use certain registers
+# regardless if -ffixed flag is used. Furthermore, compiler should not
+# generate multiple word instructions(lmw/stmw) that covers the registers
+# forbidden to use by -ffixed flag.
+#
+# Example of using -ffixed flag in this case:
+# -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
+# -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
+# -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
+# -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
+# -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
+# -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
+#
+# - Instructions:
+#
+# Instructions in input file should only use PowerPC 405 instructions
+# covered by "PowerPC 405-S Embedded Processor Core" manual; however,
+# it is assumed that certain categories of instructions will never be
+# generated by the powerpc-linux-gcc compiler (or are disabled by a compiler switch).
+#
+# Also, compiler should generate extended mnemonics instead of its base
+# instruction when extended mnemonics fits.
+#
+# Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
+# directive to help instruction alignment for best cache performance.
+#
+# - Assembly Syntax:
+#
+# There should be only white spaces before instruction mnemonics; in
+# other words, all inline comments should be put behind the instruction.
+#
+# "Label:" and an instruction should not be on the same line, hand coded
+# assembly should be consistent with this same compiler output format.
+#
+# Dependencies:
+#
+# In order to utilize assembly subroutines implemented for supporting
+# missing instructions of multiplication and division in PPE42 ISA, a given
+# library(with assembly files and header) must be compiled and linked with
+# any source code that use this program to generate PPE binary.
+#
+# Usage:
+#
+# ./<ThisScript> -f <a filename with path> --- process single file
+# ./<ThisScript> -d <a directory path> --- process multiple files
+# ./<ThisScript> -h --- detailed usage on other flags
+# ./<ThisScript> -v --- version of the program
+# ./<ThisScript> -d <a directory path> -s --- perform result profiling
+#
+# Functions:
+#
+# p2p_main - main function, parse options and arguments
+# p2p_onefile - processing single PPC Assembly File
+# p2p_combine - processing two PPC instructions in input file
+# p2p_replace - processing single PPC instruction in input file
+#
+# Data Structures:
+#
+# ReplaceRules = { ppc_op : [rule, ppe_op] }
+# CombineRules = { ppc_op : [rule, ppe_op] }
+# FuseBranches = [ list of branches qualified for fusing with compares ]
+#
+#------------------------------------------------------------------------------
+# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ]
+#-------|-------------------------|--------------------------------------------
+# Rule | Example (PPC to PPE) | Description
+#-------|-------------------------|--------------------------------------------
+# 'r' | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode
+# 0 0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same
+#-------|-------------------------|--------------------------------------------
+# 'ru' | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by
+# | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the
+# +1 +1 | add RA, RA, RB | original instruction to update RA
+#-------|-------------------------|--------------------------------------------
+# 'ra' | [ppc] ppc_op RT, D(RA) | on top of 'r' rule, emulate 'algebraic' by
+# | [ppe] ppe_op RT, D(RA) | appending "extsh" instruction after the
+# +1 +1 | extsh RT, RT | original instruction to sign-extend RT
+#-------|-------------------------|--------------------------------------------
+# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above.
+# | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether
+# | add RA, RA, RB | rule 'a' or rule 'u' should be applied
+# +2 +2 | extsh RT, RT | first, the outcome should be the same.
+#-------|-------------------------|--------------------------------------------
+# 'h' | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication
+# | [ppe] srwi RA, 16 | emulate multiply "high halfword" with
+# | srwi RB, 16 | multiply "low halfword" by shifting
+# +2 +2 | ppe_op RT, RA, RB | the operands first
+#-------|-------------------------|--------------------------------------------
+# 's' | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide
+# | [ppe] stwu R1, -24(R1)| instructions with calling 'subroutines'
+# | stvd D3, 8(R1) | implemented in ppe42_mul.S and ppe42_div.S
+# | mflr R3 |
+# | stw R3, 16(R1) | Calling Conventions:(SS = Stack Size)
+# | mr R3, RA |
+# | mr R4, RB | Caller is responsible for
+# | bl target | 1) create stack frame
+# | mr RT, R3 | 2) save off R3 and R4 to stack
+# | lwz R3, 16(R1) | 3) save off link register to stack
+# | mtlr R3 | 4) put operands into R3, R4 before branch
+# | lvd D3, 8(R1) | 5) put result in R3 to RT after branch
+# | lwz R1, 0(R1) | 6) restore link register from stack
+# | | 7) restore R3 and R4 from stack
+# | [sub] stwu R1, -SS(R1) | 8) remove the stack frame
+# | <save volatile> |
+# | (subroutine body) | Callee is responsible for
+# | <restore volatile>| 1) create and remove stack frame
+# | lwz R1, 0(R1) | 2) save and restore volatile registers
+# +X +Y | blr | 3) subroutine will not touch LR again
+#-------|-------------------------|--------------------------------------------
+# 'o' | [ppc] ppc_op[o] RT ... | rule of 'o' form for overflow
+# | [ppe] ppe_op RT ... | Note: "mullwo", "divwo" and "divwuo" each
+# | <inst specific> | has unique setting for XER[OV,SO] if OE = 1
+#-------|-------------------------|--------------------------------------------
+# 'd' | [ppc] ppc_op[.] RT ... | rule of '.' or 'dot' form for recording
+# | [ppe] ppe_op RT ... | using "cmpwli" to emulate the [.] form
+# | cmpwli RT, 0 | to the instruction result and CR0 fields
+#-------|-------------------------|--------------------------------------------
+# 'm' | [ppc] ppc_op RT, D(RA) | emulate PowerPC load/store multiple word
+# | [ppe] ppe_op DT, D(RA) | instructions with PPE specific
+# | (doubleword ld/st)| 'virtual doubleword' instructions if target
+# | or | address is 8-byte aligned; otherwise, using
+# | (singleword ld/st)| single word instructions instead or mix both
+# | or | Note only RA == R1/R2/R13 will always meet
+# -1 -1 | (single & double) | alignment requirement of virtual doubleword
+#-------|-------------------------|--------------------------------------------
+#
+# Lookup table read by p2p_replace(): PPC405 mnemonic -> [rule letters, ppe_op].
+# - p2p_replace() tests each rule letter with "in", so 'rau' means r + a + u.
+# - for rule 's' the ppe_op is the subroutine name that gets P2P_PPE_PRE
+#   prepended to form the "bl" target.
+# - for rule 'm' the ppe_op is "<doubleword op>,<singleword op>", split on ','.
+# NOTE(review): p2p_replace() has no branch for the 'o' and 'd' letters, so
+# 'sd'/'so'/'sod' entries are processed exactly like 's' -- confirm that the
+# subroutines take care of CR0/XER.
+ReplaceRules = {#ppc_op : [ rule | ppe_op ]
+ #----------------------------
+ #synchronization instructions
+ 'eieio' : [ 'r', 'sync' ],
+ 'isync' : [ 'r', 'nop' ],
+ 'icbi' : [ 'r', 'nop' ],
+ 'icbt' : [ 'r', 'nop' ],
+ 'mtcr' : [ 'r', 'mtcr0'],
+ #load/store with [u/x/a] form
+ 'stbux' : [ 'ru', 'stbx' ],
+ 'sthux' : [ 'ru', 'sthx' ],
+ 'stwux' : [ 'ru', 'stwx' ],
+ 'lbzux' : [ 'ru', 'lbzx' ],
+ 'lhzux' : [ 'ru', 'lhzx' ],
+ 'lwzux' : [ 'ru', 'lwzx' ],
+ 'lha' : [ 'ra', 'lhz' ],
+ 'lhau' : [ 'ra', 'lhzu' ],
+ 'lhax' : [ 'ra', 'lhzx' ],
+ 'lhaux' : [ 'rau', 'lhzx' ],
+ #multiply/divide with [./o] form
+ 'mulhhw' : [ 'h', 'mullhw' ],
+ 'mulhhw.' : [ 'h', 'mullhw.' ],
+ 'mulhhwu' : [ 'h', 'mullhwu' ],
+ 'mulhhwu.': [ 'h', 'mullhwu.' ],
+ 'mulhw' : [ 's', 'mulhw' ],
+ 'mulhw.' : [ 'sd', 'mulhw' ],
+ 'mulhwu' : [ 's', 'mulhwu' ],
+ 'mulhwu.' : [ 'sd', 'mulhwu' ],
+ 'mullw' : [ 's', 'mullw' ],
+ 'mullw.' : [ 'sd', 'mullw' ],
+ 'mullwo' : [ 'so', 'mullw' ],
+ 'mullwo.' : [ 'sod', 'mullw' ],
+ 'mulli' : [ 's', 'mullw' ],
+ 'divw' : [ 's', 'divw' ],
+ 'divw.' : [ 'sd', 'divw' ],
+ 'divwo' : [ 'so', 'divw' ],
+ 'divwo.' : [ 'sod', 'divw' ],
+ 'divwu' : [ 's', 'divwu' ],
+ 'divwu.' : [ 'sd', 'divwu' ],
+ 'divwuo' : [ 'so', 'divwu' ],
+ 'divwuo.' : [ 'sod', 'divwu' ],
+ #load/store multiple word(Rx-R31)
+ 'lmw' : [ 'm', 'lvd,lwz' ],
+ 'stmw' : [ 'm', 'stvd,stw' ]}
+
+
+#------------------------------------------------------------------------------
+# CombineRules: [ 'f', 'v', 'l' ]
+#-------|-------------------------|--------------------------------------------
+# 'f' | [ppc] ppc_op(cmp*) | rule for 'fusing' adjacent pair of compare
+# | ppc_op(b*) | and branch(PPE specific). Note: only
+# -1 0 | [ppe] ppe_op(cmp*b*) | extended mnemonics of compares are handled
+#-------|-------------------------|--------------------------------------------
+# 'v' | [ppc] ppc_op(lwz/stw) | rule for combining double word aligned
+# | ppc_op(lwz/stw) | load/store pairs into single 'virtual'
+# -1 -1 | [ppe] ppe_op(lvd/stvd) | double word instructions(PPE specific)
+#-------|-------------------------|--------------------------------------------
+# 'l' | [ppc] .p2align | compiler will insert ".p2align" directive to
+# | Label: | help instructions align from label to label.
+# | [ppe] b Label | then assembler will insert "nop" on .p2align
+# | .p2align | directive. a "branch" to skip the nops will
+# 0 -1 | Label: | improve the performance while still aligned
+#-------|-------------------------|--------------------------------------------
+#
+# Lookup table keyed by the opcode of the FIRST line of a candidate pair:
+# PPC405 mnemonic -> [rule letter, ppe_op].
+# - p2p_onefile() holds back any line whose opcode is a key of this table.
+# - p2p_combine() implements rules 'f' and 'v'. For 'f' the ppe_op is the
+#   compare mnemonic that gets the branch mnemonic appended.
+# - rule 'l' is implemented inside p2p_onefile(), which prints the branch
+#   itself when a held '.p2align' line is followed by a label.
+CombineRules = {#ppc_op : [ rule | ppe_op ]
+ #--------------------------
+ #8byte aligned loads/stores
+ 'lwz' : [ 'v', 'lvd' ],
+ 'stw' : [ 'v', 'stvd' ],
+ #compares fusable to branch
+ 'cmplw' : [ 'f', 'cmplw' ],
+ 'cmpw' : [ 'f', 'cmpw' ],
+ 'cmpwi' : [ 'f', 'cmpwi' ],
+ #'.p2align' before 'label:'
+ '.p2align' : [ 'l', 'b' ]}
+
+
+#------------------------------------------------------------------------------
+# FuseBranches: [ Branches can be fused into cmp*b* ]
+#------------------------------------------------------------------------------
+#
+# Branch mnemonics that p2p_onefile() accepts as the second line of a
+# compare+branch pair. A trailing '+'/'-' prediction hint is stripped from the
+# opcode before it is looked up in this list.
+FuseBranches = ['bc', 'bcl',
+ 'blt', 'bltl', 'ble', 'blel',
+ 'bgt', 'bgtl', 'bge', 'bgel',
+ 'beq', 'beql', 'bne', 'bnel']
+
+
+# -----------------------------------------------------------------------------
+# p2p_replace:
+# process each line(filtered) in the assembly file to replace PPC instruction
+# to supported PPE instruction(s)
+#
+# Arguments:
+# string: line - assembly file line to be replaced
+# ppc_op - detected PPC opcode that needs to be replaced
+# Return:
+# boolean: True - Return without Error
+# False - Error Detected
+# Variables:
+# string: inst, rule, ppe_op, newline, temp_op
+# double_inst, single_inst, virtual_reg, base_offset, address_reg
+# Subroutine:
+# NONE
+# -----------------------------------------------------------------------------
+def p2p_replace(line, ppc_op):
+
+ # parse PPC instruction as in I or D form with opcode and upto 3 operands:
+ # possible forms: opcode
+ # opcode RT, RA, RB
+ # opcode RT, RA, IM
+ # opcode RT, D(RA)
+ # inst.group(0) : <whole instruction>
+ # inst.group(1) : " "
+ # inst.group(2) : Opcode(.)
+ # inst.group(3) : " "
+ # inst.group(4) : GPR
+ # inst.group(5) : " , "
+ # inst.group(6) : GPR or Immediate(D)
+ # inst.group(7) : " , " or " ( "
+ # inst.group(8) : GPR or Immediate(IM)
+ # inst.group(9) : " ) "
+ inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line)
+
+ # detect an error
+ if inst is None or ppc_op != inst.group(2):
+ return False
+
+ # look up rule to process the instruction
+ rule, ppe_op = ReplaceRules[ppc_op]
+
+ # if enabled, put a mark in the output file
+ if P2P_COMMENT: print "#P2P(%s):" % rule + line,
+
+ # start cases of replacing PPC instruction with PPE instruction(s)
+ #---r------------------------------------------------------------------------
+ if 'r' in rule:
+
+ # replace opcode under rule 'r' and rewrite the instruction
+ newline = line.replace(ppc_op, ppe_op)
+ print newline,
+
+ # do not continue if there is 'a' or 'u' rule to process on this line
+ if 'u' not in rule and 'a' not in rule:
+ return True
+
+ #---u------------------------------------------------------------------------
+ if 'u' in rule:
+
+ # construct and write "add RA, RA, RB" under rule 'u'
+ newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\
+ inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8)
+ print newline
+
+ # do not continue if there is 'a' rule to process on this line
+ if 'a' not in rule:
+ return True
+
+ #---a------------------------------------------------------------------------
+ if 'a' in rule:
+
+ # construct and write "extsh RT, RT" under rule 'a'
+ newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\
+ inst.group(5) + inst.group(4)
+ print newline
+ return True
+
+ #---h------------------------------------------------------------------------
+ if 'h' in rule:
+
+ # construct and write "srwi RA, 16" under rule 'h'
+ newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\
+ inst.group(5) + "16"
+ print newline
+
+ # construct and write "srwi RB, 16" under rule 'h'
+ newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\
+ inst.group(5) + "16"
+ print newline
+
+ # replace opcode in original instruction and write under rule 'h'
+ newline = line.replace(ppc_op, ppe_op)
+ print newline
+ return True
+
+ #---s------------------------------------------------------------------------
+ if 's' in rule:
+
+ # construct branch target label
+ ppe_op = P2P_PPE_PRE + ppe_op
+
+ # construct and write "stwu R1, -24(R1)" to create the stack frame
+ newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\
+ inst.group(5) + '-24(1)'
+ print newline
+
+ # construct and write "stvd D3, 8(R1)" to save off R3 and R4
+ newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\
+ inst.group(5) + '8(1)'
+ print newline
+
+ # construct and write "mflr R3" to fetch the current link address
+ newline = inst.group(1) + 'mflr' + inst.group(3) + '3'
+ print newline
+
+ # construct and write "stw R3, 16(R1)" to save off current LR to stack
+ newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\
+ inst.group(5) + '16(1)'
+ print newline
+
+ # construct and write "mr R3, RA" to copy the operand RA to R3
+ newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\
+ inst.group(5) + inst.group(6)
+ print newline
+
+ # if 'mulli' is detected, using 'li' instead of 'mr' for second operand
+ if ppc_op == 'mulli':
+ temp_op = 'li'
+ else:
+ temp_op = 'mr'
+
+ # construct and write "mr R4, RB" to copy the operand RB to R4
+ # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4
+ newline = inst.group(1) + temp_op + inst.group(3) + '4' +\
+ inst.group(5) + inst.group(8)
+ print newline
+
+ # using branch and link(bl) to branch to subroutine
+ # later subroutine can branch back using branch link register(blr)
+ # Assumption: the subroutine will be responsible for saving
+ # and restoring all the volatilo registers used in the subroutine
+ newline = inst.group(1) + 'bl' + inst.group(3) + ppe_op
+ print newline
+
+ # construct and write "mr RT, R3" to copy the result in R3 to RT
+ newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\
+ inst.group(5) + '3'
+ print newline
+
+ # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack
+ newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\
+ inst.group(5) + '16(1)'
+ print newline
+
+ # construct and write "mtlr R3" to restore the link register
+ newline = inst.group(1) + 'mtlr' + inst.group(3) + '3'
+ print newline
+
+ # construct and write "lvd D3, 8(R1)" to restore R3 and R4
+ newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\
+ inst.group(5) + '8(1)'
+ print newline
+
+ # construct and write "lwz R1, 0(R1)" to destroy the stack frame
+ newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\
+ inst.group(5) + '0(1)'
+ print newline
+ return True
+
+ #---m------------------------------------------------------------------------
+ if 'm' in rule:
+
+ # parse instruction information
+ # note register can be in either "N" form or "%rN" form
+ double_inst,single_inst = ppe_op.split(',')
+ virtual_reg = int(re.search(r'\d+', inst.group(4)).group())
+ base_offset = int(inst.group(6))
+ address_reg = int(re.search(r'\d+', inst.group(8)).group())
+
+ # consider illegal if multiple word instruction covers non-exist registers
+ if virtual_reg < 28:
+ return False
+
+ # loop until and include GPR31
+ while virtual_reg < 32:
+ # page 270 of 405 manual, only do this for load instructions
+ if virtual_reg == address_reg != 31 and 'l' in single_inst:
+ base_offset += 4
+ virtual_reg += 1
+ continue
+
+ # if other GPRs being address_reg there is no guarantee for alignment
+ if address_reg not in [1,2,13]:
+ # construct and write "lwz/stw RT, D(RA)" for every registers
+ newline = inst.group(1) + single_inst + inst.group(3) +\
+ str(virtual_reg) + inst.group(5) + str(base_offset) +\
+ inst.group(7) + inst.group(8) + inst.group(9)
+ print newline
+ base_offset += 4
+ virtual_reg += 1
+ else:
+ # if base_offset is also aligned with base address in the address_reg
+ # & there are at least two more registers to perform doubleword ld/st
+ if not (base_offset % 8) and (virtual_reg + 1) < 32:
+ # construct and write "lvd/stvd DR, D(RA)" under rule 'v'
+ newline = inst.group(1) + double_inst + inst.group(3) +\
+ str(virtual_reg) + inst.group(5) + str(base_offset) +\
+ inst.group(7) + inst.group(8) + inst.group(9)
+ print newline
+ base_offset += 8
+ virtual_reg += 2
+ # either only one register left or base_offset isnt aligned
+ else:
+ # construct and write "lwz/stwz SR, D(RA)" under rule 'v'
+ newline = inst.group(1) + single_inst + inst.group(3) +\
+ str(virtual_reg) + inst.group(5) + str(base_offset) +\
+ inst.group(7) + inst.group(8) + inst.group(9)
+ print newline
+ base_offset += 4
+ virtual_reg += 1
+ # end of this if-else
+ # end of while loop
+ return True
+ # end of last if
+
+
+# -----------------------------------------------------------------------------
+# p2p_combine:
+# process each two lines(filtered) in the assembly file to combine two PPC
+# instructions to one PPE specific instruction for better performance
+#
+# Arguments:
+# string: first_line - 1st assembly file line to be combined
+# second_line - 2nd assembly file line to be combined
+# first_op - 1st detected PPC opcode that needs to be combined
+# second_op - 2nd detected PPC opcode that needs to be combined
+# Return:
+# boolean: done - True: return without error
+# - False: return with error detected
+# match - True: eventually matched and combined
+# - False: fail to qualify to be combined
+# Variables:
+# string: first_inst, second_inst, rule, ppe_op, newline
+# bo, px_bix, compare_operands, target
+# Subroutine:
+# NONE
+# -----------------------------------------------------------------------------
+def p2p_combine(first_line, second_line, first_op, second_op):
+
+ global P2P_SPACE; global P2P_CYCLE
+ global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE
+
+ # parse PPC instruction as in I or B or D form with opcode and upto 3 operands
+ # possible form : [1st] opcode [CR,] RA, RB
+ # [1st] opcode [CR,] RA, IM
+ # [1st] opcode RT, D(RA)
+ # [2nd] opcode [CR,] Target
+ # [2nd] opcode BO, BI, Target
+ # [2nd] opcode RT, D(RA)
+ # inst.group(0) : <whole instruction>
+ # inst.group(1) : " "
+ # inst.group(2) : Opcode(+/-/.)
+ # inst.group(3) : " "
+ # inst.group(4) : GPR or CR or BO or Target
+ # inst.group(5) : " , "
+ # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target
+ # inst.group(7) : " , " or " ( "
+ # inst.group(8) : GPR or IM or Target
+ # inst.group(9) : " ) "
+ first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line)
+ second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line)
+
+ # detect an error
+ if first_inst is None or second_inst is None or \
+ first_op != first_inst.group(2) or second_op not in second_inst.group(2):
+ return False,False
+
+ # look up rule to process the instruction
+ rule, ppe_op = CombineRules[first_op]
+
+ # start cases of combining two PPC instructions into PPE instruction
+ #---f------------------------------------------------------------------------
+ if 'f' in rule:
+
+ if not P2P_COMPARE_BRANCH:
+ return True,False
+
+ # fusing compare and branch
+ ppe_op = ppe_op + second_op
+
+ # for cmpwib* case, there is a difference between
+ # cmpwi SI operand as signed 16-bit integer and then got sign extended and
+ # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended
+ # thus, will not fuse the two if the integer operand is not in range(0,31)
+ # if cr field is omitted:
+ if ',' in first_inst.group(7):
+ # cr field must be cr0 or 0, error out if it is something else:
+ if '0' not in first_inst.group(4):
+ return False, True
+ if 'i' in first_op and (int(first_inst.group(8)) < 0 or \
+ int(first_inst.group(8)) > 31):
+ return True,False
+ else:
+ compare_operands = first_inst.group(6) + first_inst.group(7) + \
+ first_inst.group(8) + ', '
+ else:
+ if 'i' in first_op and (int(first_inst.group(6)) < 0 or \
+ int(first_inst.group(6)) > 31):
+ return True,False
+ else:
+ compare_operands = first_inst.group(4) + first_inst.group(5) + \
+ first_inst.group(6) + ', '
+
+ # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*'
+ # Note CTR decreament and branch always cases are not handled, and
+ # python bin() outputs "0bXXXXX" form so bo[2] is actually BO bit 0
+ # else there is no need for PX,BIX fields for extended mnemonics
+ if 'bc' in second_op:
+ bo = bin(int(second_inst.group(4)))
+
+ # do not handle CRT decreament or branch always cases
+ if bo[4] == 0 or bo[2] == 1:
+ return True,False
+
+ # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used
+ px_bix = bo[3] + second_inst.group(5) + \
+ second_inst.group(6) + second_inst.group(7)
+ target = second_inst.group(8)
+ else:
+ px_bix = ""
+ # if cr field is omitted:
+ if ',' in second_inst.group(5):
+ # cr field must be cr0 or 0, error out if it is something else:
+ if '0' not in second_inst.group(4):
+ return False, True
+ target = second_inst.group(6)
+ else:
+ target = second_inst.group(4)
+
+ # profile: space--, cycle is the same because 1+2==3
+ P2P_SPACE -= 1
+
+ # if enabled, put a mark in the output file
+ if P2P_COMMENT:
+ print "#P2P(%s):" % rule + first_line,
+ print "#P2P(%s):" % rule + second_line,
+
+ # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule
+ newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\
+ px_bix + compare_operands + target
+ print newline
+ return True,True
+
+
+ #---v------------------------------------------------------------------------
+ if 'v' in rule:
+
+ if not P2P_VIRTUAL_DOUBLE:
+ return True,False
+
+ global P2P_VDW_SDA
+
+ # Combinable Conditions:
+ # 1) base address registers must be the same and one of R1/R2/R13
+ # 2) address offsets have to be 8-bytes continuous and aligned
+ # 3) target or source registers must qualify to be double word register
+ # Note: label+offset@sda21 format is coverted to target r13 after link
+ # assume data go in and out r13 or SDA space is always 8-byte aligned
+ # here we only check the continous of address offset and register pair
+ if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \
+ ("@sda21" in first_inst.group(6) and \
+ "@sda21" in second_inst.group(6) and \
+ P2P_VDW_SDA):
+
+ if ((first_inst.group(6).replace("@sda21","") + "+4" == \
+ second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
+ ((first_inst.group(6).isdigit() and \
+ not int(first_inst.group(6)) % 8) and \
+ int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \
+ (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \
+ (int(first_inst.group(4)) == 31 and \
+ int(second_inst.group(4)) == 0)):
+ newline = first_line.replace(first_op, ppe_op)
+ elif ((second_inst.group(6).replace("@sda21","") + "+4" == \
+ first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
+ ((second_inst.group(6).isdigit() and \
+ not int(second_inst.group(6)) % 8) and \
+ int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \
+ (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \
+ (int(second_inst.group(4)) == 31 and \
+ int(first_inst.group(4)) == 0)):
+ newline = second_line.replace(second_op, ppe_op)
+ else:
+ return True,False
+
+ # profile: space--, cycle--(same delay but 1 less from issue)
+ P2P_SPACE -= 1; P2P_CYCLE -= 1
+
+ # if enabled, put a mark in the output file
+ if P2P_COMMENT:
+ print "#P2P(%s):" % rule + first_line,
+ print "#P2P(%s):" % rule + second_line,
+
+ print newline,
+ return True,True
+ else:
+ return True,False
+
+
+# -----------------------------------------------------------------------------
+# p2p_onefile:
+# process single PPC assembly file to convert it into PPE assembly file
+# also filter out non-instruction lines before calling the subroutine
+#
+# Arguments:
+# string: ppcFileName
+# Return:
+# boolean: done - True if file processing completed without error
+# - False if file processing failed due to an error
+# Variables:
+# boolean: match, done
+# string: ppeFileName, line, ppc_op, pre_op, pre_line, section, label
+# integer: line_num, first_label_ln, second_label_ln, misalign
+# Subroutine:
+# p2p_combine
+# p2p_replace
+# -----------------------------------------------------------------------------
+def p2p_onefile(ppcFileName):
+
+ global P2P_SPACE; P2P_SPACE = 0 # profile count
+ global P2P_CYCLE; P2P_CYCLE = 0 # profile count
+
+ if P2P_VERBOSE : print "Translate PPC assembly: " + ppcFileName
+
+ # new PPE assembly file is renamed as <filename>.s
+ ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)
+ os.rename(ppcFileName, ppeFileName)
+
+ # initialize storage variables for previous line that needs to be remembered
+ pre_line = ""
+ pre_op = ""
+
+ # use inline file editing, back up original PPC assembly file as <filename>.S
+ for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):
+
+ # in case of "mtmsr 0; isync"
+ line = line.replace('isync','nop')
+
+ # skip blank line
+ if not line.strip():
+ if pre_line:
+ print pre_line,
+ pre_line = ""
+ print line,
+ continue
+
+ # skip comments line
+ if re.search("^[\s]*(//|#)", line):
+ if pre_line:
+ print pre_line,
+ pre_line = ""
+ print line,
+ continue
+
+ # skip .section code except .p2align and label:
+ section = re.search("^[\s]*(\.[0-9a-zA-Z_]+)", line)
+ if section is not None and ':' not in line and \
+ section.group(1) != '.p2align':
+ if pre_line:
+ print pre_line,
+ pre_line = ""
+ print line,
+ continue
+
+ # apply specical 'l' rule in CombineRules for '.p2align' before a 'label:'
+ label = re.search("^[\s]*[\.0-9a-zA-Z_]+[:]+", line)
+ if label is not None:
+ if pre_line and pre_op == '.p2align':
+ second_label_ln = fileinput.lineno()
+ misalign = 8 - (second_label_ln - first_label_ln - 2) % 8
+ if misalign in [3,4,5,6,7]:
+ # profile: same space, but save cycles, branch penalty is 2
+ P2P_CYCLE -= misalign - 2
+ if P2P_COMMENT: print "#P2P(l):"
+ print '\tb ' + label.group(0).split(':')[0]
+ print pre_line,
+ pre_line = ""
+ first_label_ln = fileinput.lineno()
+ if pre_line:
+ print pre_line,
+ pre_line = ""
+ print line,
+ continue
+
+ # extract opcode field from line
+ ppc_op = line.split()[0]
+ done,match = False,False
+
+ # detect the 2nd possible combinable instruction
+ if pre_line and P2P_COMBINE:
+ # ignore +/- signs for branch prediction
+ if '+' in ppc_op or '-' in ppc_op:
+ ppc_op = ppc_op[:-1]
+ if 'cmp' in pre_op and ppc_op in FuseBranches or \
+ 'cmp' not in pre_op and ppc_op == pre_op:
+ done,match = p2p_combine(pre_line, line, pre_op, ppc_op)
+ if not match:
+ print pre_line,
+ else:
+ print pre_line,
+ done,match = True,False
+ pre_line = ""
+
+ # detect the 1st possible combinable instruction
+ if not pre_line and not match and P2P_COMBINE:
+ if ppc_op in CombineRules.keys():
+ pre_op = ppc_op
+ pre_line = line
+ done,match = True,True
+ else:
+ done,match = True,False
+
+ # defect possible replacable instruction
+ if not match:
+ if ppc_op in ReplaceRules.keys() and P2P_REPLACE:
+ done = p2p_replace(line, ppc_op)
+ else:
+ print line,
+ done = True
+
+ # if instruction process is not done due to error
+ if not done:
+ line_num = fileinput.lineno()
+ break
+
+ # close the output file and restore the original input file
+ fileinput.close()
+ os.rename(ppeFileName+'.405', ppcFileName)
+
+ # in case last line of the file qualified to be a pre_line and was not printed
+ if pre_line:
+ f = open(ppeFileName, 'a')
+ f.write(pre_line)
+ f.close()
+
+ # print error debug message
+ if not done:
+ print "Error: target instruction detected at line [%d]:" % line_num
+ print " " + line
+ print " but fail to recognize instruction format."
+ # terminate Makefile or execution if an error is detected
+ sys.exit(1)
+
+ if P2P_COMMENT:
+ f = open(ppeFileName, 'a')
+ f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE,P2P_CYCLE))
+ f.close()
+
+ if P2P_VERBOSE :
+ print "Generated PPE assembly: " + ppeFileName
+ print "Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +\
+ str(P2P_CYCLE) + " cycles."
+
+
+# -----------------------------------------------------------------------------
+# p2p_profile
+# profiling how much performance and code size are saved by optimization
+#
+# Arguments:
+# string: ppcFileName
+# Return:
+# list: [space, cycle]
+# Variables:
+# string: line, profile
+# Subroutine:
+# None
+# -----------------------------------------------------------------------------
def p2p_profile(ppcFileName):
    """Read back the optimization statistics stored in a generated PPE file.

    The PPE file name is obtained from ppcFileName by substituting
    P2P_PPE_EXT for P2P_PPC_EXT.  Only the last line of that file is
    inspected; the statistics are taken from it when it has the form
    "#P2P: space(N) cycle(M)".

    Arguments:
        string: ppcFileName -- name of the PPC assembly file
    Return:
        list: [space, cycle] as integers, or [0, 0] when the last line does
              not carry the P2P comment (this includes an empty file)
    """
    ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)

    # Pre-bind the name: for an empty file the loop body never runs, and the
    # search below would otherwise fail on an unassigned variable.
    line = ""

    # The with-statement closes the file even when reading raises.
    with open(ppeFileName, 'r') as f:
        # walk to the end of the file, keeping only the last line
        for line in f:
            pass

    profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)", line)
    if profile is None:
        return [0, 0]
    return [int(profile.group(1)), int(profile.group(2))]
+
+# -----------------------------------------------------------------------------
+# p2p_main:
+# main of this script
+# print usage info
+# parse options and arguments
+# process one file or a directory of files
+# -----------------------------------------------------------------------------
def p2p_main():
    """Parse the command line, set the global switches and run the job.

    Options -f/--filename and -d/--directory select what is processed:
      * -f FILE : p2p_onefile() is called on FILE
      * -d PATH : every file below PATH whose name ends with P2P_PPC_EXT is
                  handled; with -p the files are processed by a
                  multiprocessing Pool, with -s (and without -p) the files
                  are only profiled through p2p_profile() and a summary is
                  printed, with both -p and -s the files are processed in
                  parallel and then profiled.
    Statistics (-s) are only gathered in directory mode.

    The parsed options are published through the module globals
    P2P_VERBOSE, P2P_COMMENT, P2P_REPLACE, P2P_COMBINE, P2P_VDW_SDA,
    P2P_COMPARE_BRANCH and P2P_VIRTUAL_DOUBLE.

    Note: print is used in its single-argument parenthesized form, which
    produces the same output as the print statement under Python 2.
    """
    global P2P_VERBOSE, P2P_COMMENT, P2P_REPLACE, P2P_COMBINE
    global P2P_VDW_SDA, P2P_COMPARE_BRANCH, P2P_VIRTUAL_DOUBLE

    # command-line option parsing
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    version = "%prog v." + P2P_VERSION
    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
                      help="process all files in a directory given by PATH")
    parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile",
                      help="process single file(with path in the filename)")
    parser.add_option("-p", "--parallel",
                      action="store_true", dest="parallel", default=False,
                      help="processing all files in parallel processes")
    parser.add_option("-s", "--statistics",
                      action="store_true", dest="profile", default=False,
                      help="optimization profiling, require comment in outputs")
    parser.add_option("-c", "--combine-only",
                      action="store_false", dest="replace", default=True,
                      help="enable only combine function by disabling replace")
    parser.add_option("-r", "--replace-only",
                      action="store_false", dest="combine", default=True,
                      help="enable only replace function by disabling combine")
    parser.add_option("-b", "--compare branch disable",
                      action="store_false", dest="compare_branch", default=True,
                      help="only disabling fused compare branch function")
    parser.add_option("-v", "--virtual double disable",
                      action="store_false", dest="virtual_double", default=True,
                      help="only disabling fused virtual double function")
    parser.add_option("-e", "--eabi",
                      action="store_true", dest="vdw_sda", default=False,
                      help="enable virtual double word fusion targeting sda")
    parser.add_option("-n", "--no-comment",
                      action="store_false", dest="comment", default=True,
                      help="don't leave comment mark in output file")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose", default=True,
                      help="don't print status messages to stdout")
    (options, _args) = parser.parse_args()

    # global program output verbose switch
    P2P_VERBOSE = options.verbose
    # leave a comment mark in output files
    P2P_COMMENT = options.comment
    # enable instruction replace functions
    P2P_REPLACE = options.replace
    # enable instruction combine functions
    P2P_COMBINE = options.combine
    # enable virtual double word fusion targeting sda
    P2P_VDW_SDA = options.vdw_sda
    # enable only fused compare and branch function
    P2P_COMPARE_BRANCH = options.compare_branch
    # enable only combined virtual double function
    P2P_VIRTUAL_DOUBLE = options.virtual_double

    if P2P_VERBOSE:
        print("PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)")
        print("Version: " + P2P_VERSION)

    # single file processing
    if options.ppcFile:
        if P2P_VERBOSE:
            print("Processing signle file: " + options.ppcFile)
        p2p_onefile(options.ppcFile)

    # multiple files processing
    if options.ppcPath:
        if P2P_VERBOSE:
            print("Accessing all files at: " + options.ppcPath)
            print("*Parallel Process Mode: " + ("Off", "On")[options.parallel])

        # collect every PPC assembly file below the given directory
        fileList = []
        for root, _subdirs, names in os.walk(options.ppcPath):
            for name in fnmatch.filter(names, '*' + P2P_PPC_EXT):
                fileList.append(os.path.join(root, name))

        if options.parallel:
            # parallel processing mode
            from multiprocessing import Pool
            pool = Pool()
            pool.map(p2p_onefile, fileList)
            pool.close()
            pool.join()
        elif not options.profile:
            # sequential processing; skipped when only statistics are wanted
            for path in fileList:
                p2p_onefile(path)

        if options.profile:
            # Statistics are summed after any parallel run has finished, so
            # that combining -p with -s reports the real totals instead of
            # totals that were never accumulated.
            total_bytes = 0
            total_cycles = 0
            for path in fileList:
                space, cycle = p2p_profile(path)
                total_bytes += space * 4
                total_cycles += cycle
            print("Optimization Profiling: " + str(total_bytes) + " bytes, " +
                  str(total_cycles) + " cycles.")

    if P2P_VERBOSE:
        print("Done")
+
+
+# -----------------------------------------------------------------------------
+# python main
# run the processor only when this file is executed as a script
if __name__ == "__main__":
    p2p_main()
+
OpenPOWER on IntegriCloud