diff options
Diffstat (limited to 'tools/PowerPCtoPPE/ppc-ppe-pcp.py')
-rwxr-xr-x | tools/PowerPCtoPPE/ppc-ppe-pcp.py | 998 |
1 files changed, 0 insertions, 998 deletions
#!/usr/bin/python2.6

# \file  ppc-ppe-pcp.py
# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
#
# ---------------------------------------------------------------
# Revision History
# ---------------------------------------------------------------
# 10-07-2014: project completed
# daviddu     added optimization profile support
#
# 10-06-2014: added fused compare and branch support
# daviddu     added support for combining two ld/st into one double word
#             added support to insert branch upon .p2align directive
#
# 09-27-2014: added subroutine support for mul* and div*
# daviddu     added virtual double word replacing multiple word support
#
# 09-13-2014: initial version
# daviddu     only instruction inline replacement is supported
# ---------------------------------------------------------------

P2P_VERSION = "10-07-2014"  # version number as last modified date
P2P_PPC_EXT = '.s'          # PPC Assembly filename extension
P2P_PPE_EXT = '.es'         # PPE Assembly filename extension
P2P_PPE_PRE = '__ppe42_'    # PPE Assembly subroutine prefix

import sys
import os
import re
import fnmatch
import fileinput

# ---------------------------------------------------------------
# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
# ---------------------------------------------------------------
#
# Description:
#
#   This post-compiler processor will take PPC405 assembly file(s) produced
#   by powerpc-linux-gcc or hand coded and replace some of the instructions
#   supported by PPC405 ISA but not PPE42 ISA with a set of instructions
#   supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
#
# Assumptions:
#
#   - Input/Output File Name Extension:
#
#     PPC405 assembly file generated by powerpc-linux-gcc has filename
#     extension defined by "P2P_PPC_EXT" global variable, while PPE42 assembly
#     file consumed by PPE Assembler has filename extension defined by
#     "P2P_PPE_EXT" global variable. Both should be consistent with Makefile
#     rules.
#
#   - Registers:
#
#     Instructions in input file should only use registers supported by PPE,
#     that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs (for example, PPE
#     only has CR0 instead of CR0-7).
#
#     GCC flag -ffixed can be used to enforce compiler to not use certain
#     registers if compiler generates input files to this script. Note certain
#     optimization level, such as -Os, of GCC will still use certain registers
#     regardless if -ffixed flag is used. Furthermore, compiler should not
#     generate multiple word instructions (lmw/stmw) that cover the registers
#     forbidden to use by -ffixed flag.
#
#     Example of using -ffixed flag in this case:
#       -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
#       -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
#       -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
#       -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
#       -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
#       -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
#
#   - Instructions:
#
#     Instructions in input file should only use PowerPC 405 instructions
#     covered by "PowerPC 405-S Embedded Processor Core" manual; however,
#     there is an assumption that a certain catalog of instructions will never
#     be generated by power-linux-gcc compiler (or disabled by compiler
#     switch).
#
#     Also, compiler should generate extended mnemonics instead of its base
#     instruction when extended mnemonics fits.
#
#     Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
#     directive to help instruction alignment for best cache performance.
#
#   - Assembly Syntax:
#
#     There should be only white spaces before instruction mnemonics, in
#     other words, all inline comments should be put behind the instruction.
#
#     "Label:" and an instruction should not be on the same line, hand coded
#     assembly should be consistent with this same compiler output format.
#
# Dependencies:
#
#   In order to utilize assembly subroutines implemented for supporting
#   missing instructions of multiplication and division in PPE42 ISA, a given
#   library (with assembly files and header) must be compiled and linked with
#   any source code that uses this program to generate PPE binary.
#
# Usage:
#
#   ./<ThisScript> -f <a filename with path> --- process single file
#   ./<ThisScript> -d <a directory path>     --- process multiple files
#   ./<ThisScript> -h                        --- detailed usage on other flags
#   ./<ThisScript> --version                 --- version of the program
#   ./<ThisScript> -d <a directory path> -s  --- perform result profiling
#
#   Note: "-v" is NOT the version flag; it disables virtual double word
#   fusing (see p2p_main).
#
# Functions:
#
#   p2p_main    - main function, parse options and arguments
#   p2p_onefile - processing single PPC Assembly File
#   p2p_profile - read back the profiling footer of a generated file
#   p2p_combine - processing two PPC instructions in input file
#   p2p_replace - processing single PPC instruction in input file
#
# Data Structures:
#
#   ReplaceRules = { ppc_op : [rule, ppe_op] }
#   CombineRules = { ppc_op : [rule, ppe_op] }
#   FuseBranches = [ list of branches qualified for fusing with compares ]
#
#------------------------------------------------------------------------------
# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ]
#-------|-------------------------|--------------------------------------------
# Rule  | Example (PPC to PPE)    | Description
#-------|-------------------------|--------------------------------------------
# 'r'   | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode
#  0  0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same
#-------|-------------------------|--------------------------------------------
# 'ru'  | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by
#       | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the
# +1 +1 |       add    RA, RA, RB | original instruction to update RA
#-------|-------------------------|--------------------------------------------
# 'ra'  | [ppc] ppc_op RT, D(RA)  | on top of 'r' rule, emulate 'algebraic' by
#       | [ppe] ppe_op RT, D(RA)  | appending "extsh" instruction after the
# +1 +1 |       extsh  RT, RT     | original instruction to sign-extend RT
#-------|-------------------------|--------------------------------------------
# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above.
#       | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether
#       |       add    RA, RA, RB | rule 'a' or rule 'u' should be applied
# +2 +2 |       extsh  RT, RT     | first, the outcome should be the same.
#-------|-------------------------|--------------------------------------------
# 'h'   | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication
#       | [ppe] srwi   RA, 16     | emulate multiply "high halfword" with
#       |       srwi   RB, 16     | multiply "low halfword" by shifting
# +2 +2 |       ppe_op RT, RA, RB | the operands first
#-------|-------------------------|--------------------------------------------
# 's'   | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide
#       | [ppe] stwu   R1, -24(R1)| instructions with calling 'subroutines'
#       |       stvd   D3, 8(R1)  | implemented in ppe42_mul.S and ppe42_div.S
#       |       mflr   R3         |
#       |       stw    R3, 16(R1) | Calling Conventions:(SS = Stack Size)
#       |       mr     R3, RA     |
#       |       mr     R4, RB     | Caller is responsible for
#       |       bl     target     | 1) create stack frame
#       |       mr     RT, R3     | 2) save off R3 and R4 to stack
#       |       lwz    R3, 16(R1) | 3) save off link register to stack
#       |       mtlr   R3         | 4) put operands into R3, R4 before branch
#       |       lvd    D3, 8(R1)  | 5) put result in R3 to RT after branch
#       |       lwz    R1, 0(R1)  | 6) restore link register from stack
#       |                         | 7) restore R3 and R4 from stack
#       | [sub] stwu   R1, -SS(R1)| 8) remove the stack frame
#       |       <save volatile>   |
#       |       (subroutine body) | Callee is responsible for
#       |       <restore volatile>| 1) create and remove stack frame
#       |       lwz    R1, 0(R1)  | 2) save and restore volatile registers
# +X +Y |       blr               | 3) subroutine will not touch LR again
#-------|-------------------------|--------------------------------------------
# 'o'   | [ppc] ppc_op[o] RT ...  | rule of 'o' form for overflow
#       | [ppe] ppe_op    RT ...  | Note: "mullwo", "divwo" and "divwuo" each
#       |       <inst specific>   | has unique setting for XER[OV,SO] if OE = 1
#-------|-------------------------|--------------------------------------------
# 'd'   | [ppc] ppc_op[.] RT ...  | rule of '.' or 'dot' form for recording
#       | [ppe] ppe_op    RT ...  | using "cmpwli" to emulate the [.] form
#       |       cmpwli    RT, 0   | to the instruction result and CR0 fields
#-------|-------------------------|--------------------------------------------
# 'm'   | [ppc] ppc_op RT, D(RA)  | emulate PowerPC load/store multiple word
#       | [ppe] ppe_op DT, D(RA)  | instructions with PPE specific
#       |       (doubleword ld/st)| 'virtual doubleword' instructions if target
#       |       or                | address is 8-byte aligned; otherwise, using
#       |       (singleword ld/st)| single word instructions instead or mix both
#       |       or                | Note only RA == R1/R2/R13 will always meet
# -1 -1 |       (single & double) | alignment requirement of virtual doubleword
#-------|-------------------------|--------------------------------------------
#
ReplaceRules = {#ppc_op   : [ rule  | ppe_op    ]
                #----------------------------
                #synchronization instructions
                'eieio'   : [ 'r',   'sync'     ],
                'isync'   : [ 'r',   'nop'      ],
                'icbi'    : [ 'r',   'nop'      ],
                'icbt'    : [ 'r',   'nop'      ],
                'mtcr'    : [ 'r',   'mtcr0'    ],
                #load/store with [u/x/a] form
                'stbux'   : [ 'ru',  'stbx'     ],
                'sthux'   : [ 'ru',  'sthx'     ],
                'stwux'   : [ 'ru',  'stwx'     ],
                'lbzux'   : [ 'ru',  'lbzx'     ],
                'lhzux'   : [ 'ru',  'lhzx'     ],
                'lwzux'   : [ 'ru',  'lwzx'     ],
                'lha'     : [ 'ra',  'lhz'      ],
                'lhau'    : [ 'ra',  'lhzu'     ],
                'lhax'    : [ 'ra',  'lhzx'     ],
                'lhaux'   : [ 'rau', 'lhzx'     ],
                #multiply/divide with [./o] form
                'mulhhw'  : [ 'h',   'mullhw'   ],
                'mulhhw.' : [ 'h',   'mullhw.'  ],
                'mulhhwu' : [ 'h',   'mullhwu'  ],
                'mulhhwu.': [ 'h',   'mullhwu.' ],
                'mulhw'   : [ 's',   'mulhw'    ],
                'mulhw.'  : [ 'sd',  'mulhw'    ],
                'mulhwu'  : [ 's',   'mulhwu'   ],
                'mulhwu.' : [ 'sd',  'mulhwu'   ],
                'mullw'   : [ 's',   'mullw'    ],
                'mullw.'  : [ 'sd',  'mullw'    ],
                'mullwo'  : [ 'so',  'mullw'    ],
                'mullwo.' : [ 'sod', 'mullw'    ],
                'mulli'   : [ 's',   'mullw'    ],
                'divw'    : [ 's',   'divw'     ],
                'divw.'   : [ 'sd',  'divw'     ],
                'divwo'   : [ 'so',  'divw'     ],
                'divwo.'  : [ 'sod', 'divw'     ],
                'divwu'   : [ 's',   'divwu'    ],
                'divwu.'  : [ 'sd',  'divwu'    ],
                'divwuo'  : [ 'so',  'divwu'    ],
                'divwuo.' : [ 'sod', 'divwu'    ],
                #load/store multiple word(Rx-R31)
                'lmw'     : [ 'm',   'lvd,lwz'  ],
                'stmw'    : [ 'm',   'stvd,stw' ]}


#------------------------------------------------------------------------------
# CombineRules: [ 'f', 'v', 'l' ]
#-------|-------------------------|--------------------------------------------
# 'f'   | [ppc] ppc_op(cmp*)      | rule for 'fusing' adjacent pair of compare
#       |       ppc_op(b*)        | and branch(PPE specific). Note: only
# -1  0 | [ppe] ppe_op(cmp*b*)    | extended mnemonics of compares are handled
#-------|-------------------------|--------------------------------------------
# 'v'   | [ppc] ppc_op(lwz/stw)   | rule for combining double word aligned
#       |       ppc_op(lwz/stw)   | load/store pairs into single 'virtual'
# -1 -1 | [ppe] ppe_op(lvd/stvd)  | double word instructions(PPE specific)
#-------|-------------------------|--------------------------------------------
# 'l'   | [ppc] .p2align          | compiler will insert ".p2align" directive to
#       |       Label:            | help instructions align from label to label.
#       | [ppe] b Label           | then assembler will insert "nop" on .p2align
#       |       .p2align          | directive. a "branch" to skip the nops will
#  0 -1 |       Label:            | improve the performance while still aligned
#-------|-------------------------|--------------------------------------------
#
CombineRules = {#ppc_op    : [ rule | ppe_op ]
                #--------------------------
                #8byte aligned loads/stores
                'lwz'      : [ 'v',  'lvd'   ],
                'stw'      : [ 'v',  'stvd'  ],
                #compares fusable to branch
                'cmplw'    : [ 'f',  'cmplw' ],
                'cmpw'     : [ 'f',  'cmpw'  ],
                'cmpwi'    : [ 'f',  'cmpwi' ],
                #'.p2align' before 'label:'
                '.p2align' : [ 'l',  'b'     ]}


#------------------------------------------------------------------------------
# FuseBranches: [ Branches can be fused into cmp*b* ]
#------------------------------------------------------------------------------
#
FuseBranches = ['bc', 'bcl',
                'blt', 'bltl', 'ble', 'blel',
                'bgt', 'bgtl', 'bge', 'bgel',
                'beq', 'beql', 'bne', 'bnel']


#------------------------------------------------------------------------------
# Option switches and profile counters.
#
# p2p_main() overwrites the switches from the command line; the values below
# mirror the command line defaults so that p2p_replace / p2p_combine /
# p2p_onefile can also be called directly after importing this module.
#------------------------------------------------------------------------------
P2P_VERBOSE = True          # print status messages to stdout
P2P_COMMENT = True          # leave "#P2P(rule):" marks in output files
P2P_PROFILE = False         # space/performance profiling
P2P_REPLACE = True          # enable instruction replace functions
P2P_COMBINE = True          # enable instruction combine functions
P2P_VDW_SDA = False         # virtual double word fusion targeting sda
P2P_COMPARE_BRANCH = True   # fused compare and branch
P2P_VIRTUAL_DOUBLE = True   # combined virtual double word ld/st
P2P_SPACE = 0               # profile count: instructions added/removed
P2P_CYCLE = 0               # profile count: cycles added/removed


#------------------------------------------------------------------------------
# Pre-compiled patterns (see p2p_replace / p2p_combine for the group layout)
#------------------------------------------------------------------------------
_REPLACE_INST_RE = re.compile(
    r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)"
    r"([\s,\(]*)([%r0-9\-]*)([\s\)]*)")
_COMBINE_INST_RE = re.compile(
    r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)"
    r"([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)")
_COMMENT_RE = re.compile(r"^[\s]*(//|#)")
_DIRECTIVE_RE = re.compile(r"^[\s]*(\.[0-9a-zA-Z_]+)")
_LABEL_RE = re.compile(r"^[\s]*[\.0-9a-zA-Z_]+[:]+")
_PROFILE_RE = re.compile(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)")
_DIGITS_RE = re.compile(r"\d+")


def _emit(text):
    """Write text verbatim to the current sys.stdout.

    sys.stdout is looked up on every call because fileinput (inplace mode)
    redirects it to the file being rewritten.
    """
    sys.stdout.write(text)


def _emit_line(text):
    """Write text terminated by exactly one newline."""
    if not text.endswith('\n'):
        text += '\n'
    sys.stdout.write(text)


def _reg_number(operand):
    """Return the register number of "N" / "rN" / "%rN", or None."""
    found = _DIGITS_RE.search(operand)
    if found is None:
        return None
    return int(found.group())


def _to_int(text):
    """Return int(text), or None when text is not a decimal integer."""
    try:
        return int(text)
    except ValueError:
        return None


def _ppe_filename(ppc_filename):
    """Return the PPE output name: same path with extension P2P_PPE_EXT.

    Only the final extension is swapped, so a '.s' elsewhere in the path
    (for example a directory called "lib.src") is left alone.
    """
    root, _unused_ext = os.path.splitext(ppc_filename)
    return root + P2P_PPE_EXT


# -----------------------------------------------------------------------------
# p2p_replace:
#   process each line(filtered) in the assembly file to replace PPC instruction
#   with supported PPE instruction(s); the result is written to sys.stdout
#
# Arguments:
#   string:  line   - assembly file line to be replaced
#            ppc_op - detected PPC opcode that needs to be replaced
# Return:
#   boolean: True   - Return without Error
#            False  - Error Detected (instruction format not recognized)
# Note:
#   The 'o' (overflow) and 'd' (record) letters that appear in some
#   ReplaceRules entries are not acted on here: such instructions are handled
#   by the 's' rule only, without XER/CR0 emulation.
# -----------------------------------------------------------------------------
def p2p_replace(line, ppc_op):

    # parse PPC instruction as in I or D form with opcode and upto 3 operands:
    # possible forms: opcode
    #                 opcode RT, RA, RB
    #                 opcode RT, RA, IM
    #                 opcode RT, D(RA)
    # inst.group(0) : <whole instruction>
    # inst.group(1) : " "
    # inst.group(2) : Opcode(.)
    # inst.group(3) : " "
    # inst.group(4) : GPR
    # inst.group(5) : " , "
    # inst.group(6) : GPR or Immediate(D)
    # inst.group(7) : " , " or " ( "
    # inst.group(8) : GPR or Immediate(IM)
    # inst.group(9) : " ) "
    inst = _REPLACE_INST_RE.search(line)

    # detect an error
    if inst is None or ppc_op != inst.group(2):
        return False

    # look up rule to process the instruction
    rule, ppe_op = ReplaceRules[ppc_op]

    # if enabled, put a mark in the output file
    if P2P_COMMENT:
        _emit_line("#P2P(%s):" % rule + line)

    indent, gap, comma = inst.group(1), inst.group(3), inst.group(5)

    def one_operand(opcode, operand):
        return indent + opcode + gap + operand

    def two_operands(opcode, first, second):
        return indent + opcode + gap + first + comma + second

    #---r/u/a------------------------------------------------------------------
    if 'r' in rule:
        # only the mnemonic (first occurrence) is renamed, so the same text
        # inside a symbol or a trailing comment is left untouched
        _emit_line(line.replace(ppc_op, ppe_op, 1))

        if 'u' in rule:
            # "add RA, RA, RB" emulates the update form
            _emit_line(two_operands('add', inst.group(6), inst.group(6)) +
                       inst.group(7) + inst.group(8))

        if 'a' in rule:
            # "extsh RT, RT" emulates the algebraic form
            _emit_line(two_operands('extsh', inst.group(4), inst.group(4)))
        return True

    #---h----------------------------------------------------------------------
    if 'h' in rule:
        # "srwi RA, 16" and "srwi RB, 16", then the low halfword multiply
        _emit_line(two_operands('srwi', inst.group(6), "16"))
        _emit_line(two_operands('srwi', inst.group(8), "16"))
        _emit_line(line.replace(ppc_op, ppe_op, 1))
        return True

    #---s----------------------------------------------------------------------
    if 's' in rule:
        rt, ra, rb = inst.group(4), inst.group(6), inst.group(8)
        if not (rt and ra and rb):
            return False

        # branch target label of the subroutine
        target = P2P_PPE_PRE + ppe_op

        # create the stack frame, save R3/R4 (8(R1)/12(R1)) and LR (16(R1))
        _emit_line(two_operands('stwu', '1', '-24(1)'))
        _emit_line(two_operands('stvd', '3', '8(1)'))
        _emit_line(one_operand('mflr', '3'))
        _emit_line(two_operands('stw', '3', '16(1)'))

        # first operand into R3; "mflr 3" clobbered R3, so when RA is R3 its
        # value has to come back from the stack
        if _reg_number(ra) == 3:
            _emit_line(two_operands('lwz', '3', '8(1)'))
        else:
            _emit_line(two_operands('mr', '3', ra))

        # second operand into R4
        if ppc_op == 'mulli':
            # the third operand is an immediate
            _emit_line(two_operands('li', '4', rb))
        elif _reg_number(rb) == 3:
            # R3 now holds RA (or LR); fetch the original R3 from the stack
            _emit_line(two_operands('lwz', '4', '8(1)'))
        elif _reg_number(rb) != 4:
            _emit_line(two_operands('mr', '4', rb))

        # branch and link to the subroutine, which returns with blr
        # Assumption: the subroutine will be responsible for saving
        # and restoring all the volatile registers used in the subroutine
        _emit_line(one_operand('bl', target))

        # result (R3) into RT; R3 and R4 are reloaded from the stack by the
        # "lvd" below, so a result destined for either of them is written
        # into its save slot instead
        rt_number = _reg_number(rt)
        if rt_number == 3:
            _emit_line(two_operands('stw', '3', '8(1)'))
        elif rt_number == 4:
            _emit_line(two_operands('stw', '3', '12(1)'))
        else:
            _emit_line(two_operands('mr', rt, '3'))

        # restore LR, R3/R4 and remove the stack frame
        _emit_line(two_operands('lwz', '3', '16(1)'))
        _emit_line(one_operand('mtlr', '3'))
        _emit_line(two_operands('lvd', '3', '8(1)'))
        _emit_line(two_operands('lwz', '1', '0(1)'))
        return True

    #---m----------------------------------------------------------------------
    if 'm' in rule:
        # parse instruction information
        # note register can be in either "N" form or "%rN" form
        double_inst, single_inst = ppe_op.split(',')
        try:
            virtual_reg = int(_DIGITS_RE.search(inst.group(4)).group())
            base_offset = int(inst.group(6))
            address_reg = int(_DIGITS_RE.search(inst.group(8)).group())
        except (AttributeError, ValueError):
            # operands are missing or not numeric: report a format error
            return False

        # consider illegal if multiple word instruction covers registers
        # that do not exist on PPE
        if virtual_reg < 28:
            return False

        address_text = inst.group(7) + inst.group(8) + inst.group(9)

        # loop until and include GPR31
        while virtual_reg < 32:
            # page 270 of 405 manual, only do this for load instructions:
            # the address register itself is skipped unless it is R31
            if virtual_reg == address_reg != 31 and 'l' in single_inst:
                base_offset += 4
                virtual_reg += 1
                continue

            # a doubleword access needs a base register with guaranteed
            # alignment (R1/R2/R13), an 8-byte aligned offset, and two
            # registers still left to transfer
            if address_reg in (1, 2, 13) and not (base_offset % 8) and \
               (virtual_reg + 1) < 32:
                opcode, step = double_inst, 2
            else:
                opcode, step = single_inst, 1

            _emit_line(two_operands(opcode, str(virtual_reg),
                                    str(base_offset)) + address_text)
            base_offset += 4 * step
            virtual_reg += step
        return True

    # no rule matched
    return False


def _is_uimm5(text):
    """True when text is a decimal integer usable as a cmpwib* UIX field."""
    value = _to_int(text)
    return value is not None and 0 <= value <= 31


def _combine_compare_branch(first_line, second_line, first_inst, second_inst,
                            first_op, second_op, rule, ppe_op):
    """Rule 'f': fuse a compare and the following branch into cmp*b*."""
    global P2P_SPACE

    if not P2P_COMPARE_BRANCH:
        return True, False

    # fusing compare and branch
    fused_op = ppe_op + second_op

    if ',' in first_inst.group(7):
        # three operands: an explicit cr field precedes RA
        # cr field must be cr0 or 0, error out if it is something else
        if '0' not in first_inst.group(4):
            return False, True
        immediate = first_inst.group(8)
        compare_operands = first_inst.group(6) + first_inst.group(7) + \
                           first_inst.group(8) + ', '
    else:
        # two operands: cr field is omitted
        immediate = first_inst.group(6)
        compare_operands = first_inst.group(4) + first_inst.group(5) + \
                           first_inst.group(6) + ', '

    # for cmpwib* case, there is a difference between
    # cmpwi   SI  operand as signed 16-bit integer that gets sign extended and
    # cmpwib* UIX operand as unsigned 5-bit integer that gets zero extended
    # thus, will not fuse the two if the integer operand is not in range(0,31)
    if 'i' in first_op and not _is_uimm5(immediate):
        return True, False

    if 'bc' in second_op:
        # 'bc': convert the BO,BI fields into the PX,BIX fields of 'cmp*b*'
        # BO is a 5-bit field, bit 0 being the most significant one:
        #   0x10 (bit 0) set   - branch regardless of the condition
        #   0x08 (bit 1)       - condition value to branch on (PX)
        #   0x04 (bit 2) clear - decrement CTR
        bo = _to_int(second_inst.group(4))
        if bo is None or not 0 <= bo <= 31:
            return True, False

        # do not handle CTR decrement or branch always cases
        if not (bo & 0x04) or (bo & 0x10):
            return True, False

        # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used
        px = '1' if bo & 0x08 else '0'
        px_bix = px + second_inst.group(5) + \
                 second_inst.group(6) + second_inst.group(7)
        target = second_inst.group(8)
    else:
        # extended mnemonics need no PX,BIX fields
        px_bix = ""
        if ',' in second_inst.group(5):
            # two operands: an explicit cr field precedes the target
            # cr field must be cr0 or 0, error out if it is something else
            if '0' not in second_inst.group(4):
                return False, True
            target = second_inst.group(6)
        else:
            target = second_inst.group(4)

    # profile: space--, cycle is the same because 1+2==3
    P2P_SPACE -= 1

    # if enabled, put a mark in the output file
    if P2P_COMMENT:
        _emit_line("#P2P(%s):" % rule + first_line)
        _emit_line("#P2P(%s):" % rule + second_line)

    # "cmp*b* [PX,BIX,] RA,RB/IM,target"
    _emit_line(first_inst.group(1) + fused_op + first_inst.group(3) +
               px_bix + compare_operands + target)
    return True, True


def _is_doubleword_pair(low_reg, low_offset, high_reg, high_offset):
    """True when (low, high) can be served by one virtual doubleword access.

    The offsets must be "X" and "X+4" (with X 8-byte aligned for numeric
    offsets) and the registers must be N and N+1, or 31 and 0.
    """
    sda_adjacent = P2P_VDW_SDA and \
        low_offset.replace("@sda21", "") + "+4" == \
        high_offset.replace("@sda21", "")

    if not sda_adjacent:
        if not low_offset.isdigit():
            return False
        low = int(low_offset)
        if low % 8 or low + 4 != _to_int(high_offset):
            return False

    low_number, high_number = _to_int(low_reg), _to_int(high_reg)
    if low_number is None or high_number is None:
        return False
    return low_number + 1 == high_number or \
        (low_number == 31 and high_number == 0)


def _combine_virtual_double(first_line, second_line, first_inst, second_inst,
                            first_op, second_op, rule, ppe_op):
    """Rule 'v': merge two adjacent word loads/stores into lvd/stvd."""
    global P2P_SPACE, P2P_CYCLE

    if not P2P_VIRTUAL_DOUBLE:
        return True, False

    # Combinable Conditions:
    # 1) base address registers must be the same and one of R1/R2/R13
    # 2) address offsets have to be 8-bytes continuous and aligned
    # 3) target or source registers must qualify to be double word register
    # Note: label+offset@sda21 format is converted to target r13 after link
    #       assume data go in and out r13 or SDA space is always 8-byte aligned
    #       here we only check the continuity of address offset and register
    #       pair
    same_base = first_inst.group(8) == second_inst.group(8) and \
        second_inst.group(8) in ('1', '2', '13')
    both_sda = "@sda21" in first_inst.group(6) and \
        "@sda21" in second_inst.group(6) and P2P_VDW_SDA
    if not (same_base or both_sda):
        return True, False

    if _is_doubleword_pair(first_inst.group(4), first_inst.group(6),
                           second_inst.group(4), second_inst.group(6)):
        newline = first_line.replace(first_op, ppe_op, 1)
    elif _is_doubleword_pair(second_inst.group(4), second_inst.group(6),
                             first_inst.group(4), first_inst.group(6)):
        newline = second_line.replace(second_op, ppe_op, 1)
    else:
        return True, False

    # profile: space--, cycle--(same delay but 1 less from issue)
    P2P_SPACE -= 1
    P2P_CYCLE -= 1

    # if enabled, put a mark in the output file
    if P2P_COMMENT:
        _emit_line("#P2P(%s):" % rule + first_line)
        _emit_line("#P2P(%s):" % rule + second_line)

    _emit_line(newline)
    return True, True


# -----------------------------------------------------------------------------
# p2p_combine:
#   process each two lines(filtered) in the assembly file to combine two PPC
#   instructions to one PPE specific instruction for better performance;
#   the combined instruction, if any, is written to sys.stdout
#
# Arguments:
#   string:  first_line  - 1st assembly file line to be combined
#            second_line - 2nd assembly file line to be combined
#            first_op    - 1st detected PPC opcode that needs to be combined
#            second_op   - 2nd detected PPC opcode that needs to be combined
# Return:
#   (done, match)
#   boolean: done  - True:  return without error
#                  - False: return with error detected
#            match - True:  eventually matched and combined
#                           (with done == False: an unsupported cr field was
#                           found and nothing was written)
#                  - False: fail to qualify to be combined
# -----------------------------------------------------------------------------
def p2p_combine(first_line, second_line, first_op, second_op):

    # parse PPC instruction as in I or B or D form with opcode and upto 3
    # operands
    # possible form : [1st] opcode [CR,] RA, RB
    #                 [1st] opcode [CR,] RA, IM
    #                 [1st] opcode RT, D(RA)
    #                 [2nd] opcode [CR,] Target
    #                 [2nd] opcode BO, BI, Target
    #                 [2nd] opcode RT, D(RA)
    # inst.group(0) : <whole instruction>
    # inst.group(1) : " "
    # inst.group(2) : Opcode(+/-/.)
    # inst.group(3) : " "
    # inst.group(4) : GPR or CR or BO or Target
    # inst.group(5) : " , "
    # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target
    # inst.group(7) : " , " or " ( "
    # inst.group(8) : GPR or IM or Target
    # inst.group(9) : " ) "
    first_inst = _COMBINE_INST_RE.search(first_line)
    second_inst = _COMBINE_INST_RE.search(second_line)

    # detect an error
    if first_inst is None or second_inst is None or \
       first_op != first_inst.group(2) or \
       second_op not in second_inst.group(2):
        return False, False

    # look up rule to process the instruction
    rule, ppe_op = CombineRules[first_op]

    if 'f' in rule:
        return _combine_compare_branch(first_line, second_line,
                                       first_inst, second_inst,
                                       first_op, second_op, rule, ppe_op)
    if 'v' in rule:
        return _combine_virtual_double(first_line, second_line,
                                       first_inst, second_inst,
                                       first_op, second_op, rule, ppe_op)

    # rule 'l' (.p2align) is handled by p2p_onefile: nothing to combine here
    return True, False


def _is_passthrough(line):
    """True for blank lines, comment lines and assembler directives.

    '.p2align' and anything containing ':' (labels) are not pass-through.
    """
    if not line.strip():
        return True
    if _COMMENT_RE.search(line):
        return True
    directive = _DIRECTIVE_RE.search(line)
    return directive is not None and ':' not in line and \
        directive.group(1) != '.p2align'


# -----------------------------------------------------------------------------
# p2p_onefile:
#   process single PPC assembly file to convert it into PPE assembly file
#   also filter out non-instruction lines before calling the subroutine
#
#   The input file is renamed to the PPE name, rewritten in place through
#   fileinput, and the untouched '.405' backup is renamed back to the input
#   name, so both files exist afterwards.
#
# Arguments:
#   string:  ppcFileName
# Return:
#   boolean: True if file processing completed without error;
#            on error a message is printed and sys.exit(1) is called
# Subroutine:
#   p2p_combine
#   p2p_replace
# -----------------------------------------------------------------------------
def p2p_onefile(ppcFileName):

    global P2P_SPACE, P2P_CYCLE
    P2P_SPACE = 0  # profile count
    P2P_CYCLE = 0  # profile count

    if P2P_VERBOSE:
        _emit_line("Translate PPC assembly: " + ppcFileName)

    # the input is renamed to the PPE assembly name and edited in place
    ppeFileName = _ppe_filename(ppcFileName)
    os.rename(ppcFileName, ppeFileName)

    # a possibly combinable line that is held back until the next instruction
    pre_line = ""
    pre_op = ""

    # defined up front so that an empty input file is not an error
    done = True
    line = ""
    line_num = 0
    first_label_ln = None

    # inline file editing, the original PPC assembly is backed up as
    # <ppeFileName>.405
    for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):

        # in case of "mtmsr 0; isync"
        line = line.replace('isync', 'nop')

        # blank lines, comment lines and directives (except .p2align and
        # labels) are copied through, after flushing the held back line
        if _is_passthrough(line):
            if pre_line:
                _emit(pre_line)
                pre_line = ""
            _emit(line)
            continue

        # apply special 'l' rule in CombineRules for '.p2align' before 'label:'
        label = _LABEL_RE.search(line)
        if label is not None:
            # without an earlier label there is nothing to measure from
            if pre_line and pre_op == '.p2align' and \
               first_label_ln is not None:
                misalign = 8 - (fileinput.lineno() - first_label_ln - 2) % 8
                if misalign in (3, 4, 5, 6, 7):
                    # profile: same space, but save cycles, branch penalty is 2
                    P2P_CYCLE -= misalign - 2
                    if P2P_COMMENT:
                        _emit_line("#P2P(l):")
                    _emit_line('\tb ' + label.group(0).split(':')[0])
            first_label_ln = fileinput.lineno()
            if pre_line:
                _emit(pre_line)
                pre_line = ""
            _emit(line)
            continue

        # extract opcode field from line
        ppc_op = line.split()[0]
        done, match = False, False

        # detect the 2nd possible combinable instruction
        if pre_line and P2P_COMBINE:
            # ignore +/- signs for branch prediction
            if '+' in ppc_op or '-' in ppc_op:
                ppc_op = ppc_op[:-1]
            if ('cmp' in pre_op and ppc_op in FuseBranches) or \
               ('cmp' not in pre_op and ppc_op == pre_op):
                done, match = p2p_combine(pre_line, line, pre_op, ppc_op)
                if not match:
                    _emit(pre_line)
            else:
                _emit(pre_line)
                done, match = True, False
            pre_line = ""

        # detect the 1st possible combinable instruction
        if not pre_line and not match and P2P_COMBINE:
            if ppc_op in CombineRules:
                pre_op = ppc_op
                pre_line = line
                done, match = True, True
            else:
                done, match = True, False

        # detect possible replaceable instruction
        if not match:
            if ppc_op in ReplaceRules and P2P_REPLACE:
                done = p2p_replace(line, ppc_op)
            else:
                _emit(line)
                done = True

        # if instruction process is not done due to error
        if not done:
            line_num = fileinput.lineno()
            break

    # close the output file and restore the original input file
    fileinput.close()
    os.rename(ppeFileName + '.405', ppcFileName)

    # in case last line of the file qualified to be a pre_line and was not
    # printed
    if pre_line:
        with open(ppeFileName, 'a') as ppe_file:
            ppe_file.write(pre_line)

    # print error debug message
    if not done:
        _emit_line("Error: target instruction detected at line [%d]:"
                   % line_num)
        _emit_line(" " + line)
        _emit_line(" but fail to recognize instruction format.")
        # terminate Makefile or execution if an error is detected
        sys.exit(1)

    # the footer is what p2p_profile reads back
    if P2P_COMMENT and P2P_PROFILE:
        with open(ppeFileName, 'a') as ppe_file:
            ppe_file.write("#P2P: space(%d) cycle(%d)"
                           % (P2P_SPACE, P2P_CYCLE))

    if P2P_VERBOSE:
        _emit_line("Generated PPE assembly: " + ppeFileName)
        if P2P_PROFILE:
            _emit_line("Optimization Profiling: " + str(P2P_SPACE * 4) +
                       " bytes, " + str(P2P_CYCLE) + " cycles.")

    return True


# -----------------------------------------------------------------------------
# p2p_profile
#   profiling how much performance and code size are saved by optimization,
#   read from the "#P2P: space(N) cycle(M)" footer that p2p_onefile appends
#   to the generated PPE file
#
# Arguments:
#   string: ppcFileName - name of the PPC file; the matching PPE file is read
# Return:
#   list:   [space, cycle], or [0, 0] when the last line is not a footer
# -----------------------------------------------------------------------------
def p2p_profile(ppcFileName):
    last_line = ""
    with open(_ppe_filename(ppcFileName), 'r') as ppe_file:
        for last_line in ppe_file:
            pass
    profile = _PROFILE_RE.search(last_line)
    if profile is None:
        return [0, 0]
    return [int(profile.group(1)), int(profile.group(2))]


# -----------------------------------------------------------------------------
# p2p_main:
#   main of this script
#   print usage info
#   parse options and arguments
#   process one file or a directory of files
# -----------------------------------------------------------------------------
def p2p_main():

    # command-line option parsing
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    version = "%prog v." + P2P_VERSION
    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
                      help="process all files in a directory given by PATH")
    parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile",
                      help="process single file(with path in the filename)")
    parser.add_option("-p", "--parallel",
                      action="store_true", dest="parallel", default=False,
                      help="processing all files in parallel processes")
    parser.add_option("-s", "--statistics",
                      action="store_true", dest="profile", default=False,
                      help="optimization profiling, require comment in outputs")
    parser.add_option("-c", "--combine-only",
                      action="store_false", dest="replace", default=True,
                      help="enable only combine function by disabling replace")
    parser.add_option("-r", "--replace-only",
                      action="store_false", dest="combine", default=True,
                      help="enable only replace function by disabling combine")
    # the hyphenated spellings can be typed without quoting; the historical
    # spellings containing spaces are kept as aliases
    parser.add_option("-b", "--compare-branch-disable",
                      "--compare branch disable",
                      action="store_false", dest="compare_branch", default=True,
                      help="only disabling fused compare branch function")
    parser.add_option("-v", "--virtual-double-disable",
                      "--virtual double disable",
                      action="store_false", dest="virtual_double", default=True,
                      help="only disabling fused virtual double function")
    parser.add_option("-e", "--eabi",
                      action="store_true", dest="vdw_sda", default=False,
                      help="enable virtual double word fusion targeting sda")
    parser.add_option("-n", "--no-comment",
                      action="store_false", dest="comment", default=True,
                      help="don't leave comment mark in output file")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose", default=True,
                      help="don't print status messages to stdout")
    (options, args) = parser.parse_args()

    global P2P_VERBOSE, P2P_COMMENT, P2P_PROFILE, P2P_REPLACE, P2P_COMBINE
    global P2P_VDW_SDA, P2P_COMPARE_BRANCH, P2P_VIRTUAL_DOUBLE
    # global program output verbose switch
    P2P_VERBOSE = options.verbose
    # leave a comment mark in output files
    P2P_COMMENT = options.comment
    # space/performance profiling function
    P2P_PROFILE = options.profile
    # enable instruction replace functions
    P2P_REPLACE = options.replace
    # enable instruction combine functions
    P2P_COMBINE = options.combine
    # enable virtual double word fusion targeting sda
    P2P_VDW_SDA = options.vdw_sda
    # enable only fused compare and branch function
    P2P_COMPARE_BRANCH = options.compare_branch
    # enable only combined virtual double function
    P2P_VIRTUAL_DOUBLE = options.virtual_double

    if P2P_VERBOSE:
        _emit_line("PPC405 Assembly to PPE42 Assembly "
                   "Post-Compiler Proceesor (P2P)")
        _emit_line("Version: " + P2P_VERSION)

    # single file processing
    if options.ppcFile:

        if P2P_VERBOSE:
            _emit_line("Processing signle file: " + options.ppcFile)

        p2p_onefile(options.ppcFile)

    # multiple files processing
    if options.ppcPath:

        if P2P_VERBOSE:
            _emit_line("Accessing all files at: " + options.ppcPath)
            _emit_line("*Parallel Process Mode: " +
                       ("Off", "On")[options.parallel])

        total_bytes = 0
        total_cycles = 0

        fileList = []
        for root, subdirs, files in os.walk(options.ppcPath):
            for name in fnmatch.filter(files, '*' + P2P_PPC_EXT):
                path = os.path.join(root, name)
                if options.parallel:
                    fileList.append(path)
                elif options.profile:
                    # profiling only reads back previously generated files
                    space, cycle = p2p_profile(path)
                    total_bytes += space * 4
                    total_cycles += cycle
                else:
                    p2p_onefile(path)

        if options.profile:
            _emit_line("Optimization Profiling: " + str(total_bytes) +
                       " bytes, " + str(total_cycles) + " cycles.")

        # parallel processing mode
        if options.parallel:
            from multiprocessing import Pool
            p = Pool()
            p.map(p2p_onefile, fileList)
            p.close()
            p.join()

    if P2P_VERBOSE:
        _emit_line("Done")


# -----------------------------------------------------------------------------
# python main
if __name__ == '__main__':
    p2p_main()