#!/usr/bin/python2.6
# \file ppc-ppe-pcp.py
# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
#
# ---------------------------------------------------------------
# Revision History
# ---------------------------------------------------------------
# 10-07-2014: project completed
# daviddu     added optimization profile support
#
# 10-06-2014: added fused compare and branch support
# daviddu     added support for combining two ld/st into one double word
#             added support to insert branch upon .p2align directive
#
# 09-27-2014: added subroutine support for mul* and div*
# daviddu     added virtual double word replacing multiple word support
#
# 09-13-2014: initial version
# daviddu     only instruction inline replacement is supported
# ---------------------------------------------------------------

P2P_VERSION = "10-07-2014" # version number as last modified date
P2P_PPC_EXT = '.s'         # PPC Assembly filename extension
P2P_PPE_EXT = '.es'        # PPE Assembly filename extension
P2P_PPE_PRE = '__ppe42_'   # PPE Assembly subroutine prefix

import sys
import os
import re
import fnmatch
import fileinput

# ---------------------------------------------------------------
# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
# ---------------------------------------------------------------
#
# Description:
#
#   This post-compiler processor will take PPC405 assembly file(s) produced
#   by powerpc-linux-gcc or hand coded and replace some of the instructions
#   supported by PPC405 ISA but not PPE42 ISA with a set of instructions
#   supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
#
# Assumptions:
#
#   - Input/Output File Name Extension:
#
#     PPC405 assembly file generated by powerpc-linux-gcc has filename extension
#     defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file
#     consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT"
#     global variable. Both should be consistent with Makefile rules.
#
#   - Registers:
#
#     Instructions in input file should only use registers supported by PPE,
#     that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE
#     only has CR0 instead of CR0-7).
#
#     GCC flag -ffixed can be used to enforce compiler to not use certain
#     registers if compiler generates input files to this script. Note certain
#     optimization level, such as -Os, of GCC will still use certain registers
#     regardless if -ffixed flag is used. Furthermore, compiler should not
#     generate multiple word instructions(lmw/stmw) that covers the registers
#     forbidden to use by -ffixed flag.
#
#     Example of using -ffixed flag in this case:
#       -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
#       -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
#       -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
#       -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
#       -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
#       -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
#
#   - Instructions:
#
#     Instructions in input file should only use PowerPC 405 instructions
#     covered by "PowerPC 405-S Embedded Processor Core" manual; however,
#     there is an assumption that certain categories of instructions will never
#     be generated by powerpc-linux-gcc compiler(or disabled by compiler switch).
#
#     Also, compiler should generate extended mnemonics instead of its base
#     instruction when extended mnemonics fits.
#
#     Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
#     directive to help instruction alignment for best cache performance.
#
#   - Assembly Syntax:
#
#     There should be only white spaces before instruction mnemonics, in
#     other words, all inline comments should be put behind the instruction.
#
#     "Label:" and an instruction should not be on the same line, hand coded
#     assembly should be consistent with this same compiler output format.
#
# Dependencies:
#
#   In order to utilize assembly subroutines implemented for supporting
#   missing instructions of multiplication and division in PPE42 ISA, a given
#   library(with assembly files and header) must be compiled and linked with
#   any source code that use this program to generate PPE binary.
#
# Usage:
#
#   ./ppc-ppe-pcp.py -f FILE    --- process single file
#   ./ppc-ppe-pcp.py -d PATH    --- process multiple files
#   ./ppc-ppe-pcp.py -h         --- detailed usage on other flags
#   ./ppc-ppe-pcp.py -v         --- version of the program
#   ./ppc-ppe-pcp.py -d PATH -s --- perform result profiling
#
# Functions:
#
#   p2p_main    - main function, parse options and arguments
#   p2p_onefile - processing single PPC Assembly File
#   p2p_combine - processing two PPC instructions in input file
#   p2p_replace - processing single PPC instruction in input file
#
# Data Structures:
#
#   ReplaceRules = { ppc_op : [rule, ppe_op] }
#   CombineRules = { ppc_op : [rule, ppe_op] }
#   FuseBranches = [ list of branches qualified for fusing with compares ]
#
# Note: the pair of numbers shown under each rule name in the tables below
# appears to be the (space, cycle) delta of the rule counted in instructions;
# this matches the profile counters updated for the combine rules
# -- TODO confirm for the replace rules.
#
#------------------------------------------------------------------------------
# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ]
#-------|-------------------------|--------------------------------------------
# Rule  | Example (PPC to PPE)    | Description
#-------|-------------------------|--------------------------------------------
#  'r'  | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode
#  0 0  | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same
#-------|-------------------------|--------------------------------------------
#  'ru' | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by
#       | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the
# +1 +1 |       add    RA, RA, RB | original instruction to update RA
#-------|-------------------------|--------------------------------------------
#  'ra' | [ppc] ppc_op RT, D(RA)  | on top of 'r' rule, emulate 'algebraic' by
#       | [ppe] ppe_op RT, D(RA)  | appending "extsh" instruction after the
# +1 +1 |       extsh  RT, RT     | original instruction to sign-extend RT
#-------|-------------------------|--------------------------------------------
# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above.
#       | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether
#       |       add    RA, RA, RB | rule 'a' or rule 'u' should be applied
# +2 +2 |       extsh  RT, RT     | first, the outcome should be the same.
#-------|-------------------------|--------------------------------------------
#  'h'  | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication
#       | [ppe] srwi   RA, 16     | emulate multiply "high halfword" with
#       |       srwi   RB, 16     | multiply "low halfword" by shifting
# +2 +2 |       ppe_op RT, RA, RB | the operands first
#-------|-------------------------|--------------------------------------------
#  's'  | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide
#       | [ppe] stwu R1, -24(R1)  | instructions with calling 'subroutines'
#       |       stvd D3, 8(R1)    | implemented in ppe42_mul.S and ppe42_div.S
#       |       mflr R3           |
#       |       stw  R3, 16(R1)   | Calling Conventions:(SS = Stack Size)
#       |       mr   R3, RA       |
#       |       mr   R4, RB       | Caller is responsible for
#       |       bl   target       | 1) create stack frame
#       |       mr   RT, R3       | 2) save off R3 and R4 to stack
#       |       lwz  R3, 16(R1)   | 3) save off link register to stack
#       |       mtlr R3           | 4) put operands into R3, R4 before branch
#       |       lvd  D3, 8(R1)    | 5) put result in R3 to RT after branch
#       |       lwz  R1, 0(R1)    | 6) restore link register from stack
#       |                         | 7) restore R3 and R4 from stack
#       | [sub] stwu R1, -SS(R1)  | 8) remove the stack frame
#       |                         |
#       |       (subroutine body) | Callee is responsible for
#       |                         | 1) create and remove stack frame
#       |       lwz  R1, 0(R1)    | 2) save and restore volatile registers
# +X +Y |       blr               | 3) subroutine will not touch LR again
#-------|-------------------------|--------------------------------------------
#  'o'  | [ppc] ppc_op[o] RT ...  | rule of 'o' form for overflow
#       | [ppe] ppe_op    RT ...  | Note: "mullwo", "divwo" and "divwuo" each
#       |                         | has unique setting for XER[OV,SO] if OE = 1
#-------|-------------------------|--------------------------------------------
#  'd'  | [ppc] ppc_op[.] RT ...  | rule of '.' or 'dot' form for recording
#       | [ppe] ppe_op    RT ...  | using "cmpwli" to emulate the [.] form
#       |       cmpwli    RT, 0   | to the instruction result and CR0 fields
#-------|-------------------------|--------------------------------------------
#  'm'  | [ppc] ppc_op RT, D(RA)  | emulate PowerPC load/store multiple word
#       | [ppe] ppe_op DT, D(RA)  | instructions with PPE specific
#       |       (doubleword ld/st)| 'virtual doubleword' instructions if target
#       |       or                | address is 8-byte aligned; otherwise, using
#       |       (singleword ld/st)| single word instructions instead or mix both
#       |       or                | Note only RA == R1/R2/R13 will always meet
# -1 -1 |       (single & double) | alignment requirement of virtual doubleword
#-------|-------------------------|--------------------------------------------
#
ReplaceRules = {#ppc_op   : [ rule | ppe_op ]
                #----------------------------
                #synchronization instructions
                'eieio'   : [ 'r',   'sync'  ],
                'isync'   : [ 'r',   'nop'   ],
                'icbi'    : [ 'r',   'nop'   ],
                'icbt'    : [ 'r',   'nop'   ],
                'mtcr'    : [ 'r',   'mtcr0' ],
                #load/store with [u/x/a] form
                'stbux'   : [ 'ru',  'stbx'  ],
                'sthux'   : [ 'ru',  'sthx'  ],
                'stwux'   : [ 'ru',  'stwx'  ],
                'lbzux'   : [ 'ru',  'lbzx'  ],
                'lhzux'   : [ 'ru',  'lhzx'  ],
                'lwzux'   : [ 'ru',  'lwzx'  ],
                'lha'     : [ 'ra',  'lhz'   ],
                'lhau'    : [ 'ra',  'lhzu'  ],
                'lhax'    : [ 'ra',  'lhzx'  ],
                'lhaux'   : [ 'rau', 'lhzx'  ],
                #multiply/divide with [./o] form
                'mulhhw'  : [ 'h',   'mullhw'   ],
                'mulhhw.' : [ 'h',   'mullhw.'  ],
                'mulhhwu' : [ 'h',   'mullhwu'  ],
                'mulhhwu.': [ 'h',   'mullhwu.' ],
                'mulhw'   : [ 's',   'mulhw'  ],
                'mulhw.'  : [ 'sd',  'mulhw'  ],
                'mulhwu'  : [ 's',   'mulhwu' ],
                'mulhwu.' : [ 'sd',  'mulhwu' ],
                'mullw'   : [ 's',   'mullw'  ],
                'mullw.'  : [ 'sd',  'mullw'  ],
                'mullwo'  : [ 'so',  'mullw'  ],
                'mullwo.' : [ 'sod', 'mullw'  ],
                'mulli'   : [ 's',   'mullw'  ],
                'divw'    : [ 's',   'divw'   ],
                'divw.'   : [ 'sd',  'divw'   ],
                'divwo'   : [ 'so',  'divw'   ],
                'divwo.'  : [ 'sod', 'divw'   ],
                'divwu'   : [ 's',   'divwu'  ],
                'divwu.'  : [ 'sd',  'divwu'  ],
                'divwuo'  : [ 'so',  'divwu'  ],
                'divwuo.' : [ 'sod', 'divwu'  ],
                #load/store multiple word(Rx-R31)
                'lmw'     : [ 'm',   'lvd,lwz'  ],
                'stmw'    : [ 'm',   'stvd,stw' ]}

#------------------------------------------------------------------------------
# CombineRules: [ 'f', 'v', 'l' ]
#-------|-------------------------|--------------------------------------------
#  'f'  | [ppc] ppc_op(cmp*)      | rule for 'fusing' adjacent pair of compare
#       |       ppc_op(b*)        | and branch(PPE specific). Note: only
# -1  0 | [ppe] ppe_op(cmp*b*)    | extended mnemonics of compares are handled
#-------|-------------------------|--------------------------------------------
#  'v'  | [ppc] ppc_op(lwz/stw)   | rule for combining double word aligned
#       |       ppc_op(lwz/stw)   | load/store pairs into single 'virtual'
# -1 -1 | [ppe] ppe_op(lvd/stvd)  | double word instructions(PPE specific)
#-------|-------------------------|--------------------------------------------
#  'l'  | [ppc] .p2align          | compiler will insert ".p2align" directive to
#       |       Label:            | help instructions align from label to label.
#       | [ppe] b Label           | then assembler will insert "nop" on .p2align
#       |       .p2align          | directive.
a "branch" to skip the nops will # 0 -1 | Label: | improve the performance while still aligned #-------|-------------------------|-------------------------------------------- # CombineRules = {#ppc_op : [ rule | ppe_cp ] #-------------------------- #8byte aligned loads/stores 'lwz' : [ 'v', 'lvd' ], 'stw' : [ 'v', 'stvd' ], #compares fusable to branch 'cmplw' : [ 'f', 'cmplw' ], 'cmpw' : [ 'f', 'cmpw' ], 'cmpwi' : [ 'f', 'cmpwi' ], #'.p2align' before 'label:' '.p2align' : [ 'l', 'b' ]} #------------------------------------------------------------------------------ # FuseBranches: [ Branches can be fused into cmp*b* ] #------------------------------------------------------------------------------ # FuseBranches = ['bc', 'bcl', 'blt', 'bltl', 'ble', 'blel', 'bgt', 'bgtl', 'bge', 'bgel', 'beq', 'beql', 'bne', 'bnel'] # ----------------------------------------------------------------------------- # p2p_replace: # process each line(filtered) in the assembly file to replace PPC instruction # to supported PPE instruction(s) # # Arguments: # string: line - assembly file line to be replaced # ppc_op - detected PPC opcode that needs to be replaced # Return: # boolean: True - Return without Error # False - Error Detected # Variables: # string: inst, rule, ppe_op, newline, temp_op # double_inst, single_inst, virtual_reg, base_offset, address_reg # Subroutine: # NONE # ----------------------------------------------------------------------------- def p2p_replace(line, ppc_op): # parse PPC instruction as in I or D form with opcode and upto 3 operands: # possible forms: opcode # opcode RT, RA, RB # opcode RT, RA, IM # opcode RT, D(RA) # inst.group(0) : # inst.group(1) : " " # inst.group(2) : Opcode(.) 
# inst.group(3) : " " # inst.group(4) : GPR # inst.group(5) : " , " # inst.group(6) : GPR or Immediate(D) # inst.group(7) : " , " or " ( " # inst.group(8) : GPR or Immediate(IM) # inst.group(9) : " ) " inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line) # detect an error if inst is None or ppc_op != inst.group(2): return False # look up rule to process the instruction rule, ppe_op = ReplaceRules[ppc_op] # if enabled, put a mark in the output file if P2P_COMMENT: print "#P2P(%s):" % rule + line, # start cases of replacing PPC instruction with PPE instruction(s) #---r------------------------------------------------------------------------ if 'r' in rule: # replace opcode under rule 'r' and rewrite the instruction newline = line.replace(ppc_op, ppe_op) print newline, # do not continue if there is 'a' or 'u' rule to process on this line if 'u' not in rule and 'a' not in rule: return True #---u------------------------------------------------------------------------ if 'u' in rule: # construct and write "add RA, RA, RB" under rule 'u' newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\ inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8) print newline # do not continue if there is 'a' rule to process on this line if 'a' not in rule: return True #---a------------------------------------------------------------------------ if 'a' in rule: # construct and write "extsh RT, RT" under rule 'a' newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\ inst.group(5) + inst.group(4) print newline return True #---h------------------------------------------------------------------------ if 'h' in rule: # construct and write "srwi RA, 16" under rule 'h' newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\ inst.group(5) + "16" print newline # construct and write "srwi RB, 16" under rule 'h' newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\ inst.group(5) 
+ "16" print newline # replace opcode in original instruction and write under rule 'h' newline = line.replace(ppc_op, ppe_op) print newline return True #---s------------------------------------------------------------------------ if 's' in rule: # construct branch target label ppe_op = P2P_PPE_PRE + ppe_op # construct and write "stwu R1, -24(R1)" to create the stack frame newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\ inst.group(5) + '-24(1)' print newline # construct and write "stvd D3, 8(R1)" to save off R3 and R4 newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\ inst.group(5) + '8(1)' print newline # construct and write "mflr R3" to fetch the current link address newline = inst.group(1) + 'mflr' + inst.group(3) + '3' print newline # construct and write "stw R3, 16(R1)" to save off current LR to stack newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\ inst.group(5) + '16(1)' print newline # construct and write "mr R3, RA" to copy the operand RA to R3 # if RA == R3 then R3 was clobbered, restore R3 from stack if inst.group(6) == '3': newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\ inst.group(5) + '8(1)' print newline else: newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\ inst.group(5) + inst.group(6) print newline # if 'mulli' is detected, using 'li' instead of 'mr' for second operand if ppc_op == 'mulli': temp_op = 'li' else: temp_op = 'mr' # Set R4 if R4 is not already RB if temp_op == 'li' or inst.group(8) != '4': # construct and write "mr R4, RB" to copy the operand RB to R4 # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4 newline = inst.group(1) + temp_op + inst.group(3) + '4' +\ inst.group(5) + inst.group(8) print newline # using branch and link(bl) to branch to subroutine # later subroutine can branch back using branch link register(blr) # Assumption: the subroutine will be responsible for saving # and restoring all the volatilo registers used in the subroutine newline = inst.group(1) + 'bl' + 
inst.group(3) + ppe_op print newline # if RT is not already R3 then copy R3 to RT if inst.group(4) != '3': # construct and write "mr RT, R3" to copy the result in R3 to RT newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\ inst.group(5) + '3' print newline else: # save return on stack newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\ inst.group(5) + '8(1)' print newline # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\ inst.group(5) + '16(1)' print newline # construct and write "mtlr R3" to restore the link register newline = inst.group(1) + 'mtlr' + inst.group(3) + '3' print newline # construct and write "lvd D3, 8(R1)" to restore R3 and R4 newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\ inst.group(5) + '8(1)' print newline # construct and write "lwz R1, 0(R1)" to destroy the stack frame newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\ inst.group(5) + '0(1)' print newline return True #---m------------------------------------------------------------------------ if 'm' in rule: # parse instruction information # note register can be in either "N" form or "%rN" form double_inst,single_inst = ppe_op.split(',') virtual_reg = int(re.search(r'\d+', inst.group(4)).group()) base_offset = int(inst.group(6)) address_reg = int(re.search(r'\d+', inst.group(8)).group()) # consider illegal if multiple word instruction covers non-exist registers if virtual_reg < 28: return False # loop until and include GPR31 while virtual_reg < 32: # page 270 of 405 manual, only do this for load instructions if virtual_reg == address_reg != 31 and 'l' in single_inst: base_offset += 4 virtual_reg += 1 continue # if other GPRs being address_reg there is no guarantee for alignment if address_reg not in [1,2,13]: # construct and write "lwz/stw RT, D(RA)" for every registers newline = inst.group(1) + single_inst + inst.group(3) +\ str(virtual_reg) + inst.group(5) + str(base_offset) +\ 
inst.group(7) + inst.group(8) + inst.group(9) print newline base_offset += 4 virtual_reg += 1 else: # if base_offset is also aligned with base address in the address_reg # & there are at least two more registers to perform doubleword ld/st if not (base_offset % 8) and (virtual_reg + 1) < 32: # construct and write "lvd/stvd DR, D(RA)" under rule 'v' newline = inst.group(1) + double_inst + inst.group(3) +\ str(virtual_reg) + inst.group(5) + str(base_offset) +\ inst.group(7) + inst.group(8) + inst.group(9) print newline base_offset += 8 virtual_reg += 2 # either only one register left or base_offset isnt aligned else: # construct and write "lwz/stwz SR, D(RA)" under rule 'v' newline = inst.group(1) + single_inst + inst.group(3) +\ str(virtual_reg) + inst.group(5) + str(base_offset) +\ inst.group(7) + inst.group(8) + inst.group(9) print newline base_offset += 4 virtual_reg += 1 # end of this if-else # end of while loop return True # end of last if # ----------------------------------------------------------------------------- # p2p_combine: # process each two lines(filtered) in the assembly file to combine two PPC # instructions to one PPE specific instruction for better performance # # Arguments: # string: first_line - 1st assembly file line to be combined # second_line - 2nd assembly file line to be combined # first_op - 1st detected PPC opcode that needs to be combined # second_op - 2nd detected PPC opcode that needs to be combined # Return: # boolean: done - True: return without error # - False: return with error detected # match - True: eventually matched and combined # - False: fail to qualify to be combined # Variables: # string: first_inst, second_inst, rule, ppe_op, newline # bo, px_bix, compare_operands, target # Subroutine: # NONE # ----------------------------------------------------------------------------- def p2p_combine(first_line, second_line, first_op, second_op): global P2P_SPACE; global P2P_CYCLE global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE 
# parse PPC instruction as in I or B or D form with opcode and upto 3 operands # possible form : [1st] opcode [CR,] RA, RB # [1st] opcode [CR,] RA, IM # [1st] opcode RT, D(RA) # [2nd] opcode [CR,] Target # [2nd] opcode BO, BI, Target # [2nd] opcode RT, D(RA) # inst.group(0) : # inst.group(1) : " " # inst.group(2) : Opcode(+/-/.) # inst.group(3) : " " # inst.group(4) : GPR or CR or BO or Target # inst.group(5) : " , " # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target # inst.group(7) : " , " or " ( " # inst.group(8) : GPR or IM or Target # inst.group(9) : " ) " first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line) second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line) # detect an error if first_inst is None or second_inst is None or \ first_op != first_inst.group(2) or second_op not in second_inst.group(2): return False,False # look up rule to process the instruction rule, ppe_op = CombineRules[first_op] # start cases of combining two PPC instructions into PPE instruction #---f------------------------------------------------------------------------ if 'f' in rule: if not P2P_COMPARE_BRANCH: return True,False # fusing compare and branch ppe_op = ppe_op + second_op # for cmpwib* case, there is a difference between # cmpwi SI operand as signed 16-bit integer and then got sign extended and # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended # thus, will not fuse the two if the integer operand is not in range(0,31) # if cr field is omitted: if ',' in first_inst.group(7): # cr field must be cr0 or 0, error out if it is something else: if '0' not in first_inst.group(4): return False, True if 'i' in first_op and (int(first_inst.group(8)) < 0 or \ int(first_inst.group(8)) > 31): return True,False else: compare_operands = 
first_inst.group(6) + first_inst.group(7) + \ first_inst.group(8) + ', ' else: if 'i' in first_op and (int(first_inst.group(6)) < 0 or \ int(first_inst.group(6)) > 31): return True,False else: compare_operands = first_inst.group(4) + first_inst.group(5) + \ first_inst.group(6) + ', ' # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*' # Note CTR decreament and branch always cases are not handled, and # python bin() outputs "0bXXXXX" form so bo[2] is actually BO bit 0 # else there is no need for PX,BIX fields for extended mnemonics if 'bc' in second_op: bo = bin(int(second_inst.group(4))) # do not handle CRT decreament or branch always cases if bo[4] == 0 or bo[2] == 1: return True,False # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used px_bix = bo[3] + second_inst.group(5) + \ second_inst.group(6) + second_inst.group(7) target = second_inst.group(8) else: px_bix = "" # if cr field is omitted: if ',' in second_inst.group(5): # cr field must be cr0 or 0, error out if it is something else: if '0' not in second_inst.group(4): return False, True target = second_inst.group(6) else: target = second_inst.group(4) # profile: space--, cycle is the same because 1+2==3 P2P_SPACE -= 1 # if enabled, put a mark in the output file if P2P_COMMENT: print "#P2P(%s):" % rule + first_line, print "#P2P(%s):" % rule + second_line, # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\ px_bix + compare_operands + target print newline return True,True #---v------------------------------------------------------------------------ if 'v' in rule: if not P2P_VIRTUAL_DOUBLE: return True,False global P2P_VDW_SDA # Combinable Conditions: # 1) base address registers must be the same and one of R1/R2/R13 # 2) address offsets have to be 8-bytes continuous and aligned # 3) target or source registers must qualify to be double word register # Note: label+offset@sda21 format is coverted 
to target r13 after link # assume data go in and out r13 or SDA space is always 8-byte aligned # here we only check the continous of address offset and register pair if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \ ("@sda21" in first_inst.group(6) and \ "@sda21" in second_inst.group(6) and \ P2P_VDW_SDA): if ((first_inst.group(6).replace("@sda21","") + "+4" == \ second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \ ((first_inst.group(6).isdigit() and \ not int(first_inst.group(6)) % 8) and \ int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \ (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \ (int(first_inst.group(4)) == 31 and \ int(second_inst.group(4)) == 0)): newline = first_line.replace(first_op, ppe_op) elif ((second_inst.group(6).replace("@sda21","") + "+4" == \ first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \ ((second_inst.group(6).isdigit() and \ not int(second_inst.group(6)) % 8) and \ int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \ (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \ (int(second_inst.group(4)) == 31 and \ int(first_inst.group(4)) == 0)): newline = second_line.replace(second_op, ppe_op) else: return True,False # profile: space--, cycle--(same delay but 1 less from issue) P2P_SPACE -= 1; P2P_CYCLE -= 1 # if enabled, put a mark in the output file if P2P_COMMENT: print "#P2P(%s):" % rule + first_line, print "#P2P(%s):" % rule + second_line, print newline, return True,True else: return True,False # ----------------------------------------------------------------------------- # p2p_onefile: # process single PPC assembly file to convert it into PPE assembly file # also filter out non-instruction lines before calling the subroutine # # Arguments: # string: ppcFileName # Return: # boolean: done - True if file processing completed without error # - False if file processing failed due to an error # Variables: # boolean: match, done # string: 
ppeFileName, line, ppc_op, pre_op, pre_line, section, label # integer: line_num, first_label_ln, second_label_ln, misalign # Subroutine: # p2p_combine # p2p_replace # ----------------------------------------------------------------------------- def p2p_onefile(ppcFileName): global P2P_SPACE; P2P_SPACE = 0 # profile count global P2P_CYCLE; P2P_CYCLE = 0 # profile count if P2P_VERBOSE : print "Translate PPC assembly: " + ppcFileName # new PPE assembly file is renamed as .s ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT) os.rename(ppcFileName, ppeFileName) # initialize storage variables for previous line that needs to be remembered pre_line = "" pre_op = "" # use inline file editing, back up original PPC assembly file as .S for line in fileinput.input(ppeFileName, inplace=1, backup='.405'): # in case of "mtmsr 0; isync" line = line.replace('isync','nop') # skip blank line if not line.strip(): if pre_line: print pre_line, pre_line = "" print line, continue # skip comments line if re.search("^[\s]*(//|#)", line): if pre_line: print pre_line, pre_line = "" print line, continue # skip .section code except .p2align and label: section = re.search("^[\s]*(\.[0-9a-zA-Z_]+)", line) if section is not None and ':' not in line and \ section.group(1) != '.p2align': if pre_line: print pre_line, pre_line = "" print line, continue # apply specical 'l' rule in CombineRules for '.p2align' before a 'label:' label = re.search("^[\s]*[\.0-9a-zA-Z_]+[:]+", line) if label is not None: if pre_line and pre_op == '.p2align': second_label_ln = fileinput.lineno() misalign = 8 - (second_label_ln - first_label_ln - 2) % 8 if misalign in [3,4,5,6,7]: # profile: same space, but save cycles, branch penalty is 2 P2P_CYCLE -= misalign - 2 if P2P_COMMENT: print "#P2P(l):" print '\tb ' + label.group(0).split(':')[0] print pre_line, pre_line = "" first_label_ln = fileinput.lineno() if pre_line: print pre_line, pre_line = "" print line, continue # extract opcode field from line ppc_op = 
line.split()[0] done,match = False,False # detect the 2nd possible combinable instruction if pre_line and P2P_COMBINE: # ignore +/- signs for branch prediction if '+' in ppc_op or '-' in ppc_op: ppc_op = ppc_op[:-1] if 'cmp' in pre_op and ppc_op in FuseBranches or \ 'cmp' not in pre_op and ppc_op == pre_op: done,match = p2p_combine(pre_line, line, pre_op, ppc_op) if not match: print pre_line, else: print pre_line, done,match = True,False pre_line = "" # detect the 1st possible combinable instruction if not pre_line and not match and P2P_COMBINE: if ppc_op in CombineRules.keys(): pre_op = ppc_op pre_line = line done,match = True,True else: done,match = True,False # defect possible replacable instruction if not match: if ppc_op in ReplaceRules.keys() and P2P_REPLACE: done = p2p_replace(line, ppc_op) else: print line, done = True # if instruction process is not done due to error if not done: line_num = fileinput.lineno() break # close the output file and restore the original input file fileinput.close() os.rename(ppeFileName+'.405', ppcFileName) # in case last line of the file qualified to be a pre_line and was not printed if pre_line: f = open(ppeFileName, 'a') f.write(pre_line) f.close() # print error debug message if not done: print "Error: target instruction detected at line [%d]:" % line_num print " " + line print " but fail to recognize instruction format." # terminate Makefile or execution if an error is detected sys.exit(1) if P2P_COMMENT and P2P_PROFILE: f = open(ppeFileName, 'a') f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE,P2P_CYCLE)) f.close() if P2P_VERBOSE: print "Generated PPE assembly: " + ppeFileName if P2P_PROFILE: print "Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +\ str(P2P_CYCLE) + " cycles." 
# ----------------------------------------------------------------------------- # p2p_profile # profiling how much performance and code size are saved by optimization # # Arguments: # string: ppcFileName # Return: # list: [space, cycle] # Variables: # string: line, profile # Subroutine: # None # ----------------------------------------------------------------------------- def p2p_profile(ppcFileName): f = open(ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT), 'r') for line in f: pass f.close() profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)", line) if profile is not None: return [int(profile.group(1)), int(profile.group(2))] else: return [0,0] # ----------------------------------------------------------------------------- # p2p_main: # main of this script # print usage info # parse options and arguments # process one file or a directory of files # ----------------------------------------------------------------------------- def p2p_main(): # command-line option parsing from optparse import OptionParser usage = "usage: %prog [options]" version= "%prog v." 
+ P2P_VERSION parser = OptionParser(usage=usage, version=version) parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath", help="process all files in a directory given by PATH") parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile", help="process single file(with path in the filename)") parser.add_option("-p", "--parallel", action="store_true", dest="parallel", default=False, help="processing all files in parallel processes") parser.add_option("-s", "--statistics", action="store_true", dest="profile", default=False, help="optimization profiling, require comment in outputs") parser.add_option("-c", "--combine-only", action="store_false", dest="replace", default=True, help="enable only combine function by disabling replace") parser.add_option("-r", "--replace-only", action="store_false", dest="combine", default=True, help="enable only replace function by disabling combine") parser.add_option("-b", "--compare branch disable", action="store_false", dest="compare_branch", default=True, help="only disabling fused compare branch function") parser.add_option("-v", "--virtual double disable", action="store_false", dest="virtual_double", default=True, help="only disabling fused virtual double function") parser.add_option("-e", "--eabi", action="store_true", dest="vdw_sda", default=False, help="enable virtual double word fusion targeting sda") parser.add_option("-n", "--no-comment", action="store_false", dest="comment", default=True, help="don't leave comment mark in output file") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") (options, args) = parser.parse_args() # global program output verbose switch global P2P_VERBOSE; P2P_VERBOSE = options.verbose # leave a comment mark in output files global P2P_COMMENT; P2P_COMMENT = options.comment # space/performance profiling function global P2P_PROFILE; P2P_PROFILE = options.profile # enable instruction replace 
functions global P2P_REPLACE; P2P_REPLACE = options.replace # enable instruction combine functions global P2P_COMBINE; P2P_COMBINE = options.combine # enable virtual double word fusion targeting sda global P2P_VDW_SDA; P2P_VDW_SDA = options.vdw_sda # enable only fused compare and branch function global P2P_COMPARE_BRANCH; P2P_COMPARE_BRANCH = options.compare_branch # enable only combined virtual double function global P2P_VIRTUAL_DOUBLE; P2P_VIRTUAL_DOUBLE = options.virtual_double if P2P_VERBOSE : print "PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)" print "Version: " + P2P_VERSION # single file processing if options.ppcFile: if P2P_VERBOSE : print "Processing signle file: " + options.ppcFile p2p_onefile(options.ppcFile) # multiple files processing if options.ppcPath: if P2P_VERBOSE : print "Accessing all files at: " + options.ppcPath print "*Parallel Process Mode: " + ("Off", "On")[options.parallel] if options.profile: bytes = 0; cycles = 0 fileList = [] for root, subdirs, files in os.walk(options.ppcPath): for file in fnmatch.filter(files, '*'+P2P_PPC_EXT): if options.parallel : fileList.append(os.path.join(root, file)) else: if options.profile: space,cycle = p2p_profile(os.path.join(root, file)) bytes += space*4 cycles += cycle else: p2p_onefile(os.path.join(root, file)) if options.profile: print "Optimization Profiling: " + str(bytes) + " bytes, " +\ str(cycles) + " cycles." # parallel processing mode if options.parallel: from multiprocessing import Pool p = Pool() p.map(p2p_onefile, fileList) p.close() p.join() if P2P_VERBOSE : print "Done" # ----------------------------------------------------------------------------- # python main if __name__ == '__main__': p2p_main()