From a28f852be2197680c6864a8b66b8cb0743893471 Mon Sep 17 00:00:00 2001 From: Glenn Miles Date: Mon, 23 Feb 2015 14:34:25 -0600 Subject: Added ppe tools directory with contents Change-Id: I9da1e93f2624a8da121548b7af67002a98d61fe2 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/15907 Reviewed-by: Glenn R. Miles Reviewed-by: David Young Tested-by: David Young --- tools/PowerPCtoPPE/ppc-ppe-pcp.py | 980 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 980 insertions(+) create mode 100755 tools/PowerPCtoPPE/ppc-ppe-pcp.py (limited to 'tools/PowerPCtoPPE/ppc-ppe-pcp.py') diff --git a/tools/PowerPCtoPPE/ppc-ppe-pcp.py b/tools/PowerPCtoPPE/ppc-ppe-pcp.py new file mode 100755 index 00000000..53c2757f --- /dev/null +++ b/tools/PowerPCtoPPE/ppc-ppe-pcp.py @@ -0,0 +1,980 @@ +#!/usr/bin/python2.6 + +# \file ppc-ppe-pcp.py +# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P) +# +# --------------------------------------------------------------- +# Revision History +# --------------------------------------------------------------- +# 10-07-2014: project completed +# daviddu added optimization profile support +# +# 10-06-2014: added fused compare and branch supprot +# daviddu added support for combining two ld/st into one double word +# added support to insert branch upon .p2align directive +# +# 09-27-2014: added subroutine support for mul* and div* +# daviddu added virtual double word replacing multiple word support +# +# 09-13-2014: initial version +# daviddu only instruction inline replacement is supported +# --------------------------------------------------------------- + +P2P_VERSION = "10-07-2014" # version number as last modified date +P2P_PPC_EXT = '.s' # PPC Assembly filename extension +P2P_PPE_EXT = '.es' # PPE Assembly filename extension +P2P_PPE_PRE = '__ppe42_' # PPE Assembly subroutine prefix + +import sys +import os +import re +import fnmatch +import fileinput + +# --------------------------------------------------------------- 
# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
# ---------------------------------------------------------------
#
# Description:
#
#   This post-compiler processor will take PPC405 assembly file(s) produced
#   by powerpc-linux-gcc or hand coded and replace some of the instructions
#   supported by PPC405 ISA but not PPE42 ISA with a set of instructions
#   supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
#
# Assumptions:
#
#   - Input/Output File Name Extension:
#
#     PPC405 assembly file generated by powerpc-linux-gcc has filename extension
#     defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file
#     consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT"
#     global variable. Both should be consistent with Makefile rules.
#
#   - Registers:
#
#     Instructions in input file should only use registers supported by PPE,
#     that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE
#     only has CR0 instead of CR0-7).
#
#     GCC flag -ffixed can be used to force the compiler not to use certain
#     registers if the compiler generates input files to this script. Note that
#     certain optimization levels of GCC, such as -Os, will still use certain
#     registers regardless of whether the -ffixed flag is used. Furthermore,
#     the compiler should not generate multiple word instructions(lmw/stmw)
#     that cover the registers forbidden by the -ffixed flag.
#
#     Example of using -ffixed flag in this case:
#       -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
#       -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
#       -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
#       -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
#       -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
#       -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
#
#   - Instructions:
#
#     Instructions in input file should only use PowerPC 405 instructions
#     covered by "PowerPC 405-S Embedded Processor Core" manual; however,
#     it is assumed that a certain category of instructions will never be
#     generated by the powerpc-linux-gcc compiler(or is disabled by a compiler
#     switch).
#
#     Also, the compiler should generate extended mnemonics instead of the base
#     instruction whenever an extended mnemonic fits.
#
#     Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
#     directive to help instruction alignment for best cache performance.
#
#   - Assembly Syntax:
#
#     There should be only white space before instruction mnemonics; in
#     other words, all inline comments should be put behind the instruction.
#
#     "Label:" and an instruction should not be on the same line; hand coded
#     assembly should be consistent with this compiler output format.
#
# Dependencies:
#
#   In order to utilize assembly subroutines implemented for supporting
#   missing instructions of multiplication and division in PPE42 ISA, a given
#   library(with assembly files and header) must be compiled and linked with
#   any source code that uses this program to generate PPE binary.
+# +# Usage: +# +# ./ -f --- process single file +# ./ -d --- process multiple files +# ./ -h --- detailed usage on other flags +# ./ -v --- version of the program +# ./ -d -s --- perform result profiling +# +# Functions: +# +# p2p_main - main function, parse options and arguments +# p2p_onefile - processing single PPC Assembly File +# p2p_combine - processing two PPC instructions in input file +# p2p_replace - processing single PPC instruction in input file +# +# Data Structures: +# +# ReplaceRules = { ppc_op : [rule, ppe_op] } +# CombineRules = { ppc_op : [rule, ppe_op] } +# FuseBranches = [ list of branches qualified for fusing with compares ] +# +#------------------------------------------------------------------------------ +# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ] +#-------|-------------------------|-------------------------------------------- +# Rule | Example (PPC to PPE) | Description +#-------|-------------------------|-------------------------------------------- +# 'r' | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode +# 0 0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same +#-------|-------------------------|-------------------------------------------- +# 'ru' | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by +# | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the +# +1 +1 | add RA, RA, RB | original instruction to update RA +#-------|-------------------------|-------------------------------------------- +# 'ra' | [ppc] ppc_op RT, D(RA) | on top of 'r' rule, emulate 'algebraic' by +# | [ppe] ppe_op RT, D(RA) | appending "extsh" instruction after the +# +1 +1 | extsh RT, RT | original instruction to sign-extend RT +#-------|-------------------------|-------------------------------------------- +# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above. 
+# | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether +# | add RA, RA, RB | rule 'a' or rule 'u' should be applied +# +2 +2 | extsh RT, RT | first, the outcome should be the same. +#-------|-------------------------|-------------------------------------------- +# 'h' | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication +# | [ppe] srwi RA, 16 | emulate multiply "high halfword" with +# | srwi RB, 16 | multiply "low halfword" by shifting +# +2 +2 | ppe_op RT, RA, RB | the operands first +#-------|-------------------------|-------------------------------------------- +# 's' | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide +# | [ppe] stwu R1, -24(R1)| instructions with calling 'subroutines' +# | stvd D3, 8(R1) | implemented in ppe42_mul.S and ppe42_div.S +# | mflr R3 | +# | stw R3, 16(R1) | Calling Conventions:(SS = Stack Size) +# | mr R3, RA | +# | mr R4, RB | Caller is responsible for +# | bl target | 1) create stack frame +# | mr RT, R3 | 2) save off R3 and R4 to stack +# | lwz R3, 16(R1) | 3) save off link register to stack +# | mtlr R3 | 4) put operands into R3, R4 before branch +# | lvd D3, 8(R1) | 5) put result in R3 to RT after branch +# | lwz R1, 0(R1) | 6) restore link register from stack +# | | 7) restore R3 and R4 from stack +# | [sub] stwu R1, -SS(R1) | 8) remove the stack frame +# | | +# | (subroutine body) | Callee is responsible for +# | | 1) create and remove stack frame +# | lwz R1, 0(R1) | 2) save and restore volatile registers +# +X +Y | blr | 3) subroutine will not touch LR again +#-------|-------------------------|-------------------------------------------- +# 'o' | [ppc] ppc_op[o] RT ... | rule of 'o' form for overflow +# | [ppe] ppe_op RT ... | Note: "mullwo", "divwo" and "divwuo" each +# | | has unique setting for XER[OV,SO] if OE = 1 +#-------|-------------------------|-------------------------------------------- +# 'd' | [ppc] ppc_op[.] RT ... | rule of '.' 
# or 'dot' form for recording
#       | [ppe] ppe_op     RT ... | using "cmpwli" to emulate the [.] form
#       |       cmpwli     RT, 0  | to the instruction result and CR0 fields
#-------|-------------------------|--------------------------------------------
# 'm'   | [ppc] ppc_op RT, D(RA)  | emulate PowerPC load/store multiple word
#       | [ppe] ppe_op DT, D(RA)  | instructions with PPE specific
#       |       (doubleword ld/st)| 'virtual doubleword' instructions if target
#       |       or                | address is 8-byte aligned; otherwise, using
#       |       (singleword ld/st)| single word instructions instead or mix both
#       |       or                | Note only RA == R1/R2/R13 will always meet
# -1 -1 |       (single & double) | alignment requirement of virtual doubleword
#-------|-------------------------|--------------------------------------------
#
# Lookup table consumed by p2p_replace: ppc_op -> [rule, ppe_op].
# The table is assembled group by group; the resulting mapping is one flat
# dictionary keyed by the PPC mnemonic (including any '.' / 'o' suffix).
ReplaceRules = {}

# synchronization instructions
ReplaceRules.update({
    'eieio': ['r', 'sync'],
    'isync': ['r', 'nop'],
    'icbi': ['r', 'nop'],
    'icbt': ['r', 'nop'],
    'mtcr': ['r', 'mtcr0'],
})

# load/store with [u/x/a] form
ReplaceRules.update({
    'stbux': ['ru', 'stbx'],
    'sthux': ['ru', 'sthx'],
    'stwux': ['ru', 'stwx'],
    'lbzux': ['ru', 'lbzx'],
    'lhzux': ['ru', 'lhzx'],
    'lwzux': ['ru', 'lwzx'],
    'lha': ['ra', 'lhz'],
    'lhau': ['ra', 'lhzu'],
    'lhax': ['ra', 'lhzx'],
    'lhaux': ['rau', 'lhzx'],
})

# multiply/divide with [./o] form
ReplaceRules.update({
    'mulhhw': ['h', 'mullhw'],
    'mulhhw.': ['h', 'mullhw.'],
    'mulhhwu': ['h', 'mullhwu'],
    'mulhhwu.': ['h', 'mullhwu.'],
    'mulhw': ['s', 'mulhw'],
    'mulhw.': ['sd', 'mulhw'],
    'mulhwu': ['s', 'mulhwu'],
    'mulhwu.': ['sd', 'mulhwu'],
    'mullw': ['s', 'mullw'],
    'mullw.': ['sd', 'mullw'],
    'mullwo': ['so', 'mullw'],
    'mullwo.': ['sod', 'mullw'],
    'mulli': ['s', 'mullw'],
    'divw': ['s', 'divw'],
    'divw.': ['sd', 'divw'],
    'divwo': ['so', 'divw'],
    'divwo.': ['sod', 'divw'],
    'divwu': ['s', 'divwu'],
    'divwu.': ['sd', 'divwu'],
    'divwuo': ['so', 'divwu'],
    'divwuo.': ['sod', 'divwu'],
})

# load/store multiple word(Rx-R31); ppe_op is "doubleword_op,singleword_op"
ReplaceRules.update({
    'lmw': ['m', 'lvd,lwz'],
    'stmw': ['m', 'stvd,stw'],
})


#------------------------------------------------------------------------------
# CombineRules: [ 'f', 'v', 'l' ]
#-------|-------------------------|--------------------------------------------
# 'f'   | [ppc] ppc_op(cmp*)      | rule for 'fusing' adjacent pair of compare
#       |       ppc_op(b*)        | and branch(PPE specific). Note: only
# -1  0 | [ppe] ppe_op(cmp*b*)    | extended mnemonics of compares are handled
#-------|-------------------------|--------------------------------------------
# 'v'   | [ppc] ppc_op(lwz/stw)   | rule for combining double word aligned
#       |       ppc_op(lwz/stw)   | load/store pairs into single 'virtual'
# -1 -1 | [ppe] ppe_op(lvd/stvd)  | double word instructions(PPE specific)
#-------|-------------------------|--------------------------------------------
# 'l'   | [ppc] .p2align          | compiler will insert ".p2align" directive to
#       |       Label:            | help instructions align from label to label.
#       | [ppe] b Label           | then assembler will insert "nop" on .p2align
#       |       .p2align          | directive.
# a "branch" to skip the nops will
#  0 -1 |       Label:            | improve the performance while still aligned
#-------|-------------------------|--------------------------------------------
#
CombineRules = {#ppc_op    : [ rule | ppe_op ]
                #--------------------------
                #8byte aligned loads/stores
                'lwz'      : [ 'v', 'lvd'   ],
                'stw'      : [ 'v', 'stvd'  ],
                #compares fusable to branch
                'cmplw'    : [ 'f', 'cmplw' ],
                'cmpw'     : [ 'f', 'cmpw'  ],
                'cmpwi'    : [ 'f', 'cmpwi' ],
                #'.p2align' before 'label:'
                '.p2align' : [ 'l', 'b'     ]}


#------------------------------------------------------------------------------
# FuseBranches: [ Branches can be fused into cmp*b* ]
#------------------------------------------------------------------------------
#
FuseBranches = ['bc',  'bcl',
                'blt', 'bltl', 'ble', 'blel',
                'bgt', 'bgtl', 'bge', 'bgel',
                'beq', 'beql', 'bne', 'bnel']


# -----------------------------------------------------------------------------
# private helpers shared by p2p_replace and p2p_combine
# -----------------------------------------------------------------------------

# instruction parser of p2p_replace (I or D form, opcode and up to 3 operands)
_P2P_REPLACE_RE = re.compile(
    r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)"
    r"([\s,\(]*)([%r0-9\-]*)([\s\)]*)")

# instruction parser of p2p_combine (I, B or D form, opcode and up to 3 operands)
_P2P_COMBINE_RE = re.compile(
    r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)"
    r"([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)")


def _p2p_emit(text):
    """Write text to the current sys.stdout.

    sys.stdout is looked up on every call because the caller redirects it to
    the output file (fileinput in-place mode).  Unlike the print statement this
    spelling is valid on both Python 2.6 and Python 3.
    """
    sys.stdout.write(text)


def _p2p_with_newline(text):
    """Return text terminated by exactly the newline it needs to be a line."""
    if text.endswith("\n"):
        return text
    return text + "\n"


def _p2p_int(text):
    """Return text parsed as a decimal integer, or None if it is not one."""
    try:
        return int(text)
    except ValueError:
        return None


def _p2p_regnum(operand):
    """Return the register number of an "N", "rN" or "%rN" operand, else None."""
    found = re.match(r"^%?r?(\d+)$", operand.strip())
    if found is None:
        return None
    return int(found.group(1))


# -----------------------------------------------------------------------------
# p2p_replace:
#   process each line(filtered) in the assembly file to replace PPC instruction
#   to supported PPE instruction(s)
#
# Arguments:
#   string:  line   - assembly file line to be replaced
#            ppc_op - detected PPC opcode that needs to be replaced
# Return:
#   boolean: True   - Return without Error
#            False  - Error Detected
# Subroutine:
#   _p2p_emit, _p2p_with_newline, _p2p_int, _p2p_regnum
# -----------------------------------------------------------------------------
def p2p_replace(line, ppc_op):
    """Write the PPE replacement of one PPC instruction line to sys.stdout.

    The rule and the PPE opcode are looked up in ReplaceRules[ppc_op].  Returns
    True when the line was rewritten, False when the line could not be parsed
    (the caller treats False as a fatal error).

    NOTE(review): the 'd' (record form) and 'o' (overflow form) rule letters
    are described in the rule table of this file but no code here acts on
    them; 'sd', 'so' and 'sod' are handled exactly like 's'.
    """
    # inst.group(1) : leading white space     inst.group(6) : GPR or D
    # inst.group(2) : opcode(.)               inst.group(7) : "," or "("
    # inst.group(3) : white space             inst.group(8) : GPR or IM
    # inst.group(4) : GPR (RT)                inst.group(9) : ")"
    # inst.group(5) : ","
    inst = _P2P_REPLACE_RE.search(line)

    # detect an error
    if inst is None or ppc_op != inst.group(2):
        return False

    # look up rule to process the instruction
    rule, ppe_op = ReplaceRules[ppc_op]

    indent, gap, sep = inst.group(1), inst.group(3), inst.group(5)
    op_rt, op_ra, op_rb = inst.group(4), inst.group(6), inst.group(8)
    text = _p2p_with_newline(line)

    def emit(opcode, *operands):
        # one generated instruction, formatted like the original line
        _p2p_emit(indent + opcode + gap + sep.join(operands) + "\n")

    # if enabled, put a mark in the output file
    if P2P_COMMENT:
        _p2p_emit("#P2P(%s):" % rule + text)

    #---r/u/a------------------------------------------------------------------
    if 'r' in rule:
        rt_num, rb_num = _p2p_regnum(op_rt), _p2p_regnum(op_rb)
        if 'u' in rule and ppe_op.startswith('l') and \
           rt_num is not None and rt_num == rb_num:
            # a load that targets its own index register: "add RA, RA, RB"
            # after the load would add the loaded value instead of the index,
            # so update RA first and load from the updated address (RA|0 + RA)
            emit('add', op_ra, op_ra, op_rb)
            emit(ppe_op, op_rt, '0', op_ra)
        else:
            # only the first occurrence is the opcode; operands, labels and
            # trailing comments containing the same text must stay untouched
            _p2p_emit(text.replace(ppc_op, ppe_op, 1))
            if 'u' in rule:
                # "add RA, RA, RB" emulates the update form
                emit('add', op_ra, op_ra, op_rb)
        if 'a' in rule:
            # "extsh RT, RT" emulates the algebraic (sign extending) form
            emit('extsh', op_rt, op_rt)
        return True

    #---h----------------------------------------------------------------------
    if 'h' in rule:
        # shift both operands so the "low halfword" multiply sees the high
        # halfwords, then issue the replaced instruction
        emit('srwi', op_ra, '16')
        emit('srwi', op_rb, '16')
        _p2p_emit(text.replace(ppc_op, ppe_op, 1))
        return True

    #---s----------------------------------------------------------------------
    if 's' in rule:
        # branch target label of the emulation subroutine
        target = P2P_PPE_PRE + ppe_op
        rt_num, ra_num = _p2p_regnum(op_rt), _p2p_regnum(op_ra)
        rb_num = _p2p_regnum(op_rb)

        # Stack frame layout after the first two instructions:
        #   0(R1) caller R1, 8(R1) caller R3, 12(R1) caller R4, 16(R1) LR
        # R3 is overwritten by "mflr" and R1 by "stwu" before the operands are
        # copied, so an operand living in R1/R3 is reloaded from its slot.
        reload_slot = {1: '0(1)', 3: '8(1)'}
        # "lvd D3" restores R3/R4 from the frame at the end, so a result whose
        # destination is R3/R4 is stored into that slot instead of being moved.
        result_slot = {3: '8(1)', 4: '12(1)'}

        emit('stwu', '1', '-24(1)')     # create the stack frame
        emit('stvd', '3', '8(1)')       # save off R3 and R4
        emit('mflr', '3')               # fetch the current link address
        emit('stw', '3', '16(1)')       # save off current LR to stack

        # first operand RA -> R3
        if ra_num in reload_slot:
            emit('lwz', '3', reload_slot[ra_num])
        else:
            emit('mr', '3', op_ra)

        # second operand RB -> R4, or for 'mulli' the immediate IM -> R4
        if ppc_op == 'mulli':
            emit('li', '4', op_rb)
        elif rb_num in reload_slot:
            emit('lwz', '4', reload_slot[rb_num])
        else:
            emit('mr', '4', op_rb)

        # branch and link to the subroutine, which returns with "blr" and is
        # responsible for the volatile registers it uses
        emit('bl', target)

        # result R3 -> RT
        if rt_num in result_slot:
            emit('stw', '3', result_slot[rt_num])
        else:
            emit('mr', op_rt, '3')

        emit('lwz', '3', '16(1)')       # fetch the LR value from stack
        emit('mtlr', '3')               # restore the link register
        emit('lvd', '3', '8(1)')        # restore R3 and R4
        emit('lwz', '1', '0(1)')        # destroy the stack frame
        return True

    #---m----------------------------------------------------------------------
    if 'm' in rule:
        # note register can be in either "N" form or "%rN" form
        double_inst, single_inst = ppe_op.split(',')
        virtual_reg = _p2p_regnum(op_rt)
        base_offset = _p2p_int(op_ra)
        address_reg = _p2p_regnum(op_rb)
        if virtual_reg is None or base_offset is None or address_reg is None:
            return False

        # consider illegal if multiple word instruction covers non-exist registers
        if virtual_reg < 28:
            return False

        is_load = 'l' in single_inst
        # only these base registers guarantee an 8-byte aligned base address
        aligned_base = address_reg in (1, 2, 13)
        # group(9) also swallows the line end; keep only the ")" part
        address = inst.group(7) + op_rb + inst.group(9).strip()

        # loop until and include GPR31
        while virtual_reg < 32:
            # page 270 of 405 manual, only do this for load instructions:
            # the base register itself is skipped unless it is R31
            if is_load and virtual_reg == address_reg and address_reg != 31:
                base_offset += 4
                virtual_reg += 1
                continue

            # doubleword ld/st needs an aligned address and two registers left
            if aligned_base and base_offset % 8 == 0 and virtual_reg + 1 < 32:
                opcode, count = double_inst, 2
            else:
                opcode, count = single_inst, 1

            _p2p_emit(indent + opcode + gap + str(virtual_reg) + sep +
                      str(base_offset) + address + "\n")
            base_offset += 4 * count
            virtual_reg += count
        return True

    # no rule letter recognized
    return False


# -----------------------------------------------------------------------------
# p2p_combine:
#   process each two lines(filtered) in the assembly file to combine two PPC
#   instructions to one PPE specific instruction for better performance
#
# Arguments:
#   string:  first_line  - 1st assembly file line to be combined
#            second_line - 2nd assembly file line to be combined
#            first_op    - 1st detected PPC opcode that needs to be combined
#            second_op   - 2nd detected PPC opcode that needs to be combined
# Return:
#   boolean: done  - True:  return without error
#                  - False: return with error detected
#            match - True:  eventually matched and combined
#                  - False: fail to qualify to be combined
# Subroutine:
#   _p2p_emit, _p2p_with_newline, _p2p_int, _p2p_regnum
# -----------------------------------------------------------------------------
def p2p_combine(first_line, second_line, first_op, second_op):
    """Try to merge two adjacent PPC instruction lines into one PPE instruction.

    Returns the tuple (done, match).  When match is True the combined
    instruction has been written to sys.stdout and the profile counters
    P2P_SPACE / P2P_CYCLE have been updated; when match is False nothing was
    written and the caller must output both lines itself.
    """
    global P2P_SPACE
    global P2P_CYCLE

    # look up rule to process the instruction
    entry = CombineRules.get(first_op)
    if entry is None:
        return False, False
    rule, ppe_op = entry

    # the 'l' rule ('.p2align' before a label) is applied by the caller; a
    # '.p2align' followed by another '.p2align' is simply not combinable
    if 'f' not in rule and 'v' not in rule:
        return True, False

    # group(1) : leading white space     group(6) : GPR, IM, D, BI or Target
    # group(2) : opcode(+/-/.)           group(7) : "," or "("
    # group(3) : white space             group(8) : GPR, IM or Target
    # group(4) : GPR, CR, BO or Target   group(9) : ")"
    # group(5) : ","
    first_inst = _P2P_COMBINE_RE.search(first_line)
    second_inst = _P2P_COMBINE_RE.search(second_line)

    # detect an error
    if first_inst is None or second_inst is None or \
       first_op != first_inst.group(2) or second_op not in second_inst.group(2):
        return False, False

    #---f----------------------------------------------------------------------
    if 'f' in rule:

        if not P2P_COMPARE_BRANCH:
            return True, False

        # fusing compare and branch
        fused_op = ppe_op + second_op

        if ',' in first_inst.group(7):
            # three operands: an explicit cr field, which must be cr0 or 0
            if '0' not in first_inst.group(4):
                return False, True
            lhs, between, rhs = (first_inst.group(6), first_inst.group(7),
                                 first_inst.group(8))
        else:
            # cr field is omitted
            lhs, between, rhs = (first_inst.group(4), first_inst.group(5),
                                 first_inst.group(6))

        # cmpwi takes a sign extended 16-bit SI operand while cmpwib* takes a
        # zero extended 5-bit UIX operand, so only fuse immediates in 0..31;
        # an immediate that is not a plain decimal number is left alone
        if 'i' in first_op:
            immediate = _p2p_int(rhs)
            if immediate is None or immediate < 0 or immediate > 31:
                return True, False
        compare_operands = lhs + between + rhs + ', '

        if 'bc' in second_op:
            # convert the BO,BI fields of 'bc' to the PX,BIX fields of 'cmp*b*'
            bo_value = _p2p_int(second_inst.group(4))
            if bo_value is None or bo_value < 0 or bo_value > 31:
                return True, False
            # five characters, bo[0] is BO bit 0 (most significant)
            bo = format(bo_value, '05b')

            # do not handle CTR decrement (BO bit 2 clear) or
            # branch regardless of the condition (BO bit 0 set)
            if bo[2] == '0' or bo[0] == '1':
                return True, False

            # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used
            px_bix = bo[1] + second_inst.group(5) + \
                     second_inst.group(6) + second_inst.group(7)
            target = second_inst.group(8)
        else:
            # extended mnemonics need no PX,BIX fields
            px_bix = ""
            if ',' in second_inst.group(5):
                # explicit cr field, which must be cr0 or 0
                if '0' not in second_inst.group(4):
                    return False, True
                target = second_inst.group(6)
            else:
                target = second_inst.group(4)

        # profile: space--, cycle is the same because 1+2==3
        P2P_SPACE -= 1

        # if enabled, put a mark in the output file
        if P2P_COMMENT:
            _p2p_emit("#P2P(%s):" % rule + _p2p_with_newline(first_line))
            _p2p_emit("#P2P(%s):" % rule + _p2p_with_newline(second_line))

        # "cmp*b* [PX,BIX,] RA,RB/IM,target"
        _p2p_emit(first_inst.group(1) + fused_op + first_inst.group(3) +
                  px_bix + compare_operands + target + "\n")
        return True, True

    #---v----------------------------------------------------------------------
    if not P2P_VIRTUAL_DOUBLE:
        return True, False

    # Combinable Conditions:
    # 1) base address registers must be the same and one of R1/R2/R13
    # 2) address offsets have to be 8-bytes continuous and aligned
    # 3) target or source registers must qualify to be double word register
    # Note: label+offset@sda21 format is converted to target r13 after link;
    #       data in r13 or SDA space is assumed to be 8-byte aligned, so only
    #       the continuity of address offset and register pair is checked
    same_base = first_inst.group(8) == second_inst.group(8) and \
                _p2p_regnum(first_inst.group(8)) in (1, 2, 13)
    if not same_base and not ("@sda21" in first_inst.group(6) and
                              "@sda21" in second_inst.group(6) and
                              P2P_VDW_SDA):
        return True, False

    def pairs_up(low, high):
        # True when low/high address one doubleword with a register pair
        low_off, high_off = low.group(6), high.group(6)
        by_label = low_off.replace("@sda21", "") + "+4" == \
                   high_off.replace("@sda21", "") and P2P_VDW_SDA
        by_number = False
        if not by_label and low_off.isdigit():
            high_value = _p2p_int(high_off)
            by_number = int(low_off) % 8 == 0 and high_value is not None and \
                        int(low_off) + 4 == high_value
        if not (by_label or by_number):
            return False
        low_reg, high_reg = _p2p_regnum(low.group(4)), _p2p_regnum(high.group(4))
        if low_reg is None or high_reg is None:
            return False
        return low_reg + 1 == high_reg or (low_reg == 31 and high_reg == 0)

    # the instruction with the lower address/register becomes the doubleword
    if pairs_up(first_inst, second_inst):
        newline = first_line.replace(first_op, ppe_op, 1)
    elif pairs_up(second_inst, first_inst):
        newline = second_line.replace(second_op, ppe_op, 1)
    else:
        return True, False

    # profile: space--, cycle--(same delay but 1 less from issue)
    P2P_SPACE -= 1
    P2P_CYCLE -= 1

    # if enabled, put a mark in the output file
    if P2P_COMMENT:
        _p2p_emit("#P2P(%s):" % rule + _p2p_with_newline(first_line))
        _p2p_emit("#P2P(%s):" % rule + _p2p_with_newline(second_line))

    _p2p_emit(_p2p_with_newline(newline))
    return True, True


# -----------------------------------------------------------------------------
# p2p_onefile:
#   process single PPC assembly file to convert it into PPE
#   assembly file
#   also filter out non-instruction lines before calling the subroutine
#
# Arguments:
#   string:  ppcFileName
# Return:
#   None; the process exits with status 1 if an instruction that should be
#   translated cannot be parsed
# Subroutine:
#   p2p_combine
#   p2p_replace
# -----------------------------------------------------------------------------
def _p2p_ppe_name(ppcFileName):
    """Return the PPE assembly file name that belongs to ppcFileName.

    Only a trailing P2P_PPC_EXT is exchanged for P2P_PPE_EXT, so a ".s" that
    appears elsewhere in the path is left alone; a name without that extension
    gets P2P_PPE_EXT appended so input and output never share a name.
    """
    if ppcFileName.endswith(P2P_PPC_EXT):
        return ppcFileName[:len(ppcFileName) - len(P2P_PPC_EXT)] + P2P_PPE_EXT
    return ppcFileName + P2P_PPE_EXT


def p2p_onefile(ppcFileName):
    """Translate one PPC assembly file into a PPE assembly file.

    The input is renamed to the PPE file name and edited in place through
    fileinput (which redirects sys.stdout into the file and keeps the original
    as a ".405" backup); afterwards the backup is renamed back to ppcFileName.
    Blank lines, comment lines, directives and labels are copied through; the
    remaining lines are offered to p2p_combine / p2p_replace.
    """
    global P2P_SPACE
    global P2P_CYCLE
    P2P_SPACE = 0                       # profile count
    P2P_CYCLE = 0                       # profile count

    def emit(text):
        # sys.stdout is looked up per call: fileinput swaps it during the loop
        sys.stdout.write(text)

    if P2P_VERBOSE:
        emit("Translate PPC assembly: " + ppcFileName + "\n")

    # the PPC file temporarily takes the PPE file name for in-place editing
    ppeFileName = _p2p_ppe_name(ppcFileName)
    os.rename(ppcFileName, ppeFileName)

    # previous line that is held back as a candidate for combining
    pre_line = ""
    pre_op = ""

    # defaults for files without any instruction line / without labels
    done = True
    line = ""
    line_num = 0
    first_label_ln = None

    def flush_pending():
        # write out a held back line that turned out not to be combinable
        if pre_line:
            emit(pre_line)
        return ""

    try:
        # inline file editing, original PPC assembly file is backed up as .405
        for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):

            # in case of "mtmsr 0; isync"
            line = line.replace('isync', 'nop')

            # skip blank line
            if not line.strip():
                pre_line = flush_pending()
                emit(line)
                continue

            # skip comments line
            if re.search(r"^[\s]*(//|#)", line):
                pre_line = flush_pending()
                emit(line)
                continue

            # skip .section code except .p2align and label:
            section = re.search(r"^[\s]*(\.[0-9a-zA-Z_]+)", line)
            if section is not None and ':' not in line and \
               section.group(1) != '.p2align':
                pre_line = flush_pending()
                emit(line)
                continue

            # apply special 'l' rule in CombineRules for '.p2align' before a 'label:'
            label = re.search(r"^[\s]*[\.0-9a-zA-Z_]+[:]+", line)
            if label is not None:
                if pre_line and pre_op == '.p2align':
                    # the distance to the previous label is unknown for the
                    # first label of a file, so no branch is inserted there
                    if first_label_ln is not None:
                        second_label_ln = fileinput.lineno()
                        misalign = 8 - (second_label_ln - first_label_ln - 2) % 8
                        if misalign in [3, 4, 5, 6, 7]:
                            # profile: same space, but save cycles, branch penalty is 2
                            P2P_CYCLE -= misalign - 2
                            if P2P_COMMENT:
                                emit("#P2P(l):" + "\n")
                            emit('\tb ' + label.group(0).split(':')[0] + "\n")
                    emit(pre_line)
                    pre_line = ""
                first_label_ln = fileinput.lineno()
                pre_line = flush_pending()
                emit(line)
                continue

            # extract opcode field from line
            ppc_op = line.split()[0]
            done, match = False, False

            # detect the 2nd possible combinable instruction
            if pre_line and P2P_COMBINE:
                # ignore +/- signs for branch prediction
                if '+' in ppc_op or '-' in ppc_op:
                    ppc_op = ppc_op[:-1]
                # '.p2align' only combines with a label, never with itself
                if 'cmp' in pre_op and ppc_op in FuseBranches or \
                   'cmp' not in pre_op and ppc_op == pre_op and \
                   pre_op != '.p2align':
                    done, match = p2p_combine(pre_line, line, pre_op, ppc_op)
                    if not match:
                        emit(pre_line)
                else:
                    emit(pre_line)
                    done, match = True, False
                pre_line = ""

            # detect the 1st possible combinable instruction
            if not pre_line and not match and P2P_COMBINE:
                if ppc_op in CombineRules:
                    pre_op = ppc_op
                    pre_line = line
                    done, match = True, True
                else:
                    done, match = True, False

            # detect possible replaceable instruction
            if not match:
                if P2P_REPLACE and ppc_op in ReplaceRules:
                    done = p2p_replace(line, ppc_op)
                else:
                    emit(line)
                    done = True

            # if instruction process is not done due to error
            if not done:
                line_num = fileinput.lineno()
                break
    finally:
        # close the output file (this also restores sys.stdout)
        fileinput.close()

    # restore the original input file
    os.rename(ppeFileName + '.405', ppcFileName)

    # in case last line of the file qualified to be a pre_line and was not printed
    if pre_line:
        with open(ppeFileName, 'a') as f:
            f.write(pre_line)

    # print error debug message
    if not done:
        emit("Error: target instruction detected at line [%d]:" % line_num + "\n")
        emit(" " + line + "\n")
        emit(" but fail to recognize instruction format." + "\n")
        # terminate Makefile or execution if an error is detected
        sys.exit(1)

    # the profile mark is the last line of the output; p2p_profile reads it
    if P2P_COMMENT:
        with open(ppeFileName, 'a') as f:
            f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE, P2P_CYCLE))

    if P2P_VERBOSE:
        emit("Generated PPE assembly: " + ppeFileName + "\n")
        emit("Optimization Profiling: " + str(P2P_SPACE * 4) + " bytes, " +
             str(P2P_CYCLE) + " cycles." + "\n")


# -----------------------------------------------------------------------------
# p2p_profile
#   profiling how much performance and code size are saved by optimization
#
# Arguments:
#   string:  ppcFileName
# Return:
#   list:    [space, cycle]
# Subroutine:
#   None
# -----------------------------------------------------------------------------
def p2p_profile(ppcFileName):
    """Return [space, cycle] read from the profile mark of a generated file.

    The mark is the "#P2P: space(N) cycle(M)" text that p2p_onefile appends as
    the last line of the PPE file when comments are enabled; [0, 0] is
    returned when the last line is not such a mark or the file is empty.
    """
    last_line = ""
    with open(_p2p_ppe_name(ppcFileName), 'r') as f:
        for last_line in f:
            pass
    profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)",
                        last_line)
    if profile is None:
        return [0, 0]
    return [int(profile.group(1)), int(profile.group(2))]


# -----------------------------------------------------------------------------
# p2p_main:
#   main of this script
#   print usage info
#   parse options and arguments
#   process one file or a directory of files
# -----------------------------------------------------------------------------
def p2p_main():
    """Parse the command line, set the P2P_* switches and process the input."""

    def say(text):
        sys.stdout.write(text + "\n")

    # command-line option parsing
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    version = "%prog v." + P2P_VERSION
    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
                      help="process all files in a directory given by PATH")
    parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile",
                      help="process single file(with path in the filename)")
    parser.add_option("-p", "--parallel",
                      action="store_true", dest="parallel", default=False,
                      help="processing all files in parallel processes")
    parser.add_option("-s", "--statistics",
                      action="store_true", dest="profile", default=False,
                      help="optimization profiling, require comment in outputs")
    parser.add_option("-c", "--combine-only",
                      action="store_false", dest="replace", default=True,
                      help="enable only combine function by disabling replace")
    parser.add_option("-r", "--replace-only",
                      action="store_false", dest="combine", default=True,
                      help="enable only replace function by disabling combine")
    # the historic long names contain spaces and cannot be typed unquoted, so
    # a dashed spelling is offered next to each of them
    parser.add_option("-b", "--compare-branch-disable",
                      "--compare branch disable",
                      action="store_false", dest="compare_branch", default=True,
                      help="only disabling fused compare branch function")
    parser.add_option("-v", "--virtual-double-disable",
                      "--virtual double disable",
                      action="store_false", dest="virtual_double", default=True,
                      help="only disabling fused virtual double function")
    parser.add_option("-e", "--eabi",
                      action="store_true", dest="vdw_sda", default=False,
                      help="enable virtual double word fusion targeting sda")
    parser.add_option("-n", "--no-comment",
                      action="store_false", dest="comment", default=True,
                      help="don't leave comment mark in output file")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose", default=True,
                      help="don't print status messages to stdout")
    (options, args) = parser.parse_args()

    global P2P_VERBOSE          # global program output verbose switch
    global P2P_COMMENT          # leave a comment mark in output files
    global P2P_REPLACE          # enable instruction replace functions
    global P2P_COMBINE          # enable instruction combine functions
    global P2P_VDW_SDA          # enable virtual double word fusion targeting sda
    global P2P_COMPARE_BRANCH   # enable fused compare and branch function
    global P2P_VIRTUAL_DOUBLE   # enable combined virtual double function
    P2P_VERBOSE = options.verbose
    P2P_COMMENT = options.comment
    P2P_REPLACE = options.replace
    P2P_COMBINE = options.combine
    P2P_VDW_SDA = options.vdw_sda
    P2P_COMPARE_BRANCH = options.compare_branch
    P2P_VIRTUAL_DOUBLE = options.virtual_double

    if P2P_VERBOSE:
        say("PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)")
        say("Version: " + P2P_VERSION)

    # single file processing
    if options.ppcFile:

        if P2P_VERBOSE:
            say("Processing signle file: " + options.ppcFile)

        p2p_onefile(options.ppcFile)

    # multiple files processing
    if options.ppcPath:

        if P2P_VERBOSE:
            say("Accessing all files at: " + options.ppcPath)
            say("*Parallel Process Mode: " + ("Off", "On")[options.parallel])

        # collect first: translation creates and renames files in these
        # directories, which should not happen while they are being walked
        file_list = []
        for root, subdirs, names in os.walk(options.ppcPath):
            for name in fnmatch.filter(names, '*' + P2P_PPC_EXT):
                file_list.append(os.path.join(root, name))

        if options.profile:
            # profiling only reads the marks of files translated earlier; it
            # takes precedence over -p so that -s never rewrites any file
            total_bytes = 0
            total_cycles = 0
            for path in file_list:
                space, cycle = p2p_profile(path)
                total_bytes += space * 4
                total_cycles += cycle
            say("Optimization Profiling: " + str(total_bytes) + " bytes, " +
                str(total_cycles) + " cycles.")
        elif options.parallel:
            # parallel processing mode
            from multiprocessing import Pool
            p = Pool()
            p.map(p2p_onefile, file_list)
            p.close()
            p.join()
        else:
            for path in file_list:
                p2p_onefile(path)

    if P2P_VERBOSE:
        say("Done")


# -----------------------------------------------------------------------------
# python main
if __name__ == '__main__':
    p2p_main()