summaryrefslogtreecommitdiffstats
path: root/tools/PowerPCtoPPE/ppc-ppe-pcp.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/PowerPCtoPPE/ppc-ppe-pcp.py')
-rwxr-xr-xtools/PowerPCtoPPE/ppc-ppe-pcp.py998
1 files changed, 0 insertions, 998 deletions
diff --git a/tools/PowerPCtoPPE/ppc-ppe-pcp.py b/tools/PowerPCtoPPE/ppc-ppe-pcp.py
deleted file mode 100755
index 7dd427d6..00000000
--- a/tools/PowerPCtoPPE/ppc-ppe-pcp.py
+++ /dev/null
@@ -1,998 +0,0 @@
-#!/usr/bin/python2.6
-
-# \file ppc-ppe-pcp.py
-# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
-#
-# ---------------------------------------------------------------
-# Revision History
-# ---------------------------------------------------------------
-# 10-07-2014: project completed
-# daviddu added optimization profile support
-#
-# 10-06-2014: added fused compare and branch support
-# daviddu added support for combining two ld/st into one double word
-# added support to insert branch upon .p2align directive
-#
-# 09-27-2014: added subroutine support for mul* and div*
-# daviddu added virtual double word replacing multiple word support
-#
-# 09-13-2014: initial version
-# daviddu only instruction inline replacement is supported
-# ---------------------------------------------------------------
-
-P2P_VERSION = "10-07-2014" # version number as last modified date
-P2P_PPC_EXT = '.s' # PPC Assembly filename extension
-P2P_PPE_EXT = '.es' # PPE Assembly filename extension
-P2P_PPE_PRE = '__ppe42_' # PPE Assembly subroutine prefix
-
-import sys
-import os
-import re
-import fnmatch
-import fileinput
-
-# ---------------------------------------------------------------
-# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
-# ---------------------------------------------------------------
-#
-# Description:
-#
-# This post-compiler processor will take PPC405 assembly file(s) produced
-# by powerpc-linux-gcc or hand coded and replace some of the instructions
-# supported by PPC405 ISA but not PPE42 ISA with a set of instructions
-# supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
-#
-# Assumptions:
-#
-# - Input/Output File Name Extension:
-#
-# PPC405 assembly file generated by powerpc-linux-gcc has filename extension
-# defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file
-# consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT"
-# global variable. Both should be consistent with Makefile rules.
-#
-# - Registers:
-#
-# Instructions in input file should only use registers supported by PPE,
-# that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE
-# only has CR0 instead of CR0-7).
-#
-# GCC flag -ffixed can be used to enforce compiler to not use certain
-# registers if compiler generates input files to this script. Note certain
-# optimization levels, such as -Os, of GCC will still use certain registers
-# regardless if -ffixed flag is used. Furthermore, compiler should not
-# generate multiple word instructions(lmw/stmw) that covers the registers
-# forbidden to use by -ffixed flag.
-#
-# Example of using -ffixed flag in this case:
-# -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
-# -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
-# -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
-# -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
-# -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
-# -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
-#
-# - Instructions:
-#
-# Instructions in input file should only use PowerPC 405 instructions
-# covered by "PowerPC 405-S Embedded Processor Core" manual; however,
-# there is an assumption on certain catalog of instructions will never be
-# generated by powerpc-linux-gcc compiler(or disabled by compiler switch).
-#
-# Also, compiler should generate extended mnemonics instead of its base
-# instruction when extended mnemonics fits.
-#
-# Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
-# directive to help instruction alignment for best cache performance.
-#
-# - Assembly Syntax:
-#
-# There should be only white spaces before instruction mnemonics, in
-# other words, all inline comments should be put after the instruction.
-#
-# "Label:" and an instruction should not be on the same line, hand coded
-# assembly should be consistent with this same compiler output format.
-#
-# Dependencies:
-#
-# In order to utilize assembly subroutines implemented for supporting
-# missing instructions of multiplication and division in PPE42 ISA, a given
-# library(with assembly files and header) must be compiled and linked with
-# any source code that use this program to generate PPE binary.
-#
-# Usage:
-#
-# ./<ThisScript> -f <a filename with path> --- process single file
-# ./<ThisScript> -d <a directory path> --- process multiple files
-# ./<ThisScript> -h --- detailed usage on other flags
-# ./<ThisScript> -v --- version of the program
-# ./<ThisScript> -d <a directory path> -s --- perform result profiling
-#
-# Functions:
-#
-# p2p_main - main function, parse options and arguments
-# p2p_onefile - processing single PPC Assembly File
-# p2p_combine - processing two PPC instructions in input file
-# p2p_replace - processing single PPC instruction in input file
-#
-# Data Structures:
-#
-# ReplaceRules = { ppc_op : [rule, ppe_op] }
-# CombineRules = { ppc_op : [rule, ppe_op] }
-# FuseBranches = [ list of branches qualified for fusing with compares ]
-#
-#------------------------------------------------------------------------------
-# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ]
-#-------|-------------------------|--------------------------------------------
-# Rule | Example (PPC to PPE) | Description
-#-------|-------------------------|--------------------------------------------
-# 'r' | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode
-# 0 0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same
-#-------|-------------------------|--------------------------------------------
-# 'ru' | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by
-# | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the
-# +1 +1 | add RA, RA, RB | original instruction to update RA
-#-------|-------------------------|--------------------------------------------
-# 'ra' | [ppc] ppc_op RT, D(RA) | on top of 'r' rule, emulate 'algebraic' by
-# | [ppe] ppe_op RT, D(RA) | appending "extsh" instruction after the
-# +1 +1 | extsh RT, RT | original instruction to sign-extend RT
-#-------|-------------------------|--------------------------------------------
-# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above.
-# | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether
-# | add RA, RA, RB | rule 'a' or rule 'u' should be applied
-# +2 +2 | extsh RT, RT | first, the outcome should be the same.
-#-------|-------------------------|--------------------------------------------
-# 'h' | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication
-# | [ppe] srwi RA, 16 | emulate multiply "high halfword" with
-# | srwi RB, 16 | multiply "low halfword" by shifting
-# +2 +2 | ppe_op RT, RA, RB | the operands first
-#-------|-------------------------|--------------------------------------------
-# 's' | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide
-# | [ppe] stwu R1, -24(R1)| instructions with calling 'subroutines'
-# | stvd D3, 8(R1) | implemented in ppe42_mul.S and ppe42_div.S
-# | mflr R3 |
-# | stw R3, 16(R1) | Calling Conventions:(SS = Stack Size)
-# | mr R3, RA |
-# | mr R4, RB | Caller is responsible for
-# | bl target | 1) create stack frame
-# | mr RT, R3 | 2) save off R3 and R4 to stack
-# | lwz R3, 16(R1) | 3) save off link register to stack
-# | mtlr R3 | 4) put operands into R3, R4 before branch
-# | lvd D3, 8(R1) | 5) put result in R3 to RT after branch
-# | lwz R1, 0(R1) | 6) restore link register from stack
-# | | 7) restore R3 and R4 from stack
-# | [sub] stwu R1, -SS(R1) | 8) remove the stack frame
-# | <save volatile> |
-# | (subroutine body) | Callee is responsible for
-# | <restore volatile>| 1) create and remove stack frame
-# | lwz R1, 0(R1) | 2) save and restore volatile registers
-# +X +Y | blr | 3) subroutine will not touch LR again
-#-------|-------------------------|--------------------------------------------
-# 'o' | [ppc] ppc_op[o] RT ... | rule of 'o' form for overflow
-# | [ppe] ppe_op RT ... | Note: "mullwo", "divwo" and "divwuo" each
-# | <inst specific> | has unique setting for XER[OV,SO] if OE = 1
-#-------|-------------------------|--------------------------------------------
-# 'd' | [ppc] ppc_op[.] RT ... | rule of '.' or 'dot' form for recording
-# | [ppe] ppe_op RT ... | using "cmpwli" to emulate the [.] form
-# | cmpwli RT, 0 | to the instruction result and CR0 fields
-#-------|-------------------------|--------------------------------------------
-# 'm' | [ppc] ppc_op RT, D(RA) | emulate PowerPC load/store multiple word
-# | [ppe] ppe_op DT, D(RA) | instructions with PPE specific
-# | (doubleword ld/st)| 'virtual doubleword' instructions if target
-# | or | address is 8-byte aligned; otherwise, using
-# | (singleword ld/st)| single word instructions instead or mix both
-# | or | Note only RA == R1/R2/R13 will always meet
-# -1 -1 | (single & double) | alignment requirement of virtual doubleword
-#-------|-------------------------|--------------------------------------------
-#
-# Lookup table used by p2p_replace: PPC opcode -> [rule letters, PPE opcode(s)].
-# p2p_replace tests each rule letter with `in`, and only has branches for
-# 'r', 'u', 'a', 'h', 's' and 'm'.
-# NOTE(review): the 'o' and 'd' letters used below ('sd', 'so', 'sod') have no
-# branch of their own in p2p_replace, so those entries are expanded exactly
-# like plain 's' and branch to the same subroutine name -- confirm that the
-# XER[OV,SO] / CR0 side effects are handled elsewhere.
-ReplaceRules = {#ppc_op : [ rule | ppe_op ]
- #----------------------------
- #synchronization instructions
- 'eieio' : [ 'r', 'sync' ],
- 'isync' : [ 'r', 'nop' ],
- 'icbi' : [ 'r', 'nop' ],
- 'icbt' : [ 'r', 'nop' ],
- 'mtcr' : [ 'r', 'mtcr0'],
- #load/store with [u/x/a] form
- 'stbux' : [ 'ru', 'stbx' ],
- 'sthux' : [ 'ru', 'sthx' ],
- 'stwux' : [ 'ru', 'stwx' ],
- 'lbzux' : [ 'ru', 'lbzx' ],
- 'lhzux' : [ 'ru', 'lhzx' ],
- 'lwzux' : [ 'ru', 'lwzx' ],
- 'lha' : [ 'ra', 'lhz' ],
- 'lhau' : [ 'ra', 'lhzu' ],
- 'lhax' : [ 'ra', 'lhzx' ],
- 'lhaux' : [ 'rau', 'lhzx' ],
- #multiply/divide with [./o] form
- 'mulhhw' : [ 'h', 'mullhw' ],
- 'mulhhw.' : [ 'h', 'mullhw.' ],
- 'mulhhwu' : [ 'h', 'mullhwu' ],
- 'mulhhwu.': [ 'h', 'mullhwu.' ],
- 'mulhw' : [ 's', 'mulhw' ],
- 'mulhw.' : [ 'sd', 'mulhw' ],
- 'mulhwu' : [ 's', 'mulhwu' ],
- 'mulhwu.' : [ 'sd', 'mulhwu' ],
- 'mullw' : [ 's', 'mullw' ],
- 'mullw.' : [ 'sd', 'mullw' ],
- 'mullwo' : [ 'so', 'mullw' ],
- 'mullwo.' : [ 'sod', 'mullw' ],
- 'mulli' : [ 's', 'mullw' ],
- 'divw' : [ 's', 'divw' ],
- 'divw.' : [ 'sd', 'divw' ],
- 'divwo' : [ 'so', 'divw' ],
- 'divwo.' : [ 'sod', 'divw' ],
- 'divwu' : [ 's', 'divwu' ],
- 'divwu.' : [ 'sd', 'divwu' ],
- 'divwuo' : [ 'so', 'divwu' ],
- 'divwuo.' : [ 'sod', 'divwu' ],
- #load/store multiple word(Rx-R31)
- # the 'm' entries carry "doubleword_op,singleword_op"; p2p_replace splits on ','
- 'lmw' : [ 'm', 'lvd,lwz' ],
- 'stmw' : [ 'm', 'stvd,stw' ]}
-
-
-#------------------------------------------------------------------------------
-# CombineRules: [ 'f', 'v', 'l' ]
-#-------|-------------------------|--------------------------------------------
-# 'f' | [ppc] ppc_op(cmp*) | rule for 'fusing' adjacent pair of compare
-# | ppc_op(b*) | and branch(PPE specific). Note: only
-# -1 0 | [ppe] ppe_op(cmp*b*) | extended mnemonics of compares are handled
-#-------|-------------------------|--------------------------------------------
-# 'v' | [ppc] ppc_op(lwz/stw) | rule for combining double word aligned
-# | ppc_op(lwz/stw) | load/store pairs into single 'virtual'
-# -1 -1 | [ppe] ppe_op(lvd/stvd) | double word instructions(PPE specific)
-#-------|-------------------------|--------------------------------------------
-# 'l' | [ppc] .p2align | compiler will insert ".p2align" directive to
-# | Label: | help instructions align from label to label.
-# | [ppe] b Label | then assembler will insert "nop" on .p2align
-# | .p2align | directive. a "branch" to skip the nops will
-# 0 -1 | Label: | improve the performance while still aligned
-#-------|-------------------------|--------------------------------------------
-#
-# Lookup table keyed by the FIRST opcode of a candidate pair:
-# PPC opcode -> [rule letter, PPE opcode (or opcode prefix for 'f')].
-# p2p_combine implements the 'f' and 'v' rules; the 'l' rule for '.p2align'
-# is applied directly in p2p_onefile when the following line is a label.
-CombineRules = {#ppc_op : [ rule | ppe_op ]
- #--------------------------
- #8byte aligned loads/stores
- 'lwz' : [ 'v', 'lvd' ],
- 'stw' : [ 'v', 'stvd' ],
- #compares fusable to branch
- 'cmplw' : [ 'f', 'cmplw' ],
- 'cmpw' : [ 'f', 'cmpw' ],
- 'cmpwi' : [ 'f', 'cmpwi' ],
- #'.p2align' before 'label:'
- '.p2align' : [ 'l', 'b' ]}
-
-
-#------------------------------------------------------------------------------
-# FuseBranches: [ Branches can be fused into cmp*b* ]
-#------------------------------------------------------------------------------
-#
-# Branch mnemonics that p2p_onefile accepts as the second half of a
-# compare+branch pair (after stripping a trailing '+'/'-' prediction hint).
-# Entries containing 'bc' take the BO/BI conversion path in p2p_combine.
-FuseBranches = ['bc', 'bcl',
- 'blt', 'bltl', 'ble', 'blel',
- 'bgt', 'bgtl', 'bge', 'bgel',
- 'beq', 'beql', 'bne', 'bnel']
-
-
-# -----------------------------------------------------------------------------
-# p2p_replace:
-# process each line(filtered) in the assembly file to replace PPC instruction
-# to supported PPE instruction(s)
-#
-# Arguments:
-# string: line - assembly file line to be replaced
-# ppc_op - detected PPC opcode that needs to be replaced
-# Return:
-# boolean: True - Return without Error
-# False - Error Detected
-# Variables:
-# string: inst, rule, ppe_op, newline, temp_op
-# double_inst, single_inst, virtual_reg, base_offset, address_reg
-# Subroutine:
-# NONE
-# -----------------------------------------------------------------------------
-def p2p_replace(line, ppc_op):
-
- # Rewrites one PPC instruction line into one or more PPE instruction lines,
- # written with "print" (the caller runs this under fileinput in-place
- # editing, so printed text becomes the output file content).
- # Returns True on success, False when the line does not parse as expected.
- # Reads the module globals ReplaceRules, P2P_PPE_PRE and P2P_COMMENT
- # (P2P_COMMENT is not defined in the visible part of the file).
-
- # parse PPC instruction as in I or D form with opcode and upto 3 operands:
- # possible forms: opcode
- # opcode RT, RA, RB
- # opcode RT, RA, IM
- # opcode RT, D(RA)
- # inst.group(0) : <whole instruction>
- # inst.group(1) : " "
- # inst.group(2) : Opcode(.)
- # inst.group(3) : " "
- # inst.group(4) : GPR
- # inst.group(5) : " , "
- # inst.group(6) : GPR or Immediate(D)
- # inst.group(7) : " , " or " ( "
- # inst.group(8) : GPR or Immediate(IM)
- # inst.group(9) : " ) "
- inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line)
-
- # detect an error
- if inst is None or ppc_op != inst.group(2):
- return False
-
- # look up rule to process the instruction
- rule, ppe_op = ReplaceRules[ppc_op]
-
- # if enabled, put a mark in the output file
- if P2P_COMMENT: print "#P2P(%s):" % rule + line,
-
- # start cases of replacing PPC instruction with PPE instruction(s)
- #---r------------------------------------------------------------------------
- if 'r' in rule:
-
- # replace opcode under rule 'r' and rewrite the instruction
- # NOTE(review): str.replace rewrites every occurrence of the opcode text
- # in the line, including any inside operands or a trailing comment.
- newline = line.replace(ppc_op, ppe_op)
- print newline,
-
- # do not continue if there is 'a' or 'u' rule to process on this line
- if 'u' not in rule and 'a' not in rule:
- return True
-
- #---u------------------------------------------------------------------------
- if 'u' in rule:
-
- # construct and write "add RA, RA, RB" under rule 'u'
- newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\
- inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8)
- print newline
-
- # do not continue if there is 'a' rule to process on this line
- if 'a' not in rule:
- return True
-
- #---a------------------------------------------------------------------------
- if 'a' in rule:
-
- # construct and write "extsh RT, RT" under rule 'a'
- newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\
- inst.group(5) + inst.group(4)
- print newline
- return True
-
- #---h------------------------------------------------------------------------
- if 'h' in rule:
-
- # construct and write "srwi RA, 16" under rule 'h'
- # NOTE(review): the shift is emitted with RA (and below RB) as its only
- # register operand -- presumably this modifies the source registers in
- # place; confirm that is acceptable for the surrounding code.
- newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\
- inst.group(5) + "16"
- print newline
-
- # construct and write "srwi RB, 16" under rule 'h'
- newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\
- inst.group(5) + "16"
- print newline
-
- # replace opcode in original instruction and write under rule 'h'
- # NOTE(review): unlike rule 'r' this print has no trailing comma, so if
- # "line" still ends with its newline an extra blank line is written.
- newline = line.replace(ppc_op, ppe_op)
- print newline
- return True
-
- #---s------------------------------------------------------------------------
- if 's' in rule:
-
- # NOTE(review): the register checks in this branch compare the operand
- # text against the bare strings '3' and '4'; operands written in the
- # "%rN" form will not match them -- confirm inputs never use that form.
-
- # construct branch target label
- ppe_op = P2P_PPE_PRE + ppe_op
-
- # construct and write "stwu R1, -24(R1)" to create the stack frame
- newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\
- inst.group(5) + '-24(1)'
- print newline
-
- # construct and write "stvd D3, 8(R1)" to save off R3 and R4
- newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\
- inst.group(5) + '8(1)'
- print newline
-
- # construct and write "mflr R3" to fetch the current link address
- newline = inst.group(1) + 'mflr' + inst.group(3) + '3'
- print newline
-
- # construct and write "stw R3, 16(R1)" to save off current LR to stack
- newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\
- inst.group(5) + '16(1)'
- print newline
-
- # construct and write "mr R3, RA" to copy the operand RA to R3
- # if RA == R3 then R3 was clobbered, restore R3 from stack
- if inst.group(6) == '3':
- newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\
- inst.group(5) + '8(1)'
- print newline
- else:
- newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\
- inst.group(5) + inst.group(6)
- print newline
-
- # if 'mulli' is detected, using 'li' instead of 'mr' for second operand
- if ppc_op == 'mulli':
- temp_op = 'li'
- else:
- temp_op = 'mr'
-
- # Set R4 if R4 is not already RB
- # NOTE(review): when RB is R3 and RA is not, R3 has already been
- # overwritten by the "mr 3, RA" above, so "mr 4, 3" would copy RA rather
- # than the original RB -- looks like an unhandled case; confirm.
- if temp_op == 'li' or inst.group(8) != '4':
- # construct and write "mr R4, RB" to copy the operand RB to R4
- # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4
- newline = inst.group(1) + temp_op + inst.group(3) + '4' +\
- inst.group(5) + inst.group(8)
- print newline
-
- # using branch and link(bl) to branch to subroutine
- # later subroutine can branch back using branch link register(blr)
- # Assumption: the subroutine will be responsible for saving
- # and restoring all the volatile registers used in the subroutine
- newline = inst.group(1) + 'bl' + inst.group(3) + ppe_op
- print newline
-
- # if RT is not already R3 then copy R3 to RT
- # NOTE(review): when RT is R4 the result is copied into R4 here, but the
- # "lvd D3, 8(R1)" emitted below restores R3 and R4 from the stack, which
- # would overwrite that result -- confirm RT == R4 cannot occur.
- if inst.group(4) != '3':
- # construct and write "mr RT, R3" to copy the result in R3 to RT
- newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\
- inst.group(5) + '3'
- print newline
- else:
- # save return on stack
- # (written into the slot R3 was saved to, so the later restore of
- # R3/R4 leaves the result in R3)
- newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\
- inst.group(5) + '8(1)'
- print newline
-
- # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack
- newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\
- inst.group(5) + '16(1)'
- print newline
-
- # construct and write "mtlr R3" to restore the link register
- newline = inst.group(1) + 'mtlr' + inst.group(3) + '3'
- print newline
-
- # construct and write "lvd D3, 8(R1)" to restore R3 and R4
- newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\
- inst.group(5) + '8(1)'
- print newline
-
- # construct and write "lwz R1, 0(R1)" to destroy the stack frame
- newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\
- inst.group(5) + '0(1)'
- print newline
- return True
-
- #---m------------------------------------------------------------------------
- if 'm' in rule:
-
- # parse instruction information
- # note register can be in either "N" form or "%rN" form
- # NOTE(review): int(inst.group(6)) raises ValueError if the displacement
- # field is empty or not a plain decimal number; there is no guard here.
- double_inst,single_inst = ppe_op.split(',')
- virtual_reg = int(re.search(r'\d+', inst.group(4)).group())
- base_offset = int(inst.group(6))
- address_reg = int(re.search(r'\d+', inst.group(8)).group())
-
- # consider illegal if multiple word instruction covers non-exist registers
- if virtual_reg < 28:
- return False
-
- # loop until and include GPR31
- while virtual_reg < 32:
- # page 270 of 405 manual, only do this for load instructions
- # (chained comparison: target register equals the base register and the
- # base register is not R31 -> skip that register, still advance offset)
- if virtual_reg == address_reg != 31 and 'l' in single_inst:
- base_offset += 4
- virtual_reg += 1
- continue
-
- # if other GPRs being address_reg there is no guarantee for alignment
- if address_reg not in [1,2,13]:
- # construct and write "lwz/stw RT, D(RA)" for every registers
- newline = inst.group(1) + single_inst + inst.group(3) +\
- str(virtual_reg) + inst.group(5) + str(base_offset) +\
- inst.group(7) + inst.group(8) + inst.group(9)
- print newline
- base_offset += 4
- virtual_reg += 1
- else:
- # if base_offset is also aligned with base address in the address_reg
- # & there are at least two more registers to perform doubleword ld/st
- if not (base_offset % 8) and (virtual_reg + 1) < 32:
- # construct and write "lvd/stvd DR, D(RA)" under rule 'm'
- newline = inst.group(1) + double_inst + inst.group(3) +\
- str(virtual_reg) + inst.group(5) + str(base_offset) +\
- inst.group(7) + inst.group(8) + inst.group(9)
- print newline
- base_offset += 8
- virtual_reg += 2
- # either only one register left or base_offset isn't aligned
- else:
- # construct and write "lwz/stw SR, D(RA)" under rule 'm'
- newline = inst.group(1) + single_inst + inst.group(3) +\
- str(virtual_reg) + inst.group(5) + str(base_offset) +\
- inst.group(7) + inst.group(8) + inst.group(9)
- print newline
- base_offset += 4
- virtual_reg += 1
- # end of this if-else
- # end of while loop
- return True
- # end of last if
-
-
-# -----------------------------------------------------------------------------
-# p2p_combine:
-# process each two lines(filtered) in the assembly file to combine two PPC
-# instructions to one PPE specific instruction for better performance
-#
-# Arguments:
-# string: first_line - 1st assembly file line to be combined
-# second_line - 2nd assembly file line to be combined
-# first_op - 1st detected PPC opcode that needs to be combined
-# second_op - 2nd detected PPC opcode that needs to be combined
-# Return:
-# boolean: done - True: return without error
-# - False: return with error detected
-# match - True: eventually matched and combined
-# - False: fail to qualify to be combined
-# Variables:
-# string: first_inst, second_inst, rule, ppe_op, newline
-# bo, px_bix, compare_operands, target
-# Subroutine:
-# NONE
-# -----------------------------------------------------------------------------
-def p2p_combine(first_line, second_line, first_op, second_op):
-
- # Tries to merge two adjacent PPC instruction lines into one PPE
- # instruction, written with "print". Returns a (done, match) pair:
- # done - False when a parse/format error was detected
- # match - True when this function produced output for the pair
- # (the caller then does not print the first line itself)
- # Updates the profile counters P2P_SPACE / P2P_CYCLE on a successful merge.
-
- global P2P_SPACE; global P2P_CYCLE
- global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE
-
- # parse PPC instruction as in I or B or D form with opcode and upto 3 operands
- # possible form : [1st] opcode [CR,] RA, RB
- # [1st] opcode [CR,] RA, IM
- # [1st] opcode RT, D(RA)
- # [2nd] opcode [CR,] Target
- # [2nd] opcode BO, BI, Target
- # [2nd] opcode RT, D(RA)
- # inst.group(0) : <whole instruction>
- # inst.group(1) : " "
- # inst.group(2) : Opcode(+/-/.)
- # inst.group(3) : " "
- # inst.group(4) : GPR or CR or BO or Target
- # inst.group(5) : " , "
- # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target
- # inst.group(7) : " , " or " ( "
- # inst.group(8) : GPR or IM or Target
- # inst.group(9) : " ) "
- first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line)
- second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line)
-
- # detect an error
- # (second_op is checked as a substring because the caller may have
- # stripped a trailing +/- prediction hint from it)
- if first_inst is None or second_inst is None or \
- first_op != first_inst.group(2) or second_op not in second_inst.group(2):
- return False,False
-
- # look up rule to process the instruction
- rule, ppe_op = CombineRules[first_op]
-
- # start cases of combining two PPC instructions into PPE instruction
- #---f------------------------------------------------------------------------
- if 'f' in rule:
-
- if not P2P_COMPARE_BRANCH:
- return True,False
-
- # fusing compare and branch
- ppe_op = ppe_op + second_op
-
- # for cmpwib* case, there is a difference between
- # cmpwi SI operand as signed 16-bit integer and then got sign extended and
- # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended
- # thus, will not fuse the two if the integer operand is not in range(0,31)
- # a ',' in group(7) means three operands, i.e. the cr field is present:
- if ',' in first_inst.group(7):
- # cr field must be cr0 or 0, error out if it is something else:
- if '0' not in first_inst.group(4):
- return False, True
- # NOTE(review): int() raises ValueError if the immediate is not a plain
- # decimal literal; there is no guard here.
- if 'i' in first_op and (int(first_inst.group(8)) < 0 or \
- int(first_inst.group(8)) > 31):
- return True,False
- else:
- compare_operands = first_inst.group(6) + first_inst.group(7) + \
- first_inst.group(8) + ', '
- else:
- if 'i' in first_op and (int(first_inst.group(6)) < 0 or \
- int(first_inst.group(6)) > 31):
- return True,False
- else:
- compare_operands = first_inst.group(4) + first_inst.group(5) + \
- first_inst.group(6) + ', '
-
- # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*'
- # Note CTR decrement and branch always cases are not handled, and
- # python bin() outputs "0bXXXXX" form so bo[2] is actually BO bit 0
- # else there is no need for PX,BIX fields for extended mnemonics
- if 'bc' in second_op:
- bo = bin(int(second_inst.group(4)))
-
- # do not handle CTR decrement or branch always cases
- # NOTE(review): bo is a string, so comparing bo[4] / bo[2] with the
- # integers 0 / 1 is always False and this guard never returns. Also,
- # bin() does not zero-pad: for BO < 16 the indices below do not line up
- # with the BO bit numbers, and for BO < 4 bo[4] raises IndexError.
- if bo[4] == 0 or bo[2] == 1:
- return True,False
-
- # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used
- px_bix = bo[3] + second_inst.group(5) + \
- second_inst.group(6) + second_inst.group(7)
- target = second_inst.group(8)
- else:
- px_bix = ""
- # a ',' in group(5) means two operands, i.e. the cr field is present:
- if ',' in second_inst.group(5):
- # cr field must be cr0 or 0, error out if it is something else:
- if '0' not in second_inst.group(4):
- return False, True
- target = second_inst.group(6)
- else:
- target = second_inst.group(4)
-
- # profile: space--, cycle is the same because 1+2==3
- P2P_SPACE -= 1
-
- # if enabled, put a mark in the output file
- if P2P_COMMENT:
- print "#P2P(%s):" % rule + first_line,
- print "#P2P(%s):" % rule + second_line,
-
- # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule
- newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\
- px_bix + compare_operands + target
- print newline
- return True,True
-
-
- #---v------------------------------------------------------------------------
- if 'v' in rule:
-
- if not P2P_VIRTUAL_DOUBLE:
- return True,False
-
- global P2P_VDW_SDA
-
- # Combinable Conditions:
- # 1) base address registers must be the same and one of R1/R2/R13
- # 2) address offsets have to be 8-bytes continuous and aligned
- # 3) target or source registers must qualify to be double word register
- # Note: label+offset@sda21 format is converted to target r13 after link
- # assume data go in and out r13 or SDA space is always 8-byte aligned
- # here we only check the continuity of address offset and register pair
- # NOTE(review): the base-register test compares against the bare strings
- # '1','2','13', and the register fields are passed to int() below, which
- # raises ValueError for the "%rN" form -- confirm inputs use bare numbers.
- if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \
- ("@sda21" in first_inst.group(6) and \
- "@sda21" in second_inst.group(6) and \
- P2P_VDW_SDA):
-
- # first case: first_line holds the lower (8-byte aligned) word
- if ((first_inst.group(6).replace("@sda21","") + "+4" == \
- second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
- ((first_inst.group(6).isdigit() and \
- not int(first_inst.group(6)) % 8) and \
- int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \
- (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \
- (int(first_inst.group(4)) == 31 and \
- int(second_inst.group(4)) == 0)):
- newline = first_line.replace(first_op, ppe_op)
- # second case: same test with the two lines swapped
- elif ((second_inst.group(6).replace("@sda21","") + "+4" == \
- first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
- ((second_inst.group(6).isdigit() and \
- not int(second_inst.group(6)) % 8) and \
- int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \
- (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \
- (int(second_inst.group(4)) == 31 and \
- int(first_inst.group(4)) == 0)):
- newline = second_line.replace(second_op, ppe_op)
- else:
- return True,False
-
- # profile: space--, cycle--(same delay but 1 less from issue)
- P2P_SPACE -= 1; P2P_CYCLE -= 1
-
- # if enabled, put a mark in the output file
- if P2P_COMMENT:
- print "#P2P(%s):" % rule + first_line,
- print "#P2P(%s):" % rule + second_line,
-
- print newline,
- return True,True
- else:
- return True,False
-
-
-# -----------------------------------------------------------------------------
-# p2p_onefile:
-# process single PPC assembly file to convert it into PPE assembly file
-# also filter out non-instruction lines before calling the subroutine
-#
-# Arguments:
-# string: ppcFileName
-# Return:
-# boolean: done - True if file processing completed without error
-# - False if file processing failed due to an error
-# Variables:
-# boolean: match, done
-# string: ppeFileName, line, ppc_op, pre_op, pre_line, section, label
-# integer: line_num, first_label_ln, second_label_ln, misalign
-# Subroutine:
-# p2p_combine
-# p2p_replace
-# -----------------------------------------------------------------------------
-def p2p_onefile(ppcFileName):
-
- # Converts one PPC assembly file into a PPE assembly file next to it.
- # Works by renaming the input to the PPE name, rewriting it in place via
- # fileinput (so every "print" inside the loop writes to the PPE file), and
- # then renaming the fileinput backup copy back to the original PPC name.
- # Calls sys.exit(1) after printing a message if a target instruction could
- # not be parsed.
-
- global P2P_SPACE; P2P_SPACE = 0 # profile count
- global P2P_CYCLE; P2P_CYCLE = 0 # profile count
-
- if P2P_VERBOSE : print "Translate PPC assembly: " + ppcFileName
-
- # new PPE assembly file is named by swapping P2P_PPC_EXT for P2P_PPE_EXT
- # NOTE(review): str.replace substitutes every occurrence of P2P_PPC_EXT in
- # the whole path, not only the trailing extension -- confirm paths never
- # contain that text elsewhere (e.g. in a directory name).
- ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)
- os.rename(ppcFileName, ppeFileName)
-
- # initialize storage variables for previous line that needs to be remembered
- pre_line = ""
- pre_op = ""
-
- # use inline file editing, back up original PPC assembly file as
- # <ppeFileName>.405
- for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):
-
- # in case of "mtmsr 0; isync"
- # NOTE(review): this substitution is applied to every line before any
- # filtering, so it also rewrites 'isync' inside comments or symbol names.
- line = line.replace('isync','nop')
-
- # skip blank line
- if not line.strip():
- if pre_line:
- print pre_line,
- pre_line = ""
- print line,
- continue
-
- # skip comments line
- if re.search("^[\s]*(//|#)", line):
- if pre_line:
- print pre_line,
- pre_line = ""
- print line,
- continue
-
- # skip .section code except .p2align and label:
- section = re.search("^[\s]*(\.[0-9a-zA-Z_]+)", line)
- if section is not None and ':' not in line and \
- section.group(1) != '.p2align':
- if pre_line:
- print pre_line,
- pre_line = ""
- print line,
- continue
-
- # apply special 'l' rule in CombineRules for '.p2align' before a 'label:'
- label = re.search("^[\s]*[\.0-9a-zA-Z_]+[:]+", line)
- if label is not None:
- if pre_line and pre_op == '.p2align':
- # NOTE(review): first_label_ln is only assigned further down; if the
- # first label seen in the file directly follows a .p2align, this read
- # would raise NameError -- confirm (indentation is lost in this view).
- second_label_ln = fileinput.lineno()
- misalign = 8 - (second_label_ln - first_label_ln - 2) % 8
- if misalign in [3,4,5,6,7]:
- # profile: same space, but save cycles, branch penalty is 2
- P2P_CYCLE -= misalign - 2
- if P2P_COMMENT: print "#P2P(l):"
- print '\tb ' + label.group(0).split(':')[0]
- print pre_line,
- pre_line = ""
- first_label_ln = fileinput.lineno()
- if pre_line:
- print pre_line,
- pre_line = ""
- print line,
- continue
-
- # extract opcode field from line
- ppc_op = line.split()[0]
- done,match = False,False
-
- # detect the 2nd possible combinable instruction
- if pre_line and P2P_COMBINE:
- # ignore +/- signs for branch prediction
- # (drops the last character, i.e. assumes the sign is the final one)
- if '+' in ppc_op or '-' in ppc_op:
- ppc_op = ppc_op[:-1]
- if 'cmp' in pre_op and ppc_op in FuseBranches or \
- 'cmp' not in pre_op and ppc_op == pre_op:
- done,match = p2p_combine(pre_line, line, pre_op, ppc_op)
- if not match:
- print pre_line,
- else:
- print pre_line,
- done,match = True,False
- pre_line = ""
-
- # detect the 1st possible combinable instruction
- # (the line is held back in pre_line and not printed yet)
- if not pre_line and not match and P2P_COMBINE:
- if ppc_op in CombineRules.keys():
- pre_op = ppc_op
- pre_line = line
- done,match = True,True
- else:
- done,match = True,False
-
- # detect possible replaceable instruction
- if not match:
- if ppc_op in ReplaceRules.keys() and P2P_REPLACE:
- done = p2p_replace(line, ppc_op)
- else:
- print line,
- done = True
-
- # if instruction process is not done due to error
- if not done:
- line_num = fileinput.lineno()
- break
-
- # close the output file and restore the original input file
- fileinput.close()
- os.rename(ppeFileName+'.405', ppcFileName)
-
- # in case last line of the file qualified to be a pre_line and was not printed
- if pre_line:
- f = open(ppeFileName, 'a')
- f.write(pre_line)
- f.close()
-
- # print error debug message
- # NOTE(review): "done" is first assigned when an instruction line is
- # reached; for a file with no such line (empty, or only comments, blank
- # lines, directives and labels) this test would raise NameError -- confirm.
- if not done:
- print "Error: target instruction detected at line [%d]:" % line_num
- print " " + line
- print " but fail to recognize instruction format."
- # terminate Makefile or execution if an error is detected
- sys.exit(1)
-
- # append the profile footer that p2p_profile later reads from the last line
- if P2P_COMMENT and P2P_PROFILE:
- f = open(ppeFileName, 'a')
- f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE,P2P_CYCLE))
- f.close()
-
- if P2P_VERBOSE:
- print "Generated PPE assembly: " + ppeFileName
- if P2P_PROFILE:
- print "Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +\
- str(P2P_CYCLE) + " cycles."
-
-
-# -----------------------------------------------------------------------------
-# p2p_profile
-# profiling how much performance and code size are saved by optimization
-#
-# Arguments:
-# string: ppcFileName
-# Return:
-# list: [space, cycle]
-# Variables:
-# string: line, profile
-# Subroutine:
-# None
-# -----------------------------------------------------------------------------
def p2p_profile(ppcFileName):
    """Read the optimization profile recorded in a generated PPE file.

    The PPE assembly file name is derived from ``ppcFileName`` by
    substituting P2P_PPE_EXT for P2P_PPC_EXT.  Only the LAST line of that
    file is inspected; it is expected to be the trailer comment
    ``#P2P: space(<int>) cycle(<int>)``.

    Arguments:
        string: ppcFileName -- path of the PPC assembly source file
    Return:
        list: [space, cycle] as integers, or [0, 0] when the file is empty
        or its last line is not a well-formed profile trailer.
    Raises:
        IOError if the derived PPE file cannot be opened.
    """
    # str.replace() rewrites every occurrence of the extension text in the
    # path, not just the suffix.
    # NOTE(review): presumably the PPE file is named the same way when it is
    # generated -- confirm before changing this derivation.
    ppe_name = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)

    # Start from an empty string so an empty file yields [0, 0] instead of
    # referencing a loop variable that was never bound.
    last_line = ""
    with open(ppe_name, 'r') as ppe_file:
        for last_line in ppe_file:
            pass

    # An optional leading minus sign followed by digits: negative savings
    # are legal, but malformed text such as "1-2" must not reach int().
    trailer = re.search(r"^#P2P: space\((-?[0-9]+)\) cycle\((-?[0-9]+)\)",
                        last_line)
    if trailer is None:
        return [0, 0]
    return [int(trailer.group(1)), int(trailer.group(2))]
-
-# -----------------------------------------------------------------------------
-# p2p_main:
-# main of this script
-# print usage info
-# parse options and arguments
-# process one file or a directory of files
-# -----------------------------------------------------------------------------
def p2p_main():
    """Entry point: parse the command line and process the requested files.

    Parses options with optparse, publishes them through the module-level
    P2P_* switches, then:
      * with -f FILE, processes that single file via p2p_onefile;
      * with -d PATH, walks PATH for files matching '*' + P2P_PPC_EXT and
          - without -s: processes each file via p2p_onefile (through a
            multiprocessing Pool when -p is given);
          - with -s and without -p: only sums up the profiles read by
            p2p_profile, without processing the files;
          - with -s and -p: processes all files in the pool first, then
            sums up their profiles.
    -p has no effect without -d.  When neither -f nor -d is given, nothing
    is processed.
    """
    global P2P_VERBOSE, P2P_COMMENT, P2P_PROFILE, P2P_REPLACE
    global P2P_COMBINE, P2P_VDW_SDA, P2P_COMPARE_BRANCH, P2P_VIRTUAL_DOUBLE

    # command-line option parsing
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    version = "%prog v." + P2P_VERSION
    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
                      help="process all files in a directory given by PATH")
    parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile",
                      help="process single file(with path in the filename)")
    parser.add_option("-p", "--parallel",
                      action="store_true", dest="parallel", default=False,
                      help="processing all files in parallel processes")
    parser.add_option("-s", "--statistics",
                      action="store_true", dest="profile", default=False,
                      help="optimization profiling, require comment in outputs")
    parser.add_option("-c", "--combine-only",
                      action="store_false", dest="replace", default=True,
                      help="enable only combine function by disabling replace")
    parser.add_option("-r", "--replace-only",
                      action="store_false", dest="combine", default=True,
                      help="enable only replace function by disabling combine")
    parser.add_option("-b", "--compare branch disable",
                      action="store_false", dest="compare_branch", default=True,
                      help="only disabling fused compare branch function")
    parser.add_option("-v", "--virtual double disable",
                      action="store_false", dest="virtual_double", default=True,
                      help="only disabling fused virtual double function")
    parser.add_option("-e", "--eabi",
                      action="store_true", dest="vdw_sda", default=False,
                      help="enable virtual double word fusion targeting sda")
    parser.add_option("-n", "--no-comment",
                      action="store_false", dest="comment", default=True,
                      help="don't leave comment mark in output file")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose", default=True,
                      help="don't print status messages to stdout")
    (options, args) = parser.parse_args()

    # global program output verbose switch
    P2P_VERBOSE = options.verbose
    # leave a comment mark in output files
    P2P_COMMENT = options.comment
    # space/performance profiling function
    P2P_PROFILE = options.profile
    # enable instruction replace functions
    P2P_REPLACE = options.replace
    # enable instruction combine functions
    P2P_COMBINE = options.combine
    # enable virtual double word fusion targeting sda
    P2P_VDW_SDA = options.vdw_sda
    # enable only fused compare and branch function
    P2P_COMPARE_BRANCH = options.compare_branch
    # enable only combined virtual double function
    P2P_VIRTUAL_DOUBLE = options.virtual_double

    # Every print below passes one parenthesized expression, which behaves
    # the same as the Python 2 print statement used elsewhere in this file.
    if P2P_VERBOSE:
        print("PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)")
        print("Version: " + P2P_VERSION)

    # single file processing
    if options.ppcFile:
        if P2P_VERBOSE:
            print("Processing signle file: " + options.ppcFile)
        p2p_onefile(options.ppcFile)

    # multiple files processing
    if options.ppcPath:
        if P2P_VERBOSE:
            print("Accessing all files at: " + options.ppcPath)
            print("*Parallel Process Mode: " + ("Off", "On")[options.parallel])

        # gather every PPC assembly file below the given directory
        ppc_files = []
        for root, subdirs, names in os.walk(options.ppcPath):
            for name in fnmatch.filter(names, '*' + P2P_PPC_EXT):
                ppc_files.append(os.path.join(root, name))

        if options.parallel:
            # parallel processing mode
            from multiprocessing import Pool
            pool = Pool()
            pool.map(p2p_onefile, ppc_files)
            pool.close()
            pool.join()
        elif not options.profile:
            for path in ppc_files:
                p2p_onefile(path)

        # The summary is computed after any parallel run has finished;
        # printing it before the pool ran always reported zero savings.
        if options.profile:
            total_bytes = 0
            total_cycles = 0
            for path in ppc_files:
                space, cycle = p2p_profile(path)
                # the byte count is reported as four times the space count
                total_bytes += space * 4
                total_cycles += cycle
            print("Optimization Profiling: " + str(total_bytes) + " bytes, " +
                  str(total_cycles) + " cycles.")

    if P2P_VERBOSE:
        print("Done")
-
-
-# -----------------------------------------------------------------------------
-# python main
if __name__ == '__main__':
    # run the processor only when executed as a script, not when imported
    p2p_main()
-
OpenPOWER on IntegriCloud