From ae389ca4d9f1c23ae986f6e4d2b24d734f6e83b3 Mon Sep 17 00:00:00 2001 From: Sachin Gupta Date: Tue, 12 Jul 2016 04:23:47 -0500 Subject: Partial cleanup for tools directory These changes are done 1. Removal of PowerPCtoPPE folder 2. Remving binaries from tools/ppetracepp. Generate these binaries at runtime. Change-Id: If884ba1cc5e2747265ce3618e82d1fb439babcd4 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/26889 Tested-by: Jenkins Server Reviewed-by: Shakeeb A. Pasha B K Reviewed-by: Sachin Gupta --- tools/PowerPCtoPPE/Makefile | 23 - tools/PowerPCtoPPE/p2p-test-gen.py | 147 ------ tools/PowerPCtoPPE/p2pfiles.mk | 14 - tools/PowerPCtoPPE/ppc-ppe-pcp.py | 998 ------------------------------------- tools/PowerPCtoPPE/ppe42_divw.S | 208 -------- tools/PowerPCtoPPE/ppe42_divwu.S | 184 ------- tools/PowerPCtoPPE/ppe42_mulhw.S | 193 ------- tools/PowerPCtoPPE/ppe42_mulhwu.S | 202 -------- tools/PowerPCtoPPE/ppe42_mullw.S | 174 ------- tools/ppetracepp/Makefile | 32 +- tools/ppetracepp/ppe2fsp | Bin 19306 -> 0 bytes tools/ppetracepp/ppetracepp | Bin 321115 -> 0 bytes tools/ppetracepp/ppetracepp.C | 26 +- 13 files changed, 54 insertions(+), 2147 deletions(-) delete mode 100644 tools/PowerPCtoPPE/Makefile delete mode 100755 tools/PowerPCtoPPE/p2p-test-gen.py delete mode 100644 tools/PowerPCtoPPE/p2pfiles.mk delete mode 100755 tools/PowerPCtoPPE/ppc-ppe-pcp.py delete mode 100644 tools/PowerPCtoPPE/ppe42_divw.S delete mode 100644 tools/PowerPCtoPPE/ppe42_divwu.S delete mode 100644 tools/PowerPCtoPPE/ppe42_mulhw.S delete mode 100644 tools/PowerPCtoPPE/ppe42_mulhwu.S delete mode 100644 tools/PowerPCtoPPE/ppe42_mullw.S delete mode 100755 tools/ppetracepp/ppe2fsp delete mode 100755 tools/ppetracepp/ppetracepp (limited to 'tools') diff --git a/tools/PowerPCtoPPE/Makefile b/tools/PowerPCtoPPE/Makefile deleted file mode 100644 index d38e72d4..00000000 --- a/tools/PowerPCtoPPE/Makefile +++ /dev/null @@ -1,23 +0,0 @@ - -export SUB_OBJDIR = /p2p - -include img_defs.mk -include 
p2pfiles.mk - -OBJS := $(addprefix $(OBJDIR)/, $(P2P_OBJECTS)) - -libp2p.a: $(OBJS) - $(AR) crs $(OBJDIR)/libp2p.a $(OBJDIR)/*.o - -.PHONY: clean p2p -p2p: $(OBJS) - -$(OBJS) $(OBJS:.o=.d): | $(OBJDIR) - -$(OBJDIR): - mkdir -p $(OBJDIR) - -ifneq ($(MAKECMDGOALS),clean) -include $(OBJS:.o=.d) -endif - diff --git a/tools/PowerPCtoPPE/p2p-test-gen.py b/tools/PowerPCtoPPE/p2p-test-gen.py deleted file mode 100755 index 983cec03..00000000 --- a/tools/PowerPCtoPPE/p2p-test-gen.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/python2.6 - -# \file p2p-test-gen.py -# \brief this program generates random constructed test cases -# in the form of input file consumed by ppc-ppe-pcp.py -# \usage create a file named 'test.s' and make sure it has at -# least one blank line before executing this program. - -import fileinput -import random - -DotLabel = ['', 'Label:', '.Label'] - -Comments = ['', '// Comments', '/* Comments */'] - -TabSpace = ['', '\t', ' ', '\t ', ' \t', ' \t '] - -RegLabel = ['', '%r'] - -Register = [0,1,2,3,4,5,6,7,8,9,10,13,28,29,30,31] - -TestEnable = [0,1,2,3] - -TestBook = {'eieio' : 0, - 'isync' : 0, - 'icbi' : 0, - 'icbt' : 0, - 'stbux' : 3, - 'sthux' : 3, - 'stwux' : 3, - 'lbzux' : 3, - 'lhzux' : 3, - 'lwzux' : 3, - 'lha' : 2, - 'lhau' : 2, - 'lhax' : 3, - 'lhaux' : 3, - 'mulhhw' : 3, - 'mulhhwu' : 3, - 'mulhw' : 3, - 'mulhwu' : 3, - 'mullw' : 3, - 'mulli' : 1, - 'divw' : 3, - 'divwu' : 3, - 'lmw' : 2, - 'stmw' : 2, - 'lwz' : 4, - 'stw' : 4, - 'cmplw' : 5, - 'cmpw' : 5, - 'cmpwi' : 5} - -BranchList = ['bc', 'bcl', 'blt', 'bltl', 'ble', 'blel', 'bgt', 'bgtl', 'bge', - 'bgel', 'beq', 'beql', 'bne', 'bnel'] - -def p2p_test(): - for line in fileinput.input('test.s', inplace=1): - print '// start generating test cases:', - for opcode,format in TestBook.iteritems(): - opcode += ' ' - if random.randint(1, 10) > 5: - print random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] +\ - random.sample(TabSpace,1)[0] - else: - print random.sample(TabSpace,1)[0] + 
random.sample(DotLabel,1)[0] +\ - random.sample(TabSpace,1)[0] - if format == 0 in TestEnable: - print random.sample(TabSpace,1)[0] + opcode +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - if format == 3 in TestEnable: - regs = random.sample(Register, 3) - reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - random.sample(RegLabel,1)[0] + str(regs[1]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - random.sample(RegLabel,1)[0] + str(regs[2]) - print random.sample(TabSpace,1)[0] + opcode +\ - random.sample(TabSpace,1)[0] + reg_field +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - if format == 1 in TestEnable: - regs = random.sample(Register, 2) - reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - random.sample(RegLabel,1)[0] + str(regs[1]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - str(random.randint(-128, 128)) - print random.sample(TabSpace,1)[0] + opcode +\ - random.sample(TabSpace,1)[0] + reg_field +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - if format == 2 in TestEnable: - regs = random.sample(Register, 2) - reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - str(random.randint(-128, 128)) +\ - '(' + random.sample(RegLabel,1)[0] + str(regs[1]) + ')' - print random.sample(TabSpace,1)[0] + opcode +\ - random.sample(TabSpace,1)[0] + reg_field +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - if format == 4 in TestEnable: - for i in [1,2]: - regs = random.sample(Register, 2) - reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - str(random.randint(-128, 128)) +\ - '(' + random.sample(RegLabel,1)[0] + str(regs[1]) + ')' - print random.sample(TabSpace,1)[0] + opcode +\ - random.sample(TabSpace,1)[0] + reg_field +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - if format == 5 in 
TestEnable: - if 'i' in opcode: - regs = random.sample(Register, 1) - reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - random.sample(RegLabel,1)[0] +\ - str(random.randint(-128, 128)) - else: - regs = random.sample(Register, 2) - reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\ - random.sample(TabSpace,1)[0] +\ - random.sample(RegLabel,1)[0] + str(regs[1]) - print random.sample(TabSpace,1)[0] + opcode +\ - random.sample(TabSpace,1)[0] + reg_field +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - branch = random.sample(BranchList, 1)[0] + ' ' - if 'bc' in branch: - reg_field = random.sample(TabSpace,1)[0] +\ - str(random.randint(0, 15)) + ',' +\ - random.sample(TabSpace,1)[0] +\ - str(random.randint(0, 7)) + ',' +\ - random.sample(TabSpace,1)[0] +\ - str(random.randint(-128, 128)) +\ - random.sample(TabSpace,1)[0] - else: - reg_field = random.sample(TabSpace,1)[0] +\ - str(random.randint(-128, 128)) +\ - random.sample(TabSpace,1)[0] - print random.sample(TabSpace,1)[0] + branch +\ - random.sample(TabSpace,1)[0] + reg_field +\ - random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] - fileinput.close() - -if __name__ == '__main__': - p2p_test() - - - diff --git a/tools/PowerPCtoPPE/p2pfiles.mk b/tools/PowerPCtoPPE/p2pfiles.mk deleted file mode 100644 index 3d4fd3fb..00000000 --- a/tools/PowerPCtoPPE/p2pfiles.mk +++ /dev/null @@ -1,14 +0,0 @@ -# @file p2pfiles.mk -# -# @brief mk for including P2P support library object files -# - -########################################################################## -# Object Files -########################################################################## -P2P-S-SOURCES = ppe42_mulhw.S ppe42_mulhwu.S ppe42_mullw.S \ - ppe42_divw.S ppe42_divwu.S - -P2P_OBJECTS = $(P2P-S-SOURCES:.S=.o) - - diff --git a/tools/PowerPCtoPPE/ppc-ppe-pcp.py b/tools/PowerPCtoPPE/ppc-ppe-pcp.py deleted file mode 100755 index 7dd427d6..00000000 --- 
a/tools/PowerPCtoPPE/ppc-ppe-pcp.py +++ /dev/null @@ -1,998 +0,0 @@ -#!/usr/bin/python2.6 - -# \file ppc-ppe-pcp.py -# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P) -# -# --------------------------------------------------------------- -# Revision History -# --------------------------------------------------------------- -# 10-07-2014: project completed -# daviddu added optimization profile support -# -# 10-06-2014: added fused compare and branch supprot -# daviddu added support for combining two ld/st into one double word -# added support to insert branch upon .p2align directive -# -# 09-27-2014: added subroutine support for mul* and div* -# daviddu added virtual double word replacing multiple word support -# -# 09-13-2014: initial version -# daviddu only instruction inline replacement is supported -# --------------------------------------------------------------- - -P2P_VERSION = "10-07-2014" # version number as last modified date -P2P_PPC_EXT = '.s' # PPC Assembly filename extension -P2P_PPE_EXT = '.es' # PPE Assembly filename extension -P2P_PPE_PRE = '__ppe42_' # PPE Assembly subroutine prefix - -import sys -import os -import re -import fnmatch -import fileinput - -# --------------------------------------------------------------- -# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P) -# --------------------------------------------------------------- -# -# Description: -# -# This post-compiler processor will take PPC405 assembly file(s) produced -# by powerpc-linux-gcc or hand coded and replace some of the instructions -# supported by PPC405 ISA but not PPE42 ISA with a set of instructions -# supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s). 
-# -# Assumptions: -# -# - Input/Output File Name Extension: -# -# PPC405 assembly file generated by powerpc-linux-gcc has filename extension -# defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file -# consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT" -# global variable. Both should be consistant with Makefile rules. -# -# - Registers: -# -# Instructions in input file should only use registers supported by PPE, -# that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE -# only has CR0 instead of CR0-7). -# -# GCC flag -ffixed can be used to enforce compiler to not use certain -# registers if compiler generates input files to this script. Note certian -# optimization level, such as -Os, of GGC will still use certain registers -# regardless if -ffixed flag is used. Furthermore, compiler should not -# generate multiple word instructions(lmw/stmw) that covers the registers -# forbidden to use by -ffixed flag. -# -# Example of using -ffixed flag in this case: -# -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \ -# -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \ -# -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \ -# -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \ -# -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \ -# -ffixed-cr5 -ffixed-cr6 -ffixed-cr7 -# -# - Instructions: -# -# Instructions in input file should only use PowerPC 405 instructions -# covered by "PowerPC 405-S Embedded Processor Core" manual; however, -# there is an assumption on certain catalog of instructions will never be -# generated by power-linux-gcc compiler(or disabled by compiler switch). -# -# Also, compiler should generate extended mnemonics instead of its base -# instruction when extended mnemonics fits. -# -# Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align -# directive to help instruction alignment for best cache performance. 
-# -# - Assembly Syntax: -# -# There should be only white spaces before instruction mnemonics, in -# another word, all inline comments should be put behind the instrution. -# -# "Label:" and an instruction should not be on the same line, hand coded -# assembly should be consistant to this same compiler output format. -# -# Depandences: -# -# In order to utilize assembly subroutines implemented for supporting -# missing instructions of multiplication and division in PPE42 ISA, a given -# library(with assembly files and header) must be compiled and linked with -# any source code that use this program to generate PPE binary. -# -# Usage: -# -# ./ -f --- process single file -# ./ -d --- process multiple files -# ./ -h --- detailed usage on other flags -# ./ -v --- version of the program -# ./ -d -s --- perform result profiling -# -# Functions: -# -# p2p_main - main function, parse options and arguments -# p2p_onefile - processing single PPC Assembly File -# p2p_combine - processing two PPC instructions in input file -# p2p_replace - processing single PPC instruction in input file -# -# Data Structures: -# -# ReplaceRules = { ppc_op : [rule, ppe_op] } -# CombineRules = { ppc_op : [rule, ppe_op] } -# FuseBranches = [ list of branches qualified for fusing with compares ] -# -#------------------------------------------------------------------------------ -# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ] -#-------|-------------------------|-------------------------------------------- -# Rule | Example (PPC to PPE) | Description -#-------|-------------------------|-------------------------------------------- -# 'r' | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode -# 0 0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same -#-------|-------------------------|-------------------------------------------- -# 'ru' | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by -# | [ppe] ppe_op RT, RA, RB | appending "add" 
instruction after the -# +1 +1 | add RA, RA, RB | original instruction to update RA -#-------|-------------------------|-------------------------------------------- -# 'ra' | [ppc] ppc_op RT, D(RA) | on top of 'r' rule, emulate 'algebraic' by -# | [ppe] ppe_op RT, D(RA) | appending "extsh" instruction after the -# +1 +1 | extsh RT, RT | original instruction to sign-extend RT -#-------|-------------------------|-------------------------------------------- -# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above. -# | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether -# | add RA, RA, RB | rule 'a' or rule 'u' should be applied -# +2 +2 | extsh RT, RT | first, the outcome should be the same. -#-------|-------------------------|-------------------------------------------- -# 'h' | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication -# | [ppe] srwi RA, 16 | emulate multiply "high halfword" with -# | srwi RB, 16 | multiply "low halfword" by shifting -# +2 +2 | ppe_op RT, RA, RB | the operands first -#-------|-------------------------|-------------------------------------------- -# 's' | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide -# | [ppe] stwu R1, -24(R1)| instructions with calling 'subroutines' -# | stvd D3, 8(R1) | implemented in ppe42_mul.S and ppe42_div.S -# | mflr R3 | -# | stw R3, 16(R1) | Calling Conventions:(SS = Stack Size) -# | mr R3, RA | -# | mr R4, RB | Caller is responsible for -# | bl target | 1) create stack frame -# | mr RT, R3 | 2) save off R3 and R4 to stack -# | lwz R3, 16(R1) | 3) save off link register to stack -# | mtlr R3 | 4) put operands into R3, R4 before branch -# | lvd D3, 8(R1) | 5) put result in R3 to RT after branch -# | lwz R1, 0(R1) | 6) restore link register from stack -# | | 7) restore R3 and R4 from stack -# | [sub] stwu R1, -SS(R1) | 8) remove the stack frame -# | | -# | (subroutine body) | Callee is responsible for -# | | 1) create and remove stack frame -# | lwz R1, 0(R1) 
| 2) save and restore volatile registers -# +X +Y | blr | 3) subroutine will not touch LR again -#-------|-------------------------|-------------------------------------------- -# 'o' | [ppc] ppc_op[o] RT ... | rule of 'o' form for overflow -# | [ppe] ppe_op RT ... | Note: "mullwo", "divwo" and "divwuo" each -# | | has unique setting for XER[OV,SO] if OE = 1 -#-------|-------------------------|-------------------------------------------- -# 'd' | [ppc] ppc_op[.] RT ... | rule of '.' or 'dot' form for recording -# | [ppe] ppe_op RT ... | using "cmpwli" to emulate the [.] form -# | cmpwli RT, 0 | to the instruction result and CR0 fields -#-------|-------------------------|-------------------------------------------- -# 'm' | [ppc] ppc_op RT, D(RA) | emulate PowerPC load/store multiple word -# | [ppe] ppe_op DT, D(RA) | instructions with PPE specific -# | (doubleword ld/st)| 'virtual doubleword' instructions if target -# | or | address is 8-byte aligned; otherwise, using -# | (singleword ld/st)| single word instructions instead or mix both -# | or | Note only RA == R1/R2/R13 will always meet -# -1 -1 | (single & double) | alignment requirement of virtual doubleword -#-------|-------------------------|-------------------------------------------- -# -ReplaceRules = {#ppc_op : [ rule | ppe_op ] - #---------------------------- - #synchronization instructions - 'eieio' : [ 'r', 'sync' ], - 'isync' : [ 'r', 'nop' ], - 'icbi' : [ 'r', 'nop' ], - 'icbt' : [ 'r', 'nop' ], - 'mtcr' : [ 'r', 'mtcr0'], - #load/store with [u/x/a] form - 'stbux' : [ 'ru', 'stbx' ], - 'sthux' : [ 'ru', 'sthx' ], - 'stwux' : [ 'ru', 'stwx' ], - 'lbzux' : [ 'ru', 'lbzx' ], - 'lhzux' : [ 'ru', 'lhzx' ], - 'lwzux' : [ 'ru', 'lwzx' ], - 'lha' : [ 'ra', 'lhz' ], - 'lhau' : [ 'ra', 'lhzu' ], - 'lhax' : [ 'ra', 'lhzx' ], - 'lhaux' : [ 'rau', 'lhzx' ], - #multiply/divide with [./o] form - 'mulhhw' : [ 'h', 'mullhw' ], - 'mulhhw.' : [ 'h', 'mullhw.' 
], - 'mulhhwu' : [ 'h', 'mullhwu' ], - 'mulhhwu.': [ 'h', 'mullhwu.' ], - 'mulhw' : [ 's', 'mulhw' ], - 'mulhw.' : [ 'sd', 'mulhw' ], - 'mulhwu' : [ 's', 'mulhwu' ], - 'mulhwu.' : [ 'sd', 'mulhwu' ], - 'mullw' : [ 's', 'mullw' ], - 'mullw.' : [ 'sd', 'mullw' ], - 'mullwo' : [ 'so', 'mullw' ], - 'mullwo.' : [ 'sod', 'mullw' ], - 'mulli' : [ 's', 'mullw' ], - 'divw' : [ 's', 'divw' ], - 'divw.' : [ 'sd', 'divw' ], - 'divwo' : [ 'so', 'divw' ], - 'divwo.' : [ 'sod', 'divw' ], - 'divwu' : [ 's', 'divwu' ], - 'divwu.' : [ 'sd', 'divwu' ], - 'divwuo' : [ 'so', 'divwu' ], - 'divwuo.' : [ 'sod', 'divwu' ], - #load/store multiple word(Rx-R31) - 'lmw' : [ 'm', 'lvd,lwz' ], - 'stmw' : [ 'm', 'stvd,stw' ]} - - -#------------------------------------------------------------------------------ -# CombineRules: [ 'f', 'v', 'l' ] -#-------|-------------------------|-------------------------------------------- -# 'f' | [ppc] ppc_op(cmp*) | rule for 'fusing' adjacent pair of compare -# | ppc_op(b*) | and branch(PPE specific). Note: only -# -1 0 | [ppe] ppe_op(cmp*b*) | extended mnemonics of compares are handled -#-------|-------------------------|-------------------------------------------- -# 'v' | [ppc] ppc_op(lwz/stw) | rule for combining double word aligned -# | ppc_op(lwz/stw) | load/store pairs into signle 'virtual' -# -1 -1 | [ppe] ppe_op(lvd/stvd) | double word instructions(PPE specific) -#-------|-------------------------|-------------------------------------------- -# 'l' | [ppc] .p2align | compiler will insert ".p2align" directive to -# | Label: | help instructions align from label to label. -# | [ppe] b Label | then assembler will insert "nop" on .p2align -# | .p2align | directive. 
a "branch" to skip the nops will -# 0 -1 | Label: | improve the performance while still aligned -#-------|-------------------------|-------------------------------------------- -# -CombineRules = {#ppc_op : [ rule | ppe_cp ] - #-------------------------- - #8byte aligned loads/stores - 'lwz' : [ 'v', 'lvd' ], - 'stw' : [ 'v', 'stvd' ], - #compares fusable to branch - 'cmplw' : [ 'f', 'cmplw' ], - 'cmpw' : [ 'f', 'cmpw' ], - 'cmpwi' : [ 'f', 'cmpwi' ], - #'.p2align' before 'label:' - '.p2align' : [ 'l', 'b' ]} - - -#------------------------------------------------------------------------------ -# FuseBranches: [ Branches can be fused into cmp*b* ] -#------------------------------------------------------------------------------ -# -FuseBranches = ['bc', 'bcl', - 'blt', 'bltl', 'ble', 'blel', - 'bgt', 'bgtl', 'bge', 'bgel', - 'beq', 'beql', 'bne', 'bnel'] - - -# ----------------------------------------------------------------------------- -# p2p_replace: -# process each line(filtered) in the assembly file to replace PPC instruction -# to supported PPE instruction(s) -# -# Arguments: -# string: line - assembly file line to be replaced -# ppc_op - detected PPC opcode that needs to be replaced -# Return: -# boolean: True - Return without Error -# False - Error Detected -# Variables: -# string: inst, rule, ppe_op, newline, temp_op -# double_inst, single_inst, virtual_reg, base_offset, address_reg -# Subroutine: -# NONE -# ----------------------------------------------------------------------------- -def p2p_replace(line, ppc_op): - - # parse PPC instruction as in I or D form with opcode and upto 3 operands: - # possible forms: opcode - # opcode RT, RA, RB - # opcode RT, RA, IM - # opcode RT, D(RA) - # inst.group(0) : - # inst.group(1) : " " - # inst.group(2) : Opcode(.) 
- # inst.group(3) : " " - # inst.group(4) : GPR - # inst.group(5) : " , " - # inst.group(6) : GPR or Immediate(D) - # inst.group(7) : " , " or " ( " - # inst.group(8) : GPR or Immediate(IM) - # inst.group(9) : " ) " - inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line) - - # detect an error - if inst is None or ppc_op != inst.group(2): - return False - - # look up rule to process the instruction - rule, ppe_op = ReplaceRules[ppc_op] - - # if enabled, put a mark in the output file - if P2P_COMMENT: print "#P2P(%s):" % rule + line, - - # start cases of replacing PPC instruction with PPE instruction(s) - #---r------------------------------------------------------------------------ - if 'r' in rule: - - # replace opcode under rule 'r' and rewrite the instruction - newline = line.replace(ppc_op, ppe_op) - print newline, - - # do not continue if there is 'a' or 'u' rule to process on this line - if 'u' not in rule and 'a' not in rule: - return True - - #---u------------------------------------------------------------------------ - if 'u' in rule: - - # construct and write "add RA, RA, RB" under rule 'u' - newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\ - inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8) - print newline - - # do not continue if there is 'a' rule to process on this line - if 'a' not in rule: - return True - - #---a------------------------------------------------------------------------ - if 'a' in rule: - - # construct and write "extsh RT, RT" under rule 'a' - newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\ - inst.group(5) + inst.group(4) - print newline - return True - - #---h------------------------------------------------------------------------ - if 'h' in rule: - - # construct and write "srwi RA, 16" under rule 'h' - newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\ - inst.group(5) + "16" - print newline - - # construct 
and write "srwi RB, 16" under rule 'h' - newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\ - inst.group(5) + "16" - print newline - - # replace opcode in original instruction and write under rule 'h' - newline = line.replace(ppc_op, ppe_op) - print newline - return True - - #---s------------------------------------------------------------------------ - if 's' in rule: - - # construct branch target label - ppe_op = P2P_PPE_PRE + ppe_op - - # construct and write "stwu R1, -24(R1)" to create the stack frame - newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\ - inst.group(5) + '-24(1)' - print newline - - # construct and write "stvd D3, 8(R1)" to save off R3 and R4 - newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\ - inst.group(5) + '8(1)' - print newline - - # construct and write "mflr R3" to fetch the current link address - newline = inst.group(1) + 'mflr' + inst.group(3) + '3' - print newline - - # construct and write "stw R3, 16(R1)" to save off current LR to stack - newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\ - inst.group(5) + '16(1)' - print newline - - # construct and write "mr R3, RA" to copy the operand RA to R3 - # if RA == R3 then R3 was clobbered, restore R3 from stack - if inst.group(6) == '3': - newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\ - inst.group(5) + '8(1)' - print newline - else: - newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\ - inst.group(5) + inst.group(6) - print newline - - # if 'mulli' is detected, using 'li' instead of 'mr' for second operand - if ppc_op == 'mulli': - temp_op = 'li' - else: - temp_op = 'mr' - - # Set R4 if R4 is not already RB - if temp_op == 'li' or inst.group(8) != '4': - # construct and write "mr R4, RB" to copy the operand RB to R4 - # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4 - newline = inst.group(1) + temp_op + inst.group(3) + '4' +\ - inst.group(5) + inst.group(8) - print newline - - # using branch and link(bl) to branch to 
subroutine - # later subroutine can branch back using branch link register(blr) - # Assumption: the subroutine will be responsible for saving - # and restoring all the volatilo registers used in the subroutine - newline = inst.group(1) + 'bl' + inst.group(3) + ppe_op - print newline - - # if RT is not already R3 then copy R3 to RT - if inst.group(4) != '3': - # construct and write "mr RT, R3" to copy the result in R3 to RT - newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\ - inst.group(5) + '3' - print newline - else: - # save return on stack - newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\ - inst.group(5) + '8(1)' - print newline - - # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack - newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\ - inst.group(5) + '16(1)' - print newline - - # construct and write "mtlr R3" to restore the link register - newline = inst.group(1) + 'mtlr' + inst.group(3) + '3' - print newline - - # construct and write "lvd D3, 8(R1)" to restore R3 and R4 - newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\ - inst.group(5) + '8(1)' - print newline - - # construct and write "lwz R1, 0(R1)" to destroy the stack frame - newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\ - inst.group(5) + '0(1)' - print newline - return True - - #---m------------------------------------------------------------------------ - if 'm' in rule: - - # parse instruction information - # note register can be in either "N" form or "%rN" form - double_inst,single_inst = ppe_op.split(',') - virtual_reg = int(re.search(r'\d+', inst.group(4)).group()) - base_offset = int(inst.group(6)) - address_reg = int(re.search(r'\d+', inst.group(8)).group()) - - # consider illegal if multiple word instruction covers non-exist registers - if virtual_reg < 28: - return False - - # loop until and include GPR31 - while virtual_reg < 32: - # page 270 of 405 manual, only do this for load instructions - if virtual_reg == address_reg 
!= 31 and 'l' in single_inst: - base_offset += 4 - virtual_reg += 1 - continue - - # if other GPRs being address_reg there is no guarantee for alignment - if address_reg not in [1,2,13]: - # construct and write "lwz/stw RT, D(RA)" for every registers - newline = inst.group(1) + single_inst + inst.group(3) +\ - str(virtual_reg) + inst.group(5) + str(base_offset) +\ - inst.group(7) + inst.group(8) + inst.group(9) - print newline - base_offset += 4 - virtual_reg += 1 - else: - # if base_offset is also aligned with base address in the address_reg - # & there are at least two more registers to perform doubleword ld/st - if not (base_offset % 8) and (virtual_reg + 1) < 32: - # construct and write "lvd/stvd DR, D(RA)" under rule 'v' - newline = inst.group(1) + double_inst + inst.group(3) +\ - str(virtual_reg) + inst.group(5) + str(base_offset) +\ - inst.group(7) + inst.group(8) + inst.group(9) - print newline - base_offset += 8 - virtual_reg += 2 - # either only one register left or base_offset isnt aligned - else: - # construct and write "lwz/stwz SR, D(RA)" under rule 'v' - newline = inst.group(1) + single_inst + inst.group(3) +\ - str(virtual_reg) + inst.group(5) + str(base_offset) +\ - inst.group(7) + inst.group(8) + inst.group(9) - print newline - base_offset += 4 - virtual_reg += 1 - # end of this if-else - # end of while loop - return True - # end of last if - - -# ----------------------------------------------------------------------------- -# p2p_combine: -# process each two lines(filtered) in the assembly file to combine two PPC -# instructions to one PPE specific instruction for better performance -# -# Arguments: -# string: first_line - 1st assembly file line to be combined -# second_line - 2nd assembly file line to be combined -# first_op - 1st detected PPC opcode that needs to be combined -# second_op - 2nd detected PPC opcode that needs to be combined -# Return: -# boolean: done - True: return without error -# - False: return with error detected -# match - 
True: eventually matched and combined -# - False: fail to qualify to be combined -# Variables: -# string: first_inst, second_inst, rule, ppe_op, newline -# bo, px_bix, compare_operands, target -# Subroutine: -# NONE -# ----------------------------------------------------------------------------- -def p2p_combine(first_line, second_line, first_op, second_op): - - global P2P_SPACE; global P2P_CYCLE - global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE - - # parse PPC instruction as in I or B or D form with opcode and upto 3 operands - # possible form : [1st] opcode [CR,] RA, RB - # [1st] opcode [CR,] RA, IM - # [1st] opcode RT, D(RA) - # [2nd] opcode [CR,] Target - # [2nd] opcode BO, BI, Target - # [2nd] opcode RT, D(RA) - # inst.group(0) : - # inst.group(1) : " " - # inst.group(2) : Opcode(+/-/.) - # inst.group(3) : " " - # inst.group(4) : GPR or CR or BO or Target - # inst.group(5) : " , " - # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target - # inst.group(7) : " , " or " ( " - # inst.group(8) : GPR or IM or Target - # inst.group(9) : " ) " - first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line) - second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line) - - # detect an error - if first_inst is None or second_inst is None or \ - first_op != first_inst.group(2) or second_op not in second_inst.group(2): - return False,False - - # look up rule to process the instruction - rule, ppe_op = CombineRules[first_op] - - # start cases of combining two PPC instructions into PPE instruction - #---f------------------------------------------------------------------------ - if 'f' in rule: - - if not P2P_COMPARE_BRANCH: - return True,False - - # fusing compare and branch - ppe_op = ppe_op + second_op - - # for cmpwib* case, there is a difference between - # 
cmpwi SI operand as signed 16-bit integer and then got sign extended and - # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended - # thus, will not fuse the two if the integer operand is not in range(0,31) - # if cr field is omitted: - if ',' in first_inst.group(7): - # cr field must be cr0 or 0, error out if it is something else: - if '0' not in first_inst.group(4): - return False, True - if 'i' in first_op and (int(first_inst.group(8)) < 0 or \ - int(first_inst.group(8)) > 31): - return True,False - else: - compare_operands = first_inst.group(6) + first_inst.group(7) + \ - first_inst.group(8) + ', ' - else: - if 'i' in first_op and (int(first_inst.group(6)) < 0 or \ - int(first_inst.group(6)) > 31): - return True,False - else: - compare_operands = first_inst.group(4) + first_inst.group(5) + \ - first_inst.group(6) + ', ' - - # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*' - # Note CTR decreament and branch always cases are not handled, and - # python bin() outputs "0bXXXXX" form so bo[2] is actually BO bit 0 - # else there is no need for PX,BIX fields for extended mnemonics - if 'bc' in second_op: - bo = bin(int(second_inst.group(4))) - - # do not handle CRT decreament or branch always cases - if bo[4] == 0 or bo[2] == 1: - return True,False - - # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used - px_bix = bo[3] + second_inst.group(5) + \ - second_inst.group(6) + second_inst.group(7) - target = second_inst.group(8) - else: - px_bix = "" - # if cr field is omitted: - if ',' in second_inst.group(5): - # cr field must be cr0 or 0, error out if it is something else: - if '0' not in second_inst.group(4): - return False, True - target = second_inst.group(6) - else: - target = second_inst.group(4) - - # profile: space--, cycle is the same because 1+2==3 - P2P_SPACE -= 1 - - # if enabled, put a mark in the output file - if P2P_COMMENT: - print "#P2P(%s):" % rule + first_line, - print "#P2P(%s):" % rule + 
second_line, - - # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule - newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\ - px_bix + compare_operands + target - print newline - return True,True - - - #---v------------------------------------------------------------------------ - if 'v' in rule: - - if not P2P_VIRTUAL_DOUBLE: - return True,False - - global P2P_VDW_SDA - - # Combinable Conditions: - # 1) base address registers must be the same and one of R1/R2/R13 - # 2) address offsets have to be 8-bytes continuous and aligned - # 3) target or source registers must qualify to be double word register - # Note: label+offset@sda21 format is coverted to target r13 after link - # assume data go in and out r13 or SDA space is always 8-byte aligned - # here we only check the continous of address offset and register pair - if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \ - ("@sda21" in first_inst.group(6) and \ - "@sda21" in second_inst.group(6) and \ - P2P_VDW_SDA): - - if ((first_inst.group(6).replace("@sda21","") + "+4" == \ - second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \ - ((first_inst.group(6).isdigit() and \ - not int(first_inst.group(6)) % 8) and \ - int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \ - (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \ - (int(first_inst.group(4)) == 31 and \ - int(second_inst.group(4)) == 0)): - newline = first_line.replace(first_op, ppe_op) - elif ((second_inst.group(6).replace("@sda21","") + "+4" == \ - first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \ - ((second_inst.group(6).isdigit() and \ - not int(second_inst.group(6)) % 8) and \ - int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \ - (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \ - (int(second_inst.group(4)) == 31 and \ - int(first_inst.group(4)) == 0)): - newline = second_line.replace(second_op, ppe_op) - else: - return True,False - 
- # profile: space--, cycle--(same delay but 1 less from issue) - P2P_SPACE -= 1; P2P_CYCLE -= 1 - - # if enabled, put a mark in the output file - if P2P_COMMENT: - print "#P2P(%s):" % rule + first_line, - print "#P2P(%s):" % rule + second_line, - - print newline, - return True,True - else: - return True,False - - -# ----------------------------------------------------------------------------- -# p2p_onefile: -# process single PPC assembly file to convert it into PPE assembly file -# also filter out non-instruction lines before calling the subroutine -# -# Arguments: -# string: ppcFileName -# Return: -# boolean: done - True if file processing completed without error -# - False if file processing failed due to an error -# Variables: -# boolean: match, done -# string: ppeFileName, line, ppc_op, pre_op, pre_line, section, label -# integer: line_num, first_label_ln, second_label_ln, misalign -# Subroutine: -# p2p_combine -# p2p_replace -# ----------------------------------------------------------------------------- -def p2p_onefile(ppcFileName): - - global P2P_SPACE; P2P_SPACE = 0 # profile count - global P2P_CYCLE; P2P_CYCLE = 0 # profile count - - if P2P_VERBOSE : print "Translate PPC assembly: " + ppcFileName - - # new PPE assembly file is renamed as .s - ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT) - os.rename(ppcFileName, ppeFileName) - - # initialize storage variables for previous line that needs to be remembered - pre_line = "" - pre_op = "" - - # use inline file editing, back up original PPC assembly file as .S - for line in fileinput.input(ppeFileName, inplace=1, backup='.405'): - - # in case of "mtmsr 0; isync" - line = line.replace('isync','nop') - - # skip blank line - if not line.strip(): - if pre_line: - print pre_line, - pre_line = "" - print line, - continue - - # skip comments line - if re.search("^[\s]*(//|#)", line): - if pre_line: - print pre_line, - pre_line = "" - print line, - continue - - # skip .section code except .p2align and 
label: - section = re.search("^[\s]*(\.[0-9a-zA-Z_]+)", line) - if section is not None and ':' not in line and \ - section.group(1) != '.p2align': - if pre_line: - print pre_line, - pre_line = "" - print line, - continue - - # apply specical 'l' rule in CombineRules for '.p2align' before a 'label:' - label = re.search("^[\s]*[\.0-9a-zA-Z_]+[:]+", line) - if label is not None: - if pre_line and pre_op == '.p2align': - second_label_ln = fileinput.lineno() - misalign = 8 - (second_label_ln - first_label_ln - 2) % 8 - if misalign in [3,4,5,6,7]: - # profile: same space, but save cycles, branch penalty is 2 - P2P_CYCLE -= misalign - 2 - if P2P_COMMENT: print "#P2P(l):" - print '\tb ' + label.group(0).split(':')[0] - print pre_line, - pre_line = "" - first_label_ln = fileinput.lineno() - if pre_line: - print pre_line, - pre_line = "" - print line, - continue - - # extract opcode field from line - ppc_op = line.split()[0] - done,match = False,False - - # detect the 2nd possible combinable instruction - if pre_line and P2P_COMBINE: - # ignore +/- signs for branch prediction - if '+' in ppc_op or '-' in ppc_op: - ppc_op = ppc_op[:-1] - if 'cmp' in pre_op and ppc_op in FuseBranches or \ - 'cmp' not in pre_op and ppc_op == pre_op: - done,match = p2p_combine(pre_line, line, pre_op, ppc_op) - if not match: - print pre_line, - else: - print pre_line, - done,match = True,False - pre_line = "" - - # detect the 1st possible combinable instruction - if not pre_line and not match and P2P_COMBINE: - if ppc_op in CombineRules.keys(): - pre_op = ppc_op - pre_line = line - done,match = True,True - else: - done,match = True,False - - # defect possible replacable instruction - if not match: - if ppc_op in ReplaceRules.keys() and P2P_REPLACE: - done = p2p_replace(line, ppc_op) - else: - print line, - done = True - - # if instruction process is not done due to error - if not done: - line_num = fileinput.lineno() - break - - # close the output file and restore the original input file - 
fileinput.close() - os.rename(ppeFileName+'.405', ppcFileName) - - # in case last line of the file qualified to be a pre_line and was not printed - if pre_line: - f = open(ppeFileName, 'a') - f.write(pre_line) - f.close() - - # print error debug message - if not done: - print "Error: target instruction detected at line [%d]:" % line_num - print " " + line - print " but fail to recognize instruction format." - # terminate Makefile or execution if an error is detected - sys.exit(1) - - if P2P_COMMENT and P2P_PROFILE: - f = open(ppeFileName, 'a') - f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE,P2P_CYCLE)) - f.close() - - if P2P_VERBOSE: - print "Generated PPE assembly: " + ppeFileName - if P2P_PROFILE: - print "Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +\ - str(P2P_CYCLE) + " cycles." - - -# ----------------------------------------------------------------------------- -# p2p_profile -# profiling how much performance and code size are saved by optimization -# -# Arguments: -# string: ppcFileName -# Return: -# list: [space, cycle] -# Variables: -# string: line, profile -# Subroutine: -# None -# ----------------------------------------------------------------------------- -def p2p_profile(ppcFileName): - f = open(ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT), 'r') - for line in f: - pass - f.close() - profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)", line) - if profile is not None: - return [int(profile.group(1)), int(profile.group(2))] - else: - return [0,0] - -# ----------------------------------------------------------------------------- -# p2p_main: -# main of this script -# print usage info -# parse options and arguments -# process one file or a directory of files -# ----------------------------------------------------------------------------- -def p2p_main(): - - # command-line option parsing - from optparse import OptionParser - usage = "usage: %prog [options]" - version= "%prog v." 
+ P2P_VERSION - parser = OptionParser(usage=usage, version=version) - parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath", - help="process all files in a directory given by PATH") - parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile", - help="process single file(with path in the filename)") - parser.add_option("-p", "--parallel", - action="store_true", dest="parallel", default=False, - help="processing all files in parallel processes") - parser.add_option("-s", "--statistics", - action="store_true", dest="profile", default=False, - help="optimization profiling, require comment in outputs") - parser.add_option("-c", "--combine-only", - action="store_false", dest="replace", default=True, - help="enable only combine function by disabling replace") - parser.add_option("-r", "--replace-only", - action="store_false", dest="combine", default=True, - help="enable only replace function by disabling combine") - parser.add_option("-b", "--compare branch disable", - action="store_false", dest="compare_branch", default=True, - help="only disabling fused compare branch function") - parser.add_option("-v", "--virtual double disable", - action="store_false", dest="virtual_double", default=True, - help="only disabling fused virtual double function") - parser.add_option("-e", "--eabi", - action="store_true", dest="vdw_sda", default=False, - help="enable virtual double word fusion targeting sda") - parser.add_option("-n", "--no-comment", - action="store_false", dest="comment", default=True, - help="don't leave comment mark in output file") - parser.add_option("-q", "--quiet", - action="store_false", dest="verbose", default=True, - help="don't print status messages to stdout") - (options, args) = parser.parse_args() - # global program output verbose switch - global P2P_VERBOSE; P2P_VERBOSE = options.verbose - # leave a comment mark in output files - global P2P_COMMENT; P2P_COMMENT = options.comment - # space/performance profiling function - global 
P2P_PROFILE; P2P_PROFILE = options.profile - # enable instruction replace functions - global P2P_REPLACE; P2P_REPLACE = options.replace - # enable instruction combine functions - global P2P_COMBINE; P2P_COMBINE = options.combine - # enable virtual double word fusion targeting sda - global P2P_VDW_SDA; P2P_VDW_SDA = options.vdw_sda - # enable only fused compare and branch function - global P2P_COMPARE_BRANCH; P2P_COMPARE_BRANCH = options.compare_branch - # enable only combined virtual double function - global P2P_VIRTUAL_DOUBLE; P2P_VIRTUAL_DOUBLE = options.virtual_double - - if P2P_VERBOSE : - print "PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)" - print "Version: " + P2P_VERSION - - # single file processing - if options.ppcFile: - - if P2P_VERBOSE : - print "Processing signle file: " + options.ppcFile - - p2p_onefile(options.ppcFile) - - # multiple files processing - if options.ppcPath: - - if P2P_VERBOSE : - print "Accessing all files at: " + options.ppcPath - print "*Parallel Process Mode: " + ("Off", "On")[options.parallel] - - if options.profile: - bytes = 0; cycles = 0 - - fileList = [] - for root, subdirs, files in os.walk(options.ppcPath): - for file in fnmatch.filter(files, '*'+P2P_PPC_EXT): - if options.parallel : - fileList.append(os.path.join(root, file)) - else: - if options.profile: - space,cycle = p2p_profile(os.path.join(root, file)) - bytes += space*4 - cycles += cycle - else: - p2p_onefile(os.path.join(root, file)) - - if options.profile: - print "Optimization Profiling: " + str(bytes) + " bytes, " +\ - str(cycles) + " cycles." 
- - # parallel processing mode - if options.parallel: - from multiprocessing import Pool - p = Pool() - p.map(p2p_onefile, fileList) - p.close() - p.join() - - if P2P_VERBOSE : print "Done" - - -# ----------------------------------------------------------------------------- -# python main -if __name__ == '__main__': - p2p_main() - diff --git a/tools/PowerPCtoPPE/ppe42_divw.S b/tools/PowerPCtoPPE/ppe42_divw.S deleted file mode 100644 index 563a8d48..00000000 --- a/tools/PowerPCtoPPE/ppe42_divw.S +++ /dev/null @@ -1,208 +0,0 @@ -/// \file ppe42_divw.S -/// \brief PPC405 word division instructions implemented by PPE ISA -/// -/// This file includes implementation for the following PPC405 instructions -/// divw RT, RA, RB -/// -/// Note: PPE ISA specific "fused compare and branch" instructions are used -/// -/// Revision History: -/// 09-22-2014: Initial Version by daviddu -/// - - .file "ppe42_divw.S" - .section ".text" - - /* - ** Code comment notation: - ** - ** msw = most-significant (high-order) word, i.e. bits 0..31 - ** lsw = least-significant (low-order) word, i.e. bits 32..63 - ** msh = most-significant (high-order) halfword, i.e. bits 0..15 - ** lsh = least-significant (low-order) halfword, i.e. bits 16..63 - ** - ** LZ = Leading Zeroes - ** SD = Significant Digits - ** OW = Register is overwritten, previous value is lost, - ** correct if previous value is no longer needed. - ** FU = Register is not overwritten, but its value is no longer needed, - ** in another word, the register is "free for use". - ** - ** PPE GPR Registers are: R0-R10, R13, R28-R31 - ** Volatile Registers are: R0, R3-R10 - ** Non-volatile registers are R28-R31 - */ - - /* - ** Caling Convention - ** - ** R2 and R13 are never saved or restored. In ABI or EABI application - ** these registers are constant. The other touched volatile registers - ** will be saved and restored by the subroutines. 
Note the caller - ** wont be saving those registers because these subroutines will be - ** instrumented into caller's body without compiler knowledge. - ** - ** Note R3 is not saved and restored because it will be changed for - ** return value anyways, the p2p script will make sure to restore it. - ** Also CR is hanlded because of compare and branch, but XER/CTR/LR - ** are not hanlded because they are untouched by the instructions used. - ** - ** Stack layout: - ** - ** 0x00 -- R1, Dedicated for Stack Pointer - ** 0x04 -- slot reserved for LR - ** 0x08 -- R4, Volatile, Private - ** 0x0c -- R5, Volatile, Private - ** 0x10 -- R6, Volatile, Private - ** 0x14 -- R7, Volatile, Private - ** 0x18 -- R8, Volatile, Private - ** 0x1c -- R9, Volatile, Private - ** 0x20 -- CR, Condition Register - ** 0x24 -- - ** - ** 0x28 -- Stack Size, Must be 8-byte aligned - */ - - /* - ** Division Procedures: - ** - ** __ppe42_divwu(dividend, divisor) - ** __ppe42_divw(dividend, divisor) - ** - ** R3 = Input parameter, dividend. then Return value, quotient. - ** R4 = Input parameter, divisor. - ** R5 = Output parameter, quotient. - ** R6 = Output parameter, remainder. - ** R7 = Temporary register, counter. 
- ** - ** General Algorithm - ** - ** Using standard shift and subtract method to emulate - ** Note: dividend,divisor,quotient,remainder are all 32-bit integers - ** - ** Precondition Check: - ** - ** if (divisor == dividend) { - ** quotient = 1; - ** remainder = 0; - ** } - ** - ** if (divisor == 0) { - ** quotient = 0; - ** remainder = 0; - ** } - ** - ** if (divisor > dividend) { - ** quotient = 0; - ** remainder = dividend; - ** } - */ - -/*****************************************************************************/ - - /* - ** Divide Word Signed (__ppe42_divw) - ** - ** Using Divide Word Unsigned(divwu) to emulate - ** - ** dd = absolute(dividend); - ** dr = absolute(divisor); - ** [q,r] = __ppe42_divwu(dd, dr); - ** - ** quotient = q; - ** if (dividend < 0) { - ** remainder = -r; - ** if (divisor > 0) - ** quotient = -q; - ** } - ** else { - ** remainder = r; - ** if (divisor < 0) - ** quotient = -q; - ** } - */ - - .align 2 - .global __ppe42_divw - .type __ppe42_divw, @function - -__ppe42_divw: - - stwu %r1, -0x28(%r1) // allocate stack frame - - stvd %d4, 0x08(%r1) // save off r4 & r5 in stack - stvd %d6, 0x10(%r1) // save off r6 & r7 in stack - stvd %d8, 0x18(%r1) // save off r8 & r9 in stack - - mfcr %r5 // save off cr - stw %r5, 0x20(%r1) // store cr in stack - - li %r5, 1 // quotient = 1 - li %r6, 0 // remainder = 0 - cmplwbc 1, 2, %r3, %r4, __ppe42_divw_ret // ret(divisor == dividend) - - li %r5, 0 // quotient = 0 - li %r6, 0 // remainder = 0 - cmpwibc 1, 2, %r4, 0, __ppe42_divw_ret // ret(divisor == 0) - - cmpwibc 1, 1, %r3, 0, __ppe42_divw_csc // dividend(+) -> csc - neg %r3, %r3 // absolute(dividend) - li %r5, 1 // note dividend < 0 - -__ppe42_divw_csc: // <> - - cmpwibc 1, 1, %r4, 0, __ppe42_divw_uns // divisor(+) -> uns - neg %r4, %r4 // absolute(divisor) - li %r6, 1 // note divisor < 0 - -__ppe42_divw_uns: // <> - - mr %r8, %r5 // remember if dividend > 0 - xor %r9, %r5, %r6 // remember sign difference - - li %r5, 0 // quotient = 0 - mr %r6, %r3 
// remainder = dividend - cmplwbc 1, 0, %r3, %r4, __ppe42_divw_sign // ret(divisor > dividend) - - li %r7, 32 // num_of_bits = 32 - -__ppe42_divw_sas: // <> - - slwi %r6, %r6, 1 // remainder <<= 1 - inslwi %r6, %r3, 1, 31 // remainder[31] = dividend[0] - slwi %r3, %r3, 1 // dividend <<= 1 - slwi %r5, %r5, 1 // quotient <<= 1 - subi %r7, %r7, 1 // num_of_bits-- - cmplwbc 1, 0, %r6, %r4, __ppe42_divw_sas // continue(remainder> - - cmpwibc 1, 2, %r9, 0, __ppe42_divw_csh // if same sign, r5 stays + - neg %r5, %r5 // otherwise, neg(r5) - -__ppe42_divw_csh: // <> - - cmpwibc 1, 2, %r8, 0, __ppe42_divw_ret // if dividend>0, r6 stays + - neg %r6, %r6 // otherwise, neg(r6) - -__ppe42_divw_ret: // <> - - mr %r3, %r5 // r3 is the default return - - lwz %r5, 0x20(%r1) // load cr from stack - mtcr0 %r5 // restore cr - - lvd %d4, 0x08(%r1) // restore r4 & r5 from stack - lvd %d6, 0x10(%r1) // restore r6 & r7 from stack - lvd %d8, 0x18(%r1) // restore r8 & r9 from stack - - lwz %r1, 0(%r1) // restore stack pointer - - blr // branch back - - .size __ppe42_divw, .-__ppe42_divw - diff --git a/tools/PowerPCtoPPE/ppe42_divwu.S b/tools/PowerPCtoPPE/ppe42_divwu.S deleted file mode 100644 index a208df65..00000000 --- a/tools/PowerPCtoPPE/ppe42_divwu.S +++ /dev/null @@ -1,184 +0,0 @@ -/// \file ppe42_divwu.S -/// \brief PPC405 word division instructions implemented by PPE ISA -/// -/// This file includes implementation for the following PPC405 instructions -/// divwu RT, RA, RB -/// -/// Note: PPE ISA specific "fused compare and branch" instructions are used -/// -/// Revision History: -/// 09-22-2014: Initial Version by daviddu -/// - - .file "ppe42_divwu.S" - .section ".text" - - /* - ** Code comment notation: - ** - ** msw = most-significant (high-order) word, i.e. bits 0..31 - ** lsw = least-significant (low-order) word, i.e. bits 32..63 - ** msh = most-significant (high-order) halfword, i.e. bits 0..15 - ** lsh = least-significant (low-order) halfword, i.e. 
bits 16..63 - ** - ** LZ = Leading Zeroes - ** SD = Significant Digits - ** OW = Register is overwritten, previous value is lost, - ** correct if previous value is no longer needed. - ** FU = Register is not overwritten, but its value is no longer needed, - ** in another word, the register is "free for use". - ** - ** PPE GPR Registers are: R0-R10, R13, R28-R31 - ** Volatile Registers are: R0, R3-R10 - ** Non-volatile registers are R28-R31 - */ - - /* - ** Caling Convention - ** - ** R2 and R13 are never saved or restored. In ABI or EABI application - ** these registers are constant. The other touched volatile registers - ** will be saved and restored by the subroutines. Note the caller - ** wont be saving those registers because these subroutines will be - ** instrumented into caller's body without compiler knowledge. - ** - ** Note R3 is not saved and restored because it will be changed for - ** return value anyways, the p2p script will make sure to restore it. - ** Also CR is hanlded because of compare and branch, but XER/CTR/LR - ** are not hanlded because they are untouched by the instructions used. - ** - ** Stack layout: - ** - ** 0x00 -- R1, Dedicated for Stack Pointer - ** 0x04 -- slot reserved for LR - ** 0x08 -- R4, Volatile, Private - ** 0x0c -- R5, Volatile, Private - ** 0x10 -- R6, Volatile, Private - ** 0x14 -- R7, Volatile, Private - ** 0x18 -- R8, Volatile, Private - ** 0x1c -- R9, Volatile, Private - ** 0x20 -- CR, Condition Register - ** 0x24 -- - ** - ** 0x28 -- Stack Size, Must be 8-byte aligned - */ - - /* - ** Division Procedures: - ** - ** __ppe42_divwu(dividend, divisor) - ** __ppe42_divw(dividend, divisor) - ** - ** R3 = Input parameter, dividend. then Return value, quotient. - ** R4 = Input parameter, divisor. - ** R5 = Output parameter, quotient. - ** R6 = Output parameter, remainder. - ** R7 = Temporary register, counter. 
- ** - ** General Algorithm - ** - ** Using standard shift and subtract method to emulate - ** Note: dividend,divisor,quotient,remainder are all 32-bit integers - ** - ** Precondition Check: - ** - ** if (divisor == dividend) { - ** quotient = 1; - ** remainder = 0; - ** } - ** - ** if (divisor == 0) { - ** quotient = 0; - ** remainder = 0; - ** } - ** - ** if (divisor > dividend) { - ** quotient = 0; - ** remainder = dividend; - ** } - */ - -/*****************************************************************************/ - - /* - ** Divide Word Unsigned (__ppe42_divwu) - ** - ** The implementation uses standard shift and subtract approach. - ** The following is an example in C. Note the implementation doesnt - ** exactly follow the C example. - ** - ** num_of_bits = 32; - ** while(num_bits) { - ** dbit = (dividend & 0x80000000) >> 31; - ** remainder = (remainder << 1) | dbit; - ** dividend = dividend << 1; - ** quotient = quotient << 1; - ** num_of_bits--; - ** if(remainder < divisor) - ** continue; - ** temp = remainder - divisor; - ** qbit = !((temp & 0x80000000) >> 31); - ** quotient = quotient | qbit; - ** remainder = temp; - ** } - */ - - .align 2 - .global __ppe42_divwu - .type __ppe42_divwu, @function - -__ppe42_divwu: - - stwu %r1, -0x28(%r1) // allocate stack frame - - stvd %d4, 0x08(%r1) // save off r4 & r5 in stack - stvd %d6, 0x10(%r1) // save off r6 & r7 in stack - stvd %d8, 0x18(%r1) // save off r8 & r9 in stack - - mfcr %r5 // save off cr - stw %r5, 0x20(%r1) // store cr in stack - - li %r5, 1 // quotient = 1 - li %r6, 0 // remainder = 0 - cmplwbc 1, 2, %r3, %r4, __ppe42_divwu_ret // ret(divisor == dividend) - - li %r5, 0 // quotient = 0 - li %r6, 0 // remainder = 0 - cmpwibc 1, 2, %r4, 0, __ppe42_divwu_ret // ret(divisor == 0) - - li %r5, 0 // quotient = 0 - mr %r6, %r3 // remainder = dividend - cmplwbc 1, 0, %r3, %r4, __ppe42_divwu_ret // ret(divisor > dividend) - - li %r7, 32 // num_of_bits = 32 - -__ppe42_divwu_sas: // <> - - slwi %r6, %r6, 1 // 
remainder <<= 1 - inslwi %r6, %r3, 1, 31 // remainder[31] = dividend[0] - slwi %r3, %r3, 1 // dividend <<= 1 - slwi %r5, %r5, 1 // quotient <<= 1 - subi %r7, %r7, 1 // num_of_bits-- - cmplwbc 1, 0, %r6, %r4, __ppe42_divwu_sas // continue(remainder> - - mr %r3, %r5 // r3 is the default return - lwz %r5, 0x20(%r1) // load cr from stack - mtcr0 %r5 // restore cr - - lvd %d4, 0x08(%r1) // restore r4 & r5 from stack - lvd %d6, 0x10(%r1) // restore r6 & r7 from stack - lvd %d8, 0x18(%r1) // restore r8 & r9 from stack - - lwz %r1, 0(%r1) // restore stack pointer - - blr // branch back - - .size __ppe42_divwu, .-__ppe42_divwu - - diff --git a/tools/PowerPCtoPPE/ppe42_mulhw.S b/tools/PowerPCtoPPE/ppe42_mulhw.S deleted file mode 100644 index d229121b..00000000 --- a/tools/PowerPCtoPPE/ppe42_mulhw.S +++ /dev/null @@ -1,193 +0,0 @@ -/// \file ppe42_mulhw.S -/// \brief PPC405 word multiplication instructions implemented by PPE ISA -/// -/// This file includes implementation for the following PPC405 instructions -/// mulhw -/// -/// Note: PPE ISA specific "fused compare and branch" instructions are used -/// -/// Revision History: -/// 09-15-2014: Initial Version by daviddu -/// - - .file "ppe42_mulhw.S" - .section ".text" - - /* - ** Code comment notation: - ** - ** msw = most-significant (high-order) word, i.e. bits 0..31 - ** lsw = least-significant (low-order) word, i.e. bits 32..63 - ** msh = most-significant (high-order) halfword, i.e. bits 0..15 - ** lsh = least-significant (low-order) halfword, i.e. bits 16..63 - ** LZ = Leading Zeroes - ** SD = Significant Digits - ** OW = Register is overwritten, previous value is lost, - ** correct if previous value is no longer needed. - ** FU = Register is not overwritten, but its value is no longer needed, - ** in another word, the register is "free for use". 
- ** - ** PPE GPR Registers are: R0-R10, R13, R28-R31 - ** Volatile Registers are: R0, R3-R10 - ** Non-volatile registers are R28-R31 - */ - - /* - ** Caling Convention - ** - ** R2 and R13 are never saved or restored. In ABI or EABI application - ** these registers are constant. The other touched volatile registers - ** will be saved and restored by the subroutines. Note the caller - ** wont be saving those registers because these subroutines will be - ** instrumented into caller's body without compiler knowledge. - ** - ** Note R3 is not saved and restored because it will be changed for - ** return value anyways, the p2p script will make sure to restore it. - ** Also CR is hanlded because of compare and branch, but XER/CTR/LR - ** are not hanlded because they are untouched by the instructions used. - ** - ** Stack layout: - ** - ** 0x00 -- R1, Dedicated for Stack Pointer - ** 0x04 -- slot reserved for LR - ** 0x08 -- R4, Volatile, Private - ** 0x0c -- R5, Volatile, Private - ** 0x10 -- R6, Volatile, Private - ** 0x14 -- R7, Volatile, Private - ** 0x18 -- R8, Volatile, Private - ** 0x1c -- R9, Volatile, Private - ** 0x20 -- CR, Condition Register - ** 0x24 -- - ** - ** 0x28 -- Stack Size, Must be 8-byte aligned - */ - - /* - ** Multiplication Procedures: - ** - ** __ppe42_mulhwu(U,V) - ** __ppe42_mulhw(U,V) - ** __ppe42_mullw(U,V) - ** - ** R3:R4 = Input parameter, multipliers: U, V. - ** R3 = Output parameter, either product.msh or product.lsh. 
- ** R5-R9 = Temporary registers - ** - ** General Algorithm - ** - ** Using PPC405 ISA instruction 'mullhw' to emulate - ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh) - ** - ** U.msh U.lsh - ** X V.msh V.lsh - ** ------------------------ - ** A.msh A.lsh - ** B.msh B.lsh - ** C.msh C.lsh - ** D.msh D.lsh - ** ------------------------ - ** Product.msw Product.lsw - ** - ** __ppe42_mulhwu: Return Product.msh (unsigned) - ** __ppe42_mulhw: Return Product.msh (signed) - ** __ppe42_mullw: Return Product.lsh - ** - ** Precondition Check: - ** - ** if( U == 0 || V == 0 ) return P=0; - */ - -/*****************************************************************************/ - - /* - ** Multiply High Word Signed (__ppe42_mulhw) - ** - ** Using Multiply High Word Unsigned(mulhwu) to emulate - ** - ** u = absolute(U); - ** v = absolute(V); - ** p = __ppe42_mulhwu(u, v); - ** if( U[0] xor V[0] ) - ** p = -p - */ - - .align 2 - .global __ppe42_mulhw - .type __ppe42_mulhw, @function - -__ppe42_mulhw: - - stwu %r1, -0x28(%r1) // allocate stack frame - - stvd %d4, 0x08(%r1) // save off r4 & r5 in stack - stvd %d6, 0x10(%r1) // save off r6 & r7 in stack - stvd %d8, 0x18(%r1) // save off r8 & r9 in stack - - mfcr %r5 // save off cr - stw %r5, 0x20(%r1) // store cr in stack - - li %r5, 0 // r5 = 0 - cmpwibc 1, 2, %r3, 0, __ppe42_mulhw_ret // U=0 -> ret - cmpwibc 1, 2, %r4, 0, __ppe42_mulhw_ret // V=0 -> ret - - cmpwibc 1, 1, %r3, 0, __ppe42_mulhw_csc // U>0 -> csc - neg %r3, %r3 // absolute(U) - li %r5, 1 // U<0 -> r5 = 1 - -__ppe42_mulhw_csc: // <> - - li %r6, 0 // V>0 -> r6 = 0 - cmpwibc 1, 1, %r4, 0, __ppe42_mulhw_uns // V>0 -> uns - neg %r4, %r4 // absolute(V) - li %r6, 1 // V<0 -> r6 = 1 - -__ppe42_mulhw_uns: // <> - - xor %r9, %r5, %r6 // remember sign difference - - extrwi %r5, %r3, 16, 16 - srwi %r3, %r3, 16 - extrwi %r6, %r4, 16, 16 - srwi %r4, %r4, 16 - - mullhwu %r7, %r5, %r6 - srwi %r7, %r7, 16 - - mullhwu %r6, %r3, %r6 - extrwi %r8, %r6, 16, 16 - 
srwi %r6, %r6, 16 - add %r7, %r8, %r7 - - mullhwu %r5, %r5, %r4 - extrwi %r8, %r5, 16, 16 - srwi %r5, %r5, 16 - add %r7, %r8, %r7 - - srwi %r7, %r7, 16 - add %r7, %r7, %r6 - add %r7, %r7, %r5 - - mullhwu %r3, %r3, %r4 - add %r5, %r3, %r7 - - cmpwibc 1, 2, %r9, 0, __ppe42_mulhw_ret // if same sign, r5 stays + - neg %r5, %r5 // otherwise, neg(r5) - -__ppe42_mulhw_ret: - - mr %r3, %r5 // put return value to r3 - - lwz %r5, 0x20(%r1) // load cr from stack - mtcr0 %r5 // restore cr - - lvd %d4, 0x08(%r1) // restore r4 & r5 from stack - lvd %d6, 0x10(%r1) // restore r6 & r7 from stack - lvd %d8, 0x18(%r1) // restore r8 & r9 from stack - - lwz %r1, 0(%r1) // restore stack pointer - - blr - - .size __ppe42_mulhw, .-__ppe42_mulhw - - diff --git a/tools/PowerPCtoPPE/ppe42_mulhwu.S b/tools/PowerPCtoPPE/ppe42_mulhwu.S deleted file mode 100644 index e92ee7ec..00000000 --- a/tools/PowerPCtoPPE/ppe42_mulhwu.S +++ /dev/null @@ -1,202 +0,0 @@ -/// \file ppe42_mulhwu.S -/// \brief PPC405 word multiplication instructions implemented by PPE ISA -/// -/// This file includes implementation for the following PPC405 instructions -/// mulhwu -/// -/// Note: PPE ISA specific "fused compare and branch" instructions are used -/// -/// Revision History: -/// 09-15-2014: Initial Version by daviddu -/// - - .file "ppe42_mulhwu.S" - .section ".text" - - /* - ** Code comment notation: - ** - ** msw = most-significant (high-order) word, i.e. bits 0..31 - ** lsw = least-significant (low-order) word, i.e. bits 32..63 - ** msh = most-significant (high-order) halfword, i.e. bits 0..15 - ** lsh = least-significant (low-order) halfword, i.e. bits 16..63 - ** LZ = Leading Zeroes - ** SD = Significant Digits - ** OW = Register is overwritten, previous value is lost, - ** correct if previous value is no longer needed. - ** FU = Register is not overwritten, but its value is no longer needed, - ** in another word, the register is "free for use". 
- ** - ** PPE GPR Registers are: R0-R10, R13, R28-R31 - ** Volatile Registers are: R0, R3-R10 - ** Non-volatile registers are R28-R31 - */ - - /* - ** Caling Convention - ** - ** R2 and R13 are never saved or restored. In ABI or EABI application - ** these registers are constant. The other touched volatile registers - ** will be saved and restored by the subroutines. Note the caller - ** wont be saving those registers because these subroutines will be - ** instrumented into caller's body without compiler knowledge. - ** - ** Note R3 is not saved and restored because it will be changed for - ** return value anyways, the p2p script will make sure to restore it. - ** Also CR is hanlded because of compare and branch, but XER/CTR/LR - ** are not hanlded because they are untouched by the instructions used. - ** - ** Stack layout: - ** - ** 0x00 -- R1, Dedicated for Stack Pointer - ** 0x04 -- slot reserved for LR - ** 0x08 -- R4, Volatile, Private - ** 0x0c -- R5, Volatile, Private - ** 0x10 -- R6, Volatile, Private - ** 0x14 -- R7, Volatile, Private - ** 0x18 -- R8, Volatile, Private - ** 0x1c -- R9, Volatile, Private - ** 0x20 -- CR, Condition Register - ** 0x24 -- - ** - ** 0x28 -- Stack Size, Must be 8-byte aligned - */ - - /* - ** Multiplication Procedures: - ** - ** __ppe42_mulhwu(U,V) - ** __ppe42_mulhw(U,V) - ** __ppe42_mullw(U,V) - ** - ** R3:R4 = Input parameter, multipliers: U, V. - ** R3 = Output parameter, either product.msh or product.lsh. 
- ** R5-R9 = Temporary registers - ** - ** General Algorithm - ** - ** Using PPC405 ISA instruction 'mullhw' to emulate - ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh) - ** - ** U.msh U.lsh - ** X V.msh V.lsh - ** ------------------------ - ** A.msh A.lsh - ** B.msh B.lsh - ** C.msh C.lsh - ** D.msh D.lsh - ** ------------------------ - ** Product.msw Product.lsw - ** - ** __ppe42_mulhwu: Return Product.msh (unsigned) - ** __ppe42_mulhw: Return Product.msh (signed) - ** __ppe42_mullw: Return Product.lsh - ** - ** Precondition Check: - ** - ** if( U == 0 || V == 0 ) return P=0; - */ - -/*****************************************************************************/ - - /* - ** Multiply High Word Unsigned (__ppe42_mulhwu) - ** - ** r5 = U[16:31] or U.lsh | r5 = r3 & 0xffff - ** r3 = U[0:15] or U.msh | r3 = r3 >> 16 (r3 OW) - ** r6 = V[16:31] or V.lsh | r6 = r4 & 0xffff - ** r4 = V[0:15] or V.msh | r4 = r4 >> 16 (r4 OW) - ** - ** 4th column(drop A.lsh): - ** A = U.lsh * V.lsh [32] | r7 = r5 * r6 - ** A = A.msh [16] | r7 = r7 >> 16 (r7 OW) - ** - ** 3rd column(A = A.msh + B.lsh + C.lsh): - ** B = U.msh * U.lsh [32] | r6 = r3 * r6 (r6 OW) - ** T = B.lsh [16] | r8 = r6 & 0xffff - ** B = B.msh [16] | r6 = r6 >> 16 (r6 OW) - ** A = T + A [16] | r7 = r8 + r7 (r7 OW, r8 FU) - ** - ** C = U.lsh * V.msh [32] | r5 = r5 * r4 (r5 OW) - ** T = C.lsh [16] | r8 = r5 & 0xffff (r8 OW) - ** C = C.msh [16] | r5 = r5 >> 16 (r5 OW) - ** A = T + A [16] | r7 = r8 + r7 (r7 OW, r8 FU) - ** - ** 2nd column(A = 3rd_carry + B.msh + C.msh): - ** A = A.msh [16] | r7 = r7 >> 16 (r7 OW) - ** A = A + B [16] | r7 = r7 + r6 (r7 OW, r6 FU) - ** A = A + C [16] | r7 = r7 + r5 (r7 OW, r5 FU) - ** - ** 1st column(A = D + A): - ** D = U.msh * V.msh [32] | r3 = r3 * r4 (r3 OW, r4 FU) - ** P = D + A [32] | r5 = r3 + r7 (r3, r7 FU) - ** - ** Return P(r3=r5) as Product.msw unsigned - ** - ** Note: the implementation can be even shorter, the current - ** implementation is ensuring the 
overflow is avoided - ** by always adding 16 bits integer together. - */ - - .align 2 - .global __ppe42_mulhwu - .type __ppe42_mulhwu, @function - -__ppe42_mulhwu: - - stwu %r1, -0x28(%r1) // allocate stack frame - - stvd %d4, 0x08(%r1) // save off r4 & r5 in stack - stvd %d6, 0x10(%r1) // save off r6 & r7 in stack - stvd %d8, 0x18(%r1) // save off r8 & r9 in stack - - mfcr %r5 // save off cr - stw %r5, 0x20(%r1) // store cr in stack - - li %r5, 0 // r5 = 0 - cmpwibc 1, 2, %r3, 0, __ppe42_mulhwu_ret // U=0 -> ret - cmpwibc 1, 2, %r4, 0, __ppe42_mulhwu_ret // V=0 -> ret - - extrwi %r5, %r3, 16, 16 - srwi %r3, %r3, 16 - extrwi %r6, %r4, 16, 16 - srwi %r4, %r4, 16 - - mullhwu %r7, %r5, %r6 - srwi %r7, %r7, 16 - - mullhwu %r6, %r3, %r6 - extrwi %r8, %r6, 16, 16 - srwi %r6, %r6, 16 - add %r7, %r8, %r7 - - mullhwu %r5, %r5, %r4 - extrwi %r8, %r5, 16, 16 - srwi %r5, %r5, 16 - add %r7, %r8, %r7 - - srwi %r7, %r7, 16 - add %r7, %r7, %r6 - add %r7, %r7, %r5 - - mullhwu %r3, %r3, %r4 - add %r5, %r3, %r7 - -__ppe42_mulhwu_ret: - - mr %r3, %r5 // put return value to r3 - - lwz %r5, 0x20(%r1) // load cr from stack - mtcr0 %r5 // restore cr - - lvd %d4, 0x08(%r1) // restore r4 & r5 from stack - lvd %d6, 0x10(%r1) // restore r6 & r7 from stack - lvd %d8, 0x18(%r1) // restore r8 & r9 from stack - - lwz %r1, 0(%r1) // restore stack pointer - - blr - - .size __ppe42_mulhwu, .-__ppe42_mulhwu - - diff --git a/tools/PowerPCtoPPE/ppe42_mullw.S b/tools/PowerPCtoPPE/ppe42_mullw.S deleted file mode 100644 index ee56df16..00000000 --- a/tools/PowerPCtoPPE/ppe42_mullw.S +++ /dev/null @@ -1,174 +0,0 @@ -/// \file ppe42_mullw.S -/// \brief PPC405 word multiplication instructions implemented by PPE ISA -/// -/// This file includes implementation for the following PPC405 instructions -/// mullw -/// -/// Note: PPE ISA specific "fused compare and branch" instructions are used -/// -/// Revision History: -/// 09-15-2014: Initial Version by daviddu -/// - - .file "ppe42_mullw.S" - .section ".text" - 
- /* - ** Code comment notation: - ** - ** msw = most-significant (high-order) word, i.e. bits 0..31 - ** lsw = least-significant (low-order) word, i.e. bits 32..63 - ** msh = most-significant (high-order) halfword, i.e. bits 0..15 - ** lsh = least-significant (low-order) halfword, i.e. bits 16..31 - ** LZ = Leading Zeroes - ** SD = Significant Digits - ** OW = Register is overwritten, previous value is lost, - ** correct if previous value is no longer needed. - ** FU = Register is not overwritten, but its value is no longer needed, - ** in other words, the register is "free for use". - ** - ** PPE GPR Registers are: R0-R10, R13, R28-R31 - ** Volatile Registers are: R0, R3-R10 - ** Non-volatile registers are R28-R31 - */ - - /* - ** Calling Convention - ** - ** R2 and R13 are never saved or restored. In ABI or EABI application - ** these registers are constant. The other touched volatile registers - ** will be saved and restored by the subroutines. Note the caller - ** won't be saving those registers because these subroutines will be - ** instrumented into caller's body without compiler knowledge. - ** - ** Note R3 is not saved and restored because it will be changed for - ** return value anyways, the p2p script will make sure to restore it. - ** Also CR is handled because of compare and branch, but XER/CTR/LR - ** are not handled because they are untouched by the instructions used. 
- ** - ** Stack layout: - ** - ** 0x00 -- R1, Dedicated for Stack Pointer - ** 0x04 -- slot reserved for LR - ** 0x08 -- R4, Volatile, Private - ** 0x0c -- R5, Volatile, Private - ** 0x10 -- R6, Volatile, Private - ** 0x14 -- R7, Volatile, Private - ** 0x18 -- R8, Volatile, Private - ** 0x1c -- R9, Volatile, Private - ** 0x20 -- CR, Condition Register - ** 0x24 -- - ** - ** 0x28 -- Stack Size, Must be 8-byte aligned - */ - - /* - ** Multiplication Procedures: - ** - ** __ppe42_mulhwu(U,V) - ** __ppe42_mulhw(U,V) - ** __ppe42_mullw(U,V) - ** - ** R3:R4 = Input parameter, multipliers: U, V. - ** R3 = Output parameter, either product.msh or product.lsh. - ** R5-R9 = Temporary registers - ** - ** General Algorithm - ** - ** Using PPC405 ISA instruction 'mullhw' to emulate - ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh) - ** - ** U.msh U.lsh - ** X V.msh V.lsh - ** ------------------------ - ** A.msh A.lsh - ** B.msh B.lsh - ** C.msh C.lsh - ** D.msh D.lsh - ** ------------------------ - ** Product.msw Product.lsw - ** - ** __ppe42_mulhwu: Return Product.msh (unsigned) - ** __ppe42_mulhw: Return Product.msh (signed) - ** __ppe42_mullw: Return Product.lsh - ** - ** Precondition Check: - ** - ** if( U == 0 || V == 0 ) return P=0; - */ - -/*****************************************************************************/ - - /* - ** Multiply Low Word (__ppe42_mullw) - ** - ** r5 = U[16:31] or U.lsh | r5 = r3 & 0xffff - ** r3 = U[0:15] or U.msh | r3 = r3 >> 16 (r3 OW) - ** r6 = V[16:31] or V.lsh | r6 = r4 & 0xffff - ** r4 = V[0:15] or V.msh | r4 = r4 >> 16 (r4 OW) - ** - ** B = U.msh * V.lsh | r3 = r3 * r6 (r3 OW) - ** B = B << 16 | r3 = r3 << 16 - ** C = U.lsh * V.msh | r4 = r5 * r4 (r4 OW) - ** C = C << 16 | r4 = r4 << 16 - ** A = U.lsh * V.lsh | r5 = r5 * r6 (r5 OW, r6 FU) - ** A = A + B | r5 = r5 + r3 (r3 FU) - ** P = A + C | r5 = r5 + r4 (r4 FU) - ** - ** Return P(r3=r5) as Product.lsw - ** - ** Note: there is no overflow case with this function - 
*/ - - .align 2 - .global __ppe42_mullw - .type __ppe42_mullw, @function - -__ppe42_mullw: - - stwu %r1, -0x28(%r1) // allocate stack frame - - stvd %d4, 0x08(%r1) // save off r4 & r5 in stack - stvd %d6, 0x10(%r1) // save off r6 & r7 in stack - stvd %d8, 0x18(%r1) // save off r8 & r9 in stack - - mfcr %r5 // save off cr - stw %r5, 0x20(%r1) // store cr in stack - - li %r5, 0 // r5 = 0 - cmpwibc 1, 2, %r3, 0, __ppe42_mullw_ret // U=0 -> ret - cmpwibc 1, 2, %r4, 0, __ppe42_mullw_ret // V=0 -> ret - - extrwi %r5, %r3, 16, 16 - srwi %r3, %r3, 16 - extrwi %r6, %r4, 16, 16 - srwi %r4, %r4, 16 - - mullhwu %r3, %r3, %r6 - slwi %r3, %r3, 16 - - mullhwu %r4, %r5, %r4 - slwi %r4, %r4, 16 - - mullhwu %r5, %r5, %r6 - add %r5, %r5, %r3 - add %r5, %r5, %r4 - -__ppe42_mullw_ret: - - mr %r3, %r5 // put return value to r3 - - lwz %r5, 0x20(%r1) // load cr from stack - mtcr0 %r5 // restore cr - - lvd %d4, 0x08(%r1) // restore r4 & r5 from stack - lvd %d6, 0x10(%r1) // restore r6 & r7 from stack - lvd %d8, 0x18(%r1) // restore r8 & r9 from stack - - lwz %r1, 0(%r1) // restore stack pointer - - blr - - .size __ppe42_mullw, .-__ppe42_mullw - - diff --git a/tools/ppetracepp/Makefile b/tools/ppetracepp/Makefile index 41ad1cd2..abb4ce92 100644 --- a/tools/ppetracepp/Makefile +++ b/tools/ppetracepp/Makefile @@ -1,12 +1,38 @@ +# IBM_PROLOG_BEGIN_TAG +# This is an automatically generated prolog. +# +# $Source: tools/ppetracepp/Makefile $ +# +# OpenPOWER sbe Project +# +# Contributors Listed Below - COPYRIGHT 2015,2016 +# [+] International Business Machines Corp. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# IBM_PROLOG_END_TAG +include img_defs.mk + all: ppetracepp ppe2fsp ppetracepp: ppetracepp.C - g++ -m32 -O3 -w -g -I./ ppetracepp.C -o ppetracepp + g++ -m32 -O3 -w -g -I./ ppetracepp.C -o $(PPETRACEPP_BIN_DIR)/ppetracepp # g++ -O3 -w -x c++ -fPIC -g -I./ ppetracepp.C -o ppetracepp ppe2fsp: ppe2fsp.c ppe2fsp_cmd.c - gcc -m32 -w -g -I./ -I../../pk/trace ppe2fsp.c ppe2fsp_cmd.c -o ppe2fsp + gcc -m32 -w -g -I./ -I../../pk/trace ppe2fsp.c ppe2fsp_cmd.c -o $(PPETRACEPP_BIN_DIR)/ppe2fsp clean: - rm ppetracepp ppe2fsp + rm $(PPETRACEPP_BIN_DIR)/ppetracepp $(PPETRACEPP_BIN_DIR)/ppe2fsp diff --git a/tools/ppetracepp/ppe2fsp b/tools/ppetracepp/ppe2fsp deleted file mode 100755 index ea96923f..00000000 Binary files a/tools/ppetracepp/ppe2fsp and /dev/null differ diff --git a/tools/ppetracepp/ppetracepp b/tools/ppetracepp/ppetracepp deleted file mode 100755 index 61b5f04b..00000000 Binary files a/tools/ppetracepp/ppetracepp and /dev/null differ diff --git a/tools/ppetracepp/ppetracepp.C b/tools/ppetracepp/ppetracepp.C index 5c4b069f..5cd7a2ff 100755 --- a/tools/ppetracepp/ppetracepp.C +++ b/tools/ppetracepp/ppetracepp.C @@ -1,3 +1,27 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: tools/ppetracepp/ppetracepp.C $ */ +/* */ +/* OpenPOWER sbe Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2016 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ /* # *** ppetracepp - a fsp/common Linux trace pre processor @@ -48,7 +72,7 @@ #include #include #include - +#include #include typedef u_int32_t u32 ; typedef u_int8_t u8 ; -- cgit v1.2.1