From a28f852be2197680c6864a8b66b8cb0743893471 Mon Sep 17 00:00:00 2001 From: Glenn Miles Date: Mon, 23 Feb 2015 14:34:25 -0600 Subject: Added ppe tools directory with contents Change-Id: I9da1e93f2624a8da121548b7af67002a98d61fe2 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/15907 Reviewed-by: Glenn R. Miles Reviewed-by: David Young Tested-by: David Young --- tools/PowerPCtoPPE/Makefile | 23 + tools/PowerPCtoPPE/p2p-test-gen.py | 147 ++++++ tools/PowerPCtoPPE/p2pfiles.mk | 14 + tools/PowerPCtoPPE/ppc-ppe-pcp.py | 980 +++++++++++++++++++++++++++++++++++++ tools/PowerPCtoPPE/ppe42_divw.S | 208 ++++++++ tools/PowerPCtoPPE/ppe42_divwu.S | 184 +++++++ tools/PowerPCtoPPE/ppe42_mulhw.S | 193 ++++++++ tools/PowerPCtoPPE/ppe42_mulhwu.S | 202 ++++++++ tools/PowerPCtoPPE/ppe42_mullw.S | 174 +++++++ tools/ppetracepp/Makefile | 12 + tools/ppetracepp/jhash.h | 143 ++++++ tools/ppetracepp/ppe2fsp | Bin 0 -> 18634 bytes tools/ppetracepp/ppe2fsp.c | 500 +++++++++++++++++++ tools/ppetracepp/ppe2fsp.h | 10 + tools/ppetracepp/ppe2fsp_cmd.c | 115 +++++ tools/ppetracepp/ppetracepp | Bin 0 -> 230943 bytes tools/ppetracepp/ppetracepp.C | 922 ++++++++++++++++++++++++++++++++++ tools/ppetracepp/trac_interface.h | 363 ++++++++++++++ tools/ppetracepp/tracehash.pl | 873 +++++++++++++++++++++++++++++++++ 19 files changed, 5063 insertions(+) create mode 100644 tools/PowerPCtoPPE/Makefile create mode 100755 tools/PowerPCtoPPE/p2p-test-gen.py create mode 100644 tools/PowerPCtoPPE/p2pfiles.mk create mode 100755 tools/PowerPCtoPPE/ppc-ppe-pcp.py create mode 100644 tools/PowerPCtoPPE/ppe42_divw.S create mode 100644 tools/PowerPCtoPPE/ppe42_divwu.S create mode 100644 tools/PowerPCtoPPE/ppe42_mulhw.S create mode 100644 tools/PowerPCtoPPE/ppe42_mulhwu.S create mode 100644 tools/PowerPCtoPPE/ppe42_mullw.S create mode 100644 tools/ppetracepp/Makefile create mode 100755 tools/ppetracepp/jhash.h create mode 100755 tools/ppetracepp/ppe2fsp create mode 100755 tools/ppetracepp/ppe2fsp.c create mode 
100644 tools/ppetracepp/ppe2fsp.h create mode 100644 tools/ppetracepp/ppe2fsp_cmd.c create mode 100755 tools/ppetracepp/ppetracepp create mode 100755 tools/ppetracepp/ppetracepp.C create mode 100755 tools/ppetracepp/trac_interface.h create mode 100755 tools/ppetracepp/tracehash.pl (limited to 'tools') diff --git a/tools/PowerPCtoPPE/Makefile b/tools/PowerPCtoPPE/Makefile new file mode 100644 index 00000000..d38e72d4 --- /dev/null +++ b/tools/PowerPCtoPPE/Makefile @@ -0,0 +1,23 @@ + +export SUB_OBJDIR = /p2p + +include img_defs.mk +include p2pfiles.mk + +OBJS := $(addprefix $(OBJDIR)/, $(P2P_OBJECTS)) + +libp2p.a: $(OBJS) + $(AR) crs $(OBJDIR)/libp2p.a $(OBJDIR)/*.o + +.PHONY: clean p2p +p2p: $(OBJS) + +$(OBJS) $(OBJS:.o=.d): | $(OBJDIR) + +$(OBJDIR): + mkdir -p $(OBJDIR) + +ifneq ($(MAKECMDGOALS),clean) +include $(OBJS:.o=.d) +endif + diff --git a/tools/PowerPCtoPPE/p2p-test-gen.py b/tools/PowerPCtoPPE/p2p-test-gen.py new file mode 100755 index 00000000..983cec03 --- /dev/null +++ b/tools/PowerPCtoPPE/p2p-test-gen.py @@ -0,0 +1,147 @@ +#!/usr/bin/python2.6 + +# \file p2p-test-gen.py +# \brief this program generates random constructed test cases +# in the form of input file consumed by ppc-ppe-pcp.py +# \usage create a file named 'test.s' and make sure it has at +# least one blank line before executing this program. 
import fileinput
import random
import sys

# optional label decorations written on the filler lines
DotLabel = ['', 'Label:', '.Label']

# optional comment decorations (none, line comment, block comment)
Comments = ['', '// Comments', '/* Comments */']

# white space variations inserted between the fields of a generated line
TabSpace = ['', '\t', ' ', '\t ', ' \t', ' \t ']

# a register is spelled either "N" or "%rN"
RegLabel = ['', '%r']

# registers the generated instructions may name
Register = [0,1,2,3,4,5,6,7,8,9,10,13,28,29,30,31]

# operand formats (the values of TestBook) that p2p_test() will generate
TestEnable = [0,1,2,3]

# opcode : operand format
#   0 - no operands
#   1 - RT, RA, IM
#   2 - RT, D(RA)
#   3 - RT, RA, RB
#   4 - RT, D(RA), generated twice in a row
#   5 - compare: "RA, IM" when the opcode ends in 'i', else "RA, RB"
TestBook = {'eieio'   : 0,
            'isync'   : 0,
            'icbi'    : 0,
            'icbt'    : 0,
            'stbux'   : 3,
            'sthux'   : 3,
            'stwux'   : 3,
            'lbzux'   : 3,
            'lhzux'   : 3,
            'lwzux'   : 3,
            'lha'     : 2,
            'lhau'    : 2,
            'lhax'    : 3,
            'lhaux'   : 3,
            'mulhhw'  : 3,
            'mulhhwu' : 3,
            'mulhw'   : 3,
            'mulhwu'  : 3,
            'mullw'   : 3,
            'mulli'   : 1,
            'divw'    : 3,
            'divwu'   : 3,
            'lmw'     : 2,
            'stmw'    : 2,
            'lwz'     : 4,
            'stw'     : 4,
            'cmplw'   : 5,
            'cmpw'    : 5,
            'cmpwi'   : 5}

BranchList = ['bc', 'bcl', 'blt', 'bltl', 'ble', 'blel', 'bgt', 'bgtl', 'bge',
              'bgel', 'beq', 'beql', 'bne', 'bnel']


def _pick(choices):
    # one random element of a non-empty list
    return random.choice(choices)


def _reg(number):
    # register operand, randomly spelled "N" or "%rN"
    return _pick(RegLabel) + str(number)


def _imm():
    # random immediate / displacement / branch target
    return str(random.randint(-128, 128))


def _emit(*fields):
    # write one output line; every field is preceded by random white space.
    # sys.stdout is used directly because fileinput(inplace=1) redirects it
    # into the file being rewritten.
    sys.stdout.write(''.join([_pick(TabSpace) + field for field in fields]) + '\n')


def _operands(opcode, fmt):
    # build the operand field of 'opcode' for operand format 'fmt' (1..5)
    if fmt == 1:
        rt, ra = random.sample(Register, 2)
        return _reg(rt) + ',' + _pick(TabSpace) + _reg(ra) + ',' + \
               _pick(TabSpace) + _imm()
    if fmt in (2, 4):
        rt, ra = random.sample(Register, 2)
        return _reg(rt) + ',' + _pick(TabSpace) + _imm() + '(' + _reg(ra) + ')'
    if fmt == 3:
        rt, ra, rb = random.sample(Register, 3)
        return _reg(rt) + ',' + _pick(TabSpace) + _reg(ra) + ',' + \
               _pick(TabSpace) + _reg(rb)
    if fmt == 5:
        if opcode.endswith('i'):
            # the immediate must stay a bare number: no register prefix
            return _reg(_pick(Register)) + ',' + _pick(TabSpace) + _imm()
        ra, rb = random.sample(Register, 2)
        return _reg(ra) + ',' + _pick(TabSpace) + _reg(rb)
    return ''


def _branch_operands(branch):
    # "BO, BI, target" for bc/bcl, a lone target for the extended mnemonics
    if branch.startswith('bc'):
        fields = [str(random.randint(0, 15)), str(random.randint(0, 7)), _imm()]
    else:
        fields = [_imm()]
    return ','.join([_pick(TabSpace) + field for field in fields]) + \
           _pick(TabSpace)


# \brief rewrite 'filename' in place with randomly constructed test cases
#
# One complete set of test cases is written for every line the file had
# before the call (the old lines are discarded), so the file must contain at
# least one line, e.g. a blank one.  A set is: the header comment, then for
# each TestBook opcode a filler line (comment or label) followed by the
# instruction itself when its operand format is listed in TestEnable, and
# finally one random branch from BranchList.
#
# \param filename  file to rewrite, defaults to 'test.s'
def p2p_test(filename='test.s'):
    try:
        for _old_line in fileinput.input(filename, inplace=1):
            # the header gets its own line so that the first generated line
            # is not swallowed by the '//' comment
            sys.stdout.write('// start generating test cases:\n')
            for opcode, fmt in TestBook.items():
                if random.randint(1, 10) > 5:
                    _emit(_pick(Comments), '')
                else:
                    _emit(_pick(DotLabel), '')
                if fmt not in TestEnable:
                    continue
                # format 4 produces two candidates for ld/st pairing
                for _again in range(2 if fmt == 4 else 1):
                    fields = [opcode + ' ']
                    if fmt != 0:
                        fields.append(_operands(opcode, fmt))
                    fields.append(_pick(Comments))
                    _emit(*fields)
            branch = _pick(BranchList)
            _emit(branch + ' ', _branch_operands(branch), _pick(Comments))
    finally:
        # always give stdout back, even if generation failed half way
        fileinput.close()


if __name__ == '__main__':
    p2p_test()
+# --------------------------------------------------------------- + +P2P_VERSION = "10-07-2014" # version number as last modified date +P2P_PPC_EXT = '.s' # PPC Assembly filename extension +P2P_PPE_EXT = '.es' # PPE Assembly filename extension +P2P_PPE_PRE = '__ppe42_' # PPE Assembly subroutine prefix + +import sys +import os +import re +import fnmatch +import fileinput + +# --------------------------------------------------------------- +# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P) +# --------------------------------------------------------------- +# +# Description: +# +# This post-compiler processor will take PPC405 assembly file(s) produced +# by powerpc-linux-gcc or hand coded and replace some of the instructions +# supported by PPC405 ISA but not PPE42 ISA with a set of instructions +# supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s). +# +# Assumptions: +# +# - Input/Output File Name Extension: +# +# PPC405 assembly file generated by powerpc-linux-gcc has filename extension +# defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file +# consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT" +# global variable. Both should be consistant with Makefile rules. +# +# - Registers: +# +# Instructions in input file should only use registers supported by PPE, +# that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE +# only has CR0 instead of CR0-7). +# +# GCC flag -ffixed can be used to enforce compiler to not use certain +# registers if compiler generates input files to this script. Note certian +# optimization level, such as -Os, of GGC will still use certain registers +# regardless if -ffixed flag is used. Furthermore, compiler should not +# generate multiple word instructions(lmw/stmw) that covers the registers +# forbidden to use by -ffixed flag. 
+# +# Example of using -ffixed flag in this case: +# -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \ +# -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \ +# -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \ +# -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \ +# -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \ +# -ffixed-cr5 -ffixed-cr6 -ffixed-cr7 +# +# - Instructions: +# +# Instructions in input file should only use PowerPC 405 instructions +# covered by "PowerPC 405-S Embedded Processor Core" manual; however, +# it is assumed that a certain category of instructions will never be +# generated by powerpc-linux-gcc compiler(or disabled by compiler switch). +# +# Also, compiler should generate extended mnemonics instead of its base +# instruction when extended mnemonics fits. +# +# Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align +# directive to help instruction alignment for best cache performance. +# +# - Assembly Syntax: +# +# There should be only white spaces before instruction mnemonics, in +# other words, all inline comments should be put behind the instruction. +# +# "Label:" and an instruction should not be on the same line, hand coded +# assembly should be consistent with this same compiler output format. +# +# Dependencies: +# +# In order to utilize assembly subroutines implemented for supporting +# missing instructions of multiplication and division in PPE42 ISA, a given +# library(with assembly files and header) must be compiled and linked with +# any source code that uses this program to generate PPE binary. 
+# +# Usage: +# +# ./ -f --- process single file +# ./ -d --- process multiple files +# ./ -h --- detailed usage on other flags +# ./ -v --- version of the program +# ./ -d -s --- perform result profiling +# +# Functions: +# +# p2p_main - main function, parse options and arguments +# p2p_onefile - processing single PPC Assembly File +# p2p_combine - processing two PPC instructions in input file +# p2p_replace - processing single PPC instruction in input file +# +# Data Structures: +# +# ReplaceRules = { ppc_op : [rule, ppe_op] } +# CombineRules = { ppc_op : [rule, ppe_op] } +# FuseBranches = [ list of branches qualified for fusing with compares ] +# +#------------------------------------------------------------------------------ +# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ] +#-------|-------------------------|-------------------------------------------- +# Rule | Example (PPC to PPE) | Description +#-------|-------------------------|-------------------------------------------- +# 'r' | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode +# 0 0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same +#-------|-------------------------|-------------------------------------------- +# 'ru' | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by +# | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the +# +1 +1 | add RA, RA, RB | original instruction to update RA +#-------|-------------------------|-------------------------------------------- +# 'ra' | [ppc] ppc_op RT, D(RA) | on top of 'r' rule, emulate 'algebraic' by +# | [ppe] ppe_op RT, D(RA) | appending "extsh" instruction after the +# +1 +1 | extsh RT, RT | original instruction to sign-extend RT +#-------|-------------------------|-------------------------------------------- +# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above. 
+# | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether +# | add RA, RA, RB | rule 'a' or rule 'u' should be applied +# +2 +2 | extsh RT, RT | first, the outcome should be the same. +#-------|-------------------------|-------------------------------------------- +# 'h' | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication +# | [ppe] srwi RA, 16 | emulate multiply "high halfword" with +# | srwi RB, 16 | multiply "low halfword" by shifting +# +2 +2 | ppe_op RT, RA, RB | the operands first +#-------|-------------------------|-------------------------------------------- +# 's' | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide +# | [ppe] stwu R1, -24(R1)| instructions with calling 'subroutines' +# | stvd D3, 8(R1) | implemented in ppe42_mul.S and ppe42_div.S +# | mflr R3 | +# | stw R3, 16(R1) | Calling Conventions:(SS = Stack Size) +# | mr R3, RA | +# | mr R4, RB | Caller is responsible for +# | bl target | 1) create stack frame +# | mr RT, R3 | 2) save off R3 and R4 to stack +# | lwz R3, 16(R1) | 3) save off link register to stack +# | mtlr R3 | 4) put operands into R3, R4 before branch +# | lvd D3, 8(R1) | 5) put result in R3 to RT after branch +# | lwz R1, 0(R1) | 6) restore link register from stack +# | | 7) restore R3 and R4 from stack +# | [sub] stwu R1, -SS(R1) | 8) remove the stack frame +# | | +# | (subroutine body) | Callee is responsible for +# | | 1) create and remove stack frame +# | lwz R1, 0(R1) | 2) save and restore volatile registers +# +X +Y | blr | 3) subroutine will not touch LR again +#-------|-------------------------|-------------------------------------------- +# 'o' | [ppc] ppc_op[o] RT ... | rule of 'o' form for overflow +# | [ppe] ppe_op RT ... | Note: "mullwo", "divwo" and "divwuo" each +# | | has unique setting for XER[OV,SO] if OE = 1 +#-------|-------------------------|-------------------------------------------- +# 'd' | [ppc] ppc_op[.] RT ... | rule of '.' 
or 'dot' form for recording +# | [ppe] ppe_op RT ... | using "cmpwli" to emulate the [.] form +# | cmpwli RT, 0 | to the instruction result and CR0 fields +#-------|-------------------------|-------------------------------------------- +# 'm' | [ppc] ppc_op RT, D(RA) | emulate PowerPC load/store multiple word +# | [ppe] ppe_op DT, D(RA) | instructions with PPE specific +# | (doubleword ld/st)| 'virtual doubleword' instructions if target +# | or | address is 8-byte aligned; otherwise, using +# | (singleword ld/st)| single word instructions instead or mix both +# | or | Note only RA == R1/R2/R13 will always meet +# -1 -1 | (single & double) | alignment requirement of virtual doubleword +#-------|-------------------------|-------------------------------------------- +# +ReplaceRules = {#ppc_op : [ rule | ppe_op ] + #---------------------------- + #synchronization instructions + 'eieio' : [ 'r', 'sync' ], + 'isync' : [ 'r', 'nop' ], + 'icbi' : [ 'r', 'nop' ], + 'icbt' : [ 'r', 'nop' ], + 'mtcr' : [ 'r', 'mtcr0'], + #load/store with [u/x/a] form + 'stbux' : [ 'ru', 'stbx' ], + 'sthux' : [ 'ru', 'sthx' ], + 'stwux' : [ 'ru', 'stwx' ], + 'lbzux' : [ 'ru', 'lbzx' ], + 'lhzux' : [ 'ru', 'lhzx' ], + 'lwzux' : [ 'ru', 'lwzx' ], + 'lha' : [ 'ra', 'lhz' ], + 'lhau' : [ 'ra', 'lhzu' ], + 'lhax' : [ 'ra', 'lhzx' ], + 'lhaux' : [ 'rau', 'lhzx' ], + #multiply/divide with [./o] form + 'mulhhw' : [ 'h', 'mullhw' ], + 'mulhhw.' : [ 'h', 'mullhw.' ], + 'mulhhwu' : [ 'h', 'mullhwu' ], + 'mulhhwu.': [ 'h', 'mullhwu.' ], + 'mulhw' : [ 's', 'mulhw' ], + 'mulhw.' : [ 'sd', 'mulhw' ], + 'mulhwu' : [ 's', 'mulhwu' ], + 'mulhwu.' : [ 'sd', 'mulhwu' ], + 'mullw' : [ 's', 'mullw' ], + 'mullw.' : [ 'sd', 'mullw' ], + 'mullwo' : [ 'so', 'mullw' ], + 'mullwo.' : [ 'sod', 'mullw' ], + 'mulli' : [ 's', 'mullw' ], + 'divw' : [ 's', 'divw' ], + 'divw.' : [ 'sd', 'divw' ], + 'divwo' : [ 'so', 'divw' ], + 'divwo.' : [ 'sod', 'divw' ], + 'divwu' : [ 's', 'divwu' ], + 'divwu.' 
: [ 'sd', 'divwu' ], + 'divwuo' : [ 'so', 'divwu' ], + 'divwuo.' : [ 'sod', 'divwu' ], + #load/store multiple word(Rx-R31) + 'lmw' : [ 'm', 'lvd,lwz' ], + 'stmw' : [ 'm', 'stvd,stw' ]} + + +#------------------------------------------------------------------------------ +# CombineRules: [ 'f', 'v', 'l' ] +#-------|-------------------------|-------------------------------------------- +# 'f' | [ppc] ppc_op(cmp*) | rule for 'fusing' adjacent pair of compare +# | ppc_op(b*) | and branch(PPE specific). Note: only +# -1 0 | [ppe] ppe_op(cmp*b*) | extended mnemonics of compares are handled +#-------|-------------------------|-------------------------------------------- +# 'v' | [ppc] ppc_op(lwz/stw) | rule for combining double word aligned +# | ppc_op(lwz/stw) | load/store pairs into signle 'virtual' +# -1 -1 | [ppe] ppe_op(lvd/stvd) | double word instructions(PPE specific) +#-------|-------------------------|-------------------------------------------- +# 'l' | [ppc] .p2align | compiler will insert ".p2align" directive to +# | Label: | help instructions align from label to label. +# | [ppe] b Label | then assembler will insert "nop" on .p2align +# | .p2align | directive. 
a "branch" to skip the nops will +# 0 -1 | Label: | improve the performance while still aligned +#-------|-------------------------|-------------------------------------------- +# +CombineRules = {#ppc_op : [ rule | ppe_cp ] + #-------------------------- + #8byte aligned loads/stores + 'lwz' : [ 'v', 'lvd' ], + 'stw' : [ 'v', 'stvd' ], + #compares fusable to branch + 'cmplw' : [ 'f', 'cmplw' ], + 'cmpw' : [ 'f', 'cmpw' ], + 'cmpwi' : [ 'f', 'cmpwi' ], + #'.p2align' before 'label:' + '.p2align' : [ 'l', 'b' ]} + + +#------------------------------------------------------------------------------ +# FuseBranches: [ Branches can be fused into cmp*b* ] +#------------------------------------------------------------------------------ +# +FuseBranches = ['bc', 'bcl', + 'blt', 'bltl', 'ble', 'blel', + 'bgt', 'bgtl', 'bge', 'bgel', + 'beq', 'beql', 'bne', 'bnel'] + + +# ----------------------------------------------------------------------------- +# p2p_replace: +# process each line(filtered) in the assembly file to replace PPC instruction +# to supported PPE instruction(s) +# +# Arguments: +# string: line - assembly file line to be replaced +# ppc_op - detected PPC opcode that needs to be replaced +# Return: +# boolean: True - Return without Error +# False - Error Detected +# Variables: +# string: inst, rule, ppe_op, newline, temp_op +# double_inst, single_inst, virtual_reg, base_offset, address_reg +# Subroutine: +# NONE +# ----------------------------------------------------------------------------- +def p2p_replace(line, ppc_op): + + # parse PPC instruction as in I or D form with opcode and upto 3 operands: + # possible forms: opcode + # opcode RT, RA, RB + # opcode RT, RA, IM + # opcode RT, D(RA) + # inst.group(0) : + # inst.group(1) : " " + # inst.group(2) : Opcode(.) 
+ # inst.group(3) : " " + # inst.group(4) : GPR + # inst.group(5) : " , " + # inst.group(6) : GPR or Immediate(D) + # inst.group(7) : " , " or " ( " + # inst.group(8) : GPR or Immediate(IM) + # inst.group(9) : " ) " + inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line) + + # detect an error + if inst is None or ppc_op != inst.group(2): + return False + + # look up rule to process the instruction + rule, ppe_op = ReplaceRules[ppc_op] + + # if enabled, put a mark in the output file + if P2P_COMMENT: print "#P2P(%s):" % rule + line, + + # start cases of replacing PPC instruction with PPE instruction(s) + #---r------------------------------------------------------------------------ + if 'r' in rule: + + # replace opcode under rule 'r' and rewrite the instruction + newline = line.replace(ppc_op, ppe_op) + print newline, + + # do not continue if there is 'a' or 'u' rule to process on this line + if 'u' not in rule and 'a' not in rule: + return True + + #---u------------------------------------------------------------------------ + if 'u' in rule: + + # construct and write "add RA, RA, RB" under rule 'u' + newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\ + inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8) + print newline + + # do not continue if there is 'a' rule to process on this line + if 'a' not in rule: + return True + + #---a------------------------------------------------------------------------ + if 'a' in rule: + + # construct and write "extsh RT, RT" under rule 'a' + newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\ + inst.group(5) + inst.group(4) + print newline + return True + + #---h------------------------------------------------------------------------ + if 'h' in rule: + + # construct and write "srwi RA, 16" under rule 'h' + newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\ + inst.group(5) + "16" + print newline + + # construct 
and write "srwi RB, 16" under rule 'h' + newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\ + inst.group(5) + "16" + print newline + + # replace opcode in original instruction and write under rule 'h' + newline = line.replace(ppc_op, ppe_op) + print newline + return True + + #---s------------------------------------------------------------------------ + if 's' in rule: + + # construct branch target label + ppe_op = P2P_PPE_PRE + ppe_op + + # construct and write "stwu R1, -24(R1)" to create the stack frame + newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\ + inst.group(5) + '-24(1)' + print newline + + # construct and write "stvd D3, 8(R1)" to save off R3 and R4 + newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\ + inst.group(5) + '8(1)' + print newline + + # construct and write "mflr R3" to fetch the current link address + newline = inst.group(1) + 'mflr' + inst.group(3) + '3' + print newline + + # construct and write "stw R3, 16(R1)" to save off current LR to stack + newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\ + inst.group(5) + '16(1)' + print newline + + # construct and write "mr R3, RA" to copy the operand RA to R3 + newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\ + inst.group(5) + inst.group(6) + print newline + + # if 'mulli' is detected, using 'li' instead of 'mr' for second operand + if ppc_op == 'mulli': + temp_op = 'li' + else: + temp_op = 'mr' + + # construct and write "mr R4, RB" to copy the operand RB to R4 + # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4 + newline = inst.group(1) + temp_op + inst.group(3) + '4' +\ + inst.group(5) + inst.group(8) + print newline + + # using branch and link(bl) to branch to subroutine + # later subroutine can branch back using branch link register(blr) + # Assumption: the subroutine will be responsible for saving + # and restoring all the volatilo registers used in the subroutine + newline = inst.group(1) + 'bl' + inst.group(3) + ppe_op + print 
newline + + # construct and write "mr RT, R3" to copy the result in R3 to RT + newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\ + inst.group(5) + '3' + print newline + + # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack + newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\ + inst.group(5) + '16(1)' + print newline + + # construct and write "mtlr R3" to restore the link register + newline = inst.group(1) + 'mtlr' + inst.group(3) + '3' + print newline + + # construct and write "lvd D3, 8(R1)" to restore R3 and R4 + newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\ + inst.group(5) + '8(1)' + print newline + + # construct and write "lwz R1, 0(R1)" to destroy the stack frame + newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\ + inst.group(5) + '0(1)' + print newline + return True + + #---m------------------------------------------------------------------------ + if 'm' in rule: + + # parse instruction information + # note register can be in either "N" form or "%rN" form + double_inst,single_inst = ppe_op.split(',') + virtual_reg = int(re.search(r'\d+', inst.group(4)).group()) + base_offset = int(inst.group(6)) + address_reg = int(re.search(r'\d+', inst.group(8)).group()) + + # consider illegal if multiple word instruction covers non-exist registers + if virtual_reg < 28: + return False + + # loop until and include GPR31 + while virtual_reg < 32: + # page 270 of 405 manual, only do this for load instructions + if virtual_reg == address_reg != 31 and 'l' in single_inst: + base_offset += 4 + virtual_reg += 1 + continue + + # if other GPRs being address_reg there is no guarantee for alignment + if address_reg not in [1,2,13]: + # construct and write "lwz/stw RT, D(RA)" for every registers + newline = inst.group(1) + single_inst + inst.group(3) +\ + str(virtual_reg) + inst.group(5) + str(base_offset) +\ + inst.group(7) + inst.group(8) + inst.group(9) + print newline + base_offset += 4 + virtual_reg += 1 + else: + # if 
base_offset is also aligned with base address in the address_reg + # & there are at least two more registers to perform doubleword ld/st + if not (base_offset % 8) and (virtual_reg + 1) < 32: + # construct and write "lvd/stvd DR, D(RA)" under rule 'v' + newline = inst.group(1) + double_inst + inst.group(3) +\ + str(virtual_reg) + inst.group(5) + str(base_offset) +\ + inst.group(7) + inst.group(8) + inst.group(9) + print newline + base_offset += 8 + virtual_reg += 2 + # either only one register left or base_offset isnt aligned + else: + # construct and write "lwz/stwz SR, D(RA)" under rule 'v' + newline = inst.group(1) + single_inst + inst.group(3) +\ + str(virtual_reg) + inst.group(5) + str(base_offset) +\ + inst.group(7) + inst.group(8) + inst.group(9) + print newline + base_offset += 4 + virtual_reg += 1 + # end of this if-else + # end of while loop + return True + # end of last if + + +# ----------------------------------------------------------------------------- +# p2p_combine: +# process each two lines(filtered) in the assembly file to combine two PPC +# instructions to one PPE specific instruction for better performance +# +# Arguments: +# string: first_line - 1st assembly file line to be combined +# second_line - 2nd assembly file line to be combined +# first_op - 1st detected PPC opcode that needs to be combined +# second_op - 2nd detected PPC opcode that needs to be combined +# Return: +# boolean: done - True: return without error +# - False: return with error detected +# match - True: eventually matched and combined +# - False: fail to qualify to be combined +# Variables: +# string: first_inst, second_inst, rule, ppe_op, newline +# bo, px_bix, compare_operands, target +# Subroutine: +# NONE +# ----------------------------------------------------------------------------- +def p2p_combine(first_line, second_line, first_op, second_op): + + global P2P_SPACE; global P2P_CYCLE + global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE + + # parse PPC instruction 
as in I or B or D form with opcode and upto 3 operands + # possible form : [1st] opcode [CR,] RA, RB + # [1st] opcode [CR,] RA, IM + # [1st] opcode RT, D(RA) + # [2nd] opcode [CR,] Target + # [2nd] opcode BO, BI, Target + # [2nd] opcode RT, D(RA) + # inst.group(0) : + # inst.group(1) : " " + # inst.group(2) : Opcode(+/-/.) + # inst.group(3) : " " + # inst.group(4) : GPR or CR or BO or Target + # inst.group(5) : " , " + # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target + # inst.group(7) : " , " or " ( " + # inst.group(8) : GPR or IM or Target + # inst.group(9) : " ) " + first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line) + second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line) + + # detect an error + if first_inst is None or second_inst is None or \ + first_op != first_inst.group(2) or second_op not in second_inst.group(2): + return False,False + + # look up rule to process the instruction + rule, ppe_op = CombineRules[first_op] + + # start cases of combining two PPC instructions into PPE instruction + #---f------------------------------------------------------------------------ + if 'f' in rule: + + if not P2P_COMPARE_BRANCH: + return True,False + + # fusing compare and branch + ppe_op = ppe_op + second_op + + # for cmpwib* case, there is a difference between + # cmpwi SI operand as signed 16-bit integer and then got sign extended and + # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended + # thus, will not fuse the two if the integer operand is not in range(0,31) + # if cr field is omitted: + if ',' in first_inst.group(7): + # cr field must be cr0 or 0, error out if it is something else: + if '0' not in first_inst.group(4): + return False, True + if 'i' in first_op and (int(first_inst.group(8)) < 0 or \ + 
int(first_inst.group(8)) > 31): + return True,False + else: + compare_operands = first_inst.group(6) + first_inst.group(7) + \ + first_inst.group(8) + ', ' + else: + if 'i' in first_op and (int(first_inst.group(6)) < 0 or \ + int(first_inst.group(6)) > 31): + return True,False + else: + compare_operands = first_inst.group(4) + first_inst.group(5) + \ + first_inst.group(6) + ', ' + + # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*' + # Note CTR decreament and branch always cases are not handled, and + # python bin() outputs "0bXXXXX" form so bo[2] is actually BO bit 0 + # else there is no need for PX,BIX fields for extended mnemonics + if 'bc' in second_op: + bo = bin(int(second_inst.group(4))) + + # do not handle CRT decreament or branch always cases + if bo[4] == 0 or bo[2] == 1: + return True,False + + # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used + px_bix = bo[3] + second_inst.group(5) + \ + second_inst.group(6) + second_inst.group(7) + target = second_inst.group(8) + else: + px_bix = "" + # if cr field is omitted: + if ',' in second_inst.group(5): + # cr field must be cr0 or 0, error out if it is something else: + if '0' not in second_inst.group(4): + return False, True + target = second_inst.group(6) + else: + target = second_inst.group(4) + + # profile: space--, cycle is the same because 1+2==3 + P2P_SPACE -= 1 + + # if enabled, put a mark in the output file + if P2P_COMMENT: + print "#P2P(%s):" % rule + first_line, + print "#P2P(%s):" % rule + second_line, + + # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule + newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\ + px_bix + compare_operands + target + print newline + return True,True + + + #---v------------------------------------------------------------------------ + if 'v' in rule: + + if not P2P_VIRTUAL_DOUBLE: + return True,False + + global P2P_VDW_SDA + + # Combinable Conditions: + # 1) base address registers must be the same 
and one of R1/R2/R13 + # 2) address offsets have to be 8-bytes continuous and aligned + # 3) target or source registers must qualify to be double word register + # Note: label+offset@sda21 format is coverted to target r13 after link + # assume data go in and out r13 or SDA space is always 8-byte aligned + # here we only check the continous of address offset and register pair + if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \ + ("@sda21" in first_inst.group(6) and \ + "@sda21" in second_inst.group(6) and \ + P2P_VDW_SDA): + + if ((first_inst.group(6).replace("@sda21","") + "+4" == \ + second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \ + ((first_inst.group(6).isdigit() and \ + not int(first_inst.group(6)) % 8) and \ + int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \ + (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \ + (int(first_inst.group(4)) == 31 and \ + int(second_inst.group(4)) == 0)): + newline = first_line.replace(first_op, ppe_op) + elif ((second_inst.group(6).replace("@sda21","") + "+4" == \ + first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \ + ((second_inst.group(6).isdigit() and \ + not int(second_inst.group(6)) % 8) and \ + int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \ + (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \ + (int(second_inst.group(4)) == 31 and \ + int(first_inst.group(4)) == 0)): + newline = second_line.replace(second_op, ppe_op) + else: + return True,False + + # profile: space--, cycle--(same delay but 1 less from issue) + P2P_SPACE -= 1; P2P_CYCLE -= 1 + + # if enabled, put a mark in the output file + if P2P_COMMENT: + print "#P2P(%s):" % rule + first_line, + print "#P2P(%s):" % rule + second_line, + + print newline, + return True,True + else: + return True,False + + +# ----------------------------------------------------------------------------- +# p2p_onefile: +# process single PPC assembly file to convert it into PPE 
# -----------------------------------------------------------------------------
# p2p_onefile:
#   process single PPC assembly file to convert it into PPE assembly file
#   also filter out non-instruction lines before calling the subroutine
#
# Arguments:
#   string:  ppcFileName
# Return:
#   None - the process is terminated with exit status 1 when an instruction
#          is detected but cannot be processed
# Variables:
#   boolean: match, done
#   string:  ppeFileName, line, ppc_op, pre_op, pre_line, section, label
#   integer: line_num, first_label_ln, second_label_ln, misalign
# Subroutine:
#   p2p_combine
#   p2p_replace
# -----------------------------------------------------------------------------
def p2p_onefile(ppcFileName):
    """Translate one PPC assembly file into a PPE assembly file.

    The input is renamed to the PPE file name and rewritten in place through
    ``fileinput`` (everything written to ``sys.stdout`` inside the loop goes
    to the output file); the '.405' backup made by ``fileinput`` is renamed
    back to ``ppcFileName`` afterwards.  Blank lines, comment lines, assembler
    directives and labels are copied through; instruction lines are offered
    to ``p2p_combine`` (pairs) and ``p2p_replace`` (single instructions).

    Output is produced with ``sys.stdout.write`` and single-argument
    ``print()`` so the function body is valid on both Python 2 and Python 3.
    """
    global P2P_SPACE; P2P_SPACE = 0     # profile count
    global P2P_CYCLE; P2P_CYCLE = 0     # profile count

    if P2P_VERBOSE:
        print("Translate PPC assembly: " + ppcFileName)

    # new PPE assembly file takes the PPE extension
    ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)
    os.rename(ppcFileName, ppeFileName)

    # storage for the previous line that may still be combined with the next
    pre_line = ""
    pre_op = ""

    # Defaults for a file that contains no instruction line at all: without
    # them the checks after the loop would hit unbound local variables.
    done = True
    line = ""
    line_num = 0
    # line number of the most recent label; None until the first one is seen
    first_label_ln = None

    # use inline file editing, fileinput keeps the original as a '.405' backup
    try:
        for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):

            # in case of "mtmsr 0; isync"
            line = line.replace('isync', 'nop')

            # skip blank line
            if not line.strip():
                if pre_line:
                    sys.stdout.write(pre_line)
                    pre_line = ""
                sys.stdout.write(line)
                continue

            # skip comments line
            if re.search(r"^[\s]*(//|#)", line):
                if pre_line:
                    sys.stdout.write(pre_line)
                    pre_line = ""
                sys.stdout.write(line)
                continue

            # skip .section code except .p2align and label:
            section = re.search(r"^[\s]*(\.[0-9a-zA-Z_]+)", line)
            if section is not None and ':' not in line and \
               section.group(1) != '.p2align':
                if pre_line:
                    sys.stdout.write(pre_line)
                    pre_line = ""
                sys.stdout.write(line)
                continue

            # apply special 'l' rule in CombineRules for '.p2align' before a
            # 'label:'
            label = re.search(r"^[\s]*[\.0-9a-zA-Z_]+[:]+", line)
            if label is not None:
                if pre_line and pre_op == '.p2align':
                    # the distance can only be measured once an earlier label
                    # has been seen; otherwise just keep the '.p2align'
                    if first_label_ln is not None:
                        second_label_ln = fileinput.lineno()
                        misalign = 8 - (second_label_ln - first_label_ln - 2) % 8
                        if 3 <= misalign <= 7:
                            # profile: same space, but save cycles,
                            # branch penalty is 2
                            P2P_CYCLE -= misalign - 2
                            if P2P_COMMENT:
                                print("#P2P(l):")
                            print('\tb ' + label.group(0).split(':')[0])
                    sys.stdout.write(pre_line)
                    pre_line = ""
                first_label_ln = fileinput.lineno()
                if pre_line:
                    sys.stdout.write(pre_line)
                    pre_line = ""
                sys.stdout.write(line)
                continue

            # extract opcode field from line
            ppc_op = line.split()[0]
            done, match = False, False

            # detect the 2nd possible combinable instruction
            if pre_line and P2P_COMBINE:
                # ignore +/- signs for branch prediction
                if '+' in ppc_op or '-' in ppc_op:
                    ppc_op = ppc_op[:-1]
                if 'cmp' in pre_op and ppc_op in FuseBranches or \
                   'cmp' not in pre_op and ppc_op == pre_op:
                    done, match = p2p_combine(pre_line, line, pre_op, ppc_op)
                    if not match:
                        sys.stdout.write(pre_line)
                else:
                    sys.stdout.write(pre_line)
                    done, match = True, False
                pre_line = ""

            # detect the 1st possible combinable instruction
            # NOTE(review): a (False, False) result from p2p_combine is
            # overwritten here, so that outcome is not reported as an error
            # - confirm this is intended.
            if not pre_line and not match and P2P_COMBINE:
                if ppc_op in CombineRules:
                    pre_op = ppc_op
                    pre_line = line
                    done, match = True, True
                else:
                    done, match = True, False

            # detect possible replaceable instruction
            if not match:
                if ppc_op in ReplaceRules and P2P_REPLACE:
                    done = p2p_replace(line, ppc_op)
                else:
                    sys.stdout.write(line)
                    done = True

            # if instruction process is not done due to error
            if not done:
                line_num = fileinput.lineno()
                break
    finally:
        # always close the output file so sys.stdout is restored, even when
        # the loop is left through an exception
        fileinput.close()

    # restore the original input file from the backup
    os.rename(ppeFileName + '.405', ppcFileName)

    # in case last line of the file qualified to be a pre_line and was not
    # printed
    if pre_line:
        with open(ppeFileName, 'a') as f:
            f.write(pre_line)

    # print error debug message
    if not done:
        print("Error: target instruction detected at line [%d]:" % line_num)
        print(" " + line)
        print(" but fail to recognize instruction format.")
        # terminate Makefile or execution if an error is detected
        sys.exit(1)

    # the trailing profile mark is what p2p_profile reads back later
    if P2P_COMMENT:
        with open(ppeFileName, 'a') as f:
            f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE, P2P_CYCLE))

    if P2P_VERBOSE:
        print("Generated PPE assembly: " + ppeFileName)
        print("Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +
              str(P2P_CYCLE) + " cycles.")
# -----------------------------------------------------------------------------
# p2p_profile
#   profiling how much performance and code size are saved by optimization
#
# Arguments:
#   string: ppcFileName
# Return:
#   list:   [space, cycle]
# Variables:
#   string: last_line, profile
# Subroutine:
#   None
# -----------------------------------------------------------------------------
def p2p_profile(ppcFileName):
    """Return the [space, cycle] counters recorded in a generated PPE file.

    The PPE file name is derived from ``ppcFileName`` by replacing
    P2P_PPC_EXT with P2P_PPE_EXT.  Only the last line of that file is
    examined; it must have the form "#P2P: space(<int>) cycle(<int>)".
    [0, 0] is returned when the last line does not match or when the file
    is empty.
    """
    # stays empty for an empty file, so the search below simply fails instead
    # of referencing a loop variable that was never bound
    last_line = ""
    with open(ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT), 'r') as f:
        for last_line in f:
            pass
    profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)",
                        last_line)
    if profile is not None:
        return [int(profile.group(1)), int(profile.group(2))]
    return [0, 0]
# -----------------------------------------------------------------------------
# p2p_main:
#   main of this script
#   print usage info
#   parse options and arguments
#   process one file or a directory of files
# -----------------------------------------------------------------------------
def p2p_main():
    """Parse the command line, set the P2P_* switches and process the files.

    -f processes a single file with p2p_onefile.  -d walks a directory for
    files ending in P2P_PPC_EXT: serially, -s only reads the profile marks
    with p2p_profile, otherwise every file is translated; with -p the files
    are translated by a multiprocessing pool.

    With -p and -s together the totals are now summed after the pool has
    finished (previously they were printed before it ran and were always 0).
    """
    # command-line option parsing
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    version = "%prog v." + P2P_VERSION
    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
                      help="process all files in a directory given by PATH")
    parser.add_option("-f", "--filename", metavar="FILE", dest="ppcFile",
                      help="process single file(with path in the filename)")
    parser.add_option("-p", "--parallel",
                      action="store_true", dest="parallel", default=False,
                      help="processing all files in parallel processes")
    parser.add_option("-s", "--statistics",
                      action="store_true", dest="profile", default=False,
                      help="optimization profiling, require comment in outputs")
    parser.add_option("-c", "--combine-only",
                      action="store_false", dest="replace", default=True,
                      help="enable only combine function by disabling replace")
    parser.add_option("-r", "--replace-only",
                      action="store_false", dest="combine", default=True,
                      help="enable only replace function by disabling combine")
    # The original long names contain spaces and can only be given quoted;
    # they are kept and a hyphenated spelling is added next to each.
    parser.add_option("-b", "--compare-branch-disable",
                      "--compare branch disable",
                      action="store_false", dest="compare_branch", default=True,
                      help="only disabling fused compare branch function")
    parser.add_option("-v", "--virtual-double-disable",
                      "--virtual double disable",
                      action="store_false", dest="virtual_double", default=True,
                      help="only disabling fused virtual double function")
    parser.add_option("-e", "--eabi",
                      action="store_true", dest="vdw_sda", default=False,
                      help="enable virtual double word fusion targeting sda")
    parser.add_option("-n", "--no-comment",
                      action="store_false", dest="comment", default=True,
                      help="don't leave comment mark in output file")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose", default=True,
                      help="don't print status messages to stdout")
    (options, args) = parser.parse_args()

    # global program output verbose switch
    global P2P_VERBOSE; P2P_VERBOSE = options.verbose
    # leave a comment mark in output files
    global P2P_COMMENT; P2P_COMMENT = options.comment
    # enable instruction replace functions
    global P2P_REPLACE; P2P_REPLACE = options.replace
    # enable instruction combine functions
    global P2P_COMBINE; P2P_COMBINE = options.combine
    # enable virtual double word fusion targeting sda
    global P2P_VDW_SDA; P2P_VDW_SDA = options.vdw_sda
    # enable only fused compare and branch function
    global P2P_COMPARE_BRANCH; P2P_COMPARE_BRANCH = options.compare_branch
    # enable only combined virtual double function
    global P2P_VIRTUAL_DOUBLE; P2P_VIRTUAL_DOUBLE = options.virtual_double

    if P2P_VERBOSE:
        print("PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)")
        print("Version: " + P2P_VERSION)

    # single file processing
    if options.ppcFile:

        if P2P_VERBOSE:
            print("Processing signle file: " + options.ppcFile)

        p2p_onefile(options.ppcFile)

    # multiple files processing
    if options.ppcPath:

        if P2P_VERBOSE:
            print("Accessing all files at: " + options.ppcPath)
            print("*Parallel Process Mode: " + ("Off", "On")[options.parallel])

        # collect the work list first; local names avoid shadowing the
        # builtins 'file' and 'bytes'
        fileList = []
        for root, subdirs, names in os.walk(options.ppcPath):
            for name in fnmatch.filter(names, '*' + P2P_PPC_EXT):
                fileList.append(os.path.join(root, name))

        if options.parallel:
            # parallel processing mode
            from multiprocessing import Pool
            p = Pool()
            p.map(p2p_onefile, fileList)
            p.close()
            p.join()
        elif not options.profile:
            for path in fileList:
                p2p_onefile(path)

        if options.profile:
            # read the profile marks only after any translation has finished
            total_bytes = 0
            total_cycles = 0
            for path in fileList:
                space, cycle = p2p_profile(path)
                total_bytes += space * 4
                total_cycles += cycle
            print("Optimization Profiling: " + str(total_bytes) + " bytes, " +
                  str(total_cycles) + " cycles.")

    if P2P_VERBOSE:
        print("Done")
+ + # parallel processing mode + if options.parallel: + from multiprocessing import Pool + p = Pool() + p.map(p2p_onefile, fileList) + p.close() + p.join() + + if P2P_VERBOSE : print "Done" + + +# ----------------------------------------------------------------------------- +# python main +if __name__ == '__main__': + p2p_main() + diff --git a/tools/PowerPCtoPPE/ppe42_divw.S b/tools/PowerPCtoPPE/ppe42_divw.S new file mode 100644 index 00000000..563a8d48 --- /dev/null +++ b/tools/PowerPCtoPPE/ppe42_divw.S @@ -0,0 +1,208 @@ +/// \file ppe42_divw.S +/// \brief PPC405 word division instructions implemented by PPE ISA +/// +/// This file includes implementation for the following PPC405 instructions +/// divw RT, RA, RB +/// +/// Note: PPE ISA specific "fused compare and branch" instructions are used +/// +/// Revision History: +/// 09-22-2014: Initial Version by daviddu +/// + + .file "ppe42_divw.S" + .section ".text" + + /* + ** Code comment notation: + ** + ** msw = most-significant (high-order) word, i.e. bits 0..31 + ** lsw = least-significant (low-order) word, i.e. bits 32..63 + ** msh = most-significant (high-order) halfword, i.e. bits 0..15 + ** lsh = least-significant (low-order) halfword, i.e. bits 16..63 + ** + ** LZ = Leading Zeroes + ** SD = Significant Digits + ** OW = Register is overwritten, previous value is lost, + ** correct if previous value is no longer needed. + ** FU = Register is not overwritten, but its value is no longer needed, + ** in another word, the register is "free for use". + ** + ** PPE GPR Registers are: R0-R10, R13, R28-R31 + ** Volatile Registers are: R0, R3-R10 + ** Non-volatile registers are R28-R31 + */ + + /* + ** Caling Convention + ** + ** R2 and R13 are never saved or restored. In ABI or EABI application + ** these registers are constant. The other touched volatile registers + ** will be saved and restored by the subroutines. 
Note the caller + ** won't be saving those registers because these subroutines will be + ** instrumented into caller's body without compiler knowledge. + ** + ** Note R3 is not saved and restored because it will be changed for + ** return value anyway, the p2p script will make sure to restore it. + ** Also CR is handled because of compare and branch, but XER/CTR/LR + ** are not handled because they are untouched by the instructions used. + ** + ** Stack layout: + ** + ** 0x00 -- R1, Dedicated for Stack Pointer + ** 0x04 -- slot reserved for LR + ** 0x08 -- R4, Volatile, Private + ** 0x0c -- R5, Volatile, Private + ** 0x10 -- R6, Volatile, Private + ** 0x14 -- R7, Volatile, Private + ** 0x18 -- R8, Volatile, Private + ** 0x1c -- R9, Volatile, Private + ** 0x20 -- CR, Condition Register + ** 0x24 -- + ** + ** 0x28 -- Stack Size, Must be 8-byte aligned + */ + + /* + ** Division Procedures: + ** + ** __ppe42_divwu(dividend, divisor) + ** __ppe42_divw(dividend, divisor) + ** + ** R3 = Input parameter, dividend. then Return value, quotient. + ** R4 = Input parameter, divisor. + ** R5 = Output parameter, quotient. + ** R6 = Output parameter, remainder. + ** R7 = Temporary register, counter. 
+ ** + ** General Algorithm + ** + ** Using standard shift and subtract method to emulate + ** Note: dividend,divisor,quotient,remainder are all 32-bit integers + ** + ** Precondition Check: + ** + ** if (divisor == dividend) { + ** quotient = 1; + ** remainder = 0; + ** } + ** + ** if (divisor == 0) { + ** quotient = 0; + ** remainder = 0; + ** } + ** + ** if (divisor > dividend) { + ** quotient = 0; + ** remainder = dividend; + ** } + */ + +/*****************************************************************************/ + + /* + ** Divide Word Signed (__ppe42_divw) + ** + ** Using Divide Word Unsigned(divwu) to emulate + ** + ** dd = absolute(dividend); + ** dr = absolute(divisor); + ** [q,r] = __ppe42_divwu(dd, dr); + ** + ** quotient = q; + ** if (dividend < 0) { + ** remainder = -r; + ** if (divisor > 0) + ** quotient = -q; + ** } + ** else { + ** remainder = r; + ** if (divisor < 0) + ** quotient = -q; + ** } + */ + + .align 2 + .global __ppe42_divw + .type __ppe42_divw, @function + +__ppe42_divw: + + stwu %r1, -0x28(%r1) // allocate stack frame + + stvd %d4, 0x08(%r1) // save off r4 & r5 in stack + stvd %d6, 0x10(%r1) // save off r6 & r7 in stack + stvd %d8, 0x18(%r1) // save off r8 & r9 in stack + + mfcr %r5 // save off cr + stw %r5, 0x20(%r1) // store cr in stack + + li %r5, 1 // quotient = 1 + li %r6, 0 // remainder = 0 + cmplwbc 1, 2, %r3, %r4, __ppe42_divw_ret // ret(divisor == dividend) + + li %r5, 0 // quotient = 0 + li %r6, 0 // remainder = 0 + cmpwibc 1, 2, %r4, 0, __ppe42_divw_ret // ret(divisor == 0) + + cmpwibc 1, 1, %r3, 0, __ppe42_divw_csc // dividend(+) -> csc + neg %r3, %r3 // absolute(dividend) + li %r5, 1 // note dividend < 0 + +__ppe42_divw_csc: // <> + + cmpwibc 1, 1, %r4, 0, __ppe42_divw_uns // divisor(+) -> uns + neg %r4, %r4 // absolute(divisor) + li %r6, 1 // note divisor < 0 + +__ppe42_divw_uns: // <> + + mr %r8, %r5 // remember if dividend > 0 + xor %r9, %r5, %r6 // remember sign difference + + li %r5, 0 // quotient = 0 + mr %r6, %r3 
// remainder = dividend + cmplwbc 1, 0, %r3, %r4, __ppe42_divw_sign // ret(divisor > dividend) + + li %r7, 32 // num_of_bits = 32 + +__ppe42_divw_sas: // <> + + slwi %r6, %r6, 1 // remainder <<= 1 + inslwi %r6, %r3, 1, 31 // remainder[31] = dividend[0] + slwi %r3, %r3, 1 // dividend <<= 1 + slwi %r5, %r5, 1 // quotient <<= 1 + subi %r7, %r7, 1 // num_of_bits-- + cmplwbc 1, 0, %r6, %r4, __ppe42_divw_sas // continue(remainder> + + cmpwibc 1, 2, %r9, 0, __ppe42_divw_csh // if same sign, r5 stays + + neg %r5, %r5 // otherwise, neg(r5) + +__ppe42_divw_csh: // <> + + cmpwibc 1, 2, %r8, 0, __ppe42_divw_ret // if dividend>0, r6 stays + + neg %r6, %r6 // otherwise, neg(r6) + +__ppe42_divw_ret: // <> + + mr %r3, %r5 // r3 is the default return + + lwz %r5, 0x20(%r1) // load cr from stack + mtcr0 %r5 // restore cr + + lvd %d4, 0x08(%r1) // restore r4 & r5 from stack + lvd %d6, 0x10(%r1) // restore r6 & r7 from stack + lvd %d8, 0x18(%r1) // restore r8 & r9 from stack + + lwz %r1, 0(%r1) // restore stack pointer + + blr // branch back + + .size __ppe42_divw, .-__ppe42_divw + diff --git a/tools/PowerPCtoPPE/ppe42_divwu.S b/tools/PowerPCtoPPE/ppe42_divwu.S new file mode 100644 index 00000000..a208df65 --- /dev/null +++ b/tools/PowerPCtoPPE/ppe42_divwu.S @@ -0,0 +1,184 @@ +/// \file ppe42_divwu.S +/// \brief PPC405 word division instructions implemented by PPE ISA +/// +/// This file includes implementation for the following PPC405 instructions +/// divwu RT, RA, RB +/// +/// Note: PPE ISA specific "fused compare and branch" instructions are used +/// +/// Revision History: +/// 09-22-2014: Initial Version by daviddu +/// + + .file "ppe42_divwu.S" + .section ".text" + + /* + ** Code comment notation: + ** + ** msw = most-significant (high-order) word, i.e. bits 0..31 + ** lsw = least-significant (low-order) word, i.e. bits 32..63 + ** msh = most-significant (high-order) halfword, i.e. bits 0..15 + ** lsh = least-significant (low-order) halfword, i.e. 
bits 16..31 + ** + ** LZ = Leading Zeroes + ** SD = Significant Digits + ** OW = Register is overwritten, previous value is lost, + ** correct if previous value is no longer needed. + ** FU = Register is not overwritten, but its value is no longer needed, + ** in other words, the register is "free for use". + ** + ** PPE GPR Registers are: R0-R10, R13, R28-R31 + ** Volatile Registers are: R0, R3-R10 + ** Non-volatile registers are R28-R31 + */ + + /* + ** Calling Convention + ** + ** R2 and R13 are never saved or restored. In ABI or EABI application + ** these registers are constant. The other touched volatile registers + ** will be saved and restored by the subroutines. Note the caller + ** won't be saving those registers because these subroutines will be + ** instrumented into caller's body without compiler knowledge. + ** + ** Note R3 is not saved and restored because it will be changed for + ** return value anyway, the p2p script will make sure to restore it. + ** Also CR is handled because of compare and branch, but XER/CTR/LR + ** are not handled because they are untouched by the instructions used. + ** + ** Stack layout: + ** + ** 0x00 -- R1, Dedicated for Stack Pointer + ** 0x04 -- slot reserved for LR + ** 0x08 -- R4, Volatile, Private + ** 0x0c -- R5, Volatile, Private + ** 0x10 -- R6, Volatile, Private + ** 0x14 -- R7, Volatile, Private + ** 0x18 -- R8, Volatile, Private + ** 0x1c -- R9, Volatile, Private + ** 0x20 -- CR, Condition Register + ** 0x24 -- + ** + ** 0x28 -- Stack Size, Must be 8-byte aligned + */ + + /* + ** Division Procedures: + ** + ** __ppe42_divwu(dividend, divisor) + ** __ppe42_divw(dividend, divisor) + ** + ** R3 = Input parameter, dividend. then Return value, quotient. + ** R4 = Input parameter, divisor. + ** R5 = Output parameter, quotient. + ** R6 = Output parameter, remainder. + ** R7 = Temporary register, counter. 
+ ** + ** General Algorithm + ** + ** Using standard shift and subtract method to emulate + ** Note: dividend,divisor,quotient,remainder are all 32-bit integers + ** + ** Precondition Check: + ** + ** if (divisor == dividend) { + ** quotient = 1; + ** remainder = 0; + ** } + ** + ** if (divisor == 0) { + ** quotient = 0; + ** remainder = 0; + ** } + ** + ** if (divisor > dividend) { + ** quotient = 0; + ** remainder = dividend; + ** } + */ + +/*****************************************************************************/ + + /* + ** Divide Word Unsigned (__ppe42_divwu) + ** + ** The implementation uses standard shift and subtract approach. + ** The following is an example in C. Note the implementation doesnt + ** exactly follow the C example. + ** + ** num_of_bits = 32; + ** while(num_bits) { + ** dbit = (dividend & 0x80000000) >> 31; + ** remainder = (remainder << 1) | dbit; + ** dividend = dividend << 1; + ** quotient = quotient << 1; + ** num_of_bits--; + ** if(remainder < divisor) + ** continue; + ** temp = remainder - divisor; + ** qbit = !((temp & 0x80000000) >> 31); + ** quotient = quotient | qbit; + ** remainder = temp; + ** } + */ + + .align 2 + .global __ppe42_divwu + .type __ppe42_divwu, @function + +__ppe42_divwu: + + stwu %r1, -0x28(%r1) // allocate stack frame + + stvd %d4, 0x08(%r1) // save off r4 & r5 in stack + stvd %d6, 0x10(%r1) // save off r6 & r7 in stack + stvd %d8, 0x18(%r1) // save off r8 & r9 in stack + + mfcr %r5 // save off cr + stw %r5, 0x20(%r1) // store cr in stack + + li %r5, 1 // quotient = 1 + li %r6, 0 // remainder = 0 + cmplwbc 1, 2, %r3, %r4, __ppe42_divwu_ret // ret(divisor == dividend) + + li %r5, 0 // quotient = 0 + li %r6, 0 // remainder = 0 + cmpwibc 1, 2, %r4, 0, __ppe42_divwu_ret // ret(divisor == 0) + + li %r5, 0 // quotient = 0 + mr %r6, %r3 // remainder = dividend + cmplwbc 1, 0, %r3, %r4, __ppe42_divwu_ret // ret(divisor > dividend) + + li %r7, 32 // num_of_bits = 32 + +__ppe42_divwu_sas: // <> + + slwi %r6, %r6, 1 // 
remainder <<= 1 + inslwi %r6, %r3, 1, 31 // remainder[31] = dividend[0] + slwi %r3, %r3, 1 // dividend <<= 1 + slwi %r5, %r5, 1 // quotient <<= 1 + subi %r7, %r7, 1 // num_of_bits-- + cmplwbc 1, 0, %r6, %r4, __ppe42_divwu_sas // continue(remainder> + + mr %r3, %r5 // r3 is the default return + lwz %r5, 0x20(%r1) // load cr from stack + mtcr0 %r5 // restore cr + + lvd %d4, 0x08(%r1) // restore r4 & r5 from stack + lvd %d6, 0x10(%r1) // restore r6 & r7 from stack + lvd %d8, 0x18(%r1) // restore r8 & r9 from stack + + lwz %r1, 0(%r1) // restore stack pointer + + blr // branch back + + .size __ppe42_divwu, .-__ppe42_divwu + + diff --git a/tools/PowerPCtoPPE/ppe42_mulhw.S b/tools/PowerPCtoPPE/ppe42_mulhw.S new file mode 100644 index 00000000..d229121b --- /dev/null +++ b/tools/PowerPCtoPPE/ppe42_mulhw.S @@ -0,0 +1,193 @@ +/// \file ppe42_mulhw.S +/// \brief PPC405 word multiplication instructions implemented by PPE ISA +/// +/// This file includes implementation for the following PPC405 instructions +/// mulhw +/// +/// Note: PPE ISA specific "fused compare and branch" instructions are used +/// +/// Revision History: +/// 09-15-2014: Initial Version by daviddu +/// + + .file "ppe42_mulhw.S" + .section ".text" + + /* + ** Code comment notation: + ** + ** msw = most-significant (high-order) word, i.e. bits 0..31 + ** lsw = least-significant (low-order) word, i.e. bits 32..63 + ** msh = most-significant (high-order) halfword, i.e. bits 0..15 + ** lsh = least-significant (low-order) halfword, i.e. bits 16..63 + ** LZ = Leading Zeroes + ** SD = Significant Digits + ** OW = Register is overwritten, previous value is lost, + ** correct if previous value is no longer needed. + ** FU = Register is not overwritten, but its value is no longer needed, + ** in another word, the register is "free for use". 
+ ** + ** PPE GPR Registers are: R0-R10, R13, R28-R31 + ** Volatile Registers are: R0, R3-R10 + ** Non-volatile registers are R28-R31 + */ + + /* + ** Calling Convention + ** + ** R2 and R13 are never saved or restored. In ABI or EABI application + ** these registers are constant. The other touched volatile registers + ** will be saved and restored by the subroutines. Note the caller + ** won't be saving those registers because these subroutines will be + ** instrumented into caller's body without compiler knowledge. + ** + ** Note R3 is not saved and restored because it will be changed for + ** return value anyway, the p2p script will make sure to restore it. + ** Also CR is handled because of compare and branch, but XER/CTR/LR + ** are not handled because they are untouched by the instructions used. + ** + ** Stack layout: + ** + ** 0x00 -- R1, Dedicated for Stack Pointer + ** 0x04 -- slot reserved for LR + ** 0x08 -- R4, Volatile, Private + ** 0x0c -- R5, Volatile, Private + ** 0x10 -- R6, Volatile, Private + ** 0x14 -- R7, Volatile, Private + ** 0x18 -- R8, Volatile, Private + ** 0x1c -- R9, Volatile, Private + ** 0x20 -- CR, Condition Register + ** 0x24 -- + ** + ** 0x28 -- Stack Size, Must be 8-byte aligned + */ + + /* + ** Multiplication Procedures: + ** + ** __ppe42_mulhwu(U,V) + ** __ppe42_mulhw(U,V) + ** __ppe42_mullw(U,V) + ** + ** R3:R4 = Input parameter, multipliers: U, V. + ** R3 = Output parameter, either product.msw or product.lsw. 
+ ** R5-R9 = Temporary registers + ** + ** General Algorithm + ** + ** Using PPC405 ISA instruction 'mullhw' to emulate + ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh) + ** + ** U.msh U.lsh + ** X V.msh V.lsh + ** ------------------------ + ** A.msh A.lsh + ** B.msh B.lsh + ** C.msh C.lsh + ** D.msh D.lsh + ** ------------------------ + ** Product.msw Product.lsw + ** + ** __ppe42_mulhwu: Return Product.msh (unsigned) + ** __ppe42_mulhw: Return Product.msh (signed) + ** __ppe42_mullw: Return Product.lsh + ** + ** Precondition Check: + ** + ** if( U == 0 || V == 0 ) return P=0; + */ + +/*****************************************************************************/ + + /* + ** Multiply High Word Signed (__ppe42_mulhw) + ** + ** Using Multiply High Word Unsigned(mulhwu) to emulate + ** + ** u = absolute(U); + ** v = absolute(V); + ** p = __ppe42_mulhwu(u, v); + ** if( U[0] xor V[0] ) + ** p = -p + */ + + .align 2 + .global __ppe42_mulhw + .type __ppe42_mulhw, @function + +__ppe42_mulhw: + + stwu %r1, -0x28(%r1) // allocate stack frame + + stvd %d4, 0x08(%r1) // save off r4 & r5 in stack + stvd %d6, 0x10(%r1) // save off r6 & r7 in stack + stvd %d8, 0x18(%r1) // save off r8 & r9 in stack + + mfcr %r5 // save off cr + stw %r5, 0x20(%r1) // store cr in stack + + li %r5, 0 // r5 = 0 + cmpwibc 1, 2, %r3, 0, __ppe42_mulhw_ret // U=0 -> ret + cmpwibc 1, 2, %r4, 0, __ppe42_mulhw_ret // V=0 -> ret + + cmpwibc 1, 1, %r3, 0, __ppe42_mulhw_csc // U>0 -> csc + neg %r3, %r3 // absolute(U) + li %r5, 1 // U<0 -> r5 = 1 + +__ppe42_mulhw_csc: // <> + + li %r6, 0 // V>0 -> r6 = 0 + cmpwibc 1, 1, %r4, 0, __ppe42_mulhw_uns // V>0 -> uns + neg %r4, %r4 // absolute(V) + li %r6, 1 // V<0 -> r6 = 1 + +__ppe42_mulhw_uns: // <> + + xor %r9, %r5, %r6 // remember sign difference + + extrwi %r5, %r3, 16, 16 + srwi %r3, %r3, 16 + extrwi %r6, %r4, 16, 16 + srwi %r4, %r4, 16 + + mullhwu %r7, %r5, %r6 + srwi %r7, %r7, 16 + + mullhwu %r6, %r3, %r6 + extrwi %r8, %r6, 16, 16 + 
srwi %r6, %r6, 16 + add %r7, %r8, %r7 + + mullhwu %r5, %r5, %r4 + extrwi %r8, %r5, 16, 16 + srwi %r5, %r5, 16 + add %r7, %r8, %r7 + + srwi %r7, %r7, 16 + add %r7, %r7, %r6 + add %r7, %r7, %r5 + + mullhwu %r3, %r3, %r4 + add %r5, %r3, %r7 + + cmpwibc 1, 2, %r9, 0, __ppe42_mulhw_ret // if same sign, r5 stays + + neg %r5, %r5 // otherwise, neg(r5) + +__ppe42_mulhw_ret: + + mr %r3, %r5 // put return value to r3 + + lwz %r5, 0x20(%r1) // load cr from stack + mtcr0 %r5 // restore cr + + lvd %d4, 0x08(%r1) // restore r4 & r5 from stack + lvd %d6, 0x10(%r1) // restore r6 & r7 from stack + lvd %d8, 0x18(%r1) // restore r8 & r9 from stack + + lwz %r1, 0(%r1) // restore stack pointer + + blr + + .size __ppe42_mulhw, .-__ppe42_mulhw + + diff --git a/tools/PowerPCtoPPE/ppe42_mulhwu.S b/tools/PowerPCtoPPE/ppe42_mulhwu.S new file mode 100644 index 00000000..e92ee7ec --- /dev/null +++ b/tools/PowerPCtoPPE/ppe42_mulhwu.S @@ -0,0 +1,202 @@ +/// \file ppe42_mulhwu.S +/// \brief PPC405 word multiplication instructions implemented by PPE ISA +/// +/// This file includes implementation for the following PPC405 instructions +/// mulhwu +/// +/// Note: PPE ISA specific "fused compare and branch" instructions are used +/// +/// Revision History: +/// 09-15-2014: Initial Version by daviddu +/// + + .file "ppe42_mulhwu.S" + .section ".text" + + /* + ** Code comment notation: + ** + ** msw = most-significant (high-order) word, i.e. bits 0..31 + ** lsw = least-significant (low-order) word, i.e. bits 32..63 + ** msh = most-significant (high-order) halfword, i.e. bits 0..15 + ** lsh = least-significant (low-order) halfword, i.e. bits 16..63 + ** LZ = Leading Zeroes + ** SD = Significant Digits + ** OW = Register is overwritten, previous value is lost, + ** correct if previous value is no longer needed. + ** FU = Register is not overwritten, but its value is no longer needed, + ** in another word, the register is "free for use". 
+ ** + ** PPE GPR Registers are: R0-R10, R13, R28-R31 + ** Volatile Registers are: R0, R3-R10 + ** Non-volatile registers are R28-R31 + */ + + /* + ** Calling Convention + ** + ** R2 and R13 are never saved or restored. In ABI or EABI application + ** these registers are constant. The other touched volatile registers + ** will be saved and restored by the subroutines. Note the caller + ** won't be saving those registers because these subroutines will be + ** instrumented into caller's body without compiler knowledge. + ** + ** Note R3 is not saved and restored because it will be changed for + ** return value anyway, the p2p script will make sure to restore it. + ** Also CR is handled because of compare and branch, but XER/CTR/LR + ** are not handled because they are untouched by the instructions used. + ** + ** Stack layout: + ** + ** 0x00 -- R1, Dedicated for Stack Pointer + ** 0x04 -- slot reserved for LR + ** 0x08 -- R4, Volatile, Private + ** 0x0c -- R5, Volatile, Private + ** 0x10 -- R6, Volatile, Private + ** 0x14 -- R7, Volatile, Private + ** 0x18 -- R8, Volatile, Private + ** 0x1c -- R9, Volatile, Private + ** 0x20 -- CR, Condition Register + ** 0x24 -- + ** + ** 0x28 -- Stack Size, Must be 8-byte aligned + */ + + /* + ** Multiplication Procedures: + ** + ** __ppe42_mulhwu(U,V) + ** __ppe42_mulhw(U,V) + ** __ppe42_mullw(U,V) + ** + ** R3:R4 = Input parameter, multipliers: U, V. + ** R3 = Output parameter, either product.msw or product.lsw. 
+ ** R5-R9 = Temporary registers + ** + ** General Algorithm + ** + ** Using PPC405 ISA instruction 'mullhw' to emulate + ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh) + ** + ** U.msh U.lsh + ** X V.msh V.lsh + ** ------------------------ + ** A.msh A.lsh + ** B.msh B.lsh + ** C.msh C.lsh + ** D.msh D.lsh + ** ------------------------ + ** Product.msw Product.lsw + ** + ** __ppe42_mulhwu: Return Product.msh (unsigned) + ** __ppe42_mulhw: Return Product.msh (signed) + ** __ppe42_mullw: Return Product.lsh + ** + ** Precondition Check: + ** + ** if( U == 0 || V == 0 ) return P=0; + */ + +/*****************************************************************************/ + + /* + ** Multiply High Word Unsigned (__ppe42_mulhwu) + ** + ** r5 = U[16:31] or U.lsh | r5 = r3 & 0xffff + ** r3 = U[0:15] or U.msh | r3 = r3 >> 16 (r3 OW) + ** r6 = V[16:31] or V.lsh | r6 = r4 & 0xffff + ** r4 = V[0:15] or V.msh | r4 = r4 >> 16 (r4 OW) + ** + ** 4th column(drop A.lsh): + ** A = U.lsh * V.lsh [32] | r7 = r5 * r6 + ** A = A.msh [16] | r7 = r7 >> 16 (r7 OW) + ** + ** 3rd column(A = A.msh + B.lsh + C.lsh): + ** B = U.msh * U.lsh [32] | r6 = r3 * r6 (r6 OW) + ** T = B.lsh [16] | r8 = r6 & 0xffff + ** B = B.msh [16] | r6 = r6 >> 16 (r6 OW) + ** A = T + A [16] | r7 = r8 + r7 (r7 OW, r8 FU) + ** + ** C = U.lsh * V.msh [32] | r5 = r5 * r4 (r5 OW) + ** T = C.lsh [16] | r8 = r5 & 0xffff (r8 OW) + ** C = C.msh [16] | r5 = r5 >> 16 (r5 OW) + ** A = T + A [16] | r7 = r8 + r7 (r7 OW, r8 FU) + ** + ** 2nd column(A = 3rd_carry + B.msh + C.msh): + ** A = A.msh [16] | r7 = r7 >> 16 (r7 OW) + ** A = A + B [16] | r7 = r7 + r6 (r7 OW, r6 FU) + ** A = A + C [16] | r7 = r7 + r5 (r7 OW, r5 FU) + ** + ** 1st column(A = D + A): + ** D = U.msh * V.msh [32] | r3 = r3 * r4 (r3 OW, r4 FU) + ** P = D + A [32] | r5 = r3 + r7 (r3, r7 FU) + ** + ** Return P(r3=r5) as Product.msw unsigned + ** + ** Note: the implementation can be even shorter, the current + ** implementation is ensuring the 
overflow is avoided + ** by always adding 16 bits integer together. + */ + + .align 2 + .global __ppe42_mulhwu + .type __ppe42_mulhwu, @function + +__ppe42_mulhwu: + + stwu %r1, -0x28(%r1) // allocate stack frame + + stvd %d4, 0x08(%r1) // save off r4 & r5 in stack + stvd %d6, 0x10(%r1) // save off r6 & r7 in stack + stvd %d8, 0x18(%r1) // save off r8 & r9 in stack + + mfcr %r5 // save off cr + stw %r5, 0x20(%r1) // store cr in stack + + li %r5, 0 // r5 = 0 + cmpwibc 1, 2, %r3, 0, __ppe42_mulhwu_ret // U=0 -> ret + cmpwibc 1, 2, %r4, 0, __ppe42_mulhwu_ret // V=0 -> ret + + extrwi %r5, %r3, 16, 16 + srwi %r3, %r3, 16 + extrwi %r6, %r4, 16, 16 + srwi %r4, %r4, 16 + + mullhwu %r7, %r5, %r6 + srwi %r7, %r7, 16 + + mullhwu %r6, %r3, %r6 + extrwi %r8, %r6, 16, 16 + srwi %r6, %r6, 16 + add %r7, %r8, %r7 + + mullhwu %r5, %r5, %r4 + extrwi %r8, %r5, 16, 16 + srwi %r5, %r5, 16 + add %r7, %r8, %r7 + + srwi %r7, %r7, 16 + add %r7, %r7, %r6 + add %r7, %r7, %r5 + + mullhwu %r3, %r3, %r4 + add %r5, %r3, %r7 + +__ppe42_mulhwu_ret: + + mr %r3, %r5 // put return value to r3 + + lwz %r5, 0x20(%r1) // load cr from stack + mtcr0 %r5 // restore cr + + lvd %d4, 0x08(%r1) // restore r4 & r5 from stack + lvd %d6, 0x10(%r1) // restore r6 & r7 from stack + lvd %d8, 0x18(%r1) // restore r8 & r9 from stack + + lwz %r1, 0(%r1) // restore stack pointer + + blr + + .size __ppe42_mulhwu, .-__ppe42_mulhwu + + diff --git a/tools/PowerPCtoPPE/ppe42_mullw.S b/tools/PowerPCtoPPE/ppe42_mullw.S new file mode 100644 index 00000000..ee56df16 --- /dev/null +++ b/tools/PowerPCtoPPE/ppe42_mullw.S @@ -0,0 +1,174 @@ +/// \file ppe42_mullw.S +/// \brief PPC405 word multiplication instructions implemented by PPE ISA +/// +/// This file includes implementation for the following PPC405 instructions +/// mullw +/// +/// Note: PPE ISA specific "fused compare and branch" instructions are used +/// +/// Revision History: +/// 09-15-2014: Initial Version by daviddu +/// + + .file "ppe42_mullw.S" + .section ".text" + + 
/*
+    ** Code comment notation:
+    **
+    ** msw = most-significant (high-order) word, i.e. bits 0..31
+    ** lsw = least-significant (low-order) word, i.e. bits 32..63
+    ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+    ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
+    ** LZ = Leading Zeroes
+    ** SD = Significant Digits
+    ** OW = Register is overwritten, previous value is lost,
+    **      correct if previous value is no longer needed.
+    ** FU = Register is not overwritten, but its value is no longer needed,
+    **      in other words, the register is "free for use".
+    **
+    ** PPE GPR Registers are: R0-R10, R13, R28-R31
+    ** Volatile Registers are: R0, R3-R10
+    ** Non-volatile registers are R28-R31
+    */
+
+    /*
+    ** Calling Convention
+    **
+    ** R2 and R13 are never saved or restored. In ABI or EABI application
+    ** these registers are constant. The other touched volatile registers
+    ** will be saved and restored by the subroutines. Note the caller
+    ** won't be saving those registers because these subroutines will be
+    ** instrumented into caller's body without compiler knowledge.
+    **
+    ** Note R3 is not saved and restored because it will be changed for
+    ** return value anyways, the p2p script will make sure to restore it.
+    ** Also CR is handled because of compare and branch, but XER/CTR/LR
+    ** are not handled because they are untouched by the instructions used.
+    **
+    ** Stack layout:
+    **
+    ** 0x00 -- R1, Dedicated for Stack Pointer
+    ** 0x04 -- slot reserved for LR
+    ** 0x08 -- R4, Volatile, Private
+    ** 0x0c -- R5, Volatile, Private
+    ** 0x10 -- R6, Volatile, Private
+    ** 0x14 -- R7, Volatile, Private
+    ** 0x18 -- R8, Volatile, Private
+    ** 0x1c -- R9, Volatile, Private
+    ** 0x20 -- CR, Condition Register
+    ** 0x24 --
+    **
+    ** 0x28 -- Stack Size, Must be 8-byte aligned
+    */
+
+    /*
+    ** Multiplication Procedures:
+    **
+    ** __ppe42_mulhwu(U,V)
+    ** __ppe42_mulhw(U,V)
+    ** __ppe42_mullw(U,V)
+    **
+    ** R3:R4 = Input parameter, multipliers: U, V.
+    ** R3 = Output parameter, either product.msw or product.lsw.
+    ** R5-R9 = Temporary registers
+    **
+    ** General Algorithm
+    **
+    ** Using PPC405 ISA instruction 'mullhw' to emulate
+    ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh)
+    **
+    **                      U.msh   U.lsh
+    **  X                   V.msh   V.lsh
+    ** ------------------------------------
+    **                      A.msh   A.lsh
+    **              B.msh   B.lsh
+    **              C.msh   C.lsh
+    **      D.msh   D.lsh
+    ** ------------------------------------
+    **      Product.msw     Product.lsw
+    **
+    ** __ppe42_mulhwu: Return Product.msw (unsigned)
+    ** __ppe42_mulhw: Return Product.msw (signed)
+    ** __ppe42_mullw: Return Product.lsw
+    **
+    ** Precondition Check:
+    **
+    ** if( U == 0 || V == 0 ) return P=0;
+    */
+
+/*****************************************************************************/
+
+    /*
+    ** Multiply Low Word (__ppe42_mullw)
+    **
+    ** r5 = U[16:31] or U.lsh | r5 = r3 & 0xffff
+    ** r3 = U[0:15] or U.msh  | r3 = r3 >> 16 (r3 OW)
+    ** r6 = V[16:31] or V.lsh | r6 = r4 & 0xffff
+    ** r4 = V[0:15] or V.msh  | r4 = r4 >> 16 (r4 OW)
+    **
+    ** B = U.msh * V.lsh      | r3 = r3 * r6 (r3 OW)
+    ** B = B << 16            | r3 = r3 << 16
+    ** C = U.lsh * V.msh      | r4 = r5 * r4 (r4 OW)
+    ** C = C << 16            | r4 = r4 << 16
+    ** A = U.lsh * V.lsh      | r5 = r5 * r6 (r5 OW, r6 FU)
+    ** A = A + B              | r5 = r5 + r3 (r3 FU)
+    ** P = A + C              | r5 = r5 + r4 (r4 FU)
+    **
+    ** Return P(r3=r5) as Product.lsw
+    **
+    ** Note: there is no overflow case with this function
+    */
+
+    .align 2
+    .global __ppe42_mullw
+    .type __ppe42_mullw, @function
+
+__ppe42_mullw:
+
+    stwu %r1, -0x28(%r1) // allocate stack frame
+
+    stvd %d4, 0x08(%r1) // save off r4 & r5 in stack
+    stvd %d6, 0x10(%r1) // save off r6 & r7 in stack
+    stvd %d8, 0x18(%r1) // save off r8 & r9 in stack
+
+    mfcr %r5 // save off cr
+    stw %r5, 0x20(%r1) // store cr in stack
+
+    li %r5, 0 // r5 = 0 (r5 is copied to r3 at the ret label, so this is P=0)
+    cmpwibc 1, 2, %r3, 0, __ppe42_mullw_ret // U=0 -> ret
+    cmpwibc 1, 2, %r4, 0, __ppe42_mullw_ret // V=0 -> ret
+
+    extrwi %r5, %r3, 16, 16 // r5 = U.lsh
+    srwi %r3, %r3, 16 // r3 = U.msh
+    extrwi %r6, %r4, 16, 16 // r6 = V.lsh
+    srwi %r4, %r4, 16 // r4 = V.msh
+
+    mullhwu %r3, %r3, %r6 // B = U.msh * V.lsh
+    slwi %r3, %r3, 16 // B = B << 16
+
+    mullhwu %r4, %r5, %r4 // C = U.lsh * V.msh
+    slwi %r4, %r4, 16 // C = C << 16
+
+    mullhwu %r5, %r5, %r6 // A = U.lsh * V.lsh
+    add %r5, %r5, %r3 // A = A + B
+    add %r5, %r5, %r4 // P = A + C
+
+__ppe42_mullw_ret:
+
+    mr %r3, %r5 // put return value to r3
+
+    lwz %r5, 0x20(%r1) // load cr from stack
+    mtcr0 %r5 // restore cr
+
+    lvd %d4, 0x08(%r1) // restore r4 & r5 from stack
+    lvd %d6, 0x10(%r1) // restore r6 & r7 from stack
+    lvd %d8, 0x18(%r1) // restore r8 & r9 from stack
+
+    lwz %r1, 0(%r1) // restore stack pointer
+
+    blr
+
+    .size __ppe42_mullw, .-__ppe42_mullw
+
+
diff --git a/tools/ppetracepp/Makefile b/tools/ppetracepp/Makefile
new file mode 100644
index 00000000..41ad1cd2
--- /dev/null
+++ b/tools/ppetracepp/Makefile
@@ -0,0 +1,12 @@
+all: ppetracepp ppe2fsp
+
+ppetracepp: ppetracepp.C
+	g++ -m32 -O3 -w -g -I./ ppetracepp.C -o ppetracepp
+# g++ -O3 -w -x c++ -fPIC -g -I./ ppetracepp.C -o ppetracepp
+
+ppe2fsp: ppe2fsp.c ppe2fsp_cmd.c
+	gcc -m32 -w -g -I./ -I../../pk/trace ppe2fsp.c ppe2fsp_cmd.c -o ppe2fsp
+
+clean:
+	rm ppetracepp ppe2fsp
+
diff --git a/tools/ppetracepp/jhash.h b/tools/ppetracepp/jhash.h
new file mode 100755
index 00000000..128ca9a7
--- /dev/null
+++ b/tools/ppetracepp/jhash.h
@@ -0,0 +1,143 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose. It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault. -DaveM
+ */
+
+/* NOTE: Arguments are modified.
*/
+#define __jhash_mix(a, b, c) \
+{ \
+    a -= b; a -= c; a ^= (c>>13); \
+    b -= c; b -= a; b ^= (a<<8); \
+    c -= a; c -= b; c ^= (b>>13); \
+    a -= b; a -= c; a ^= (c>>12); \
+    b -= c; b -= a; b ^= (a<<16); \
+    c -= a; c -= b; c ^= (b>>5); \
+    a -= b; a -= c; a ^= (c>>3); \
+    b -= c; b -= a; b ^= (a<<10); \
+    c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO 0x9e3779b9
+
+/* The most generic version, hashes an arbitrary sequence
+ * of bytes. No alignment or length assumptions are made about
+ * the input key.
+ *
+ * key     - bytes to hash (read one byte at a time, little-endian packing)
+ * length  - number of bytes in key
+ * initval - seed that initializes the third accumulator
+ * Returns the third accumulator after the final mix.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+    u32 a, b, c, len;
+    const u8 *k = (const u8*)key;
+
+    len = length;
+    a = b = JHASH_GOLDEN_RATIO;
+    c = initval;
+
+    /* consume the key in 12-byte groups, 4 bytes per accumulator */
+    while (len >= 12) {
+        a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
+        b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
+        c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
+
+        __jhash_mix(a,b,c);
+
+        k += 12;
+        len -= 12;
+    }
+
+    /* The total length goes into c; the tail bytes for c below start at
+     * bit 8, so they do not share the low byte with it.
+     */
+    c += length;
+    /* every case deliberately falls through to the ones below it */
+    switch (len) {
+    case 11: c += ((u32)k[10]<<24);
+    case 10: c += ((u32)k[9]<<16);
+    case 9 : c += ((u32)k[8]<<8);
+    case 8 : b += ((u32)k[7]<<24);
+    case 7 : b += ((u32)k[6]<<16);
+    case 6 : b += ((u32)k[5]<<8);
+    case 5 : b += k[4];
+    case 4 : a += ((u32)k[3]<<24);
+    case 3 : a += ((u32)k[2]<<16);
+    case 2 : a += ((u32)k[1]<<8);
+    case 1 : a += k[0];
+    };
+
+    __jhash_mix(a,b,c);
+
+    return c;
+}
+
+/* A special optimized version that handles 1 or more of u32s.
+ * The length parameter here is the number of u32s in the key.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+    u32 a, b, c, len;
+
+    a = b = JHASH_GOLDEN_RATIO;
+    c = initval;
+    len = length;
+
+    /* consume the key three words at a time */
+    while (len >= 3) {
+        a += k[0];
+        b += k[1];
+        c += k[2];
+        __jhash_mix(a, b, c);
+        k += 3; len -= 3;
+    }
+
+    /* length is in words, so add it as a byte count */
+    c += length * 4;
+
+    /* remaining one or two words; case 2 deliberately falls through */
+    switch (len) {
+    case 2 : b += k[1];
+    case 1 : a += k[0];
+    };
+
+    __jhash_mix(a,b,c);
+
+    return c;
+}
+
+
+/* Special ultra-optimized versions that know they are hashing exactly
+ * 3, 2 or 1 word(s).
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ * done at the end is not done here.
+ */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+    a += JHASH_GOLDEN_RATIO;
+    b += JHASH_GOLDEN_RATIO;
+    c += initval;
+
+    __jhash_mix(a, b, c);
+
+    return c;
+}
+
+/* Two-word variant: the third word is fixed at 0. */
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+    return jhash_3words(a, b, 0, initval);
+}
+
+/* One-word variant: the second and third words are fixed at 0. */
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+    return jhash_3words(a, 0, 0, initval);
+}
+
+#endif /* _LINUX_JHASH_H */
diff --git a/tools/ppetracepp/ppe2fsp b/tools/ppetracepp/ppe2fsp
new file mode 100755
index 00000000..1f7b56f2
Binary files /dev/null and b/tools/ppetracepp/ppe2fsp differ
diff --git a/tools/ppetracepp/ppe2fsp.c b/tools/ppetracepp/ppe2fsp.c
new file mode 100755
index 00000000..419a8d49
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp.c
@@ -0,0 +1,500 @@
+#include "pk_trace.h"
+#include "ppe2fsp.h"
+#include "trac_interface.h"
+#include
+#include
+#include
+
+#define TRACE_BUF_VERSION 0x01 /*!< Trace buffer version */
+#define TRACE_FIELDTRACE 0x4654 /*!< Field Trace - "FT" */
+#define TRACE_FIELDBIN 0x4644 /*!< Binary Field Trace - "FD" */
+
+#define TRAC_TIME_REAL 0 // upper 32 = seconds, lower 32 = nanoseconds
+#define TRAC_TIME_50MHZ 1
+#define TRAC_TIME_200MHZ 2
+#define TRAC_TIME_167MHZ 3 // 166666667Hz
+
+typedef struct
+{
+    trace_entry_stamp_t stamp;
+    trace_entry_head_t head;
+    union
+    {
+        uint8_t data[PK_TRACE_MAX_BINARY + 1]; //add 1 byte for
padding
+        uint32_t parms[PK_TRACE_MAX_PARMS];
+    };
+    uint32_t size;
+}largest_fsp_entry_t;
+
+//Staging area for one ppe trace entry. The footer sits at the end and the
+//parameter/binary data is stored right-justified in front of it.
+typedef struct
+{
+    union
+    {
+        uint8_t binary_data[PK_TRACE_MAX_BINARY + 1];
+        struct
+        {
+            uint8_t rsvd[(PK_TRACE_MAX_BINARY + 1) - (PK_TRACE_MAX_PARMS * sizeof(uint32_t))];
+            uint32_t parms[PK_TRACE_MAX_PARMS];
+        };
+    };
+    PkTraceEntryFooter footer;
+}LargestPpeEntry;
+
+//convert a ppe timestamp to an fsp trace timestamp
+//
+//ppe_time is a tick count and hz is the tick frequency. The result holds
+//whole seconds in the upper 32 bits and nanoseconds in the lower 32 bits.
+//Returns 0 when hz is 0; hz is read from the input trace buffer, so a
+//corrupt buffer must not be able to cause a division by zero here.
+uint64_t ppe2fsp_time(uint64_t ppe_time, uint32_t hz)
+{
+    uint32_t seconds;
+    uint32_t remainder;
+    uint32_t nseconds;
+
+    if(hz == 0)
+    {
+        return 0;
+    }
+
+    //convert from ppe ticks to seconds and nanoseconds
+    seconds = ppe_time / hz;
+    remainder = ppe_time - (((uint64_t)seconds) * hz);
+    nseconds = (((uint64_t)remainder) * 1000000000) / hz;
+    return (((uint64_t)seconds) << 32) | nseconds;
+}
+
+//Writes an fsp trace entry to the fsp trace buffer
+//
+//The data area that follows the fsp header is filled from its end towards
+//its start: bytes_left is the free space remaining at the front. When the
+//entry does not fit, only its last bytes_left bytes are copied to the very
+//start of the data area.
+void fsp_put_entry(trace_buf_head_t* tb, largest_fsp_entry_t* fte, size_t entry_size, uint32_t bytes_left)
+{
+    char* buffer = ((char*)tb) + sizeof(trace_buf_head_t);
+    char* tb_start;
+    char* fte_start;
+    uint32_t copy_bytes;
+
+    if(entry_size <= bytes_left)
+    {
+        tb_start = buffer + bytes_left - entry_size;
+        fte_start = (char*)fte;
+        copy_bytes = entry_size;
+    }
+    else
+    {
+        tb_start = buffer;
+        fte_start = ((char*)fte) + (entry_size - bytes_left);
+        copy_bytes = bytes_left;
+    }
+
+    memcpy(tb_start, fte_start, copy_bytes);
+}
+
+
+//convert a ppe trace entry to an fsp trace entry
+//
+//ptb        - ppe trace buffer header (hz, instance id and hash prefix are
+//             read from it)
+//pte        - ppe entry as filled in by ppe_get_entry
+//pte_size   - size of that entry (footer plus padded data)
+//fte        - fsp entry to fill in
+//ppe_time64 - 64 bit ppe timestamp of the entry
+//
+//Returns the number of bytes written to fte, including the trailing size
+//word, or 0 if the entry format is not recognized.
+size_t pte2fte(PkTraceBuffer* ptb,
+               LargestPpeEntry* pte,
+               size_t pte_size,
+               largest_fsp_entry_t* fte,
+               uint64_t ppe_time64)
+{
+    size_t entry_size;
+    PkTraceGeneric* pte_footer = &pte->footer.generic;
+    uint32_t format;
+    uint32_t hash32;
+    uint32_t hash32_partial;
+    uint32_t* parm_start;
+    uint32_t parm_bytes;
+    uint64_t fsp_time64;
+
+    //convert the ppe trace time to an fsp trace time
+    fsp_time64 = ppe2fsp_time(ppe_time64, ntohl(ptb->hz));
+
+    //fill in the 64 bit timestamp
+    fte->stamp.tbh = htonl((uint32_t)(fsp_time64 >> 32));
+    fte->stamp.tbl = htonl((uint32_t)(fsp_time64 & 0x00000000ffffffffull));
+
+    //use the ppe instance id as the thread id.
+    fte->stamp.tid = htonl((uint32_t)ntohs(ptb->instance_id));
+
+    //merge the hash prefix and the string_id fields together for a 32 bit hash value
+    hash32 = ((uint32_t)ntohs(ptb->hash_prefix)) << 16;
+    hash32 |= pte_footer->string_id;
+    fte->head.hash = htonl(hash32);
+
+    //generate the 32bit hash value for a partial trace entry in case it's needed
+    hash32_partial = ((uint32_t)ntohs(ptb->hash_prefix)) << 16;
+    hash32_partial |= ntohs(ptb->partial_trace_hash);
+
+    //set the line number to 1
+    fte->head.line = htonl(1);
+
+    //determine the FSP trace format
+    format = PK_GET_TRACE_FORMAT(pte_footer->time_format.word32);
+    if(format == PK_TRACE_FORMAT_BINARY)
+    {
+        fte->head.tag = htons(TRACE_FIELDBIN);
+    }
+    else
+    {
+        fte->head.tag = htons(TRACE_FIELDTRACE);
+    }
+
+    //the entry is right-justified in pte, so it starts pte_size bytes
+    //before the end of the structure
+    parm_start = (uint32_t*)(((char*)pte) + (sizeof(LargestPpeEntry) - pte_size));
+
+    //fill in the parameters/binary data and size at the end
+    switch(format)
+    {
+
+        case PK_TRACE_FORMAT_TINY:
+            //one or 0 parameters
+            entry_size = sizeof(trace_entry_stamp_t) +
+                         sizeof(trace_entry_head_t) +
+                         sizeof(uint32_t);
+            fte->parms[0] = htonl((uint32_t)(pte_footer->parm16));
+            fte->head.length = htons(sizeof(uint32_t));
+            parm_bytes = 0;
+            break;
+
+        case PK_TRACE_FORMAT_BIG:
+            //1 - 4 parameters
+            //
+            //If the trace entry data is incomplete (not all parm data
+            //had been written at the time the trace was captured) then
+            //we will write a trace to the fsp buffer that says
+            //"PARTIAL TRACE ENTRY. HASH_ID = %d"
+            if(pte_footer->complete)
+            {
+                //NOTE(review): length and entry_size include one uint32_t
+                //more than parm_bytes, but only parm_bytes are copied
+                //below -- confirm this is intended.
+                parm_bytes = pte_footer->bytes_or_parms_count * sizeof(uint32_t);
+                fte->head.length = htons(parm_bytes + sizeof(uint32_t));
+                entry_size = sizeof(trace_entry_stamp_t) +
+                             sizeof(trace_entry_head_t) +
+                             parm_bytes + sizeof(uint32_t);
+            }
+            else
+            {
+                parm_bytes = 0;
+                entry_size = sizeof(trace_entry_stamp_t) +
+                             sizeof(trace_entry_head_t) +
+                             sizeof(uint32_t);
+                fte->parms[0] = fte->head.hash; //already corrected for endianess
+                fte->head.hash = htonl(hash32_partial);
+                fte->head.length = htons(sizeof(uint32_t));
+            }
+            break;
+
+        case PK_TRACE_FORMAT_BINARY:
+            //If the trace entry data is incomplete (not all parm data
+            //had been written at the time the trace was captured) then
+            //we will write a trace to the fsp buffer that says
+            //"PARTIAL TRACE ENTRY. HASH_ID = %d"
+            if(pte_footer->complete)
+            {
+                parm_bytes = pte_footer->bytes_or_parms_count;
+                fte->head.length = htons((uint16_t)parm_bytes);
+                entry_size = sizeof(trace_entry_stamp_t) +
+                             sizeof(trace_entry_head_t) +
+                             parm_bytes;
+
+                //pad to 4 byte boundary
+                entry_size = (entry_size + 3) & ~3;
+            }
+            else
+            {
+                parm_bytes = 0;
+                entry_size = sizeof(trace_entry_stamp_t) +
+                             sizeof(trace_entry_head_t) +
+                             sizeof(uint32_t);
+                fte->parms[0] = fte->head.hash;
+                fte->head.hash = htonl(hash32_partial);
+                fte->head.length = htons(sizeof(uint32_t));
+                //the partial-entry message is a field trace, not binary
+                fte->head.tag = htons(TRACE_FIELDTRACE);
+            }
+            break;
+
+
+        default:
+            entry_size = 0;
+            parm_bytes = 0;
+            break;
+    }
+
+    //copy parameter bytes to the fsp entry if necessary
+    if(parm_bytes)
+    {
+        memcpy(fte->data, parm_start, parm_bytes);
+    }
+
+    //add the entry size to the end
+    if(entry_size)
+    {
+        uint32_t new_entry_size = entry_size + sizeof(uint32_t);
+        *((uint32_t*)(((char*)fte) + entry_size)) = htonl(new_entry_size);
+        entry_size = new_entry_size;
+    }
+
+    return entry_size;
+}
+
+//retrieve a ppe trace entry from a ppe trace buffer
+size_t ppe_get_entry(PkTraceBuffer* tb, uint32_t offset, LargestPpeEntry* pte)
+{
+    //Copies the entry that ends at byte offset "offset" of the circular
+    //buffer tb->cb into pte: the footer fields are byte swapped into
+    //pte->footer and the (8 byte padded) data is stored right in front of
+    //it. Returns footer size plus padded data size, or 0 for an unknown
+    //format.
+    //NOTE(review): bytes_or_parms_count is not checked against the capacity
+    //of LargestPpeEntry before the copies below -- confirm that the input
+    //can be trusted.
+    uint32_t mask = ntohs(tb->size) - 1;
+    PkTraceEntryFooter* footer;
+    size_t entry_size;
+    size_t parm_size;
+    char* dest = (char*)pte;
+    uint32_t format;
+    uint32_t start_index;
+    uint32_t bytes_left;
+    uint32_t bytes_to_copy;
+
+    //Find the footer in the circular buffer
+    footer = (PkTraceEntryFooter*)(&tb->cb[(offset - sizeof(PkTraceEntryFooter)) & mask]);
+
+    //always correct endianess for the time and string id words
+    pte->footer.generic.time_format.word32 = ntohl(footer->generic.time_format.word32);
+    pte->footer.generic.string_id = ntohs(footer->generic.string_id);
+
+    //only need to byte swap the parm16 value if this is a tiny format
+    pte->footer.generic.parm16 = footer->generic.parm16;
+
+    //use footer data to determine the length of the binary data or parameters
+    format = PK_GET_TRACE_FORMAT(pte->footer.generic.time_format.word32);
+    switch(format)
+    {
+        case PK_TRACE_FORMAT_TINY:
+            pte->footer.generic.parm16 = ntohs(pte->footer.generic.parm16);
+            parm_size = 0;
+            entry_size = sizeof(PkTraceEntryFooter);
+            break;
+
+        case PK_TRACE_FORMAT_BIG:
+            parm_size = pte->footer.generic.bytes_or_parms_count * sizeof(uint32_t);
+            entry_size = sizeof(PkTraceEntryFooter);
+            break;
+
+        case PK_TRACE_FORMAT_BINARY:
+            parm_size = pte->footer.generic.bytes_or_parms_count;
+            entry_size = sizeof(PkTraceEntryFooter);
+            break;
+
+        default:
+            entry_size = 0;
+            parm_size = 0;
+            break;
+    }
+
+    //pad to 8 byte boundary
+    parm_size = (parm_size + 7) & ~0x00000007ul;
+
+    //add the parameter size to the total entry size
+    entry_size += parm_size;
+
+    //copy the entry from the circular buffer to pte
+    start_index = (offset - entry_size) & mask;
+    bytes_left = ntohs(tb->size) - start_index;
+
+    //only copy up to the end of the circular buffer
+    if(parm_size < bytes_left)
+    {
+        bytes_to_copy = parm_size;
+    }
+    else
+    {
+        bytes_to_copy = bytes_left;
+    }
+
+    //right-justify the entry inside pte
+    dest += sizeof(LargestPpeEntry) - entry_size;
+    memcpy(dest, &tb->cb[start_index], bytes_to_copy);
+
+    //now copy the rest of the data starting from the beginning of the
+    //circular buffer.
+    if(bytes_to_copy < parm_size)
+    {
+        memcpy(dest + bytes_to_copy, tb->cb, parm_size - bytes_to_copy);
+    }
+
+    //return the size of the entry
+    return entry_size;
+}
+
+//convert a ppe trace buffer to an fsp trace buffer
+//
+//in      - ppe trace buffer (PkTraceBuffer)
+//in_size - number of valid bytes at in
+//out     - receives the fsp trace buffer (header followed by entries)
+//io_size - on entry the capacity of out in bytes, on success the number
+//          of bytes actually used
+//
+//Returns 0 on success or one of the P2F_* codes.
+int ppe2fsp(void* in, unsigned long in_size, void* out, unsigned long* io_size)
+{
+    PkTraceBuffer* ptb = (PkTraceBuffer*)in;
+    trace_buf_head_t* ftb = (trace_buf_head_t*)out;
+    uint32_t ppe_bytes_left;
+    uint32_t fsp_bytes_left;
+    int rc = 0;
+    uint32_t ptb_offset;
+    PkTraceEntryFooter* ptb_te;
+    uint64_t ppe_time64;
+    uint32_t fte_size, pte_size;
+    uint32_t fsp_te_count = 0;
+    uint32_t time_diff32, prev_time32, new_time32;
+    PkTraceGeneric* pte_footer;
+    largest_fsp_entry_t fte;
+    LargestPpeEntry pte;
+
+    //single-pass loop so that every error path can simply "break"
+    do
+    {
+        if(!ptb || !ftb || !io_size)
+        {
+            rc = P2F_NULL_POINTER;
+            break;
+        }
+
+        //NOTE(review): the version field is read before in_size has been
+        //checked against the header size below.
+        if(ntohs(ptb->version) != PK_TRACE_VERSION)
+        {
+            rc = P2F_INVALID_VERSION;
+            break;
+        }
+
+        //check that the input buffer is large enough to have a ppe trace buffer
+        //NOTE(review): this only covers the header in front of cb[]; in_size
+        //is not compared with the header plus ptb->size.
+        if(in_size < (((uint32_t)(&ptb->cb[0])) - (uint32_t)(ptb)))
+        {
+            rc = P2F_INPUT_BUFFER_TOO_SMALL;
+            break;
+        }
+
+        //initialize some locals
+        //(fsp_bytes_left wraps around if *io_size is smaller than the fsp
+        //header; that case is rejected below before the value is used)
+        fsp_bytes_left = *io_size - sizeof(trace_buf_head_t);
+        ppe_bytes_left = ntohs(ptb->size);
+        ptb_offset = ntohl(ptb->state.offset);
+
+        //make sure the ppe buffer size is a power of two
+        if((ppe_bytes_left - 1) & ppe_bytes_left)
+        {
+            //size is not a power of two
+            rc = P2F_INVALID_INPUT_SIZE;
+            break;
+        }
+
+        //The ppe bytes field should always be a multiple of 8
+        if(ptb_offset & 0x7)
+        {
+            rc = P2F_INVALID_PPE_OFFSET;
+            break;
+        }
+
+        //make sure there is enough room for the fsp header
+        if(*io_size < sizeof(trace_buf_head_t))
+        {
+            rc = P2F_OUTPUT_BUFFER_TOO_SMALL;
+            break;
+        }
+
+
+        //initialize the fsp header
+        ftb->ver = TRACE_BUF_VERSION;
+        ftb->hdr_len = sizeof(trace_buf_head_t);
+        ftb->time_flg = TRAC_TIME_REAL;
+        ftb->endian_flg = 'B'; //big endian
+        memcpy(ftb->comp,
ptb->image_str, sizeof(ftb->comp));
+        ftb->times_wrap = htonl(1);
+        ftb->size = htonl(sizeof(trace_buf_head_t) + sizeof(uint32_t));
+        ftb->next_free = htonl(sizeof(trace_buf_head_t));
+        ftb->extracted = htonl(0);
+        ftb->te_count = htonl(0);
+
+        //find the latest timestamp so that we can work back from there
+        //NOTE(review): the mask 0xefffffff clears bit 28 only, not the top
+        //bit -- confirm this is the intended mask.
+        ppe_time64 = ((uint64_t)(ntohl(ptb->state.tbu32) & 0xefffffff)) << 32;
+        pte_size = ppe_get_entry(ptb, ptb_offset, &pte);
+        prev_time32 = PK_GET_TRACE_TIME(pte.footer.generic.time_format.word32);
+        ppe_time64 |= prev_time32;
+
+        //process all of the input bytes one trace entry at a time
+        //from newest to oldest (backwards) until we run out of input bytes or
+        //we run out of output space.
+        while(1)
+        {
+            //check if we have enough data for a ppe footer
+            if(ppe_bytes_left < sizeof(PkTraceEntryFooter))
+            {
+                break;
+            }
+
+            //get the next ppe entry
+            pte_size = ppe_get_entry(ptb, ptb_offset, &pte);
+
+            //Stop if there are no more entries to retrieve from the ppe trace buffer
+            if(!pte_size)
+            {
+                break;
+            }
+            pte_footer = &pte.footer.generic;
+
+            //mark the entry as incomplete if we didn't have enough data
+            //for the entire entry
+            if(pte_size > ppe_bytes_left)
+            {
+                pte_footer->complete = 0;
+                ppe_bytes_left = 0;
+            }
+            else
+            {
+                ppe_bytes_left -= pte_size;
+                ptb_offset -= pte_size;
+            }
+
+            //Calculate the 64 bit timestamp for this entry....
+            //On PPE, getting the timestamp is not done atomically with writing
+            //the entry to the buffer. This means that an entry with an older
+            //timestamp could possibly be added to the buffer after an entry
+            //with a newer timestamp. Detect this condition by checking if the
+            //time difference is bigger than the max difference. The max
+            //difference is enforced by the PPE having a trace added on a
+            //shorter time boundary (using a timer).
+            new_time32 = PK_GET_TRACE_TIME(pte_footer->time_format.word32);
+            time_diff32 = prev_time32 - new_time32;
+
+            if(time_diff32 > ntohl(ptb->max_time_change))
+            {
+                //this entry is newer than the previous one: move forward
+                time_diff32 = new_time32 - prev_time32;
+                ppe_time64 += time_diff32;
+            }
+            else
+            {
+                ppe_time64 -= time_diff32;
+            }
+
+            //save off the lower 32bit timestamp for the next iteration
+            prev_time32 = new_time32;
+
+            //convert the ppe trace entry to an fsp trace entry
+            fte_size = pte2fte(ptb, &pte, pte_size, &fte, ppe_time64);
+
+            //fit as much of the entry into the fsp trace buffer as possible
+            fsp_put_entry(ftb, &fte, fte_size, fsp_bytes_left);
+
+            //update the fsp trace entry count
+            fsp_te_count++;
+
+            //stop if there is no more room left in the fsp trace buffer
+            if(fte_size >= fsp_bytes_left)
+            {
+                fsp_bytes_left = 0;
+                ftb->times_wrap = htonl(1);
+                break;
+            }
+            else
+            {
+                fsp_bytes_left -= fte_size;
+            }
+        }//while(1)
+
+
+        //shift the trace data up if there is space to do so
+        //(entries were written from the end of the output buffer backwards,
+        //so the unused space sits between the header and the first entry)
+        if(fsp_bytes_left)
+        {
+            char* dest = ((char*)ftb) + sizeof(trace_buf_head_t);
+            char* src = dest + fsp_bytes_left;
+            size_t data_size = *io_size - sizeof(trace_buf_head_t) - fsp_bytes_left;
+            memmove(dest, src, data_size);
+        }
+
+        //update the fsp header to reflect the true size and entry count
+        ftb->te_count = htonl(fsp_te_count);
+
+        //inform the caller of how many bytes were actually used
+        *io_size -= fsp_bytes_left;
+
+        //shrink the size field to what we actually ended up using
+        ftb->size = htonl(*io_size);
+
+    }while(0);
+
+    return rc;
+}
+
+
+
diff --git a/tools/ppetracepp/ppe2fsp.h b/tools/ppetracepp/ppe2fsp.h
new file mode 100644
index 00000000..04eee616
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp.h
@@ -0,0 +1,10 @@
+
+
+#define P2F_NULL_POINTER 1
+#define P2F_INVALID_INPUT_SIZE 2
+#define P2F_INVALID_PPE_OFFSET 3
+#define P2F_OUTPUT_BUFFER_TOO_SMALL 4
+#define P2F_INPUT_BUFFER_TOO_SMALL 5
+#define P2F_INVALID_VERSION 6
+
+int ppe2fsp(void* in, unsigned long in_size, void* out, unsigned long* io_size);
diff --git
a/tools/ppetracepp/ppe2fsp_cmd.c b/tools/ppetracepp/ppe2fsp_cmd.c
new file mode 100644
index 00000000..c99734d8
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp_cmd.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include "ppe2fsp.h"
+#include "pk_trace.h"
+
+#define MAX_INPUT_SIZE 0x2040 //8k (0x2000) plus 0x40 bytes
+#define MAX_OUTPUT_SIZE (4 * MAX_INPUT_SIZE)
+
+//Raw byte buffers for the ppe trace read in and the fsp trace written out.
+char inbuf[MAX_INPUT_SIZE];
+char outbuf[MAX_OUTPUT_SIZE];
+
+//Command line front end for ppe2fsp().
+//
+//Usage: ppe2fsp [input file] [output file]
+//The input defaults to stdin and the output to stdout.
+//
+//Returns 0 on success, the P2F_* code if the conversion fails, and -1 on
+//a usage or I/O error.
+int main(int argc, char** argv)
+{
+    FILE* in = NULL;
+    FILE* out = NULL;
+    size_t input_size;
+    unsigned long output_size; //same type as the io_size argument of ppe2fsp
+    size_t bytes_written;
+    int rc = -1;
+
+    do
+    {
+        if(argc > 3)
+        {
+            fprintf(stderr, "Usage: %s [input file] [output file]\n", argv[0]);
+            break;
+        }
+
+        //Open the input first so that a bad input path does not truncate
+        //an existing output file.
+        if(argc < 2)
+        {
+            in = stdin;
+        }
+        else
+        {
+            //open the input file for reading (trace data is binary)
+            in = fopen(argv[1], "rb");
+            if(!in)
+            {
+                perror("failed to open file for reading");
+                break;
+            }
+        }
+
+        if(argc < 3)
+        {
+            out = stdout;
+        }
+        else
+        {
+            //open the output file for writing
+            out = fopen(argv[2], "wb");
+            if(!out)
+            {
+                perror("failed to open file for writing");
+                break;
+            }
+        }
+
+        //read the input stream until we reach EOF or the max size
+        input_size = fread(inbuf, 1, MAX_INPUT_SIZE, in);
+        if(ferror(in))
+        {
+            perror("failed to read input file");
+            break;
+        }
+
+        //A completely filled buffer is only an error if more data follows.
+        if(input_size == MAX_INPUT_SIZE && fgetc(in) != EOF)
+        {
+            fprintf(stderr, "Input stream exceeds max size of %d bytes. Exiting.\n", MAX_INPUT_SIZE);
+            break;
+        }
+
+        output_size = MAX_OUTPUT_SIZE;
+
+        //Actual size of output buffer will be set upon successful completion
+        rc = ppe2fsp(inbuf, input_size, outbuf, &output_size);
+        if(rc)
+        {
+            fprintf(stderr, "Failed converting ppe trace to fsp trace. rc = %d\n", rc);
+            if(rc == P2F_INVALID_VERSION)
+            {
+                fprintf(stderr, "PPE trace buffer must be version %d.\n", PK_TRACE_VERSION);
+            }
+            break;
+        }
+
+        rc = -1;
+        //operation was successful. Write out the fsp trace data
+        bytes_written = fwrite(outbuf, 1, output_size, out);
+        if(bytes_written != output_size)
+        {
+            if(ferror(out))
+            {
+                perror("Failed to write output stream");
+                break;
+            }
+            fprintf(stderr, "Failure: Only able to write %lu of %lu bytes to output stream\n",
+                    (unsigned long)bytes_written, output_size);
+            break;
+        }
+
+        rc = 0;
+    }while(0);
+
+    //Release the streams on every path. stdin/stdout are left open;
+    //stdout is flushed so that a failed write is still reported.
+    if(in && in != stdin)
+    {
+        fclose(in);
+    }
+    if(out)
+    {
+        int close_rc = (out == stdout) ? fflush(out) : fclose(out);
+        if(close_rc != 0 && rc == 0)
+        {
+            perror("Failed to write output stream");
+            rc = -1;
+        }
+    }
+
+    return rc;
+}
+
+
+
+
+
+
diff --git a/tools/ppetracepp/ppetracepp b/tools/ppetracepp/ppetracepp
new file mode 100755
index 00000000..52269147
Binary files /dev/null and b/tools/ppetracepp/ppetracepp differ
diff --git a/tools/ppetracepp/ppetracepp.C b/tools/ppetracepp/ppetracepp.C
new file mode 100755
index 00000000..c5378fe7
--- /dev/null
+++ b/tools/ppetracepp/ppetracepp.C
@@ -0,0 +1,922 @@
+
+/*
+# *** ppetracepp - a fsp/common Linux trace pre processor
+# this one replaces the trace strings by the corresponding hash value
+# (i.e. the complete call to trace_adal_hash is replaced)
+
+# *** Usage
+#
+# prepend compiler call with the call of this pre processor, i.e if you have
+# $(CC) $(CFLAGS) -o $@ $<
+# in your Makefile change it to this:
+# ppetracepp $(CC) $(CFLAGS) -o $@ $<
+# ppetracepp will use "$(CC) -E" to call the C pre processor "cpp".
+# you can set a env var "REALCPP" to the name of a program to select
+# a different program as cpp
+#
+# ppetracepp creates a file "$target.hash" with the trace strings and the hash values.
+#
+# to enable debug mode set envvar PPETRACEPPDEBUG to 1 or give '-d' as first arg
+
+# *** Change History
+#
+# 2003-02-26 RBa created from scratch
+# 2003-02-28 RBa add C++ support (C++ interface uses own type for the hash)
+# 2003-05-28 RBa if cc should link instead of compile just call compiler
+# 2003-07-11 AGe Change search alg.
slightly and put just format back
+# 2003-07-25 RBa just call gcc if called to link instead to compile
+# eat argument for option -x
+# 2003-11-26 RBa fix c/c++ algo: compile as c++ if realcc=*g++
+# 2004-02-02 RBa remove explicit test whether source file is readable
+# it is obsolete and might lead to an error if afs is used
+# 2004-02-13 RBa add support for dependency generation (-MD/-MG, -MF)
+# don't prepend './' to object filename
+# 2006-04-19 RBa rewrite trace_adal_write_all support, handle C and C++ the same
+# 2006-05-24 RBa fix handling of missing -o ; add TRAC_PPVER for macro/API version
+# 2006-09-15 RBa add handling of \" in trace format strings ; reduce non-error output
+# put object file in current dir if no -o given
+# 2007-03-22 RBa handle more gcc options (-i) ; protect " in call to shell
+# store output of cpp as "unmodified" output for debug
+# only write string/hash file if strings found
+# 2012-09-24 hlava Rewritten as C program for better build performance (was perl)
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+typedef u_int32_t u32 ;
+typedef u_int8_t u8 ;
+#include
+
+using namespace std;
+
+static string version = "2.0";
+static string macro_version = "1";
+
+static bool debug = false;
+// Debug printf: prints and flushes stdout only when 'debug' is set.
+// NOTE(review): expands to a bare 'if' statement, so it is unsafe directly
+// in front of an 'else'.
+#define dprintf(format, ...) if (debug) { printf(format, ##__VA_ARGS__); fflush(stdout); }
+static map hashtab;
+static string hashtype;
+static string hashtype_suffix;
+
+static string tmp;
+static string cmd;
+static FILE* CPP = NULL; // pipe from preprocessor
+static FILE* CC = NULL; // pipe to compiler
+static FILE* DEBUG = NULL;
+static FILE* DEBUGIN = NULL;
+
+//*****************************************************************************
+// replace_substr
+//
+// Replaces every occurrence of oldStr in str by newStr, in place. The text
+// that was just inserted is not searched again, so newStr may contain oldStr.
+// An empty oldStr leaves str unchanged (it would otherwise match at every
+// position and the loop would never end).
+//*****************************************************************************
+void replace_substr(std::string& str, const std::string& oldStr, const std::string& newStr)
+{
+    if (oldStr.empty()) { return; }
+
+    size_t pos = 0;
+    while((pos = str.find(oldStr, pos)) != std::string::npos)
+    {
+        str.replace(pos, oldStr.length(), newStr);
+        pos += newStr.length();
+    }
+
+}
+
+//*****************************************************************************
+// fileparse
+//
+// Splits in_str into directory (with trailing '/'), base name and suffix
+// (with leading '.'). The suffix starts at the last '.' of the whole string
+// and the name at the last '/' of what is left.
+// NOTE(review): without a '/' in in_str, 'name' stays empty and 'dir'
+// receives the base name; a '.' inside the directory part is taken as the
+// suffix start when the file name itself has none -- confirm callers expect
+// this.
+//*****************************************************************************
+void fileparse(const string& in_str, string& name, string& dir, string& suff)
+{
+    string str(in_str);
+    size_t pos;
+    name = "";
+    dir = "";
+    suff = "";
+    pos = str.find_last_of('.');
+    if (pos != string::npos)
+    {
+        suff = str.substr(pos);
+        str = str.substr(0, pos);
+    }
+    pos = str.find_last_of('/');
+    if (pos != string::npos)
+    {
+        name = str.substr(pos+1);
+        str = str.substr(0, pos+1);
+    }
+    dir = str;
+}
+
+// NOTE(review): "trace_adal_hash" is 15 characters long, not 14.
+static const size_t TRACE_ADAL_HASH_LEN = 14;
+//*****************************************************************************
+// chop_up_line
+//*****************************************************************************
+bool chop_up_line(string& in_line, string& prefix, string& strings, string& salt, string& suffix)
+{
+    // First see if this line matches the pattern we're looking for
+    // Since this will return false 95%+ of the time this function it called, we do it
+    // before doing any other init for performance reasons.
+ size_t pos = in_line.find("trace_adal_hash"); + if (pos == string::npos) { return(false); } + + // trace_adal_hash ( "..." ".." "..." , 2 ) + // regex: PREFIX 'trace_adal_hash' space '(' space STRINGS space ',' space NUMBER space ')' SUFFIX + // STRINGS: '"' .* '"' space? + + + // Original perl magic incantation: + // while($line =~ m/^(.*?)trace_adal_hash\s*\(\s*((".*?(? $1 = everything up to the word "trace_adal_hash" + // trace_adal_hash = delimiter + // \s*\(\s* = delimiter = <0-n whitespace chars>, left paren, <0-n whitespace chars> + // ((".*?(? $2 = double-quote, some chars up to last closing double-quote ($3 used for nested regex) + // ,\s* = delimiter = comma followed by some whitespace + // (-?\d+)\s*\)(.*) => $4 and $5 + // $/) = end of the line input string + string line(in_line); + prefix = ""; + strings = ""; + salt = ""; + suffix = ""; + size_t pos1; + size_t pos2; + size_t pos3; + + pos1 = pos + 15; // pos1 = after "trace_adal_hash" + pos2 = line.find("(", pos1); + if (pos2 == string::npos) { return(false); } + ++pos2; + pos3 = line.find("\"", pos2); + if (pos3 == string::npos) { return(false); } + dprintf("--------\nchop_up_line: Passed basic checks. line= %s\n", line.c_str()); + dprintf("pos1=%d, pos2=%d, pos3=%d\n", pos1, pos2, pos3); + if ((pos1 != (pos2-1)) && (line.find_first_not_of(" \t", pos1, (pos2-pos1)+1) != string::npos)) { return(false); } //non-whitespace? + if ((pos2 != pos3) && (line.find_first_not_of(" \t", pos2, (pos3-pos2)) != string::npos)) { return(false); } //non-whitespace? 
+ + // Get the prefix data + dprintf(">chop_up_line(\"%s\",...)\n", line.c_str()); + prefix = line.substr(0, pos); + line = line.substr(pos + TRACE_ADAL_HASH_LEN); + dprintf(" prefix=\"%s\"\n", prefix.c_str()); + + // Get the strings and join/fix them: Store all strings between paired double-quotes up to the + // first comma not inside a string + pos = line.find_first_of('('); + if (pos == string::npos) { return(false); } + line = line.substr(pos + 1); + strings = ""; + while(!line.empty()) + { + pos = line.find_first_of(','); + pos1 = line.find_first_of('"'); + if ((pos1 == string::npos) || ((pos != string::npos) && (pos < pos1))) { break; } // found comma before next quote + pos2 = line.find_first_of('"', (pos1+1)); + if (pos2 == string::npos) { return(false); } // unbalanced quotes! + while(line[pos2-1] == '\\') // skip escaped quotes in the string (they're not the ending quote) + { + pos2 = line.find_first_of('"', (pos2+1)); + if (pos2 == string::npos) { return(false); } // unbalanced quotes! 
+ } + if (!strings.empty()) { strings += " "; } + strings += line.substr(pos1, (pos2-pos1)+1); + line = line.substr(pos2+1); + } + replace_substr(strings, "\" \"", ""); + replace_substr(strings, "\\\"", "ESCAPEDQUOTE"); + replace_substr(strings, "\"", ""); + replace_substr(strings, "ESCAPEDQUOTE", "\""); + // Remove trailing whitespace ah20130717 + pos = strings.find_last_not_of(" \t\n"); + if ((pos != string::npos) && (pos < (strings.length()-1))) + { + strings = strings.substr(0, pos+1); + } + + dprintf(" strings>%s<\n", strings.c_str()); + + // Get the salt + pos = line.find(","); + if (pos != string::npos) { line = line.substr(pos+1); } + pos = line.find_first_of(')'); + if (pos == string::npos) { return(false); } + salt = line.substr(0, pos); + line = line.substr(pos+1); + //dprintf(" salt=\"%s\"\n", salt.c_str()); + pos = salt.find_first_not_of(" \t\n"); + if (pos == string::npos) { return(false); } + salt = salt.substr(pos); + pos = salt.find_last_not_of(" \t\n"); + if (pos == string::npos) { return(false); } + salt = salt.substr(0, pos+1); + dprintf(" salt=\"%s\"\n", salt.c_str()); + + // Get the suffix (i.e. 
the rest) + suffix = line; + if (suffix[suffix.length()-1] == '\n') { suffix = suffix.substr(0, suffix.length()-1); } + dprintf(" suffix=\"%s\"\nget_hash(\"%s\",%d)\n", str.c_str(), salt_num); + + // Call jhash function to get the hash value + hash_num = jhash((void*)str.c_str(), str.length(), salt_num); + dprintf("jhash() returned: %u\n", hash_num); + sprintf(buf, "%u", hash_num & 0x0000ffff); + hash16 = buf; + sprintf(buf, "%u", ((salt_num << 16) | (hash_num & 0x0000ffff))); + hash32 = buf; + + // validate the hash value + size_t pos = hash32.find_first_not_of("0123456789"); + if (pos != string::npos) + { + fprintf(stderr, "trexhash error: %s\n", hash32.c_str()); + fprintf(stderr, "for call <<%s>>\n", cmd.c_str()); + exit(1); + } + +//removing this since it doesn't seem to have any affect on the output +#if 0 + // If hash is empty, use the sum of the ord values in the original string + if ((hash == "")||(hash == "0")) + { + unsigned int len = str.length(); + unsigned int hash_num = 0; + //unsigned char conv_buf[2] = { '\0', '\0' }; + u_int8_t conv_num; + for (unsigned int i=0; i < len; ++i) + { + //conv_buf[0] = str[i]; + conv_num = (u_int8_t)str[i]; + hash_num += (unsigned int)conv_num; + } + } +#endif + + dprintf("& rhash, string& line, string& out_line) +{ + // NOTE: "line" arg may get modified by this function! Caller must not assume it's unchanged. + string format; + string prefix; + string strings; + string tmp; + string salt; + string hash16; + string hash32; + int salt_num; + int format_salt; + string suffix; + string write_all_suffix; + size_t pos; + + out_line = ""; + // trace_adal_hash ( "..." ".." "..." , 2 ) + // regex: PREFIX 'trace_adal_hash' space '(' space STRINGS space ',' space NUMBER space ')' SUFFIX + // STRINGS: '"' .* '"' space? + + //while($line =~ m/^(.*?)trace_adal_hash\s*\(\s*((".*?(? 
1) arg = argv[1]; + if ((argc < 2) || (arg == "-h")) + { + fprintf(stderr, "usage: %s realcompiler compileroptions -o target source\n", argv[0]); + exit(9); + } + string realcc(argv[argi++]); + string cctype("c++"); + bool optx_found = false; + + if (realcc == "-d") + { + debug = true; + realcc = argv[argi++]; + } + + // wait until -d options is handled before checking $debug + dprintf("ppetracepp version %s - API/macro version %s\n", version.c_str(), macro_version.c_str()); + + p_env = getenv("REALCPP"); + string realcpp; + if (p_env) + realcpp = p_env; + if (realcpp.empty()) + { + dprintf("cannot find cpp, using -E\n"); + realcpp = realcc; + realcpp += " -E"; + } + dprintf("realcpp is %s\n", realcpp.c_str()); + +//------------------------------------------------------------------------------ +// parse all the arguments +//------------------------------------------------------------------------------ +string source; +string object; +vector ccopts; +vector cppopts; +bool dodeps = false; +string depfile; +string pfx; +string sfx; +int origargi = argi; +for( ; argi < argc; ++argi) +{ + arg = argv[argi]; + dprintf("Processing argv[%d]: \"%s\"\n", argi, arg.c_str()); + if (arg.length() > 2) + { + pfx = arg.substr(0,2); + sfx = arg.substr(arg.length()-2); + } + else + { + pfx = arg; + sfx = arg; + } + dprintf(" pfx: \"%s\" sfx: \"%s\"\n", pfx.c_str(), sfx.c_str()); + + if (pfx == "-o") + { + if (! 
object.empty()) + { + fprintf(stderr, "two -o options, aborting\n"); + exit(1); + } + if (arg.length() > 2) + { + object = sfx; + } + else + { + object = argv[++argi]; + } + dprintf("object is now %s\n", object.c_str()); + } + else if (arg == "-c") + { + // don't call cpp with -c, this is for the compiler + ccopts.push_back(arg); + dprintf("found -c option\n"); + } + else if (pfx == "-l") + { + // cpp doesn't need library arguments + cppopts.push_back(arg); + } + else if (pfx == "-i") + { + // option takes an argument, handle it too + optarg = argv[argi++]; + ccopts.push_back(arg); + ccopts.push_back(optarg); + cppopts.push_back(arg); + cppopts.push_back(optarg); + dprintf("found option '%s %s'\n", arg.c_str(), optarg.c_str()); + } + else if ((arg == "-L") || + (arg == "-I") || + (arg == "-x") || + (arg == "-b") || + (arg == "-B") || + (arg == "-V") || + (arg == "-D") || + (arg == "--param") || + (arg == "-MQ") || + (arg == "-MT")) + { + // option takes an argument, handle it too + optarg = argv[argi++]; + ccopts.push_back(arg); + ccopts.push_back(optarg); + cppopts.push_back(arg); + cppopts.push_back(optarg); + dprintf("found option '%s %s'\n", arg.c_str(), optarg.c_str()); + if (arg == "-x") + { + // option x sets the language - c or c++ + if ((optarg != "c") && (optarg != "c++") && (optarg != "assembler-with-cpp")) + { + fprintf(stderr, "cannot process language '%s', aborting\n", optarg.c_str()); + exit(1); + } + cctype = optarg; + optx_found = true; + } + } + else if ((arg == "-MD")||(arg == "-MG")) + { + // gen deps + dodeps = true; + dprintf("found %s, creating dependencies\n", arg.c_str()); + } + else if (arg == "-MF") + { + // set dependencies file + depfile = argv[argi++]; + dprintf("set dependencies file to '%s'\n", depfile.c_str()); + } + else if (arg[0] == '-') + { + // arg starts with - so it's an option + ccopts.push_back(arg); + cppopts.push_back(arg); + dprintf("found option '%s'\n", arg.c_str()); + } + else if ((sfx == ".a") || + (sfx == ".o")) + { 
+ // an object or archive, ignore this but give it to cc + ccopts.push_back(arg); + dprintf("found object/archive '%s'\n", arg.c_str()); + } + else if ((sfx == ".c") || + (sfx == ".C") || + (sfx == ".S") || + (arg.substr(arg.length()-4) == ".cpp") || + (arg.substr(arg.length()-4) == ".cxx")) + { + // the source file(s). we should only get one + if (!source.empty()) + { + fprintf(stderr, "don't know to handle two source files, aborting\n"); + exit(1); + } + source = arg; + // put the - (for read-from-stdin) where the source file was + // (order is important!) + ccopts.push_back("-"); + dprintf("found source file %s\n", source.c_str()); + } + else if (access(arg.c_str(), F_OK)) + { + // option but not a file, an unknown option? + ccopts.push_back(arg); + cppopts.push_back(arg); + dprintf("found unknown option '%s'\n", arg.c_str()); + } +} + +//------------------------------------------------------------------------------ +// set other parameters based on arguments specified +//------------------------------------------------------------------------------ +if (source.empty()) +{ + // this might be a call to link a program instead of compile a source (or asm source) + dprintf("NOTME: starting as cc '%s ...'\n", realcc.c_str()); + execvp(realcc.c_str(), &(argv[origargi])); + fprintf(stderr, "ERROR: returned from execvp() call to run %s\n", realcc.c_str()); +} +if (object.empty()) +{ + dprintf("no object file given, default to source name\n"); + // gcc builds object name from source name if no -o given, replacing + // suffix with .o. The file is placed in the current directory, + // not in the source directory! 
+ string n; + string d; + string s; + fileparse(source, n, d, s); + if (!n.empty() && !s.empty()) + { + object = n + ".o"; + dprintf("tracpp: guessing object name %s\n", object.c_str()); + dprintf(" from source name %s\n", source.c_str()); + } + else + { + fprintf(stderr, "Unable to determine Source File Name\n"); + exit(1);; + } +} + +// set value of trace hash according to language +// check source file extension if no explicit -x option given +if (!optx_found) +{ + if (realcc.find("g++") != string::npos) + { + dprintf("compiler language: C++ (from compiler name)\n"); + cctype = "c++"; + } + else + { + if (source.substr(source.length()-2) == ".c") + { + dprintf("compiler language: C (from source file extension)\n"); + cctype = "c"; + } + else if (source.substr(source.length()-2) == ".S") + { + dprintf("compiler language: assembly (from source file extension)\n"); + cctype = "assembler-with-cpp"; + } + else + { + dprintf("compiler language: C++ (default)\n"); + cctype = "c++"; + } + } +} +else +{ + dprintf("compiler language: %s (from option '-x')\n", cctype.c_str()); +} + +if (cctype == "c") +{ + hashtype = "(unsigned short)"; + hashtype_suffix = "U"; +} +else if (cctype == "assembler-with-cpp") +{ + hashtype = ""; + hashtype_suffix = ""; +} +else +{ + hashtype = "(trace_hash_val)"; + hashtype_suffix = "U"; +} +// define TRAC_PPETRACEPP for macros +tmp = "-DTRAC_PPETRACEPP -DTRAC_PPVER="; +tmp += macro_version; +cppopts.push_back(tmp); +if (dodeps) +{ + if (depfile.empty()) + { + if ((p_env = getenv("DEPENDENCIES_OUTPUT")) != NULL) + { + depfile = p_env; + } + else if ((p_env = getenv("SUNPRO_DEPENDENCIES")) != NULL) + { + depfile = p_env; + } + else + { + depfile = object; + if (depfile.substr(depfile.length()-2) == ".o") + { + depfile = depfile.substr(0, depfile.length()-2); + depfile += ".d"; + } + } + } + tmp = "-MD -MF "; + tmp += depfile; + cppopts.push_back(tmp); +} + +//------------------------------------------------------------------------------ +// 
start cpp +//------------------------------------------------------------------------------ +cmd = realcpp; +for(vector::iterator p = cppopts.begin(); p != cppopts.end(); ++p) +{ + cmd += " "; + cmd += *p; +} +cmd += " "; +cmd += source; +cmd += " -o-"; +dprintf("starting as cpp '%s'\n", cmd.c_str()); +CPP = popen(cmd.c_str(), "r"); +if (CPP == NULL) +{ + fprintf(stderr, "cannot start cpp '%s'\n", realcpp.c_str()); + perror(""); + exit(1); +} + +//------------------------------------------------------------------------------ +// start cc. manually set language as source file extension not available to cc +//------------------------------------------------------------------------------ +string type_str = ""; +if (!optx_found) +{ + // no option -x given by caller, set manually + type_str = "-x "; + type_str += cctype; +} +cmd = realcc; +cmd += " "; +cmd += type_str; +for(vector::iterator p = ccopts.begin(); p != ccopts.end(); ++p) +{ + cmd += " "; + cmd += *p; +} +cmd += " -o "; +cmd += object; +dprintf("starting as cc '%s'\n", cmd.c_str()); +CC = popen(cmd.c_str(), "w"); +if (CC == NULL) +{ + fprintf(stderr, "cannot start cc '%s'\n", realcc.c_str()); + perror(""); + exit(1); +} + +string modifiedfile; +string unmodifiedfile; +if (debug) +{ + modifiedfile = object + ".debug"; + DEBUG = fopen(modifiedfile.c_str(), "w"); + if (DEBUG == NULL) + { + string msg = "cannot open file "; + msg += modifiedfile; + perror(msg.c_str()); + modifiedfile = ""; + } + else + { + fprintf(stderr, "writing preprocessed source to %s\n", modifiedfile.c_str()); + } + unmodifiedfile = object + ".debug_in"; + DEBUGIN = fopen(unmodifiedfile.c_str(), "w"); + if (DEBUGIN == NULL) + { + string msg = "cannot open file "; + msg += unmodifiedfile; + perror(msg.c_str()); + unmodifiedfile = ""; + } + else + { + fprintf(stderr, "writing unprocessed source to %s\n", unmodifiedfile.c_str()); + } +} + +string oldline; +string newline; +static const int MAX_BUFFER = 51200; +char buf[MAX_BUFFER]; +while 
(!feof(CPP)) +{ + if (fgets(buf, MAX_BUFFER, CPP) != NULL) + { + oldline = buf; + if (DEBUGIN) { fprintf(DEBUGIN, "%s", oldline.c_str()); } + parse_line(hashtab, oldline, newline); + //#print "oldline = $oldline"; + //#print "newline = $newline"; + if (newline.empty()) + { + fprintf(stderr, "hash error in/with file %s\n", source.c_str()); + exit(1); + } + //#print "newline = $newline\n"; + fprintf(CC, "%s", newline.c_str()); + if (DEBUG) { fprintf(DEBUG, "%s", newline.c_str()); } + } +} +if (DEBUG) { fclose(DEBUG); } +if (DEBUGIN) { fclose(DEBUGIN); } +int cmd_rc = pclose(CPP); +if (cmd_rc) +{ + fprintf(stderr, "error from cpp\n"); + if (cmd_rc & 127) + { + fprintf(stderr, "cpp got signal %d\n", (cmd_rc & 127)); + exit(1); + } + else if (cmd_rc >> 8) + { + fprintf(stderr, "cpp returned %d\n", (cmd_rc >> 8)); + exit(cmd_rc >> 8); + } +} +cmd_rc = pclose(CC); +if (cmd_rc) +{ + fprintf(stderr, "error from cc\n"); + if (cmd_rc & 127) + { + fprintf(stderr, "cc got signal %d\n", (cmd_rc & 127)); + exit(1); + } + else if (cmd_rc >> 8) + { + fprintf(stderr, "cc returned %d\n", (cmd_rc >> 8)); + exit(cmd_rc >> 8); + } +} + +if (!hashtab.empty()) +{ + string stringfile = object; + stringfile += ".hash"; + // open trace string file + FILE* TRC = fopen(stringfile.c_str(), "w"); + if (TRC == NULL) + { + fprintf(stderr, "cannot write trace string file '%s'\n", stringfile.c_str()); + exit(1); + } + dprintf("Writing to file %s\n", stringfile.c_str()); + + string pwd; + FILE* PWD = popen("pwd", "r"); + fgets(buf, MAX_BUFFER, PWD); + pwd = buf; + pclose(PWD); + time_t tt = time(NULL); + sprintf(buf, "%s", asctime(localtime(&tt))); + buf[strlen(buf)-1] = '\0'; // chop off extra newline + fprintf(TRC, "#FSP_TRACE_v2|||%s|||BUILD:%s", buf, pwd.c_str()); + + string srch_str = "||"; + srch_str += source; + int srch_str_len = srch_str.length(); + size_t pos; + for(map::iterator p = hashtab.begin(); p != hashtab.end(); ++p) + { + pos = (p->second).find(srch_str); + if ((pos != 
string::npos) && ((pos + srch_str_len) == (p->second).length())) + { + // source file name is already part of the string + fprintf(TRC, "%s||%s\n", (p->first).c_str(), (p->second).c_str()); + } + else + { + fprintf(TRC, "%s||%s||%s\n", (p->first).c_str(), (p->second).c_str(), source.c_str()); + } + //#print TRC "$key||$source||$hashtab{$key}\n"; + } + fclose(TRC); +} +else +{ + dprintf("No trace calls/strings found, not writing hash file\n"); +} +} // end main diff --git a/tools/ppetracepp/trac_interface.h b/tools/ppetracepp/trac_interface.h new file mode 100755 index 00000000..74510745 --- /dev/null +++ b/tools/ppetracepp/trac_interface.h @@ -0,0 +1,363 @@ +// IBM_PROLOG_BEGIN_TAG +// This is an automatically generated prolog. +// +// occ820 src/occc/405/occ/trac/trac_interface.h 1.4 +// +// IBM CONFIDENTIAL +// +// OBJECT CODE ONLY SOURCE MATERIALS +// +// COPYRIGHT International Business Machines Corp. 2011,2012 +// All Rights Reserved +// +// The source code for this program is not published or otherwise +// divested of its trade secrets, irrespective of what has been +// deposited with the U.S. Copyright Office. +// +// IBM_PROLOG_END_TAG +/****************************************************************************** +// @file trac_interface.h +// @brief Interface codes for TRAC component. 
+*/ +/****************************************************************************** + * + * @page ChangeLogs Change Logs + * @section _trac_interface_h trac_interface.h + * @verbatim + * + * Flag Def/Fea Userid Date Description + * ------- ---------- -------- ---------- ---------------------------------- + * TEAM 06/16/2010 Port + * @rc003 rickylie 02/03/2012 Verify & Clean Up OCC Headers & Comments + * @pb00E pbavari 03/11/2012 Added correct include file + * @at009 859308 alvinwan 10/15/2012 Added tracepp support + * @ai005 860268 ailutsar 11/20/2012 Create trace test applet + * @rc005 864101 rickylie 12/12/2012 add small circ buffer to handle ISR semaphore conflict + * + * @endverbatim + * + *///*************************************************************************/ + +#ifndef _TRAC_INTERFACE_H +#define _TRAC_INTERFACE_H + +/** \defgroup Trace Trace Component + * Port of the trace code used in the fsp and tpmd. + */ + +//************************************************************************* +// Includes +//************************************************************************* + +//************************************************************************* +// Externs +//************************************************************************* + +//************************************************************************* +// Macros +//************************************************************************* +/** + * \defgroup TracMacros Trace Component Macro Interfaces + * \ingroup Trace + * Used to trace 0 - 5 arguments or a binary buffer when using a hash value. + */ +/*@{*/ + + +// @at009c - start +#define TRACE(i_td,i_string,args...) 
\ + trace_adal_write_all(i_td,trace_adal_hash(i_string,-1),__LINE__,0,##args) + +#define TRACEBIN(i_td,i_string,i_ptr,i_size) \ + trac_write_bin(i_td,trace_adal_hash(i_string,0),__LINE__,i_ptr,i_size) + +#ifndef NO_TRAC_STRINGS + +#define FIELD(a) \ + printf("%s",a) + +#define FIELD1(a,b) \ + printf("%s%lx",a,(unsigned long)b) + +#else // NO_TRAC_STRINGS + +#define FIELD(a) + +#define FIELD1(a,b) + +#endif // NO_TRAC_STRINGS + +#define SUCCESS 0 +// @at009c - end + + + +/*@}*/ + +//************************************************************************* +// Defines/Enums +//************************************************************************* +#define TRACE_MAX_ARGS 5 /*!< Maximum number of args to trace */ + +typedef uint32_t trace_hash_val; + +// NOTE! Increment this when new components are added! +#define TRAC_NUM_TRACE_COMPONENTS 1 + + +#define TRACE_BUFFER_SIZE 8192 // @ai005a + +#define CIRCULAR_BUFFER_SIZE 4 // @rc005a + +typedef uint32_t UINT32; +typedef int32_t INT32; +typedef uint8_t UCHAR; +typedef uint8_t UINT8; +typedef int8_t INT8; +typedef uint16_t UINT16; +typedef int16_t INT16; +typedef char CHAR; +typedef unsigned int UINT; +typedef unsigned long ULONG; +typedef int INT; +typedef void VOID; + +//************************************************************************* +// Structures +//************************************************************************* +/* + * @brief Structure is put at beginning of all trace buffers + */ +typedef struct trace_buf_head { + UCHAR ver; /*!< version of this struct (1) */ + UCHAR hdr_len; /*!< size of this struct in bytes */ + UCHAR time_flg; /*!< meaning of timestamp entry field */ + UCHAR endian_flg; /*!< flag for big ('B') or little ('L') endian */ + CHAR comp[16]; /*!< the buffer name as specified in init call */ + UINT32 size; /*!< size of buffer, including this struct */ + UINT32 times_wrap; /*!< how often the buffer wrapped */ + UINT32 next_free; /*!< offset of the byte behind the latest entry */ + 
UINT32 te_count; /*!< Updated each time a trace is done */ + UINT32 extracted; /*!< Not currently used */ +}trace_buf_head_t; + +/*! + * @brief Timestamp and thread id for each trace entry. + */ +typedef struct trace_entry_stamp { + UINT32 tbh; /*!< timestamp upper part */ + UINT32 tbl; /*!< timestamp lower part */ + UINT32 tid; /*!< process/thread id */ +}trace_entry_stamp_t; + +/* + * @brief Structure is used by adal app. layer to fill in trace info. + */ +typedef struct trace_entry_head { + UINT16 length; /*!< size of trace entry */ + UINT16 tag; /*!< type of entry: xTRACE xDUMP, (un)packed */ + UINT32 hash; /*!< a value for the (format) string */ + UINT32 line; /*!< source file line number of trace call */ +}trace_entry_head_t; + +/* + * @brief Parameter traces can be all contained in one write. + */ +typedef struct trace_entire_entry { + trace_entry_stamp_t stamp; + trace_entry_head_t head; + UINT32 args[TRACE_MAX_ARGS + 1]; +} trace_entire_entry_t; + + +/* + * @brief Binary first writes header and time stamp. + */ +typedef struct trace_bin_entry { + trace_entry_stamp_t stamp; + trace_entry_head_t head; +} trace_bin_entry_t; + +/** + * @brief Used as input to traces to get to correct buffer. 
+ */ +typedef trace_buf_head_t * tracDesc_t; + +/* + * @brief Structure is used to hold array of all trace descriptors + */ +typedef struct trace_descriptor_array +{ + tracDesc_t *entry; /*!< Pointer to trace descriptor */ + CHAR *comp; /*!< Pointer to component name */ +}trace_descriptor_array_t; + +// @rc005a - start +typedef struct circular_buf_head +{ + UINT32 head; // pointer to head + UINT32 tail; // pointer to tail + UINT32 entryCount; // number of entries +} circular_buf_header_t; + + +typedef struct circular_entire_data { + UINT32 len; + CHAR comp[4]; + trace_entire_entry_t entry; +} circular_entire_data_t; + +// @rc005a - end + +//************************************************************************* +// Globals +//************************************************************************* +// All TPMF component trace descriptors. +extern tracDesc_t g_trac_inf; // @at009a +extern tracDesc_t g_trac_err; // @at009a +extern tracDesc_t g_trac_imp; // @at009a + +extern const trace_descriptor_array_t g_des_array[]; + +//************************************************************************* +// Function Prototypes +//************************************************************************* + +/** + * \defgroup TracIntefaces Trace Component Interfaces for External Use + * \ingroup Trace + */ +/*@{*/ + + +/** + * @brief Allocate and initialize all trace buffers in memory. + * + * This function will allocate memory for each of the pre-defined trace + * buffers, initialize the buffers with starting data, and set up the + * trace descriptors which each component will use to trace. + * + * This function must be called first before any components try to trace! + * + * @return Non-zero return code on error. + */ +UINT TRAC_init_buffers(void); + +/** + * @brief Retrieve full trace buffer for component i_comp + * + * This function assumes memory has already been allocated for + * the full trace buffer in o_data.
+ * + * @param i_td_ptr Trace descriptor of buffer to retrieve. + * @param o_data Pre-allocated pointer to where data will be stored. + * + * @return Non-zero return code on error + */ +UINT TRAC_get_buffer(const tracDesc_t i_td_ptr, + void *o_data); + +/** + * @brief Retrieve partial trace buffer for component i_comp + * + * This function assumes memory has already been allocated for + * the trace buffer (size io_size). This function will copy + * in up to io_size in bytes to the buffer and set io_size + * to the exact size that is copied in. + * + * @param i_td_ptr Trace descriptor of buffer to retrieve. + * @param o_data Pre-allocated pointer to where data will be stored. + * @param io_size Size of trace data to retrieve (input) + * Actual size of trace data stored (output) + * + * @return Non-zero return code on error + */ +UINT TRAC_get_buffer_partial(const tracDesc_t i_td_ptr, + void *o_data, + UINT *io_size); + +/** + * @brief Retrieve trace descriptor for input component name + * + * @param i_comp Component name to retrieve trace descriptor for. + * + * @return Valid trace descriptor on success, NULL on failure. + */ +tracDesc_t TRAC_get_td(const char *i_comp); + +/** + * @brief Reset all trace buffers + * + * @return Non-zero return code on error + */ +UINT TRAC_reset_buf(void); + +/*@}*/ // Ending tag for external interface module in doxygen + + +/** + * @brief Trace input integers to trace buffer. + * + * This function assumes io_td has been initialized. + * + * @param io_td Initialized trace descriptor pointing to the buffer to trace to. + * @param i_hash Hash value to be recorded for this trace. + * @param i_fmt Output format + * @param i_line Line number trace is occurring on. + * @param i_type trace type. field or debug. + * @param ... parameters that are limited to a size of 4 bytes, i.e. int, uint32_t, nnn* + * + * @return Non-zero return code on error.
+ */ +UINT trace_adal_write_all(tracDesc_t io_td,const trace_hash_val i_hash, + const char *i_fmt,const ULONG i_line, const ULONG i_type,...); + + +/** + * @brief Trace input integers to trace buffer. + * + * This function assumes io_td has been initialized. + * + * @param io_td Initialized trace descriptor pointing to the buffer to trace to. + * @param i_hash Hash value to be recorded for this trace. + * @param i_line Line number trace is occurring on. + * @param i_num_args Number of arguments to trace. + * @param i_1 Input Parameter 1 + * @param i_2 Input Parameter 2 + * @param i_3 Input Parameter 3 + * @param i_4 Input Parameter 4 + * @param i_5 Input Parameter 5 + * + * @return Non-zero return code on error. + */ +UINT trac_write_int(tracDesc_t io_td,const trace_hash_val i_hash, + const ULONG i_line, + const UINT i_num_args, + const ULONG i_1,const ULONG i_2,const ULONG i_3, + const ULONG i_4,const ULONG i_5 + ); + + + /** + * @brief Trace binary data to buffer. + * + * This function assumes io_td has been initialized. + * + * @param io_td Initialized trace descriptor pointing to the buffer to trace to. + * @param i_hash Hash value to be recorded for this trace. + * @param i_line Line number trace is occurring on. + * @param i_ptr Pointer to binary data to trace. + * @param i_size Size of data to copy from i_ptr. + * + * @return Non-zero return code on error. + */ +UINT trac_write_bin(tracDesc_t io_td,const trace_hash_val i_hash, + const ULONG i_line, + const void *i_ptr, + const ULONG i_size); + +//************************************************************************* +// Functions +//************************************************************************* + +#endif //_TRAC_INTERFACE_H diff --git a/tools/ppetracepp/tracehash.pl b/tools/ppetracepp/tracehash.pl new file mode 100755 index 00000000..f12a0f8c --- /dev/null +++ b/tools/ppetracepp/tracehash.pl @@ -0,0 +1,873 @@ +#!/usr/bin/perl -w +# File tracehash.pl created by B J Zander.
+ +use strict; + +sub determine_args(); +sub launch_cpp_and_parse($$); +sub cpp_dir($); +sub read_string_file(); +sub collect_files($); +sub assimilate_file($); +sub hash_strings(); +sub write_string_file(); +sub help(); + +select (STDERR); +$| = 1; # Make all prints to STDERR flush the buffer immediately +select (STDOUT); +$| = 1; # Make all prints to STDOUT flush the buffer immediately + +# Constants +my $HEAD_SEP = "|||"; +my $HEAD_EYE_CATCHER = "#FSP_TRACE_v"; +my $HEAD_BUILD_FLAG = "BUILD:"; +my $HEAD_VER_FLAG = 2; +my $BB_STRING_FILE = "/opt/fsp/etc/BB_StringFile"; + +# Global Variables +my $debug = 0; +my $seperator = "&&&&"; +my $file_name = "trexStringFile"; +my $in_sand; +my ($backing) = $ENV{'bb'}; +my $hash_prog = "trexhash"; #default to in path +my $build = ""; +my ($sandbox) = $ENV{'SANDBOX'} || ""; +my ($context) = $ENV{'CONTEXT'} || ""; +my ($sandboxbase) = $ENV{'SANDBOXBASE'} || ""; +my ($bb); +my ($sourcebase) = "$sandboxbase/src"; +my ($version) = $HEAD_VER_FLAG; # default to oldest version +my ($collect) = 0; +my ($INCLUDE, $Arg, $file, $dir, $string_file); +my $args = ""; + +my $fail_on_collision = 0; # 1 = exit with error if hash collision occurs +my $hash_filename_too = 0; # 1 = hash is calculated over format string + filename + +print "sourcebase = $sourcebase\n" if $debug; +print "sandbox = $sandbox\n" if $debug; +print "backing = $backing\n" if $debug; +print "context = $context\n" if $debug; + +if ($context =~ /x86/) +{ + $bb = "i586-pc-linux-gnu"; +} +else +{ + $bb = "powerpc-linux"; +} + +if(($sourcebase =~ /\w+/) && ($sandbox =~ /\w+/)) +{ + $INCLUDE = "-I $sandboxbase/export/$context/fips/include -I $backing/export/$context/fips/include -I /opt/fsp/$bb/include/fsp -I/opt/fsp/$bb/include/ -include /opt/fsp/$bb/include/fsp/tracinterface.H"; +} +else +{ + print "Not in Sandbox so guessing Include Paths...\n" if $debug; + $INCLUDE = "-I/opt/fsp/i586-pc-linux-gnu/include/fsp -I/opt/fsp/i586-pc-linux-gnu/include/ -include 
/opt/fsp/i586-pc-linux-gnu/include/fsp/tracinterface.H";
+}
+
+# I/P Series work in ODE sandbox env.
+if ($sandboxbase =~ /\w+/)
+{
+    $in_sand = 1;
+    print "backing = $backing\n" if $debug;
+}
+else
+{
+    $in_sand = 0;
+}
+
+
+
+# Parse the input parameters.
+# Options known to this script are consumed here; everything else is
+# appended to $args and later handed verbatim to cpp.
+
+while (@ARGV) {
+    $Arg = shift;
+
+    if ($Arg eq "-h" || $Arg eq "-H") {
+        help();
+        exit(127);
+    }
+    if ($Arg eq "-f") {
+        $file = shift;
+        next;
+    }
+    if ($Arg eq "-d") {
+        $dir = shift;
+        next;
+    }
+    if ($Arg eq "-s") {
+        $string_file = shift;
+        next;
+    }
+    if ($Arg eq "-c") {
+        $collect = 1;
+        next;
+    }
+    if ($Arg eq "-v") {
+        $debug = 1;
+        print "debug on\n" if $debug;
+        next;
+    }
+    if ($Arg eq "-C") { # fail if a hash collision is detected
+        $fail_on_collision = 1;
+        next;
+    }
+    if ($Arg eq "-F") { # hash is calculated over format string + file name
+        $hash_filename_too = 1;
+        next;
+    }
+    if ($Arg eq "-S") { # do not merge the building block string file
+        $BB_STRING_FILE = "";
+        next;
+    }
+
+    #just pass it onto compiler
+    $args = $args . " " . $Arg;
+}
+
+print "args = $args\n" if $debug;
+
+if (!$file && !$dir && !$in_sand) {
+    help();
+    exit(127);
+}
+
+#################################
+#           M A I N             #
+#################################
+
+my $clock = `date`;
+
+# Header line for the string file; newlines (from `date`) are removed below.
+$build = $HEAD_EYE_CATCHER . "$HEAD_VER_FLAG" . $HEAD_SEP . $clock . $HEAD_SEP . $HEAD_BUILD_FLAG;
+
+$build =~ s/\n//g;
+
+# Global array to hold the parsed TRAC macro calls.
+# Each element is an array ref: [source file, format string, salt].
+my @strings = ();
+
+# Assoc. arrays to hold hash|string values.
+# Both map hash value -> "format string||file name".
+my %string_file_array = ();
+my %hash_strings_array = ();
+
+# Check all provided arguments and look for defaults if not provided by user
+determine_args();
+
+# Scan the appropriate files or directories for TRAC macro calls.
+
+if (defined $dir)
+{
+
+    $build = $build . $dir; # default to put at top of string file
+    if($collect)
+    {
+        collect_files($dir);
+    }
+    else
+    {
+        cpp_dir($dir);
+        # Hash the string that have been scanned.
+        %hash_strings_array = hash_strings();
+    }
+}
+else
+{
+    $build = $build . $file; # default to put at top of string file
+
+    if($collect)
+    {
+        assimilate_file($file);
+    }
+    else
+    {
+        # make sure include path includes directory that file is in
+        if($file =~ /^(.+)\/[^\/]+\.C$/)
+        {
+
+            launch_cpp_and_parse($file,$1);
+        }
+        else
+        {
+            # No path in front of file so it has to be local dir
+            launch_cpp_and_parse($file,"./");
+        }
+        # Hash the string that have been scanned.
+        %hash_strings_array = hash_strings();
+    }
+}
+
+# Read the existing string file into memory.
+%string_file_array = read_string_file();
+
+# Write out the new string file. check for collisions of new/old string here
+write_string_file();
+
+print "Hashing Started at $clock\n";
+$clock = `date`;
+print "Hashing Finished at $clock\n";
+
+exit 0;
+
+
+#################################
+#    S U B R O U T I N E S      #
+#################################
+
+#=============================================================================
+# Enhance usability by figuring out which build env. we are in
+#
+# - In scan mode, locates the hash program with `which`, falling back to
+#   ./trexhash; exits with 127 if neither is found.
+# - If neither -f nor -d was given, defaults $dir to the sandbox obj/ tree
+#   (collect mode) or src/ tree (scan mode); exits with 127 outside a sandbox.
+# - If -s was not given, picks a default $string_file.
+# - Creates the string file with touch and makes it readable/writable.
+#=============================================================================
+sub determine_args() {
+
+
+    # Find trexhash program
+    # but only if needed (i.e. not in collect mode)
+    if (!$collect) {
+        my $tmp = `which $hash_prog`;
+        chomp $tmp;
+
+        if ($tmp eq '') {
+            print STDOUT "\nWarning: Program trexhash does not exist in path.\n" if $debug;
+            $hash_prog = "./trexhash";
+
+            $tmp = `which $hash_prog`;
+            chomp $tmp;
+            if ($tmp eq '') {
+                print STDOUT "\nError: Unable to find trexhash \n";
+                exit(127);
+            }
+        }
+    }
+
+    # Verify input values.
+    if ((!defined $file) && (!defined $dir)) {
+        if(!($in_sand))
+        {
+            print STDOUT "\nError: No input directory or file provided as input to scan\n";
+            exit(127);
+        }
+
+        # Assume they want sandbox scanned
+        if($collect)
+        {
+            # collect all string files generated by tracepp and merge
+            $dir = "$sandboxbase/obj/";
+        }
+        else
+        {
+            # generate our own string file by pre-compiling all source code
+            $dir = "$sandboxbase/src/";
+        }
+        print STDOUT "\n-f or -d not found...scanning $dir by default\n\n";
+    }
+
+    if (!defined $string_file)
+    {
+        if ($in_sand)
+        {
+
+            # Copy the current string file from backing build into our sandbox
+            system ("cp $backing/obj/$file_name $sandboxbase/obj/$file_name")
+                if !(-e "$sandboxbase/obj/$file_name");
+
+            $string_file = "$sandboxbase/obj/$file_name";
+        }
+        else
+        {
+            $string_file = "./$file_name";
+        }
+        print STDOUT "-sf not specified, using $string_file instead...\n\n" if $debug;
+
+    }
+
+    # Try Creating the string file
+    `touch $string_file`;
+
+    if (! -f $string_file) {
+        print STDOUT "\nError: File $string_file does not exist. Current directory may not be writable.\n\n";
+        help();
+        exit(127);
+    }
+
+    # Make sure trexStringFile is readable/writeable
+    system("chmod ugo+rw $string_file");
+
+}
+
+#=============================================================================
+# Launch cpp script and grab input from it looking for trace calls.
+#
+# Parameters:
+#   $l_loc - source file to preprocess
+#   $l_dir - directory added to the cpp include path with -I
+#
+# Every trace_adal_hash("format" ..., <salt>) call found in the cpp output is
+# pushed onto the global @strings as [$l_loc, format string, salt].
+# Dies if the cpp pipe cannot be opened.
+#=============================================================================
+sub launch_cpp_and_parse($$) {
+
+    my ($l_loc, $l_dir) = @_;
+
+    print "Processing file $l_loc\n" if $debug;
+    my $cmd = "/usr/bin/cpp $INCLUDE -I $l_dir $args $l_loc|";
+    print "$cmd\n" if $debug;
+    # $_ is not set here, so report the command that failed instead.
+    open(FH,"$cmd")
+        or die ("Cannot open $cmd: $!, stopped");
+
+    # Read through all lines in the file..
+    my $line = <FH>;
+    while (defined $line)
+    {
+        # chomp (not chop) so a final line without EOL keeps its last char
+        chomp $line;         # remove EOL
+        $line =~ s/^\s*//;   # remove unneccesary beginning white space.
+        $line =~ s/\s*$//;   # remove unneccesary ending white space.
+        # Look for lines that are trace macro calls.
+        #if (/(trace_adal_hash)(\()( *)(".+")(,)(\d)/)
+        #if ($line =~ /(.*?)(trace_adal_hash)(\()( *)(".+")(,)(\d)\)+(.*\d.*)/)
+        while($line =~ m/^(.*?)trace_adal_hash\s*\(\s*(("[^"]*"\s*)+),\s*(\d+)\s*\)(.*)$/)
+        {
+            my ($prefix, $strings, $salt, $suffix) = ($1, $2, $4, $5);
+            print STDOUT "$strings $salt\n" if $debug;
+            $strings =~ s/"\s*$//;   # remove trailing " and space
+            $strings =~ s/^"//;      # remove leading "
+            $strings =~ s/"\s*"//g;  # join adjacent string literals
+            # Check to see if it's contained on a single line, or if we
+            # have to combine lines to get a complete trace call.
+
+            # Save the macro call so it can be hashed later..
+            push (@strings, [$l_loc, $strings, $salt]);
+            $line = $suffix; # check rest of line for a second trace call
+        }
+        my $nextline = <FH>;
+        last if !defined $nextline;
+        # if a trace call is spread over multiple lines we have to add the next
+        # line from the source. the only problem is the definition/declaration
+        # of trace_adal_hash: we have to ignore that. we catch that by requiring
+        # a " after the function name. hopefully nobody writes a comment with
+        # a " after the function declaration ...
+        if ($line =~ /trace_adal_hash.*"/) {
+            $line .= $nextline;
+        } else {
+            $line = $nextline;
+        }
+    }
+    close(FH);
+}
+
+#=============================================================================
+# run cpp on all files in this directory and return the output
+#
+# Parameters:
+#   $l_dir - directory to scan
+#
+# Recurses into sub-directories (except '.', '..', simo, simu and bldv) and
+# calls launch_cpp_and_parse() for every regular file ending in ".C".
+# The directory listing is read completely and the handle closed before
+# recursing, so the shared DH handle is safe to reuse.
+#=============================================================================
+sub cpp_dir($) {
+
+    my ($l_dir) = @_;
+    my @dir_entry;
+    my $l_entry;
+
+    # Open the directory and read all entry names.
+    opendir ( DH , "$l_dir")
+        or die ("Cannot open $l_dir: $!, stopped");
+
+    print STDOUT "Processing directory $l_dir\n" if $debug;
+    @dir_entry = readdir(DH);
+    closedir(DH);
+
+    while (@dir_entry) {
+        $l_entry = shift(@dir_entry);
+
+        if ($l_dir =~ m"/$") {
+            $l_entry = "$l_dir$l_entry";
+        }
+        else {
+            $l_entry = "$l_dir/$l_entry";
+        }
+
+        # Is the entry a directory?
+        if (-d $l_entry) {
+
+            if($l_entry =~ m"/?([^/]+)$")
+            {
+                # check dir we are going into
+                print "dir = $1\n" if $debug;
+                # should we recurse into this directory.
+                if ($1 =~ m/^(\.\.?|sim[ou]|bldv)$/)
+                {
+                    next; # skip '.', '..' and some fips dirs
+                }
+                cpp_dir($l_entry);
+            }
+            else
+            {
+                # unable to determine name of dir (no / in filename)
+                # should we recurse into this directory.
+                if ($l_entry =~ m/^(\.\.?|sim[ou]|bldv)$/)
+                {
+                    next; # skip '.', '..' and some fips dirs
+                }
+                cpp_dir($l_entry);
+            }
+        }
+        # Is the entry a file?
+        elsif ((-f $l_entry) && ($l_entry =~ m/\.C$/)) {
+            # it's a file so
+            launch_cpp_and_parse($l_entry,$l_dir);
+        }
+        else {
+            # Not a file or directory so ignore it...
+        }
+    }
+}
+
+#=============================================================================
+# Read in strings from the existing trace string file....
+#
+# Reads $string_file and, unless disabled or identical, $BB_STRING_FILE.
+# Returns a hash: hash value -> "format string||file name".
+# Side effects: for a versioned file, $build is replaced by the file's header
+# line (with the version digit updated to $HEAD_VER_FLAG); $version is set
+# to $HEAD_VER_FLAG on return. Exits with 127 on an unknown version of
+# $string_file.
+#=============================================================================
+sub read_string_file() {
+
+    my %o_strings;
+    my ($line) = "";
+    my ($l_hash) = "";
+    my ($l_str) = "";
+    my ($l_file) = "";
+
+
+    # Make sure we can open each file.
+    open ( FH , "<$string_file")
+        or die ("Cannot open $string_file: $!, stopped");
+
+    $line = <FH>;
+
+    print "first line in trexStringFile= $line\n" if $debug;
+
+    if((defined $line) && ($line =~ /^$HEAD_EYE_CATCHER(\d)/))
+    {
+        $version = $1;
+
+        print "version = $version\n" if $debug;
+
+        #Always put latest version in file
+        $line =~ s/^$HEAD_EYE_CATCHER\d/${HEAD_EYE_CATCHER}${HEAD_VER_FLAG}/;
+
+        # Take previous version in file and use it.
+        $build = $line;
+        chomp($build);
+        $line = <FH>;
+
+        while (defined $line) {
+            chomp $line; # remove EOL
+            if($version eq "1")
+            {
+                # version 1 layout: hash||file||string
+                ($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+            }
+            elsif($version eq "2")
+            {
+                # version 2 layout: hash||string||file
+                ($l_hash, $l_str ,$l_file) = split(/\|\|/, $line);
+            }
+            else
+            {
+                print "Unknown version of stringfile $version\n";
+                exit(127);
+            }
+            $o_strings{$l_hash} = $l_str . "||" . $l_file;
+            $line = <FH>;
+        }
+
+    }
+    else
+    { # If there is a file then we are dealing with the first
+      # version of trexStringFile so don't look for file name.
+        if ($debug) {
+            print "version 0 stringfile detected: $string_file\n";
+        }
+
+        # there is a file and it doesn't have a header
+        $version = 0;
+
+        while (defined $line) {
+            chomp $line; # remove EOL
+            ($l_hash,$l_str) = split(/\|\|/, $line);
+            $o_strings{$l_hash} =$l_str . "||" . "NO FILE";
+            $line = <FH>;
+        }
+    }
+
+    close(FH);
+
+    #Time to look for a building block string file
+    if($BB_STRING_FILE ne "" and $string_file ne $BB_STRING_FILE and -f $BB_STRING_FILE)
+    {
+
+        # Make sure we can open the file.
+        open ( FH , "<$BB_STRING_FILE")
+            or die ("Cannot open $BB_STRING_FILE: $!, stopped");
+
+        $line = <FH>;
+
+        print "first line in BB_StringFile = $line\n" if $debug;
+        if((defined $line) && ($line =~ /^$HEAD_EYE_CATCHER(\d)/))
+        {
+            $version = $1;
+
+            $line = <FH>;
+            while (defined $line)
+            {
+                # chomp (not chop) so a final line without EOL is not truncated
+                chomp $line; # remove EOL
+                if($version eq "1")
+                {
+                    ($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+                }
+                elsif($version eq "2")
+                {
+                    ($l_hash, $l_str ,$l_file) = split(/\|\|/, $line);
+                }
+                #($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+                $o_strings{$l_hash} = $l_str . "||" . $l_file ;
+                $line = <FH>;
+            }
+        }
+        else
+        {
+            print "*** ERROR: BB_StringFile '$BB_STRING_FILE' should always have version!!!\n"
+        }
+
+        close(FH);
+    }
+    else
+    {
+        print "$BB_STRING_FILE is not available\n" if $debug;
+    }
+    #All files are latest version now.
+    $version = $HEAD_VER_FLAG;
+    return %o_strings;
+}
+
+#=============================================================================
+# Walk a directory tree and merge every .hash file found in it.
+#
+# Parameters:
+#   $l_dir - directory to scan
+#
+# Recurses into sub-directories (except '.', '..', simo, simu and bldv) and
+# calls assimilate_file() for every regular file ending in ".hash".
+#=============================================================================
+sub collect_files($) {
+
+    my ($l_dir) = @_;
+    my (@dir_entry);
+    my ($l_entry) = "";
+
+    # Open the directory and read all entry names.
+    opendir ( DH , "$l_dir")
+        or die ("Cannot open $l_dir: $!, stopped");
+
+    print STDOUT "Processing directory $l_dir\n" if $debug;
+    @dir_entry = readdir(DH);
+    closedir(DH);
+
+    while (@dir_entry) {
+        $l_entry = shift(@dir_entry);
+
+        if ($l_dir =~ m"/$") {
+            $l_entry = "$l_dir$l_entry";
+        }
+        else {
+            $l_entry = "$l_dir/$l_entry";
+        }
+
+        # Is the entry a directory?
+        if (-d $l_entry) {
+
+            # should we recurse into this directory.
+            if ($l_entry =~ m/\/(\.\.?|sim[ou]|bldv)$/)
+            {
+                next; # skip '.', '..' and some fips dirs
+            }
+            collect_files($l_entry);
+        }
+        # Is the entry a file?
+        elsif ((-f $l_entry) && ($l_entry =~ m"\.hash$")) {
+            # it's a file so
+            assimilate_file($l_entry);
+        }
+        else {
+            # Not a file or directory so ignore it...
+        }
+    }
+
+}
+
+#=============================================================================
+# Read in data from file and add to master one
+#
+# Parameters:
+#   $l_loc - string (.hash) file to read
+#
+# Every entry is stored in the global %hash_strings_array as
+# hash value -> "format string||file name". If a hash value is already
+# present with a different string, a collision is reported; with -C the
+# script then exits with 1. Exits with 127 for a version 1 file combined
+# with -F, or for an unknown version. $version is set to $HEAD_VER_FLAG on
+# return.
+#=============================================================================
+sub assimilate_file($) {
+
+    my ($l_loc) = @_;
+
+    my ($line) = "";
+    my ($l_hash) = "";
+    my ($l_str) = "";
+    my ($l_file) = "";
+
+    # Make sure we can open each file.
+    open ( FH , "<$l_loc")
+        or die ("Cannot open $l_loc: $!, stopped");
+
+    $line = <FH>;
+
+    print "Assimilate: first line in $l_loc = $line" if $debug;
+
+    if((defined $line) && ($line =~ /^$HEAD_EYE_CATCHER(\d)/))
+    {
+        $version = $1;
+        if ($version eq "1") {
+            if ($hash_filename_too) {
+                print "*** ERROR: hash_filename_too (-F) isn't possible with trace version 1\n";
+                print "    please rebuild all .hash files and global trexStringFile\n";
+                print "    version 1 file is '$l_loc'\n";
+                exit(127);
+            }
+        } elsif ($version ne "2") {
+            print "Unknown version of stringfile $version\n";
+            exit(127);
+        }
+
+        $line = <FH>;
+
+
+        while (defined $line) {
+            # chomp (not chop) so a final line without EOL is not truncated
+            chomp $line; # remove EOL
+            if($version eq "1")
+            {
+                # version 1 layout: hash||file||string
+                ($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+            }
+            elsif($version eq "2")
+            {
+                # version 2 layout: hash||string||file
+                ($l_hash, $l_str ,$l_file) = split(/\|\|/, $line);
+            }
+            my $newstring = $l_str . "||" . $l_file;
+            if (exists $hash_strings_array{$l_hash}) {
+                my $hashstr1 = $hash_strings_array{$l_hash};
+                my $hashstr2 = $newstring;
+                if (!$hash_filename_too) {
+                    # hash was made over format string only, remove file name
+                    $hashstr1 =~ s/\|\|.*$//;
+                    $hashstr2 = $l_str;
+                }
+                if ($debug) {
+                    print "a_f: compare $hashstr1\n",
+                          "         vs. $hashstr2\n";
+                }
+                if ($hashstr1 ne $hashstr2)
+                {
+                    print "*** ERROR: HASH Collision! (a_f)\n",
+                          "    Two different strings have the same hash value ($l_hash)\n",
+                          "    String 1: $hash_strings_array{$l_hash}\n",
+                          "    String 2: $newstring\n";
+                    if ($fail_on_collision) {
+                        exit(1);
+                    }
+                }
+            }
+            $hash_strings_array{$l_hash} = $newstring;
+            $line = <FH>;
+        }
+
+    }
+    else
+    { # If there is a file then we are dealing with the first
+      # version of trexStringFile so don't look for file name.
+      # these files shouldn't be there anymore. we don't check for collisions here
+        if ($debug) {
+            # report the file actually being read, not the output string file
+            print "version 0 stringfile detected: $l_loc\n";
+        }
+
+        if(defined $line)
+        {
+            # there is a file and it doesn't have a header
+            $version = 0;
+        }
+
+        while (defined $line) {
+            chomp $line; # remove EOL
+            ($l_hash,$l_str) = split(/\|\|/, $line);
+            $hash_strings_array{$l_hash} = $l_str . "||" . "NO FILE";
+            $line = <FH>;
+        }
+    }
+    $version = $HEAD_VER_FLAG;
+    close(FH);
+}
+
+#=============================================================================
+# Hash every trace string collected in @strings by calling $hash_prog.
+#
+# Returns a hash: hash value -> "format string||file name".
+# With -F the file name is appended to the string before hashing.
+# Exits with 1 if $hash_prog fails, or on a collision when -C was given.
+#=============================================================================
+sub hash_strings() {
+
+    my ($hash_val, $l_key, %l_hash_strings);
+    my ($line_feed) = chr(10);
+    my ($l_file_name) = "NO FILENAME";
+    print "\nHashing printf strings.\n\n";
+
+    foreach my $str (@strings) {
+        my $printf_string;
+        $l_file_name = $str->[0];
+        $printf_string = $str->[1];
+        $l_key = $str->[2];
+        print "printf_string = $printf_string\n" if $debug;
+        $printf_string =~ s/"\s?"//g; #multi line traces will have extra " in them
+        $printf_string =~ s/`/\\`/g; # escape ` for the shell
+        $printf_string =~ s/\\n/$line_feed/g; # turn the two characters \n into a real line feed
+        if ($hash_filename_too) {
+            $printf_string .= "||" . $l_file_name;
+        }
+
+        # call the hasher.
+        # NOTE(review): the string is passed inside shell double quotes, so a
+        # '$' in a format string would be expanded by the shell - confirm
+        # trace strings never contain one, or escape it here as well.
+        print "$hash_prog \"$printf_string\" $l_key\n" if $debug;
+        $hash_val = `$hash_prog \"$printf_string\" $l_key`;
+        if ($?) {
+            my ($hp_ret, $hp_sig) = ($? >> 8, $? & 127);
+            if ($hp_sig) {
+                print "*** ERROR: $hash_prog died with signal $hp_sig\n";
+            } elsif ($hp_ret) {
+                print "*** ERROR: $hash_prog returned the error $hp_ret\n";
+                if ($hash_val) {
+                    print "    error from $hash_prog:\n$hash_val";
+                }
+            }
+            exit(1);
+        }
+        # NOTE(review): $hash_val is used exactly as printed by $hash_prog
+        # (no chomp) - presumably it prints no trailing newline; confirm.
+        print "printf_string = $printf_string l_key = $l_key hash val = $hash_val\n" if $debug;
+
+        # undo escaping
+        $printf_string =~ s/$line_feed/\\n/g;
+        $printf_string =~ s/\\`/`/g;
+
+        if (exists $l_hash_strings{$hash_val})
+        {
+            # hash val was found before. check if it's the same string
+            # else we have a hash collision
+            my $l_tmp = $l_hash_strings{$hash_val};
+            if (!$hash_filename_too) {
+                $l_tmp =~ s/\|\|.*$//;
+            }
+            if ($l_tmp ne $printf_string)
+            {
+                print "*** ERROR: HASH Collision! (h_s)\n",
+                      "    Two different strings have the same hash value ($hash_val)\n",
+                      "    String 1: $l_hash_strings{$hash_val}\n",
+                      "    String 2: $printf_string (file $l_file_name)\n";
+                if ($fail_on_collision) {
+                    exit(1);
+                }
+            }
+        }
+        # this will overwrite an old string with a new one if a collision occurred
+        # but we might want to bail out in this case anyway
+        $printf_string = $printf_string . "||" . $l_file_name;
+        $l_hash_strings{$hash_val} = $printf_string;
+    }
+    return %l_hash_strings;
+}
+#=============================================================================
+# Merge %hash_strings_array into %string_file_array and write the result,
+# sorted by hash value, to $string_file.
+#
+# Collisions between new and existing entries are reported; with -C the
+# script exits with 1, otherwise the new entry replaces the old one.
+# The $build header line is written only when $version > 0.
+#=============================================================================
+sub write_string_file() {
+
+    my (@keys) = ();
+    my ($l_key) = "";
+
+    # Combine the contents of the existing string file with the trace calls
+    # that we have just hashed.
+    print STDOUT "\nCombining Hash strings...\n\n";
+
+    @keys = keys(%hash_strings_array);
+
+    foreach $l_key (@keys) {
+        my $l_tmp = $hash_strings_array{$l_key}; # freshly collected strings
+        if (exists $string_file_array{$l_key})
+        { # hash exists in list from trexStringFile
+            my $l_tmp2 = $string_file_array{$l_key};
+            if (!$hash_filename_too) {
+                $l_tmp =~ s/\|\|.*$//;
+                $l_tmp2 =~ s/\|\|.*$//;
+            }
+
+            # Check for hash collisions.
+            if ($l_tmp ne $l_tmp2)
+            {
+                print "*** ERROR: HASH Collision! (w_s_f)\n",
+                      "    Two different strings have the same hash value ($l_key)\n",
+                      "    String 1: $hash_strings_array{$l_key}\n",
+                      "    String 2: $string_file_array{$l_key}\n";
+                if ($fail_on_collision) {
+                    exit(1);
+                }
+                # don't fail, write new one
+            }
+        }
+        if($version > 0)
+        {
+            # add/replace the new string to the string_file_array.
+            $string_file_array{$l_key} = $hash_strings_array{$l_key}
+        }
+        else
+        {
+            # old version so only write out format string (not file name to)
+            $string_file_array{$l_key} = $l_tmp;
+        }
+    }
+
+    # Write out the updated string file.
+    print STDOUT "\nWriting updated hash||string file...\n\n";
+
+    @keys = sort(keys(%string_file_array));
+
+    open ( FH , ">$string_file")
+        or die ("Cannot open $string_file: $!, stopped");
+
+    if($version > 0)
+    {
+        print FH "$build\n"; # only print version if newer then version 0
+    }
+    foreach $l_key (@keys) {
+        print FH "$l_key||$string_file_array{$l_key}\n";
+    }
+    close FH;
+}
+#=============================================================================
+
+#=============================================================================
+# Display command invokation help for this program...
+# The text is an interpolating here-document, so $BB_STRING_FILE is expanded.
+#=============================================================================
+sub help() {
+    print << "EOF";
+tracehash.pl - create a trace string file from sources or collect tracepp files
+Usage: tracehash.pl [options]
+  General options:
+    -h  - Print this help text.
+    -v  - Be verbose, tell what's going on (debug output)
+  Operation modes
+    -c  - Collect StringFiles created by tracepp and merge.
+    default - Scan source files for trace calls.
+
+Collect mode: tracehash.pl -c [-vFCS] [-d <dir>] [-s <sfile>]
+              tracehash.pl -c [-vFCS] [-f <file>] [-s <sfile>]
+    Collect string files created by tracepp (.hash) from directory tree at
+    <dir> or read them from string file <file> and write to file
+    <sfile>, adding entries already in this file.
+    -f  - String file to read and write/add to <sfile>.
+    -d  - Start of directory tree to scan for .hash files. Default = .
+    -s  - File with trace strings (and hashes) to read from and write to
+          default = ./trexStringFile
+    -F  - hash is calculated over trace string and source file name,
+          otherwise without source file name
+    -C  - report an error if a hash collisions occurs
+    -S  - don't read global FLD-2.2 string file ($BB_STRING_FILE)
+    If tracehash.pl is called in a FipS build sandbox without -d and -f
+    defaults for the sandbox will be used.
+
+Scan mode: tracehash.pl [-vFCS] [-d <dir>] [-s <sfile>] [ccpopts]
+           tracehash.pl [-vFCS] [-f <file>] [-s <sfile>] [cppopts]
+    Scan all files in directory tree <dir> or scan file <file> and write
+    strings to file <sfile>. Strings already in this file will be merged.
+    -f  - Source file to scan for trace entries.
+    -d  - Source directory to scan for trace entries.
+    -s  - File with trace strings (and hashes) to read from and write to.
+          default = ./trexStringFile
+    -F  - hash for string was build from format string + source file name
+    -C  - report an error if hash collisions occur
+    -S  - don't read global FLD-2.2 string file ($BB_STRING_FILE)
+    All other arguments will be passed verbatim to cpp
+EOF
+}
+#=============================================================================
+
-- 
cgit v1.2.1