19 files changed, 5063 insertions, 0 deletions
diff --git a/tools/PowerPCtoPPE/Makefile b/tools/PowerPCtoPPE/Makefile
new file mode 100644
index 00000000..d38e72d4
--- /dev/null
+++ b/tools/PowerPCtoPPE/Makefile
@@ -0,0 +1,23 @@
+# Builds libp2p.a, the P2P support library, from the objects in p2pfiles.mk.
+export SUB_OBJDIR = /p2p
+
+include img_defs.mk
+include p2pfiles.mk
+
+OBJS := $(addprefix $(OBJDIR)/, $(P2P_OBJECTS))
+# Archive exactly the listed objects ($^) rather than every *.o in OBJDIR.
+libp2p.a: $(OBJS)
+	$(AR) crs $(OBJDIR)/libp2p.a $^
+
+.PHONY: clean p2p
+p2p: $(OBJS)
+# OBJDIR is order-only: it must exist, but its mtime never forces a rebuild.
+$(OBJS) $(OBJS:.o=.d): | $(OBJDIR)
+
+$(OBJDIR):
+	mkdir -p $(OBJDIR)
+# Skip loading dependency files when the only goal is 'clean'.
+ifneq ($(MAKECMDGOALS),clean)
+include $(OBJS:.o=.d)
+endif
+
diff --git a/tools/PowerPCtoPPE/p2p-test-gen.py b/tools/PowerPCtoPPE/p2p-test-gen.py
new file mode 100755
index 00000000..983cec03
--- /dev/null
+++ b/tools/PowerPCtoPPE/p2p-test-gen.py
@@ -0,0 +1,147 @@
+#!/usr/bin/python2.6
+
+# \file  p2p-test-gen.py
+# \brief this program generates randomly constructed test cases
+#        in the form of an input file consumed by ppc-ppe-pcp.py
+# \usage create a file named 'test.s' and make sure it has at
+#        least one blank line before executing this program.
+
+import fileinput
+import random
+
+DotLabel = ['', 'Label:', '.Label']
+
+Comments = ['', '// Comments', '/* Comments */']
+
+TabSpace = ['', '\t', ' ', '\t ', ' \t', ' \t ']
+
+RegLabel = ['', '%r']
+
+Register = [0,1,2,3,4,5,6,7,8,9,10,13,28,29,30,31]
+
+TestEnable = [0,1,2,3]  # format codes p2p_test will emit; 4 and 5 are not listed
+
+TestBook = {'eieio'   : 0,  # 0: opcode only, no operands
+            'isync'   : 0,
+            'icbi'    : 0,
+            'icbt'    : 0,
+            'stbux'   : 3,  # 3: three registers (RT, RA, RB)
+            'sthux'   : 3,
+            'stwux'   : 3,
+            'lbzux'   : 3,
+            'lhzux'   : 3,
+            'lwzux'   : 3,
+            'lha'     : 2,  # 2: register plus displacement, RT, D(RA)
+            'lhau'    : 2,
+            'lhax'    : 3,
+            'lhaux'   : 3,
+            'mulhhw'  : 3,
+            'mulhhwu' : 3,
+            'mulhw'   : 3,
+            'mulhwu'  : 3,
+            'mullw'   : 3,
+            'mulli'   : 1,  # 1: two registers and an immediate (RT, RA, IM)
+            'divw'    : 3,
+            'divwu'   : 3,
+            'lmw'     : 2,
+            'stmw'    : 2,
+            'lwz'     : 4,  # 4: RT, D(RA), emitted twice per opcode
+            'stw'     : 4,
+            'cmplw'   : 5,  # 5: a compare followed by a randomly chosen branch
+            'cmpw'    : 5,
+            'cmpwi'   : 5}
+
+BranchList = ['bc', 'bcl', 'blt', 'bltl', 'ble', 'blel', 'bgt', 'bgtl', 'bge',
+              'bgel', 'beq', 'beql', 'bne', 'bnel']
+
+def p2p_test():  # rewrites test.s in place with randomly generated test lines
+  for line in fileinput.input('test.s', inplace=1):  # inplace: print output goes into the file
+    print '// start generating test cases:',
+    for opcode,format in TestBook.iteritems():
+      opcode += ' '
+      if random.randint(1, 10) > 5:  # half the time a comment line, otherwise a label line
+        print random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0] +\
+              random.sample(TabSpace,1)[0]
+      else:
+        print random.sample(TabSpace,1)[0] + random.sample(DotLabel,1)[0] +\
+              random.sample(TabSpace,1)[0]
+      if format == 0 in TestEnable:  # chained comparison: format == 0 and 0 in TestEnable
+        print random.sample(TabSpace,1)[0] + opcode +\
+              random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+      if format == 3 in TestEnable:  # opcode RT, RA, RB
+        regs = random.sample(Register, 3)
+        reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\
+                    random.sample(TabSpace,1)[0] +\
+                    random.sample(RegLabel,1)[0] + str(regs[1]) + ',' +\
+                    random.sample(TabSpace,1)[0] +\
+                    random.sample(RegLabel,1)[0] + str(regs[2])
+        print random.sample(TabSpace,1)[0] + opcode +\
+              random.sample(TabSpace,1)[0] + reg_field +\
+              random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+      if format == 1 in TestEnable:  # opcode RT, RA, IM
+        regs = random.sample(Register, 2)
+        reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\
+                    random.sample(TabSpace,1)[0] +\
+                    random.sample(RegLabel,1)[0] + str(regs[1]) + ',' +\
+                    random.sample(TabSpace,1)[0] +\
+                    str(random.randint(-128, 128))
+        print random.sample(TabSpace,1)[0] + opcode +\
+              random.sample(TabSpace,1)[0] + reg_field +\
+              random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+      if format == 2 in TestEnable:  # opcode RT, D(RA)
+        regs = random.sample(Register, 2)
+        reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\
+                    random.sample(TabSpace,1)[0] +\
+                    str(random.randint(-128, 128)) +\
+                    '(' + random.sample(RegLabel,1)[0] + str(regs[1]) + ')'
+        print random.sample(TabSpace,1)[0] + opcode +\
+              random.sample(TabSpace,1)[0] + reg_field +\
+              random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+      if format == 4 in TestEnable:  # opcode RT, D(RA), generated twice
+        for i in [1,2]:
+          regs = random.sample(Register, 2)
+          reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\
+                      random.sample(TabSpace,1)[0] +\
+                      str(random.randint(-128, 128)) +\
+                      '(' + random.sample(RegLabel,1)[0] + str(regs[1]) + ')'
+          print random.sample(TabSpace,1)[0] + opcode +\
+                random.sample(TabSpace,1)[0] + reg_field +\
+                random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+      if format == 5 in TestEnable:  # a compare line followed by a branch line
+        if 'i' in opcode:  # immediate compare (cmpwi)
+          regs = random.sample(Register, 1)  # NOTE(review): the immediate below may get a '%r' prefix from RegLabel - confirm intended
+          reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\
+                      random.sample(TabSpace,1)[0] +\
+                      random.sample(RegLabel,1)[0] +\
+                      str(random.randint(-128, 128))
+        else:
+          regs = random.sample(Register, 2)
+          reg_field = random.sample(RegLabel,1)[0] + str(regs[0]) + ',' +\
+                      random.sample(TabSpace,1)[0] +\
+                      random.sample(RegLabel,1)[0] + str(regs[1])
+        print random.sample(TabSpace,1)[0] + opcode +\
+              random.sample(TabSpace,1)[0] + reg_field +\
+              random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+        branch = random.sample(BranchList, 1)[0] + ' '
+        if 'bc' in branch:  # 'bc' and 'bcl' take three numeric operands
+          reg_field = random.sample(TabSpace,1)[0] +\
+                      str(random.randint(0, 15)) + ',' +\
+                      random.sample(TabSpace,1)[0] +\
+                      str(random.randint(0, 7)) + ',' +\
+                      random.sample(TabSpace,1)[0] +\
+                      str(random.randint(-128, 128)) +\
+                      random.sample(TabSpace,1)[0]
+        else:  # the other branches take a single numeric target
+          reg_field = random.sample(TabSpace,1)[0] +\
+                      str(random.randint(-128, 128)) +\
+                      random.sample(TabSpace,1)[0]
+        print random.sample(TabSpace,1)[0] + branch +\
+              random.sample(TabSpace,1)[0] + reg_field +\
+              random.sample(TabSpace,1)[0] + random.sample(Comments,1)[0]
+  fileinput.close()
+
+if __name__ == '__main__':
+  p2p_test()
+
+
+
diff --git a/tools/PowerPCtoPPE/p2pfiles.mk b/tools/PowerPCtoPPE/p2pfiles.mk
new file mode 100644
index 00000000..3d4fd3fb
--- /dev/null
+++ b/tools/PowerPCtoPPE/p2pfiles.mk
@@ -0,0 +1,14 @@
+#  @file p2pfiles.mk
+#
+#  @brief Lists the P2P support library sources and the objects built from them
+#
+
+##########################################################################
+# Object Files
+##########################################################################
+P2P-S-SOURCES = ppe42_mulhw.S ppe42_mulhwu.S ppe42_mullw.S \
+                ppe42_divw.S ppe42_divwu.S
+
+P2P_OBJECTS = $(P2P-S-SOURCES:.S=.o)
+
+
diff --git a/tools/PowerPCtoPPE/ppc-ppe-pcp.py b/tools/PowerPCtoPPE/ppc-ppe-pcp.py
new file mode 100755
index 00000000..53c2757f
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppc-ppe-pcp.py
@@ -0,0 +1,980 @@
+#!/usr/bin/python2.6
+
+# \file  ppc-ppe-pcp.py
+# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
+#
+# ---------------------------------------------------------------
+# Revision History
+# ---------------------------------------------------------------
+#   10-07-2014: project completed
+#   daviddu     added optimization profile support
+#
+#   10-06-2014: added fused compare and branch support
+#   daviddu     added support for combining two ld/st into one double word
+#               added support to insert branch upon .p2align directive
+#
+#   09-27-2014: added subroutine support for mul* and div*
+#   daviddu     added virtual double word replacing multiple word support
+#
+#   09-13-2014: initial version
+#   daviddu     only instruction inline replacement is supported
+# ---------------------------------------------------------------
+
+P2P_VERSION = "10-07-2014" # version number as last modified date
+P2P_PPC_EXT = '.s'         # PPC Assembly filename extension
+P2P_PPE_EXT = '.es'        # PPE Assembly filename extension
+P2P_PPE_PRE = '__ppe42_'   # PPE Assembly subroutine prefix
+
+import sys
+import os
+import re
+import fnmatch
+import fileinput
+
+# ---------------------------------------------------------------
+# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
+# ---------------------------------------------------------------
+#
+# Description:
+#
+#   This post-compiler processor will take PPC405 assembly file(s) produced
+#   by powerpc-linux-gcc or hand coded and replace some of the instructions
+#   supported by PPC405 ISA but not PPE42 ISA with a set of instructions
+#   supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
+#
+# Assumptions:
+#
+#   - Input/Output File Name Extension:
+#
+#     PPC405 assembly file generated by powerpc-linux-gcc has filename extension
+#     defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file
+#     consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT"
+#     global variable. Both should be consistent with Makefile rules.
+#
+#   - Registers:
+#
+#     Instructions in input file should only use registers supported by PPE,
+#     that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE
+#     only has CR0 instead of CR0-7).
+#
+#     The GCC flag -ffixed can be used to keep the compiler from using certain
+#     registers when it generates the input files for this script. Note that
+#     certain optimization levels of GCC, such as -Os, will still use some
+#     registers regardless of the -ffixed flag. Furthermore, the compiler should
+#     not generate multiple word instructions (lmw/stmw) that cover registers
+#     forbidden by the -ffixed flag.
+#
+#     Example of using -ffixed flag in this case:
+#       -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
+#       -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
+#       -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
+#       -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
+#       -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
+#       -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
+#
+#   - Instructions:
+#
+#     Instructions in the input file should only use PowerPC 405 instructions
+#     covered by the "PowerPC 405-S Embedded Processor Core" manual; however,
+#     it is assumed that certain categories of instructions will never be
+#     generated by the powerpc-linux-gcc compiler (or are disabled by a switch).
+#
+#     Also, compiler should generate extended mnemonics instead of its base
+#     instruction when extended mnemonics fits.
+#
+#     Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
+#     directive to help instruction alignment for best cache performance.
+#
+#   - Assembly Syntax:
+#
+#     There should be only white space before instruction mnemonics; in
+#     other words, all inline comments should be put after the instruction.
+#
+#     "Label:" and an instruction should not be on the same line; hand-coded
+#     assembly should be consistent with this compiler output format.
+#
+# Dependencies:
+#
+#   In order to utilize assembly subroutines implemented for supporting
+#   missing instructions of multiplication and division in PPE42 ISA, a given
+#   library(with assembly files and header) must be compiled and linked with
+#   any source code that use this program to generate PPE binary.
+#
+# Usage:
+#
+#   ./<ThisScript> -f <a filename with path>  --- process single file
+#   ./<ThisScript> -d <a directory path>      --- process multiple files
+#   ./<ThisScript> -h                         --- detailed usage on other flags
+#   ./<ThisScript> -v                         --- version of the program
+#   ./<ThisScript> -d <a directory path> -s   --- perform result profiling
+#
+# Functions:
+#
+#   p2p_main          - main function, parse options and arguments
+#     p2p_onefile     - processing single PPC Assembly File
+#       p2p_combine   - processing two PPC instructions in input file
+#       p2p_replace   - processing single PPC instruction in input file
+#
+# Data Structures:
+#
+#   ReplaceRules = { ppc_op : [rule, ppe_op] }
+#   CombineRules = { ppc_op : [rule, ppe_op] }
+#   FuseBranches = [ list of branches qualified for fusing with compares ]
+#
+#------------------------------------------------------------------------------
+# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ]
+#-------|-------------------------|--------------------------------------------
+# Rule  | Example (PPC to PPE)    | Description
+#-------|-------------------------|--------------------------------------------
+# 'r'   | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode
+#  0  0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same
+#-------|-------------------------|--------------------------------------------
+# 'ru'  | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by
+#       | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the
+# +1 +1 |       add    RA, RA, RB | original instruction to update RA
+#-------|-------------------------|--------------------------------------------
+# 'ra'  | [ppc] ppc_op RT, D(RA)  | on top of 'r' rule, emulate 'algebraic' by
+#       | [ppe] ppe_op RT, D(RA)  | appending "extsh" instruction after the
+# +1 +1 |       extsh  RT, RT     | original instruction to sign-extend RT
+#-------|-------------------------|--------------------------------------------
+# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above.
+#       | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether
+#       |       add    RA, RA, RB | rule 'a' or rule 'u' should be applied
+# +2 +2 |       extsh  RT, RT     | first, the outcome should be the same.
+#-------|-------------------------|--------------------------------------------
+# 'h'   | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication
+#       | [ppe] srwi   RA, 16     | emulate multiply "high halfword" with
+#       |       srwi   RB, 16     | multiply "low halfword" by shifting
+# +2 +2 |       ppe_op RT, RA, RB | the operands first
+#-------|-------------------------|--------------------------------------------
+# 's'   | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide
+#       | [ppe] stwu   R1, -24(R1)| instructions with calling 'subroutines'
+#       |       stvd   D3, 8(R1)  | implemented in ppe42_mul*.S, ppe42_div*.S
+#       |       mflr   R3         |
+#       |       stw    R3, 16(R1) | Calling Conventions:(SS = Stack Size)
+#       |       mr     R3, RA     |
+#       |       mr     R4, RB     |  Caller is responsible for
+#       |       bl     target     |   1) create stack frame
+#       |       mr     RT, R3     |   2) save off R3 and R4 to stack
+#       |       lwz    R3, 16(R1) |   3) save off link register to stack
+#       |       mtlr   R3         |   4) put operands into R3, R4 before branch
+#       |       lvd    D3, 8(R1)  |   5) put result in R3 to RT after branch
+#       |       lwz    R1, 0(R1)  |   6) restore link register from stack
+#       |                         |   7) restore R3 and R4 from stack
+#       | [sub] stwu  R1, -SS(R1) |   8) remove the stack frame
+#       |       <save volatile>   |
+#       |       (subroutine body) |  Callee is responsible for
+#       |       <restore volatile>|   1) create and remove stack frame
+#       |       lwz    R1, 0(R1)  |   2) save and restore volatile registers
+# +X +Y |       blr               |   3) subroutine will not touch LR again
+#-------|-------------------------|--------------------------------------------
+# 'o'   | [ppc] ppc_op[o] RT ...  | rule of 'o' form for overflow
+#       | [ppe] ppe_op    RT ...  | Note: "mullwo", "divwo" and "divwuo" each
+#       |       <inst specific>   | has unique setting for XER[OV,SO] if OE = 1
+#-------|-------------------------|--------------------------------------------
+# 'd'   | [ppc] ppc_op[.] RT ...  | rule of '.' or 'dot' form for recording
+#       | [ppe] ppe_op    RT ...  | using "cmpwli" to emulate the [.] form
+#       |       cmpwli    RT, 0   | to the instruction result and CR0 fields
+#-------|-------------------------|--------------------------------------------
+# 'm'   | [ppc] ppc_op RT, D(RA)  | emulate PowerPC load/store multiple word
+#       | [ppe] ppe_op DT, D(RA)  | instructions with PPE specific
+#       |       (doubleword ld/st)| 'virtual doubleword' instructions if target
+#       |       or                | address is 8-byte aligned; otherwise, using
+#       |       (singleword ld/st)| single word instructions instead or mix both
+#       |       or                | Note only RA == R1/R2/R13 will always meet
+# -1 -1 |       (single & double) | alignment requirement of virtual doubleword
+#-------|-------------------------|--------------------------------------------
+#
+ReplaceRules = {#ppc_op   : [ rule | ppe_op ]
+                #----------------------------
+                #synchronization instructions
+                'eieio'   : [ 'r',   'sync' ],
+                'isync'   : [ 'r',   'nop'  ],
+                'icbi'    : [ 'r',   'nop'  ],
+                'icbt'    : [ 'r',   'nop'  ],
+                'mtcr'    : [ 'r',   'mtcr0'],
+                #load/store with [u/x/a] form
+                'stbux'   : [ 'ru',  'stbx' ],
+                'sthux'   : [ 'ru',  'sthx' ],
+                'stwux'   : [ 'ru',  'stwx' ],
+                'lbzux'   : [ 'ru',  'lbzx' ],
+                'lhzux'   : [ 'ru',  'lhzx' ],
+                'lwzux'   : [ 'ru',  'lwzx' ],
+                'lha'     : [ 'ra',  'lhz'  ],
+                'lhau'    : [ 'ra',  'lhzu' ],
+                'lhax'    : [ 'ra',  'lhzx' ],
+                'lhaux'   : [ 'rau', 'lhzx' ],
+                #multiply/divide with [./o] form (NOTE(review): p2p_replace has no 'd'/'o' branch, only 's')
+                'mulhhw'  : [ 'h',   'mullhw'   ],
+                'mulhhw.' : [ 'h',   'mullhw.'  ],
+                'mulhhwu' : [ 'h',   'mullhwu'  ],
+                'mulhhwu.': [ 'h',   'mullhwu.' ],
+                'mulhw'   : [ 's',   'mulhw'    ],
+                'mulhw.'  : [ 'sd',  'mulhw'    ],
+                'mulhwu'  : [ 's',   'mulhwu'   ],
+                'mulhwu.' : [ 'sd',  'mulhwu'   ],
+                'mullw'   : [ 's',   'mullw'    ],
+                'mullw.'  : [ 'sd',  'mullw'    ],
+                'mullwo'  : [ 'so',  'mullw'    ],
+                'mullwo.' : [ 'sod', 'mullw'    ],
+                'mulli'   : [ 's',   'mullw'    ],
+                'divw'    : [ 's',   'divw'     ],
+                'divw.'   : [ 'sd',  'divw'     ],
+                'divwo'   : [ 'so',  'divw'     ],
+                'divwo.'  : [ 'sod', 'divw'     ],
+                'divwu'   : [ 's',   'divwu'    ],
+                'divwu.'  : [ 'sd',  'divwu'    ],
+                'divwuo'  : [ 'so',  'divwu'    ],
+                'divwuo.' : [ 'sod', 'divwu'    ],
+                #load/store multiple word(Rx-R31), ppe_op is "doubleword,singleword"
+                'lmw'     : [ 'm',   'lvd,lwz'  ],
+                'stmw'    : [ 'm',   'stvd,stw' ]}
+
+
+#------------------------------------------------------------------------------
+# CombineRules: [ 'f', 'v', 'l' ]
+#-------|-------------------------|--------------------------------------------
+# 'f'   | [ppc] ppc_op(cmp*)      | rule for 'fusing' adjacent pair of compare
+#       |       ppc_op(b*)        | and branch(PPE specific). Note: only
+# -1  0 | [ppe] ppe_op(cmp*b*)    | extended mnemonics of compares are handled
+#-------|-------------------------|--------------------------------------------
+# 'v'   | [ppc] ppc_op(lwz/stw)   | rule for combining double word aligned
+#       |       ppc_op(lwz/stw)   | load/store pairs into single 'virtual'
+# -1 -1 | [ppe] ppe_op(lvd/stvd)  | double word instructions(PPE specific)
+#-------|-------------------------|--------------------------------------------
+# 'l'   | [ppc]        .p2align   | compiler will insert ".p2align" directive to
+#       |       Label:            | help instructions align from label to label.
+#       | [ppe]        b Label    | then assembler will insert "nop" on .p2align
+#       |              .p2align   | directive. a "branch" to skip the nops will
+#  0 -1 |       Label:            | improve the performance while still aligned
+#-------|-------------------------|--------------------------------------------
+#
+CombineRules = {#ppc_op : [ rule | ppe_op ]
+                #--------------------------
+                #8byte aligned loads/stores
+                'lwz'    : [ 'v', 'lvd'   ],
+                'stw'    : [ 'v', 'stvd'  ],
+                #compares fusable to branch
+                'cmplw'  : [ 'f', 'cmplw' ],
+                'cmpw'   : [ 'f', 'cmpw'  ],
+                'cmpwi'  : [ 'f', 'cmpwi' ],
+                #'.p2align' before 'label:'
+                '.p2align' : [ 'l', 'b'   ]}
+
+
+#------------------------------------------------------------------------------
+# FuseBranches: [ Branches can be fused into cmp*b* ]
+#------------------------------------------------------------------------------
+#
+FuseBranches = ['bc', 'bcl',
+                'blt', 'bltl', 'ble', 'blel',
+                'bgt', 'bgtl', 'bge', 'bgel',
+                'beq', 'beql', 'bne', 'bnel']
+
+
+# -----------------------------------------------------------------------------
+# p2p_replace:
+#   process each line(filtered) in the assembly file to replace PPC instruction
+#   to supported PPE instruction(s)
+#
+# Arguments:
+#   string:  line   - assembly file line to be replaced
+#            ppc_op - detected PPC opcode that needs to be replaced
+# Return:
+#   boolean: True   - Return without Error
+#            False  - Error Detected
+# Variables:
+#   string:  inst, rule, ppe_op, newline, temp_op
+#            double_inst, single_inst, virtual_reg, base_offset, address_reg
+# Subroutine:
+#   NONE
+# -----------------------------------------------------------------------------
+def p2p_replace(line, ppc_op):
+  # Prints the PPE replacement for `line` to stdout; returns False on a parse or range error.
+  # parse PPC instruction as in I or D form with opcode and up to 3 operands:
+  # possible forms: opcode
+  #                 opcode RT, RA, RB
+  #                 opcode RT, RA, IM
+  #                 opcode RT, D(RA)
+  # inst.group(0) : <whole instruction>
+  # inst.group(1) : " "
+  # inst.group(2) : Opcode(.)
+  # inst.group(3) : " "
+  # inst.group(4) : GPR
+  # inst.group(5) : " , "
+  # inst.group(6) : GPR or Immediate(D)
+  # inst.group(7) : " , " or " ( "
+  # inst.group(8) : GPR or Immediate(IM)
+  # inst.group(9) : " ) "
+  inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line)
+
+  # detect an error: no match, or the first mnemonic on the line is not ppc_op
+  if inst is None or ppc_op != inst.group(2):
+    return False
+
+  # look up rule to process the instruction
+  rule, ppe_op = ReplaceRules[ppc_op]
+
+  # if enabled, put a mark in the output file
+  if P2P_COMMENT: print "#P2P(%s):" % rule + line,
+
+  # start cases of replacing PPC instruction with PPE instruction(s)
+  #---r------------------------------------------------------------------------
+  if 'r' in rule:
+
+    # replace opcode under rule 'r' and rewrite the instruction
+    newline = line.replace(ppc_op, ppe_op)
+    print newline,
+
+    # done unless an 'a' or 'u' rule still has to be processed on this line
+    if 'u' not in rule and 'a' not in rule:
+      return True
+
+  #---u------------------------------------------------------------------------
+  if 'u' in rule:
+
+    # construct and write "add RA, RA, RB" under rule 'u'
+    newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\
+              inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8)
+    print newline
+
+    # done unless an 'a' rule still has to be processed on this line
+    if 'a' not in rule:
+      return True
+
+  #---a------------------------------------------------------------------------
+  if 'a' in rule:
+
+    # construct and write "extsh RT, RT" under rule 'a'
+    newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\
+              inst.group(5) + inst.group(4)
+    print newline
+    return True
+
+  #---h------------------------------------------------------------------------
+  if 'h' in rule:
+
+    # construct and write "srwi RA, 16" under rule 'h'
+    newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\
+              inst.group(5) + "16"
+    print newline
+
+    # construct and write "srwi RB, 16" under rule 'h'
+    newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\
+              inst.group(5) + "16"
+    print newline
+
+    # replace opcode in original instruction and write under rule 'h'
+    newline = line.replace(ppc_op, ppe_op)
+    print newline,  # trailing comma as in rule 'r': line keeps its own newline
+    return True
+
+  #---s------------------------------------------------------------------------
+  if 's' in rule:  # NOTE(review): the 'o'/'d' letters of 'so'/'sd'/'sod' are not acted on below
+
+    # construct branch target label
+    ppe_op = P2P_PPE_PRE + ppe_op
+
+    # construct and write "stwu R1, -24(R1)" to create the stack frame
+    newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\
+              inst.group(5) + '-24(1)'
+    print newline
+
+    # construct and write "stvd D3, 8(R1)" to save off R3 and R4
+    newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\
+              inst.group(5) + '8(1)'
+    print newline
+
+    # construct and write "mflr R3" to fetch the current link address
+    newline = inst.group(1) + 'mflr' + inst.group(3) + '3'
+    print newline
+
+    # construct and write "stw R3, 16(R1)" to save off current LR to stack
+    newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\
+              inst.group(5) + '16(1)'
+    print newline
+
+    # "mr R3, RA": copy operand RA to R3. NOTE(review): R3 already holds LR here, so RA == R3 looks unsafe
+    newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\
+              inst.group(5) + inst.group(6)
+    print newline
+
+    # if 'mulli' is detected, using 'li' instead of 'mr' for second operand
+    if ppc_op == 'mulli':
+      temp_op = 'li'
+    else:
+      temp_op = 'mr'
+
+    # construct and write "mr R4, RB" to copy the operand RB to R4
+    # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4
+    newline = inst.group(1) + temp_op + inst.group(3) + '4' +\
+              inst.group(5) + inst.group(8)
+    print newline
+
+    # using branch and link(bl) to branch to subroutine
+    # later subroutine can branch back using branch link register(blr)
+    # Assumption: the subroutine will be responsible for saving
+    # and restoring all the volatile registers used in the subroutine
+    newline = inst.group(1) + 'bl' + inst.group(3) + ppe_op
+    print newline
+
+    # construct and write "mr RT, R3" to copy the result in R3 to RT
+    newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\
+              inst.group(5) + '3'
+    print newline
+
+    # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack
+    newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\
+              inst.group(5) + '16(1)'
+    print newline
+
+    # construct and write "mtlr R3" to restore the link register
+    newline = inst.group(1) + 'mtlr' + inst.group(3) + '3'
+    print newline
+
+    # construct and write "lvd D3, 8(R1)" to restore R3 and R4
+    newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\
+              inst.group(5) + '8(1)'
+    print newline
+
+    # construct and write "lwz R1, 0(R1)" to destroy the stack frame
+    newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\
+              inst.group(5) + '0(1)'
+    print newline
+    return True
+
+  #---m------------------------------------------------------------------------
+  if 'm' in rule:
+
+    # parse instruction information
+    # note register can be in either "N" form or "%rN" form
+    double_inst,single_inst = ppe_op.split(',')
+    virtual_reg = int(re.search(r'\d+', inst.group(4)).group())
+    base_offset = int(inst.group(6))
+    address_reg = int(re.search(r'\d+', inst.group(8)).group())
+
+    # consider illegal if multiple word instruction covers non-existent registers
+    if virtual_reg < 28:
+      return False
+
+    # loop until and include GPR31
+    while virtual_reg < 32:
+      # page 270 of 405 manual: a load skips the address register (unless it is R31)
+      if virtual_reg == address_reg != 31 and 'l' in single_inst:
+         base_offset += 4
+         virtual_reg += 1
+         continue
+
+      # if other GPRs being address_reg there is no guarantee for alignment
+      if address_reg not in [1,2,13]:
+        # construct and write "lwz/stw RT, D(RA)" for every registers
+        newline = inst.group(1) + single_inst + inst.group(3) +\
+                  str(virtual_reg) + inst.group(5) + str(base_offset) +\
+                  inst.group(7) + inst.group(8) + inst.group(9)
+        print newline
+        base_offset += 4
+        virtual_reg += 1
+      else:
+        # if base_offset is also aligned with base address in the address_reg
+        # & there are at least two more registers to perform doubleword ld/st
+        if not (base_offset % 8) and (virtual_reg + 1) < 32:
+          # construct and write "lvd/stvd DR, D(RA)" under rule 'm'
+          newline = inst.group(1) + double_inst + inst.group(3) +\
+                    str(virtual_reg) + inst.group(5) + str(base_offset) +\
+                    inst.group(7) + inst.group(8) + inst.group(9)
+          print newline
+          base_offset += 8
+          virtual_reg += 2
+        # either only one register left or base_offset isnt aligned
+        else:
+          # construct and write "lwz/stw RT, D(RA)" under rule 'm'
+          newline = inst.group(1) + single_inst + inst.group(3) +\
+                    str(virtual_reg) + inst.group(5) + str(base_offset) +\
+                    inst.group(7) + inst.group(8) + inst.group(9)
+          print newline
+          base_offset += 4
+          virtual_reg += 1
+      # end of this if-else
+    # end of while loop
+    return True
+  # end of last if
+
+
+# -----------------------------------------------------------------------------
+# p2p_combine:
+#   process each two lines(filtered) in the assembly file to combine two PPC
+#   instructions to one PPE specific instruction for better performance
+#
+# Arguments:
+#   string:  first_line   - 1st assembly file line to be combined
+#            second_line  - 2nd assembly file line to be combined
+#            first_op     - 1st detected PPC opcode that needs to be combined
+#            second_op    - 2nd detected PPC opcode that needs to be combined
+# Return:
+#   boolean: done         - True: return without error
+#                         - False: return with error detected
+#            match        - True: eventually matched and combined
+#                         - False: fail to qualify to be combined
+# Variables:
+#   string:  first_inst, second_inst, rule, ppe_op, newline
+#            bo, px_bix, compare_operands, target
+# Subroutine:
+#   NONE
+# -----------------------------------------------------------------------------
+def p2p_combine(first_line, second_line, first_op, second_op):
+
+  global P2P_SPACE; global P2P_CYCLE
+  global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE
+
+  # parse PPC instruction as in I or B or D form with opcode and upto 3 operands
+  # possible form : [1st] opcode [CR,] RA, RB
+  #                 [1st] opcode [CR,] RA, IM
+  #                 [1st] opcode RT, D(RA)
+  #                 [2nd] opcode [CR,] Target
+  #                 [2nd] opcode BO, BI, Target
+  #                 [2nd] opcode RT, D(RA)
+  # inst.group(0) : <whole instruction>
+  # inst.group(1) : " "
+  # inst.group(2) : Opcode(+/-/.)
+  # inst.group(3) : " "
+  # inst.group(4) : GPR or CR or BO or Target
+  # inst.group(5) : " , "
+  # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target
+  # inst.group(7) : " , " or " ( "
+  # inst.group(8) : GPR or IM or Target
+  # inst.group(9) : " ) "
+  first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line)
+  second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line)
+
+  # detect an error
+  if first_inst is None or second_inst is None or \
+     first_op != first_inst.group(2) or second_op not in second_inst.group(2):
+    return False,False
+
+  # look up rule to process the instruction
+  rule, ppe_op = CombineRules[first_op]
+
+  # start cases of combining two PPC instructions into PPE instruction
+  #---f------------------------------------------------------------------------
+  if 'f' in rule:
+
+    if not P2P_COMPARE_BRANCH:
+      return True,False
+
+    # fusing compare and branch
+    ppe_op = ppe_op + second_op
+
+    # for cmpwib* case, there is a difference between
+    # cmpwi SI operand as signed 16-bit integer and then got sign extended and
+    # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended
+    # thus, will not fuse the two if the integer operand is not in range(0,31)
+    # if cr field is given explicitly (three operands, second separator has ','):
+    if ',' in first_inst.group(7):
+      # cr field must be cr0 or 0, error out if it is something else:
+      if '0' not in first_inst.group(4):
+        return False, True
+      if 'i' in first_op and (int(first_inst.group(8)) < 0 or \
+                              int(first_inst.group(8)) > 31):
+          return True,False
+      else:
+        compare_operands = first_inst.group(6) + first_inst.group(7) + \
+                           first_inst.group(8) + ', '
+    else:
+      if 'i' in first_op and (int(first_inst.group(6)) < 0 or \
+                              int(first_inst.group(6)) > 31):
+          return True,False
+      else:
+        compare_operands = first_inst.group(4) + first_inst.group(5) + \
+                           first_inst.group(6) + ', '
+
+    # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*'
+    #   Note CTR decrement and branch always cases are not handled, and
+    #   BO is rendered as "0b" + 5 zero-padded bits so bo[2] is always BO bit 0
+    # else there is no need for PX,BIX fields for extended mnemonics
+    if 'bc' in second_op:
+      bo = '0b' + bin(int(second_inst.group(4)))[2:].zfill(5)
+
+      # skip CTR decrement (BO bit 2 is '0') and branch always (BO bit 0 is '1')
+      if bo[4] == '0' or bo[2] == '1':
+        return True,False
+
+      # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used
+      px_bix = bo[3] + second_inst.group(5) + \
+               second_inst.group(6) + second_inst.group(7)
+      target = second_inst.group(8)
+    else:
+      px_bix = ""
+      # if cr field is given explicitly (two operands, first separator has ','):
+      if ',' in second_inst.group(5):
+        # cr field must be cr0 or 0, error out if it is something else:
+        if '0' not in second_inst.group(4):
+          return False, True
+        target = second_inst.group(6)
+      else:
+        target = second_inst.group(4)
+
+    # profile: space--, cycle is the same because 1+2==3
+    P2P_SPACE -= 1
+
+    # if enabled, put a mark in the output file
+    if P2P_COMMENT:
+      print "#P2P(%s):" % rule + first_line,
+      print "#P2P(%s):" % rule + second_line,
+
+    # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule
+    newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\
+              px_bix + compare_operands + target
+    print newline
+    return True,True
+
+
+  #---v------------------------------------------------------------------------
+  if 'v' in rule:
+
+    if not P2P_VIRTUAL_DOUBLE:
+      return True,False
+
+    global P2P_VDW_SDA
+
+    # Combinable Conditions:
+    # 1) base address registers must be the same and one of R1/R2/R13
+    # 2) address offsets have to be 8-bytes continuous and aligned
+    # 3) target or source registers must qualify to be double word register
+    # Note: label+offset@sda21 format is converted to target r13 after link
+    #       assume data go in and out r13 or SDA space is always 8-byte aligned
+    #       here we only check the continuity of address offset and register pair
+    if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \
+       ("@sda21" in first_inst.group(6) and \
+        "@sda21" in second_inst.group(6) and \
+        P2P_VDW_SDA):
+
+      if ((first_inst.group(6).replace("@sda21","") + "+4" == \
+           second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
+          ((first_inst.group(6).isdigit() and \
+           not int(first_inst.group(6)) % 8) and \
+           int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \
+         (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \
+          (int(first_inst.group(4)) == 31 and \
+          int(second_inst.group(4)) == 0)):
+        newline = first_line.replace(first_op, ppe_op, 1)
+      elif ((second_inst.group(6).replace("@sda21","") + "+4" == \
+             first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
+            ((second_inst.group(6).isdigit() and \
+             not int(second_inst.group(6)) % 8) and \
+             int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \
+           (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \
+            (int(second_inst.group(4)) == 31 and \
+            int(first_inst.group(4)) == 0)):
+        newline = second_line.replace(second_op, ppe_op, 1)
+      else:
+        return True,False
+
+      # profile: space--, cycle--(same delay but 1 less from issue)
+      P2P_SPACE -= 1; P2P_CYCLE -= 1
+
+      # if enabled, put a mark in the output file
+      if P2P_COMMENT:
+        print "#P2P(%s):" % rule + first_line,
+        print "#P2P(%s):" % rule + second_line,
+
+      print newline,
+      return True,True
+  # unknown rule or operands do not qualify: report "not combined", no error
+  return True,False
+
+
+# -----------------------------------------------------------------------------
+# p2p_onefile:
+#   process single PPC assembly file to convert it into PPE assembly file
+#   also filter out non-instruction lines before calling the subroutine
+#
+# Arguments:
+#   string: ppcFileName
+# Return:
+#   boolean: done - True if file processing completed without error
+#                 - False if file processing failed due to an error
+# Variables:
+#   boolean: match, done
+#   string:  ppeFileName, line, ppc_op, pre_op, pre_line, section, label
+#   integer: line_num, first_label_ln, second_label_ln, misalign
+# Subroutine:
+#   p2p_combine
+#   p2p_replace
+# -----------------------------------------------------------------------------
+def p2p_onefile(ppcFileName):
+
+  global P2P_SPACE; P2P_SPACE = 0 # profile count
+  global P2P_CYCLE; P2P_CYCLE = 0 # profile count
+
+  if P2P_VERBOSE : print "Translate PPC assembly: " + ppcFileName
+
+  # new PPE assembly file is renamed as <filename>.s
+  ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)
+  os.rename(ppcFileName, ppeFileName)
+
+  # initialize previous-line storage, last seen label line number (0: none
+  # seen yet) and the status flag (a file without any instruction is no error)
+  pre_line,pre_op,first_label_ln,done = "","",0,True
+
+  # use inline file editing, back up original PPC assembly with '.405' suffix
+  for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):
+
+    # in case of "mtmsr 0; isync"
+    line = line.replace('isync','nop')
+
+    # skip blank line
+    if not line.strip():
+      if pre_line:
+        print pre_line,
+        pre_line = ""
+      print line,
+      continue
+
+    # skip comments line
+    if re.search("^[\s]*(//|#)", line):
+      if pre_line:
+        print pre_line,
+        pre_line = ""
+      print line,
+      continue
+
+    # skip .section code except .p2align and label:
+    section = re.search("^[\s]*(\.[0-9a-zA-Z_]+)", line)
+    if section is not None and ':' not in line and \
+       section.group(1) != '.p2align':
+      if pre_line:
+        print pre_line,
+        pre_line = ""
+      print line,
+      continue
+
+    # apply special 'l' rule in CombineRules for '.p2align' before a 'label:'
+    label = re.search("^[\s]*[\.0-9a-zA-Z_]+[:]+", line)
+    if label is not None:
+      if pre_line and pre_op == '.p2align' and first_label_ln:
+        second_label_ln = fileinput.lineno()
+        misalign = 8 - (second_label_ln - first_label_ln - 2) % 8
+        if misalign in [3,4,5,6,7]:
+          # profile: same space, but save cycles, branch penalty is 2
+          P2P_CYCLE -= misalign - 2
+          if P2P_COMMENT: print "#P2P(l):"
+          print '\tb ' + label.group(0).split(':')[0]
+        print pre_line,
+        pre_line = ""
+      first_label_ln = fileinput.lineno()
+      if pre_line:
+        print pre_line,
+        pre_line = ""
+      print line,
+      continue
+
+    # extract opcode field from line
+    ppc_op = line.split()[0]
+    done,match = False,False
+
+    # detect the 2nd possible combinable instruction
+    if pre_line and P2P_COMBINE:
+      # ignore +/- signs for branch prediction
+      if '+' in ppc_op or '-' in ppc_op:
+        ppc_op = ppc_op[:-1]
+      if 'cmp' in pre_op and ppc_op in FuseBranches or \
+         'cmp' not in pre_op and ppc_op == pre_op:
+        done,match = p2p_combine(pre_line, line, pre_op, ppc_op)
+        if not match:
+          print pre_line,
+      else:
+        print pre_line,
+        done,match = True,False
+      pre_line = ""
+
+    # detect the 1st possible combinable instruction
+    if not pre_line and not match and P2P_COMBINE:
+      if ppc_op in CombineRules.keys():
+        pre_op = ppc_op
+        pre_line = line
+        done,match = True,True
+      else:
+        done,match = True,False
+
+    # detect possible replaceable instruction
+    if not match:
+      if ppc_op in ReplaceRules.keys() and P2P_REPLACE:
+        done = p2p_replace(line, ppc_op)
+      else:
+        print line,
+        done = True
+
+    # if instruction process is not done due to error
+    if not done:
+      line_num = fileinput.lineno()
+      break
+
+  # close the output file and restore the original input file
+  fileinput.close()
+  os.rename(ppeFileName+'.405', ppcFileName)
+
+  # in case last line of the file qualified to be a pre_line and was not printed
+  if pre_line:
+    f = open(ppeFileName, 'a')
+    f.write(pre_line)
+    f.close()
+
+  # print error debug message
+  if not done:
+    print "Error: target instruction detected at line [%d]:" % line_num
+    print "       " + line
+    print "       but fail to recognize instruction format."
+    # terminate Makefile or execution if an error is detected
+    sys.exit(1)
+
+  if P2P_COMMENT:
+    f = open(ppeFileName, 'a')
+    f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE,P2P_CYCLE))
+    f.close()
+
+  if P2P_VERBOSE :
+    print "Generated PPE assembly: " + ppeFileName
+    print "Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +\
+                                       str(P2P_CYCLE) + " cycles."
+
+
+# -----------------------------------------------------------------------------
+# p2p_profile
+#   profiling how much performance and code size are saved by optimization
+#
+# Arguments:
+#   string: ppcFileName
+# Return:
+#   list: [space, cycle]
+# Variables:
+#   string: line, profile
+# Subroutine:
+#   None
+# -----------------------------------------------------------------------------
+def p2p_profile(ppcFileName):
+  # the profile mark, when present, is the last line of the generated PPE file
+  line = ""  # stays empty (no match below) when the file has no line at all
+  f = open(ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT), 'r')
+  for line in f: pass
+  f.close()
+  profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)", line)
+  if profile is not None:
+    return [int(profile.group(1)), int(profile.group(2))]
+  return [0,0]
+
+# -----------------------------------------------------------------------------
+# p2p_main:
+#   main of this script
+#     parse options and arguments into the global P2P_* switches
+#     process one file (-f) and/or all '*'+P2P_PPC_EXT files of a directory (-d)
+#     with -s (and no -p) the directory pass only sums up saved profile marks
+# -----------------------------------------------------------------------------
+def p2p_main():
+
+  # command-line option parsing
+  from optparse import OptionParser
+  usage  = "usage: %prog [options]"
+  version= "%prog v." + P2P_VERSION
+  parser = OptionParser(usage=usage, version=version)
+  parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
+                  help="process all files in a directory given by PATH")
+  parser.add_option("-f", "--filename",  metavar="FILE", dest="ppcFile",
+                  help="process single file(with path in the filename)")
+  parser.add_option("-p", "--parallel",
+                  action="store_true",  dest="parallel", default=False,
+                  help="processing all files in parallel processes")
+  parser.add_option("-s", "--statistics",
+                  action="store_true",  dest="profile", default=False,
+                  help="optimization profiling, require comment in outputs")
+  parser.add_option("-c", "--combine-only",
+                  action="store_false", dest="replace",  default=True,
+                  help="enable only combine function by disabling replace")
+  parser.add_option("-r", "--replace-only",
+                  action="store_false", dest="combine",  default=True,
+                  help="enable only replace function by disabling combine")
+  parser.add_option("-b", "--compare branch disable",
+                  action="store_false", dest="compare_branch",  default=True,
+                  help="only disabling fused compare branch function")
+  parser.add_option("-v", "--virtual double disable",
+                  action="store_false", dest="virtual_double",  default=True,
+                  help="only disabling fused virtual double function")
+  parser.add_option("-e", "--eabi",
+                  action="store_true", dest="vdw_sda",  default=False,
+                  help="enable virtual double word fusion targeting sda")
+  parser.add_option("-n", "--no-comment",
+                  action="store_false", dest="comment",  default=True,
+                  help="don't leave comment mark in output file")
+  parser.add_option("-q", "--quiet",
+                  action="store_false", dest="verbose",  default=True,
+                  help="don't print status messages to stdout")
+  (options, args) = parser.parse_args()
+  # global program output verbose switch
+  global P2P_VERBOSE; P2P_VERBOSE = options.verbose
+  # leave a comment mark in output files
+  global P2P_COMMENT; P2P_COMMENT = options.comment
+  # enable instruction replace functions
+  global P2P_REPLACE; P2P_REPLACE = options.replace
+  # enable instruction combine functions
+  global P2P_COMBINE; P2P_COMBINE = options.combine
+  # enable virtual double word fusion targeting sda
+  global P2P_VDW_SDA; P2P_VDW_SDA = options.vdw_sda
+  # enable fused compare and branch function (on by default, -b turns it off)
+  global P2P_COMPARE_BRANCH; P2P_COMPARE_BRANCH = options.compare_branch
+  # enable combined virtual double function (on by default, -v turns it off)
+  global P2P_VIRTUAL_DOUBLE; P2P_VIRTUAL_DOUBLE = options.virtual_double
+ 
+  if P2P_VERBOSE :
+    print "PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)"
+    print "Version: " + P2P_VERSION
+
+  # single file processing
+  if options.ppcFile:
+
+    if P2P_VERBOSE :
+      print "Processing signle file: " + options.ppcFile
+
+    p2p_onefile(options.ppcFile)
+
+  # multiple files processing
+  if options.ppcPath:
+
+    if P2P_VERBOSE :
+      print "Accessing all files at: " + options.ppcPath
+      print "*Parallel Process Mode: " + ("Off", "On")[options.parallel]
+
+    if options.profile:
+      bytes = 0; cycles = 0
+
+    fileList = []
+    for root, subdirs, files in os.walk(options.ppcPath):
+      for file in fnmatch.filter(files, '*'+P2P_PPC_EXT):
+        if options.parallel :
+          fileList.append(os.path.join(root, file))
+        else:
+          if options.profile:
+            space,cycle = p2p_profile(os.path.join(root, file))
+            bytes += space*4
+            cycles += cycle
+          else:
+            p2p_onefile(os.path.join(root, file))
+
+    if options.profile:
+      print "Optimization Profiling: " + str(bytes) + " bytes, " +\
+                                         str(cycles) + " cycles."
+
+    # parallel processing mode
+    if options.parallel:
+      from multiprocessing import Pool
+      p = Pool()
+      p.map(p2p_onefile, fileList)
+      p.close()
+      p.join()
+
+  if P2P_VERBOSE : print "Done"
+
+
+# -----------------------------------------------------------------------------
+# python main: run p2p_main only when executed as a script, not when imported
+if __name__ == '__main__':
+  p2p_main()
+
diff --git a/tools/PowerPCtoPPE/ppe42_divw.S b/tools/PowerPCtoPPE/ppe42_divw.S
new file mode 100644
index 00000000..563a8d48
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppe42_divw.S
@@ -0,0 +1,208 @@
+/// \file ppe42_divw.S
+/// \brief PPC405 word division instructions implemented by PPE ISA
+///
+/// This file includes implementation for the following PPC405 instructions
+///     divw  RT, RA, RB
+///
+/// Note: PPE ISA specific "fused compare and branch" instructions are used
+///
+/// Revision History:
+///     09-22-2014: Initial Version by daviddu
+///
+
+        .file "ppe42_divw.S"
+        .section    ".text"
+
+        /*
+        ** Code comment notation:
+        **
+        ** msw = most-significant (high-order) word, i.e. bits 0..31
+        ** lsw = least-significant (low-order) word, i.e. bits 32..63
+        ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+        ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
+        **
+        ** LZ = Leading Zeroes
+        ** SD = Significant Digits
+        ** OW = Register is overwritten, previous value is lost,
+        **      correct if previous value is no longer needed.
+        ** FU = Register is not overwritten, but its value is no longer needed,
+        **      in another word, the register is "free for use".
+        **
+        ** PPE GPR Registers are: R0-R10, R13, R28-R31
+        ** Volatile Registers are: R0, R3-R10
+        ** Non-volatile registers are R28-R31
+        */
+
+        /*
+        ** Calling Convention
+        **
+        ** R2 and R13 are never saved or restored. In ABI or EABI application
+        ** these registers are constant. The other touched volatile registers
+        ** will be saved and restored by the subroutines. Note the caller
+        ** won't be saving those registers because these subroutines will be
+        ** instrumented into caller's body without compiler knowledge.
+        **
+        ** Note R3 is not saved and restored because it will be changed for
+        ** return value anyways, the p2p script will make sure to restore it.
+        ** Also CR is handled because of compare and branch, but XER/CTR/LR
+        ** are not handled because they are untouched by the instructions used.
+        **
+        ** Stack layout:
+        **
+        ** 0x00 -- R1, Dedicated for Stack Pointer
+        ** 0x04 -- slot reserved for LR
+        ** 0x08 -- R4, Volatile, Private
+        ** 0x0c -- R5, Volatile, Private
+        ** 0x10 -- R6, Volatile, Private
+        ** 0x14 -- R7, Volatile, Private
+        ** 0x18 -- R8, Volatile, Private
+        ** 0x1c -- R9, Volatile, Private
+        ** 0x20 -- CR, Condition Register
+        ** 0x24 --
+        **
+        ** 0x28 -- Stack Size, Must be 8-byte aligned
+        */
+
+        /*
+        ** Division Procedures:
+        **
+        ** __ppe42_divwu(dividend, divisor)
+        ** __ppe42_divw(dividend, divisor)
+        **
+        ** R3 = Input parameter,  dividend. then Return value, quotient.
+        ** R4 = Input parameter,  divisor.
+        ** R5 = Output parameter, quotient.
+        ** R6 = Output parameter, remainder.
+        ** R7 = Temporary register, counter.
+        **
+        ** General Algorithm
+        **
+        ** Using standard shift and subtract method to emulate
+        ** Note: dividend,divisor,quotient,remainder are all 32-bit integers
+        **
+        ** Precondition Check:
+        **
+        ** if (divisor == dividend) {
+        **     quotient = 1;
+        **     remainder = 0;
+        ** }
+        **
+        ** if (divisor == 0) {
+        **     quotient = 0;
+        **     remainder = 0;
+        ** }
+        **
+        ** if (divisor > dividend) {
+        **     quotient = 0;
+        **     remainder = dividend;
+        ** }
+        */
+
+/*****************************************************************************/
+
+        /*
+        ** Divide Word Signed (__ppe42_divw)
+        **
+        ** Using Divide Word Unsigned(divwu) to emulate
+        **
+        **  dd = absolute(dividend);
+        **  dr = absolute(divisor);
+        **  [q,r] = __ppe42_divwu(dd, dr);
+        **
+        **  quotient = q;
+        **  if (dividend < 0) {
+        **    remainder = -r;
+        **    if (divisor > 0)
+        **      quotient = -q;
+        **  }
+        **  else {
+        **    remainder = r;
+        **    if (divisor < 0)
+        **      quotient = -q;
+        **  }
+        */
+
+        .align  2
+        .global __ppe42_divw
+        .type   __ppe42_divw, @function
+
+__ppe42_divw:
+
+        stwu    %r1, -0x28(%r1)                   // allocate stack frame
+
+        stvd    %d4, 0x08(%r1)                    // save off r4 & r5 in stack
+        stvd    %d6, 0x10(%r1)                    // save off r6 & r7 in stack
+        stvd    %d8, 0x18(%r1)                    // save off r8 & r9 in stack
+
+        mfcr    %r5                               // save off cr
+        stw     %r5, 0x20(%r1)                    // store cr in stack
+
+        li      %r5, 1                            // quotient = 1
+        li      %r6, 0                            // remainder = 0
+        cmplwbc 1, 2, %r3, %r4, __ppe42_divw_ret  // ret(divisor == dividend)
+
+        li      %r5, 0                            // quotient = 0
+        li      %r6, 0                            // remainder = 0
+        cmpwibc 1, 2, %r4, 0, __ppe42_divw_ret    // ret(divisor == 0)
+
+        cmpwibc 1, 1, %r3, 0, __ppe42_divw_csc    // dividend(+) -> csc
+        neg     %r3, %r3                          // absolute(dividend)
+        li      %r5, 1                            // note dividend < 0
+
+__ppe42_divw_csc:                                 // <<continue sign check>>
+
+        cmpwibc 1, 1, %r4, 0, __ppe42_divw_uns    // divisor(+) -> uns
+        neg     %r4, %r4                          // absolute(divisor)
+        li      %r6, 1                            // note divisor < 0
+
+__ppe42_divw_uns:                                 // <<unsigned division>>
+
+        mr      %r8, %r5                          // remember if dividend < 0
+        xor     %r9, %r5, %r6                     // remember sign difference
+
+        li      %r5, 0                            // quotient = 0
+        mr      %r6, %r3                          // remainder = dividend
+        cmplwbc 1, 0, %r3, %r4, __ppe42_divw_sign // ret(divisor > dividend)
+
+        li      %r6, 0                            // remainder = 0 for the loop
+        li      %r7, 32                           // num_of_bits = 32
+
+__ppe42_divw_sas:                                 // <<shift and subtract loop>>
+        slwi    %r6, %r6, 1                       // remainder <<= 1
+        inslwi  %r6, %r3, 1, 31                   // remainder[31] = dividend[0]
+        slwi    %r3, %r3, 1                       // dividend <<= 1
+        slwi    %r5, %r5, 1                       // quotient <<= 1
+        subi    %r7, %r7, 1                       // num_of_bits--
+        cmplwbc 1, 0, %r6, %r4, __ppe42_divw_nxt  // no sub(remainder<divisor)
+        sub     %r6, %r6, %r4                     // remainder -= divisor
+        addi    %r5, %r5, 1                       // quotient++
+__ppe42_divw_nxt:                                 // <<loop condition check>>
+        cmpwibc 0, 2, %r7, 0, __ppe42_divw_sas    // while(num_of_bits)
+
+__ppe42_divw_sign:                                // <<sign handling>>
+
+        cmpwibc 1, 2, %r9, 0, __ppe42_divw_csh    // if same sign, r5 stays +
+        neg     %r5, %r5                          // otherwise, neg(r5)
+
+__ppe42_divw_csh:                                 // <<continue sign handling>>
+
+        cmpwibc 1, 2, %r8, 0, __ppe42_divw_ret    // if dividend>0, r6 stays +
+        neg     %r6, %r6                          // otherwise, neg(r6)
+
+__ppe42_divw_ret:                                 // <<return subroutine>>
+
+        mr      %r3, %r5                          // r3 is the default return
+
+        lwz     %r5, 0x20(%r1)                    // load cr from stack
+        mtcr0   %r5                               // restore cr
+
+        lvd     %d4, 0x08(%r1)                    // restore r4 & r5 from stack
+        lvd     %d6, 0x10(%r1)                    // restore r6 & r7 from stack
+        lvd     %d8, 0x18(%r1)                    // restore r8 & r9 from stack
+
+        lwz     %r1, 0(%r1)                       // restore stack pointer
+
+        blr                                       // branch back
+
+        .size   __ppe42_divw, .-__ppe42_divw
+
diff --git a/tools/PowerPCtoPPE/ppe42_divwu.S b/tools/PowerPCtoPPE/ppe42_divwu.S
new file mode 100644
index 00000000..a208df65
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppe42_divwu.S
@@ -0,0 +1,184 @@
+/// \file ppe42_divwu.S
+/// \brief PPC405 word division instructions implemented by PPE ISA
+///
+/// This file includes implementation for the following PPC405 instructions
+///     divwu RT, RA, RB
+///
+/// Note: PPE ISA specific "fused compare and branch" instructions are used
+///
+/// Revision History:
+///     09-22-2014: Initial Version by daviddu
+///
+
+        .file "ppe42_divwu.S"
+        .section    ".text"
+
+        /*
+        ** Code comment notation:
+        **
+        ** msw = most-significant (high-order) word, i.e. bits 0..31
+        ** lsw = least-significant (low-order) word, i.e. bits 32..63
+        ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+        ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
+        **
+        ** LZ = Leading Zeroes
+        ** SD = Significant Digits
+        ** OW = Register is overwritten, previous value is lost,
+        **      correct if previous value is no longer needed.
+        ** FU = Register is not overwritten, but its value is no longer needed,
+        **      in another word, the register is "free for use".
+        **
+        ** PPE GPR Registers are: R0-R10, R13, R28-R31
+        ** Volatile Registers are: R0, R3-R10
+        ** Non-volatile registers are R28-R31
+        */
+
+        /*
+        ** Calling Convention
+        **
+        ** R2 and R13 are never saved or restored. In ABI or EABI application
+        ** these registers are constant. The other touched volatile registers
+        ** will be saved and restored by the subroutines. Note the caller
+        ** won't be saving those registers because these subroutines will be
+        ** instrumented into caller's body without compiler knowledge.
+        **
+        ** Note R3 is not saved and restored because it will be changed for
+        ** return value anyways, the p2p script will make sure to restore it.
+        ** Also CR is handled because of compare and branch, but XER/CTR/LR
+        ** are not handled because they are untouched by the instructions used.
+        **
+        ** Stack layout:
+        **
+        ** 0x00 -- R1, Dedicated for Stack Pointer
+        ** 0x04 -- slot reserved for LR
+        ** 0x08 -- R4, Volatile, Private
+        ** 0x0c -- R5, Volatile, Private
+        ** 0x10 -- R6, Volatile, Private
+        ** 0x14 -- R7, Volatile, Private
+        ** 0x18 -- R8, Volatile, Private
+        ** 0x1c -- R9, Volatile, Private
+        ** 0x20 -- CR, Condition Register
+        ** 0x24 --
+        **
+        ** 0x28 -- Stack Size, Must be 8-byte aligned
+        */
+
+        /*
+        ** Division Procedures:
+        **
+        ** __ppe42_divwu(dividend, divisor)
+        ** __ppe42_divw(dividend, divisor)
+        **
+        ** R3 = Input parameter,  dividend. then Return value, quotient.
+        ** R4 = Input parameter,  divisor.
+        ** R5 = Output parameter, quotient.
+        ** R6 = Output parameter, remainder.
+        ** R7 = Temporary register, counter.
+        **
+        ** General Algorithm
+        **
+        ** Using standard shift and subtract method to emulate
+        ** Note: dividend,divisor,quotient,remainder are all 32-bit integers
+        **
+        ** Precondition Check:
+        **
+        ** if (divisor == dividend) {
+        **     quotient = 1;
+        **     remainder = 0;
+        ** }
+        **
+        ** if (divisor == 0) {
+        **     quotient = 0;
+        **     remainder = 0;
+        ** }
+        **
+        ** if (divisor > dividend) {
+        **     quotient = 0;
+        **     remainder = dividend;
+        ** }
+        */
+
+/*****************************************************************************/
+
+        /*
+        ** Divide Word Unsigned (__ppe42_divwu)
+        **
+        ** The implementation uses standard shift and subtract approach.
+        ** The following is an example in C. Note the implementation doesn't
+        ** exactly follow the C example.
+        **
+        **  num_of_bits = 32;
+        **  while(num_of_bits) {
+        **    dbit = (dividend & 0x80000000) >> 31;
+        **    remainder = (remainder << 1) | dbit;
+        **    dividend = dividend << 1;
+        **    quotient = quotient << 1;
+        **    num_of_bits--;
+        **    if(remainder < divisor)
+        **      continue;
+        **    temp = remainder - divisor;
+        **    qbit = !((temp & 0x80000000) >> 31);
+        **    quotient = quotient | qbit;
+        **    remainder = temp;
+        **  }
+        */
+
+        .align  2
+        .global __ppe42_divwu
+        .type   __ppe42_divwu, @function
+
+__ppe42_divwu:
+
+        stwu    %r1, -0x28(%r1)                   // allocate stack frame
+
+        stvd    %d4, 0x08(%r1)                    // save off r4 & r5 in stack
+        stvd    %d6, 0x10(%r1)                    // save off r6 & r7 in stack
+        stvd    %d8, 0x18(%r1)                    // save off r8 & r9 in stack
+
+        mfcr    %r5                               // save off cr
+        stw     %r5, 0x20(%r1)                    // store cr in stack
+
+        li      %r5, 1                            // quotient = 1
+        li      %r6, 0                            // remainder = 0
+        cmplwbc 1, 2, %r3, %r4, __ppe42_divwu_ret // ret(divisor == dividend)
+
+        li      %r5, 0                            // quotient = 0
+        li      %r6, 0                            // remainder = 0
+        cmpwibc 1, 2, %r4, 0, __ppe42_divwu_ret   // ret(divisor == 0)
+
+        li      %r5, 0                            // quotient = 0
+        mr      %r6, %r3                          // remainder = dividend
+        cmplwbc 1, 0, %r3, %r4, __ppe42_divwu_ret // ret(divisor > dividend)
+
+        li      %r6, 0                            // remainder = 0 for the loop
+        li      %r7, 32                           // num_of_bits = 32
+
+__ppe42_divwu_sas:                                // <<shift and subtract loop>>
+        slwi    %r6, %r6, 1                       // remainder <<= 1
+        inslwi  %r6, %r3, 1, 31                   // remainder[31] = dividend[0]
+        slwi    %r3, %r3, 1                       // dividend <<= 1
+        slwi    %r5, %r5, 1                       // quotient <<= 1
+        subi    %r7, %r7, 1                       // num_of_bits--
+        cmplwbc 1, 0, %r6, %r4, __ppe42_divwu_nxt // no sub(remainder<divisor)
+        sub     %r6, %r6, %r4                     // remainder -= divisor
+        addi    %r5, %r5, 1                       // quotient++
+__ppe42_divwu_nxt:                                // <<loop condition check>>
+        cmpwibc 0, 2, %r7, 0, __ppe42_divwu_sas   // while(num_of_bits)
+
+__ppe42_divwu_ret:                                // <<return subroutine>>
+
+        mr      %r3, %r5                          // r3 is the default return
+        lwz     %r5, 0x20(%r1)                    // load cr from stack
+        mtcr0   %r5                               // restore cr
+
+        lvd     %d4, 0x08(%r1)                    // restore r4 & r5 from stack
+        lvd     %d6, 0x10(%r1)                    // restore r6 & r7 from stack
+        lvd     %d8, 0x18(%r1)                    // restore r8 & r9 from stack
+
+        lwz     %r1, 0(%r1)                       // restore stack pointer
+
+        blr                                       // branch back
+
+        .size   __ppe42_divwu, .-__ppe42_divwu
+
+
diff --git a/tools/PowerPCtoPPE/ppe42_mulhw.S b/tools/PowerPCtoPPE/ppe42_mulhw.S
new file mode 100644
index 00000000..d229121b
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppe42_mulhw.S
@@ -0,0 +1,193 @@
+/// \file ppe42_mulhw.S
+/// \brief PPC405 word multiplication instructions implemented by PPE ISA
+///
+/// This file includes implementation for the following PPC405 instructions
+///     mulhw
+///
+/// Note: PPE ISA specific "fused compare and branch" instructions are used
+///
+/// Revision History:
+///     09-15-2014: Initial Version by daviddu
+///
+
+        .file "ppe42_mulhw.S"
+        .section    ".text"
+
+        /*
+        ** Code comment notation:
+        **
+        ** msw = most-significant (high-order) word, i.e. bits 0..31
+        ** lsw = least-significant (low-order) word, i.e. bits 32..63
+        ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+        ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
+        ** LZ = Leading Zeroes
+        ** SD = Significant Digits
+        ** OW = Register is overwritten, previous value is lost,
+        **      correct if previous value is no longer needed.
+        ** FU = Register is not overwritten, but its value is no longer needed,
+        **      in other words, the register is "free for use".
+        **
+        ** PPE GPR Registers are: R0-R10, R13, R28-R31
+        ** Volatile Registers are: R0, R3-R10
+        ** Non-volatile registers are R28-R31
+        */
+
+        /*
+        ** Calling Convention
+        **
+        ** R2 and R13 are never saved or restored. In ABI or EABI application
+        ** these registers are constant. The other touched volatile registers
+        ** will be saved and restored by the subroutines. Note the caller
+        ** won't be saving those registers because these subroutines will be
+        ** instrumented into caller's body without compiler knowledge.
+        **
+        ** Note R3 is not saved and restored because it will be changed for
+        ** return value anyways, the p2p script will make sure to restore it.
+        ** Also CR is handled because of compare and branch, but XER/CTR/LR
+        ** are not handled because they are untouched by the instructions used.
+        **
+        ** Stack layout:
+        **
+        ** 0x00 -- R1, Dedicated for Stack Pointer
+        ** 0x04 -- slot reserved for LR
+        ** 0x08 -- R4, Volatile, Private
+        ** 0x0c -- R5, Volatile, Private
+        ** 0x10 -- R6, Volatile, Private
+        ** 0x14 -- R7, Volatile, Private
+        ** 0x18 -- R8, Volatile, Private
+        ** 0x1c -- R9, Volatile, Private
+        ** 0x20 -- CR, Condition Register
+        ** 0x24 --
+        **
+        ** 0x28 -- Stack Size, Must be 8-byte aligned
+        */
+
+        /*
+        ** Multiplication Procedures:
+        **
+        ** __ppe42_mulhwu(U,V)
+        ** __ppe42_mulhw(U,V)
+        ** __ppe42_mullw(U,V)
+        **
+        ** R3:R4      = Input parameter, multipliers: U, V.
+        ** R3         = Output parameter, either Product.msw or Product.lsw.
+        ** R5-R9      = Temporary registers
+        **
+        ** General Algorithm
+        **
+        ** Using PPC405 ISA instruction 'mullhwu' to emulate
+        ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh)
+        **
+        **             U.msh U.lsh
+        **  X          V.msh V.lsh
+        ** ------------------------
+        **             A.msh A.lsh
+        **       B.msh B.lsh
+        **       C.msh C.lsh
+        ** D.msh D.lsh
+        ** ------------------------
+        ** Product.msw Product.lsw
+        **
+        ** __ppe42_mulhwu: Return Product.msw (unsigned)
+        ** __ppe42_mulhw:  Return Product.msw (signed)
+        ** __ppe42_mullw:  Return Product.lsw
+        **
+        ** Precondition Check:
+        **
+        ** if( U == 0 || V == 0 ) return P=0;
+        */
+
+/*****************************************************************************/
+
+        /*
+        ** Multiply High Word Signed (__ppe42_mulhw)
+        **
+        ** Take the unsigned high word of the raw 32-bit operands (same
+        ** partial-product steps as __ppe42_mulhwu), then correct for sign:
+        **
+        ** p = mulhwu(U, V);
+        ** if( U < 0 ) p -= V;
+        ** if( V < 0 ) p -= U;
+        **
+        ** Negating the high word of |U|*|V| would be wrong whenever the low
+        ** word is nonzero, e.g. mulhw(-1, 1) must return -1, not 0.
+        **
+        ** r9 accumulates the correction while r3-r8 are consumed by the
+        ** partial products; all arithmetic wraps modulo 2^32.
+        */
+
+        .align  2
+        .global __ppe42_mulhw
+        .type   __ppe42_mulhw, @function
+
+__ppe42_mulhw:
+
+        stwu    %r1, -0x28(%r1)                   // allocate stack frame
+
+        stvd    %d4, 0x08(%r1)                    // save off r4 & r5 in stack
+        stvd    %d6, 0x10(%r1)                    // save off r6 & r7 in stack
+        stvd    %d8, 0x18(%r1)                    // save off r8 & r9 in stack
+
+        mfcr    %r5                               // save off cr
+        stw     %r5, 0x20(%r1)                    // store cr in stack
+
+        li      %r5, 0                            // r5 = 0 (result if U or V is 0)
+        cmpwibc 1, 2, %r3, 0, __ppe42_mulhw_ret   // U=0 -> ret
+        cmpwibc 1, 2, %r4, 0, __ppe42_mulhw_ret   // V=0 -> ret
+
+        li      %r9, 0                            // correction = 0
+        cmpwibc 0, 0, %r3, 0, __ppe42_mulhw_csc   // U>=0 -> csc
+        mr      %r9, %r4                          // U<0 -> correction = V
+
+__ppe42_mulhw_csc:                                // <<continue sign check>>
+
+        cmpwibc 0, 0, %r4, 0, __ppe42_mulhw_uns   // V>=0 -> uns
+        add     %r9, %r9, %r3                     // V<0 -> correction += U
+
+__ppe42_mulhw_uns:                                // <<unsigned multiplication>>
+
+        extrwi  %r5, %r3, 16, 16                  // r5 = U.lsh
+        srwi    %r3, %r3, 16                      // r3 = U.msh
+        extrwi  %r6, %r4, 16, 16                  // r6 = V.lsh
+        srwi    %r4, %r4, 16                      // r4 = V.msh
+
+        mullhwu %r7, %r5, %r6                     // A = U.lsh * V.lsh
+        srwi    %r7, %r7, 16                      // keep A.msh only
+
+        mullhwu %r6, %r3, %r6                     // B = U.msh * V.lsh
+        extrwi  %r8, %r6, 16, 16                  // r8 = B.lsh
+        srwi    %r6, %r6, 16                      // r6 = B.msh
+        add     %r7, %r8, %r7                     // r7 = A.msh + B.lsh
+
+        mullhwu %r5, %r5, %r4                     // C = U.lsh * V.msh
+        extrwi  %r8, %r5, 16, 16                  // r8 = C.lsh
+        srwi    %r5, %r5, 16                      // r5 = C.msh
+        add     %r7, %r8, %r7                     // r7 += C.lsh
+
+        srwi    %r7, %r7, 16                      // carry out of the 3rd column
+        add     %r7, %r7, %r6                     // r7 += B.msh
+        add     %r7, %r7, %r5                     // r7 += C.msh
+
+        mullhwu %r3, %r3, %r4                     // D = U.msh * V.msh
+        add     %r5, %r3, %r7                     // r5 = unsigned high word
+
+        sub     %r5, %r5, %r9                     // r5 -= sign correction
+
+__ppe42_mulhw_ret:                                // <<return subroutine>>
+
+        mr      %r3, %r5                          // put return value to r3
+
+        lwz     %r5, 0x20(%r1)                    // load cr from stack
+        mtcr0   %r5                               // restore cr
+
+        lvd     %d4, 0x08(%r1)                    // restore r4 & r5 from stack
+        lvd     %d6, 0x10(%r1)                    // restore r6 & r7 from stack
+        lvd     %d8, 0x18(%r1)                    // restore r8 & r9 from stack
+
+        lwz     %r1, 0(%r1)                       // restore stack pointer
+
+        blr                                       // branch back
+
+        .size __ppe42_mulhw, .-__ppe42_mulhw
+
+
diff --git a/tools/PowerPCtoPPE/ppe42_mulhwu.S b/tools/PowerPCtoPPE/ppe42_mulhwu.S
new file mode 100644
index 00000000..e92ee7ec
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppe42_mulhwu.S
@@ -0,0 +1,202 @@
+/// \file ppe42_mulhwu.S
+/// \brief PPC405 word multiplication instructions implemented by PPE ISA
+///
+/// This file includes implementation for the following PPC405 instructions
+///     mulhwu
+///
+/// Note: PPE ISA specific "fused compare and branch" instructions are used
+///
+/// Revision History:
+///     09-15-2014: Initial Version by daviddu
+///
+
+        .file "ppe42_mulhwu.S"
+        .section    ".text"
+
+        /*
+        ** Code comment notation:
+        **
+        ** msw = most-significant (high-order) word, i.e. bits 0..31
+        ** lsw = least-significant (low-order) word, i.e. bits 32..63
+        ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+        ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
+        ** LZ = Leading Zeroes
+        ** SD = Significant Digits
+        ** OW = Register is overwritten, previous value is lost,
+        **      correct if previous value is no longer needed.
+        ** FU = Register is not overwritten, but its value is no longer needed,
+        **      in other words, the register is "free for use".
+        **
+        ** PPE GPR Registers are: R0-R10, R13, R28-R31
+        ** Volatile Registers are: R0, R3-R10
+        ** Non-volatile registers are R28-R31
+        */
+
+        /*
+        ** Calling Convention
+        **
+        ** R2 and R13 are never saved or restored. In ABI or EABI application
+        ** these registers are constant. The other touched volatile registers
+        ** will be saved and restored by the subroutines. Note the caller
+        ** won't be saving those registers because these subroutines will be
+        ** instrumented into caller's body without compiler knowledge.
+        **
+        ** Note R3 is not saved and restored because it will be changed for
+        ** return value anyways, the p2p script will make sure to restore it.
+        ** Also CR is handled because of compare and branch, but XER/CTR/LR
+        ** are not handled because they are untouched by the instructions used.
+        **
+        ** Stack layout:
+        **
+        ** 0x00 -- R1, Dedicated for Stack Pointer
+        ** 0x04 -- slot reserved for LR
+        ** 0x08 -- R4, Volatile, Private
+        ** 0x0c -- R5, Volatile, Private
+        ** 0x10 -- R6, Volatile, Private
+        ** 0x14 -- R7, Volatile, Private
+        ** 0x18 -- R8, Volatile, Private
+        ** 0x1c -- R9, Volatile, Private
+        ** 0x20 -- CR, Condition Register
+        ** 0x24 --
+        **
+        ** 0x28 -- Stack Size, Must be 8-byte aligned
+        */
+
+        /*
+        ** Multiplication Procedures:
+        **
+        ** __ppe42_mulhwu(U,V)
+        ** __ppe42_mulhw(U,V)
+        ** __ppe42_mullw(U,V)
+        **
+        ** R3:R4      = Input parameter, multipliers: U, V.
+        ** R3         = Output parameter, either Product.msw or Product.lsw.
+        ** R5-R9      = Temporary registers
+        **
+        ** General Algorithm
+        **
+        ** Using PPC405 ISA instruction 'mullhwu' to emulate
+        ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh)
+        **
+        **             U.msh U.lsh
+        **  X          V.msh V.lsh
+        ** ------------------------
+        **             A.msh A.lsh
+        **       B.msh B.lsh
+        **       C.msh C.lsh
+        ** D.msh D.lsh
+        ** ------------------------
+        ** Product.msw Product.lsw
+        **
+        ** __ppe42_mulhwu: Return Product.msw (unsigned)
+        ** __ppe42_mulhw:  Return Product.msw (signed)
+        ** __ppe42_mullw:  Return Product.lsw
+        **
+        ** Precondition Check:
+        **
+        ** if( U == 0 || V == 0 ) return P=0;
+        */
+
+/*****************************************************************************/
+
+        /*
+        ** Multiply High Word Unsigned (__ppe42_mulhwu)
+        **
+        ** r5 = U[16:31] or U.lsh   |   r5 = r3 & 0xffff
+        ** r3 = U[0:15]  or U.msh   |   r3 = r3 >> 16     (r3 OW)
+        ** r6 = V[16:31] or V.lsh   |   r6 = r4 & 0xffff
+        ** r4 = V[0:15]  or V.msh   |   r4 = r4 >> 16     (r4 OW)
+        **
+        ** 4th column(drop A.lsh):
+        ** A = U.lsh * V.lsh [32]   |   r7 = r5 * r6
+        ** A = A.msh         [16]   |   r7 = r7 >> 16     (r7 OW)
+        **
+        ** 3rd column(A = A.msh + B.lsh + C.lsh):
+        ** B = U.msh * V.lsh [32]   |   r6 = r3 * r6      (r6 OW)
+        ** T = B.lsh         [16]   |   r8 = r6 & 0xffff
+        ** B = B.msh         [16]   |   r6 = r6 >> 16     (r6 OW)
+        ** A = T + A         [16]   |   r7 = r8 + r7      (r7 OW, r8 FU)
+        **
+        ** C = U.lsh * V.msh [32]   |   r5 = r5 * r4      (r5 OW)
+        ** T = C.lsh         [16]   |   r8 = r5 & 0xffff  (r8 OW)
+        ** C = C.msh         [16]   |   r5 = r5 >> 16     (r5 OW)
+        ** A = T + A         [16]   |   r7 = r8 + r7      (r7 OW, r8 FU)
+        **
+        ** 2nd column(A = 3rd_carry + B.msh + C.msh):
+        ** A = A.msh         [16]   |   r7 = r7 >> 16     (r7 OW)
+        ** A = A + B         [16]   |   r7 = r7 + r6      (r7 OW, r6 FU)
+        ** A = A + C         [16]   |   r7 = r7 + r5      (r7 OW, r5 FU)
+        **
+        ** 1st column(A = D + A):
+        ** D = U.msh * V.msh [32]   |   r3 = r3 * r4      (r3 OW, r4 FU)
+        ** P = D + A         [32]   |   r5 = r3 + r7      (r3, r7 FU)
+        **
+        ** Return P(r3=r5) as Product.msw unsigned
+        **
+        ** Note: the implementation can be even shorter, the current
+        **       implementation is ensuring the overflow is avoided
+        **       by always adding 16 bits integer together.
+        */
+
+        .align  2
+	.global __ppe42_mulhwu
+        .type   __ppe42_mulhwu, @function
+
+__ppe42_mulhwu:                                   // r3 = high word of unsigned r3 * r4
+
+        stwu    %r1, -0x28(%r1)                   // allocate stack frame
+
+        stvd    %d4, 0x08(%r1)                    // save off r4 & r5 in stack
+        stvd    %d6, 0x10(%r1)                    // save off r6 & r7 in stack
+        stvd    %d8, 0x18(%r1)                    // save off r8 & r9 in stack
+
+        mfcr    %r5                               // save off cr
+        stw     %r5, 0x20(%r1)                    // store cr in stack
+
+        li      %r5, 0                            // r5 = 0 (result if U or V is 0)
+        cmpwibc 1, 2, %r3, 0, __ppe42_mulhwu_ret  // U=0 -> ret
+        cmpwibc 1, 2, %r4, 0, __ppe42_mulhwu_ret  // V=0 -> ret
+
+        extrwi  %r5, %r3, 16, 16                  // r5 = U.lsh
+        srwi    %r3, %r3, 16                      // r3 = U.msh
+        extrwi  %r6, %r4, 16, 16                  // r6 = V.lsh
+        srwi    %r4, %r4, 16                      // r4 = V.msh
+
+        mullhwu %r7, %r5, %r6                     // A = U.lsh * V.lsh
+        srwi    %r7, %r7, 16                      // keep A.msh only
+
+        mullhwu %r6, %r3, %r6                     // B = U.msh * V.lsh
+        extrwi  %r8, %r6, 16, 16                  // r8 = B.lsh
+        srwi    %r6, %r6, 16                      // r6 = B.msh
+        add     %r7, %r8, %r7                     // r7 = A.msh + B.lsh
+
+        mullhwu %r5, %r5, %r4                     // C = U.lsh * V.msh
+        extrwi  %r8, %r5, 16, 16                  // r8 = C.lsh
+        srwi    %r5, %r5, 16                      // r5 = C.msh
+        add     %r7, %r8, %r7                     // r7 += C.lsh
+
+        srwi    %r7, %r7, 16                      // carry out of the 3rd column
+        add     %r7, %r7, %r6                     // r7 += B.msh
+        add     %r7, %r7, %r5                     // r7 += C.msh
+
+        mullhwu %r3, %r3, %r4                     // D = U.msh * V.msh
+        add     %r5, %r3, %r7                     // P = D + accumulated carries
+
+__ppe42_mulhwu_ret:                               // <<return subroutine>>
+
+        mr      %r3, %r5                          // put return value to r3
+
+        lwz     %r5, 0x20(%r1)                    // load cr from stack
+        mtcr0   %r5                               // restore cr
+
+        lvd     %d4, 0x08(%r1)                    // restore r4 & r5 from stack
+        lvd     %d6, 0x10(%r1)                    // restore r6 & r7 from stack
+        lvd     %d8, 0x18(%r1)                    // restore r8 & r9 from stack
+
+        lwz     %r1, 0(%r1)                       // restore stack pointer
+
+        blr                                       // branch back
+
+        .size __ppe42_mulhwu, .-__ppe42_mulhwu
+
+
diff --git a/tools/PowerPCtoPPE/ppe42_mullw.S b/tools/PowerPCtoPPE/ppe42_mullw.S
new file mode 100644
index 00000000..ee56df16
--- /dev/null
+++ b/tools/PowerPCtoPPE/ppe42_mullw.S
@@ -0,0 +1,174 @@
+/// \file ppe42_mullw.S
+/// \brief PPC405 word multiplication instructions implemented by PPE ISA
+///
+/// This file includes implementation for the following PPC405 instructions
+///     mullw
+///
+/// Note: PPE ISA specific "fused compare and branch" instructions are used
+///
+/// Revision History:
+///     09-15-2014: Initial Version by daviddu
+///
+
+        .file "ppe42_mullw.S"
+        .section    ".text"
+
+        /*
+        ** Code comment notation:
+        **
+        ** msw = most-significant (high-order) word, i.e. bits 0..31
+        ** lsw = least-significant (low-order) word, i.e. bits 32..63
+        ** msh = most-significant (high-order) halfword, i.e. bits 0..15
+        ** lsh = least-significant (low-order) halfword, i.e. bits 16..31
+        ** LZ = Leading Zeroes
+        ** SD = Significant Digits
+        ** OW = Register is overwritten, previous value is lost,
+        **      correct if previous value is no longer needed.
+        ** FU = Register is not overwritten, but its value is no longer needed,
+        **      in other words, the register is "free for use".
+        **
+        ** PPE GPR Registers are: R0-R10, R13, R28-R31
+        ** Volatile Registers are: R0, R3-R10
+        ** Non-volatile registers are R28-R31
+        */
+
+        /*
+        ** Calling Convention
+        **
+        ** R2 and R13 are never saved or restored. In ABI or EABI application
+        ** these registers are constant. The other touched volatile registers
+        ** will be saved and restored by the subroutines. Note the caller
+        ** won't be saving those registers because these subroutines will be
+        ** instrumented into caller's body without compiler knowledge.
+        **
+        ** Note R3 is not saved and restored because it will be changed for
+        ** return value anyways, the p2p script will make sure to restore it.
+        ** Also CR is handled because of compare and branch, but XER/CTR/LR
+        ** are not handled because they are untouched by the instructions used.
+        **
+        ** Stack layout:
+        **
+        ** 0x00 -- R1, Dedicated for Stack Pointer
+        ** 0x04 -- slot reserved for LR
+        ** 0x08 -- R4, Volatile, Private
+        ** 0x0c -- R5, Volatile, Private
+        ** 0x10 -- R6, Volatile, Private
+        ** 0x14 -- R7, Volatile, Private
+        ** 0x18 -- R8, Volatile, Private
+        ** 0x1c -- R9, Volatile, Private
+        ** 0x20 -- CR, Condition Register
+        ** 0x24 --
+        **
+        ** 0x28 -- Stack Size, Must be 8-byte aligned
+        */
+
+        /*
+        ** Multiplication Procedures:
+        **
+        ** __ppe42_mulhwu(U,V)
+        ** __ppe42_mulhw(U,V)
+        ** __ppe42_mullw(U,V)
+        **
+        ** R3:R4      = Input parameter, multipliers: U, V.
+        ** R3         = Output parameter, either Product.msw or Product.lsw.
+        ** R5-R9      = Temporary registers
+        **
+        ** General Algorithm
+        **
+        ** Using PPC405 ISA instruction 'mullhwu' to emulate
+        ** Note: U,V,A,B,C,D,Product are all 32-bit integers(with msh and lsh)
+        **
+        **             U.msh U.lsh
+        **  X          V.msh V.lsh
+        ** ------------------------
+        **             A.msh A.lsh
+        **       B.msh B.lsh
+        **       C.msh C.lsh
+        ** D.msh D.lsh
+        ** ------------------------
+        ** Product.msw Product.lsw
+        **
+        ** __ppe42_mulhwu: Return Product.msw (unsigned)
+        ** __ppe42_mulhw:  Return Product.msw (signed)
+        ** __ppe42_mullw:  Return Product.lsw
+        **
+        ** Precondition Check:
+        **
+        ** if( U == 0 || V == 0 ) return P=0;
+        */
+
+/*****************************************************************************/
+
+        /*
+        ** Multiply Low Word (__ppe42_mullw)
+        **
+        ** r5 = U[16:31] or U.lsh   | r5 = r3 & 0xffff
+        ** r3 = U[0:15]  or U.msh   | r3 = r3 >> 16     (r3 OW)
+        ** r6 = V[16:31] or V.lsh   | r6 = r4 & 0xffff
+        ** r4 = V[0:15]  or V.msh   | r4 = r4 >> 16     (r4 OW)
+        **
+        ** B = U.msh * V.lsh        | r3 = r3 * r6      (r3 OW)
+        ** B = B << 16              | r3 = r3 << 16
+        ** C = U.lsh * V.msh        | r4 = r5 * r4      (r4 OW)
+        ** C = C << 16              | r4 = r4 << 16
+        ** A = U.lsh * V.lsh        | r5 = r5 * r6      (r5 OW, r6 FU)
+        ** A = A + B                | r5 = r5 + r3      (r3 FU)
+        ** P = A + C                | r5 = r5 + r4      (r4 FU)
+        **
+        ** Return P(r3=r5) as Product.lsw
+        **
+        ** Note: there is no overflow case with this function
+        */
+
+        .align  2
+        .global __ppe42_mullw
+        .type   __ppe42_mullw, @function
+
+__ppe42_mullw:                                    // r3 = low word of r3 * r4
+
+        stwu    %r1, -0x28(%r1)                   // allocate stack frame
+
+        stvd    %d4, 0x08(%r1)                    // save off r4 & r5 in stack
+        stvd    %d6, 0x10(%r1)                    // save off r6 & r7 in stack
+        stvd    %d8, 0x18(%r1)                    // save off r8 & r9 in stack
+
+        mfcr    %r5                               // save off cr
+        stw     %r5, 0x20(%r1)                    // store cr in stack
+
+        li      %r5, 0                            // r5 = 0 (result if U or V is 0)
+        cmpwibc 1, 2, %r3, 0, __ppe42_mullw_ret   // U=0 -> ret
+        cmpwibc 1, 2, %r4, 0, __ppe42_mullw_ret   // V=0 -> ret
+
+        extrwi  %r5, %r3, 16, 16                  // r5 = U.lsh
+        srwi    %r3, %r3, 16                      // r3 = U.msh
+        extrwi  %r6, %r4, 16, 16                  // r6 = V.lsh
+        srwi    %r4, %r4, 16                      // r4 = V.msh
+
+        mullhwu %r3, %r3, %r6                     // B = U.msh * V.lsh
+        slwi    %r3, %r3, 16                      // B <<= 16 (upper half drops out)
+
+        mullhwu %r4, %r5, %r4                     // C = U.lsh * V.msh
+        slwi    %r4, %r4, 16                      // C <<= 16 (upper half drops out)
+
+        mullhwu %r5, %r5, %r6                     // A = U.lsh * V.lsh
+        add     %r5, %r5, %r3                     // A += B
+        add     %r5, %r5, %r4                     // P = A + C
+
+__ppe42_mullw_ret:                                // <<return subroutine>>
+
+        mr      %r3, %r5                          // put return value to r3
+
+        lwz     %r5, 0x20(%r1)                    // load cr from stack
+        mtcr0   %r5                               // restore cr
+
+        lvd     %d4, 0x08(%r1)                    // restore r4 & r5 from stack
+        lvd     %d6, 0x10(%r1)                    // restore r6 & r7 from stack
+        lvd     %d8, 0x18(%r1)                    // restore r8 & r9 from stack
+
+        lwz     %r1, 0(%r1)                       // restore stack pointer
+
+        blr                                       // branch back
+
+        .size __ppe42_mullw, .-__ppe42_mullw
+
+
diff --git a/tools/ppetracepp/Makefile b/tools/ppetracepp/Makefile
new file mode 100644
index 00000000..41ad1cd2
--- /dev/null
+++ b/tools/ppetracepp/Makefile
@@ -0,0 +1,12 @@
+.PHONY: all clean   # command names, not files: always run when requested
+all: ppetracepp ppe2fsp
+
+ppetracepp: ppetracepp.C
+	g++ -m32 -O3 -w -g -I./ ppetracepp.C -o ppetracepp
+
+ppe2fsp: ppe2fsp.c ppe2fsp_cmd.c
+	gcc -m32 -w -g -I./ -I../../pk/trace ppe2fsp.c ppe2fsp_cmd.c -o ppe2fsp
+
+clean:   # -f keeps clean from failing when a tool was never built
+	rm -f ppetracepp ppe2fsp
+
diff --git a/tools/ppetracepp/jhash.h b/tools/ppetracepp/jhash.h
new file mode 100755
index 00000000..128ca9a7
--- /dev/null
+++ b/tools/ppetracepp/jhash.h
@@ -0,0 +1,143 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose.  It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault.  -DaveM
+ */
+
+/* NOTE: Arguments are modified. */
+#define __jhash_mix(a, b, c) \
+{ \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO	0x9e3779b9
+
+/* General-purpose Jenkins hash over an arbitrary byte sequence.
+ *
+ * The key is read one byte at a time, so nothing is assumed about its
+ * alignment or length; initval seeds the third mixing lane.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+	const u8 *p = (const u8*)key;
+	u32 x = JHASH_GOLDEN_RATIO;
+	u32 y = JHASH_GOLDEN_RATIO;
+	u32 z = initval;
+	u32 remaining;
+
+	/* Whole 12-byte groups: three words assembled low byte first. */
+	for (remaining = length; remaining >= 12; remaining -= 12, p += 12) {
+		x += (p[0] +((u32)p[1]<<8) +((u32)p[2]<<16) +((u32)p[3]<<24));
+		y += (p[4] +((u32)p[5]<<8) +((u32)p[6]<<16) +((u32)p[7]<<24));
+		z += (p[8] +((u32)p[9]<<8) +((u32)p[10]<<16)+((u32)p[11]<<24));
+		__jhash_mix(x,y,z);
+	}
+
+	/* The total length goes into z, whose low byte the tail leaves alone. */
+	z += length;
+	/* 0..11 trailing bytes; every case deliberately falls through. */
+	switch (remaining) {
+	case 11: z += ((u32)p[10]<<24);
+	case 10: z += ((u32)p[9]<<16);
+	case 9 : z += ((u32)p[8]<<8);
+	case 8 : y += ((u32)p[7]<<24);
+	case 7 : y += ((u32)p[6]<<16);
+	case 6 : y += ((u32)p[5]<<8);
+	case 5 : y += p[4];
+	case 4 : x += ((u32)p[3]<<24);
+	case 3 : x += ((u32)p[2]<<16);
+	case 2 : x += ((u32)p[1]<<8);
+	case 1 : x += p[0];
+	}
+
+	/* Final mixing round over all three lanes. */
+	__jhash_mix(x,y,z);
+
+	return z;
+}
+
+/* Jenkins hash specialised for keys that are arrays of u32 words.
+ * length counts u32 elements (not bytes); initval seeds the hash.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+	u32 x = JHASH_GOLDEN_RATIO;
+	u32 y = JHASH_GOLDEN_RATIO;
+	u32 z = initval;
+	u32 words_left = length;
+
+	/* Consume the key three words per mixing round. */
+	for (; words_left >= 3; words_left -= 3, k += 3) {
+		x += k[0];
+		y += k[1];
+		z += k[2];
+		__jhash_mix(x, y, z);
+	}
+
+	/* The key length in bytes is folded into the third lane. */
+	z += length * 4;
+
+	/* One or two leftover words. */
+	if (words_left == 2)
+		y += k[1];
+	if (words_left != 0)
+		x += k[0];
+
+	__jhash_mix(x,y,z);
+	return z;
+}
+
+
+/* Fast paths for keys that are exactly 3, 2 or 1 word(s) long.
+ *
+ * NOTE: these skip the usual tail step: the key length is never added
+ *       into c, and exactly one __jhash_mix() round is performed.
+ */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+	u32 x = a + JHASH_GOLDEN_RATIO;
+	u32 y = b + JHASH_GOLDEN_RATIO;
+	u32 z = c + initval;
+
+	__jhash_mix(x, y, z);
+	return z;
+}
+
+/* Two-word key: the third lane is zero. */
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return jhash_3words(a, b, 0, initval);
+}
+
+/* One-word key: the second and third lanes are zero. */
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+	return jhash_3words(a, 0, 0, initval);
+}
+
+#endif /* _LINUX_JHASH_H */
diff --git a/tools/ppetracepp/ppe2fsp b/tools/ppetracepp/ppe2fsp
new file mode 100755
index 00000000..1f7b56f2
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp
diff --git a/tools/ppetracepp/ppe2fsp.c b/tools/ppetracepp/ppe2fsp.c
new file mode 100755
index 00000000..419a8d49
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp.c
@@ -0,0 +1,500 @@
+#include "pk_trace.h"
+#include "ppe2fsp.h"
+#include "trac_interface.h"
+#include <arpa/inet.h>
+#include <string.h>
+#include <stdint.h>
+
+#define     TRACE_BUF_VERSION   0x01     /*!< Trace buffer version            */
+#define     TRACE_FIELDTRACE    0x4654   /*!< Field Trace - "FT"              */
+#define     TRACE_FIELDBIN      0x4644   /*!< Binary Field Trace - "FD"       */
+
+#define TRAC_TIME_REAL   0 // upper 32 = seconds, lower 32 = nanoseconds
+#define TRAC_TIME_50MHZ  1
+#define TRAC_TIME_200MHZ 2
+#define TRAC_TIME_167MHZ 3 // 166666667Hz
+
+typedef struct //scratch fsp trace entry, filled in by pte2fte and copied out by fsp_put_entry
+{
+    trace_entry_stamp_t     stamp;  //timestamp (tbh/tbl) and thread id
+    trace_entry_head_t      head;   //length, tag, hash and line number
+    union
+    {
+        uint8_t     data[PK_TRACE_MAX_BINARY + 1]; //add 1 byte for padding
+        uint32_t    parms[PK_TRACE_MAX_PARMS];
+    };
+    uint32_t                size;   //room for the trailing entry-size word; pte2fte writes that word right after the payload, which need not be this field
+}largest_fsp_entry_t;
+
+typedef struct //scratch copy of one ppe trace entry; ppe_get_entry places the data so that it ends right before the footer
+{
+    union
+    {
+        uint8_t     binary_data[PK_TRACE_MAX_BINARY + 1];
+        struct
+        {
+            uint8_t     rsvd[(PK_TRACE_MAX_BINARY + 1) - (PK_TRACE_MAX_PARMS * sizeof(uint32_t))]; //pads parms so that it ends where binary_data ends
+            uint32_t    parms[PK_TRACE_MAX_PARMS];
+        };
+    };
+    PkTraceEntryFooter footer; //entry footer; ppe_get_entry byte swaps its time/format word and string id
+}LargestPpeEntry;
+
+//convert a ppe timestamp (ticks of an hz Hz clock) to an fsp trace timestamp
+//(upper 32 bits = seconds, lower 32 bits = nanoseconds)
+uint64_t ppe2fsp_time(uint64_t ppe_time, uint32_t hz)
+{
+    //hz comes from the trace header; return 0 rather than divide by zero
+    if(hz == 0)
+    {
+        return 0;
+    }
+    //whole seconds (truncated to 32 bits), then the leftover ticks scaled to ns
+    return (((uint64_t)(uint32_t)(ppe_time / hz)) << 32) |
+           (uint32_t)(((ppe_time % hz) * 1000000000ull) / hz);
+}
+
+//Writes an fsp trace entry to the fsp trace buffer.
+//  tb:         start of the fsp buffer; entry data lives after the header
+//  fte:        the entry to store
+//  entry_size: number of bytes of fte to store
+//  bytes_left: unused bytes at the front of the data area; entries are packed
+//              downward from the end, so the new entry lands just below the
+//              previous one.  If it does not fit, only its last bytes_left
+//              bytes are stored, at the very start of the data area.
+void fsp_put_entry(trace_buf_head_t* tb, largest_fsp_entry_t* fte, size_t entry_size, uint32_t bytes_left)
+{
+    char*       buffer = ((char*)tb) + sizeof(trace_buf_head_t);
+
+    if(entry_size <= bytes_left)
+    {
+        //whole entry fits
+        memcpy(buffer + bytes_left - entry_size, fte, entry_size);
+    }
+    else
+    {
+        //partial fit: drop the front of the entry, keep its tail
+        memcpy(buffer, ((char*)fte) + (entry_size - bytes_left), bytes_left);
+    }
+}
+
+
+//convert a ppe trace entry to an fsp trace entry; returns the fsp entry size in bytes (0 for an unknown format)
+size_t pte2fte(PkTraceBuffer* ptb,           //ppe trace buffer header (hz, instance id, hash prefix)
+               LargestPpeEntry* pte,         //entry as filled in by ppe_get_entry
+               size_t pte_size,              //size of that entry as returned by ppe_get_entry
+               largest_fsp_entry_t* fte,     //out: the converted entry, fields stored big endian
+               uint64_t ppe_time64)          //full 64 bit ppe timestamp of the entry
+{
+    size_t              entry_size;
+    PkTraceGeneric*     pte_footer = &pte->footer.generic;
+    uint32_t            format;
+    uint32_t            hash32;
+    uint32_t            hash32_partial;
+    uint32_t*           parm_start;
+    uint32_t            parm_bytes;
+    uint64_t            fsp_time64;
+
+    //convert the ppe trace time to an fsp trace time
+    fsp_time64 = ppe2fsp_time(ppe_time64, ntohl(ptb->hz));
+
+    //fill in the 64 bit timestamp
+    fte->stamp.tbh = htonl((uint32_t)(fsp_time64 >> 32));
+    fte->stamp.tbl = htonl((uint32_t)(fsp_time64 & 0x00000000ffffffffull));
+
+    //use the ppe instance id as the thread id.
+    fte->stamp.tid = htonl((uint32_t)ntohs(ptb->instance_id));
+
+    //merge the hash prefix and the string_id fields together for a 32 bit hash value
+    hash32 = ((uint32_t)ntohs(ptb->hash_prefix)) << 16;
+    hash32 |= pte_footer->string_id; //already host endian (swapped by ppe_get_entry)
+    fte->head.hash = htonl(hash32);
+
+    //generate the 32bit hash value for a partial trace entry in case it's needed
+    hash32_partial = ((uint32_t)ntohs(ptb->hash_prefix)) << 16;
+    hash32_partial |= ntohs(ptb->partial_trace_hash);
+
+    //set the line number to 1
+    fte->head.line = htonl(1);
+
+    //determine the FSP trace format
+    format = PK_GET_TRACE_FORMAT(pte_footer->time_format.word32);
+    if(format == PK_TRACE_FORMAT_BINARY)
+    {
+        fte->head.tag = htons(TRACE_FIELDBIN);
+    }
+    else
+    {
+        fte->head.tag = htons(TRACE_FIELDTRACE);
+    }
+
+    parm_start = (uint32_t*)(((char*)pte) + (sizeof(LargestPpeEntry) - pte_size)); //the entry sits at the end of pte, so it starts pte_size bytes before the end
+    
+    //fill in the parameters/binary data and size at the end
+    switch(format)
+    {
+
+        case PK_TRACE_FORMAT_TINY:
+            //one or 0 parameters
+            entry_size = sizeof(trace_entry_stamp_t) +
+                         sizeof(trace_entry_head_t) +
+                         sizeof(uint32_t);
+            fte->parms[0] = htonl((uint32_t)(pte_footer->parm16));
+            fte->head.length = htons(sizeof(uint32_t));
+            parm_bytes = 0; //nothing to copy: the only parameter was stored above
+            break;
+
+        case PK_TRACE_FORMAT_BIG:
+            //1 - 4 parameters
+            //
+            //If the trace entry data is incomplete (not all parm data
+            //had been written at the time the trace was captured) then
+            //we will write a trace to the fsp buffer that says
+            //"PARTIAL TRACE ENTRY.  HASH_ID = %d"
+            if(pte_footer->complete)
+            {
+                parm_bytes = pte_footer->bytes_or_parms_count * sizeof(uint32_t); //NOTE(review): not checked against sizeof(fte->data) here - confirm the count is bounded
+                fte->head.length = htons(parm_bytes + sizeof(uint32_t)); //NOTE(review): one word more than is copied below - confirm intent
+                entry_size = sizeof(trace_entry_stamp_t) +
+                             sizeof(trace_entry_head_t) + 
+                             parm_bytes + sizeof(uint32_t); 
+            }
+            else
+            {
+                parm_bytes = 0;
+                entry_size = sizeof(trace_entry_stamp_t) + 
+                             sizeof(trace_entry_head_t) +
+                             sizeof(uint32_t);
+                fte->parms[0] = fte->head.hash; //already corrected for endianess
+                fte->head.hash = htonl(hash32_partial);
+                fte->head.length = htons(sizeof(uint32_t));
+            }
+            break;
+
+        case PK_TRACE_FORMAT_BINARY:
+            //If the trace entry data is incomplete (not all parm data
+            //had been written at the time the trace was captured) then
+            //we will write a trace to the fsp buffer that says
+            //"PARTIAL TRACE ENTRY.  HASH_ID = %d"
+            if(pte_footer->complete)
+            {
+                parm_bytes = pte_footer->bytes_or_parms_count; //here the count is a byte count; NOTE(review): not range checked in this function
+                fte->head.length = htons((uint16_t)parm_bytes);
+                entry_size = sizeof(trace_entry_stamp_t) +
+                             sizeof(trace_entry_head_t) + 
+                             parm_bytes;
+
+                //pad to 4 byte boundary
+                entry_size = (entry_size + 3) & ~3;
+            }
+            else
+            {
+                parm_bytes = 0;
+                entry_size = sizeof(trace_entry_stamp_t) + 
+                             sizeof(trace_entry_head_t) +
+                             sizeof(uint32_t);
+                fte->parms[0] = fte->head.hash; //the original hash becomes the single parameter
+                fte->head.hash = htonl(hash32_partial);
+                fte->head.length = htons(sizeof(uint32_t));
+                fte->head.tag = htons(TRACE_FIELDTRACE); //the partial-entry message is a field trace, not binary
+            }
+            break;
+            
+
+        default:
+            entry_size = 0; //unknown format: report no entry
+            parm_bytes = 0;
+            break;
+    }
+
+    //copy parameter bytes to the fsp entry if necessary
+    if(parm_bytes)
+    {
+        memcpy(fte->data, parm_start, parm_bytes);
+    }
+
+    //add the entry size to the end (the stored size includes the size word itself)
+    if(entry_size)
+    {
+        uint32_t new_entry_size = entry_size + sizeof(uint32_t);
+        *((uint32_t*)(((char*)fte) + entry_size)) = htonl(new_entry_size);
+        entry_size = new_entry_size;
+    }
+
+    return entry_size;
+}
+
+//retrieve a ppe trace entry from a ppe trace buffer
+//  tb:     ppe trace buffer; tb->size is used as a wrap mask (power of two)
+//  offset: circular buffer offset just past the end of the wanted entry
+//  pte:    out: footer (byte swapped) with the parm/binary data placed so
+//          that it ends right in front of the footer
+//returns the entry size in bytes (footer + padded data), or 0 if the footer has
+//an unknown format or claims more data than pte can hold
+size_t ppe_get_entry(PkTraceBuffer* tb, uint32_t offset, LargestPpeEntry* pte)
+{
+    uint32_t            mask = ntohs(tb->size) - 1;
+    PkTraceEntryFooter* footer;
+    size_t              entry_size;
+    size_t              parm_size;
+    char*               dest = (char*)pte;
+    uint32_t            format;
+    uint32_t            start_index;
+    uint32_t            bytes_left;
+    uint32_t            bytes_to_copy;
+
+    //Find the footer in the circular buffer
+    footer = (PkTraceEntryFooter*)(&tb->cb[(offset - sizeof(PkTraceEntryFooter)) & mask]);
+
+    //always correct endianess for the time and string id words
+    pte->footer.generic.time_format.word32 = ntohl(footer->generic.time_format.word32);
+    pte->footer.generic.string_id = ntohs(footer->generic.string_id);
+    //only need to byte swap the parm16 value if this is a tiny format
+    pte->footer.generic.parm16 = footer->generic.parm16;
+
+    //use footer data to determine the length of the binary data or parameters
+    format = PK_GET_TRACE_FORMAT(pte->footer.generic.time_format.word32);
+    switch(format)
+    {
+        case PK_TRACE_FORMAT_TINY:
+            pte->footer.generic.parm16 = ntohs(pte->footer.generic.parm16);
+            parm_size = 0;
+            entry_size = sizeof(PkTraceEntryFooter);
+            break;
+        case PK_TRACE_FORMAT_BIG:
+            parm_size = pte->footer.generic.bytes_or_parms_count * sizeof(uint32_t);
+            entry_size = sizeof(PkTraceEntryFooter);
+            break;
+        case PK_TRACE_FORMAT_BINARY:
+            parm_size = pte->footer.generic.bytes_or_parms_count;
+            entry_size = sizeof(PkTraceEntryFooter);
+            break;
+        default:
+            entry_size = 0;
+            parm_size = 0;
+            break;
+    }
+
+    //pad to 8 byte boundary
+    parm_size = (parm_size + 7) & ~0x00000007ul;
+
+    //the count was read from the buffer; never copy more than pte can hold
+    if(parm_size > (sizeof(LargestPpeEntry) - sizeof(PkTraceEntryFooter)))
+    {
+        return 0;
+    }
+
+    //add the parameter size to the total entry size
+    entry_size += parm_size;
+
+    //copy the entry from the circular buffer to pte
+    start_index = (offset - entry_size) & mask;
+    bytes_left = ntohs(tb->size) - start_index;
+
+    //only copy up to the end of the circular buffer
+    bytes_to_copy = (parm_size < bytes_left) ? parm_size : bytes_left;
+
+    dest += sizeof(LargestPpeEntry) - entry_size;
+    memcpy(dest, &tb->cb[start_index], bytes_to_copy);
+
+    //now copy the rest of the data from the beginning of the circular buffer
+    if(bytes_to_copy < parm_size)
+    {
+        memcpy(dest + bytes_to_copy, tb->cb, parm_size - bytes_to_copy);
+    }
+
+    //return the size of the entry
+    return entry_size;
+}
+
+//convert a ppe trace buffer to an fsp trace buffer
+//  in/in_size:  the ppe trace buffer (PkTraceBuffer header + circular buffer)
+//  out/io_size: fsp buffer to fill; *io_size is its capacity on entry and the
+//               number of bytes used on success.  Returns 0 or a P2F_* code.
+int ppe2fsp(void* in, unsigned long in_size, void* out, unsigned long* io_size)
+{
+    PkTraceBuffer*              ptb = (PkTraceBuffer*)in;
+    trace_buf_head_t*           ftb = (trace_buf_head_t*)out;
+    uint32_t                    ppe_bytes_left;
+    uint32_t                    fsp_bytes_left;
+    int                         rc = 0;
+    uint32_t                    ptb_offset;
+    uint64_t                    ppe_time64;
+    uint32_t                    fte_size, pte_size;
+    uint32_t                    fsp_te_count = 0;
+    uint32_t                    time_diff32, prev_time32, new_time32;
+    PkTraceGeneric*             pte_footer;
+    largest_fsp_entry_t         fte;
+    LargestPpeEntry             pte;
+
+    do
+    {
+        if(!ptb || !ftb || !io_size)
+        {
+            rc = P2F_NULL_POINTER;
+            break;
+        }
+
+        //check that the input buffer can hold a ppe trace header before reading it
+        if(in_size < (size_t)(((char*)&ptb->cb[0]) - ((char*)ptb)))
+        {
+            rc = P2F_INPUT_BUFFER_TOO_SMALL;
+            break;
+        }
+
+        if(ntohs(ptb->version) != PK_TRACE_VERSION)
+        {
+            rc = P2F_INVALID_VERSION;
+            break;
+        }
+
+        //initialize some locals
+        fsp_bytes_left = *io_size - sizeof(trace_buf_head_t);
+        ppe_bytes_left = ntohs(ptb->size);
+        ptb_offset = ntohl(ptb->state.offset);
+
+        //make sure the ppe buffer size is a non-zero power of two
+        if(!ppe_bytes_left || ((ppe_bytes_left - 1) & ppe_bytes_left))
+        {
+            //size is zero or not a power of two (size - 1 is used as a wrap mask)
+            rc = P2F_INVALID_INPUT_SIZE;
+            break;
+        }
+
+        //The ppe bytes field should always be a multiple of 8
+        if(ptb_offset & 0x7)
+        {
+            rc = P2F_INVALID_PPE_OFFSET;
+            break;
+        }
+
+        //make sure there is enough room for the fsp header
+        if(*io_size < sizeof(trace_buf_head_t))
+        {
+            rc = P2F_OUTPUT_BUFFER_TOO_SMALL;
+            break;
+        }
+
+        //initialize the fsp header
+        ftb->ver = TRACE_BUF_VERSION;
+        ftb->hdr_len = sizeof(trace_buf_head_t);
+        ftb->time_flg = TRAC_TIME_REAL;
+        ftb->endian_flg = 'B'; //big endian
+        memcpy(ftb->comp, ptb->image_str, sizeof(ftb->comp));
+        ftb->times_wrap = htonl(1);
+        ftb->size = htonl(sizeof(trace_buf_head_t) + sizeof(uint32_t));
+        ftb->next_free = htonl(sizeof(trace_buf_head_t));
+        ftb->extracted = htonl(0);
+        ftb->te_count = htonl(0);
+
+        //find the latest timestamp so that we can work back from there
+        ppe_time64 = ((uint64_t)(ntohl(ptb->state.tbu32) & 0xefffffff)) << 32; //NOTE(review): this mask clears only bit 28 - confirm it is intended
+        pte_size = ppe_get_entry(ptb, ptb_offset, &pte);
+        prev_time32 = PK_GET_TRACE_TIME(pte.footer.generic.time_format.word32);
+        ppe_time64 |= prev_time32;
+
+        //process all of the input bytes one trace entry at a time
+        //from newest to oldest (backwards) until we run out of input bytes or
+        //we run out of output space.
+        while(1)
+        {
+            //check if we have enough data for a ppe footer
+            if(ppe_bytes_left < sizeof(PkTraceEntryFooter))
+            {
+                break;
+            }
+
+            //get the next ppe entry
+            pte_size = ppe_get_entry(ptb, ptb_offset, &pte);
+
+            //Stop if there are no more entries to retrieve from the ppe trace buffer
+            if(!pte_size)
+            {
+                break;
+            }
+            pte_footer = &pte.footer.generic;
+
+            //mark the entry as incomplete if we didn't have enough data
+            //for the entire entry
+            if(pte_size > ppe_bytes_left)
+            {
+                pte_footer->complete = 0;
+                ppe_bytes_left = 0;
+            }
+            else
+            {
+                ppe_bytes_left -= pte_size;
+                ptb_offset -= pte_size;
+            }
+
+            //Calculate the 64 bit timestamp for this entry....
+            //On PPE, getting the timestamp is not done atomically with writing
+            //the entry to the buffer.  This means that an entry with an older
+            //timestamp could possibly be added to the buffer after an entry
+            //with a newer timestamp.  Detect this condition by checking if the
+            //time difference is bigger than the max difference.  The max
+            //difference is enforced by the PPE having a trace added on a
+            //shorter time boundary (using a timer).
+            new_time32 = PK_GET_TRACE_TIME(pte_footer->time_format.word32);
+            time_diff32 = prev_time32 - new_time32;
+
+            if(time_diff32 > ntohl(ptb->max_time_change))
+            {
+                time_diff32 = new_time32 - prev_time32;
+                ppe_time64 += time_diff32;
+            }
+            else
+            {
+                ppe_time64 -= time_diff32;
+            }
+
+            //save off the lower 32bit timestamp for the next iteration
+            prev_time32 = new_time32;
+
+            //convert the ppe trace entry to an fsp trace entry
+            fte_size = pte2fte(ptb, &pte, pte_size, &fte, ppe_time64);
+
+            //fit as much of the entry into the fsp trace buffer as possible
+            fsp_put_entry(ftb, &fte, fte_size, fsp_bytes_left);
+
+            //update the fsp trace entry count
+            fsp_te_count++;
+
+            //stop if there is no more room left in the fsp trace buffer
+            if(fte_size >= fsp_bytes_left)
+            {
+                fsp_bytes_left = 0;
+                ftb->times_wrap = htonl(1);
+                break;
+            }
+            else
+            {
+                fsp_bytes_left -= fte_size;
+            }
+        }//while(1)
+
+        //shift the trace data up if there is space to do so
+        if(fsp_bytes_left)
+        {
+            char* dest = ((char*)ftb) + sizeof(trace_buf_head_t);
+            char* src = dest + fsp_bytes_left;
+            size_t data_size = *io_size - sizeof(trace_buf_head_t) - fsp_bytes_left;
+            memmove(dest, src, data_size);
+        }
+
+        //update the fsp header to reflect the true size and entry count
+        ftb->te_count = htonl(fsp_te_count);
+
+        //inform the caller of how many bytes were actually used
+        *io_size -= fsp_bytes_left;
+
+        //shrink the size field to what we actually ended up using
+        ftb->size = htonl(*io_size);
+
+    }while(0);
+
+    return rc;
+}
+
+
+
diff --git a/tools/ppetracepp/ppe2fsp.h b/tools/ppetracepp/ppe2fsp.h
new file mode 100644
index 00000000..04eee616
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp.h
@@ -0,0 +1,10 @@
+
+
+#define P2F_NULL_POINTER                1   /* in, out or io_size is NULL */
+#define P2F_INVALID_INPUT_SIZE          2   /* ppe buffer size field is not a power of two */
+#define P2F_INVALID_PPE_OFFSET          3   /* ppe buffer offset is not a multiple of 8 */
+#define P2F_OUTPUT_BUFFER_TOO_SMALL     4   /* *io_size cannot hold the fsp trace header */
+#define P2F_INPUT_BUFFER_TOO_SMALL      5   /* in_size cannot hold the ppe trace header */
+#define P2F_INVALID_VERSION             6   /* ppe buffer version is not PK_TRACE_VERSION */
+/* Convert the ppe trace buffer at in into an fsp trace buffer at out; *io_size is the capacity on entry and the bytes used on success. Returns 0 or a P2F_* code. */
+int ppe2fsp(void* in, unsigned long in_size, void* out, unsigned long* io_size);
diff --git a/tools/ppetracepp/ppe2fsp_cmd.c b/tools/ppetracepp/ppe2fsp_cmd.c
new file mode 100644
index 00000000..c99734d8
--- /dev/null
+++ b/tools/ppetracepp/ppe2fsp_cmd.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include "ppe2fsp.h"
+#include "pk_trace.h"
+
+#define MAX_INPUT_SIZE 0x2040       //8k + 0x40 bytes
+#define MAX_OUTPUT_SIZE (4 * MAX_INPUT_SIZE) 
+//byte buffers for the raw ppe trace and the converted fsp trace; kept 8 byte
+//aligned because ppe2fsp() accesses them through struct pointers
+char inbuf[MAX_INPUT_SIZE] __attribute__((aligned(8)));
+char outbuf[MAX_OUTPUT_SIZE] __attribute__((aligned(8)));
+//Command line wrapper around ppe2fsp(): reads a ppe trace buffer from the
+//input file (or stdin), converts it and writes the fsp trace to the output
+//file (or stdout).  Returns 0 on success, the ppe2fsp rc or -1 on failure.
+int main(int argc, char** argv)
+{
+    FILE* in;
+    FILE* out;
+    size_t  input_size, bytes_written;
+    unsigned long output_size;  //ppe2fsp takes an unsigned long*
+    int rc = -1;
+
+    do
+    {
+        if(argc > 3)
+        {
+            fprintf(stderr, "Usage: %s [input file] [output file]\n", argv[0]);
+            break;
+        }
+
+        if(argc < 3)
+        {
+            out = stdout;
+        }
+        else
+        {
+            //open the output file for writing
+            out = fopen(argv[2], "w");
+            if(!out)
+            {
+                perror("failed to open file for writing");
+                break;
+            }
+        }
+    
+        if(argc < 2)
+        {
+            in = stdin;
+        }
+        else
+        {
+            //open the input file for reading
+            in = fopen(argv[1], "r");
+            if(!in)
+            {
+                perror("failed to open file for reading");
+                break;
+            }
+        }
+    
+        //read the input stream until we reach EOF or the max size
+        input_size = fread(inbuf, 1, MAX_INPUT_SIZE, in);
+        if(!feof(in))
+        {
+            if(ferror(in))
+            {
+                perror("failed to read input file");
+                break;
+            }
+            fprintf(stderr, "Input stream exceeds max size of %d bytes. Exiting.\n", MAX_INPUT_SIZE);
+            break;
+        }
+
+        output_size = MAX_OUTPUT_SIZE;
+
+        //Actual size of output buffer will be set upon successful completion
+        rc = ppe2fsp(inbuf, input_size, outbuf, &output_size);
+        if(rc)
+        {
+            fprintf(stderr, "Failed converting ppe trace to fsp trace. rc = %d\n", rc);
+            if(rc == P2F_INVALID_VERSION)
+            {
+                fprintf(stderr, "PPE trace buffer must be version %d.\n", PK_TRACE_VERSION);
+            }
+            break;
+        }
+
+        rc = -1;
+        //operation was successful.  Write out the fsp trace data
+        bytes_written = fwrite(outbuf, 1, output_size, out);
+        if(bytes_written != output_size)
+        {
+            if(ferror(out))
+            {
+                perror("Failed to write output stream");
+                break;
+            }
+            fprintf(stderr, "Failure: Only able to write %lu of %lu bytes to output stream\n", (unsigned long)bytes_written, output_size);
+            break;
+        }
+
+        fclose(in);
+        fclose(out);
+        fclose(stderr);
+
+        rc = 0;
+    }while(0);
+
+    return rc;
+}
+
+    
+    
+    
+    
+
diff --git a/tools/ppetracepp/ppetracepp b/tools/ppetracepp/ppetracepp
new file mode 100755
index 00000000..52269147
--- /dev/null
+++ b/tools/ppetracepp/ppetracepp
diff --git a/tools/ppetracepp/ppetracepp.C b/tools/ppetracepp/ppetracepp.C
new file mode 100755
index 00000000..c5378fe7
--- /dev/null
+++ b/tools/ppetracepp/ppetracepp.C
@@ -0,0 +1,922 @@
+
+/*
+# *** ppetracepp - a fsp/common Linux trace pre processor
+# this one replaces the trace strings by the corresponding hash value
+# (i.e. the complete call to trace_adal_hash is replaced)
+
+# *** Usage
+#
+# prepend compiler call with the call of this pre processor, i.e if you have
+#      $(CC) $(CFLAGS) -o $@ $<
+# in your Makefile change it to this:
+#      ppetracepp $(CC) $(CFLAGS) -o $@ $<
+# ppetracepp will use "$(CC) -E" to call the C pre processor "cpp".
+# you can set an env var "REALCPP" to the name of a program to select
+# a different program as cpp
+#
+# ppetracepp creates a file "$target.hash" with the trace strings and the hash values.
+#
+# to enable debug mode set envvar PPETRACEPPDEBUG to 1 or give '-d' as first arg
+
+# *** Change History
+#
+# 2003-02-26  RBa  created from scratch
+# 2003-02-28  RBa  add C++ support (C++ interface uses own type for the hash)
+# 2003-05-28  RBa  if cc should link instead of compile just call compiler
+# 2003-07-11  AGe  Change search alg. slightly and put just format back
+# 2003-07-25  RBa  just call gcc if called to link instead to compile
+#                  eat argument for option -x
+# 2003-11-26  RBa  fix c/c++ algo: compile as c++ if realcc=*g++
+# 2004-02-02  RBa  remove explicit test whether source file is readable
+#                  it is obsolete and might lead to an error if afs is used
+# 2004-02-13  RBa  add support for dependency generation (-MD/-MG, -MF)
+#                  don't prepend './' to object filename
+# 2006-04-19  RBa  rewrite trace_adal_write_all support, handle C and C++ the same
+# 2006-05-24  RBa  fix handling of missing -o ; add TRAC_PPVER for macro/API version
+# 2006-09-15  RBa  add handling of \" in trace format strings ; reduce non-error output
+#                  put object file in current dir if no -o given
+# 2007-03-22  RBa  handle more gcc options (-i) ; protect " in call to shell
+#                  store output of cpp as "unmodified" output for debug
+#                  only write string/hash file if strings found
+# 2012-09-24 hlava Rewritten as C program for better build performance (was perl)
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <string>
+#include <time.h>
+#include <fcntl.h>
+#include <map>
+#include <vector>
+
+#include <sys/types.h>
+typedef u_int32_t u32 ;
+typedef u_int8_t u8 ;
+#include <jhash.h>
+
+using namespace std;
+
+static string version = "2.0";
+static string macro_version = "1";
+
+static bool debug = false;
+#define dprintf(format, ...) if (debug) { printf(format, ##__VA_ARGS__); fflush(stdout); }
+static map<string,string> hashtab;
+static string hashtype;
+static string hashtype_suffix;
+
+static string tmp;
+static string cmd;
+static FILE* CPP = NULL; // pipe from preprocessor
+static FILE* CC = NULL;  // pipe to compiler
+static FILE* DEBUG = NULL;
+static FILE* DEBUGIN = NULL;
+
+//*****************************************************************************
+// replace_substr: replace each oldStr in str with newStr (no rescan of output)
+//*****************************************************************************
+void replace_substr(std::string& str, const std::string& oldStr, const std::string& newStr)
+{
+	// An empty pattern matches at every position and would loop forever.
+	if (oldStr.empty()) { return; }
+	size_t pos = 0;
+	while((pos = str.find(oldStr, pos)) != std::string::npos)
+	{
+		str.replace(pos, oldStr.length(), newStr);
+		pos += newStr.length();
+	}
+}
+
+//*****************************************************************************
+// fileparse: split a path into directory (with trailing '/'), base name and
+// suffix (from the last '.' of the final component), "a/b.c" -> "a/","b",".c"
+//*****************************************************************************
+void fileparse(const string& in_str, string& name, string& dir, string& suff)
+{
+	// Work on a copy: in_str may refer to one of the output strings.
+	const string path(in_str);
+	const size_t slash = path.find_last_of('/');
+	const size_t base = (slash == string::npos) ? 0 : slash + 1;
+	const size_t dot = path.find_last_of('.');
+	dir = path.substr(0, base);
+	// A '.' inside a directory name does not start a suffix.
+	if ((dot != string::npos) && (dot >= base))
+	{
+		name = path.substr(base, dot - base);
+		suff = path.substr(dot);
+	}
+	else
+	{
+		name = path.substr(base);
+		suff = "";
+	}
+}
+
+static const size_t TRACE_ADAL_HASH_LEN = 14; // NOTE(review): "trace_adal_hash" is 15 chars; harmless below because the '(' is searched for afterwards
+//*****************************************************************************
+// chop_up_line
+//   Splits in_line around its first trace_adal_hash("..." "...", N) call:
+//   prefix = text before the call, strings = the joined and unescaped string
+//   literals, salt = the trimmed second argument, suffix = text after ')'.
+//   Returns false (outputs possibly partly filled) if no such call is found.
+//*****************************************************************************
+bool chop_up_line(string& in_line, string& prefix, string& strings, string& salt, string& suffix)
+{
+	// First see if this line matches the pattern we're looking for
+	// Since this will return false 95%+ of the time this function it called, we do it
+	// before doing any other init for performance reasons.
+	size_t pos = in_line.find("trace_adal_hash");
+	if (pos == string::npos) { return(false); }
+
+	// trace_adal_hash ( "..." ".." "..." , 2 )
+	// regex: PREFIX 'trace_adal_hash' space '(' space STRINGS  space ',' space NUMBER space ')' SUFFIX
+	// STRINGS:  '"' .* '"' space? +
+
+	// Original perl magic incantation:
+	//  	while($line =~ m/^(.*?)trace_adal_hash\s*\(\s*((".*?(?<!\\)"\s*)+),\s*(-?\d+)\s*\)(.*)$/) {
+	//        	($prefix, $strings, $salt, $suffix) = ($1, $2, $4, $5);
+	//
+	// Decrypting the magic pattern matching...
+	// (.*?)            => $1 = everything up to the word "trace_adal_hash"
+	// trace_adal_hash  = delimiter
+	// \s*\(\s*         = delimiter = <0-n whitespace chars>, left paren, <0-n whitespace chars>
+	// ((".*?(?<!\\)"\s*)+) => $2 = double-quote, some chars up to last closing double-quote ($3 used for nested regex)
+	// ,\s*             = delimiter = comma followed by some whitespace
+	// (-?\d+)\s*\)(.*) => $4 and $5
+	// $/)             = end of the line input string
+	string line(in_line);
+	prefix = strings = salt = suffix = "";
+	size_t pos1, pos2, pos3;
+
+	pos1 = pos + 15; // pos1 = after "trace_adal_hash"
+	pos2 = line.find("(", pos1);
+	if (pos2 == string::npos) { return(false); }
+	++pos2;
+	pos3 = line.find("\"", pos2);
+	if (pos3 == string::npos) { return(false); }
+	dprintf("--------\nchop_up_line: Passed basic checks. line= %s\n", line.c_str());
+	dprintf("pos1=%d, pos2=%d, pos3=%d\n", (int)pos1, (int)pos2, (int)pos3);
+	// Only whitespace may sit between the name and '(' and between '(' and the first '"'.
+	if (line.substr(pos1, (pos2-1)-pos1).find_first_not_of(" \t") != string::npos) { return(false); } //non-whitespace?
+	if (line.substr(pos2, pos3-pos2).find_first_not_of(" \t") != string::npos) { return(false); } //non-whitespace?
+
+	// Get the prefix data
+	dprintf(">chop_up_line(\"%s\",...)\n", line.c_str());
+	prefix = line.substr(0, pos);
+	line = line.substr(pos + TRACE_ADAL_HASH_LEN);
+	dprintf("    prefix=\"%s\"\n", prefix.c_str());
+
+	// Get the strings and join/fix them: Store all strings between paired double-quotes up to the
+	// first comma not inside a string
+	pos = line.find_first_of('(');
+	if (pos == string::npos) { return(false); }
+	line = line.substr(pos + 1);
+	strings = "";
+	while(!line.empty())
+	{
+		pos = line.find_first_of(',');
+		pos1 = line.find_first_of('"');
+		if ((pos1 == string::npos) || ((pos != string::npos) && (pos < pos1))) { break; } // found comma before next quote
+		pos2 = line.find_first_of('"', (pos1+1));
+		if (pos2 == string::npos) { return(false); } // unbalanced quotes!
+		while(line[pos2-1] == '\\') // skip escaped quotes in the string (they're not the ending quote)
+		{
+			pos2 = line.find_first_of('"', (pos2+1));
+			if (pos2 == string::npos) { return(false); } // unbalanced quotes!
+		}
+		if (!strings.empty()) { strings += " "; }
+		strings += line.substr(pos1, (pos2-pos1)+1);
+		line = line.substr(pos2+1);
+	}
+	replace_substr(strings, "\" \"", "");
+	replace_substr(strings, "\\\"", "ESCAPEDQUOTE");
+	replace_substr(strings, "\"", "");
+	replace_substr(strings, "ESCAPEDQUOTE", "\"");
+	// Remove trailing whitespace                           ah20130717
+	pos = strings.find_last_not_of(" \t\n");
+	if ((pos != string::npos) && (pos < (strings.length()-1)))
+	{
+		strings = strings.substr(0, pos+1);				
+	}
+
+	dprintf("    strings>%s<\n", strings.c_str());
+
+	// Get the salt
+	pos = line.find(",");
+	if (pos != string::npos) { line = line.substr(pos+1); }
+	pos = line.find_first_of(')');
+	if (pos == string::npos) { return(false); }
+	salt = line.substr(0, pos);
+	line = line.substr(pos+1);
+	//dprintf("    salt=\"%s\"\n", salt.c_str());
+	pos = salt.find_first_not_of(" \t\n");
+	if (pos == string::npos) { return(false); }
+	salt = salt.substr(pos);
+	pos = salt.find_last_not_of(" \t\n");
+	if (pos == string::npos) { return(false); }
+	salt = salt.substr(0, pos+1);
+	dprintf("    salt=\"%s\"\n", salt.c_str());
+
+	// Get the suffix (i.e. the rest); it is empty when ')' was the last character
+	suffix = line;
+	if (!suffix.empty() && (suffix[suffix.length()-1] == '\n')) { suffix = suffix.substr(0, suffix.length()-1); }
+	dprintf("    suffix=\"%s\"\n<chop_up_line() returning true\n", suffix.c_str());
+
+	return(true);
+}
+
+//*****************************************************************************
+// get_format_string
+//   Gathers the printf-style conversions of in_str into "format" as a comma
+//   separated list.  Returns the number gathered, reduced by one for every
+//   "%%" pair that is followed by more text.
+//*****************************************************************************
+int get_format_string(const string& in_str, string& format)
+{
+	// Scan a snapshot so that clearing "format" cannot disturb the input.
+	const string text(in_str);
+	int spec_count = 0;
+	size_t cursor = 0; // where the next scan of text begins
+
+	// The perl original collected the matches of this regular expression:
+	// (%[#0\- +'I]*\d*(?:\.\d*)?[hlLqjzt]*[diouxXeEfFgGaAcsCSpn])
+	format = "";
+	while (cursor < text.length())
+	{
+		const size_t pct = text.find("%", cursor);
+		if (pct == string::npos) { break; }
+		if (pct == (text.length()-1)) { break; } // '%' is the last char: nothing follows it
+		if (text[pct+1] == '%')
+		{
+			// double percent sign: resume scanning at the second '%'
+			cursor = pct+1;
+			continue;
+		}
+		// the first conversion character at or after the '%' ends the specifier
+		const size_t conv = text.find_first_of("cdieEfgGosuxXpn", pct);
+		if (conv == string::npos)
+		{
+			 fprintf(stderr, "ERROR: ppetracepp could not parse trace formatting string \"%s\" in \"%s\"\n", text.substr(cursor).c_str(), in_str.c_str());
+			 break;
+		}
+
+		// append the whole specifier, '%' through the conversion character
+		if (!format.empty()) { format += ","; }
+		format += text.substr(pct, (conv-pct)+1);
+		++spec_count;
+		cursor = conv+1;
+	}
+
+	// Correct for escaped percent signs
+	size_t scan = 0;
+	for (;;)
+	{
+		const size_t dbl = in_str.find("%%", scan);
+		if (dbl == string::npos) { break; }
+		if ((dbl+2) >= in_str.length()) { break; } // the pair ends the string: no correction
+		// NOTE(review): assumes the second '%' of the pair was counted as a specifier above
+		dprintf("    decrementing salt value %d\n", spec_count);
+		--spec_count;
+		scan = dbl+2;
+	}
+
+	return(spec_count);
+}
+
+#define BUF_SIZE 40
+//*****************************************************************************
+// get_hash: hash i_str with salt_num; both results are decimal strings
+//*****************************************************************************
+void get_hash(const string& i_str, const unsigned int salt_num, string& hash32, string& hash16)
+{
+	const string text(i_str); // private copy, taken before the outputs are cleared
+	char digits[BUF_SIZE];
+
+	hash32 = "";
+	hash16 = "";
+	dprintf(">get_hash(\"%s\",%d)\n", text.c_str(), salt_num);
+
+	// Hash the string, passing the salt as jhash's initial value
+	const unsigned int raw_hash = jhash((void*)text.c_str(), text.length(), salt_num);
+	const unsigned int low16 = raw_hash & 0x0000ffff;
+	dprintf("jhash() returned: %u\n", raw_hash);
+	// hash16 = low 16 bits of the hash; hash32 = salt shifted into the upper half, ORed with hash16
+	sprintf(digits, "%u", low16);
+	hash16 = digits;
+	sprintf(digits, "%u", ((salt_num << 16) | low16));
+	hash32 = digits;
+
+	// validate the hash value
+	if (hash32.find_first_not_of("0123456789") != string::npos)
+	{
+		fprintf(stderr, "trexhash error: %s\n", hash32.c_str());
+		fprintf(stderr, "for call <<%s>>\n", cmd.c_str());
+		exit(1);
+	}
+
+//removing this since it doesn't seem to have any affect on the output
+#if 0 
+	// If hash is empty, use the sum of the ord values in the original string
+	if ((hash == "")||(hash == "0"))
+	{
+		unsigned int len = str.length();
+		unsigned int hash_num = 0;
+		//unsigned char conv_buf[2] = { '\0', '\0' };
+		u_int8_t conv_num;
+		for (unsigned int i=0; i < len; ++i)
+		{
+			//conv_buf[0] = str[i];
+			conv_num = (u_int8_t)str[i];
+			hash_num += (unsigned int)conv_num;
+		}
+	}
+#endif
+
+	dprintf("<get_hash(\"%s\",%d) returned hash: %s\n", text.c_str(), salt_num, hash32.c_str());
+}
+
+//*****************************************************************************
+// parse_line
+//*****************************************************************************
+// Replaces every trace_adal_hash("...", N) call found in 'line' by the hash
+// of its string(s) and records the hash -> string mapping in rhash.
+//
+// rhash    - (in/out) table of all hashes seen so far, keyed by the 32 bit hash text
+// line     - (in/out) preprocessed source line; consumed while parsing
+// out_line - (out) rewritten line, always newline terminated on success.
+//            Left EMPTY when a hash collision is detected so that the caller
+//            can tell the failure apart from a successfully rewritten line.
+void parse_line(map<string,string>& rhash, string& line, string& out_line)
+{
+	// NOTE: "line" arg may get modified by this function!  Caller must not assume it's unchanged.
+	string format;
+	string prefix;
+	string strings;
+	string tmp;
+	string salt;
+	string hash16;
+	string hash32;
+	int salt_num;
+	int format_salt;
+	string suffix;
+	string write_all_suffix;
+
+	out_line = "";
+	// trace_adal_hash ( "..." ".." "..." , 2 )
+	// regex: PREFIX 'trace_adal_hash' space '(' space STRINGS  space ',' space NUMBER space ')' SUFFIX
+	// STRINGS:  '"' .* '"' space? +
+	//while($line =~ m/^(.*?)trace_adal_hash\s*\(\s*((".*?(?<!\\)"\s*)+),\s*(-?\d+)\s*\)(.*)$/) {
+	// chop_up_line() approximates the perl regex above
+	while( chop_up_line(line, prefix, strings, salt, suffix) )
+	{
+		// is this a trace_adal_write_all call?
+		if (prefix.find("trace_adal_write_all") != string::npos)
+		{
+			// yes. replace trace_adal_hash with hash value and reduced format string
+			format_salt = get_format_string(strings, format);
+			// reduced format string will be added after hash value
+			write_all_suffix = ",\" ";
+			write_all_suffix += format;
+			write_all_suffix += "\"";
+
+			// a missing salt or -1 means "derive the salt from the format string"
+			salt_num = salt.empty() ? -1 : atoi(salt.c_str());
+			if (salt_num == -1)
+			{
+				salt_num = format_salt;
+			}
+			else if (salt_num != format_salt)
+			{
+				fprintf(stderr, "ERROR: printf mismatch in '%s': TRACE says %d, format says %d args\n",
+						line.c_str(), salt_num, format_salt);
+			}
+		}
+		else
+		{
+			write_all_suffix = "";
+			salt_num = atoi(salt.c_str());
+		}
+
+		// get the trex hash value for the strings
+		get_hash(strings, salt_num, hash32, hash16);
+
+		// check for duplicate and store the mapping
+		map<string,string>::iterator known = rhash.find(hash32);
+		if ((known != rhash.end()) && (known->second != strings))
+		{
+			fprintf(stderr, "hash collision: two different strings give the same hash value '%s'\n", hash32.c_str());
+			fprintf(stderr, "%s\n%s\n", strings.c_str(), known->second.c_str());
+			// discard what was already converted on this line: an empty
+			// out_line is how the caller recognizes the failure
+			out_line = "";
+			return;
+		}
+		rhash[hash32] = strings;
+
+		// add the modified line to the output
+		tmp = prefix;
+		tmp += "(";
+		tmp += hashtype;
+		tmp += " ";
+		tmp += hash16;
+		tmp += hashtype_suffix;
+		tmp += ")";
+		tmp += write_all_suffix;
+		out_line += tmp;
+		dprintf("changed call: %s...\n", tmp.c_str());
+
+		// keep looking for further calls in the remainder of the line
+		line = suffix;
+	}
+	out_line += line;
+	// guard against indexing an empty string before checking the last character
+	if (out_line.empty() || (out_line[out_line.length()-1] != '\n'))
+		out_line += "\n";
+}
+
+//*****************************************************************************
+//                                 main
+//*****************************************************************************
+// Returns true when 'str' ends with 'suffix'; safe for strings shorter than the suffix.
+static bool ppetracepp_has_suffix(const string& str, const char* suffix)
+{
+	const size_t n = strlen(suffix);
+	return (str.length() >= n) && (str.compare(str.length()-n, n, suffix) == 0);
+}
+
+// Advances argi to the word following argv[argi] and returns that word;
+// aborts when the option is the last word on the command line.
+static const char* ppetracepp_next_arg(int argc, char** argv, int& argi)
+{
+	if ((argi + 1) >= argc)
+	{
+		fprintf(stderr, "option '%s' requires an argument, aborting\n", argv[argi]);
+		exit(1);
+	}
+	return argv[++argi];
+}
+
+// Compiler wrapper: runs the preprocessor on the source file, replaces the
+// trace_adal_hash() calls in its output by hash values (see parse_line),
+// pipes the result into the real compiler and finally writes the
+// "<object>.hash" file holding the hash -> string table.
+//
+// usage: ppetracepp [-d] realcompiler compileroptions -o target source
+//
+// Environment: PPETRACEPPDEBUG enables debug output, REALCPP overrides the
+// preprocessor command (default "<realcompiler> -E").
+int main(int argc, char** argv)
+{
+	char* p_env = getenv("PPETRACEPPDEBUG");
+	if (p_env)
+		debug = true;
+
+	int argi = 1;
+	string arg;
+	string optval;
+	if (argc > 1) arg = argv[1];
+	if ((argc < 2) || (arg == "-h"))
+	{
+		fprintf(stderr, "usage: %s realcompiler compileroptions -o target source\n", argv[0]);
+		exit(9);
+	}
+	string realcc(argv[argi++]);
+	string cctype("c++");
+	bool optx_found = false;
+
+	if (realcc == "-d")
+	{
+		debug = true;
+		if (argi >= argc)
+		{
+			// "-d" was the only word: there is no compiler name to pick up
+			fprintf(stderr, "usage: %s realcompiler compileroptions -o target source\n", argv[0]);
+			exit(9);
+		}
+		realcc = argv[argi++];
+	}
+
+	// wait until -d options is handled before checking $debug
+	dprintf("ppetracepp version %s - API/macro version %s\n", version.c_str(), macro_version.c_str());
+
+	p_env = getenv("REALCPP");
+	string realcpp;
+	if (p_env)
+		realcpp = p_env;
+	if (realcpp.empty())
+	{
+		dprintf("cannot find cpp, using <realcompiler> -E\n");
+		realcpp = realcc;
+		realcpp += " -E";
+	}
+	dprintf("realcpp is %s\n", realcpp.c_str());
+
+	//------------------------------------------------------------------------------
+	// parse all the arguments
+	//------------------------------------------------------------------------------
+	string source;
+	string object;
+	vector<string> ccopts;
+	vector<string> cppopts;
+	bool dodeps = false;
+	string depfile;
+	string pfx;
+	string sfx;
+	int origargi = argi;
+	for( ; argi < argc; ++argi)
+	{
+		arg = argv[argi];
+		dprintf("Processing argv[%d]: \"%s\"\n", argi, arg.c_str());
+		if (arg.length() > 2)
+		{
+			pfx = arg.substr(0,2);
+			sfx = arg.substr(arg.length()-2);
+		}
+		else
+		{
+			pfx = arg;
+			sfx = arg;
+		}
+		dprintf("   pfx: \"%s\"    sfx: \"%s\"\n", pfx.c_str(), sfx.c_str());
+
+		if (pfx == "-o")
+		{
+			if (! object.empty())
+			{
+				fprintf(stderr, "two -o options, aborting\n");
+				exit(1);
+			}
+			if (arg.length() > 2)
+			{
+				// "-oNAME": the name is everything behind the "-o"
+				object = arg.substr(2);
+			}
+			else
+			{
+				object = ppetracepp_next_arg(argc, argv, argi);
+			}
+			dprintf("object is now %s\n", object.c_str());
+		}
+		else if (arg == "-c")
+		{
+			// don't call cpp with -c, this is for the compiler
+			ccopts.push_back(arg);
+			dprintf("found -c option\n");
+		}
+		else if (pfx == "-l")
+		{
+			// cpp doesn't need library arguments, only the compiler does
+			ccopts.push_back(arg);
+		}
+		else if (pfx == "-i")
+		{
+			// option takes an argument, handle it too
+			optval = ppetracepp_next_arg(argc, argv, argi);
+			ccopts.push_back(arg);
+			ccopts.push_back(optval);
+			cppopts.push_back(arg);
+			cppopts.push_back(optval);
+			dprintf("found option '%s %s'\n", arg.c_str(), optval.c_str());
+		}
+		else if ((arg == "-L") ||
+		         (arg == "-I") ||
+		         (arg == "-x") ||
+		         (arg == "-b") ||
+		         (arg == "-B") ||
+		         (arg == "-V") ||
+		         (arg == "-D") ||
+		         (arg == "--param") ||
+		         (arg == "-MQ") ||
+		         (arg == "-MT"))
+		{
+			// option takes an argument, handle it too
+			optval = ppetracepp_next_arg(argc, argv, argi);
+			ccopts.push_back(arg);
+			ccopts.push_back(optval);
+			cppopts.push_back(arg);
+			cppopts.push_back(optval);
+			dprintf("found option '%s %s'\n", arg.c_str(), optval.c_str());
+			if (arg == "-x")
+			{
+				// option x sets the language - c or c++
+				if ((optval != "c") && (optval != "c++") && (optval != "assembler-with-cpp"))
+				{
+					fprintf(stderr, "cannot process language '%s', aborting\n", optval.c_str());
+					exit(1);
+				}
+				cctype = optval;
+				optx_found = true;
+			}
+		}
+		else if ((arg == "-MD")||(arg == "-MG"))
+		{
+			// gen deps
+			dodeps = true;
+			dprintf("found %s, creating dependencies\n", arg.c_str());
+		}
+		else if (arg == "-MF")
+		{
+			// set dependencies file
+			depfile = ppetracepp_next_arg(argc, argv, argi);
+			dprintf("set dependencies file to '%s'\n", depfile.c_str());
+		}
+		else if (!arg.empty() && (arg[0] == '-'))
+		{
+			// arg starts with - so it's an option
+			ccopts.push_back(arg);
+			cppopts.push_back(arg);
+			dprintf("found option '%s'\n", arg.c_str());
+		}
+		else if ((sfx == ".a") ||
+		         (sfx == ".o"))
+		{
+			// an object or archive, ignore this but give it to cc
+			ccopts.push_back(arg);
+			dprintf("found object/archive '%s'\n", arg.c_str());
+		}
+		else if ((sfx == ".c") ||
+		         (sfx == ".C") ||
+		         (sfx == ".S") ||
+		         ppetracepp_has_suffix(arg, ".cpp") ||
+		         ppetracepp_has_suffix(arg, ".cxx"))
+		{
+			// the source file(s). we should only get one
+			if (!source.empty())
+			{
+				fprintf(stderr, "don't know to handle two source files, aborting\n");
+				exit(1);
+			}
+			source = arg;
+			// put the - (for read-from-stdin) where the source file was
+			// (order is important!)
+			ccopts.push_back("-");
+			dprintf("found source file %s\n", source.c_str());
+		}
+		else if (access(arg.c_str(), F_OK))
+		{
+			// option but not a file, an unknown option?
+			ccopts.push_back(arg);
+			cppopts.push_back(arg);
+			dprintf("found unknown option '%s'\n", arg.c_str());
+		}
+	}
+
+	//------------------------------------------------------------------------------
+	// set other parameters based on arguments specified
+	//------------------------------------------------------------------------------
+	if (source.empty())
+	{
+		// this might be a call to link a program instead of compile a source (or asm source)
+		dprintf("NOTME: starting as cc '%s ...'\n", realcc.c_str());
+		// argv[origargi-1] is the compiler name itself: it has to be argv[0]
+		// of the new program or the first real option would be swallowed
+		execvp(realcc.c_str(), &(argv[origargi-1]));
+		// only reached when the exec failed; nothing sensible can follow
+		fprintf(stderr, "ERROR: returned from execvp() call to run %s\n", realcc.c_str());
+		exit(1);
+	}
+	if (object.empty())
+	{
+		dprintf("no object file given, default to source name\n");
+		// gcc builds object name from source name if no -o given, replacing
+		// suffix with .o. The file is placed in the current directory,
+		// not in the source directory!
+		string n;
+		string d;
+		string s;
+		fileparse(source, n, d, s);
+		if (!n.empty() && !s.empty())
+		{
+			object = n + ".o";
+			dprintf("tracpp: guessing object name %s\n", object.c_str());
+			dprintf("        from source name     %s\n", source.c_str());
+		}
+		else
+		{
+			fprintf(stderr, "Unable to determine Source File Name\n");
+			exit(1);
+		}
+	}
+
+	// set value of trace hash according to language
+	// check source file extension if no explicit -x option given
+	if (!optx_found)
+	{
+		if (realcc.find("g++") != string::npos)
+		{
+			dprintf("compiler language: C++ (from compiler name)\n");
+			cctype = "c++";
+		}
+		else if (ppetracepp_has_suffix(source, ".c"))
+		{
+			dprintf("compiler language: C (from source file extension)\n");
+			cctype = "c";
+		}
+		else if (ppetracepp_has_suffix(source, ".S"))
+		{
+			dprintf("compiler language: assembly (from source file extension)\n");
+			cctype = "assembler-with-cpp";
+		}
+		else
+		{
+			dprintf("compiler language: C++ (default)\n");
+			cctype = "c++";
+		}
+	}
+	else
+	{
+		dprintf("compiler language: %s (from option '-x')\n", cctype.c_str());
+	}
+
+	// the cast/suffix wrapped around the hash value depends on the language
+	if (cctype == "c")
+	{
+		hashtype = "(unsigned short)";
+		hashtype_suffix = "U";
+	}
+	else if (cctype == "assembler-with-cpp")
+	{
+		hashtype = "";
+		hashtype_suffix = "";
+	}
+	else
+	{
+		hashtype = "(trace_hash_val)";
+		hashtype_suffix = "U";
+	}
+	// define TRAC_PPETRACEPP for macros
+	tmp = "-DTRAC_PPETRACEPP -DTRAC_PPVER=";
+	tmp += macro_version;
+	cppopts.push_back(tmp);
+	if (dodeps)
+	{
+		if (depfile.empty())
+		{
+			if ((p_env = getenv("DEPENDENCIES_OUTPUT")) != NULL)
+			{
+				depfile = p_env;
+			}
+			else if ((p_env = getenv("SUNPRO_DEPENDENCIES")) != NULL)
+			{
+				depfile = p_env;
+			}
+			else
+			{
+				// default: object name with ".o" replaced by ".d"
+				depfile = object;
+				if (ppetracepp_has_suffix(depfile, ".o"))
+				{
+					depfile = depfile.substr(0, depfile.length()-2);
+					depfile += ".d";
+				}
+			}
+		}
+		tmp = "-MD -MF ";
+		tmp += depfile;
+		cppopts.push_back(tmp);
+	}
+
+	//------------------------------------------------------------------------------
+	// start cpp
+	//------------------------------------------------------------------------------
+	cmd = realcpp;
+	for(vector<string>::iterator p = cppopts.begin(); p != cppopts.end(); ++p)
+	{
+		cmd += " ";
+		cmd += *p;
+	}
+	cmd += " ";
+	cmd += source;
+	cmd += " -o-";
+	dprintf("starting as cpp '%s'\n", cmd.c_str());
+	CPP = popen(cmd.c_str(), "r");
+	if (CPP == NULL)
+	{
+		fprintf(stderr, "cannot start cpp '%s'\n", realcpp.c_str());
+		perror("");
+		exit(1);
+	}
+
+	//------------------------------------------------------------------------------
+	// start cc. manually set language as source file extension not available to cc
+	//------------------------------------------------------------------------------
+	string type_str = "";
+	if (!optx_found)
+	{
+		// no option -x given by caller, set manually
+		type_str = "-x ";
+		type_str += cctype;
+	}
+	cmd = realcc;
+	cmd += " ";
+	cmd += type_str;
+	for(vector<string>::iterator p = ccopts.begin(); p != ccopts.end(); ++p)
+	{
+		cmd += " ";
+		cmd += *p;
+	}
+	cmd += " -o ";
+	cmd += object;
+	dprintf("starting as cc '%s'\n", cmd.c_str());
+	CC = popen(cmd.c_str(), "w");
+	if (CC == NULL)
+	{
+		fprintf(stderr, "cannot start cc '%s'\n", realcc.c_str());
+		perror("");
+		exit(1);
+	}
+
+	// in debug mode keep copies of the compiler input before and after conversion
+	string modifiedfile;
+	string unmodifiedfile;
+	if (debug)
+	{
+		modifiedfile = object + ".debug";
+		DEBUG = fopen(modifiedfile.c_str(), "w");
+		if (DEBUG == NULL)
+		{
+			string msg = "cannot open file ";
+			msg += modifiedfile;
+			perror(msg.c_str());
+			modifiedfile = "";
+		}
+		else
+		{
+			fprintf(stderr, "writing preprocessed source to %s\n", modifiedfile.c_str());
+		}
+		unmodifiedfile = object + ".debug_in";
+		DEBUGIN = fopen(unmodifiedfile.c_str(), "w");
+		if (DEBUGIN == NULL)
+		{
+			string msg = "cannot open file ";
+			msg += unmodifiedfile;
+			perror(msg.c_str());
+			unmodifiedfile = "";
+		}
+		else
+		{
+			fprintf(stderr, "writing unprocessed source to %s\n", unmodifiedfile.c_str());
+		}
+	}
+
+	//------------------------------------------------------------------------------
+	// copy the cpp output to cc, converting the trace calls on the way
+	//------------------------------------------------------------------------------
+	string oldline;
+	string newline;
+	static const int MAX_BUFFER = 51200;
+	char buf[MAX_BUFFER];
+	// loop on the fgets() result so that a read error ends the loop as well
+	while (fgets(buf, MAX_BUFFER, CPP) != NULL)
+	{
+		oldline = buf;
+		if (DEBUGIN) { fprintf(DEBUGIN, "%s", oldline.c_str()); }
+		parse_line(hashtab, oldline, newline);
+		if (newline.empty())
+		{
+			fprintf(stderr, "hash error in/with file %s\n", source.c_str());
+			exit(1);
+		}
+		fprintf(CC, "%s", newline.c_str());
+		if (DEBUG) { fprintf(DEBUG, "%s", newline.c_str()); }
+	}
+	if (DEBUG) { fclose(DEBUG); }
+	if (DEBUGIN) { fclose(DEBUGIN); }
+	int cmd_rc = pclose(CPP);
+	if (cmd_rc)
+	{
+		fprintf(stderr, "error from cpp\n");
+		if (cmd_rc & 127)
+		{
+			fprintf(stderr, "cpp got signal %d\n", (cmd_rc & 127));
+			exit(1);
+		}
+		else if (cmd_rc >> 8)
+		{
+			fprintf(stderr, "cpp returned %d\n", (cmd_rc >> 8));
+			exit(cmd_rc >> 8);
+		}
+	}
+	cmd_rc = pclose(CC);
+	if (cmd_rc)
+	{
+		fprintf(stderr, "error from cc\n");
+		if (cmd_rc & 127)
+		{
+			fprintf(stderr, "cc got signal %d\n", (cmd_rc & 127));
+			exit(1);
+		}
+		else if (cmd_rc >> 8)
+		{
+			fprintf(stderr, "cc returned %d\n", (cmd_rc >> 8));
+			exit(cmd_rc >> 8);
+		}
+	}
+
+	//------------------------------------------------------------------------------
+	// write the hash -> string table
+	//------------------------------------------------------------------------------
+	if (!hashtab.empty())
+	{
+		string stringfile = object;
+		stringfile += ".hash";
+		// open trace string file
+		FILE* TRC = fopen(stringfile.c_str(), "w");
+		if (TRC == NULL)
+		{
+			fprintf(stderr, "cannot write trace string file '%s'\n", stringfile.c_str());
+			exit(1);
+		}
+		dprintf("Writing to file %s\n", stringfile.c_str());
+
+		// the output of "pwd" ends with the newline that terminates the
+		// header line; fall back to a bare newline if it cannot be obtained
+		string pwd("\n");
+		FILE* PWD = popen("pwd", "r");
+		if (PWD != NULL)
+		{
+			if (fgets(buf, MAX_BUFFER, PWD) != NULL) { pwd = buf; }
+			pclose(PWD);
+		}
+		time_t tt = time(NULL);
+		snprintf(buf, sizeof(buf), "%s", asctime(localtime(&tt)));
+		buf[strlen(buf)-1] = '\0'; // chop off extra newline
+		fprintf(TRC, "#FSP_TRACE_v2|||%s|||BUILD:%s", buf, pwd.c_str());
+
+		string srch_str = "||";
+		srch_str += source;
+		const size_t srch_str_len = srch_str.length();
+		size_t pos;
+		for(map<string,string>::iterator p = hashtab.begin(); p != hashtab.end(); ++p)
+		{
+			pos = (p->second).find(srch_str);
+			if ((pos != string::npos) && ((pos + srch_str_len) == (p->second).length()))
+			{
+				// source file name is already part of the string
+				fprintf(TRC, "%s||%s\n", (p->first).c_str(), (p->second).c_str());
+			}
+			else
+			{
+				fprintf(TRC, "%s||%s||%s\n", (p->first).c_str(), (p->second).c_str(), source.c_str());
+			}
+		}
+		fclose(TRC);
+	}
+	else
+	{
+		dprintf("No trace calls/strings found, not writing hash file\n");
+	}
+	return 0;
+} // end main
diff --git a/tools/ppetracepp/trac_interface.h b/tools/ppetracepp/trac_interface.h
new file mode 100755
index 00000000..74510745
--- /dev/null
+++ b/tools/ppetracepp/trac_interface.h
@@ -0,0 +1,363 @@
+// IBM_PROLOG_BEGIN_TAG 
+// This is an automatically generated prolog. 
+//  
+// occ820 src/occc/405/occ/trac/trac_interface.h 1.4 
+//  
+// IBM CONFIDENTIAL 
+//  
+// OBJECT CODE ONLY SOURCE MATERIALS 
+//  
+// COPYRIGHT International Business Machines Corp. 2011,2012 
+// All Rights Reserved 
+//  
+// The source code for this program is not published or otherwise 
+// divested of its trade secrets, irrespective of what has been 
+// deposited with the U.S. Copyright Office. 
+//  
+// IBM_PROLOG_END_TAG 
+/******************************************************************************
+// @file trac_interface.h
+// @brief Interface codes for TRAC component.
+*/
+/******************************************************************************
+ *
+ *       @page ChangeLogs Change Logs
+ *       @section _trac_interface_h trac_interface.h
+ *       @verbatim
+ *
+ *   Flag    Def/Fea    Userid    Date        Description
+ *   ------- ---------- --------  ----------  ----------------------------------
+ *                      TEAM      06/16/2010  Port  
+ *   @rc003             rickylie  02/03/2012  Verify & Clean Up OCC Headers & Comments
+ *   @pb00E             pbavari   03/11/2012  Added correct include file
+ *   @at009  859308     alvinwan  10/15/2012  Added tracepp support
+ *   @ai005  860268     ailutsar  11/20/2012  Create trace test applet
+ *   @rc005  864101     rickylie  12/12/2012  add small circ buffer to handle ISR semaphore conflict
+ *
+ *  @endverbatim
+ *
+ *///*************************************************************************/
+ 
+#ifndef _TRAC_INTERFACE_H
+#define _TRAC_INTERFACE_H
+
+/** \defgroup Trace Trace Component
+ * Port of the trace code used in the fsp and tpmd.
+ */
+
+//*************************************************************************
+// Includes
+//*************************************************************************
+
+//*************************************************************************
+// Externs
+//*************************************************************************
+
+//*************************************************************************
+// Macros
+//*************************************************************************
+/**
+ * \defgroup TracMacros Trace Component Macro Interfaces
+ * \ingroup Trace
+ * Used to trace 0 - 5 arguments or a binary buffer when using a hash value.
+ */
+/*@{*/
+
+
+// @at009c - start
+
+/*
+ * TRACE: trace a format string plus a variable number of arguments.
+ * trace_adal_hash(i_string,-1) is a placeholder that the ppetracepp
+ * preprocessor replaces by the hash value of i_string followed by the
+ * reduced format string; the salt -1 asks it to derive the argument count
+ * from the format string. Uses the GNU named variadic macro form (args...).
+ */
+#define TRACE(i_td,i_string,args...)                  \
+                trace_adal_write_all(i_td,trace_adal_hash(i_string,-1),__LINE__,0,##args)
+
+/*
+ * TRACEBIN: trace i_size bytes of binary data starting at i_ptr.
+ * trace_adal_hash(i_string,0) is replaced by the hash value of i_string.
+ */
+#define TRACEBIN(i_td,i_string,i_ptr,i_size)          \
+                trac_write_bin(i_td,trace_adal_hash(i_string,0),__LINE__,i_ptr,i_size)
+
+#ifndef NO_TRAC_STRINGS
+
+/* FIELD: print the string a. */
+#define FIELD(a) \
+        printf("%s",a)
+
+/*
+ * FIELD1: print the string a followed by b in hexadecimal.
+ * b is parenthesized so that the cast applies to the whole argument
+ * expression and not only to its first operand.
+ */
+#define FIELD1(a,b) \
+        printf("%s%lx",a,(unsigned long)(b))
+
+#else  // NO_TRAC_STRINGS
+
+/* Trace strings are compiled out: both macros expand to nothing. */
+#define FIELD(a)
+
+#define FIELD1(a,b)
+
+#endif  // NO_TRAC_STRINGS
+
+#define SUCCESS 0
+// @at009c - end
+
+
+
+/*@}*/
+
+//*************************************************************************
+// Defines/Enums
+//*************************************************************************
+#define     TRACE_MAX_ARGS      5        /*!< Maximum number of args to trace */
+
+/* Type of a trace hash value; ppetracepp casts the hash to this type in C++ code. */
+typedef uint32_t trace_hash_val;          
+
+// NOTE!  Increment this when new components are added!
+#define TRAC_NUM_TRACE_COMPONENTS           1
+
+
+#define TRACE_BUFFER_SIZE                   8192 // @ai005a
+
+#define CIRCULAR_BUFFER_SIZE                4   // @rc005a
+
+/*
+ * Short aliases used throughout the trace component.
+ * NOTE(review): the fixed-width types come from <stdint.h>, which this
+ * header does not include itself - presumably the includer provides it.
+ */
+typedef uint32_t      UINT32;
+typedef int32_t       INT32;
+typedef uint8_t       UCHAR;
+typedef uint8_t       UINT8;
+typedef int8_t        INT8;
+typedef uint16_t      UINT16;
+typedef int16_t       INT16;
+typedef char          CHAR;
+typedef unsigned int  UINT;
+typedef unsigned long ULONG;
+typedef int           INT;
+typedef void          VOID;
+
+//*************************************************************************
+// Structures
+//*************************************************************************
+/**
+ * @brief Header placed at the beginning of every trace buffer.
+ */
+typedef struct trace_buf_head {
+    UCHAR ver;         /*!< version of this struct (1)                      */
+    UCHAR hdr_len;     /*!< size of this struct in bytes                    */
+    UCHAR time_flg;    /*!< meaning of timestamp entry field                */
+    UCHAR endian_flg;  /*!< flag for big ('B') or little ('L') endian       */
+    CHAR comp[16];     /*!< the buffer name as specified in init call       */
+    UINT32 size;       /*!< size of buffer, including this struct           */
+    UINT32 times_wrap; /*!< how often the buffer wrapped                    */
+    UINT32 next_free;  /*!< offset of the byte behind the latest entry      */
+    UINT32 te_count;   /*!< Updated each time a trace is done               */
+    UINT32 extracted;  /*!< Not currently used                              */
+}trace_buf_head_t;
+
+/**
+ * @brief Timestamp and thread id stored with each trace entry.
+ */
+typedef struct trace_entry_stamp {
+    UINT32 tbh;        /*!< timestamp upper part                            */
+    UINT32 tbl;        /*!< timestamp lower part                            */
+    UINT32 tid;        /*!< process/thread id                               */
+}trace_entry_stamp_t;
+
+/**
+ * @brief Per-entry header filled in by the adal application layer.
+ */
+typedef struct trace_entry_head {
+    UINT16 length;      /*!< size of trace entry                             */
+    UINT16 tag;         /*!< type of entry: xTRACE xDUMP, (un)packed         */
+    UINT32 hash;       /*!< a value for the (format) string                 */
+    UINT32 line;       /*!< source file line number of trace call           */
+}trace_entry_head_t;
+
+/**
+ * @brief Complete parameter trace entry, so that it can be written in one go:
+ *        stamp, header and room for TRACE_MAX_ARGS + 1 32-bit words.
+ */
+typedef struct trace_entire_entry {
+    trace_entry_stamp_t stamp;
+    trace_entry_head_t head;
+    UINT32 args[TRACE_MAX_ARGS + 1];
+} trace_entire_entry_t;
+
+
+/**
+ * @brief Leading part of a binary trace entry: time stamp and header only;
+ *        a binary trace writes these first.
+ */
+typedef struct trace_bin_entry {
+    trace_entry_stamp_t stamp;
+    trace_entry_head_t head;
+} trace_bin_entry_t;
+
+/**
+ * @brief Trace descriptor: a pointer to the header of a trace buffer.
+ *        Passed to the trace functions to select the buffer to use.
+ */
+typedef trace_buf_head_t * tracDesc_t;
+
+/**
+ * @brief Element of the array holding all trace descriptors
+ *        (see g_des_array).
+ */
+typedef struct trace_descriptor_array
+{
+    tracDesc_t  *entry; /*!< Pointer to trace descriptor                   */
+    CHAR        *comp;  /*!< Pointer to component name                     */
+}trace_descriptor_array_t;
+
+// @rc005a - start
+/*
+ * Bookkeeping data of the small circular buffer (the change log describes
+ * it as added to handle an ISR semaphore conflict).
+ * NOTE(review): head/tail are 32 bit values, presumably indices into the
+ * buffer rather than real pointers - confirm against the implementation.
+ */
+typedef struct circular_buf_head
+{
+    UINT32 head;       // pointer to head
+    UINT32 tail;       // pointer to tail
+    UINT32 entryCount; // number of entries
+} circular_buf_header_t;
+
+
+/*
+ * One element of the circular buffer: a complete trace entry together with
+ * a length and a 4 character component field.
+ */
+typedef struct circular_entire_data {
+    UINT32 len;
+    CHAR comp[4];
+    trace_entire_entry_t entry;
+} circular_entire_data_t;
+
+// @rc005a - end
+
+//*************************************************************************
+// Globals
+//*************************************************************************
+// All TPMF component trace descriptors.
+extern tracDesc_t g_trac_inf;  // @at009a
+extern tracDesc_t g_trac_err;  // @at009a
+extern tracDesc_t g_trac_imp;  // @at009a
+
+// Array of all trace descriptors with their component names.
+extern const trace_descriptor_array_t g_des_array[];
+
+//*************************************************************************
+// Function Prototypes
+//*************************************************************************
+
+/**
+ * \defgroup TracIntefaces Trace Component Interfaces for External Use
+  * \ingroup Trace
+ */
+/*@{*/
+
+
+/**
+ *  @brief  Allocate and initialize all trace buffers in memory.
+ *
+ *  This function will allocate memory for each of the pre-defined trace
+ *  buffers, initialize the buffers with starting data, and set up the
+ *  trace descriptors which each component will use to trace.
+ *
+ *  This function must be called first before any components try to trace!
+ *
+ *  @return Non-zero return code on error.
+ */
+UINT TRAC_init_buffers(void);
+
+/**
+ *  @brief  Retrieve the full trace buffer described by i_td_ptr
+ *
+ *  This function assumes memory has already been allocated for
+ *  the full trace buffer in o_data.
+ *
+ *  @param i_td_ptr Trace descriptor of buffer to retrieve.
+ *  @param o_data Pre-allocated pointer to where data will be stored.
+ *
+ *  @return Non-zero return code on error
+ */
+UINT TRAC_get_buffer(const tracDesc_t i_td_ptr,
+                     void *o_data);
+
+/**
+ *  @brief  Retrieve part of the trace buffer described by i_td_ptr
+ *
+ *  This function assumes memory has already been allocated for
+ *  the trace buffer (size io_size).  This function will copy
+ *  up to io_size bytes into the buffer and set io_size
+ *  to the exact size that was copied.
+ *
+ *  @param i_td_ptr Trace descriptor of buffer to retrieve.
+ *  @param o_data Pre-allocated pointer to where data will be stored.
+ *  @param io_size Size of trace data to retrieve (input)
+ *                 Actual size of trace data stored (output)
+ *
+ *  @return Non-zero return code on error
+ */
+UINT TRAC_get_buffer_partial(const tracDesc_t i_td_ptr,
+                    void *o_data,
+                    UINT *io_size);
+                    
+/**
+ *  @brief  Retrieve trace descriptor for input component name
+ *
+ *  @param i_comp Component name to retrieve trace descriptor for.
+ *
+ *  @return Valid trace descriptor on success, NULL on failure.
+ */
+tracDesc_t TRAC_get_td(const char *i_comp);
+
+/**
+ *  @brief  Reset all trace buffers
+ *
+ *  @return Non-zero return code on error
+ */
+UINT TRAC_reset_buf(void);
+
+/*@}*/  // Ending tag for external interface module in doxygen
+
+
+/**
+ *  @brief  Trace input integers to trace buffer.
+ *
+ *  This function assumes io_td has been initialized.
+ *
+ *  @param io_td Initialized trace descriptor pointing to the buffer to trace to.
+ *  @param i_hash Hash value to be recorded for this trace.
+ *  @param i_fmt Output format (the reduced format string that ppetracepp
+ *               inserts behind the hash value)
+ *  @param i_line Line number trace is occurring on.
+ *  @param i_type Trace type: field or debug.
+ *  @param ... Parameters that are limited to a size of 4 bytes, i.e. int, uint32_t, nnn*
+ *
+ *  @return Non-zero return code on error.
+ */
+UINT trace_adal_write_all(tracDesc_t io_td,const trace_hash_val i_hash,
+                     const char *i_fmt,const ULONG i_line, const ULONG i_type,...);
+
+
+/**
+ *  @brief  Trace input integers to trace buffer.
+ *
+ *  This function assumes io_td has been initialized.
+ *
+ *  @param io_td Initialized trace descriptor pointing to the buffer to trace to.
+ *  @param i_hash Hash value to be recorded for this trace.
+ *  @param i_line Line number trace is occurring on.
+ *  @param i_num_args Number of arguments to trace.
+ *  @param i_1 Input Parameter 1
+ *  @param i_2 Input Parameter 2
+ *  @param i_3 Input Parameter 3
+ *  @param i_4 Input Parameter 4
+ *  @param i_5 Input Parameter 5
+ *
+ *  @return Non-zero return code on error.
+ */
+UINT trac_write_int(tracDesc_t io_td,const trace_hash_val i_hash,
+                    const ULONG i_line,
+                    const UINT i_num_args,
+                    const ULONG i_1,const ULONG i_2,const ULONG i_3,
+                    const ULONG i_4,const ULONG i_5
+                   );
+
+
+/**
+ *  @brief  Trace binary data to buffer.
+ *
+ *  This function assumes io_td has been initialized.
+ *
+ *  @param io_td Initialized trace descriptor pointing to the buffer to trace to.
+ *  @param i_hash Hash value to be recorded for this trace.
+ *  @param i_line Line number trace is occurring on.
+ *  @param i_ptr Pointer to binary data to trace.
+ *  @param i_size Size of data to copy from i_ptr.
+ *
+ *  @return Non-zero return code on error.
+ */
+UINT trac_write_bin(tracDesc_t io_td,const trace_hash_val i_hash,
+                    const ULONG i_line,
+                    const void *i_ptr,
+                    const ULONG i_size);
+
+//*************************************************************************
+// Functions
+//*************************************************************************
+
+#endif //_TRAC_INTERFACE_H
diff --git a/tools/ppetracepp/tracehash.pl b/tools/ppetracepp/tracehash.pl
new file mode 100755
index 00000000..f12a0f8c
--- /dev/null
+++ b/tools/ppetracepp/tracehash.pl
@@ -0,0 +1,873 @@
+#!/usr/bin/perl -w
+# File tracehash.pl created by B J Zander.
+
+use strict;
+
+sub determine_args();
+sub launch_cpp_and_parse($$);
+sub cpp_dir($);
+sub read_string_file();
+sub collect_files($);
+sub assimilate_file($);
+sub hash_strings();
+sub write_string_file();
+sub help();
+
+# Turn on autoflush for both standard streams so diagnostics and regular
+# output are not held back in a buffer.
+select (STDERR);
+$| = 1;		# Make all prints to STDERR flush the buffer immediately
+select (STDOUT);
+$| = 1;		# Make all prints to STDOUT flush the buffer immediately
+
+# Constants
+my $HEAD_SEP = "|||";                       # separator used inside the header line
+my $HEAD_EYE_CATCHER = "#FSP_TRACE_v";      # start of the string file header line
+my $HEAD_BUILD_FLAG = "BUILD:";             # precedes the scanned file/dir in the header
+my $HEAD_VER_FLAG = 2;                      # string file format version written by this script
+my $BB_STRING_FILE = "/opt/fsp/etc/BB_StringFile";  # cleared by the -S option
+
+# Global Variables
+my $debug = 0;                 # set to 1 by the -v option
+my $seperator = "&&&&";
+my $file_name = "trexStringFile";   # default string file name
+my $in_sand;                   # 1 when SANDBOXBASE is set, 0 otherwise
+# Default to "" like the other environment lookups below; without the
+# default an unset 'bb' variable is interpolated as undef and triggers
+# "uninitialized value" warnings under -w.
+my ($backing) = $ENV{'bb'} || "";
+my $hash_prog = "trexhash";    #default to in path
+my $build = "";                # header line written at the top of the string file
+my ($sandbox) = $ENV{'SANDBOX'} || "";
+my ($context) = $ENV{'CONTEXT'} || "";
+my ($sandboxbase) = $ENV{'SANDBOXBASE'} || "";
+my ($bb);
+my ($sourcebase) = "$sandboxbase/src";
+my ($version) = $HEAD_VER_FLAG; # default to the format version this script writes
+my ($collect) = 0;             # set to 1 by the -c option (collect mode)
+my ($INCLUDE, $Arg, $file, $dir, $string_file);
+my $args = "";                 # unrecognized options, passed on to cpp
+
+my $fail_on_collision = 0; # 1 = exit with error if hash collision occurs
+my $hash_filename_too = 0; # 1 = hash is calculated over format string + filename
+
+# Show what was picked up from the environment when debugging is enabled.
+if ($debug)
+{
+    print "sourcebase = $sourcebase\n";
+    print "sandbox = $sandbox\n";
+    print "backing = $backing\n";
+    print "context = $context\n";
+}
+
+# Select the toolchain directory name from the build context.
+$bb = ($context =~ /x86/) ? "i586-pc-linux-gnu" : "powerpc-linux";
+
+# Build the include options handed to cpp.
+unless (($sourcebase =~ /\w+/) && ($sandbox =~ /\w+/))
+{
+    print "Not in Sandbox so guessing Include Paths...\n" if $debug;
+    $INCLUDE = "-I/opt/fsp/i586-pc-linux-gnu/include/fsp -I/opt/fsp/i586-pc-linux-gnu/include/ -include /opt/fsp/i586-pc-linux-gnu/include/fsp/tracinterface.H";
+}
+else
+{
+    $INCLUDE = "-I $sandboxbase/export/$context/fips/include  -I $backing/export/$context/fips/include -I /opt/fsp/$bb/include/fsp -I/opt/fsp/$bb/include/ -include /opt/fsp/$bb/include/fsp/tracinterface.H";
+}
+
+# I/P Series work in ODE sandbox env.
+# We count as "in a sandbox" whenever SANDBOXBASE holds something word-like.
+$in_sand = ($sandboxbase =~ /\w+/) ? 1 : 0;
+print "backing = $backing\n" if ($in_sand && $debug);
+
+
+
+
+
+#  Walk the command line.  Options known to this script are consumed here;
+#  everything else is appended to $args and later handed to cpp unchanged.
+
+while (@ARGV) {
+    $Arg = shift;
+
+    if ($Arg eq "-h" || $Arg eq "-H") {
+        help();
+        exit(127);
+    }
+    elsif ($Arg eq "-f") {
+        $file = shift;               # file to scan / assimilate
+    }
+    elsif ($Arg eq "-d") {
+        $dir = shift;                # directory tree to scan / collect from
+    }
+    elsif ($Arg eq "-s") {
+        $string_file = shift;        # string file to read and update
+    }
+    elsif ($Arg eq "-c") {
+        $collect = 1;                # collect mode
+    }
+    elsif ($Arg eq "-v") {
+        $debug = 1;
+        print "debug on\n" if $debug;
+    }
+    elsif ($Arg eq "-C") {
+        $fail_on_collision = 1;      # fail if a hash collision is detected
+    }
+    elsif ($Arg eq "-F") {
+        $hash_filename_too = 1;      # hash covers format string + file name
+    }
+    elsif ($Arg eq "-S") {
+        $BB_STRING_FILE = "";        # do not read the building block file
+    }
+    else {
+        # not one of ours: just pass it onto compiler
+        $args .= " " . $Arg;
+    }
+}
+
+print "args = $args\n" if $debug;
+
+# Outside a sandbox there is no default location, so an input is mandatory.
+unless ($file || $dir || $in_sand) {
+    help();
+    exit(127);
+}
+
+#################################
+#          M A I N              #
+#################################
+
+# Start time; used in the header line and in the closing report.
+my $clock = `date`;
+
+# Header line: eye catcher + version, date and build flag, newline-free.
+$build = join("", $HEAD_EYE_CATCHER, "$HEAD_VER_FLAG", $HEAD_SEP, $clock,
+              $HEAD_SEP, $HEAD_BUILD_FLAG);
+
+$build =~ tr/\n//d;
+
+# Global array to hold the parsed TRAC macro calls.
+my @strings = ();
+
+# Assoc. arrays to hold hash|string values.
+my %string_file_array = ();
+my %hash_strings_array = ();
+
+# Check all provided arguments and look for defaults if not provided by user
+determine_args();
+
+# The scanned directory (preferred) or file is appended to the header line
+# so it shows up at the top of the string file.
+if (defined $dir)
+{
+    $build .= $dir;
+}
+else
+{
+    $build .= $file;
+}
+
+if ($collect)
+{
+    # Collect mode: merge string files that were generated earlier.
+    if (defined $dir)
+    {
+        collect_files($dir);
+    }
+    else
+    {
+        assimilate_file($file);
+    }
+}
+else
+{
+    # Scan mode: run cpp over the sources and look for TRAC macro calls.
+    if (defined $dir)
+    {
+        cpp_dir($dir);
+    }
+    elsif ($file =~ /^(.+)\/[^\/]+\.C$/)
+    {
+        # make sure include path includes directory that file is in
+        launch_cpp_and_parse($file,$1);
+    }
+    else
+    {
+        # No path in front of file so it has to be local dir
+        launch_cpp_and_parse($file,"./");
+    }
+
+    # Hash the strings that have been scanned.
+    %hash_strings_array = hash_strings();
+}
+
+# Read the existing string file into memory.
+%string_file_array = read_string_file();
+
+# Write out the new string file. check for collisions of new/old string here
+write_string_file();
+
+print "Hashing Started at $clock\n";
+$clock = `date`;
+print "Hashing Finished at $clock\n";
+
+exit 0;
+
+
+#################################
+#     S U B R O U T I N E S     #
+#################################
+
+#=============================================================================
+#  Enhance usability by figuring out which build env. we are in
+#
+#  Fills in whatever the user did not supply on the command line:
+#    - locates the trexhash program (only needed when not in collect mode),
+#    - picks a default directory to process when running in a sandbox,
+#    - picks a default string file, creates it if necessary and makes it
+#      readable/writable.
+#  Takes no arguments and returns nothing; works on the file-level variables
+#  $hash_prog, $dir and $string_file.  Exits with status 127 when trexhash,
+#  the input or the string file cannot be determined.
+#=============================================================================
+sub determine_args() {
+
+    # Find trexhash program
+    # but only if needed (i.e. not in collect mode)
+    if (!$collect) {
+        my $tmp = `which $hash_prog`;
+        chomp $tmp;
+
+        if ($tmp eq '') {
+            print STDOUT "\nWarning: Program trexhash does not exist in path.\n" if $debug;
+            # fall back to a copy in the current directory
+            $hash_prog = "./trexhash";
+
+            $tmp = `which $hash_prog`;
+            chomp $tmp;
+            if ($tmp eq '') {
+                print STDOUT "\nError: Unable to find trexhash \n";
+                exit(127);
+            }
+        }
+    }
+
+    # Verify input values.
+    if ((!defined $file) && (!defined $dir)) {
+        if (!($in_sand))
+        {
+            print STDOUT "\nError: No input directory or file provided as input to scan\n";
+            exit(127);
+        }
+
+        # Assume they want sandbox scanned
+        if ($collect)
+        {
+            # collect all string files generated by tracepp and merge
+            $dir = "$sandboxbase/obj/";
+        }
+        else
+        {
+            # generate our own string file by pre-compiling all source code
+            $dir = "$sandboxbase/src/";
+        }
+        print STDOUT "\n-f <file> or -d <dir> not found...scanning $dir by default\n\n";
+    }
+
+    if (!defined $string_file)
+    {
+        if ($in_sand)
+        {
+            # Copy the current string file from backing build into our sandbox.
+            # The list form of system() bypasses the shell, so paths that
+            # contain spaces or shell metacharacters are passed on intact.
+            system ("cp", "$backing/obj/$file_name", "$sandboxbase/obj/$file_name")
+              if !(-e "$sandboxbase/obj/$file_name");
+
+            $string_file = "$sandboxbase/obj/$file_name";
+        }
+        else
+        {
+            $string_file = "./$file_name";
+        }
+        # The option is spelled -s (see help()), not -sf.
+        print STDOUT "-s <string_file> not specified, using $string_file instead...\n\n" if $debug;
+
+    }
+
+    # Try Creating the string file
+    system ("touch", $string_file);
+
+    if (! -f $string_file) {
+        print STDOUT "\nError: File $string_file does not exist.  Current directory may not be writable.\n\n";
+        help();
+        exit(127);
+    }
+
+    # Make sure trexStringFile is readable/writeable
+    system ("chmod", "ugo+rw", $string_file);
+
+}
+
+#=============================================================================
+#  Launch cpp script and grab input from it looking for trace calls.
+#
+#  Arguments:
+#    $l_loc - source file to preprocess
+#    $l_dir - directory added to the cpp include path (-I)
+#
+#  Every trace_adal_hash("<format>", <number>) call found in the cpp output
+#  is pushed onto the file-level @strings array as [file, format, number].
+#  Adjacent string literals are merged into one format string.
+#  Dies if the cpp pipe cannot be started.
+#=============================================================================
+sub launch_cpp_and_parse($$) {
+
+    my ($l_loc, $l_dir) = @_;
+
+    print "Processing file $l_loc\n" if $debug;
+    # The trailing '|' makes open() run the command and read its output.
+    my $cmd = "/usr/bin/cpp $INCLUDE -I $l_dir $args $l_loc|";
+    print "$cmd\n" if $debug;
+    # Lexical handle instead of the shared bareword FH; the message names the
+    # command that failed (the original interpolated the unrelated $_).
+    open(my $fh, $cmd)
+      or die ("Cannot open $cmd: $!, stopped");
+
+    # Read through all lines in the file..
+    my $line = <$fh>;
+    while (defined $line)
+    {
+        chomp $line;        # remove EOL (only if there is one)
+        $line =~ s/^\s*//;  # remove unnecessary beginning white space.
+        $line =~ s/\s*$//;  # remove unnecessary ending white space.
+        # Look for lines that are trace macro calls.  One line may hold
+        # several calls, so keep matching on whatever follows each call.
+        while($line =~ m/^(.*?)trace_adal_hash\s*\(\s*(("[^"]*"\s*)+),\s*(\d+)\s*\)(.*)$/)
+        {
+            my ($prefix, $strings, $salt, $suffix) = ($1, $2, $4, $5);
+            print STDOUT "$strings $salt\n" if $debug;
+            $strings =~ s/"\s*$//; # remove trailing " and space
+            $strings =~ s/^"//;    # remove leading "
+            $strings =~ s/"\s*"//g; # merge adjacent string literals
+
+            # Save the macro call so it can be hashed later..
+            push (@strings, [$l_loc, $strings, $salt]);
+            $line = $suffix; # check rest of line for a second trace call
+        }
+        my $nextline = <$fh>;
+        last if !defined $nextline;
+        # if a trace call is spread over multiple lines we have to add the next
+        # line from the source. the only problem is the definition/declaration
+        # of trace_adal_hash: we have to ignore that. we catch that by requiring
+        # a " after the function name. hopefully nobody writes a comment with
+        # a " after the function declaration ...
+        if ($line =~ /trace_adal_hash.*"/) {
+            $line .= $nextline;
+        } else {
+            $line = $nextline;
+        }
+    }
+    close($fh);
+}
+
+#=============================================================================
+#  run cpp on all files in this directory and return the output
+#
+#  Argument:
+#    $l_dir - directory to process; sub-directories are processed recursively
+#
+#  Every regular file whose name ends in ".C" is handed to
+#  launch_cpp_and_parse() with $l_dir as additional include directory.
+#  Directories named '.', '..', 'simo', 'simu' and 'bldv' are skipped.
+#  Dies if the directory cannot be opened.
+#=============================================================================
+sub cpp_dir($) {
+
+    my ($l_dir) = @_;
+
+    # Open the directory and read all entry names.  A lexical handle is used
+    # because this sub calls itself.
+    opendir ( my $dh , "$l_dir")
+      or die ("Cannot open $l_dir: $!, stopped");
+
+    print STDOUT "Processing directory $l_dir\n" if $debug;
+    my @dir_entry = readdir($dh);
+    closedir($dh);
+
+    # Avoid a doubled '/' when the directory was given with a trailing slash.
+    my $l_prefix = ($l_dir =~ m"/$") ? $l_dir : "$l_dir/";
+
+    foreach my $l_name (@dir_entry) {
+        my $l_entry = $l_prefix . $l_name;
+
+        # Is the entry a directory?
+        if (-d $l_entry) {
+            # The name returned by readdir() is the last path component, so
+            # it can be tested directly.  The original re-extracted it from
+            # the full path with a regex that always matched, which left its
+            # fallback branch unreachable.
+            print "dir = $l_name\n" if $debug;
+            # should we recurse into this directory.
+            if ($l_name =~ m/^(\.\.?|sim[ou]|bldv)$/)
+            {
+                next; # skip '.', '..' and some fips dirs
+            }
+            cpp_dir($l_entry);
+        }
+        # Is the entry a file?
+        elsif ((-f $l_entry) && ($l_entry =~ m/\.C$/)) {
+            # it's a file so
+            launch_cpp_and_parse($l_entry,$l_dir);
+        }
+        # Not a .C file or directory so ignore it...
+    }
+}
+
+#=============================================================================
+#  Read in strings from the existing trace string file....
+#
+#  Reads $string_file and, when $BB_STRING_FILE is set, differs from
+#  $string_file and exists, that building block string file as well.
+#  Record layouts handled per line:
+#    no header line : hash||string           (stored with file "NO FILE")
+#    header version 1: hash||file||string
+#    header version 2: hash||string||file
+#
+#  Returns a hash mapping hash value => "string||file".
+#
+#  Side effects: when $string_file has a header line, $build is replaced by
+#  that line (with the version digit updated to $HEAD_VER_FLAG); $version is
+#  set to $HEAD_VER_FLAG before returning.  Exits with status 127 when
+#  $string_file carries an unknown version; dies if a file cannot be opened.
+#=============================================================================
+sub read_string_file() {
+
+    my %o_strings;
+    my ($line) = "";
+    my ($l_hash) = "";
+    my ($l_str) = "";
+    my ($l_file) = "";
+
+
+    # Make sure we can open each file.  The message names the file that
+    # failed (the original interpolated the unrelated $_).
+    open ( my $sf_fh , "<$string_file")
+      or die ("Cannot open $string_file: $!, stopped");
+
+    $line = <$sf_fh>;
+
+    # A string file that was only just created is empty, so $line may be
+    # undefined here; avoid interpolating undef in the debug output.
+    print "first line in trexStringFile= " . (defined $line ? $line : "") . "\n" if $debug;
+
+    if((defined $line) && ($line =~ /^$HEAD_EYE_CATCHER(\d)/))
+    {
+        $version = $1;
+
+        print "version = $version\n" if $debug;
+
+        #Always put latest version in file
+        $line =~ s/^$HEAD_EYE_CATCHER\d/${HEAD_EYE_CATCHER}${HEAD_VER_FLAG}/;
+
+        # Take previous version in file and use it.
+        $build = $line;
+        chomp($build);
+        $line = <$sf_fh>;
+
+        while (defined $line) {
+            chomp $line;         # remove EOL
+            if($version eq "1")
+            {
+                ($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+            }
+            elsif($version eq "2")
+            {
+                ($l_hash, $l_str ,$l_file) = split(/\|\|/, $line);
+            }
+            else
+            {
+                print "Unknown version of stringfile $version\n";
+                exit(127);
+            }
+            $o_strings{$l_hash} = $l_str . "||" . $l_file;
+            $line = <$sf_fh>;
+        }
+
+    }
+    else
+    {     # If there is a file then we are dealing with the first
+          # version of trexStringFile so don't look for file name.
+        if ($debug) {
+            print "version 0 stringfile detected: $string_file\n";
+        }
+
+        # there is a file and it doesn't have a header
+        $version = 0;
+
+        while (defined $line) {
+            chomp $line;         # remove EOL
+            ($l_hash,$l_str) = split(/\|\|/, $line);
+            $o_strings{$l_hash} =$l_str . "||" . "NO FILE";
+            $line = <$sf_fh>;
+        }
+    }
+
+    close($sf_fh);
+
+    #Time to look for a building block string file
+    if($BB_STRING_FILE ne "" and $string_file ne $BB_STRING_FILE and -f $BB_STRING_FILE)
+    {
+
+        # Make sure we can open the file.
+        open ( my $bb_fh , "<$BB_STRING_FILE")
+          or die ("Cannot open $BB_STRING_FILE: $!, stopped");
+
+        $line = <$bb_fh>;
+
+        print "first line in BB_StringFile = " . (defined $line ? $line : "") . "\n" if $debug;
+        if((defined $line) && ($line =~ /^$HEAD_EYE_CATCHER(\d)/))
+        {
+            $version = $1;
+
+            $line = <$bb_fh>;
+            while (defined $line)
+            {
+                # chomp instead of chop: a last line without EOL keeps its
+                # final character.
+                chomp $line;         # remove EOL
+                if($version eq "1")
+                {
+                    ($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+                }
+                elsif($version eq "2")
+                {
+                    ($l_hash, $l_str ,$l_file) = split(/\|\|/, $line);
+                }
+                # NOTE(review): for any other version the fields keep their
+                # previous values, so the last stored entry is written again;
+                # confirm whether an error should be reported instead.
+                $o_strings{$l_hash} = $l_str . "||" . $l_file ;
+                $line = <$bb_fh>;
+            }
+        }
+        else
+        {
+            print "*** ERROR: BB_StringFile '$BB_STRING_FILE' should always have version!!!\n"
+        }
+
+        close($bb_fh);
+    }
+    else
+    {
+        print "$BB_STRING_FILE is not available\n" if $debug;
+    }
+    #All files are latest version now.
+    $version = $HEAD_VER_FLAG;
+    return %o_strings;
+}
+
+#=============================================================================
+#  Walk a directory tree and merge every *.hash file found in it.
+#
+#  Argument:
+#    $l_dir - directory to start at; sub-directories are visited recursively,
+#             except '.', '..', 'simo', 'simu' and 'bldv'
+#
+#  Each regular file whose name ends in ".hash" is passed to
+#  assimilate_file().  Dies if a directory cannot be opened.
+#=============================================================================
+sub collect_files($) {
+
+    my ($l_dir) = @_;
+
+    # Fetch the complete listing first, then close the handle again before
+    # descending any further.
+    opendir ( my $l_handle , "$l_dir")
+      or die ("Cannot open $l_dir: $!, stopped");
+
+    print STDOUT "Processing directory $l_dir\n" if $debug;
+    my @l_names = readdir($l_handle);
+    closedir($l_handle);
+
+    foreach my $l_name (@l_names) {
+        # Join directory and entry name without doubling the '/'.
+        my $l_path = ($l_dir =~ m"/$") ? "$l_dir$l_name" : "$l_dir/$l_name";
+
+        if (-d $l_path) {
+            # skip '.', '..' and some fips dirs
+            next if ($l_path =~ m/\/(\.\.?|sim[ou]|bldv)$/);
+            collect_files($l_path);
+            next;
+        }
+
+        # Only regular files produced by tracepp (.hash) are of interest;
+        # anything else is ignored.
+        next unless ((-f $l_path) && ($l_path =~ m"\.hash$"));
+        assimilate_file($l_path);
+    }
+
+}
+
+#=============================================================================
+#  Read in data from file and add to master one
+#
+#  Argument:
+#    $l_loc - string file to merge into the file-level %hash_strings_array
+#
+#  Record layouts handled per line:
+#    no header line : hash||string           (stored with file "NO FILE",
+#                                             no collision check)
+#    header version 1: hash||file||string
+#    header version 2: hash||string||file
+#  For files with a header, an entry whose hash is already present with a
+#  different string is reported as a collision; with -C the script then exits
+#  with status 1.  The new entry replaces the old one in either case.
+#
+#  Exits with status 127 for an unknown version, or for a version 1 file
+#  combined with -F.  Dies if the file cannot be opened.  $version is set to
+#  $HEAD_VER_FLAG before returning.
+#=============================================================================
+sub assimilate_file($) {
+
+    my ($l_loc) = @_;
+
+    my ($line) = "";
+    my ($l_hash) = "";
+    my ($l_str) = "";
+    my ($l_file) = "";
+
+    # Make sure we can open each file.  The message names the file that
+    # failed (the original interpolated the unrelated $_).
+    open ( my $in_fh , "<$l_loc")
+      or die ("Cannot open $l_loc: $!, stopped");
+
+    $line = <$in_fh>;
+
+    # An empty file leaves $line undefined; do not interpolate undef.
+    print "Assimilate: first line in $l_loc = " . (defined $line ? $line : "") if $debug;
+
+    if((defined $line) && ($line =~ /^$HEAD_EYE_CATCHER(\d)/))
+    {
+        $version = $1;
+        if ($version eq "1") {
+            if ($hash_filename_too) {
+                print "*** ERROR: hash_filename_too (-F) isn't possible with trace version 1\n";
+                print "           please rebuild all .hash files and global trexStringFile\n";
+                print "           version 1 file is '$l_loc'\n";
+                exit(127);
+            }
+        } elsif ($version ne "2") {
+            print "Unknown version of stringfile $version\n";
+            exit(127);
+        }
+
+        $line = <$in_fh>;
+
+
+        while (defined $line) {
+            # chomp instead of chop: a last line without EOL keeps its
+            # final character.
+            chomp $line;         # remove EOL
+            if($version eq "1")
+            {
+                ($l_hash, $l_file ,$l_str) = split(/\|\|/, $line);
+            }
+            elsif($version eq "2")
+            {
+                ($l_hash, $l_str ,$l_file) = split(/\|\|/, $line);
+            }
+            my $newstring = $l_str . "||" . $l_file;
+            if (exists $hash_strings_array{$l_hash}) {
+                my $hashstr1 = $hash_strings_array{$l_hash};
+                my $hashstr2 = $newstring;
+                if (!$hash_filename_too) {
+                    # hash was made over format string only, remove file name
+                    $hashstr1 =~ s/\|\|.*$//;
+                    $hashstr2 = $l_str;
+                }
+                if ($debug) {
+                    print "a_f: compare $hashstr1\n",
+                          "     vs.     $hashstr2\n";
+                }
+                if ($hashstr1 ne $hashstr2)
+                {
+                    print "*** ERROR: HASH Collision! (a_f)\n",
+                          "    Two different strings have the same hash value ($l_hash)\n",
+                          "    String 1: $hash_strings_array{$l_hash}\n",
+                          "    String 2: $newstring\n";
+                    if ($fail_on_collision) {
+                        exit(1);
+                    }
+                }
+            }
+            $hash_strings_array{$l_hash} = $newstring;
+            $line = <$in_fh>;
+        }
+
+    }
+    else
+    {   # If there is a file then we are dealing with the first
+        # version of trexStringFile so don't look for file name.
+        # these files shouldn't be there anymore. we don't check for collisions here
+        if ($debug) {
+            # Report the file actually being read (the original printed the
+            # output string file name here).
+            print "version 0 stringfile detected: $l_loc\n";
+        }
+
+        if(defined $line)
+        {
+            # there is a file and it doesn't have a header
+            $version = 0;
+        }
+
+        while (defined $line) {
+            chomp $line;         # remove EOL
+            ($l_hash,$l_str) = split(/\|\|/, $line);
+            $hash_strings_array{$l_hash} = $l_str . "||" . "NO FILE";
+            $line = <$in_fh>;
+        }
+    }
+    $version = $HEAD_VER_FLAG;
+    close($in_fh);
+}
+
+#=============================================================================
+#  Hash every trace call collected in @strings by running $hash_prog on it.
+#
+#  Each @strings element is [file name, format string, number]; the number is
+#  passed to $hash_prog as its second argument.  With -F the file name is
+#  appended to the format string ("string||file") before hashing.
+#
+#  Returns a hash mapping the output of $hash_prog => "string||file".
+#  Two different strings with the same hash are reported as a collision; with
+#  -C the script then exits with status 1.  Exits with status 1 when
+#  $hash_prog fails.
+#=============================================================================
+sub hash_strings() {
+
+    my ($hash_val, $l_key, $l_hash, %l_hash_strings);
+    my ($line_feed) = chr(10);
+    my ($l_file_name) = "NO FILENAME";
+    print "\nHashing printf strings.\n\n";
+
+    foreach my $str (@strings) {
+    	my $printf_string;
+	$l_file_name = $str->[0];
+	$printf_string = $str->[1];
+	$l_key = $str->[2];
+	print "printf_string = $printf_string\n" if $debug;
+	$printf_string =~ s/"\s?"//g; #multi line traces will have extra " in them
+	$printf_string =~ s/`/\\`/g; # escape ` so the shell does not run it as a command
+	$printf_string =~ s/\\n/$line_feed/g; # turn the two characters \n into a real line feed
+	if ($hash_filename_too) {
+	    $printf_string .= "||" . $l_file_name;
+	}
+
+	# call the hasher.
+	# NOTE(review): the string is placed inside double quotes on a shell
+	# command line and only backticks are escaped above; characters such as
+	# $ or \ in a format string are presumably still interpreted by the
+	# shell - confirm whether that can occur in practice.
+	print "$hash_prog \"$printf_string\" $l_key\n" if $debug;
+	$hash_val = `$hash_prog \"$printf_string\" $l_key`;
+	if ($?) {
+		# $? holds the exit code in the high byte and the signal in the low bits
+		my ($hp_ret, $hp_sig) = ($? >> 8, $? & 127);
+		if ($hp_sig) {
+			print "*** ERROR: $hash_prog died with signal $hp_sig\n";
+		} elsif ($hp_ret) {
+			print "*** ERROR: $hash_prog returned the error $hp_ret\n";
+			if ($hash_val) {
+				print "   error from $hash_prog:\n$hash_val";
+			}
+		}
+		exit(1);
+	}
+	print "printf_string = $printf_string l_key = $l_key hash val = $hash_val\n" if $debug;
+
+    	# undo escaping
+	$printf_string =~ s/$line_feed/\\n/g;
+        $printf_string =~ s/\\`/`/g;
+
+	if (exists $l_hash_strings{$hash_val})
+	{
+	    # hash val was found before. check if it's the same string
+	    # else we have a hash collision
+	    my $l_tmp = $l_hash_strings{$hash_val};
+	    if (!$hash_filename_too) {
+	    	# stored value is "string||file"; compare the string part only
+	    	$l_tmp =~ s/\|\|.*$//;
+	    }
+	    if ($l_tmp ne $printf_string)
+	    {
+		print "*** ERROR: HASH Collision! (h_s)\n",
+		      "    Two different strings have the same hash value ($hash_val)\n",
+		      "    String 1: $l_hash_strings{$hash_val}\n",
+		      "    String 2: $printf_string (file $l_file_name)\n";
+		if ($fail_on_collision) {
+		    exit(1);
+		}
+	    }
+	}
+	# this will overwrite an old string with a new one if a collision occurred
+	# but we might want to bail out in this case anyway
+	$printf_string = $printf_string . "||" . $l_file_name;
+	$l_hash_strings{$hash_val} = $printf_string;
+    }	
+    return %l_hash_strings;
+}
+#=============================================================================
+#  Merge the freshly collected strings into the ones read from the string
+#  file and write the result back to $string_file.
+#
+#  Works on the file-level hashes %hash_strings_array (new entries) and
+#  %string_file_array (existing entries).  A new entry whose hash already
+#  exists with a different string is reported as a collision; with -C the
+#  script then exits with status 1, otherwise the new entry wins.
+#  Entries are written sorted by hash as "hash||<stored value>", preceded by
+#  the header line $build when $version is greater than 0.
+#  Dies if the string file cannot be opened or written.
+#=============================================================================
+sub write_string_file() {
+
+    # Combine the contents of the existing string file with the trace calls
+    # that we have just hashed.
+    print STDOUT "\nCombining Hash strings...\n\n";
+
+    foreach my $l_key (keys(%hash_strings_array)) {
+        my $l_tmp = $hash_strings_array{$l_key}; # freshly collected strings
+        if (exists $string_file_array{$l_key})
+        { # hash exists in list from trexStringFile
+            my $l_tmp2 = $string_file_array{$l_key};
+            if (!$hash_filename_too) {
+                # hash covers the format string only: ignore the file names
+                $l_tmp =~ s/\|\|.*$//;
+                $l_tmp2 =~ s/\|\|.*$//;
+            }
+
+            # Check for hash collisions.
+            if ($l_tmp ne $l_tmp2)
+            {
+                print "*** ERROR: HASH Collision! (w_s_f)\n",
+                      "    Two different strings have the same hash value ($l_key)\n",
+                      "    String 1: $hash_strings_array{$l_key}\n",
+                      "    String 2: $string_file_array{$l_key}\n";
+                if ($fail_on_collision) {
+                    exit(1);
+                }
+                # don't fail, write new one
+            }
+        }
+        if($version > 0)
+        {
+            # add/replace the new string to the string_file_array.
+            $string_file_array{$l_key} = $hash_strings_array{$l_key}
+        }
+        else
+        {
+            # old version so only write out format string (not file name to)
+            $string_file_array{$l_key} = $l_tmp;
+        }
+    }
+
+    # Write out the updated string file.
+    print STDOUT "\nWriting updated hash||string file...\n\n";
+
+    # The message names the file that failed (the original interpolated the
+    # unrelated $_).
+    open ( my $out_fh , ">$string_file")
+      or die ("Cannot open $string_file: $!, stopped");
+
+    if($version > 0)
+    {
+        print $out_fh "$build\n"; # only print version if newer then version 0
+    }
+    foreach my $l_key (sort(keys(%string_file_array))) {
+        print $out_fh "$l_key||$string_file_array{$l_key}\n";
+    }
+    # Buffered write errors (e.g. disk full) only show up on close.
+    close($out_fh)
+      or die ("Cannot write $string_file: $!, stopped");
+}
+#=============================================================================
+
+#=============================================================================
+#  Display command invocation help for this program...
+#
+#  Prints the usage text to the currently selected output handle.  The text
+#  interpolates the current value of $BB_STRING_FILE.
+#=============================================================================
+sub help() {
+    print << "EOF";
+tracehash.pl - create a trace string file from sources or collect tracepp files
+Usage: tracehash.pl [options]
+    General options:
+      -h   - Print this help text.
+      -v   - Be verbose, tell what's going on (debug output)
+    Operation modes
+      -c   - Collect StringFiles created by tracepp and merge.
+      default - Scan source files for trace calls.
+
+Collect mode: tracehash.pl -c [-vFCS] [-d <dir>] [-s <string_file>]
+              tracehash.pl -c [-vFCS] [-f <file>] [-s <string_file>]
+    Collect string files created by tracepp (.hash) from directory tree at
+    <dir> or read them from string file <file> and write to file
+    <string_file>, adding entries already in this file.
+      -f   - String file to read and write/add to <string_file>.
+      -d   - Start of directory tree to scan for .hash files. Default = .
+      -s   - File with trace strings (and hashes) to read from and write to
+             default = ./trexStringFile
+      -F   - hash is calculated over trace string and source file name,
+             otherwise without source file name
+      -C   - report an error if a hash collision occurs
+      -S   - don't read global FLD-2.2 string file ($BB_STRING_FILE)
+    If tracehash.pl is called in a FipS build sandbox without -d and -f
+    defaults for the sandbox will be used.
+
+Scan mode: tracehash.pl [-vFCS] [-d <dir>] [-s <string_file>] [cppopts]
+           tracehash.pl [-vFCS] [-f <file>] [-s <string_file>] [cppopts]
+    Scan all files in directory tree <dir> or scan file <file> and write
+    strings to file <string_file>. Strings already in this file will be merged.
+      -f   - Source file to scan for trace entries.
+      -d   - Source directory to scan for trace entries.
+      -s   - File with trace strings (and hashes) to read from and write to.
+             default = ./trexStringFile
+      -F   - hash for string was built from format string + source file name
+      -C   - report an error if hash collisions occur
+      -S   - don't read global FLD-2.2 string file ($BB_STRING_FILE)
+      All other arguments will be passed verbatim to cpp
+EOF
+}
+#=============================================================================
+