summaryrefslogtreecommitdiffstats
path: root/tools/PowerPCtoPPE/ppc-ppe-pcp.py
blob: 80bb3f355d40cd679505b9839fb17eab59623e26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
#!/usr/bin/python2.6

# \file  ppc-ppe-pcp.py
# \brief PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
#
# ---------------------------------------------------------------
# Revision History
# ---------------------------------------------------------------
#   10-07-2014: project completed
#   daviddu     added optimization profile support
#
#   10-06-2014: added fused compare and branch support
#   daviddu     added support for combining two ld/st into one double word
#               added support to insert branch upon .p2align directive
#
#   09-27-2014: added subroutine support for mul* and div*
#   daviddu     added virtual double word replacing multiple word support
#
#   09-13-2014: initial version
#   daviddu     only instruction inline replacement is supported
# ---------------------------------------------------------------

# Program version, expressed as the date of the last modification.
P2P_VERSION = "10-07-2014"

# Filename extensions of the assembly files read and written by this program.
P2P_PPC_EXT = '.s'    # PPC405 assembly (input)
P2P_PPE_EXT = '.es'   # PPE42 assembly (output)

# Prefix put in front of the name of every PPE42 assembly subroutine label.
P2P_PPE_PRE = '__ppe42_'

import sys
import os
import re
import fnmatch
import fileinput

# ---------------------------------------------------------------
# PPC405 Assembly to PPE42 Assembly Post-Compiler Processor (P2P)
# ---------------------------------------------------------------
#
# Description:
#
#   This post-compiler processor will take PPC405 assembly file(s) produced
#   by powerpc-linux-gcc or hand coded and replace some of the instructions
#   supported by PPC405 ISA but not PPE42 ISA with a set of instructions
#   supported by PPE42 ISA. Outcome of this program is PPE42 assembly file(s).
#
# Assumptions:
#
#   - Input/Output File Name Extension:
#
#     PPC405 assembly file generated by powerpc-linux-gcc has filename extension
#     defined by "P2P_PPC_EXT" global variable, while PPE42 assembly file
#     consumed by PPE Assembler has filename extension defined by "P2P_PPE_EXT"
#     global variable. Both should be consistent with Makefile rules.
#
#   - Registers:
#
#     Instructions in input file should only use registers supported by PPE,
#     that is R0-R10,R13,R28-R31 for GPRs and PPE only SPRs(for example, PPE
#     only has CR0 instead of CR0-7).
#
#     GCC flag -ffixed can be used to enforce compiler to not use certain
#     registers if compiler generates input files to this script. Note certain
#     optimization levels, such as -Os, of GCC will still use certain registers
#     regardless if -ffixed flag is used. Furthermore, compiler should not
#     generate multiple word instructions(lmw/stmw) that covers the registers
#     forbidden to use by -ffixed flag.
#
#     Example of using -ffixed flag in this case:
#       -ffixed-r11 -ffixed-r12 -ffixed-r14 -ffixed-r15 \
#       -ffixed-r16 -ffixed-r17 -ffixed-r18 -ffixed-r19 \
#       -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
#       -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
#       -ffixed-cr1 -ffixed-cr2 -ffixed-cr3 -ffixed-cr4 \
#       -ffixed-cr5 -ffixed-cr6 -ffixed-cr7
#
#   - Instructions:
#
#     Instructions in input file should only use PowerPC 405 instructions
#     covered by "PowerPC 405-S Embedded Processor Core" manual; however,
#     there is an assumption that a certain category of instructions will never
#     be generated by powerpc-linux-gcc compiler(or disabled by compiler switch).
#
#     Also, compiler should generate extended mnemonics instead of its base
#     instruction when extended mnemonics fits.
#
#     Via -falign-labels=n and -O2 or -O3, the compiler inserts .p2align
#     directive to help instruction alignment for best cache performance.
#
#   - Assembly Syntax:
#
#     There should be only white spaces before instruction mnemonics, in
#     other words, all inline comments should be put behind the instruction.
#
#     "Label:" and an instruction should not be on the same line, hand coded
#     assembly should be consistent with this same compiler output format.
#
# Dependencies:
#
#   In order to utilize assembly subroutines implemented for supporting
#   missing instructions of multiplication and division in PPE42 ISA, a given
#   library(with assembly files and header) must be compiled and linked with
#   any source code that use this program to generate PPE binary.
#
# Usage:
#
#   ./<ThisScript> -f <a filename with path>  --- process single file
#   ./<ThisScript> -d <a directory path>      --- process multiple files
#   ./<ThisScript> -h                         --- detailed usage on other flags
#   ./<ThisScript> -v                         --- version of the program
#   ./<ThisScript> -d <a directory path> -s   --- perform result profiling
#
# Functions:
#
#   p2p_main          - main function, parse options and arguments
#     p2p_onefile     - processing single PPC Assembly File
#       p2p_combine   - processing two PPC instructions in input file
#       p2p_replace   - processing single PPC instruction in input file
#
# Data Structures:
#
#   ReplaceRules = { ppc_op : [rule, ppe_op] }
#   CombineRules = { ppc_op : [rule, ppe_op] }
#   FuseBranches = [ list of branches qualified for fusing with compares ]
#
#------------------------------------------------------------------------------
# ReplaceRules: [ 'r', 'u', 'a', 'h', 's', 'o', 'd', 'm' ]
#-------|-------------------------|--------------------------------------------
# Rule  | Example (PPC to PPE)    | Description
#-------|-------------------------|--------------------------------------------
# 'r'   | [ppc] ppc_op RT, RA, RB | simply 'replace' ppc opcode with ppe opcode
#  0  0 | [ppe] ppe_op RT, RA, RB | while the operands, if any, stay the same
#-------|-------------------------|--------------------------------------------
# 'ru'  | [ppc] ppc_op RT, RA, RB | on top of 'r' rule, emulate 'update' by
#       | [ppe] ppe_op RT, RA, RB | appending "add" instruction after the
# +1 +1 |       add    RA, RA, RB | original instruction to update RA
#-------|-------------------------|--------------------------------------------
# 'ra'  | [ppc] ppc_op RT, D(RA)  | on top of 'r' rule, emulate 'algebraic' by
#       | [ppe] ppe_op RT, D(RA)  | appending "extsh" instruction after the
# +1 +1 |       extsh  RT, RT     | original instruction to sign-extend RT
#-------|-------------------------|--------------------------------------------
# 'rau' | [ppc] ppc_op RT, RA, RB | combining rule 'r', 'a', and 'u' above.
#       | [ppe] ppe_op RT, RA, RB | Note: there is no requirement on whether
#       |       add    RA, RA, RB | rule 'a' or rule 'u' should be applied
# +2 +2 |       extsh  RT, RT     | first, the outcome should be the same.
#-------|-------------------------|--------------------------------------------
# 'h'   | [ppc] ppc_op RT, RA, RB | rule of 'halfword' multiplication
#       | [ppe] srwi   RA, 16     | emulate multiply "high halfword" with
#       |       srwi   RB, 16     | multiply "low halfword" by shifting
# +2 +2 |       ppe_op RT, RA, RB | the operands first
#-------|-------------------------|--------------------------------------------
# 's'   | [ppc] ppc_op RT, RA, RB | emulate word multiply and divide
#       | [ppe] stwu   R1, -24(R1)| instructions with calling 'subroutines'
#       |       stvd   D3, 8(R1)  | implemented in ppe42_mul.S and ppe42_div.S
#       |       mflr   R3         |
#       |       stw    R3, 16(R1) | Calling Conventions:(SS = Stack Size)
#       |       mr     R3, RA     |
#       |       mr     R4, RB     |  Caller is responsible for
#       |       bl     target     |   1) create stack frame
#       |       mr     RT, R3     |   2) save off R3 and R4 to stack
#       |       lwz    R3, 16(R1) |   3) save off link register to stack
#       |       mtlr   R3         |   4) put operands into R3, R4 before branch
#       |       lvd    D3, 8(R1)  |   5) put result in R3 to RT after branch
#       |       lwz    R1, 0(R1)  |   6) restore link register from stack
#       |                         |   7) restore R3 and R4 from stack
#       | [sub] stwu  R1, -SS(R1) |   8) remove the stack frame
#       |       <save volatile>   |
#       |       (subroutine body) |  Callee is responsible for
#       |       <restore volatile>|   1) create and remove stack frame
#       |       lwz    R1, 0(R1)  |   2) save and restore volatile registers
# +X +Y |       blr               |   3) subroutine will not touch LR again
#-------|-------------------------|--------------------------------------------
# 'o'   | [ppc] ppc_op[o] RT ...  | rule of 'o' form for overflow
#       | [ppe] ppe_op    RT ...  | Note: "mullwo", "divwo" and "divwuo" each
#       |       <inst specific>   | has unique setting for XER[OV,SO] if OE = 1
#-------|-------------------------|--------------------------------------------
# 'd'   | [ppc] ppc_op[.] RT ...  | rule of '.' or 'dot' form for recording
#       | [ppe] ppe_op    RT ...  | using "cmpwli" to emulate the [.] form
#       |       cmpwli    RT, 0   | to the instruction result and CR0 fields
#-------|-------------------------|--------------------------------------------
# 'm'   | [ppc] ppc_op RT, D(RA)  | emulate PowerPC load/store multiple word
#       | [ppe] ppe_op DT, D(RA)  | instructions with PPE specific
#       |       (doubleword ld/st)| 'virtual doubleword' instructions if target
#       |       or                | address is 8-byte aligned; otherwise, using
#       |       (singleword ld/st)| single word instructions instead or mix both
#       |       or                | Note only RA == R1/R2/R13 will always meet
# -1 -1 |       (single & double) | alignment requirement of virtual doubleword
#-------|-------------------------|--------------------------------------------
#
# Lookup table used when a single PPC instruction has to be replaced.
# Each entry reads:  ppc_op : [ rule, ppe_op ]
#   rule   - letters selecting the replacement steps applied to the line
#   ppe_op - PPE opcode (or subroutine name / opcode pair) used by those steps
ReplaceRules = {
  # --- synchronization instructions ---
  'eieio':    ['r', 'sync'],
  'isync':    ['r', 'nop'],
  'icbi':     ['r', 'nop'],
  'icbt':     ['r', 'nop'],
  'mtcr':     ['r', 'mtcr0'],

  # --- load/store with [u/x/a] form ---
  'stbux':    ['ru', 'stbx'],
  'sthux':    ['ru', 'sthx'],
  'stwux':    ['ru', 'stwx'],
  'lbzux':    ['ru', 'lbzx'],
  'lhzux':    ['ru', 'lhzx'],
  'lwzux':    ['ru', 'lwzx'],
  'lha':      ['ra', 'lhz'],
  'lhau':     ['ra', 'lhzu'],
  'lhax':     ['ra', 'lhzx'],
  'lhaux':    ['rau', 'lhzx'],

  # --- halfword multiply with [.] form ---
  'mulhhw':   ['h', 'mullhw'],
  'mulhhw.':  ['h', 'mullhw.'],
  'mulhhwu':  ['h', 'mullhwu'],
  'mulhhwu.': ['h', 'mullhwu.'],

  # --- word multiply with [./o] form ---
  'mulhw':    ['s', 'mulhw'],
  'mulhw.':   ['sd', 'mulhw'],
  'mulhwu':   ['s', 'mulhwu'],
  'mulhwu.':  ['sd', 'mulhwu'],
  'mullw':    ['s', 'mullw'],
  'mullw.':   ['sd', 'mullw'],
  'mullwo':   ['so', 'mullw'],
  'mullwo.':  ['sod', 'mullw'],
  'mulli':    ['s', 'mullw'],

  # --- word divide with [./o] form ---
  'divw':     ['s', 'divw'],
  'divw.':    ['sd', 'divw'],
  'divwo':    ['so', 'divw'],
  'divwo.':   ['sod', 'divw'],
  'divwu':    ['s', 'divwu'],
  'divwu.':   ['sd', 'divwu'],
  'divwuo':   ['so', 'divwu'],
  'divwuo.':  ['sod', 'divwu'],

  # --- load/store multiple word (Rx-R31) ---
  'lmw':      ['m', 'lvd,lwz'],
  'stmw':     ['m', 'stvd,stw'],
}


#------------------------------------------------------------------------------
# CombineRules: [ 'f', 'v', 'l' ]
#-------|-------------------------|--------------------------------------------
# 'f'   | [ppc] ppc_op(cmp*)      | rule for 'fusing' adjacent pair of compare
#       |       ppc_op(b*)        | and branch(PPE specific). Note: only
# -1  0 | [ppe] ppe_op(cmp*b*)    | extended mnemonics of compares are handled
#-------|-------------------------|--------------------------------------------
# 'v'   | [ppc] ppc_op(lwz/stw)   | rule for combining double word aligned
#       |       ppc_op(lwz/stw)   | load/store pairs into single 'virtual'
# -1 -1 | [ppe] ppe_op(lvd/stvd)  | double word instructions(PPE specific)
#-------|-------------------------|--------------------------------------------
# 'l'   | [ppc]        .p2align   | compiler will insert ".p2align" directive to
#       |       Label:            | help instructions align from label to label.
#       | [ppe]        b Label    | then assembler will insert "nop" on .p2align
#       |              .p2align   | directive. a "branch" to skip the nops will
#  0 -1 |       Label:            | improve the performance while still aligned
#-------|-------------------------|--------------------------------------------
#
# Lookup table used when two adjacent PPC lines may be combined into one.
# Each entry reads:  ppc_op : [ rule, ppe_op ]
CombineRules = {
  # --- 8-byte aligned loads/stores ---
  'lwz':      ['v', 'lvd'],
  'stw':      ['v', 'stvd'],

  # --- compares that can be fused with a branch ---
  'cmplw':    ['f', 'cmplw'],
  'cmpw':     ['f', 'cmpw'],
  'cmpwi':    ['f', 'cmpwi'],

  # --- '.p2align' directive in front of a 'label:' ---
  '.p2align': ['l', 'b'],
}


#------------------------------------------------------------------------------
# FuseBranches: [ Branches can be fused into cmp*b* ]
#------------------------------------------------------------------------------
#
# Branch mnemonics that may be fused with a preceding compare; every
# mnemonic is listed together with its link ('l') variant.
FuseBranches = [
  'bc',  'bcl',     # generic conditional branch
  'blt', 'bltl',    # less than
  'ble', 'blel',    # less than or equal
  'bgt', 'bgtl',    # greater than
  'bge', 'bgel',    # greater than or equal
  'beq', 'beql',    # equal
  'bne', 'bnel',    # not equal
]


# -----------------------------------------------------------------------------
# p2p_replace:
#   process each line(filtered) in the assembly file to replace PPC instruction
#   to supported PPE instruction(s)
#
# Arguments:
#   string:  line   - assembly file line to be replaced
#            ppc_op - detected PPC opcode that needs to be replaced
# Return:
#   boolean: True   - Return without Error
#            False  - Error Detected
# Variables:
#   string:  inst, rule, ppe_op, newline, temp_op
#            double_inst, single_inst, virtual_reg, base_offset, address_reg
# Subroutine:
#   NONE
# -----------------------------------------------------------------------------
def p2p_replace(line, ppc_op):

  # parse PPC instruction as in I or D form with opcode and upto 3 operands:
  # possible forms: opcode
  #                 opcode RT, RA, RB
  #                 opcode RT, RA, IM
  #                 opcode RT, D(RA)
  # inst.group(0) : <whole instruction>
  # inst.group(1) : " "
  # inst.group(2) : Opcode(.)
  # inst.group(3) : " "
  # inst.group(4) : GPR
  # inst.group(5) : " , "
  # inst.group(6) : GPR or Immediate(D)
  # inst.group(7) : " , " or " ( "
  # inst.group(8) : GPR or Immediate(IM)
  # inst.group(9) : " ) "
  inst = re.search(r"([\s]*)([a-zA-Z\.]+)([\s]*)([%r0-9]*)([\s,]*)([%r0-9\-]*)([\s,\(]*)([%r0-9\-]*)([\s\)]*)", line)

  # detect an error
  if inst is None or ppc_op != inst.group(2):
    return False

  # look up rule to process the instruction
  rule, ppe_op = ReplaceRules[ppc_op]

  # if enabled, put a mark in the output file
  if P2P_COMMENT: print "#P2P(%s):" % rule + line,

  # start cases of replacing PPC instruction with PPE instruction(s)
  #---r------------------------------------------------------------------------
  if 'r' in rule:

    # replace opcode under rule 'r' and rewrite the instruction
    newline = line.replace(ppc_op, ppe_op)
    print newline,

    # do not continue if there is 'a' or 'u' rule to process on this line
    if 'u' not in rule and 'a' not in rule:
      return True

  #---u------------------------------------------------------------------------
  if 'u' in rule:

    # construct and write "add RA, RA, RB" under rule 'u'
    newline = inst.group(1) + 'add' + inst.group(3) + inst.group(6) +\
              inst.group(5) + inst.group(6) + inst.group(7) + inst.group(8)
    print newline

    # do not continue if there is 'a' rule to process on this line
    if 'a' not in rule:
      return True

  #---a------------------------------------------------------------------------
  if 'a' in rule:

    # construct and write "extsh RT, RT" under rule 'a'
    newline = inst.group(1) + 'extsh' + inst.group(3) + inst.group(4) +\
              inst.group(5) + inst.group(4)
    print newline
    return True

  #---h------------------------------------------------------------------------
  if 'h' in rule:

    # construct and write "srwi RA, 16" under rule 'h'
    newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(6) +\
              inst.group(5) + "16"
    print newline

    # construct and write "srwi RB, 16" under rule 'h'
    newline = inst.group(1) + 'srwi' + inst.group(3) + inst.group(8) +\
              inst.group(5) + "16"
    print newline

    # replace opcode in original instruction and write under rule 'h'
    newline = line.replace(ppc_op, ppe_op)
    print newline
    return True

  #---s------------------------------------------------------------------------
  if 's' in rule:

    # construct branch target label
    ppe_op = P2P_PPE_PRE + ppe_op

    # construct and write "stwu R1, -24(R1)" to create the stack frame
    newline = inst.group(1) + 'stwu' + inst.group(3) + '1' +\
              inst.group(5) + '-24(1)'
    print newline

    # construct and write "stvd D3, 8(R1)" to save off R3 and R4
    newline = inst.group(1) + 'stvd' + inst.group(3) + '3' +\
              inst.group(5) + '8(1)'
    print newline

    # construct and write "mflr R3" to fetch the current link address
    newline = inst.group(1) + 'mflr' + inst.group(3) + '3'
    print newline

    # construct and write "stw R3, 16(R1)" to save off current LR to stack
    newline = inst.group(1) + 'stw' + inst.group(3) + '3' +\
              inst.group(5) + '16(1)'
    print newline

    # construct and write "mr R3, RA" to copy the operand RA to R3
    newline = inst.group(1) + 'mr' + inst.group(3) + '3' +\
              inst.group(5) + inst.group(6)
    print newline

    # if 'mulli' is detected, using 'li' instead of 'mr' for second operand
    if ppc_op == 'mulli':
      temp_op = 'li'
    else:
      temp_op = 'mr'

    # construct and write "mr R4, RB" to copy the operand RB to R4
    # or in 'mulli' case, "li R4, IM" to copy the operand IM to R4
    newline = inst.group(1) + temp_op + inst.group(3) + '4' +\
              inst.group(5) + inst.group(8)
    print newline

    # using branch and link(bl) to branch to subroutine
    # later subroutine can branch back using branch link register(blr)
    # Assumption: the subroutine will be responsible for saving
    # and restoring all the volatilo registers used in the subroutine
    newline = inst.group(1) + 'bl' + inst.group(3) + ppe_op
    print newline

    # construct and write "mr RT, R3" to copy the result in R3 to RT
    newline = inst.group(1) + 'mr' + inst.group(3) + inst.group(4) +\
              inst.group(5) + '3'
    print newline

    # construct and write "lwz R3, 16(R1)" to fetch the LR value from stack
    newline = inst.group(1) + 'lwz' + inst.group(3) + '3' +\
              inst.group(5) + '16(1)'
    print newline

    # construct and write "mtlr R3" to restore the link register
    newline = inst.group(1) + 'mtlr' + inst.group(3) + '3'
    print newline

    # construct and write "lvd D3, 8(R1)" to restore R3 and R4
    newline = inst.group(1) + 'lvd' + inst.group(3) + '3' +\
              inst.group(5) + '8(1)'
    print newline

    # construct and write "lwz R1, 0(R1)" to destroy the stack frame
    newline = inst.group(1) + 'lwz' + inst.group(3) + '1' +\
              inst.group(5) + '0(1)'
    print newline
    return True

  #---m------------------------------------------------------------------------
  if 'm' in rule:

    # parse instruction information
    # note register can be in either "N" form or "%rN" form
    double_inst,single_inst = ppe_op.split(',')
    virtual_reg = int(re.search(r'\d+', inst.group(4)).group())
    base_offset = int(inst.group(6))
    address_reg = int(re.search(r'\d+', inst.group(8)).group())

    # consider illegal if multiple word instruction covers non-exist registers
    if virtual_reg < 28:
      return False

    # loop until and include GPR31
    while virtual_reg < 32:
      # page 270 of 405 manual, only do this for load instructions
      if virtual_reg == address_reg != 31 and 'l' in single_inst:
         base_offset += 4
         virtual_reg += 1
         continue

      # if other GPRs being address_reg there is no guarantee for alignment
      if address_reg not in [1,2,13]:
        # construct and write "lwz/stw RT, D(RA)" for every registers
        newline = inst.group(1) + single_inst + inst.group(3) +\
                  str(virtual_reg) + inst.group(5) + str(base_offset) +\
                  inst.group(7) + inst.group(8) + inst.group(9)
        print newline
        base_offset += 4
        virtual_reg += 1
      else:
        # if base_offset is also aligned with base address in the address_reg
        # & there are at least two more registers to perform doubleword ld/st
        if not (base_offset % 8) and (virtual_reg + 1) < 32:
          # construct and write "lvd/stvd DR, D(RA)" under rule 'v'
          newline = inst.group(1) + double_inst + inst.group(3) +\
                    str(virtual_reg) + inst.group(5) + str(base_offset) +\
                    inst.group(7) + inst.group(8) + inst.group(9)
          print newline
          base_offset += 8
          virtual_reg += 2
        # either only one register left or base_offset isnt aligned
        else:
          # construct and write "lwz/stwz SR, D(RA)" under rule 'v'
          newline = inst.group(1) + single_inst + inst.group(3) +\
                    str(virtual_reg) + inst.group(5) + str(base_offset) +\
                    inst.group(7) + inst.group(8) + inst.group(9)
          print newline
          base_offset += 4
          virtual_reg += 1
      # end of this if-else
    # end of while loop
    return True
  # end of last if


# -----------------------------------------------------------------------------
# p2p_combine:
#   process each two lines(filtered) in the assembly file to combine two PPC
#   instructions to one PPE specific instruction for better performance
#
# Arguments:
#   string:  first_line   - 1st assembly file line to be combined
#            second_line  - 2nd assembly file line to be combined
#            first_op     - 1st detected PPC opcode that needs to be combined
#            second_op    - 2nd detected PPC opcode that needs to be combined
# Return:
#   boolean: done         - True: return without error
#                         - False: return with error detected
#            match        - True: eventually matched and combined
#                         - False: fail to qualify to be combined
# Variables:
#   string:  first_inst, second_inst, rule, ppe_op, newline
#            bo, px_bix, compare_operands, target
# Subroutine:
#   NONE
# -----------------------------------------------------------------------------
def p2p_combine(first_line, second_line, first_op, second_op):

  global P2P_SPACE; global P2P_CYCLE
  global P2P_COMPARE_BRANCH; global P2P_VIRTUAL_DOUBLE

  # parse PPC instruction as in I or B or D form with opcode and upto 3 operands
  # possible form : [1st] opcode [CR,] RA, RB
  #                 [1st] opcode [CR,] RA, IM
  #                 [1st] opcode RT, D(RA)
  #                 [2nd] opcode [CR,] Target
  #                 [2nd] opcode BO, BI, Target
  #                 [2nd] opcode RT, D(RA)
  # inst.group(0) : <whole instruction>
  # inst.group(1) : " "
  # inst.group(2) : Opcode(+/-/.)
  # inst.group(3) : " "
  # inst.group(4) : GPR or CR or BO or Target
  # inst.group(5) : " , "
  # inst.group(6) : GPR or IM or D(label+offset@sda21) or BI or Target
  # inst.group(7) : " , " or " ( "
  # inst.group(8) : GPR or IM or Target
  # inst.group(9) : " ) "
  first_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", first_line)
  second_inst = re.search(r"([\s]*)([a-zA-Z\+\-\.]+)([\s]*)([%a-zA-Z0-9_\.]*)([\s,]*)([%a-zA-Z0-9_@\.\-\+]*)([\s,\(]*)([%a-zA-Z0-9_\.\-]*)([\)]*)", second_line)

  # detect an error
  if first_inst is None or second_inst is None or \
     first_op != first_inst.group(2) or second_op not in second_inst.group(2):
    return False,False

  # look up rule to process the instruction
  rule, ppe_op = CombineRules[first_op]

  # start cases of combining two PPC instructions into PPE instruction
  #---f------------------------------------------------------------------------
  if 'f' in rule:

    if not P2P_COMPARE_BRANCH:
      return True,False 

    # fusing compare and branch
    ppe_op = ppe_op + second_op

    # for cmpwib* case, there is a difference between
    # cmpwi SI operand as signed 16-bit integer and then got sign extended and
    # cmpwib* UIX operand as unsigned 5-bit integer and then got zero extended
    # thus, will not fuse the two if the integer operand is not in range(0,31)
    # if cr field is omitted:
    if ',' in first_inst.group(7):
      # cr field must be cr0 or 0, error out if it is something else:
      if '0' not in first_inst.group(4):
        return False, True
      if 'i' in first_op and (int(first_inst.group(8)) < 0 or \
                              int(first_inst.group(8)) > 31):
          return True,False
      else:
        compare_operands = first_inst.group(6) + first_inst.group(7) + \
                           first_inst.group(8) + ', '
    else:
      if 'i' in first_op and (int(first_inst.group(6)) < 0 or \
                              int(first_inst.group(6)) > 31):
          return True,False
      else:
        compare_operands = first_inst.group(4) + first_inst.group(5) + \
                           first_inst.group(6) + ', '

    # if 'bc' then extract BO,BI fields and convert to PX,BIX fields of 'cmp*b*'
    #   Note CTR decreament and branch always cases are not handled, and
    #   python bin() outputs "0bXXXXX" form so bo[2] is actually BO bit 0
    # else there is no need for PX,BIX fields for extended mnemonics
    if 'bc' in second_op:
      bo = bin(int(second_inst.group(4)))

      # do not handle CRT decreament or branch always cases
      if bo[4] == 0 or bo[2] == 1:
        return True,False

      # PX = BO bit 1, BIX = BI = [0,1,2,3] due to only CR0 is used
      px_bix = bo[3] + second_inst.group(5) + \
               second_inst.group(6) + second_inst.group(7)
      target = second_inst.group(8)
    else:
      px_bix = ""
      # if cr field is omitted:
      if ',' in second_inst.group(5):
        # cr field must be cr0 or 0, error out if it is something else:
        if '0' not in second_inst.group(4):
          return False, True
        target = second_inst.group(6)
      else:
        target = second_inst.group(4)

    # profile: space--, cycle is the same because 1+2==3
    P2P_SPACE -= 1

    # if enabled, put a mark in the output file
    if P2P_COMMENT:
      print "#P2P(%s):" % rule + first_line,
      print "#P2P(%s):" % rule + second_line,

    # construct and write "cmp*b* [PX,BIX,] RA,RB/IM,target" under 'f' rule
    newline = first_inst.group(1) + ppe_op + first_inst.group(3) +\
              px_bix + compare_operands + target
    print newline
    return True,True


  #---v------------------------------------------------------------------------
  if 'v' in rule:

    if not P2P_VIRTUAL_DOUBLE:
      return True,False

    global P2P_VDW_SDA

    # Combinable Conditions:
    # 1) base address registers must be the same and one of R1/R2/R13
    # 2) address offsets have to be 8-bytes continuous and aligned
    # 3) target or source registers must qualify to be double word register
    # Note: label+offset@sda21 format is coverted to target r13 after link
    #       assume data go in and out r13 or SDA space is always 8-byte aligned
    #       here we only check the continous of address offset and register pair
    if first_inst.group(8) == second_inst.group(8) in ['1','2','13'] or \
       ("@sda21" in first_inst.group(6) and \
        "@sda21" in second_inst.group(6) and \
        P2P_VDW_SDA):

      if ((first_inst.group(6).replace("@sda21","") + "+4" == \
           second_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
          ((first_inst.group(6).isdigit() and \
           not int(first_inst.group(6)) % 8) and \
           int(first_inst.group(6)) + 4 == int(second_inst.group(6)))) and \
         (int(first_inst.group(4)) + 1 == int(second_inst.group(4)) or \
          (int(first_inst.group(4)) == 31 and \
          int(second_inst.group(4)) == 0)):
        newline = first_line.replace(first_op, ppe_op)
      elif ((second_inst.group(6).replace("@sda21","") + "+4" == \
             first_inst.group(6).replace("@sda21","") and P2P_VDW_SDA) or \
            ((second_inst.group(6).isdigit() and \
             not int(second_inst.group(6)) % 8) and \
             int(second_inst.group(6)) + 4 == int(first_inst.group(6)))) and \
           (int(second_inst.group(4)) + 1 == int(first_inst.group(4)) or \
            (int(second_inst.group(4)) == 31 and \
            int(first_inst.group(4)) == 0)):
        newline = second_line.replace(second_op, ppe_op)
      else:
        return True,False

      # profile: space--, cycle--(same delay but 1 less from issue)
      P2P_SPACE -= 1; P2P_CYCLE -= 1

      # if enabled, put a mark in the output file
      if P2P_COMMENT:
        print "#P2P(%s):" % rule + first_line,
        print "#P2P(%s):" % rule + second_line,

      print newline,
      return True,True
    else:
      return True,False


# -----------------------------------------------------------------------------
# p2p_onefile:
#   process single PPC assembly file to convert it into PPE assembly file
#   also filter out non-instruction lines before calling the subroutine
#
# Arguments:
#   string: ppcFileName
# Return:
#   boolean: done - True if file processing completed without error
#                 - False if file processing failed due to an error
# Variables:
#   boolean: match, done
#   string:  ppeFileName, line, ppc_op, pre_op, pre_line, section, label
#   integer: line_num, first_label_ln, second_label_ln, misalign
# Subroutine:
#   p2p_combine
#   p2p_replace
# -----------------------------------------------------------------------------
def p2p_onefile(ppcFileName):

  global P2P_SPACE; P2P_SPACE = 0 # profile count
  global P2P_CYCLE; P2P_CYCLE = 0 # profile count

  if P2P_VERBOSE : print "Translate PPC assembly: " + ppcFileName

  # new PPE assembly file is renamed as <filename>.s
  ppeFileName = ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT)
  os.rename(ppcFileName, ppeFileName)

  # initialize storage variables for previous line that needs to be remembered
  pre_line = ""
  pre_op = ""

  # use inline file editing, back up original PPC assembly file as <filename>.S
  for line in fileinput.input(ppeFileName, inplace=1, backup='.405'):

    # in case of "mtmsr 0; isync"
    line = line.replace('isync','nop')

    # skip blank line
    if not line.strip():
      if pre_line:
        print pre_line,
        pre_line = ""
      print line,
      continue

    # skip comments line
    if re.search("^[\s]*(//|#)", line):
      if pre_line:
        print pre_line,
        pre_line = ""
      print line,
      continue

    # skip .section code except .p2align and label:
    section = re.search("^[\s]*(\.[0-9a-zA-Z_]+)", line)
    if section is not None and ':' not in line and \
       section.group(1) != '.p2align':
      if pre_line:
        print pre_line,
        pre_line = ""
      print line,
      continue

    # apply specical 'l' rule in CombineRules for '.p2align' before a 'label:'
    label = re.search("^[\s]*[\.0-9a-zA-Z_]+[:]+", line)
    if label is not None:
      if pre_line and pre_op == '.p2align':
        second_label_ln = fileinput.lineno()
        misalign = 8 - (second_label_ln - first_label_ln - 2) % 8
        if misalign in [3,4,5,6,7]:
          # profile: same space, but save cycles, branch penalty is 2
          P2P_CYCLE -= misalign - 2
          if P2P_COMMENT: print "#P2P(l):"
          print '\tb ' + label.group(0).split(':')[0]
        print pre_line,
        pre_line = ""
      first_label_ln = fileinput.lineno()
      if pre_line:
        print pre_line,
        pre_line = ""
      print line,
      continue

    # extract opcode field from line
    ppc_op = line.split()[0]
    done,match = False,False

    # detect the 2nd possible combinable instruction
    if pre_line and P2P_COMBINE:
      # ignore +/- signs for branch prediction
      if '+' in ppc_op or '-' in ppc_op:
        ppc_op = ppc_op[:-1]
      if 'cmp' in pre_op and ppc_op in FuseBranches or \
         'cmp' not in pre_op and ppc_op == pre_op:
        done,match = p2p_combine(pre_line, line, pre_op, ppc_op)
        if not match:
          print pre_line,
      else:
        print pre_line,
        done,match = True,False
      pre_line = ""

    # detect the 1st possible combinable instruction
    if not pre_line and not match and P2P_COMBINE:
      if ppc_op in CombineRules.keys():
        pre_op = ppc_op
        pre_line = line
        done,match = True,True
      else:
        done,match = True,False

    # defect possible replacable instruction
    if not match:
      if ppc_op in ReplaceRules.keys() and P2P_REPLACE:
        done = p2p_replace(line, ppc_op)
      else:
        print line,
        done = True

    # if instruction process is not done due to error
    if not done:
      line_num = fileinput.lineno()
      break

  # close the output file and restore the original input file
  fileinput.close()
  os.rename(ppeFileName+'.405', ppcFileName)

  # in case last line of the file qualified to be a pre_line and was not printed
  if pre_line:
    f = open(ppeFileName, 'a')
    f.write(pre_line)
    f.close()

  # print error debug message
  if not done:
    print "Error: target instruction detected at line [%d]:" % line_num
    print "       " + line
    print "       but fail to recognize instruction format."
    # terminate Makefile or execution if an error is detected
    sys.exit(1)

  if P2P_COMMENT and P2P_PROFILE:
    f = open(ppeFileName, 'a')
    f.write("#P2P: space(%d) cycle(%d)" % (P2P_SPACE,P2P_CYCLE))
    f.close()

  if P2P_VERBOSE:
    print "Generated PPE assembly: " + ppeFileName
    if P2P_PROFILE:
      print "Optimization Profiling: " + str(P2P_SPACE*4) + " bytes, " +\
                                         str(P2P_CYCLE) + " cycles."


# -----------------------------------------------------------------------------
# p2p_profile
#   profiling how much performance and code size are saved by optimization
#   by reading the "#P2P: space(..) cycle(..)" mark from the last line of the
#   PPE assembly file that belongs to the given PPC assembly file
#
# Arguments:
#   string: ppcFileName
# Return:
#   list: [space, cycle]
#         [0,0] if the PPE file is empty or its last line is not a profile mark
# Variables:
#   string: line, profile
# Subroutine:
#   None
# -----------------------------------------------------------------------------
def p2p_profile(ppcFileName):
  # stays empty when the file has no lines at all, so the search below
  # simply finds no mark instead of hitting an unbound name
  line = ""
  with open(ppcFileName.replace(P2P_PPC_EXT, P2P_PPE_EXT), 'r') as f:
    # walk through the file, only the last line is of interest
    for line in f:
      pass
  profile = re.search(r"^\#P2P: space\(([0-9\-]+)\) cycle\(([0-9\-]+)\)", line)
  if profile is None:
    return [0,0]
  return [int(profile.group(1)), int(profile.group(2))]

# -----------------------------------------------------------------------------
# p2p_main:
#   main of this script
#     print usage info
#     parse options and arguments
#     process one file or a directory of files
# -----------------------------------------------------------------------------
def p2p_main():

  # command-line option parsing
  from optparse import OptionParser
  usage  = "usage: %prog [options]"
  version= "%prog v." + P2P_VERSION
  parser = OptionParser(usage=usage, version=version)
  parser.add_option("-d", "--directory", metavar="PATH", dest="ppcPath",
                  help="process all files in a directory given by PATH")
  parser.add_option("-f", "--filename",  metavar="FILE", dest="ppcFile",
                  help="process single file(with path in the filename)")
  parser.add_option("-p", "--parallel",
                  action="store_true",  dest="parallel", default=False,
                  help="processing all files in parallel processes")
  parser.add_option("-s", "--statistics",
                  action="store_true",  dest="profile", default=False,
                  help="optimization profiling, require comment in outputs")
  parser.add_option("-c", "--combine-only",
                  action="store_false", dest="replace",  default=True,
                  help="enable only combine function by disabling replace")
  parser.add_option("-r", "--replace-only",
                  action="store_false", dest="combine",  default=True,
                  help="enable only replace function by disabling combine")
  parser.add_option("-b", "--compare branch disable",
                  action="store_false", dest="compare_branch",  default=True,
                  help="only disabling fused compare branch function")
  parser.add_option("-v", "--virtual double disable",
                  action="store_false", dest="virtual_double",  default=True,
                  help="only disabling fused virtual double function")
  parser.add_option("-e", "--eabi",
                  action="store_true", dest="vdw_sda",  default=False,
                  help="enable virtual double word fusion targeting sda")
  parser.add_option("-n", "--no-comment",
                  action="store_false", dest="comment",  default=True,
                  help="don't leave comment mark in output file")
  parser.add_option("-q", "--quiet",
                  action="store_false", dest="verbose",  default=True,
                  help="don't print status messages to stdout")
  (options, args) = parser.parse_args()
  # global program output verbose switch
  global P2P_VERBOSE; P2P_VERBOSE = options.verbose
  # leave a comment mark in output files
  global P2P_COMMENT; P2P_COMMENT = options.comment
  # space/performance profiling function
  global P2P_PROFILE; P2P_PROFILE = options.profile
  # enable instruction replace functions
  global P2P_REPLACE; P2P_REPLACE = options.replace
  # enable instruction combine functions
  global P2P_COMBINE; P2P_COMBINE = options.combine
  # enable virtual double word fusion targeting sda
  global P2P_VDW_SDA; P2P_VDW_SDA = options.vdw_sda
  # enable only fused compare and branch function
  global P2P_COMPARE_BRANCH; P2P_COMPARE_BRANCH = options.compare_branch
   # enable only combined virtual double function
  global P2P_VIRTUAL_DOUBLE; P2P_VIRTUAL_DOUBLE = options.virtual_double
 
  if P2P_VERBOSE :
    print "PPC405 Assembly to PPE42 Assembly Post-Compiler Proceesor (P2P)"
    print "Version: " + P2P_VERSION

  # single file processing
  if options.ppcFile:

    if P2P_VERBOSE :
      print "Processing signle file: " + options.ppcFile

    p2p_onefile(options.ppcFile)

  # multiple files processing
  if options.ppcPath:

    if P2P_VERBOSE :
      print "Accessing all files at: " + options.ppcPath
      print "*Parallel Process Mode: " + ("Off", "On")[options.parallel]

    if options.profile:
      bytes = 0; cycles = 0

    fileList = []
    for root, subdirs, files in os.walk(options.ppcPath):
      for file in fnmatch.filter(files, '*'+P2P_PPC_EXT):
        if options.parallel :
          fileList.append(os.path.join(root, file))
        else:
          if options.profile:
            space,cycle = p2p_profile(os.path.join(root, file))
            bytes += space*4
            cycles += cycle
          else:
            p2p_onefile(os.path.join(root, file))

    if options.profile:
      print "Optimization Profiling: " + str(bytes) + " bytes, " +\
                                         str(cycles) + " cycles."

    # parallel processing mode
    if options.parallel:
      from multiprocessing import Pool
      p = Pool()
      p.map(p2p_onefile, fileList)
      p.close()
      p.join()

  if P2P_VERBOSE : print "Done"


# -----------------------------------------------------------------------------
# python main
#   run p2p_main() only when this file is executed as a script,
#   not when it is imported as a module
if __name__ == '__main__':
  p2p_main()

# OpenPOWER on IntegriCloud