summaryrefslogtreecommitdiffstats
path: root/libpore/pore_inline_assembler.c
blob: 5747c6f4df7be73a868367383556af94dff126ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/usr/hwpf/hwp/build_winkle_images/p8_slw_build/pore_inline_assembler.c $ */
/*                                                                        */
/* OpenPOWER HostBoot Project                                             */
/*                                                                        */
/* Contributors Listed Below - COPYRIGHT 2012,2014                        */
/* [+] International Business Machines Corp.                              */
/*                                                                        */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
// $Id: pore_inline_assembler.c,v 1.22 2013/12/11 00:11:14 bcbrock Exp $
// $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/working/procedures/pore_inline_assembler.c,v $
//-----------------------------------------------------------------------------
// *! (C) Copyright International Business Machines Corp. 2013
// *! All Rights Reserved -- Property of IBM
// *! ***  ***
//-----------------------------------------------------------------------------

// ** WARNING : This file is maintained as part of the OCC firmware.  Do **
// ** not edit this file in the PMX area or the hardware procedure area  **
// ** as any changes will be lost.                                       **

/// \file pore_inline_assembler.c
/// \brief Inline PGAS assembler for PgP/Stage1 PORE
///
/// \page pore_inline_assembler PORE Inline Assembler and Disassembler
///
/// Several procedures targeting the PORE engine require inline assembly and
/// disassembly of PORE code, that is, they require that PORE instructions be
/// assembled/disassembled directly into/from a host memory buffer. This page
/// describes these facilities.  The APIs described here are implemented in
/// the files pore_inline.h, pore_inline_assembler.c and
/// pore_inline_disassembler.c.  Both the inline assembler and disassembler
/// conform to the PGAS assembly format for PORE.
///
/// Both inline assembly and disassembly make use of a PoreInlineContext
/// structure. This structure represents the state of a memory area being
/// targeted for inline assembly and disassembly.  The context is initialized
/// with the pore_inline_context_create() API, and a pointer to an instance of
/// this structure appears as the first argument of all assembler/disassembler
/// APIs. As assembly/disassembly progresses the PoreInlineContext keeps
/// track of how much host memory area has been filled by assembled code or
/// scanned by the disassembler.
///
/// Assembler/disassembler APIs are predicates that return 0 for success and a
/// non-zero error code for failure.  In the event of failure, the error code
/// (a small integer) is also stored in the \a error field of the context
/// structure.  String forms of the error codes are also available in the
/// global array pore_inline_error_strings[].
///
/// The assembler always produces PORE code in the PORE-native big-endian
/// format.  Likewise, the disassembler assumes the host memory to be
/// disassembled contains PORE code in big-endian format.
///
/// \section Initialization
///
/// Before invoking inline assembly/disassembly APIs, an instance of a
/// PoreInlineContext structure must be initialized using the
/// pore_inline_context_create() API.  For assembly, the context describes the
/// host memory buffer that will contain the assembled code.  For disassembly,
/// the context describes the host memory area that contains the code to be
/// disassembled. Full documentation is available for
/// pore_inline_context_create(), including documentation for options that
/// control assembly and disassembly.  The implementation also provides a
/// 'copy operator' for the context, pore_inline_context_copy().
///
/// An example of initializing a context for inline assembly with parity
/// checking appears below.
///
/// \code 
///
/// PoreInlineContext ctx;
/// uint32_t buf[BUFSIZE];
///
/// rc = pore_inline_context_create(&ctx, buf, BUFSIZE * 4, 0,
///                                 PORE_INLINE_CHECK_PARITY);
/// if (rc) . . . Handle Error
///
/// \endcode
///
/// Applications that reuse the same memory buffer for assembling and
/// processing multiple PORE programs can 'reset' the context between uses by
/// using the pore_inline_context_reset() API.  pore_inline_context_reset()
/// resets the location counter and memory extent to their initial (creation)
/// values, and the context error code is cleared.  Any options specified at
/// creation remain as they were.
///
/// \section Assembler
///
/// The inline assembler implements each PORE/PGAS instruction as individual
/// function calls.  The APIs are consistently named \c pore_\<OPCODE\>, where
/// \c \<OPCODE\> is a PGAS mnemonic in upper case.  The arguments to each
/// opcode appear in the same order that they appear in the source-level
/// assembler, with appropriate C-language types. The supported opcode APIs
/// are defined in pore_inline.h
/// 
/// Since the PORE instruction APIs are effectively predicates, linear code
/// sequences are easily assembled using the C-language logical OR construct.
/// Any non-0 return code will immediately break the sequence and set the
/// expression value to 1.  The failure code can then be recovered from the \a
/// error field of the context.  This coding technique is illustrated in the
/// following example of assembling a memory-memory copy sequence.
///
/// \code 
///
/// PoreInlineContext ctx;
/// int error;
///
/// . . . // Initialize context
///
/// error =
///     pore_LD(&ctx, D0, 0, A0) ||
///     pore_STD(&ctx, D0, 0, A1);
///
/// if (error) <. . . Handle error based on ctx.error>
///
/// \endcode
///
/// The above example generates code equivalent to
///
/// \code
///
///         ld      D0, 0, A0
///         std     D0, 0, A1
///
/// \endcode
///
/// Again, if an error were to occur during assembly, inline assembly would
/// stop (and the logical OR would terminate) at the point of failure. In
/// particular, the inline assembler will never allow assembled code to exceed
/// the bounds of the memory area defined by the initial call of
/// pore_inline_context_create() that defines the assembler memory space.
///
///
/// \subsection Register Names and Other Mnemonics
///
/// The header file pore_inline.h defines macros for the register mnemonics.
///
/// - D0, D1 : 64-bit data registers
/// - A0, A1 : 32-bit address registers
/// - P0, P1 : 7-bit Pervasive chiplet id registers
/// - CTR : 24-bit counter register
/// - PC : 48-bit Program Counter
/// - ETR : 64-bit EXE-Trigger Register (Low-order 32 bits are writable)
/// - EMR : The Error Mask Register
/// - IFR : ID/Flags Register
/// - SPRG0 : 32-bit Special-Purpose General Register 0
///
/// Mnemonics for the condition code bits are also defined by pore_inline.h
/// using the PGAS mnemonics.
///
///
/// \subsection Assembling Branches
///
/// Opcodes that implement relative branches require that the branch target be
/// specified as a <em> location counter </em>. Once initialized, the current
/// location counter is available as the \a lc field of the PoreInlineContext
/// object controlling the assembly.  The \a lc field is the only field
/// (besides the error code held in the \a error field) that application code
/// should ever reference. The inline assembler also provides a typedef
/// PoreInlineLocation to use for location counters, as well as the macro
/// PORE_LOCATION() to define a location variable inline with the code flow.
/// 
/// \subsubsection Backward Branches
///
/// Backward branches are straightforward.  For example, the memory-memory
/// copy example from earlier can be converted into a loop as shown below.  The
/// \a loop_target variable is initialized with the location counter of the
/// first instruction of the loop.  The final instruction of the loop then
/// branches back to the \a loop_target.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineLocation loop_target = 0; // See ** below the example
/// int error;
///
/// . . . // Initialize context
///
/// error =
///     PORE_LOCATION(&ctx, loop_target) ||
///     pore_LD(&ctx, D0, 0, A0)         ||
///     pore_STD(&ctx, D0, 0, A1)      	 ||
///     pore_ADDS(&ctx, A0, A0, 8)     	 ||
///     pore_ADDS(&ctx, A1, A1, 8)     	 ||
///     pore_LOOP(&ctx, loop_target);
///
/// if (error) <. . . Handle error based on ctx.error>
///
/// \endcode
///
/// The above inline assembler sequence is equivalent to the PGAS code
/// sequence:
///
/// \code
///
/// loop_target:
///        ld      D0, 0, A0
///        std     D0, 0, A1
///        adds    A0, A0, 8
///        adds    A1, A1, 8
///        loop    loop_target
///
/// \endcode
///
/// ** Location counters used as loop targets may need to be initialized,
/// otherwise the compiler may issue a warning that the variable "may be used
/// uninitialized", although in well-written code this would never happen.
///
///
/// \subsubsection Forward Branches
///
/// Forward branches are more complex.  Since the target location counter is
/// not known until the target has been assembled, the inline assembler
/// provides the API pore_inline_branch_fixup() to fix up forward branches
/// once the actual target is known.  This is illustrated in the simple code
/// sequence below, where an instruction is conditionally skipped.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineLocation source = 0, target = 0;
/// int error, rc;
///
/// . . . // Initialize context
///
/// error = 
///     PORE_LOCATION(&ctx, source)  ||
///     pore_BRANZ(&ctx, D0, source) ||
///     pore_ADDS(&ctx, D1, D1, 1)   ||
///     PORE_LOCATION(&ctx, target)  ||
///     pore_LD(&ctx, D0, 0, A0);
///
/// if (error) <. . . Handle assembly error based on ctx.error>
/// rc = pore_inline_branch_fixup(&ctx, source, target);
/// if (rc) <. . . Handle branch fixup error>
///
/// \endcode
///
/// In the above code, the branch instruction is initially assembled as a
/// branch-to-self - the recommended idiom for forward branch source
/// instructions.  Once the entire sequence has been assembled,
/// pore_inline_branch_fixup() reassembles the \c source instruction as a
/// branch to the \c target instruction. The above instruction sequence is
/// equivalent to the PGAS code below:
///
/// \code
///
/// source:
///         branz   D0, target
///         adds    D1, D1, 1
/// target:
///         ld      D0, 0, A0
///
/// \endcode
///
///
/// \subsubsection Absolute Branches
///
/// It is unlikely that a typical application of the PORE inline assembler
/// would ever need to include an absolute branch, since the branch target in
/// this case is a fixed absolute address that must be known at assembly
/// time. However the inline assembler does provide the pore_BRAIA() API for
/// this purpose.  This opcode requires a 16-bit address space constant and a
/// 32-bit absolute address (offset) within the memory space to specify the
/// branch. 
///
///
/// \section Disassembly
///
/// Inline disassembly is implemented by a single API,
/// pore_inline_disassemble(). The idea is similar to assembly: A host memory
/// context containing PORE code (or data) is described by a PoreInlineContext
/// structure.  Each call of pore_inline_disassemble() disassembles the next
/// instruction (or datum) in the context into a PoreInlineDisassembly
/// structure provided by the caller.  The disassembly object contains both
/// binary and string forms of the disassembled instruction (or data). The
/// next call of pore_inline_disassemble() processes the next instruction (or
/// datum) and so on.
///
/// \subsection Text (Code) Disassembly
///
/// In the example below the inline disassembler is used to completely
/// disassemble a memory area containing text (code) to \a stdout until an
/// error occurs, assumed to be either due to disassembling the entire memory
/// area or finding an illegal instruction.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
///
/// while (pore_inline_disassemble(&ctx, &dis) == 0) {
///     printf("%s\n", dis.s);
/// }
///
/// \endcode
///
/// To illustrate binary disassembly, the following example uses the
/// disassembler to search for a RET statement in a block of PORE code, in
/// order to extend an inline subroutine with more code.  Note that the field
/// \a dis->ctx contains the context that existed at the time the instruction
/// was assembled.  By copying this context back into the global context,
/// inline assembly will continue by overwriting the RET with new
/// instructions. If the copy had \e not been done, then newly assembled code
/// would have \e followed the RET.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
///
/// while ((pore_inline_disassemble(&ctx, &dis) == 0) &&
///        (dis.opcode != PORE_OPCODE_RET));
/// if (ctx.error != 0) {
///     . . . // Handle error
/// } else {
///     pore_inline_context_copy(&ctx, &dis.ctx);
///     . . . // Continue assembly by overwriting the RET
/// }
///
/// \endcode
///
/// A special type of context reset is available to simplify applications that
/// need to disassemble a just-assembled code sequence, e.g. for debugging.
/// pore_inline_context_reset_excursion() resets the context such that the
/// effective size of the context only covers the just-assembled code,
/// allowing a disassembly loop to cleanly stop once all code has been
/// disassembled. The use is illustrated below - note that the disassembly
/// stops on the expected error code PORE_INLINE_NO_MEMORY once the
/// (effective) end of the buffer is reached.
///
/// \code
///
/// PoreInlineContext ctx;
/// PoreInlineDisassembly dis;
///
/// . . . // Initialize context
/// . . . // Assemble code into context
///
/// pore_inline_context_reset_excursion(&ctx);
///
/// while (pore_inline_disassemble(&ctx, &dis) == 0) {
///     printf("%s\n", dis.s);
/// }
/// if (ctx.error != PORE_INLINE_NO_MEMORY) {
///     . . . // Handle error
/// }
///
/// \endcode
///
/// \subsection Data Disassembly
///
/// If the PoreInlineContext is created with the flag
/// PORE_INLINE_DISASSEMBLE_DATA, then the context is disassembled as data. If
/// the PoreInlineContext is created with the flag
/// PORE_INLINE_DISASSEMBLE_UNKNOWN then putative data embedded in a text
/// section will be disassembled as data.  For complete information see the
/// documentation for pore_inline_disassemble().


#define __PORE_INLINE_ASSEMBLER_C__
#include "pore_inline.h"
#undef __PORE_INLINE_ASSEMBLER_C__

// Definitions of PORE register classes.  These are predicates that return
// 1 if the register is a member of the class, else 0.

// Register class predicate: data registers.
//
// Returns 1 when \a reg is D0 or D1, otherwise 0.
PORE_STATIC int
pore_data(int reg)
{
    if (reg == D0) {
        return 1;
    }
    return (reg == D1) ? 1 : 0;
}


// Register class predicate: address registers.
//
// Returns 1 when \a reg is A0 or A1, otherwise 0.
PORE_STATIC int
pore_address(int reg)
{
    if (reg == A0) {
        return 1;
    }
    return (reg == A1) ? 1 : 0;
}


// Register class predicate: pervasive chiplet id registers.
//
// Returns 1 when \a reg is P0 or P1, otherwise 0.
PORE_STATIC int
pore_pervasive_chiplet_id(int reg)
{
    if (reg == P0) {
        return 1;
    }
    return (reg == P1) ? 1 : 0;
}


// Register class predicate: D0, D1 or CTR.
//
// Returns 1 when \a reg is one of those three registers, otherwise 0.
// NOTE(review): judging by the name, these are the registers accepted as
// the compare operand of conditional branches - confirm against callers.
PORE_STATIC int
pore_branch_compare_data(int reg)
{
    if (reg == D0) {
        return 1;
    }
    if (reg == D1) {
        return 1;
    }
    return (reg == CTR) ? 1 : 0;
}


// Register class predicate: D0, D1, A0, A1, P0, P1 or CTR.
//
// Returns 1 when \a reg is a member of that set, otherwise 0.
// NOTE(review): presumably the legal destinations of load/store-class
// instructions - confirm against callers.
PORE_STATIC int
pore_ls_destination(int reg)
{
    const int members[] = { D0, D1, A0, A1, P0, P1, CTR };
    unsigned int i;

    /* Linear membership scan over the (small) register class. */
    for (i = 0; i < sizeof(members) / sizeof(members[0]); i++) {
        if (reg == members[i]) {
            return 1;
        }
    }
    return 0;
}


// Register class predicate: D0, D1, A0, A1, P0, P1 or CTR.
//
// Returns 1 when \a reg is a member of that set, otherwise 0.
// NOTE(review): presumably the legal destinations of load-immediate
// instructions - confirm against callers.
PORE_STATIC int
pore_li_destination(int reg)
{
    const int members[] = { D0, D1, A0, A1, P0, P1, CTR };
    unsigned int i;

    /* Linear membership scan over the (small) register class. */
    for (i = 0; i < sizeof(members) / sizeof(members[0]); i++) {
        if (reg == members[i]) {
            return 1;
        }
    }
    return 0;
}


// Register class predicate: D0, D1, A0, A1, P0, P1, CTR, PC, ETR, SPRG0,
// IFR or EMR.
//
// Returns 1 when \a reg is a member of that set, otherwise 0.
// NOTE(review): presumably the legal sources of a register move - confirm
// against callers.
PORE_STATIC int
pore_mr_source(int reg)
{
    const int members[] = {
        D0, D1, A0, A1, P0, P1, CTR, PC, ETR, SPRG0, IFR, EMR
    };
    unsigned int i;

    /* Linear membership scan over the (small) register class. */
    for (i = 0; i < sizeof(members) / sizeof(members[0]); i++) {
        if (reg == members[i]) {
            return 1;
        }
    }
    return 0;
}

// Register class predicate: D0, D1, A0, A1, P0, P1, CTR, PC, SPRG0 or EMR.
//
// Returns 1 when \a reg is a member of that set, otherwise 0.  Unlike
// pore_mr_source(), ETR and IFR are not members of this class.
// NOTE(review): presumably the legal destinations of a register move -
// confirm against callers.
PORE_STATIC int
pore_mr_destination(int reg)
{
    const int members[] = {
        D0, D1, A0, A1, P0, P1, CTR, PC, SPRG0, EMR
    };
    unsigned int i;

    /* Linear membership scan over the (small) register class. */
    for (i = 0; i < sizeof(members) / sizeof(members[0]); i++) {
        if (reg == members[i]) {
            return 1;
        }
    }
    return 0;
}


/// Portable store of a 32-bit integer in big-endian format
///
/// \param p The address to receive the data, in the form of an unsigned
/// long.  The address does not need to be 4-byte aligned.
///
/// \param x The 32-bit value to store.
///
/// The value is written one byte at a time, most-significant byte first.
/// This yields the same memory image on big- and little-endian hosts, and
/// never performs a wide store through a cast pointer, so it is safe for
/// unaligned addresses.

void
pore_inline_be32(unsigned long p, uint32_t x)
{
    uint8_t *p8 = (uint8_t *)p;

    p8[0] = (uint8_t)(x >> 24);
    p8[1] = (uint8_t)(x >> 16);
    p8[2] = (uint8_t)(x >> 8);
    p8[3] = (uint8_t)x;
}
	
	
/// Portable store of a 64-bit integer in big-endian format
///
/// \param p The address to receive the data, in the form of an unsigned
/// long.  The address does not need to be 8-byte aligned.
///
/// \param x The 64-bit value to store.
///
/// The value is written one byte at a time, most-significant byte first.
/// This yields the same memory image on big- and little-endian hosts, and
/// never performs a wide store through a cast pointer, so it is safe for
/// addresses that are only 4-byte aligned or not aligned at all.

void
pore_inline_be64(unsigned long p, uint64_t x)
{
    uint8_t *p8 = (uint8_t *)p;
    int i;

    for (i = 0; i < 8; i++) {
        /* Byte 0 receives bits 63:56, byte 7 receives bits 7:0. */
        p8[i] = (uint8_t)(x >> (56 - (8 * i)));
    }
}


// Portable load of a 32-bit integer in big-endian format
//
// The address \a p of the data is in the form of an unsigned long and does
// not need to be 4-byte aligned.  The 4 bytes at \a p are interpreted
// most-significant byte first and returned as a host-format integer.  The
// value is assembled byte by byte, so the result is independent of host
// endianness and no wide load through a cast pointer is performed.

uint32_t
pore_inline_host32(unsigned long p)
{
    const uint8_t *p8 = (const uint8_t *)p;

    return ((uint32_t)p8[0] << 24) |
           ((uint32_t)p8[1] << 16) |
           ((uint32_t)p8[2] << 8)  |
            (uint32_t)p8[3];
}
	
	
// Portable load of a 64-bit integer in big-endian format
//
// The address \a p of the data is in the form of an unsigned long and does
// not need to be 8-byte aligned.  The 8 bytes at \a p are interpreted
// most-significant byte first and returned as a host-format integer.  The
// value is assembled byte by byte, so the result is independent of host
// endianness and no wide load through a cast pointer is performed.

uint64_t
pore_inline_host64(unsigned long p)
{
    const uint8_t *p8 = (const uint8_t *)p;
    uint64_t x = 0;
    int i;

    for (i = 0; i < 8; i++) {
        /* Shift in each successive (less significant) byte. */
        x = (x << 8) | p8[i];
    }

    return x;
}


// 32-bit population count
//
// Counts the 1 bits of \a x by summing adjacent bit fields in place: first
// pairs of bits, then 4-bit fields, then bytes, then the two halfwords.
// The result is in the range 0..32.

PORE_STATIC int
pore_popcount32(uint32_t x)
{
    const uint32_t pair_mask = 0x55555555;
    const uint32_t nibble_mask = 0x33333333;
    const uint32_t byte_mask = 0x0f0f0f0f;
    uint32_t sums;

    /* Each 2-bit field now holds the count of its two bits. */
    sums = x - ((x >> 1) & pair_mask);

    /* Each 4-bit field now holds the count of its four bits. */
    sums = (sums & nibble_mask) + ((sums >> 2) & nibble_mask);

    /* Each byte now holds the count of its eight bits. */
    sums = (sums + (sums >> 4)) & byte_mask;

    /* Fold the byte counts together; the total lands in the low bits. */
    sums = sums + (sums >> 8);
    sums = sums + (sums >> 16);

    return sums & 0x3f;
}


// 64-bit population count
//
// Splits \a x into its two 32-bit halves and adds their bit counts.

PORE_STATIC int
pore_popcount64(uint64_t x)
{
    uint32_t low_half = (uint32_t)(x & 0xffffffff);
    uint32_t high_half = (uint32_t)(x >> 32);

    return pore_popcount32(high_half) + pore_popcount32(low_half);
}


// Compute the parity of a PORE instruction as 0 or 1
//
// The parity is taken over all bits of the 32-bit \a instruction word and
// the 64-bit \a imd64 value together: 1 if the total number of 1 bits is
// odd, 0 if it is even.

int
pore_inline_parity(uint32_t instruction, uint64_t imd64)
{
    int ones = pore_popcount32(instruction);

    ones += pore_popcount64(imd64);

    /* The count is never negative, so the low bit is the parity. */
    return ones & 1;
}
	
	
/// Reset a PORE inline assembler context to its creation state
///
/// \param ctx A pointer to an initialized (and likely 'used')
/// PoreInlineContext object.
///
/// This API resets a PoreInlineContext object to its \e creation state, that
/// is, the state it was in after the call of pore_inline_context_create().
/// This API is designed for applications that reuse a memory buffer to
/// assemble multiple PORE code sequences.  After each sequence has been fully
/// assembled and processed, calling pore_inline_context_reset() sets the
/// context back as it was when the context was initially created so that the
/// memory area can be reused.  In particular, this API resets the location
/// counter and memory extent to their initial values, and the error code is
/// cleared.  Any options specified at creation remain as they were.
///
/// For a slightly different type of reset, see
/// pore_inline_context_reset_excursion().  

void
pore_inline_context_reset(PoreInlineContext *ctx)
{
    /* Forget any error recorded by earlier operations. */
    ctx->error = 0;

    /* Rewind the location counter to the value given at creation. */
    ctx->lc = ctx->original_lc;

    /* Make the whole memory area available again, starting at its base. */
    ctx->remaining = ctx->size;
    ctx->lc_address = ctx->memory;
}



/// Reset a PORE inline assembler context to a special state for disassembly
///
/// \param ctx A pointer to an initialized (and almost certainly 'used')
/// PoreInlineContext object.
///
/// This API resets a PoreInlineContext object to its \e creation state, that
/// is, the state it was in after the call of pore_inline_context_create(), \e
/// except that the effective size of the memory area has been reduced to the
/// size that was actually used during assembly.  This API is designed for
/// applications that assemble into a memory buffer and then want to easily
/// disassemble the code (e.g., for debugging).  After a code sequence has
/// been assembled, calling pore_inline_context_reset_excursion() sets the
/// context back as it was when the context was initially created, but with a
/// (typically) shorter effective length, so that the disassembly will cleanly
/// stop once the entire sequence has been disassembled. Once disassembled,
/// the buffer can be fully reused after a subsequent call of
/// pore_inline_context_reset().  In particular, this API resets the location
/// counter to its initial value, clears the error code, and sets the
/// effective size of the context to the amount of memory currently used.  Any
/// options specified at creation remain as they were.
///
/// For a full context reset see pore_inline_context_reset(). For an example
/// see the \b Disassembly section of \ref pore_inline_assembler.

void
pore_inline_context_reset_excursion(PoreInlineContext *ctx)
{
    /* Forget any error recorded by earlier operations. */
    ctx->error = 0;

    /* Rewind the location counter to the value given at creation. */
    ctx->lc = ctx->original_lc;

    /*
     * Shrink the effective extent to the number of bytes consumed so far
     * (total size minus what was still free), then rewind to the base.
     */
    ctx->remaining = ctx->size - ctx->remaining;
    ctx->lc_address = ctx->memory;
}


/// Create a PORE inline assembler context
///
/// \param ctx A pointer to a PoreInlineContext object to be initialized
/// and used for inline assembly or disassembly.
///
/// \param memory A pointer to the host memory area to receive the assembled
/// code, or contain the code to disassemble. In general the inline assembler
/// will expect this memory area to be 4-byte aligned. This pointer may be
/// NULL (0) only if the associated \a size is also 0.
///
/// \param size The size (in bytes) of the host memory area. The inline
/// assembler will generate the PORE_INLINE_NO_MEMORY error if an attempt is
/// made to assemble an instruction that would overflow the buffer, or
/// disassemble past the end of the buffer. A 0 size is valid.
///
/// \param lc The initial, bytewise, target location counter for the assembled
/// or disassembled code. This parameter will normally be initialized to 0 for
/// assembling relocatable programs. The parameter would only need to be
/// specified as non-0 for special cases, such as creating a context for
/// disassembly.
///
/// \param options Option flags.  Option flags are OR-ed together to create
/// the final set of options. Valid options are
///
/// - PORE_INLINE_GENERATE_PARITY : Generate the proper parity bit for each
/// instruction during assembly.
///
/// - PORE_INLINE_CHECK_PARITY : Check for correct instruction parity during
/// disassembly.
///
/// - PORE_INLINE_LISTING_MODE : Generate disassembly strings in the form of a
/// listing that contains location counters and encoded instructions as well
/// as their disassembly.  By default the disassembly strings do not contain
/// this information and can be fed back in as source code to a PORE
/// assembler.
///
/// - PORE_INLINE_DISASSEMBLE_DATA : generate disassembly assuming that the
/// context contains data rather than text. Normally data is disassembled as
/// .long directives, however if the context is unaligned or of an odd length
/// then .byte directives may be used as well.  This option can be used in
/// conjunction with PORE_INLINE_LISTING_MODE.
///
/// - PORE_INLINE_8_BYTE_DATA : generate data disassembly using 8-byte values
/// rather than the default 4-byte values.  Normally data is disassembled as
/// .quad directives under this option, however if the context is unaligned or
/// of an odd length then .long and .byte directives may be used as well.
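/// This option can be used in conjunction with PORE_INLINE_LISTING_MODE.
///
/// - PORE_INLINE_DISASSEMBLE_UNKNOWN : disassemble putative data embedded in
/// a text section as data.  See pore_inline_disassemble() for details.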
///
/// A PoreInlineContext describes a memory area and assembler context for
/// inline assembly and disassembly.  Assembly/disassembly begins at the host
/// memory location and virtual location counter described in the parameters.
/// As instructions are assembled/disassembled the PoreInlineContext keeps
/// track of where in the host memory and virtual PORE memory areas to place
/// new instructions during assembly, or from where to fetch the next
/// instruction to disassemble.
///
/// \retval 0 Success
///
/// \retval PORE_INLINE_INVALID_PARAMETER Either the \a context pointer is
/// NULL (0), the \a memory pointer is NULL (0) with a non-0 size, or the \a
/// options include invalid options.  The error code is also stored as the
/// value of ctx->error, and in the event of an error the ctx->size field is
/// set to 0, effectively preventing the context from being used.

int
pore_inline_context_create(PoreInlineContext *ctx,
                           void *memory, size_t size,
                           PoreInlineLocation lc, int options)
{
    // Every option bit accepted by this API; any other bit is rejected.
    const int accepted =
        PORE_INLINE_GENERATE_PARITY |
        PORE_INLINE_CHECK_PARITY |
        PORE_INLINE_LISTING_MODE |
        PORE_INLINE_DISASSEMBLE_DATA |
        PORE_INLINE_8_BYTE_DATA |
        PORE_INLINE_DISASSEMBLE_UNKNOWN;
    int rc = PORE_INLINE_INVALID_PARAMETER;

    // Without a context there is nowhere to record the error code.
    if (ctx == NULL) {
        return rc;
    }

    // A NULL buffer is only acceptable when the size is 0.
    if (!((memory == NULL) && (size != 0)) &&
        ((options & ~accepted) == 0)) {
        ctx->memory = (unsigned long)memory;
        ctx->size = size;
        ctx->original_lc = lc;
        ctx->options = options;
        pore_inline_context_reset(ctx);
        rc = 0;
    }

    // The error is recorded after the reset so that it reflects this call.
    ctx->error = rc;
    if (rc != 0) {
        ctx->size = 0;      /* Effectively prevents using the ctx */
    }
    return rc;
}
	
	
/// Copy a PORE inline assembler context
///
/// \param dest A pointer to a PoreInlineContext object to be initialized
/// as a copy of the \a src context.
///
/// \param src A pointer to a PoreInlineContext object to be used as the
/// source of the copy.
///
/// This API copies one PoreInlineContext structure to another.  An example
/// use appears in \ref pore_inline_assembler in the section discussing
/// disassembly.

void
pore_inline_context_copy(PoreInlineContext *dest, PoreInlineContext *src)
{
    // Plain structure assignment: each field of *src is copied into *dest.
    dest[0] = src[0];
}


// Advance ('bump') a context by a given number of bytes.  This is an
// internal API: the caller guarantees that the bump is legal, so no bounds
// or alignment checks are made here.

void
pore_inline_context_bump(PoreInlineContext *ctx, size_t bytes)
{
    // Move both the host address and the virtual location counter forward
    // and shrink the space left by the same amount.
    ctx->lc_address += bytes;
    ctx->lc += bytes;
    ctx->remaining -= bytes;
}


// Reserve space in the inline assembler context
//
// The request is expressed in bytes.  Both the virtual location counter and
// the host address must be 4-byte aligned at the time of the call.  On
// success the host address of the reserved area is returned and the context
// is bumped past it; ctx->error is left untouched.  On failure 0 is returned
// and ctx->error is set to PORE_INLINE_ALIGNMENT_ERROR or
// PORE_INLINE_NO_MEMORY.

PORE_STATIC unsigned long
pore_inline_allocate(PoreInlineContext *ctx, size_t bytes)
{
    unsigned long where;

    // The alignment check takes precedence over the space check.
    if (((ctx->lc % 4) != 0) || ((ctx->lc_address % 4) != 0)) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return 0;
    }

    if (bytes > ctx->remaining) {
        ctx->error = PORE_INLINE_NO_MEMORY;
        return 0;
    }

    where = ctx->lc_address;
    pore_inline_context_bump(ctx, bytes);
    return where;
}


// Assemble a 1-word instruction
//
// The opcode and operand are assumed to be legal, having come from
// abstractions that check their arguments.  The opcode is placed in the bits
// above bit 24 and, if PORE_INLINE_GENERATE_PARITY is set, bit 24 receives
// the parity bit.  This call fails if the allocator fails, i.e. with
// PORE_INLINE_NO_MEMORY if there is no more room in the memory buffer or
// PORE_INLINE_ALIGNMENT_ERROR if the context is not 4-byte aligned.  On
// success ctx->error is cleared to 0.  A non-zero return indicates failure.

int
pore_inline_instruction1(PoreInlineContext *ctx, int opcode, uint32_t operand)
{
    uint32_t instruction;
    unsigned long p;

    p = pore_inline_allocate(ctx, 4);
    if (p == 0) {
        // The allocator has already recorded the error in ctx->error.
        return 1;
    }

    // Shift in unsigned arithmetic.  Shifting a signed int left by 25 is
    // undefined behavior whenever the result does not fit in an int, which
    // is the case for any opcode value of 0x40 or above.
    instruction = ((uint32_t)opcode << 25) | operand;
    if (ctx->options & PORE_INLINE_GENERATE_PARITY) {
        instruction |= (1 - pore_inline_parity(instruction, 0)) << 24;
    }

    pore_inline_be32(p, instruction);
    ctx->error = 0;
    return 0;
}
	    

// Assemble a 3-word instruction
//
// The opcode and operand are assumed to be legal, having come from
// abstractions that check their arguments.  The first word holds the opcode
// and operand (plus the parity bit in bit 24 if PORE_INLINE_GENERATE_PARITY
// is set, computed over the instruction word and the immediate); the 64-bit
// immediate follows in the next 8 bytes.  This call fails if the allocator
// fails, i.e. with PORE_INLINE_NO_MEMORY if there is no more room in the
// memory buffer or PORE_INLINE_ALIGNMENT_ERROR if the context is not 4-byte
// aligned.  On success ctx->error is cleared to 0.  A non-zero return
// indicates failure.

int
pore_inline_instruction3(PoreInlineContext *ctx, int opcode, uint32_t operand,
                         uint64_t immediate)
{
    uint32_t instruction;
    unsigned long p;

    p = pore_inline_allocate(ctx, 12);
    if (p == 0) {
        // The allocator has already recorded the error in ctx->error.
        return 1;
    }

    // Shift in unsigned arithmetic.  Shifting a signed int left by 25 is
    // undefined behavior whenever the result does not fit in an int, which
    // is the case for any opcode value of 0x40 or above.
    instruction = ((uint32_t)opcode << 25) | operand;
    if (ctx->options & PORE_INLINE_GENERATE_PARITY) {
        instruction |= (1 - pore_inline_parity(instruction, immediate)) << 24;
    }

    pore_inline_be32(p, instruction);
    pore_inline_be64(p + 4, immediate);
    ctx->error = 0;
    return 0;
}


// Assemble WAITS
//
// A cycle count of 0 is rejected with PORE_INLINE_USE_HALT: PGAS requires
// that HALT be used if the intention is to halt.  Otherwise the cycle count
// must fit in an unsigned 24-bit immediate, or the error
// PORE_INLINE_UINT24_REQUIRED is signalled.

int
pore_WAITS(PoreInlineContext *ctx, uint32_t cycles)
{
    if (cycles == 0) {
        ctx->error = PORE_INLINE_USE_HALT;
        return ctx->error;
    }

    // Anything above 0xffffff needs more than 24 bits.
    if (cycles > 0xffffff) {
        ctx->error = PORE_INLINE_UINT24_REQUIRED;
        return ctx->error;
    }

    pore_inline_instruction1(ctx, PGAS_OPCODE_WAITS, cycles);
    return ctx->error;
}


// Assemble HOOKI
//
// The hook index becomes the operand of a 3-word instruction whose 64-bit
// immediate is \a imm.  The index must fit in an unsigned 24-bit immediate,
// otherwise the error PORE_INLINE_UINT24_REQUIRED is signalled.

int
pore_HOOKI(PoreInlineContext *ctx, uint32_t index, uint64_t imm)
{
    // Anything above 0xffffff needs more than 24 bits.
    if (index > 0xffffff) {
        ctx->error = PORE_INLINE_UINT24_REQUIRED;
        return ctx->error;
    }

    pore_inline_instruction3(ctx, PGAS_OPCODE_HOOKI, index, imm);
    return ctx->error;
}


// Assemble BRA, BSR and LOOP
//
// The branch target is given as a bytewise location counter.  It must be
// 4-byte aligned (else PORE_INLINE_ALIGNMENT_ERROR) and its distance from
// the current LC, measured in words, must fit in a signed 24-bit field
// (else PORE_INLINE_UNREACHABLE_TARGET).

int
pore_inline_bra(PoreInlineContext *ctx, int opcode, PoreInlineLocation target)
{
    const int32_t limit = 1 << 23;
    int32_t words;

    if ((target % 4) != 0) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return ctx->error;
    }

    // Signed word distance from the current location counter.
    words = (int32_t)(target - ctx->lc) / 4;
    if ((words < -limit) || (words >= limit)) {
        ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        return ctx->error;
    }

    // Keep only the low 24 bits (two's complement for negative distances).
    pore_inline_instruction1(ctx, opcode, (uint32_t)words & 0xffffff);
    return ctx->error;
}
	    

// Assemble BRAZ and BRANZ
//
// The branch target here is a bytewise location counter.  The target must be
// 4-byte aligned and must be within the legal signed 20-bit word offset of
// the current LC, i.e. in the range [-2^19, 2^19) words, because only the
// low 20 bits of the offset are encoded (the register occupies the bits
// from bit 20 up). Unaligned targets cause PORE_INLINE_ALIGNMENT_ERROR.
// Unreachable targets cause PORE_INLINE_UNREACHABLE_TARGET.  Illegal
// operands cause PORE_INLINE_ILLEGAL_REGISTER.

int
pore_inline_brac(PoreInlineContext *ctx, int opcode, int reg, 
                 PoreInlineLocation target)
{
    int32_t offset;
    uint32_t operand;

    if (target % 4) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
    } else if (!pore_branch_compare_data(reg)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    } else {
        offset = (int32_t)(target - ctx->lc) / 4;

        // A signed 20-bit field holds [-2^19, 2^19).  Testing against 2^20
        // would accept offsets whose bit 19 disagrees with their sign; after
        // masking to 20 bits they would encode a branch in the opposite
        // direction.
        if ((offset >= (1 << 19)) ||
            (offset < -(1 << 19))) {
            ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        } else {
            operand = (offset & 0xfffff) | (reg << 20);
            pore_inline_instruction1(ctx, opcode, operand);
        }
    }
    return ctx->error;
}


// Assemble CMPIBRAEQ, CMPIBRANE, CMPIBSREQ
//
// The branch target is given as a bytewise location counter.  It must be
// 4-byte aligned (else PORE_INLINE_ALIGNMENT_ERROR), the register must be D0
// (else PORE_INLINE_ILLEGAL_REGISTER), and the word distance from the
// current LC must fit in a signed 24-bit field (else
// PORE_INLINE_UNREACHABLE_TARGET).  The comparison value \a imm is emitted
// as the 64-bit immediate of a 3-word instruction.

int
pore_inline_cmpibra(PoreInlineContext *ctx, int opcode, int reg,
                    PoreInlineLocation target, uint64_t imm)
{
    const int32_t limit = 1 << 23;
    int32_t words;

    if ((target % 4) != 0) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return ctx->error;
    }

    if (reg != D0) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    // Signed word distance from the current location counter.
    words = (int32_t)(target - ctx->lc) / 4;
    if ((words < -limit) || (words >= limit)) {
        ctx->error = PORE_INLINE_UNREACHABLE_TARGET;
        return ctx->error;
    }

    // The register is not encoded; only the 24-bit offset forms the operand.
    pore_inline_instruction3(ctx, opcode, (uint32_t)words & 0xffffff, imm);
    return ctx->error;
}


// Assemble BRAD and BSRD
//
// The register must be of class 'data', otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.

int
pore_inline_brad(PoreInlineContext *ctx, int opcode, int reg)
{
    if (pore_data(reg)) {
        // The register number is the only operand field.
        pore_inline_instruction1(ctx, opcode, reg << 20);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// Assemble ANDI, ORI, XORI
//
// Both the source and the destination must be of class 'data', otherwise
// the PORE_INLINE_ILLEGAL_REGISTER error is generated.  The 64-bit value
// \a imm is emitted as the immediate of a 3-word instruction.

int
pore_inline_ilogic(PoreInlineContext *ctx, int opcode, 
                   int dest, int src, uint64_t imm)
{
    if (!(pore_data(dest) && pore_data(src))) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    pore_inline_instruction3(ctx, opcode, (dest << 20) | (src << 16), imm);
    return ctx->error;
}


// Assemble AND, OR, XOR, ADD, SUB
//
// The destination must be of class 'data', src1 must be D0 and src2 must be
// D1; any other combination generates the PORE_INLINE_ILLEGAL_REGISTER
// error.  Only the destination is encoded in the operand.

int
pore_inline_alurr(PoreInlineContext *ctx, 
                  int opcode, int dest, int src1, int src2)
{
    if (!(pore_data(dest) && (src1 == D0) && (src2 == D1))) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    pore_inline_instruction1(ctx, opcode, dest << 20);
    return ctx->error;
}


// Assemble ADDS and SUBS
//
// The destination must be of class 'ls_destination' and must be the same
// register as the source, otherwise the PORE_INLINE_ILLEGAL_REGISTER error
// is generated.  The immediate must fit in a signed 16-bit field, otherwise
// the PORE_INLINE_INT16_REQUIRED error is generated.  The register check is
// made first.

int
pore_inline_adds(PoreInlineContext *ctx, 
                 int opcode, int dest, int src, int imm)
{
    const int limit = 1 << 15;

    if (!(pore_ls_destination(dest) && (dest == src))) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    if ((imm < -limit) || (imm >= limit)) {
        ctx->error = PORE_INLINE_INT16_REQUIRED;
        return ctx->error;
    }

    // The immediate is stored in the low 16 bits in two's complement.
    pore_inline_instruction1(ctx, opcode, (dest << 20) | (imm & 0xffff));
    return ctx->error;
}


// Assemble NEG
//
// Both the source and the destination must be of class 'data', otherwise
// the PORE_INLINE_ILLEGAL_REGISTER error is generated.

int
pore_NEG(PoreInlineContext *ctx, int dest, int src)
{
    if (!(pore_data(dest) && pore_data(src))) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    pore_inline_instruction1(ctx, PGAS_OPCODE_NEG, (dest << 20) | (src << 16));
    return ctx->error;
}


// Assemble MR
//
// The destination must be an 'mr_destination' and the source must be an
// 'mr_source', otherwise the PORE_INLINE_ILLEGAL_REGISTER error is
// generated.  The destination is checked first.

int
pore_MR(PoreInlineContext *ctx, int dest, int src)
{
    if (!(pore_mr_destination(dest) && pore_mr_source(src))) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    pore_inline_instruction1(ctx, PGAS_OPCODE_MR, (dest << 20) | (src << 16));
    return ctx->error;
}

	    

// Assemble ROLS
//
// Both the source and the destination must be of class 'data', otherwise
// the PORE_INLINE_ILLEGAL_REGISTER error is generated.  The only rotate
// amounts accepted are 1, 4, 8, 16 and 32; any other value yields the
// PORE_INLINE_ILLEGAL_ROTATE error.  Registers are checked before the
// rotate amount.

int
pore_ROLS(PoreInlineContext *ctx, int dest, int src, int imm)
{
    if (!(pore_data(dest) && pore_data(src))) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    switch (imm) {
    case 1:
    case 4:
    case 8:
    case 16:
    case 32:
        // The rotate amount is placed unmodified in the low operand bits.
        pore_inline_instruction1(ctx, PGAS_OPCODE_ROLS,
                                 (dest << 20) | (src << 16) | imm);
        break;
    default:
        ctx->error = PORE_INLINE_ILLEGAL_ROTATE;
        break;
    }
    return ctx->error;
}


// Assemble LS
//
// The destination must be an 'ls_destination', otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  The immediate must fit
// in a signed 20-bit field, i.e. lie in [-2^19, 2^19), otherwise the
// PORE_INLINE_INT20_REQUIRED error is generated.

int
pore_LS(PoreInlineContext *ctx, int dest, int imm)
{
    const int limit = 1 << 19;

    if (!pore_ls_destination(dest)) {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
        return ctx->error;
    }

    if ((imm < -limit) || (imm >= limit)) {
        ctx->error = PORE_INLINE_INT20_REQUIRED;
        return ctx->error;
    }

    // The immediate is stored in the low 20 bits in two's complement.
    pore_inline_instruction1(ctx, PGAS_OPCODE_LS,
                             (dest << 20) | (imm & 0xfffff));
    return ctx->error;
}


// Assemble LI
//
// The destination must be an 'li_destination', otherwise the
// PORE_INLINE_ILLEGAL_REGISTER error is generated.  The 64-bit value \a imm
// is emitted as the immediate of a 3-word instruction.

int
pore_LI(PoreInlineContext *ctx, int dest, uint64_t imm)
{
    if (pore_li_destination(dest)) {
        pore_inline_instruction3(ctx, PGAS_OPCODE_LI, dest << 20, imm);
    } else {
        ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
    }
    return ctx->error;
}


// BSI and BCI are normally redacted as instructions due to HW274735

// LD, LDANDI, STD, STI, BSI, BCI

PORE_STATIC void
pervasive_ima24(PoreInlineContext *ctx, 
                int opcode, uint32_t offset, int base, uint64_t imm)
{
    uint32_t operand;
    int one_word;

    // Bit 31 and bits 20-23 of the offset must be clear.
    if ((offset & 0x80f00000) != 0) {
        ctx->error = PORE_INLINE_ILLEGAL_SCOM_ADDRESS;
        return;
    }

    // Only the low 20 bits of the offset are encoded; the low bit of the
    // base register number selects bit 22.
    operand = ((base % 2) << 22) | (offset & 0xfffff);

    // LD0/LD1/STD0/STD1 carry no immediate and are 1-word instructions;
    // every other opcode is assembled as a 3-word instruction with \a imm.
    one_word = (opcode == PGAS_OPCODE_LD0) ||
        (opcode == PGAS_OPCODE_LD1) ||
        (opcode == PGAS_OPCODE_STD0) ||
        (opcode == PGAS_OPCODE_STD1);

    if (one_word) {
        pore_inline_instruction1(ctx, opcode, operand);
    } else {
        pore_inline_instruction3(ctx, opcode, operand, imm);
    }
}
        

PORE_STATIC void
memory_ima24(PoreInlineContext *ctx, 
             int opcode, uint32_t offset, int base, uint64_t imm)
{
    uint32_t operand;
    int one_word;

    // The offset must fit in 22 bits ...
    if (offset > 0x3fffff) {
        ctx->error = PORE_INLINE_UINT22_REQUIRED;
        return;
    }

    // ... and be a multiple of 8 bytes.
    if ((offset % 8) != 0) {
        ctx->error = PORE_INLINE_ALIGNMENT_ERROR;
        return;
    }

    // Bit 23 is always set for this form; the low bit of the base register
    // number selects bit 22 and the offset fills the low 22 bits.
    operand = 0x800000 | ((base % 2) << 22) | (offset & 0x3fffff);

    // LD0/LD1/STD0/STD1 carry no immediate and are 1-word instructions;
    // every other opcode is assembled as a 3-word instruction with \a imm.
    one_word = (opcode == PGAS_OPCODE_LD0) ||
        (opcode == PGAS_OPCODE_LD1) ||
        (opcode == PGAS_OPCODE_STD0) ||
        (opcode == PGAS_OPCODE_STD1);

    if (one_word) {
        pore_inline_instruction1(ctx, opcode, operand);
    } else {
        pore_inline_instruction3(ctx, opcode, operand, imm);
    }
}


PORE_STATIC void
ima24(PoreInlineContext *ctx, 
      int opcode, uint32_t offset, int base, uint64_t imm)
{
    // Dispatch on the class of the base register: the pervasive chiplet id
    // test is made first, then the address register test.
    if (pore_pervasive_chiplet_id(base)) {
        pervasive_ima24(ctx, opcode, offset, base, imm);
        return;
    }

    if (pore_address(base)) {
        memory_ima24(ctx, opcode, offset, base, imm);
        return;
    }

    // Neither class of base register: nothing is assembled.
    ctx->error = PORE_INLINE_ILLEGAL_REGISTER;
}


// Assemble LD, LDANDI, STD, STI (and BSI, BCI if IGNORE_HW274735 is defined)
//
// For the pseudo-opcodes PORE_INLINE_PSEUDO_LD, PORE_INLINE_PSEUDO_LDANDI
// and PORE_INLINE_PSEUDO_STD the register \a src_dest must be of class
// 'data' (else PORE_INLINE_ILLEGAL_REGISTER) and selects the D0 or D1
// flavor of the real opcode.  PGAS_OPCODE_STI is passed through unchanged.
// Any other opcode yields PORE_INLINE_BUG.  Once the opcode is resolved the
// instruction is assembled by ima24(), which validates \a offset and
// \a base.
//
// The validation outcome is tracked in a local variable rather than read
// back from ctx->error, so that an error left in the context by an earlier
// call cannot cause a valid load/store to be silently skipped.  This matches
// the other assembler entry points, which do not consult ctx->error on
// entry.
//
// Returns ctx->error, which is 0 on success.

int
pore_inline_load_store(PoreInlineContext *ctx, 
                       int opcode, int src_dest, int32_t offset, int base,
                       uint64_t imm)
{
    int rc = 0;

    switch (opcode) {

    case PORE_INLINE_PSEUDO_LD:
    case PORE_INLINE_PSEUDO_LDANDI:
    case PORE_INLINE_PSEUDO_STD:

        // These three pick the real opcode based on the dest. register

        if (!pore_data(src_dest)) {
            rc = PORE_INLINE_ILLEGAL_REGISTER;
        } else {
            switch (opcode) {
            case PORE_INLINE_PSEUDO_LD:
                opcode = (src_dest == D0) ? 
                    PGAS_OPCODE_LD0 : PGAS_OPCODE_LD1;
                break;
            case PORE_INLINE_PSEUDO_LDANDI:
                opcode = (src_dest == D0) ? 
                    PGAS_OPCODE_LD0ANDI : PGAS_OPCODE_LD1ANDI;
                break;
            case PORE_INLINE_PSEUDO_STD:
                opcode = (src_dest == D0) ? 
                    PGAS_OPCODE_STD0 : PGAS_OPCODE_STD1;
                break;
            }
        }
        break;

#ifdef IGNORE_HW274735

    // BSI and BCI are normally redacted as instructions due to HW274735

    case PGAS_OPCODE_BSI:
    case PGAS_OPCODE_BCI:

        if (src_dest != D0) {
            rc = PORE_INLINE_ILLEGAL_REGISTER;
        }
        break;

#endif // IGNORE_HW274735

    case PGAS_OPCODE_STI:
        break;

    default:
        rc = PORE_INLINE_BUG;
        break;
    }

    if (rc != 0) {
        ctx->error = rc;
    } else {
        // Every path through ima24() writes ctx->error: either an error
        // code, or 0 once the instruction has been assembled.
        ima24(ctx, opcode, offset, base, imm);
    }

    return ctx->error;
}


// Assemble BRAIA
//
// Emits a 3-word PGAS_OPCODE_BRAI instruction with a zero operand.  The
// 64-bit immediate holds the address space in the bits above bit 31 and the
// offset in the low 32 bits.

int
pore_BRAIA(PoreInlineContext *ctx,
           uint16_t address_space, uint32_t offset)
{
    uint64_t destination = (uint64_t)address_space;

    destination <<= 32;
    destination |= offset;

    pore_inline_instruction3(ctx, PGAS_OPCODE_BRAI, 0, destination);
    return ctx->error;
}


// Assemble SCAND
//
// \a update and \a capture are flags and must each be 0 or 1, otherwise the
// PORE_INLINE_INVALID_PARAMETER error is generated.  The operand carries
// update in bit 23, capture in bit 22 and the length in the low bits; the
// 64-bit immediate holds \a select in the upper 32 bits and \a offset in
// the lower 32 bits.

int
pore_SCAND(PoreInlineContext *ctx,
           int update, int capture, uint16_t length, 
           uint32_t select, uint32_t offset)
{
    uint32_t operand;
    uint64_t imm;

    if (((update != 0) && (update != 1)) ||
        ((capture != 0) && (capture != 1))) {
        ctx->error = PORE_INLINE_INVALID_PARAMETER;
        return ctx->error;
    }

    imm = ((uint64_t)select << 32) | offset;
    operand = (update << 23) | (capture << 22) | length;
    pore_inline_instruction3(ctx, PGAS_OPCODE_SCAND, operand, imm);
    return ctx->error;
}
        

/// Fix up a PORE inline assembler forward branch instruction
///
/// \param ctx A pointer to the initialized PoreInlineContext object
/// controlling inline assembly.
///
/// \param source The PORE inline location counter associated with the source
/// instruction of the forward branch.
///
/// \param target The PORE inline location counter associated with the target
/// instruction of the forward branch.
///
/// For usage examples, see the documentation \ref pore_inline_assembler.
/// Although intended for forward branches, this API could be used to create
/// backward branches as well.  Note however the limitation that the \a source
/// must be in the current context, since the source instruction needs to be
/// reassembled with the branch target. In theory the \a target could be
/// anywhere, as long as the location counter of the target is known.
///
/// \retval 0 Success
///
/// \retval code Failure.  Any non-zero return is the PORE inline assembler
/// error code. The failure code is also stored in the PoreInlineContext
/// object \a error field.  The most likely causes of failure include a source
/// location that is not in the current context or not associated with a
/// branch instruction.

int
pore_inline_branch_fixup(PoreInlineContext *ctx, 
                         PoreInlineLocation source,
                         PoreInlineLocation target)
{
    PoreInlineContext rewound;
    uint32_t word;
    uint64_t imm;
    int32_t back;
    int opcode, reg;

    // The source must lie between the original and the current location
    // counter of this context (both bounds inclusive).
    if ((source < ctx->original_lc) || (source > ctx->lc)) {
        ctx->error = PORE_INLINE_ILLEGAL_SOURCE_LC;
        return ctx->error;
    }

    // Build a scratch context positioned as the real one was when the
    // source instruction was first assembled: same buffer, but with the
    // location counter, host address and remaining space wound back by the
    // distance between the current LC and the source.
    back = ctx->lc - source;

    rewound = *ctx;
    rewound.lc = source;
    rewound.remaining += back;
    rewound.lc_address -= back;
    rewound.error = 0;

    // Recover the opcode (bits above bit 24) and the register field
    // (bits 20-23) from the instruction already in memory.
    word = pore_inline_host32(rewound.lc_address);
    opcode = (word >> 25);
    reg = (word >> 20) & 0xf;

    // Reassemble the same kind of branch in place, now with the real
    // target.  The compare-immediate forms also re-read their 64-bit
    // immediate, which follows the first instruction word.
    switch (opcode) {
    case PGAS_OPCODE_BRA:
        pore_BRA(&rewound, target);
        break;
    case PGAS_OPCODE_BSR:
        pore_BSR(&rewound, target);
        break;
    case PGAS_OPCODE_LOOP:
        pore_LOOP(&rewound, target);
        break;
    case PGAS_OPCODE_BRAZ:
        pore_BRAZ(&rewound, reg, target);
        break;
    case PGAS_OPCODE_BRANZ:
        pore_BRANZ(&rewound, reg, target);
        break;
    case PGAS_OPCODE_CMPIBRAEQ:
        imm = pore_inline_host64(rewound.lc_address + 4);
        pore_CMPIBRAEQ(&rewound, D0, target, imm);
        break;
    case PGAS_OPCODE_CMPIBRANE:
        imm = pore_inline_host64(rewound.lc_address + 4);
        pore_CMPIBRANE(&rewound, D0, target, imm);
        break;
    case PGAS_OPCODE_CMPIBSREQ:
        imm = pore_inline_host64(rewound.lc_address + 4);
        pore_CMPIBSREQ(&rewound, D0, target, imm);
        break;
    default:
        rewound.error = PORE_INLINE_NOT_A_BRANCH;
        break;
    }

    // Only the error status is propagated back; the caller's location
    // counter and remaining space are unaffected by the fixup.
    ctx->error = rewound.error;
    return ctx->error;
}
OpenPOWER on IntegriCloud