summaryrefslogtreecommitdiffstats
path: root/src/usr/sbeio/common/sbe_retry_handler.C
blob: 4410d089ddf939f5b9077b9055b4e6f322e99c19 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/usr/sbeio/common/sbe_retry_handler.C $                    */
/*                                                                        */
/* OpenPOWER HostBoot Project                                             */
/*                                                                        */
/* Contributors Listed Below - COPYRIGHT 2017,2018                        */
/* [+] International Business Machines Corp.                              */
/*                                                                        */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
/**
 * @file sbe_retry_handler.C
 *
 * Handle a SBE extract rc error.  We use a switch-case to determine
 * what action to take, and a finite state machine to control the
 * threshold actions.
 */

/*****************************************************************************/
// Includes
/*****************************************************************************/
#include <stdint.h>
#include <trace/interface.H>
#include <errl/errlentry.H>
#include <errl/errlmanager.H>
#include <p9_extract_sbe_rc.H>

#include <fapi2/target.H>
#include <fapi2/plat_hwp_invoker.H>
#include <initservice/isteps_trace.H>
#include <initservice/initserviceif.H>
#include <initservice/istepdispatcherif.H>
#include <errl/errludtarget.H>
#include <util/misc.H>
#include <ipmi/ipmiwatchdog.H>

#include <p9_start_cbs.H>
#include <p9_sbe_hreset.H>
#include <p9_get_sbe_msg_register.H>
#include <p9_perv_scom_addresses.H>
#include <sbe/sbe_update.H>
#include <sbeio/sbeioif.H>
#include <sbeio/sbe_sp_intf.H>
#include <../../usr/sbeio/sbe_fifodd.H>
#include <../../usr/sbeio/sbe_fifo_buffer.H>
#include <sbeio/sbe_ffdc_parser.H>
#include <sbeio/sbeioreasoncodes.H>
#include <sbeio/sbe_retry_handler.H>
#include <secureboot/service.H>

#include <devicefw/driverif.H>


extern trace_desc_t* g_trac_sbeio;

// Trace helpers for this file. Each one forwards to the matching trace
// macro on g_trac_sbeio and prefixes the format string with the file name.

// Field trace (TRACFCOMP) with a "sbe_retry_handler.C: " prefix
#define SBE_TRACF(printf_string,args...) \
    TRACFCOMP(g_trac_sbeio,"sbe_retry_handler.C: " printf_string,##args)
// Debug trace (TRACDCOMP) with a "sbe_retry_handler.C: " prefix
#define SBE_TRACD(printf_string,args...) \
    TRACDCOMP(g_trac_sbeio,"sbe_retry_handler.C: " printf_string,##args)
// Expands to nothing; any use of SBE_TRACU is compiled out
#define SBE_TRACU(args...)
// Binary field trace (TRACFBIN) with a "sbe_retry_handler.C: " prefix
#define SBE_TRACFBIN(printf_string,args...) \
    TRACFBIN(g_trac_sbeio,"sbe_retry_handler.C: " printf_string,##args)
// Binary debug trace (TRACDBIN) with a "sbe_retry_handler.C: " prefix
#define SBE_TRACDBIN(printf_string,args...) \
    TRACDBIN(g_trac_sbeio,"sbe_retry_handler.C: " printf_string,##args)

using namespace ERRORLOG;

namespace SBEIO
{

// Constant expressions used by the retry handler

// There are only 2 sides to the seeproms, so we only want to flip sides once.
// main_sbe_handler refuses to switch sides once iv_switchSidesCount is
// greater than or equal to this value.
constexpr uint8_t MAX_SWITCH_SIDE_COUNT         = 1;

// We only want to attempt to boot with the same side seeprom twice.
// main_sbe_handler stops retrying once iv_currentSideBootAttempts is
// greater than or equal to this value; note that the constructor starts
// that counter at 1, not 0.
constexpr uint8_t MAX_SIDE_BOOT_ATTEMPTS        = 2;

// Currently we expect a maximum of 2 FFDC packets, the one
// that is useful to HB is the HWP FFDC. It is possible there is
// a packet that details an internal sbe fail that hostboot will
// add to an errorlog but otherwise ignores
constexpr uint8_t MAX_EXPECTED_FFDC_PACKAGES    = 2;

// action_for_ffdc_rc will figure out what action we should do
// for each p9_extract_sbe_rc return code. If the RC does not match
// any return code from p9_extract_sbe_rc then we want to have a
// known "no action found" value which is defined here
constexpr uint32_t NO_ACTION_FOUND_FOR_THIS_RC  = 0xFFFF;

// Set up constants that will be used for setting up the timeout for
// reading the sbe message register
// NOTE(review): presumably consumed by the status-register polling code,
// which is outside the portion of the file reviewed here — confirm.
constexpr uint64_t SBE_RETRY_TIMEOUT_HW_SEC     = 60;  // 60 seconds
constexpr uint64_t SBE_RETRY_TIMEOUT_SIMICS_SEC = 600; // 600 seconds
constexpr uint32_t SBE_RETRY_NUM_LOOPS          = 60;

/**
 * @brief Single-argument constructor.
 *
 * Delegates to the (mode, PLID) constructor, passing 0 as the PLID.
 *
 * @param[in] i_sbeMode  Mode of operation for this handler instance
 */
SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode)
    : SbeRetryHandler(i_sbeMode, 0)
{}

/**
 * @brief Full constructor: records the mode of operation and master PLID
 *        and puts every other instance variable into its starting state.
 *
 * @param[in] i_sbeMode  Mode of operation for this handler instance
 * @param[in] i_plid     Value stored in iv_masterErrorLogPLID
 */
SbeRetryHandler::SbeRetryHandler(SBE_MODE_OF_OPERATION i_sbeMode,
                                 uint32_t i_plid)
    : iv_useSDB(false),
      // Per HW team this should always be 0
      iv_secureModeDisabled(false),
      iv_masterErrorLogPLID(i_plid),
      iv_switchSidesCount(0),
      iv_currentAction(P9_EXTRACT_SBE_RC::ERROR_RECOVERED),
      iv_currentSBEState(SBE_REG_RETURN::SBE_NOT_AT_RUNTIME),
      iv_shutdownReturnCode(0),
      // It is safe to assume that the current side has already
      // attempted to boot once
      iv_currentSideBootAttempts(1),
      iv_ffdcSetAction(false),
      iv_sbeMode(i_sbeMode),
      iv_sbeRestartMethod(SBE_RESTART_METHOD::HRESET),
      iv_initialPowerOn(false)
{
    SBE_TRACF(ENTER_MRK "SbeRetryHandler::SbeRetryHandler()");

    // The register image is not covered by the initializer list above,
    // so clear it explicitly here
    this->iv_sbeRegister.reg = 0;

    SBE_TRACF(EXIT_MRK "SbeRetryHandler::SbeRetryHandler()");
}

/**
 * @brief Destructor; performs no work beyond destroying the members.
 */
SbeRetryHandler::~SbeRetryHandler() = default;

/**
 * @brief Main entry point of the SBE retry handler.
 *
 * Reads the SBE status register for the given processor and, if the SBE is
 * not known to be healthy, decides on and performs recovery actions:
 *   - On an FSP based system at IPL time the failure is handed to
 *     handleFspIplTimeFail(), which is expected to TI and never return.
 *   - Otherwise the recovery action is taken from async FFDC (if any was
 *     found), from p9_extract_sbe_rc, or defaults to RESTART_SBE when the
 *     SBE reports it never booted.
 *   - In INFORMATIONAL_ONLY mode the handler returns after collecting that
 *     information, without attempting any restart.
 *   - Otherwise it loops: optionally switch seeprom sides, restart the SBE
 *     with the method held in iv_sbeRestartMethod, re-read the status
 *     register, and repeat until the SBE reports runtime or one of the
 *     limits / failures below breaks out of the loop.
 *
 * All error logs created here are committed inside this function; nothing
 * is returned to the caller. Results are left in the instance variables
 * (e.g. iv_currentSBEState, iv_currentAction, iv_sbeRegister).
 *
 * @param[in] i_target  Processor target whose SBE is being handled
 */
void SbeRetryHandler::main_sbe_handler( TARGETING::Target * i_target )
{
    SBE_TRACF(ENTER_MRK "main_sbe_handler()");
    do
    {
        errlHndl_t l_errl = nullptr;

        // Only set the secure debug bit (SDB) if we are not using xscom yet
        // and the target is not flagged as the SBE master chip
        if(!i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>().useXscom &&
            !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>())
        {
            this->iv_useSDB = true;
        }

        // Get the SBE status register, this will tell us what state
        // the SBE is in, if the asyncFFDC bit is set on the sbe_reg
        // then FFDC will be collected at this point in time.
        // A false return from sbe_run_extract_msg_reg is treated here as a
        // failure to read the status register.
        if(!this->sbe_run_extract_msg_reg(i_target))
        {
            SBE_TRACF("main_sbe_handler(): Failed to get sbe register something is seriously wrong, we should always be able to read that!!");
            //Error log should have already committed in sbe_run_extract_msg_reg for this issue
            break;
        }

        // We will only trust the currState value if we know the SBE has just been booted.
        // In this case we have been told by the caller that the sbe just powered on
        // so it is safe to assume that the currState value is legit and we can trust that
        // the sbe has booted successfully to runtime.
        if( this->iv_initialPowerOn && (this->iv_sbeRegister.currState == SBE_STATE_RUNTIME))
        {
            //We have successfully powered on the SBE
            SBE_TRACF("main_sbe_handler(): Initial power on of the SBE was a success!!");
            break;
        }

        //////******************************************************************
        // If we have made it this far we can assume that something is wrong w/ the SBE
        //////******************************************************************

        // If something is wrong w/ the SBE during IPL time on a FSP based system then
        // we will always TI and let hwsv deal with the problem. This is a unique path
        // so we will have it handled in a separate procedure
#ifndef __HOSTBOOT_RUNTIME
        if(INITSERVICE::spBaseServicesEnabled())
        {
            if(iv_initialPowerOn)
            {
                // If this is the initial power on there will be no logs that point out this fail
                // so we need to create one now
                /*@
                * @errortype  ERRL_SEV_UNRECOVERABLE
                * @moduleid   SBEIO_EXTRACT_RC_HANDLER
                * @reasoncode SBEIO_SLAVE_FAILED_TO_BOOT
                * @userdata1  Bool to describe if FFDC data is found
                * @userdata2  HUID of proc
                * @devdesc    There was a problem attempting to boot SBE
                *             on the slave processor
                * @custdesc   Processor Error
                */
                l_errl = new ERRORLOG::ErrlEntry(
                            ERRORLOG::ERRL_SEV_UNRECOVERABLE,
                            SBEIO_EXTRACT_RC_HANDLER,
                            SBEIO_SLAVE_FAILED_TO_BOOT,
                            this->iv_ffdcSetAction,
                            TARGETING::get_huid(i_target));

                l_errl->collectTrace( "ISTEPS_TRACE", 256);
                l_errl->collectTrace( SBEIO_COMP_NAME, 256);
                // Set the PLID of the error log to master PLID
                // if the master PLID is set
                updatePlids(l_errl);

                errlCommit(l_errl, SBEIO_COMP_ID);
            }
            // This function will TI Hostboot so don't expect to return
            handleFspIplTimeFail(i_target);
            SBE_TRACF("main_sbe_handler(): We failed to TI the system when we should have, forcing an assert(0) call");
            // We should never return from handleFspIplTimeFail
            assert(0, "We have determined that there was an error with the SBE and should have TI'ed but for some reason we did not.");
        }
#endif

        // If iv_ffdcSetAction is true, that means that we found ffdc to parse
        // this indicates that the SBE already determined what went wrong and
        // reported the error via asyncFFDC so there is no need to
        // run p9_extract_sbe_rc
        // Also if the sbe is not booted at all, extract_rc will fail so we don't want to run it
        if(!this->iv_ffdcSetAction && this->iv_sbeRegister.sbeBooted)
        {
            SBE_TRACF("main_sbe_handler(): No async ffdc found and sbe says it has been booted, running run p9_sbe_extract_rc.");
            // Call the function that runs extract_rc, this needs to run to determine
            // what broke and what our retry action should be
            this->sbe_run_extract_rc(i_target);
        }
        // If we have determined that the sbe never booted
        // then set the current action to be "restart sbe"
        // that way we will attempt to start the sbe again
        else if(!this->iv_sbeRegister.sbeBooted)
        {
            SBE_TRACF("main_sbe_handler(): SBE reports it was never booted, calling p9_sbe_extract_rc will fail. Setting action to be RESTART_SBE");
            this->iv_currentAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
        }

        // If the mode was marked as informational that means the caller did not want
        // any actions to take place, the caller only wanted information collected
        if(this->iv_sbeMode == INFORMATIONAL_ONLY)
        {
            SBE_TRACF("main_sbe_handler(): Retry handler is being called in INFORMATIONAL mode so we are exiting without attempting any retry actions");
            break;
        }

        // This do-while loop will continuously look at iv_currentAction, act
        // accordingly, then read status register and determine next action.
        // The ideal way to exit the loop is if the SBE makes it up to runtime after
        // attempting a retry which indicates we have recovered. If the currentAction
        // says NO_RECOVERY_ACTION then we break out of this loop.  Also if we fail
        // to read the sbe's status register or if we get write fails when trying to switch
        // seeprom sides. Both the fails mentioned last indicate there is a larger problem
        do
        {
            // We need to handle the following values that currentAction could be,
            // it is possible that iv_currentAction can be any of these values except there
            // is currently no path that will set it to be ERROR_RECOVERED
            //        ERROR_RECOVERED    = 0,
            //           - We should never hit this, if we have recovered then
            //             currState should be RUNTIME
            //        RESTART_SBE        = 1,
            //        RESTART_CBS        = 2,
            //           - We will not listen to p9_extract_rc on HOW to restart the
            //             sbe. We will assume iv_sbeRestartMethod is correct and
            //             perform the restart method that iv_sbeRestartMethod says
            //             regardless if currentAction = RESTART_SBE or RESTART_CBS
            //        REIPL_BKP_SEEPROM  = 3,
            //        REIPL_UPD_SEEPROM  = 4,
            //            - We will switch the seeprom side (if we have not already)
            //            - then attempt to restart the sbe w/ iv_sbeRestartMethod
            //        NO_RECOVERY_ACTION = 5,
            //            - we deconfigure the processor we are retrying and fail out
            //
            // Important things to remember, we only want to attempt a single side
            // a maximum of 2 times, and also we only want to switch sides once

            SBE_TRACF("main_sbe_handler(): iv_sbeRegister.currState: %d , "
                        "iv_currentSideBootAttempts: %d , "
                        "iv_currentAction: %d , ",
                        this->iv_sbeRegister.currState,
                        this->iv_currentSideBootAttempts,
                        this->iv_currentAction);

            if(this->iv_currentAction == P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION)
            {
                SBE_TRACF("main_sbe_handler(): We have concluded there are no further recovery actions to take, deconfiguring proc and exiting handler");
                // There is no action possible. Gard and Callout the proc
                /*@
                    * @errortype  ERRL_SEV_UNRECOVERABLE
                    * @moduleid   SBEIO_EXTRACT_RC_HANDLER
                    * @reasoncode SBEIO_NO_RECOVERY_ACTION
                    * @userdata1  SBE current error
                    * @userdata2  HUID of proc
                    * @devdesc    There is no recovery action on the SBE.
                    *             We're deconfiguring this proc
                    * @custdesc   Processor Error
                    */
                l_errl = new ERRORLOG::ErrlEntry(
                            ERRORLOG::ERRL_SEV_UNRECOVERABLE,
                            SBEIO_EXTRACT_RC_HANDLER,
                            SBEIO_NO_RECOVERY_ACTION,
                            P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION,
                            TARGETING::get_huid(i_target));
                l_errl->collectTrace( "ISTEPS_TRACE", 256);
                l_errl->collectTrace( SBEIO_COMP_NAME, 256);
                l_errl->addHwCallout( i_target,
                                        HWAS::SRCI_PRIORITY_HIGH,
                                        HWAS::DELAYED_DECONFIG,
                                        HWAS::GARD_NULL );

                // Set the PLID of the error log to master PLID
                // if the master PLID is set
                updatePlids(l_errl);

                errlCommit(l_errl, SBEIO_COMP_ID);
                // Record the outcome for the caller before leaving the retry loop
                this->iv_currentSBEState = SBE_REG_RETURN::PROC_DECONFIG;
                break;
            }

            // if the bkp_seeprom or upd_seeprom, attempt to switch sides.
            // This is also dependent on the iv_switchSideCount.
            // Note: we do this for upd_seeprom because we don't support
            //       updating the seeprom during IPL time
            if((this->iv_currentAction ==
                            P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM ||
                this->iv_currentAction ==
                            P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM))
            {
                // We cannot switch sides and perform an hreset if the seeprom's
                // versions do not match. If this happens, log an error and stop
                // trying to recover the SBE
                if(this->iv_sbeRestartMethod == HRESET)
                {
                    TARGETING::ATTR_HB_SBE_SEEPROM_VERSION_MISMATCH_type l_versionsMismatch =
                            i_target->getAttr<TARGETING::ATTR_HB_SBE_SEEPROM_VERSION_MISMATCH>();

                    if(l_versionsMismatch)
                    {
                        SBE_TRACF("main_sbe_handler(): We cannot switch SEEPROM sides if their versions do not match, exiting handler");
                        /*@
                            * @errortype  ERRL_SEV_UNRECOVERABLE
                            * @moduleid   SBEIO_EXTRACT_RC_HANDLER
                            * @reasoncode SBEIO_SEEPROM_VERSION_MISMATCH
                            * @userdata1  HUID of proc
                            * @userdata2  unused
                            * @devdesc    Attempted to swap seeprom sides and
                            *             boot using hreset but version mismatched
                            * @custdesc   Processor Error
                            */
                        l_errl = new ERRORLOG::ErrlEntry(
                                    ERRORLOG::ERRL_SEV_UNRECOVERABLE,
                                    SBEIO_EXTRACT_RC_HANDLER,
                                    SBEIO_SEEPROM_VERSION_MISMATCH,
                                    TARGETING::get_huid(i_target),0);
                        l_errl->collectTrace( "ISTEPS_TRACE", 256);
                        l_errl->collectTrace( SBEIO_COMP_NAME, 256);
                        l_errl->addHwCallout( i_target,
                                                HWAS::SRCI_PRIORITY_HIGH,
                                                HWAS::NO_DECONFIG,
                                                HWAS::GARD_NULL );

                        // Set the PLID of the error log to master PLID
                        // if the master PLID is set
                        updatePlids(l_errl);

                        errlCommit(l_errl, SBEIO_COMP_ID);
                        // break out of the retry loop
                        break;
                    }
                }
                // Guard against flipping sides more often than allowed
                if(this->iv_switchSidesCount >= MAX_SWITCH_SIDE_COUNT)
                {
                    /*@
                    * @errortype  ERRL_SEV_PREDICTIVE
                    * @moduleid   SBEIO_EXTRACT_RC_HANDLER
                    * @reasoncode SBEIO_EXCEED_MAX_SIDE_SWITCHES
                    * @userdata1  Switch Sides Count
                    * @userdata2  HUID of proc
                    * @devdesc    We have already flipped seeprom sides once
                    *             and we should not have attempted to flip again
                    * @custdesc   Processor Error
                    */
                    l_errl = new ERRORLOG::ErrlEntry(
                                ERRORLOG::ERRL_SEV_PREDICTIVE,
                                SBEIO_EXTRACT_RC_HANDLER,
                                SBEIO_EXCEED_MAX_SIDE_SWITCHES,
                                this->iv_switchSidesCount,
                                TARGETING::get_huid(i_target));
                    l_errl->collectTrace( SBEIO_COMP_NAME, 256);

                    // Set the PLID of the error log to master PLID
                    // if the master PLID is set
                    updatePlids(l_errl);

                    errlCommit(l_errl, SBEIO_COMP_ID);
                    // Break out of loop, something bad happened and we don't want
                    // to end up in an endless loop
                    break;
                }
                l_errl = this->switch_sbe_sides(i_target);
                if(l_errl)
                {
                    errlCommit(l_errl, SBEIO_COMP_ID);
                    // If any error occurs while we are trying to switch sides
                    // this indicates big problems so we want to break out of the
                    // retry loop
                    break;
                }
                // Note that we do not want to continue here because we want to
                // attempt to restart using whatever sbeRestartMethod is set to after
                // switching seeprom sides
            }

            // Both of the retry methods require a FAPI2 version of the target because they
            // are fapi2 HWPs
            const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_fapi2_proc_target (i_target);
            // Guard against booting the current side more often than allowed
            if(this->iv_currentSideBootAttempts >= MAX_SIDE_BOOT_ATTEMPTS)
            {
                /*@
                * @errortype  ERRL_SEV_PREDICTIVE
                * @moduleid   SBEIO_EXTRACT_RC_HANDLER
                * @reasoncode SBEIO_EXCEED_MAX_SIDE_BOOTS
                * @userdata1  # of boots attempts on this side
                * @userdata2  HUID of proc
                * @devdesc    We have already done the max attempts for
                *             the current seeprom side. For some reason
                *             we are attempting to do another boot.
                * @custdesc   Processor Error
                */
                l_errl = new ERRORLOG::ErrlEntry(
                            ERRORLOG::ERRL_SEV_PREDICTIVE,
                            SBEIO_EXTRACT_RC_HANDLER,
                            SBEIO_EXCEED_MAX_SIDE_BOOTS,
                            this->iv_currentSideBootAttempts,
                            TARGETING::get_huid(i_target));

                l_errl->collectTrace( SBEIO_COMP_NAME, 256);

                // Set the PLID of the error log to master PLID
                // if the master PLID is set
                updatePlids(l_errl);

                errlCommit(l_errl, SBEIO_COMP_ID);
                // Break out of loop, something bad happened and we don't want
                // to end up in an endless loop
                break;
            }
            // Look at the sbeRestartMethod instance variable to determine which method
            // we will use to attempt the restart. In general during IPL time we will
            // attempt CBS, during runtime we will want to use HRESET.
            else if(this->iv_sbeRestartMethod == SBE_RESTART_METHOD::START_CBS)
            {
                //Increment attempt count for this side
                this->iv_currentSideBootAttempts++;

                SBE_TRACF("Invoking p9_start_cbs HWP on processor %.8X", get_huid(i_target));

                // We cannot use FAPI_INVOKE in this case because it is possible
                // we are handling a HWP fail. If we attempted to use FAPI_INVOKE
                // while we are already inside a FAPI_INVOKE call then we can
                // end up in an endless wait on the fapi mutex lock
                fapi2::ReturnCode l_rc;

                // For now we only use p9_start_cbs if we fail to boot the slave SBE
                // on our initial attempt, the bool param is true we are telling the
                // HWP that we are starting up the SBE which is true in this case
                FAPI_EXEC_HWP(l_rc, p9_start_cbs,
                                l_fapi2_proc_target, true);

                // Convert the HWP return code into an error log (null when no error)
                l_errl = rcToErrl(l_rc, ERRORLOG::ERRL_SEV_UNRECOVERABLE);

                if(l_errl)
                {
                    SBE_TRACF("ERROR: call p9_start_cbs, PLID=0x%x",
                                l_errl->plid() );
                    l_errl->collectTrace(SBEIO_COMP_NAME, 256 );
                    l_errl->collectTrace(FAPI_IMP_TRACE_NAME, 256);
                    l_errl->collectTrace(FAPI_TRACE_NAME, 384);

                    // Deconfig the target when SBE Retry fails
                    l_errl->addHwCallout(i_target,
                                            HWAS::SRCI_PRIORITY_LOW,
                                            HWAS::DELAYED_DECONFIG,
                                            HWAS::GARD_NULL);

                    // Set the PLID of the error log to master PLID
                    // if the master PLID is set
                    updatePlids(l_errl);

                    errlCommit( l_errl, SBEIO_COMP_ID);
                    // If we got an errlog while attempting start_cbs
                    // we will assume that no future retry actions
                    // will work so we will break out of the retry loop
                    break;
                }
            }
            // The only other type of reset method is HRESET
            else
            {
                // Increment attempt count for this side
                this->iv_currentSideBootAttempts++;

                SBE_TRACF("Invoking p9_sbe_hreset HWP on processor %.8X", get_huid(i_target));

                // We cannot use FAPI_INVOKE in this case because it is possible
                // we are handling a HWP fail. If we attempted to use FAPI_INVOKE
                // while we are already inside a FAPI_INVOKE call then we can
                // end up in an endless wait on the fapi mutex lock
                fapi2::ReturnCode l_rc;

                // For now we only use HRESET during runtime, the bool param
                // we are passing in is supposed to be FALSE if runtime, TRUE is ipl time
                FAPI_EXEC_HWP(l_rc, p9_sbe_hreset,
                                l_fapi2_proc_target, false);

                // Convert the HWP return code into an error log (null when no error)
                l_errl = rcToErrl(l_rc, ERRORLOG::ERRL_SEV_UNRECOVERABLE);

                if(l_errl)
                {
                    SBE_TRACF("ERROR: call p9_sbe_hreset, PLID=0x%x",
                                l_errl->plid() );
                    l_errl->collectTrace(SBEIO_COMP_NAME, 256 );
                    l_errl->collectTrace(FAPI_IMP_TRACE_NAME, 256);
                    l_errl->collectTrace(FAPI_TRACE_NAME, 384);

                    // Deconfig the target when SBE Retry fails
                    l_errl->addHwCallout(i_target,
                                            HWAS::SRCI_PRIORITY_LOW,
                                            HWAS::DELAYED_DECONFIG,
                                            HWAS::GARD_NULL);

                    // Set the PLID of the error log to master PLID
                    // if the master PLID is set
                    updatePlids(l_errl);

                    errlCommit( l_errl, SBEIO_COMP_ID);
                    // If we got an errlog while attempting p9_sbe_hreset
                    // we will assume that no future retry actions
                    // will work so we will exit
                    break;
                }
            }

            // We have performed the action, so make sure that ffdcSetAction is set back to 0
            this->iv_ffdcSetAction = 0;

            // Get the sbe register  (note that if asyncFFDC bit is set in status register then
            // we will read it in this call)
            if(!this->sbe_run_extract_msg_reg(i_target))
            {
                // Error log should have already committed in sbe_run_extract_msg_reg for this issue
                // we need to stop our recovery efforts and bail out of the retry handler
                break;
            }

            // If our retry attempt failed, and we didn't see any asyncFFDC after
            if (this->iv_sbeRegister.currState != SBE_STATE_RUNTIME)
            {
                // Again, if ffdcSetAction is set, that means we have found FFDC
                // already that the SBE saved away prior to failing so we don't need
                // to run extract_rc if ffdcSetAction is true
                if(!this->iv_ffdcSetAction)
                {
                    SBE_TRACF("main_sbe_handler(): Failed to reach runtime after sbe restart and no asyncFFDC found. Calling p9_sbe_extract_rc.");
                    // Run extract rc to figure out why the sbe did not make it to
                    // runtime state
                    this->sbe_run_extract_rc(i_target);
                }
            }

        } while((this->iv_sbeRegister).currState != SBE_STATE_RUNTIME);

        // If we ended up switching sides we want to mark it down as
        // an informational log
        // NOTE(review): this point is reached on every exit from the retry loop
        // above, including the failure 'break' paths, not only when the SBE
        // reached runtime — confirm that logging "booted from unexpected side"
        // in those cases is intended.
        if(this->iv_switchSidesCount)
        {
            /*@
             * @errortype   ERRL_SEV_INFORMATIONAL
             * @moduleid    SBEIO_EXTRACT_RC_HANDLER
             * @reasoncode  SBEIO_BOOTED_UNEXPECTED_SIDE
             * @userdata1   0
             * @userdata2   HUID of working proc
             * @devdesc     SBE booted from unexpected side.
             */
            l_errl = new ERRORLOG::ErrlEntry(
                        ERRORLOG::ERRL_SEV_INFORMATIONAL,
                        SBEIO_EXTRACT_RC_HANDLER,
                        SBEIO_BOOTED_UNEXPECTED_SIDE,
                        0,TARGETING::get_huid(i_target));
            l_errl->collectTrace("ISTEPS_TRACE",256);
            l_errl->collectTrace(SBEIO_COMP_NAME,256);

            // Set the PLID of the error log to master PLID
            // if the master PLID is set
            updatePlids(l_errl);

            errlCommit(l_errl, SBEIO_COMP_ID);
        }

    }while(0);

    SBE_TRACF(EXIT_MRK "main_sbe_handler()");
}

bool SbeRetryHandler::sbe_run_extract_msg_reg(TARGETING::Target * i_target)
{
    SBE_TRACF(ENTER_MRK "sbe_run_extract_msg_reg()");

    // Poll the SBE status register. Polling stops early when the SBE
    // reaches runtime, when reading the register fails, or when the
    // asyncFFDC bit shows up in the register.
    errlHndl_t l_pollErrl = this->sbe_poll_status_reg(i_target);

    // The status read counts as successful only if polling gave back no
    // error log. Capture this now, before the log handle is committed.
    const bool l_statusReadOk = !l_pollErrl;

    if (l_pollErrl)
    {
        // We could not even read the SBE status register; log it against
        // this target and report the failure to the caller
        SBE_TRACF("ERROR: call sbe_run_extract_msg_reg, PLID=0x%x", l_pollErrl->plid() );

        l_pollErrl->collectTrace(SBEIO_COMP_NAME,256);

        // Tie this log to the master PLID, if one is set
        updatePlids(l_pollErrl);

        // Record which target we were talking to
        ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_pollErrl );

        errlCommit( l_pollErrl, HWPF_COMP_ID );
    }
    else if ((this->iv_sbeRegister.currState != SBE_STATE_RUNTIME) &&
             this->iv_sbeRegister.asyncFFDC)
    {
        // Register read fine, SBE is not at runtime, and it has flagged
        // async FFDC: let the FFDC decide the next action instead of
        // relying on p9_extract_sbe_rc
        SBE_TRACF("SUCCESS: sbe_run_extract_msg_reg completed okay for proc 0x%.8X .  "
                    "There was asyncFFDC found though so we will run the FFDC parser",
                  TARGETING::get_huid(i_target));
        this->sbe_get_ffdc_handler(i_target);
    }
    else
    {
        // Register read fine and there is no async FFDC to process.
        // Note this says nothing about whether the SBE reached runtime.
        SBE_TRACF("SUCCESS: sbe_run_extract_msg_reg completed okay for proc 0x%.8X",
                    TARGETING::get_huid(i_target));
    }

    SBE_TRACF(EXIT_MRK "sbe_run_extract_msg_reg()");

    return l_statusReadOk;
}

// Poll the SBE message register of i_target until one of the following:
//   - reading the register fails (iv_currentSBEState = FAILED_COLLECTING_REG)
//   - the SBE reports SBE_STATE_RUNTIME (iv_currentSBEState = SBE_AT_RUNTIME)
//   - the asyncFFDC bit is set in the register
//   - SBE_RETRY_NUM_LOOPS polls have been made without any of the above
// iv_currentSBEState starts out as SBE_NOT_AT_RUNTIME and iv_sbeRegister is
// refreshed on every poll.
//
// If the SBE is not at runtime when polling ends and the target is not using
// xscom, the target's SCOM switches are changed to FSI SCOM (SBE SCOM off).
//
// @param[in] i_target  Processor target whose SBE is being polled
// @return nullptr if the last register read succeeded, otherwise the error
//         log produced from the failing p9_get_sbe_msg_register call
errlHndl_t SbeRetryHandler::sbe_poll_status_reg(TARGETING::Target * i_target)
{
    SBE_TRACF(ENTER_MRK "sbe_poll_status_reg()");

    errlHndl_t l_errl = nullptr;

    this->iv_currentSBEState =
            SbeRetryHandler::SBE_REG_RETURN::SBE_NOT_AT_RUNTIME;

    const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>
            l_fapi2_proc_target(i_target);

    // Each sbe gets 60s to respond with the fact that it's
    // booted and at runtime (stable state)
    uint64_t l_sbeTimeout = SBE_RETRY_TIMEOUT_HW_SEC;  // 60 seconds
    // Bump this up really high for simics, things are slow there
    if( Util::isSimicsRunning() )
    {
        l_sbeTimeout = SBE_RETRY_TIMEOUT_SIMICS_SEC; // 600 seconds
    }

    //Sleep time should be 1 second on HW, 10 seconds on simics
    const uint64_t SBE_WAIT_SLEEP_SEC = (l_sbeTimeout/SBE_RETRY_NUM_LOOPS);

    SBE_TRACF("Running p9_get_sbe_msg_register HWP on proc target %.8X",
               TARGETING::get_huid(i_target));

    for( uint64_t l_loops = 0; l_loops < SBE_RETRY_NUM_LOOPS; l_loops++ )
    {
        fapi2::ReturnCode l_rc;

        // We cannot use FAPI_INVOKE in this case because it is possible
        // we are handling a HWP fail. If we attempted to use FAPI_INVOKE
        // while we are already inside a FAPI_INVOKE call then we can
        // end up in an endless wait on the fapi mutex lock
        FAPI_EXEC_HWP(l_rc, p9_get_sbe_msg_register,
                        l_fapi2_proc_target, this->iv_sbeRegister);

        l_errl = rcToErrl(l_rc, ERRORLOG::ERRL_SEV_UNRECOVERABLE);

        if (l_errl)
        {
            SBE_TRACF("ERROR : call p9_get_sbe_msg_register, PLID=0x%x, "
                      "on loop %d",
                      l_errl->plid(),
                      l_loops );

            l_errl->collectTrace(SBEIO_COMP_NAME,256);
            l_errl->collectTrace(FAPI_IMP_TRACE_NAME, 256);
            l_errl->collectTrace(FAPI_TRACE_NAME, 384);

            this->iv_currentSBEState =
                    SbeRetryHandler::SBE_REG_RETURN::FAILED_COLLECTING_REG;
            break;
        }
        else if ((this->iv_sbeRegister).currState == SBE_STATE_RUNTIME)
        {
            SBE_TRACF("SBE 0x%.8X booted and at runtime, "
                      "iv_sbeRegister=0x%.8X, on loop %d",
                      TARGETING::get_huid(i_target),
                      (this->iv_sbeRegister).reg,
                      l_loops);
            this->iv_currentSBEState =
                  SbeRetryHandler::SBE_REG_RETURN::SBE_AT_RUNTIME;
            break;
        }
        else if ((this->iv_sbeRegister).asyncFFDC)
        {
            SBE_TRACF("SBE 0x%.8X has async FFDC bit set, "
                      "iv_sbeRegister=0x%.8X",TARGETING::get_huid(i_target),
                      (this->iv_sbeRegister).reg);
            // Async FFDC is indicator that SBE is failing to boot, and if
            // in DUMP state, that SBE is done dumping, so leave loop
            break;
        }
        else
        {
            // Only trace every 10th poll to keep the trace buffer readable
            if( !(l_loops % 10) )
            {
                SBE_TRACF("%d> SBE 0x%.8X NOT booted yet, "
                          "iv_sbeRegister=0x%.8X", l_loops,
                          TARGETING::get_huid(i_target),
                           (this->iv_sbeRegister).reg);
            }
            // The for statement is the only place l_loops is advanced.
            // Bumping it here as well would skip every other poll and cut
            // the total wait to roughly half of l_sbeTimeout.
#ifndef __HOSTBOOT_RUNTIME
            // reset watchdog before performing the nanosleep
            INITSERVICE::sendProgressCode();
#endif
            nanosleep(SBE_WAIT_SLEEP_SEC,0);
        }
    }

    if ((this->iv_sbeRegister).currState != SBE_STATE_RUNTIME)
    {
        // Switch to using FSI SCOM if we are not using xscom
        TARGETING::ScomSwitches l_switches =
            i_target->getAttr<TARGETING::ATTR_SCOM_SWITCHES>();
        TARGETING::ScomSwitches l_switches_before = l_switches;

        if(!l_switches.useXscom)
        {
            // Turn off SBE SCOM and turn on FSI SCOM.
            l_switches.useFsiScom = 1;
            l_switches.useSbeScom = 0;

            SBE_TRACF("sbe_poll_status_reg: changing SCOM switches from 0x%.2X "
                    "to 0x%.2X for proc 0x%.8X",
                    l_switches_before,
                    l_switches,
                    TARGETING::get_huid(i_target));
            i_target->setAttr<TARGETING::ATTR_SCOM_SWITCHES>(l_switches);
        }
    }

    SBE_TRACF(EXIT_MRK "sbe_poll_status_reg()");
    return l_errl;
}

#ifndef __HOSTBOOT_RUNTIME
void SbeRetryHandler::handleFspIplTimeFail(TARGETING::Target * i_target)
{
    // Name of the chosen return code, used only for the trace below
    const char * l_rcName = nullptr;

    if (!(this->iv_sbeRegister).asyncFFDC)
    {
        // No asyncFFDC bit in the SBE register: hand the DEAD_SBE RC to
        // hwsv when we TI
        this->iv_shutdownReturnCode = SBEIO_DEAD_SBE;
        l_rcName = "SBEIO_DEAD_SBE";
    }
    else
    {
        // Async FFDC is available. Tell hwsv about it even if hostboot got
        // nothing useful out of it, since hwsv may still be able to use it.
        this->iv_shutdownReturnCode = SBEIO_HWSV_COLLECT_SBE_RC;
        l_rcName = "SBEIO_HWSV_COLLECT_SBE_RC";
    }

    SBE_TRACF("handleFspIplTimeFail(): During IPL time on FSP system hostboot will TI so that HWSV can handle the error. "
              "Shutting down w/ the error code %s" ,
              l_rcName );

    // We failed to recover the SBE on an FSP system, so shut down with the
    // return code chosen above; that is how HWSV learns what FFDC to collect
    INITSERVICE::doShutdownWithError(this->iv_shutdownReturnCode,
                                    TARGETING::get_huid(i_target));
}
#endif

uint32_t SbeRetryHandler::action_for_ffdc_rc(
                uint32_t i_rc)
{
    SBE_TRACF(ENTER_MRK "action_for_ffdc_rc()");

    // Any RC that is not listed below has no known recovery action
    uint32_t l_mappedAction = NO_ACTION_FOUND_FOR_THIS_RC;

    switch(i_rc)
    {
        // RCs for which there is nothing to retry
        case fapi2::RC_EXTRACT_SBE_RC_BRANCH_TO_SEEPROM_FAIL:
        case fapi2::RC_EXTRACT_SBE_RC_UNEXPECTED_OTPROM_HALT:
        case fapi2::RC_EXTRACT_SBE_RC_OTP_ECC_ERR:
            l_mappedAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
            break;

        // RCs handled by restarting the CBS
        case fapi2::RC_EXTRACT_SBE_RC_OTP_TIMEOUT:
        case fapi2::RC_EXTRACT_SBE_RC_OTP_PIB_ERR:
        case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_PIB_ERR:
        case fapi2::RC_EXTRACT_SBE_RC_FI2C_SPRM_CFG_ERR:
        case fapi2::RC_EXTRACT_SBE_RC_FI2C_PIB_ERR:
            l_mappedAction = P9_EXTRACT_SBE_RC::RESTART_CBS;
            break;

        // RCs handled by re-IPLing from the backup seeprom
        case fapi2::RC_EXTRACT_SBE_RC_FI2C_TIMEOUT:
        case fapi2::RC_EXTRACT_SBE_RC_SBE_L1_LOADER_FAIL:
        case fapi2::RC_EXTRACT_SBE_RC_SBE_L2_LOADER_FAIL:
        case fapi2::RC_EXTRACT_SBE_RC_UNKNOWN_ERROR:
            l_mappedAction = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
            break;

        // RCs handled by re-IPLing with a seeprom update
        case fapi2::RC_EXTRACT_SBE_RC_MAGIC_NUMBER_MISMATCH:
        case fapi2::RC_EXTRACT_SBE_RC_FI2C_ECC_ERR:
        case fapi2::RC_EXTRACT_SBE_RC_FI2C_ECC_ERR_NONSECURE_MODE:
            l_mappedAction = P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM;
            break;

        // RCs handled by simply restarting the SBE
        case fapi2::RC_EXTRACT_SBE_RC_RUNNING:
        case fapi2::RC_EXTRACT_SBE_RC_NEVER_STARTED:
        case fapi2::RC_EXTRACT_SBE_RC_PROGRAM_INTERRUPT:
        case fapi2::RC_EXTRACT_SBE_RC_ADDR_NOT_RECOGNIZED:
        case fapi2::RC_EXTRACT_SBE_RC_PIBMEM_ECC_ERR:
        case fapi2::RC_EXTRACT_SBE_RC_FI2CM_BIT_RATE_ERR_NONSECURE_MODE:
            l_mappedAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
            break;

        default:
            // Keep the NO_ACTION_FOUND_FOR_THIS_RC fallback
            break;
    }

    SBE_TRACF(EXIT_MRK "action_for_ffdc_rc()");
    return l_mappedAction;
}

// Fetch the FFDC the SBE has stored, log it, and use it to pick a recovery
// action.
//
// A response buffer of SbeFifoRespBuffer::MSG_BUFFER_SIZE bytes is allocated
// for the duration of the call. Everything between the allocation and the
// free is compiled only when __HOSTBOOT_RUNTIME is not defined:
//   - getFifoSBEFFDC() fills the buffer; if it returns an error the error is
//     traced and deleted, and nothing else is done
//   - otherwise the buffer is parsed with SbeFFDCParser; each package is
//     attached to a predictive error log and its RC is mapped to an action
//     via action_for_ffdc_rc()
//   - for every package whose RC maps to a known action, iv_currentAction is
//     set, bestEffortCheck() is run, and iv_ffdcSetAction is set to true
//
// @param[in] i_target  Processor target whose SBE FFDC is being collected
void SbeRetryHandler::sbe_get_ffdc_handler(TARGETING::Target * i_target)
{
    SBE_TRACF(ENTER_MRK "sbe_get_ffdc_handler()");
    // Scratch buffer for the FIFO response; released at the end of this
    // function on every path
    uint32_t l_responseSize = SbeFifoRespBuffer::MSG_BUFFER_SIZE;
    uint32_t *l_pFifoResponse =
        reinterpret_cast<uint32_t *>(malloc(l_responseSize));

#ifndef __HOSTBOOT_RUNTIME
    errlHndl_t l_errl = nullptr;
    l_errl = getFifoSBEFFDC(i_target,
                                   l_pFifoResponse,
                                   l_responseSize);

    // Check if there was an error log created
    if(l_errl)
    {
        // Trace but otherwise silently ignore error
        SBE_TRACF("sbe_get_ffdc_handler: ignoring error PLID=0x%x from "
                  "get SBE FFDC FIFO request to proc 0x%.8X",
                  l_errl->plid(),
                  TARGETING::get_huid(i_target));
        delete l_errl;
        l_errl = nullptr;
    }
    else
    {
        // Parse the FFDC package(s) in the response
        auto l_ffdc_parser = std::make_shared<SbeFFDCParser>();
        l_ffdc_parser->parseFFDCData(reinterpret_cast<void *>(l_pFifoResponse));

        uint8_t l_pkgs = l_ffdc_parser->getTotalPackages();

        // Currently we expect a maximum of 2 FFDC packets. These packets would be
        // a HWP FFDC packet which we will look at to determine what our retry action
        // should be. The other type of packet we might see would be details on the
        // internal SBE fail. For internal SBE fail packets we will just add the FFDC
        // to the error log and move on.
        //
        // Note:  If we exceed MAX_EXPECTED_FFDC_PACKAGES, commit an informational log.
        // It shouldn't break anything but this could help us understand if something odd
        // is happening
        if(l_pkgs > MAX_EXPECTED_FFDC_PACKAGES)
        {
            /*@
            * @errortype    ERRORLOG::ERRL_SEV_INFORMATIONAL
            * @moduleid     SBEIO_GET_FFDC_HANDLER
            * @reasoncode   SBEIO_MORE_FFDC_THAN_EXPECTED
            * @userdata1    Maximum expected packages
            * @userdata2    Number of FFDC packages
            * @devdesc      Unexpected number of FFDC packages in buffer
            * @custdesc     Extra FFDC gathered, marked information event
            */
            l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_INFORMATIONAL,
                                             SBEIO_GET_FFDC_HANDLER,
                                             SBEIO_MORE_FFDC_THAN_EXPECTED,
                                             MAX_EXPECTED_FFDC_PACKAGES,
                                             l_pkgs);

            l_errl->collectTrace( SBEIO_COMP_NAME, 256);

            // Set the PLID of the error log to master PLID
            // if the master PLID is set
            updatePlids(l_errl);

            // Also log the failing proc as FFDC
            ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl);
            errlCommit(l_errl, SBEIO_COMP_ID);
        }

        // If there are FFDC packages, make a log for FFDC from SBE
        if(l_pkgs > 0)
        {
            /*@
             * @errortype    ERRORLOG::ERRL_SEV_PREDICTIVE
             * @moduleid     SBEIO_GET_FFDC_HANDLER
             * @reasoncode   SBEIO_RETURNED_FFDC
             * @userdata1    Processor Target
             * @userdata2    Number of FFDC packages
             * @devdesc      FFDC returned by SBE after failing to reach runtime
             * @custdesc     FFDC associated with boot device failing to boot
             */
            l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_PREDICTIVE,
                                             SBEIO_GET_FFDC_HANDLER,
                                             SBEIO_RETURNED_FFDC,
                                             TARGETING::get_huid(i_target),
                                             l_pkgs);

            // Also log the failing proc as FFDC
            ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog(l_errl);


            // Process each FFDC package
            for(auto i=0; i<l_pkgs; i++)
            {
                // Add each package to the log
                l_errl->addFFDC( SBEIO_COMP_ID,
                                l_ffdc_parser->getFFDCPackage(i),
                                l_ffdc_parser->getPackageLength(i),
                                0,
                                SBEIO_UDT_PARAMETERS,
                                false );

                // Get the RC from the FFDC package
                uint32_t l_rc = l_ffdc_parser->getPackageRC(i);

                // Determine an action for the RC
                P9_EXTRACT_SBE_RC::RETURN_ACTION l_action =
                            static_cast<P9_EXTRACT_SBE_RC::RETURN_ACTION>(action_for_ffdc_rc(l_rc));

                // Packages whose RC has no mapped action are still added to
                // the log above, but do not influence the recovery action
                if(l_action != NO_ACTION_FOUND_FOR_THIS_RC)
                {
                    // Set the action associated with the RC that we found
                    this->iv_currentAction = l_action;

                    // This call will look at what action_for_ffdc_rc had set the return action to
                    // checks on how many times we have attempted to boot this side,
                    // and if we have already tried switching sides
                    //
                    //
                    // Note this call is important, if this is not called we could end up in a
                    // endless loop because this enforces MAX_SWITCH_SIDE_COUNT and MAX_SIDE_BOOT_ATTEMPTS
                    this->bestEffortCheck();

                    // Set the instance variable ffdcSetAction to let us
                    // know that the current action was set from what we
                    // found in the asyncFFDC
                    this->iv_ffdcSetAction = true;
                }
            }

            l_errl->collectTrace( SBEIO_COMP_NAME, KILOBYTE/4);
            l_errl->collectTrace( "ISTEPS_TRACE", KILOBYTE/4);

            // Set the PLID of the error log to master PLID
            // if the master PLID is set
            updatePlids(l_errl);

            errlCommit(l_errl, SBEIO_COMP_ID);
        }
    }
#endif

    // Release the response buffer allocated at the top of the function
    free(l_pFifoResponse);
    l_pFifoResponse = nullptr;

    SBE_TRACF(EXIT_MRK "sbe_get_ffdc_handler()");
}


void SbeRetryHandler::sbe_run_extract_rc(TARGETING::Target * i_target)
{
    SBE_TRACF(ENTER_MRK "sbe_run_extract_rc()");
    SBE_TRACF("Inside sbe_run_extract_rc, calling p9_extract_sbe_rc HWP");

    // fapi2 view of the processor handed to the HWP
    const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP> l_procChip(i_target);

    // Start from NO_RECOVERY_ACTION so that, should p9_extract_sbe_rc fail
    // before filling in an action, bestEffortCheck still gets a well defined
    // value to work from
    P9_EXTRACT_SBE_RC::RETURN_ACTION l_hwpAction =
                     P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;

    // TODO RTC: 190528 Force FAPI_INVOKE_HWP to call FAPI_EXEC_HWP when FAPI_INVOKE
    //          is blocked by mutex
    // We may already be inside a FAPI_INVOKE_HWP call when we get here,
    // which might cause issues w/ current_err; no known way around that.
    fapi2::ReturnCode l_hwpRc;
    FAPI_EXEC_HWP(l_hwpRc, p9_extract_sbe_rc, l_procChip,
                  l_hwpAction, iv_useSDB, iv_secureModeDisabled);

    // Turn the return code into an UNRECOVERABLE error log; it gets tied to
    // the caller's log through the PLID further down
    errlHndl_t l_hwpErrl = rcToErrl(l_hwpRc, ERRORLOG::ERRL_SEV_UNRECOVERABLE);

    // The action now comes from extract_rc, not from asyncFFDC
    this->iv_ffdcSetAction = false;
    this->iv_currentAction = l_hwpAction;

    // Adjust the suggested action against how often this side has been
    // booted and whether sides were already switched. Skipping this call
    // could lead to an endless retry loop, since this is where
    // MAX_SWITCH_SIDE_COUNT and MAX_SIDE_BOOT_ATTEMPTS are enforced.
    this->bestEffortCheck();

#ifndef __HOSTBOOT_RUNTIME
    // This could potentially take awhile, reset watchdog
    INITSERVICE::sendProgressCode();
#endif

    if(l_hwpErrl)
    {
        SBE_TRACF("Error: sbe_boot_fail_handler : p9_extract_sbe_rc HWP "
                  " returned action %d and errorlog PLID=0x%x, rc=0x%.4X",
                  this->iv_currentAction, l_hwpErrl->plid(), l_hwpErrl->reasonCode());

        l_hwpErrl->collectTrace(SBEIO_COMP_NAME,256);
        l_hwpErrl->collectTrace(FAPI_IMP_TRACE_NAME, 256);
        l_hwpErrl->collectTrace(FAPI_TRACE_NAME, 384);

        // Record which target the HWP ran against
        ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_hwpErrl );

        // Tie this log to the master PLID, if one is set
        updatePlids(l_hwpErrl);

        errlCommit( l_hwpErrl, HWPF_COMP_ID );
    }

    SBE_TRACF(EXIT_MRK "sbe_run_extract_rc() current action is %llx",
                        this->iv_currentAction);
}

void SbeRetryHandler::bestEffortCheck()
{
    // What is still allowed: another boot attempt on the current side,
    // and/or a switch to the other side
    const bool l_canRetryThisSide =
        (this->iv_currentSideBootAttempts < MAX_SIDE_BOOT_ATTEMPTS);
    const bool l_canSwitchSides =
        (this->iv_switchSidesCount < MAX_SWITCH_SIDE_COUNT);

    switch(this->iv_currentAction)
    {
        // "No recovery" is not taken at face value: keep retrying this
        // side, then the other side, until both budgets are used up.
        // Only then is NO_RECOVERY_ACTION left in place.
        case P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION:
            if (l_canRetryThisSide)
            {
                SBE_TRACF("bestEffortCheck(): suggested action was NO_RECOVERY_ACTION but we are trying RESTART_SBE");
                this->iv_currentAction = P9_EXTRACT_SBE_RC::RESTART_SBE;
            }
            else if (l_canSwitchSides)
            {
                SBE_TRACF("bestEffortCheck(): suggested action was NO_RECOVERY_ACTION but we are trying REIPL_BKP_SEEPROM");
                this->iv_currentAction = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
            }
            break;

        // A side switch was suggested; if no switch is left there is
        // nothing more we can do
        case P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM:
        case P9_EXTRACT_SBE_RC::REIPL_UPD_SEEPROM:
            if (!l_canSwitchSides)
            {
                SBE_TRACF("bestEffortCheck(): suggested action was REIPL_BKP_SEEPROM/REIPL_UPD_SEEPROM but that is not possible so changing to NO_RECOVERY_ACTION");
                this->iv_currentAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
            }
            break;

        // A restart was suggested; once this side has used up its boot
        // attempts fall back to switching sides, or give up if that is
        // not possible either
        case P9_EXTRACT_SBE_RC::RESTART_SBE:
        case P9_EXTRACT_SBE_RC::RESTART_CBS:
            if (!l_canRetryThisSide)
            {
                if (l_canSwitchSides)
                {
                    SBE_TRACF("bestEffortCheck(): suggested action was RESTART_SBE/RESTART_CBS but max attempts tried already so changing to REIPL_BKP_SEEPROM");
                    this->iv_currentAction = P9_EXTRACT_SBE_RC::REIPL_BKP_SEEPROM;
                }
                else
                {
                    SBE_TRACF("bestEffortCheck(): suggested action was RESTART_SBE/RESTART_CBS but no actions possible so changing to NO_RECOVERY_ACTION");
                    this->iv_currentAction = P9_EXTRACT_SBE_RC::NO_RECOVERY_ACTION;
                }
            }
            break;

        // Any other action is passed through untouched
        default:
            break;
    }
}

errlHndl_t SbeRetryHandler::switch_sbe_sides(TARGETING::Target * i_target)
{
    SBE_TRACF(ENTER_MRK "switch_sbe_sides()");

    errlHndl_t l_err = nullptr;

#ifdef __HOSTBOOT_RUNTIME
    const bool l_atRuntime = true;
#else
    const bool l_atRuntime = false;
#endif

    // The boot-select bit is accessed over FSI only in non-runtime builds,
    // and only when ATTR_PROC_SBE_MASTER_CHIP is not set for this proc.
    // Every other case goes through SCOM.
    const bool l_accessViaFsi =
        !l_atRuntime &&
        !i_target->getAttr<TARGETING::ATTR_PROC_SBE_MASTER_CHIP>();

    // Used by every trace below
    const auto l_procHuid = TARGETING::get_huid(i_target);

    do{

        if(!l_accessViaFsi)
        {
            // Fetch PERV_SB_CS_SCOM 0x50008 from the target proc
            uint64_t l_scomReg = 0;
            size_t l_scomLen = sizeof(uint64_t);
            l_err = DeviceFW::deviceOp(
                            DeviceFW::READ,
                            i_target,
                            &l_scomReg,
                            l_scomLen,
                            DEVICE_SCOM_ADDRESS(PERV_SB_CS_SCOM) );

            if( l_err )
            {
                SBE_TRACF( ERR_MRK"switch_sbe_sides: SCOM device read "
                        "PERV_SB_CS_SCOM (0x%.4X), proc target = %.8X, "
                        "RC=0x%X, PLID=0x%lX",
                        PERV_SB_CS_SCOM, // 0x50008
                        l_procHuid,
                        ERRL_GETRC_SAFE(l_err),
                        ERRL_GETPLID_SAFE(l_err));
                break;
            }

            // Flip the boot side: a set select bit means side 1 is
            // currently selected
            if(l_scomReg & SBE::SBE_BOOT_SELECT_MASK)
            {
                // Side 1 -> side 0: clear the select bit
                SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 0 for HUID 0x%08X",
                        iv_switchSidesCount,
                        l_procHuid);
                l_scomReg &= ~SBE::SBE_BOOT_SELECT_MASK;
            }
            else
            {
                // Side 0 -> side 1: set the select bit
                SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 1 for HUID 0x%08X",
                        iv_switchSidesCount,
                        l_procHuid);
                l_scomReg |= SBE::SBE_BOOT_SELECT_MASK;
            }

            // Store the updated PERV_SB_CS_SCOM 0x50008 value
            l_err = DeviceFW::deviceOp(
                            DeviceFW::WRITE,
                            i_target,
                            &l_scomReg,
                            l_scomLen,
                            DEVICE_SCOM_ADDRESS(PERV_SB_CS_SCOM) );
            if( l_err )
            {
                SBE_TRACF( ERR_MRK"switch_sbe_sides: SCOM device write "
                        "PERV_SB_CS_SCOM (0x%.4X), proc target = %.8X, "
                        "RC=0x%X, PLID=0x%lX",
                        PERV_SB_CS_SCOM, // 0x50008
                        l_procHuid,
                        ERRL_GETRC_SAFE(l_err),
                        ERRL_GETPLID_SAFE(l_err));
                break;
            }
        }
        else
        {
            // The FSI register is 32 bits wide, so use the upper word of
            // the 64-bit select mask
            const uint32_t l_fsiSelectMask = SBE::SBE_BOOT_SELECT_MASK >> 32;

            // Fetch PERV_SB_CS_FSI_BYTE 0x2820 from the target proc
            uint32_t l_fsiReg = 0;
            size_t l_fsiLen = sizeof(uint32_t);
            l_err = DeviceFW::deviceOp(
                            DeviceFW::READ,
                            i_target,
                            &l_fsiReg,
                            l_fsiLen,
                            DEVICE_FSI_ADDRESS(PERV_SB_CS_FSI_BYTE) );

            if( l_err )
            {
                SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device read "
                        "PERV_SB_CS_FSI_BYTE (0x%.4X), proc target = %.8X, "
                        "RC=0x%X, PLID=0x%lX",
                        PERV_SB_CS_FSI_BYTE, // 0x2820
                        l_procHuid,
                        ERRL_GETRC_SAFE(l_err),
                        ERRL_GETPLID_SAFE(l_err));
                break;
            }

            // Flip the boot side: a set select bit means side 1 is
            // currently selected
            if(l_fsiReg & l_fsiSelectMask)
            {
                // Side 1 -> side 0: clear the select bit
                SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 0 for HUID 0x%08X",
                        iv_switchSidesCount,
                        l_procHuid);
                l_fsiReg &= ~l_fsiSelectMask;
            }
            else
            {
                // Side 0 -> side 1: set the select bit
                SBE_TRACF( "switch_sbe_sides #%d: Set Boot Side 1 for HUID 0x%08X",
                        iv_switchSidesCount,
                        l_procHuid);
                l_fsiReg |= l_fsiSelectMask;
            }

            // Store the updated PERV_SB_CS_FSI 0x2820 value
            l_err = DeviceFW::deviceOp(
                            DeviceFW::WRITE,
                            i_target,
                            &l_fsiReg,
                            l_fsiLen,
                            DEVICE_FSI_ADDRESS(PERV_SB_CS_FSI_BYTE) );
            if( l_err )
            {
                SBE_TRACF( ERR_MRK"switch_sbe_sides: FSI device write "
                        "PERV_SB_CS_FSI_BYTE (0x%.4X), proc target = %.8X, "
                        "RC=0x%X, PLID=0x%lX",
                        PERV_SB_CS_FSI_BYTE, // 0x2820
                        l_procHuid,
                        ERRL_GETRC_SAFE(l_err),
                        ERRL_GETPLID_SAFE(l_err));
                break;
            }
        }

        // The register update went through: account for the switch
        ++(this->iv_switchSidesCount);

        SBE_TRACF("switch_sbe_sides(): iv_switchSidesCount has been incremented to %llx",
                   iv_switchSidesCount);

        // Nothing has been booted from the new side yet, so its attempt
        // counter starts over
        this->iv_currentSideBootAttempts = 0;
    }while(0);

    if (l_err)
    {
        // Tie this log to the master PLID, if one is set
        updatePlids(l_err);
    }

    SBE_TRACF(EXIT_MRK "switch_sbe_sides()");
    return l_err;
}

} // End of namespace SBEIO
OpenPOWER on IntegriCloud