summaryrefslogtreecommitdiffstats
path: root/src/import/generic/memory/lib/ccs/ccs.H
blob: aafb0826e46b6eeffed3826446b853b6b3f032fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/import/generic/memory/lib/ccs/ccs.H $                     */
/*                                                                        */
/* OpenPOWER HostBoot Project                                             */
/*                                                                        */
/* Contributors Listed Below - COPYRIGHT 2019                             */
/* [+] International Business Machines Corp.                              */
/*                                                                        */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */

///
/// @file ccs.H
/// @brief Run and manage the CCS engine
///
// *HWP HWP Owner: Matthew Hickman <Matthew.Hickman@ibm.com>
// *HWP HWP Backup: Andre Marin <aamarin@us.ibm.com>
// *HWP Team: Memory
// *HWP Level: 3
// *HWP Consumed by: HB:FSP

#ifndef _MSS_CCS_H_
#define _MSS_CCS_H_

#include <fapi2.H>

#include <generic/memory/lib/utils/poll.H>
#include <generic/memory/lib/utils/buffer_ops.H>
#include <generic/memory/lib/utils/index.H>
#include <generic/memory/lib/utils/pos.H>
#include <generic/memory/lib/utils/find.H>
#include <generic/memory/lib/utils/shared/mss_generic_consts.H>
#include <generic/memory/lib/ccs/ccs_traits.H>

namespace mss
{

// Values for the 4-bit CKE field of a CCS instruction
// All CKE bits cleared
static constexpr uint64_t CKE_LOW  = 0b0000;
// All CKE bits set
static constexpr uint64_t CKE_HIGH = 0b1111;

// CKE setup for self_refresh_entry_command(), indexed by rank 0-7
// Only ranks 0, 1, 4 and 5 are currently supported
// Ranks that are not supported always get 0
static constexpr uint64_t CKE_ARY_SRE[]  =
{
    0b0111, // rank 0
    0b1011, // rank 1
    0,      // rank 2 - not supported
    0,      // rank 3 - not supported
    0b0111, // rank 4
    0b1011, // rank 5
    0,      // rank 6 - not supported
    0       // rank 7 - not supported
};

// CKE setup for self_refresh_exit_command(), indexed by rank 0-7
// Only ranks 0, 1, 4 and 5 carry a non-zero value; all other ranks get 0
static constexpr uint64_t CKE_ARY_SRX[]  =
{
    0b1000, // rank 0
    0b0100, // rank 1
    0,      // rank 2
    0,      // rank 3
    0b1000, // rank 4
    0b0100, // rank 5
    0,      // rank 6
    0       // rank 7
};

namespace ccs
{

///
/// @brief The rank configurations (chip select schemes) CCS can set up
/// @note QUAD_DIRECT is intentionally left out: it uses 4 CS and is board wiring dependent,
/// and it is not clear whether it would use CS23 or CID01 for CS2/3
///
enum rank_configuration
{
    DUAL_DIRECT  = 0,
    QUAD_ENCODED = 1,
};

///
/// @class instruction_t
/// @brief Class for ccs instructions
/// @tparam T fapi2::TargetType representing the target of the CCS instructions
/// @note A ccs instruction is data (array 0) and some control information (array 1)cc
///
class instruction_t
{
    private:
        using TT = ccsTraits<DEFAULT_MC_TYPE>;

    public:
        fapi2::buffer<uint64_t> arr0;
        fapi2::buffer<uint64_t> arr1;
        // The MCA indexed rank on which to operate. If this is invalid, all ranks will be disabled
        uint64_t iv_rank;
        // We want to have a switch to update rank or not. A user might want to setup CS in some weird way
        // In that case, they don't want us "fixing" their CS values
        // We'll default the rank to be updated - we want to send out CS properly
        bool iv_update_rank;

        ///
        /// @brief intstruction_t ctor
        /// @param[in] i_rank the rank this instruction is headed for
        /// @param[in] i_arr0 the initial value for arr0, defaults to 0
        /// @param[in] i_arr1 the initial value for arr1, defaults to 0
        /// @param[in] i_update_rank true if the rank should be updated before being sent, defaults to true
        ///
        instruction_t( const uint64_t i_rank = NO_CHIP_SELECT_ACTIVE,
                       const fapi2::buffer<uint64_t> i_arr0 = 0,
                       const fapi2::buffer<uint64_t> i_arr1 = 0,
                       const bool i_update_rank = true):
            arr0(i_arr0),
            arr1(i_arr1),
            iv_rank(i_rank),
            iv_update_rank(i_update_rank)
        {
            // Skip setting up the rank if the user doesn't want us to
            if(iv_update_rank)
            {
                // Set the chip selects to be 1's (not active)
                // We'll fix these up before executing the instructions
                arr0.insertFromRight<TT::ARR0_DDR_CSN_0_1,
                                     TT::ARR0_DDR_CSN_0_1_LEN>(0b11);
                arr0.insertFromRight<TT::ARR0_DDR_CSN_2_3,
                                     TT::ARR0_DDR_CSN_2_3_LEN>(0b11);
            }
        }

        ///
        /// @brief Updates the rank based upon the passed in rank configuration encoding
        /// @param[in] i_target the port target for this instruction - for error logging
        /// @param[in] i_rank_config the rank configuration
        /// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok
        ///
        fapi2::ReturnCode configure_rank(const fapi2::Target<TT::PORT_TARGET_TYPE>& i_target,
                                         const rank_configuration i_rank_config )
        {
            // If this instrunction is set to not update the rank, then don't update the rank
            if(!iv_update_rank)
            {
                return fapi2::FAPI2_RC_SUCCESS;
            }

            // Regardless of rank configurations, if we have NO_CHIP_SELECT_ACTIVE, deactivate all CS
            if(iv_rank == NO_CHIP_SELECT_ACTIVE)
            {
                arr0.insertFromRight<TT::ARR0_DDR_CSN_0_1, TT::ARR0_DDR_CSN_0_1_LEN>(0b11);
                arr0.insertFromRight<TT::ARR0_DDR_CSN_2_3, TT::ARR0_DDR_CSN_2_3_LEN>(0b11);
                return fapi2::FAPI2_RC_SUCCESS;
            }

            // First, check rank - we need to make sure that we have a valid rank
            FAPI_ASSERT(iv_rank < TT::CCS_MAX_MRANK_PER_PORT,
                        fapi2::MSS_INVALID_RANK()
                        .set_MCA_TARGET(i_target)
                        .set_RANK(iv_rank)
                        .set_FUNCTION(ffdc_function_codes::CCS_INST_CONFIGURE_RANK),
                        "%s rank out of bounds rank%u", mss::c_str(i_target), iv_rank);

            // Now the fun happens and we can deal with the actual encoding

            // If we're quad mode, setup the encoding accordingly
            if(i_rank_config == rank_configuration::QUAD_ENCODED)
            {
                // CS 0/1 are first, while CID0/1 are second
                // In quad enabled mode, CID acts as a "package select"
                // It selects R0/2 vs R1/3
                // CS0 vs CS1 selects the low vs high rank in the package
                // CS0 will select rank 0/1
                // CS1 will select rank 2/3

                const auto l_dimm_rank = mss::index(iv_rank);
                const bool l_is_dimm0 = iv_rank < TT::CCS_MAX_RANK_PER_DIMM;
                constexpr uint64_t NON_DIMM_CS = 0b11;

                // Assigns the CS based upon which DIMM we're at
                const auto CS01 = l_is_dimm0 ? TT::CS_N[l_dimm_rank].first : NON_DIMM_CS;
                const auto CS23 = l_is_dimm0 ? NON_DIMM_CS : TT::CS_N[l_dimm_rank].first;

                // Setup that rank
                arr0.insertFromRight<TT::ARR0_DDR_CSN_0_1,
                                     TT::ARR0_DDR_CSN_0_1_LEN>(CS01);
                arr0.insertFromRight<TT::ARR0_DDR_CSN_2_3,
                                     TT::ARR0_DDR_CSN_2_3_LEN>(CS23);
                arr0.insertFromRight<TT::ARR0_DDR_CID_0_1,
                                     TT::ARR0_DDR_CID_0_1_LEN>(TT::CS_N[l_dimm_rank].second);
            }

            // Otherwise, setup for dual-direct mode (our only other supported mode at the moment)
            else
            {
                const auto l_dimm_rank = mss::index(iv_rank);
                const bool l_is_dimm0 = iv_rank < TT::CCS_MAX_RANK_PER_DIMM;

                // Assigns the CS based upon which DIMM we're at
                const auto CS01 = l_is_dimm0 ? TT::CS_ND[l_dimm_rank].first : TT::CS_ND[l_dimm_rank].second;
                const auto CS23 = l_is_dimm0 ? TT::CS_ND[l_dimm_rank].second : TT::CS_ND[l_dimm_rank].first;

                // Setup that rank
                arr0.insertFromRight<TT::ARR0_DDR_CSN_0_1,
                                     TT::ARR0_DDR_CSN_0_1_LEN>(CS01);
                arr0.insertFromRight<TT::ARR0_DDR_CSN_2_3,
                                     TT::ARR0_DDR_CSN_2_3_LEN>(CS23);

                // Check that we don't have a rank out of bounds case here
                // We can only have that if
                // 1) we are DIMM1
                // 2) our DIMM rank is greater than the maximum allowed number of ranks on DIMM1
                // So, we pass always if we're DIMM0, or if our DIMM rank is less than the maximum number of DIMM's on rank 1
                FAPI_ASSERT(l_dimm_rank < TT::CCS_MAX_RANKS_DIMM1 || l_is_dimm0,
                            fapi2::MSS_INVALID_RANK()
                            .set_MCA_TARGET(i_target)
                            .set_RANK(iv_rank)
                            .set_FUNCTION(ffdc_function_codes::CCS_INST_CONFIGURE_RANK),
                            "%s rank out of bounds rank%u", mss::c_str(i_target), iv_rank);
            }

            return fapi2::FAPI2_RC_SUCCESS;
        fapi_try_exit:
            return fapi2::current_err;
        }

        ///
        /// @brief Equals comparison operator
        /// @param[in] i_rhs - the instruction to compare to
        /// @return True if both instructions are equal
        ///
        inline bool operator==( const instruction_t& i_rhs ) const
        {
            return arr0 == i_rhs.arr0 &&
                   arr1 == i_rhs.arr1 &&
                   iv_rank == i_rhs.iv_rank &&
                   iv_update_rank == i_rhs.iv_update_rank;
        }
};

///
/// @brief Determines our rank configuration type
/// @param[in] i_target the memory port target on which to operate
/// @param[out] o_rank_config the rank configuration - QUAD_ENCODED if DIMM0 reports 4 master ranks,
/// DUAL_DIRECT otherwise
/// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok
///
inline fapi2::ReturnCode get_rank_config(const fapi2::Target<DEFAULT_MEM_PORT_TARGET>& i_target,
        rank_configuration& o_rank_config)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // Number of master ranks on a DIMM which means we're in quad encoded mode
    constexpr uint8_t QUAD_RANK_ENABLE = 4;

    uint8_t l_num_master_ranks[MAX_DIMM_PER_PORT] = {};

    // Default to dual-direct; this is what the caller gets unless DIMM0 says otherwise below
    o_rank_config = rank_configuration::DUAL_DIRECT;

    FAPI_TRY(TT::get_rank_config_attr(i_target, l_num_master_ranks));

    // We only need to check DIMM0
    // Our number of ranks should be the same between DIMM's 0/1
    if(l_num_master_ranks[0] == QUAD_RANK_ENABLE)
    {
        o_rank_config = rank_configuration::QUAD_ENCODED;
    }

fapi_try_exit:
    return fapi2::current_err;
}

///
/// @brief Determines our rank configuration type across all ports
/// @param[in] i_target the memory controller target whose ports are examined
/// @param[out] o_rank_config the rank configuration of each port, indexed by the port's relative position;
/// always resized to TT::PORTS_PER_MC_TARGET entries
/// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok
///
inline fapi2::ReturnCode get_rank_config(const fapi2::Target<DEFAULT_MC_TARGET>& i_target,
        std::vector<rank_configuration>& o_rank_config)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // Throw away whatever was passed in and create one entry per port
    // We then use relative indexing to get us the entry we need
    o_rank_config.assign(TT::PORTS_PER_MC_TARGET, rank_configuration::DUAL_DIRECT);

    for(const auto& l_port : mss::find_targets<DEFAULT_MEM_PORT_TARGET>(i_target))
    {
        rank_configuration l_port_config = rank_configuration::DUAL_DIRECT;
        FAPI_TRY(get_rank_config(l_port, l_port_config));

        o_rank_config[mss::relative_pos<DEFAULT_MC_TARGET>(l_port)] = l_port_config;
    }

    return fapi2::FAPI2_RC_SUCCESS;
fapi_try_exit:
    return fapi2::current_err;
}

///
/// @class program
/// @brief A series of CCS instructions, together with the CCS engine
/// parameters associated with running those instructions
/// @note The CCS traits come from ccsTraits<DEFAULT_MC_TYPE>
///
class program
{
    private:
        using TT = ccsTraits<DEFAULT_MC_TYPE>;

    public:
        ///
        /// @brief program ctor
        /// @note The poll parameters are constructed from (0, 0) so the CCS executer can see
        /// whether to use the delays in the instruction stream or not
        ///
        program():
            iv_poll(0, 0)
        {
        }

        // The instructions which make up this program
        std::vector< instruction_t > iv_instructions;

        // The parameters used when polling
        poll_parameters iv_poll;

        // The polling probes
        std::vector< poll_probe<TT::PORT_TARGET_TYPE> > iv_probes;
};

///
/// @brief Common setup for all MRS/RCD instructions
/// @param[in,out] i_arr0 fapi2::buffer<uint64_t> representing the ARR0 of the instruction
/// @note Generic DDR4 MRS setup (RCD is an MRS). The CCS traits come from ccsTraits<DEFAULT_MC_TYPE>
///
static void mrs_rcd_helper( fapi2::buffer<uint64_t>& i_arr0 )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // All of the CKE bits go high
    // Note: P8 set all 4 of these high - not sure if that's correct. BRS
    i_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    // ACT goes high
    i_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // RAS (A16), CAS (A15) and WE (A14) all go low
    i_arr0.clearBit<TT::ARR0_DDR_ADDRESS_16>();
    i_arr0.clearBit<TT::ARR0_DDR_ADDRESS_15>();
    i_arr0.clearBit<TT::ARR0_DDR_ADDRESS_14>();
}

///
/// @brief Setup activate command instruction
/// @param[in] i_rank the rank this instruction is headed for
/// @return the activate CCS instruction
/// @note The row, bank group and bank addresses are all left at 0 for now
///
inline instruction_t act_command( const uint64_t i_rank )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // Every CKE bit is high
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    // The ACT_N bit is cleared for an activate, as are A16 (RAS), A15 (CAS) and A14 (WE)
    l_arr0.clearBit<TT::ARR0_DDR_ACTN>()
    .template clearBit<TT::ARR0_DDR_ADDRESS_16>()
    .template clearBit<TT::ARR0_DDR_ADDRESS_15>()
    .template clearBit<TT::ARR0_DDR_ADDRESS_14>();

    // Row address is all 0's
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_17>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_0_13, TT::ARR0_DDR_ADDRESS_0_13_LEN>();

    // Bank group is 0
    l_arr0.clearBit<TT::ARR0_DDR_BANK_GROUP_1>();
    l_arr0.clearBit<TT::ARR0_DDR_BANK_GROUP_0>();

    // Bank is 0
    l_arr0.clearBit<TT::ARR0_DDR_BANK_0_1, TT::ARR0_DDR_BANK_0_1_LEN>();
    l_arr0.clearBit<TT::ARR0_DDR_BANK_2>();

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Create, initialize an RCD (RCW - JEDEC) CCS command
/// @param[in] i_target the DIMM this instruction is headed for
/// @param[in] i_sim true if running in simulation - in that case CKE is left as the common MRS/RCD
/// setup made it and i_turn_on_cke is not consulted
/// @param[in] i_turn_on_cke flag that states whether we want CKE on for this RCW (defaulted to true)
/// @return the RCD CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation
///
inline instruction_t rcd_command( const fapi2::Target<fapi2::TARGET_TYPE_DIMM>& i_target,
                                  const bool i_sim,
                                  const bool i_turn_on_cke = true)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // RCD always goes to the 0th rank on the DIMM; either 0 or 4.
    constexpr uint64_t DIMM0_FIRST_RANK = 0;
    constexpr uint64_t DIMM1_FIRST_RANK = 4;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // Generic DDR4 MRS setup (RCD is an MRS)
    mrs_rcd_helper(l_arr0);

    // The CKE override lives here rather than in mrs_rcd_helper because only RCWs need it,
    // and there is no reason to complicate the MRS command API with it.
    // A little duplication, but it keeps the change isolated.
    if( !i_sim )
    {
        l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(i_turn_on_cke ? CKE_HIGH : CKE_LOW);
    }

    // RCD setup
    // DDR4: Set BG1 to 0 during an MRS.
    // BG0, BA1:BA0 to 0b111 selects RCW (aka MR7).
    l_arr0.clearBit<TT::ARR0_DDR_BANK_GROUP_1>();
    l_arr0.insertFromRight<TT::ARR0_DDR_BANK_0_1, TT::ARR0_DDR_BANK_0_1_LEN>(0b11);
    l_arr0.setBit<TT::ARR0_DDR_BANK_GROUP_0>();

    const uint64_t l_rank = (mss::index(i_target) == 0) ? DIMM0_FIRST_RANK : DIMM1_FIRST_RANK;
    return instruction_t(l_rank, l_arr0, l_arr1);
}

///
/// @brief Create, initialize an MRS CCS command
/// @param[in] i_rank the rank this instruction is headed for
/// @param[in] i_mrs the specific MRS (held in an 8 bit buffer, so only the low 8 bits are kept)
/// @return the MRS CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation
///
inline instruction_t mrs_command ( const uint64_t i_rank,
                                   const uint64_t i_mrs )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;
    fapi2::buffer<uint8_t> l_mrs(i_mrs);

    //
    // Generic DDR4 MRS setup (RCD is an MRS)
    //
    mrs_rcd_helper(l_arr0);

    //
    // MRS setup
    //
    // DDR4: Set BG1 to 0. BG0, BA1:BA0 to i_mrs
    l_arr0.clearBit<TT::ARR0_DDR_BANK_GROUP_1>();
    mss::swizzle<TT::ARR0_DDR_BANK_0_1, 3, 7>(l_mrs, l_arr0);

    // The MRS number is widened to 64 bits before being traced: the format asks for a long (%016lx),
    // and passing the bare 8 bit value would not match that conversion
    FAPI_DBG("mrs rcd boiler 0x%016lx 0x%llx",
             static_cast<uint64_t>(static_cast<uint8_t>(l_mrs)),
             static_cast<uint64_t>(l_arr0));

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Create, initialize a JEDEC Device Deselect CCS command
/// @param[in] i_idle the idle time to the next command (default to 0)
/// @return the Device Deselect CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation
/// @note RAS, CAS and WE are no-care for a deselect and are left untouched
///
inline instruction_t des_command(const uint16_t i_idle = 0)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // ACT is a no-care in the spec, but leaving it low seems to raise questions when
    // people look at the trace - so we set it high.
    l_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // All of the CKE bits are high
    // Note: P8 set all 4 of these high - not sure if that's correct. BRS
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    // The idle time to the next command
    l_arr1.insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>( i_idle );

    // Device Deselect wants CS_n always high (select nothing using rank NO_CHIP_SELECT_ACTIVE)
    return instruction_t( NO_CHIP_SELECT_ACTIVE, l_arr0, l_arr1 );
}

///
/// @brief Converts an ODT attribute to CCS array input
/// @param[in] i_attr_value ODT attribute value
/// @return CCS value for the ODT's
/// @note Each of the four ODT bits (DIMM0 R0/R1, DIMM1 R0/R1) is copied from its position
/// in the attribute to its position in the CCS field; all other bits of the result are 0
///
inline uint8_t convert_odt_attr_to_ccs(const fapi2::buffer<uint8_t>& i_attr_value)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // ODT value buffer
    fapi2::buffer<uint8_t> l_ccs_value;

    // One write per ODT bit - DIMM0 first, then DIMM1
    l_ccs_value.template writeBit<TT::CCS_ODT_DIMM0_R0>(i_attr_value.template getBit<TT::ATTR_ODT_DIMM0_R0>())
    .template writeBit<TT::CCS_ODT_DIMM0_R1>(i_attr_value.template getBit<TT::ATTR_ODT_DIMM0_R1>())
    .template writeBit<TT::CCS_ODT_DIMM1_R0>(i_attr_value.template getBit<TT::ATTR_ODT_DIMM1_R0>())
    .template writeBit<TT::CCS_ODT_DIMM1_R1>(i_attr_value.template getBit<TT::ATTR_ODT_DIMM1_R1>());

    return uint8_t(l_ccs_value);
}

///
/// @brief Create, initialize an ODT CCS command
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_odt_values CCS defined ODT values
/// @param[in] i_cycles the number of cycles to hold the ODT for - defaults to DEFAULT_ODT_CYCLE_LEN
/// @return the ODT CCS instruction - a Device Deselect carrying the ODT values and a repeat count
/// @note This technically is not a JEDEC command, but is needed for CCS to hold the ODT cycles
/// CCS by design does not repeat or latch ODT's appropriately
/// As such, it's up to the programmers to hold the ODT's appropriately
/// This "command" will greatly help us do that
///
template< typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline instruction_t odt_command(const uint8_t i_odt_values, const uint64_t i_cycles = TT::DEFAULT_ODT_CYCLE_LEN)
{
    // Start from a plain deselect ...
    instruction_t l_inst = des_command();

    // ... tell CCS how many times to repeat it ...
    l_inst.arr1.template insertFromRight<TT::ARR1_REPEAT_CMD_CNT, TT::ARR1_REPEAT_CMD_CNT_LEN>(i_cycles);

    // ... and drive the requested ODT's while it does so
    l_inst.arr0.template insertFromRight<TT::ARR0_DDR_ODT, TT::ARR0_DDR_ODT_LEN>(i_odt_values);

    return l_inst;
}


///
/// @brief Create, initialize a NTTM read CCS command
/// @return the NTTM read CCS instruction - a Device Deselect with the NTTM force read bit set
/// @note the idle field is loaded with TT::NTTM_READ_DELAY (the delay this command needs is 4 cycles)
///
inline instruction_t nttm_read_command()
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // A NTTM read is built on top of a deselect
    instruction_t l_inst = des_command();

    // Both updates go into the CCS_INST_ARR1 register
    l_inst.arr1.setBit<TT::NTTM_MODE_FORCE_READ>();
    l_inst.arr1.insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>(TT::NTTM_READ_DELAY);

    return l_inst;
}

///
/// @brief Create, initialize a JEDEC Device Power Down Entry CCS command
/// @return the Power Down Entry CCS instruction
/// @note A Power Down Entry is a Device Deselect with CKE low and TT::TIMING_TCPDED idles
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation
///
inline instruction_t pde_command()
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // Power Down Entry just like a DES, but we set CKE low
    instruction_t l_inst = des_command();

    // CKE is low. Note: P8 set all 4 of these low - not sure if that's correct.
    l_inst.arr0.template insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_LOW);

    // Idle for tCPDED before the next command
    l_inst.arr1.template insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>( TT::TIMING_TCPDED );

    return l_inst;
}

///
/// @brief Setup ZQ Long instruction
/// @param[in] i_rank the rank this instruction is headed for
/// @param[in] i_idle the idle time to the next command (default to 0)
/// @return the ZQ Long CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation
///
inline instruction_t zqcl_command( const uint64_t i_rank,
                                   const uint16_t i_idle = 0 )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // All of the CKE bits are high
    // Note: P8 set all 4 of these high - not sure if that's correct. BRS
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    // ACT high
    l_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // RAS (A16) and CAS (A15) high, WE (A14) low
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_16>();
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_15>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_14>();

    // ADDR10/AP high
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_10>();

    // The idle time to the next command
    l_arr1.insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>( i_idle );

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Setup read command helper function
/// @param[in] i_rank the rank on this dimm (not referenced when building the ARR0 value)
/// @param[in] i_bank_addr bank address bits [BA0:BA1] = [62:63] (right aligned)
/// @param[in] i_bank_group_addr bank group address bits [BG0:BG1] = [62:63] (right aligned)
/// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned)
/// @return the ARR0 value for a read command
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation
///
static fapi2::buffer<uint64_t> read_cmd_boilerplate( const uint64_t i_rank,
        const fapi2::buffer<uint64_t>& i_bank_addr = 0,
        const fapi2::buffer<uint64_t>& i_bank_group_addr = 0,
        const fapi2::buffer<uint64_t>& i_column_addr = 0)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // Where BG0 and BG1 sit in the (right aligned) bank group address
    constexpr uint64_t BG0_BIT = 62;
    constexpr uint64_t BG1_BIT = 63;

    // TODO - RTC 166175 Encapsulate command truth table in a subclass for ccs.H
    fapi2::buffer<uint64_t> l_arr0;

    // All of the CKE bits are high
    // Note: P8 set all 4 of these high - not sure if that's correct. AAM
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    // ACT high
    l_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // RAS (A16) high, CAS (A15) low, WE (A14) high
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_16>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_15>();
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_14>();

    // Bank address
    l_arr0.insertFromRight<TT::ARR0_DDR_BANK_0_1, TT::ARR0_DDR_BANK_0_1_LEN>(i_bank_addr);

    // The bank group bits aren't contiguous in ARR0, so each one is copied over on its own
    l_arr0.writeBit<TT::ARR0_DDR_BANK_GROUP_0>(i_bank_group_addr.getBit<BG0_BIT>());
    l_arr0.writeBit<TT::ARR0_DDR_BANK_GROUP_1>(i_bank_group_addr.getBit<BG1_BIT>());

    // Column address is A[0:9]
    l_arr0.insertFromRight<TT::ARR0_DDR_ADDRESS_0_9, TT::ARR0_DDR_ADDRESS_0_9_LEN>(i_column_addr);

    return l_arr0;
}

///
/// @brief Build the CCS instruction for a write command (Fixed BL8 or BC4)
/// @param[in] i_rank the rank on this dimm
/// @param[in] i_bank_addr bank address bits [BA0:BA1] = [62:63] (right aligned)
/// @param[in] i_bank_group_addr bank group address bits [BG0:BG1] = [62:63] (right aligned)
/// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned)
/// @return the write command CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included
/// in this template definition)
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t wr_command( const uint64_t i_rank,
                                 const fapi2::buffer<uint64_t>& i_bank_addr = 0,
                                 const fapi2::buffer<uint64_t>& i_bank_group_addr = 0,
                                 const fapi2::buffer<uint64_t>& i_column_addr = 0)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // arr1 is passed along default-constructed; this builder adds nothing to it
    fapi2::buffer<uint64_t> l_arr1;

    // A write differs from a read only in the command encoding, so start from the
    // read boilerplate (CKE, ACT, bank, bank group and column already filled in)
    fapi2::buffer<uint64_t> l_arr0 = read_cmd_boilerplate(i_rank, i_bank_addr, i_bank_group_addr, i_column_addr);

    // ... and re-encode the command as a write: RAS high, CAS low, WE low
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_16>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_15>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_14>();

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Build the CCS instruction for a read command (Fixed BL8 or BC4)
/// @param[in] i_rank the rank on this dimm
/// @param[in] i_bank_addr bank address bits [BA0:BA1] = [62:63] (right aligned)
/// @param[in] i_bank_group_addr bank group address bits [BG0:BG1] = [62:63] (right aligned)
/// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned)
/// @return the read command CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included
/// in this template definition)
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t rd_command( const uint64_t i_rank,
                                 const fapi2::buffer<uint64_t>& i_bank_addr = 0,
                                 const fapi2::buffer<uint64_t>& i_bank_group_addr = 0,
                                 const fapi2::buffer<uint64_t>& i_column_addr = 0)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // arr1 is passed along default-constructed; this builder adds nothing to it
    fapi2::buffer<uint64_t> l_arr1;

    // The common read encoding (CKE, ACT, RAS/CAS/WE, bank, bank group, column)
    fapi2::buffer<uint64_t> l_arr0 = read_cmd_boilerplate(i_rank, i_bank_addr, i_bank_group_addr, i_column_addr);

    // Plain read: ADDR10/AP is low (the auto precharge flavour is rda_command)
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_10>();

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Build the CCS instruction for a read w/auto precharge command (Fixed BL8 or BC4)
/// @param[in] i_rank the rank on this dimm
/// @param[in] i_bank_addr bank address bits [BA0:BA1] = [62:63] (right aligned)
/// @param[in] i_bank_group_addr bank group address bits [BG0:BG1] = [62:63] (right aligned)
/// @param[in] i_column_addr column address bits [A0:A9] = [54:63] (right aligned)
/// @return the read w/auto precharge command CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included
/// in this template definition)
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t rda_command( const uint64_t i_rank,
                                  const fapi2::buffer<uint64_t>& i_bank_addr = 0,
                                  const fapi2::buffer<uint64_t>& i_bank_group_addr = 0,
                                  const fapi2::buffer<uint64_t>& i_column_addr = 0)
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // arr1 is passed along default-constructed; this builder adds nothing to it
    fapi2::buffer<uint64_t> l_arr1;

    // The common read encoding (CKE, ACT, RAS/CAS/WE, bank, bank group, column)
    fapi2::buffer<uint64_t> l_arr0 = read_cmd_boilerplate(i_rank, i_bank_addr, i_bank_group_addr, i_column_addr);

    // Read with auto precharge: ADDR10/AP is high
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_10>();

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Build the CCS instruction for a precharge all banks command
/// @param[in] i_rank the rank on this dimm
/// @param[in] i_idle the idle time to the next command (default to 0)
/// @return the precharge all banks command CCS instruction
/// @note THIS IS DDR4 ONLY RIGHT NOW. We can (and possibly should) specialize this
/// for the controller (Nimbus v Centaur) and then correct for DRAM generation (not included
/// in this template definition)
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t precharge_all_command( const uint64_t i_rank,
        const uint16_t i_idle = 0 )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // arr1 only carries the number of idles to wait after this command
    l_arr1.insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>( i_idle );

    // CKE is high Note: P8 set all 4 of these high - not sure if that's correct. AAM
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    // ACT is high
    l_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // Command encoding: RAS (A16) low, CAS (A15) high, WE (A14) low
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_16>();
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_15>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_14>();

    // ADDR10/AP is high
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_10>();

    // From DDR4 Spec table 17:
    // All other bits from the command truth table are 'V', for valid (1 or 0)

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Build the CCS instruction for a self-refresh entry command
/// @param[in] i_rank the rank on this dimm; also used, unchecked, as the index into CKE_ARY_SRE
/// @param[in] i_idle the idle time to the next command (default to 0)
/// @return the self-refresh entry command CCS instruction
/// @note THIS IS FOR DDR4 NON-LRDIMM ONLY RIGHT NOW
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t self_refresh_entry_command( const uint64_t i_rank, const uint16_t i_idle = 0 )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // arr1 only carries the number of idles to wait after this command
    l_arr1.insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>( i_idle );

    // Per-rank CKE pattern for self-refresh entry:
    // all CKE high except for the rank passed in
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_ARY_SRE[i_rank]);

    // ACT is high
    l_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // Command encoding: RAS (A16) low, CAS (A15) low, WE (A14) high
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_16>();
    l_arr0.clearBit<TT::ARR0_DDR_ADDRESS_15>();
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_14>();

    // From DDR4 Spec table 17:
    // All other bits from the command truth table are 'V', for valid (1 or 0)

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Build the CCS instruction for a self-refresh exit, issued as a NOP command
/// @param[in] i_rank the rank on this dimm; also used, unchecked, as the index into CKE_ARY_SRX
/// @param[in] i_idle the idle time to the next command (default to 0)
/// @return the self-refresh exit command CCS instruction
/// @note Using NOP in case SDRAM is in gear down mode and max power saving mode exit
/// @note THIS IS FOR DDR4 NON-LRDIMM ONLY RIGHT NOW
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t self_refresh_exit_command( const uint64_t i_rank, const uint16_t i_idle = 0 )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    fapi2::buffer<uint64_t> l_arr0;
    fapi2::buffer<uint64_t> l_arr1;

    // arr1 only carries the number of idles to wait after this command
    l_arr1.insertFromRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>( i_idle );

    // Per-rank CKE pattern for self-refresh exit:
    // all CKE low except for the rank passed in
    l_arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_ARY_SRX[i_rank]);

    // ACT is high
    l_arr0.setBit<TT::ARR0_DDR_ACTN>();

    // Command encoding (NOP): RAS (A16) high, CAS (A15) high, WE (A14) high
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_16>();
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_15>();
    l_arr0.setBit<TT::ARR0_DDR_ADDRESS_14>();

    // From DDR4 Spec table 17:
    // All other bits from the command truth table are 'V', for valid (1 or 0)

    return instruction_t(i_rank, l_arr0, l_arr1);
}

///
/// @brief Build the CCS instruction for a refresh command
/// @param[in] i_rank the rank on this dimm
/// @param[in] i_idle the idle time to the next command (default to 0)
/// @return the refresh command CCS instruction
/// @note THIS IS FOR DDR4 NON-LRDIMM ONLY RIGHT NOW
/// @note The traits used are always ccsTraits<DEFAULT_MC_TYPE>
///
inline instruction_t refresh_command( const uint64_t i_rank, const uint16_t i_idle = 0 )
{
    using TT = ccsTraits<DEFAULT_MC_TYPE>;

    // A refresh has the same encoding as a self-refresh entry, except that CKE stays high.
    // Build the self-refresh entry instruction first ...
    instruction_t l_inst = self_refresh_entry_command(i_rank, i_idle);

    // ... and then overwrite its CKE field so that CKE is high
    l_inst.arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(CKE_HIGH);

    return l_inst;
}

//
// These functions are a little sugar to keep callers from doing the traits-dance to get the
// appropriate bit field
//

///
/// @brief Select the port to be used by the CCS
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to effect
/// @param[in] i_ports the position of the port to select; it is used as the bit number
///            to set in the mask written to the port select field
/// @return FAPI2_RC_SUCCESS iff ok
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline fapi2::ReturnCode select_ports( const fapi2::Target<T>& i_target, uint64_t i_ports)
{
    fapi2::buffer<uint64_t> l_port_mask;
    fapi2::buffer<uint64_t> l_cntl;

    // Not handling multiple ports here, can't do that for CCS. BRS
    // Only the single bit numbered i_ports ends up set in the mask.
    FAPI_TRY( l_port_mask.setBit(i_ports) );

    // Read-modify-write so that only the port select field of the register changes
    FAPI_TRY( mss::getScom(i_target, TT::MCB_CNTL_REG, l_cntl) );
    l_cntl.template insert<TT::MCB_CNTL_PORT_SEL, TT::MCB_CNTL_PORT_SEL_LEN>(l_port_mask);
    FAPI_TRY( mss::putScom(i_target, TT::MCB_CNTL_REG, l_cntl) );

fapi_try_exit:
    return fapi2::current_err;
}

///
/// @brief Set or clear the stop-on-error bit in a mode register buffer. When '1'b the Hdw
///        stops CCS whenever a failure occurs, when '0'b the Hdw continues CCS even if a
///        failure occurs.
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] (unnamed) the target to effect - only used to deduce T
/// @param[in,out] io_buffer the buffer representing the mode register
/// @param[in] i_value true iff stop whenever failure occurs.
/// @note Only the buffer is modified; nothing is written to the hardware here
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline void stop_on_err( const fapi2::Target<T>&, fapi2::buffer<uint64_t>& io_buffer, const states i_value)
{
    io_buffer.template writeBit<TT::STOP_ON_ERR>(i_value);
}

///
/// @brief Disable ECC checking and ECC correction on the CCS arrays
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] (unnamed) the target to effect - only used to deduce T
/// @param[in,out] io_buffer the buffer representing the mode register
/// @note Only the buffer is modified; nothing is written to the hardware here
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline void disable_ecc( const fapi2::Target<T>&, fapi2::buffer<uint64_t>& io_buffer)
{
    // Turn off the array check ...
    io_buffer.template setBit<TT::DISABLE_ECC_ARRAY_CHK>();

    // ... and the array correction
    io_buffer.template setBit<TT::DISABLE_ECC_ARRAY_CORRECTION>();
}

///
/// @brief Set or clear the UE disable bit in a mode register buffer. When '1'b the Hdw is
///        forced to ignore any array ue or sue errors during CCS command fetching.
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] (unnamed) the target to effect - only used to deduce T
/// @param[in,out] io_buffer the buffer representing the mode register
/// @param[in] i_value true iff ignore any array ue or sue errors.
/// @note Only the buffer is modified; nothing is written to the hardware here
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline void ue_disable( const fapi2::Target<T>&, fapi2::buffer<uint64_t>& io_buffer, const states i_value)
{
    io_buffer.template writeBit<TT::UE_DISABLE>(i_value);
}

///
/// @brief Set or clear the parity-after-command bit in a mode register buffer. When '1'b
///        the Hdw is forced to delay parity a cycle.
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] (unnamed) the target to effect - only used to deduce T
/// @param[in,out] io_buffer the buffer representing the mode register
/// @param[in] i_value mss::ON iff delay parity a cycle
/// @note Only the buffer is modified; nothing is written to the hardware here
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline void parity_after_cmd( const fapi2::Target<T>&, fapi2::buffer<uint64_t>& io_buffer, const states i_value)
{
    io_buffer.template writeBit<TT::CFG_PARITY_AFTER_CMD>(i_value);
}

///
/// @brief Set the DDR calibration timeout counter and its multiplier in a mode register buffer
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] (unnamed) the target to effect - only used to deduce T
/// @param[in,out] io_buffer the buffer representing the mode register
/// @param[in] i_count the count to wait for DDR cal to complete.
/// @param[in] i_mult the DDR calibration time multiplication factor
/// @note Only the buffer is modified; nothing is written to the hardware here
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline void cal_count( const fapi2::Target<T>&, fapi2::buffer<uint64_t>& io_buffer,
                       const uint64_t i_count, const uint64_t i_mult)
{
    // The timeout count
    io_buffer.template insertFromRight<TT::DDR_CAL_TIMEOUT_CNT, TT::DDR_CAL_TIMEOUT_CNT_LEN>(i_count);

    // The multiplier applied to that count
    io_buffer.template insertFromRight<TT::DDR_CAL_TIMEOUT_CNT_MULT, TT::DDR_CAL_TIMEOUT_CNT_MULT_LEN>(i_mult);
}

///
/// @brief Copy CKE signals to CKE Spare on both ports
///        NOTE: DOESN'T APPLY FOR NIMBUS. NO SPARE CHIPS TO COPY TO.
///        0 - Spare CKEs not copied with values from CKE(0:1) and CKE(4:5)
///        1 - Port A CKE(0:1) copied to Port A CKE(2:3), Port A CKE(4:5) copied
///            to Port A CKE(6:7), Port B CKE(0:1) copied to Port B CKE(2:3) and
///            Port B CKE(4:5) copied to Port B CKE(6:7)
/// @tparam T the fapi2::TargetType - derived
/// @tparam TT the ccsTraits associated with T - derived
/// @param[in] (unnamed) the target to effect
/// @param[in,out] io_buffer the buffer representing the mode register
/// @param[in] i_value mss::ON iff Copy CKE signals to CKE Spare on both ports
/// @note no-op for p9n
/// @note Declaration only - the definition is not in this part of the file
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
void copy_cke_to_spare_cke( const fapi2::Target<T>&, fapi2::buffer<uint64_t>& io_buffer, const states i_value);

///
/// @brief Read the modeq register appropriate for this target
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to effect
/// @param[in,out] io_buffer the buffer which receives the contents of the mode register
/// @return FAPI2_RC_SUCCESS iff ok
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline fapi2::ReturnCode read_mode( const fapi2::Target<T>& i_target, fapi2::buffer<uint64_t>& io_buffer)
{
    // Straight scom read of TT::MODEQ_REG; the scom result is handed back unchanged
    fapi2::ReturnCode l_rc = mss::getScom(i_target, TT::MODEQ_REG, io_buffer);
    return l_rc;
}

///
/// @brief Write the modeq register appropriate for this target
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to effect
/// @param[in] i_buffer the buffer holding the value to write to the mode register
/// @return FAPI2_RC_SUCCESS iff ok
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline fapi2::ReturnCode write_mode( const fapi2::Target<T>& i_target, const fapi2::buffer<uint64_t>& i_buffer)
{
    // Straight scom write of TT::MODEQ_REG; the scom result is handed back unchanged
    fapi2::ReturnCode l_rc = mss::putScom(i_target, TT::MODEQ_REG, i_buffer);
    return l_rc;
}

///
/// @brief Configure the NTTM mode bit of the mode register
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to operate on
/// @param[in] i_nttm_mode NTTM we need to turn on or off (i.e. ON, OFF)
/// @return fapi2::ReturnCode fapi2::FAPI2_RC_SUCCESS if ok
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
inline fapi2::ReturnCode configure_nttm( const fapi2::Target<T>& i_target,
        const mss::states i_nttm_mode)
{
    fapi2::buffer<uint64_t> l_mode;

    // Read-modify-write: only the NTTM bit of the mode register is changed
    FAPI_TRY(read_mode(i_target, l_mode));
    l_mode.template writeBit<TT::NTTM_MODE>(i_nttm_mode);
    FAPI_TRY(write_mode(i_target, l_mode));

fapi_try_exit:
    return fapi2::current_err;
}

///
/// @brief Cleans up after executing a set of CCS instructions - multiple ports
/// @tparam P  the port type for this CCS engine
/// @tparam MC the MC type on which to operate
/// @param[in] i_program the vector of instructions
/// @param[in] i_ports the vector of ports
/// @return FAPI2_RC_SUCCESS iff ok
/// @note Declaration only - the definition is not in this part of the file. It is
/// called by ccs::execute once all of the instructions have been run.
///
template< fapi2::TargetType P, mss::mc_type MC>
fapi2::ReturnCode cleanup_from_execute(const ccs::program& i_program,
                                       const std::vector< fapi2::Target<P> >& i_ports);

///
/// @brief Start or stop the CCS engine
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target The MCBIST containing the CCS engine
/// @param[in] i_start_stop bool MSS_CCS_START for starting MSS_CCS_STOP otherwise
/// @return FAPI2_RC_SUCCESS iff success
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
fapi2::ReturnCode start_stop( const fapi2::Target<T>& i_target, const bool i_start_stop )
{
    fapi2::buffer<uint64_t> l_cntl;

    // Do we need to read this? We are setting the only bit defined in the scomdef? BRS
    FAPI_TRY(mss::getScom(i_target, TT::CNTLQ_REG, l_cntl));

    // Set the start bit or the stop bit on top of whatever was read back
    if (i_start_stop)
    {
        l_cntl.template setBit<TT::CCS_START>();
    }
    else
    {
        l_cntl.template setBit<TT::CCS_STOP>();
    }

    FAPI_TRY( mss::putScom(i_target, TT::CNTLQ_REG, l_cntl) );

fapi_try_exit:
    return fapi2::current_err;
}

///
/// @brief Determine the CCS failure type
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam P the target of the CCS instruction (the port)
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target MC target
/// @param[in] i_type the failure type (execute_inst_array passes the CCS status masked
///            with TT::STAT_ERR_MASK)
/// @param[in] i_port The port the CCS instruction is training
/// @return ReturnCode associated with the fail.
/// @note FFDC is handled here, caller doesn't need to do it
/// @note Declaration only - the definition is not in this part of the file
///
template< fapi2::TargetType T = DEFAULT_MC_TARGET, fapi2::TargetType P = DEFAULT_MEM_PORT_TARGET, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
fapi2::ReturnCode fail_type( const fapi2::Target<T>& i_target,
                             const uint64_t i_type,
                             const fapi2::Target<P>& i_port );

///
/// @brief Execute a CCS array already loaded in to the engine
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam P the target of the CCS instruction (the port)
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to effect
/// @param[in] i_program the MCBIST ccs program - to get the polling parameters
/// @param[in] i_port the port associated with the MCBIST array
/// @return FAPI2_RC_SUCCESS iff ok
/// @note The engine is started, the status register is polled until the in-progress bit
/// drops (or the poll gives up) and the last status seen decides success or failure.
///
template< fapi2::TargetType T, fapi2::TargetType P, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
fapi2::ReturnCode execute_inst_array(const fapi2::Target<T>& i_target,
                                     ccs::program& i_program,
                                     const fapi2::Target<P>& i_port)
{
    // Holds the most recent value of the status register seen while polling
    fapi2::buffer<uint64_t> l_status;

    FAPI_TRY(start_stop(i_target, mss::START), "%s Error in execute_inst_array", mss::c_str(i_port) );

    // The result of the poll itself is not looked at - the status latched by the
    // lambda is what gets evaluated below
    mss::poll(i_target, TT::STATQ_REG, i_program.iv_poll,
              [&l_status](const size_t poll_remaining, const fapi2::buffer<uint64_t>& stat_reg) -> bool
    {
        FAPI_DBG("ccs statq 0x%016lx, remaining: %d", stat_reg, poll_remaining);
        l_status = stat_reg;

        // Polling is finished once the engine is no longer in progress
        return l_status.getBit<TT::CCS_IN_PROGRESS>() != 1;
    },
    i_program.iv_probes);

    if (l_status == TT::STAT_QUERY_SUCCESS)
    {
        // Done and success. DONE being the only bit set.
        FAPI_INF("%s CCS Executed Successfully.", mss::c_str(i_port) );
    }
    else
    {
        // So we failed or we're still in progress. Mask off the fail bits
        // and run this through the FFDC generator.
        FAPI_TRY(fail_type(i_target, l_status & TT::STAT_ERR_MASK, i_port), "Error in execute_inst_array" );
    }

fapi_try_exit:
    return fapi2::current_err;
}

///
/// @brief Updates the initial delays based upon the total delays passed in
/// @tparam T the fapi2::TargetType of the target running CCS
/// @tparam MC the memory controller type running CCS
/// @param[in] i_target the target on which to operate
/// @param[in] i_delay the calculated delays from CCS (ccs::execute passes the delay it
///            totalled up over the instructions it loaded)
/// @param[in,out] io_program the program for which to update the delays
/// @return FAPI2_RC_SUCCESS iff ok
/// @note Declaration only - the definition is not in this part of the file
///
template< fapi2::TargetType T, mss::mc_type MC = DEFAULT_MC_TYPE >
fapi2::ReturnCode update_initial_delays( const fapi2::Target<T>& i_target,
        const uint64_t i_delay,
        ccs::program& io_program);

///
/// @brief Execute a set of CCS instructions - multiple ports
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam P  the port type for this CCS engine
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to effect
/// @param[in] i_program the vector of instructions. The instructions are modified while
///            they are loaded (rank configuration, goto field) and the instruction
///            vector is cleared before returning, whether or not an error occurred.
/// @param[in] i_ports the vector of ports. If it is empty no instructions are loaded or
///            executed (the clean up still runs and the instructions are still cleared).
/// @return FAPI2_RC_SUCCESS iff ok
/// @note NOTE(review): the instruction iterator is shared by all of the ports, so with more
/// than one port each port is handed the *next* chunk of instructions rather than the whole
/// program - confirm that this is what multi-port callers expect.
///
template< fapi2::TargetType T, fapi2::TargetType P, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
fapi2::ReturnCode execute( const fapi2::Target<T>& i_target,
                           ccs::program& i_program,
                           const std::vector< fapi2::Target<P> >& i_ports)
{
    // Subtract one for the idle we insert at the end
    constexpr size_t CCS_INSTRUCTION_DEPTH = TT::CCS_ARRAY_LEN - 1;
    constexpr uint64_t CCS_ARR0_ZERO = TT::CCS_ARR0_START;
    constexpr uint64_t CCS_ARR1_ZERO = TT::CCS_ARR1_START;

    ccs::instruction_t l_des = ccs::des_command();

    FAPI_INF("loading ccs instructions (%d) for %s", i_program.iv_instructions.size(), mss::c_str(i_target));

    auto l_inst_iter = i_program.iv_instructions.begin();

    std::vector<rank_configuration> l_rank_configs;
    FAPI_TRY(get_rank_config(i_target, l_rank_configs));

    // Stop the CCS engine just for giggles - it might be running ...
    FAPI_TRY( start_stop(i_target, mss::states::STOP), "Error in ccs::execute" );

    // Make sure the engine really is idle before the arrays are overwritten
    FAPI_ASSERT( mss::poll(i_target, TT::STATQ_REG, poll_parameters(),
                           [](const size_t poll_remaining, const fapi2::buffer<uint64_t>& stat_reg) -> bool
    {
        FAPI_INF("ccs statq (stop) 0x%llx, remaining: %d", stat_reg, poll_remaining);
        return stat_reg.getBit<TT::CCS_IN_PROGRESS>() != 1;
    }),
    TT::setup_trying_to_stop_err(i_target) );

    // The iterator is only ever advanced inside of the per-port loop. Without any ports
    // it would never reach the end and this loop would spin forever, so an empty port
    // list skips the loading altogether.
    while (!i_ports.empty() && l_inst_iter != i_program.iv_instructions.end())
    {
        // Kick off the CCS engine - per port. No broadcast mode for CCS (per Shelton 9/23/15)
        for (const auto& p : i_ports)
        {
            const auto l_port_index = mss::relative_pos<T>(p);
            size_t l_inst_count = 0;

            uint64_t l_total_delay = 0;
            uint64_t l_delay = 0;
            uint64_t l_repeat = 0;
            uint8_t l_current_cke = 0;

            // Shove the instructions into the CCS engine, in chunks of at most
            // CCS_INSTRUCTION_DEPTH instructions, and execute them
            for (; l_inst_iter != i_program.iv_instructions.end()
                 && l_inst_count < CCS_INSTRUCTION_DEPTH; ++l_inst_count, ++l_inst_iter)
            {
                // First, update the current instruction's chip selects for the current port
                FAPI_TRY(l_inst_iter->configure_rank(p, l_rank_configs[l_port_index]), "Error in rank config");

                // Remember the CKE state of the most recent instruction - the closing DES re-uses it
                l_inst_iter->arr0.extractToRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(l_current_cke);

                // Make sure this instruction leads to the next. Notice this limits this mechanism to pretty
                // simple (straight line) CCS programs. Anything with a loop or such will need another mechanism.
                l_inst_iter->arr1.insertFromRight<TT::ARR1_GOTO_CMD, TT::ARR1_GOTO_CMD_LEN>(l_inst_count + 1);
                FAPI_TRY( mss::putScom(i_target, CCS_ARR0_ZERO + l_inst_count, l_inst_iter->arr0), "Error in ccs::execute" );
                FAPI_TRY( mss::putScom(i_target, CCS_ARR1_ZERO + l_inst_count, l_inst_iter->arr1), "Error in ccs::execute" );

                // arr1 contains a specification of the delay and repeat after this instruction, as well
                // as a repeat. Total up the delays as we go so we know how long to wait before polling
                // the CCS engine for completion
                l_inst_iter->arr1.extractToRight<TT::ARR1_IDLES, TT::ARR1_IDLES_LEN>(l_delay);
                l_inst_iter->arr1.extractToRight<TT::ARR1_REPEAT_CMD_CNT, TT::ARR1_REPEAT_CMD_CNT_LEN>(l_repeat);

                l_total_delay += l_delay * (l_repeat + 1);

                FAPI_INF("css inst %d: 0x%016lX 0x%016lX (0x%lx, 0x%lx) delay: 0x%x (0x%x) %s",
                         l_inst_count, l_inst_iter->arr0, l_inst_iter->arr1,
                         CCS_ARR0_ZERO + l_inst_count, CCS_ARR1_ZERO + l_inst_count,
                         l_delay, l_total_delay, mss::c_str(i_target));
            }

            // Updates the initial delays
            FAPI_TRY(update_initial_delays(i_target, l_total_delay, i_program), "Error in ccs::execute");

            FAPI_INF("executing ccs instructions (%d:%d, %d) for %s",
                     i_program.iv_instructions.size(), l_inst_count, i_program.iv_poll.iv_initial_delay, mss::c_str(i_target));

            // Deselect
            l_des.arr0.insertFromRight<TT::ARR0_DDR_CKE, TT::ARR0_DDR_CKE_LEN>(l_current_cke);

            // Insert a DES as our last instruction. DES is idle state anyway and having this
            // here as an instruction forces the CCS engine to wait the delay specified in
            // the last instruction in this array (which it otherwise doesn't do.)
            l_des.arr1.setBit<TT::ARR1_END>();
            FAPI_TRY( mss::putScom(i_target, CCS_ARR0_ZERO + l_inst_count, l_des.arr0), "Error in ccs::execute" );
            FAPI_TRY( mss::putScom(i_target, CCS_ARR1_ZERO + l_inst_count, l_des.arr1), "Error in ccs::execute" );

            FAPI_INF("css inst %d fixup: 0x%016lX 0x%016lX (0x%lx, 0x%lx) %s",
                     l_inst_count, l_des.arr0, l_des.arr1,
                     CCS_ARR0_ZERO + l_inst_count, CCS_ARR1_ZERO + l_inst_count, mss::c_str(i_target));


            FAPI_INF("executing CCS array for port %d (%s)", l_port_index, mss::c_str(p));
            FAPI_TRY( select_ports( i_target, l_port_index), "Error in ccs execute" );
            FAPI_TRY( execute_inst_array(i_target, i_program, p), "Error in ccs execute" );
        }
    }

    // Cleans up after executing the CCS program (runs workarounds if needed)
    FAPI_TRY((cleanup_from_execute<P, DEFAULT_MC_TYPE>(i_program, i_ports)));

fapi_try_exit:
    // The program is consumed by the execution - on success and on failure
    i_program.iv_instructions.clear();
    return fapi2::current_err;
}

///
/// @brief Execute a set of CCS instructions - single port
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam P the target of the CCS instruction (the port)
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target the target to effect
/// @param[in] i_program the vector of instructions
/// @param[in] i_port The target that's being programmed by the array
/// @return FAPI2_RC_SUCCESS iff ok
/// @note Convenience wrapper: forwards to the multiple port execute with a one entry vector
///
template< fapi2::TargetType T, fapi2::TargetType P, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
fapi2::ReturnCode execute( const fapi2::Target<T>& i_target,
                           ccs::program& i_program,
                           const fapi2::Target<P>& i_port)
{
    // Mmm. Might want to find a better way to do this - seems expensive. BRS
    // Wrap the single port in a vector holding exactly one copy of it
    const std::vector< fapi2::Target<P> > l_single_port(1, i_port);

    return execute(i_target, i_program, l_single_port);
}

///
/// @brief Query the status of the CCS engine
/// @tparam T the target type of the chiplet which executes the CCS instruction
/// @tparam TT the CCS traits of the chiplet which executes the CCS instruction
/// @param[in] i_target The MCBIST containing the CCS engine
/// @param[out] io_status The query result: first being the result, second the type
/// @return FAPI2_RC_SUCCESS iff success
/// @note Declaration only - the definition is not in this part of the file
///
template< fapi2::TargetType T, typename TT = ccsTraits<DEFAULT_MC_TYPE> >
fapi2::ReturnCode status_query( const fapi2::Target<T>& i_target, std::pair<uint64_t, uint64_t>& io_status );

} // ends namespace ccs
} // ends namespace mss

#endif
OpenPOWER on IntegriCloud