summaryrefslogtreecommitdiffstats
path: root/src/occ_405/timer/timer.c
blob: 78934972a157dd936ccc90a6dbf044ba8ce5e3b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* $Source: src/occ_405/timer/timer.c $                                   */
/*                                                                        */
/* OpenPOWER OnChipController Project                                     */
/*                                                                        */
/* Contributors Listed Below - COPYRIGHT 2011,2016                        */
/* [+] International Business Machines Corp.                              */
/*                                                                        */
/*                                                                        */
/* Licensed under the Apache License, Version 2.0 (the "License");        */
/* you may not use this file except in compliance with the License.       */
/* You may obtain a copy of the License at                                */
/*                                                                        */
/*     http://www.apache.org/licenses/LICENSE-2.0                         */
/*                                                                        */
/* Unless required by applicable law or agreed to in writing, software    */
/* distributed under the License is distributed on an "AS IS" BASIS,      */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
/* implied. See the License for the specific language governing           */
/* permissions and limitations under the License.                         */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */

//*************************************************************************/
// Includes
//*************************************************************************/
#include <timer.h>                  // timer defines
#include "ssx.h"
#include <trac.h>                   // Trace macros
#include <occhw_common.h>           // PGP common defines
#include <occhw_ocb.h>              // OCB timer interfaces
#include <occ_service_codes.h>      // Reason codes
#include <timer_service_codes.h>    // Module Id
#include <cmdh_fsp.h>               // for RCs in the checkpoint macros
#include <dimm_structs.h>
#include <occ_sys_config.h>
#include <pgpe_shared.h>

//*************************************************************************/
// Externs
//*************************************************************************/
// Variable holding main thread loop count.
// Defined outside this file. ppc405WDTHndlerFull() below tests it for a
// non-zero value and clears it back to 0 on every third watchdog interrupt.
extern uint32_t G_mainThreadLoopCounter;
// Running in simics?
// Defined outside this file. When true, task_poke_watchdogs() skips the
// PGPE beacon check.
extern bool G_simics_environment;

//*************************************************************************/
// Macros
//*************************************************************************/
// PPC405 watchdog timer handler.
// ppc405WDTHndler is the handler passed to ppc405_watchdog_setup() in
// initWatchdogTimers(); ppc405WDTHndlerFull() is implemented in this file.
// NOTE(review): SSX_IRQ_FAST2FULL is defined by SSX and is not visible here;
// presumably it generates the first name as an entry point that dispatches to
// the second -- confirm against the SSX headers.
SSX_IRQ_FAST2FULL(ppc405WDTHndler, ppc405WDTHndlerFull);
// OCB timer handler.
// ocbTHndlerFull() is implemented in this file; ocbTHndler itself is not
// referenced anywhere else in this file.
SSX_IRQ_FAST2FULL(ocbTHndler, ocbTHndlerFull);

//*************************************************************************/
// Defines/Enums
//*************************************************************************/
// Change watchdog reset control to take no action on state TSR[WIS]=1
// and TSR[ENW]=1
// Watchdog reset control set to "No reset"
// Passed as the reset-control argument of ppc405_watchdog_setup().
#define OCC_TCR_WRC 0
// Bump up wdog period to ~1s
// Passed as the period argument of ppc405_watchdog_setup().
#define OCC_TCR_WP 3
// 4ms represented in nanoseconds
// NOTE(review): not referenced anywhere else in the visible part of this
// file, and the name is spelled "TIMOUT" -- confirm whether it is still needed.
#define OCB_TIMER_TIMOUT    4000000

//*************************************************************************/
// Structures
//*************************************************************************/

//*************************************************************************/
// Globals
//*************************************************************************/
// Watchdog enable flag, initially false. It is not referenced by name anywhere
// else in this file.
// NOTE(review): presumably this is what the WDOG_ENABLED macro tested in
// ppc405WDTHndlerFull() expands to -- confirm in timer.h.
bool G_wdog_enabled = false;

// memory deadman is a per port timer that the MCU uses to verify that
// the memory's power and thermal are properly monitored. The memory deadman
// timers can be programmed 100 ms to 28 s. Reading the deadman timer's SCOM
// register resets its value. If the OCC fails to reset the deadman SCOM
// and the timer is expired, emergency throttle mode will be enforced.
//
// The request object is created in init_mem_deadman_reset_task() and
// scheduled from manage_mem_deadman_task(). The argument block carries the
// MCA number of the current/last request and the error fields of the IPC task.
GpeRequest G_reset_mem_deadman_request;                              // IPC request
GPE_BUFFER(reset_mem_deadman_args_t G_gpe_reset_mem_deadman_args);   // IPC args

// Address that check_pgpe_beacon() reads with in32() to sample the PGPE
// beacon. It is not assigned anywhere in this file.
uint32_t G_pgpe_beacon_address;      // PGPE Beacon Address

//*************************************************************************/
// Function Prototypes
//*************************************************************************/

//*************************************************************************/
// Functions
//*************************************************************************/

// Function Specification
//
// Name: initWatchdogTimers
//
// Description:
//
// End Function Specification
void initWatchdogTimers()
{
    int l_rc = SSX_OK;
    errlHndl_t l_err = NULL;

    TRAC_IMP("Initializing ppc405 watchdog. period=%d, reset_ctrl=%d",
             OCC_TCR_WP,
             OCC_TCR_WRC);

    // set up PPC405 watchdog timer
    l_rc = ppc405_watchdog_setup(OCC_TCR_WP,          // watchdog period
                                 OCC_TCR_WRC,         // watchdog reset control
                                 ppc405WDTHndler,     // interrupt handler
                                 NULL);               // argument to handler

    if (SSX_OK != l_rc)
    {
        TRAC_ERR("Error setting up ppc405 watchdog timer: l_rc: %d",l_rc);
        /*
         * @errortype
         * @moduleid    INIT_WD_TIMERS
         * @reasoncode  INTERNAL_HW_FAILURE
         * @userdata1   Return code of PPC405 watchdog timer setup
         * @userdata4   ERC_PPC405_WD_SETUP_FAILURE
         * @devdesc     Failure on hardware related function
         */
        l_err = createErrl(INIT_WD_TIMERS,                 // mod id
                           INTERNAL_HW_FAILURE,            // reason code
                           ERC_PPC405_WD_SETUP_FAILURE,    // Extended reason code
                           ERRL_SEV_UNRECOVERABLE,         // severity
                           NULL,                           // trace buffer
                           0,                              // trace size
                           l_rc,                           // userdata1
                           0);                             // userdata2

        // Callout firmware
        addCalloutToErrl(l_err,
                         ERRL_CALLOUT_TYPE_COMPONENT_ID,
                         ERRL_COMPONENT_ID_FIRMWARE,
                         ERRL_CALLOUT_PRIORITY_HIGH);

        // Commit error log
        commitErrl(&l_err);
    }

    // initialize memory deadman timer's IPC task
    if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS)
    {
        // Initialize the GPE1 IPC task that resets the deadman timer.
        init_mem_deadman_reset_task();
    }
}

// Function Specification
//
// Name: init_mem_deadman_reset_task
//
// Description: Clear the argument block of the memory deadman reset IPC task
//              (error, ffdc and mca all set to 0) and create the GpeRequest
//              G_reset_mem_deadman_request for function
//              IPC_ST_RESET_MEM_DEADMAN on G_async_gpe_queue1, with a
//              5 second timeout and no callback.
//              If the request cannot be created, a predictive error log is
//              built and passed to CHECKPOINT_FAIL_AND_HALT.
//
// End Function Specification
void init_mem_deadman_reset_task(void)
{
    errlHndl_t l_errl      = NULL;
    int        l_create_rc = 0;

    // Start from a clean argument block: no error, no FFDC, first MCA
    G_gpe_reset_mem_deadman_args.error.error = 0;
    G_gpe_reset_mem_deadman_args.error.ffdc  = 0;
    G_gpe_reset_mem_deadman_args.mca         = 0;

    TRAC_INFO("init_mem_deadman_reset_task: Creating request for GPE deadman reset task");
    l_create_rc = gpe_request_create(
                       &G_reset_mem_deadman_request,            // request
                       &G_async_gpe_queue1,                     // GPE1 queue
                       IPC_ST_RESET_MEM_DEADMAN,                // Function ID
                       &G_gpe_reset_mem_deadman_args,           // GPE argument_ptr
                       SSX_SECONDS(5),                          // timeout
                       NULL,                                    // callback
                       NULL,                                    // callback arg
                       ASYNC_CALLBACK_IMMEDIATE);               // options

    // Nothing more to do when the request object was created
    if (0 == l_create_rc)
    {
        return;
    }

    // Not being able to create the GpeRequest object points at a major
    // problem, so log an error and halt the OCC.
    TRAC_ERR("Failed to create memory deadman GpeRequest object[0x%x]", l_create_rc);

    /* @
     * @errortype
     * @moduleid    INIT_WD_TIMERS
     * @reasoncode  GPE_REQUEST_CREATE_FAILURE
     * @userdata1   gpe_request_create return code
     * @userdata2   0
     * @userdata4   OCC_NO_EXTENDED_RC
     * @devdesc     Failure to create GpeRequest object for
     *              memory deadman reset IPC task.
     *
     */
    l_errl = createErrl(INIT_WD_TIMERS,                 // modId
                        GPE_REQUEST_CREATE_FAILURE,     // reasoncode
                        OCC_NO_EXTENDED_RC,             // Extended reason code
                        ERRL_SEV_PREDICTIVE,            // Severity
                        NULL,                           // Trace Buf
                        DEFAULT_TRACE_SIZE,             // Trace Size
                        l_create_rc,                    // userdata1
                        0);                             // userdata2

    CHECKPOINT_FAIL_AND_HALT(l_errl);
}

// Function Specification
//
// Name: task_poke_watchdogs
//
// Description: Called every 2ms on both master and slaves while in observation
//               and active state. It performs the following:
//               1. Enable/Reset the OCC heartbeat, setting the count to 8ms.
//               2. Reset memory deadman timer for 1 MCA (by a GPE1 IPC task).
//                  Only done when the memory type is Nimbus.
//               3. Every 4ms (every other time called):
//                  Verify PGPE is still functional by reading PGPE Beacon from
//                  SRAM if after 8ms (2 consecutive checks) there is no change
//                  to the PGPE Beacon count then log an error and request reset.
//                  Skipped entirely when running in simics.
//
// Parameters:  i_self - task descriptor; not used by this function.
//
// End Function Specification
void task_poke_watchdogs(struct task * i_self)
{
    pmc_occ_heartbeat_reg_t hbr;                          // OCC heart beat register

    static bool             L_check_pgpe_beacon = false;  // Check GPE beacon this time?

// 1. Enable OCC heartbeat

    // hbr lives on the stack and only two of its fields are assigned below.
    // Clear the whole register image first so that no indeterminate bits are
    // written to the hardware register.
    hbr.value = 0;

    hbr.fields.pmc_occ_heartbeat_time = 8000; // count corresponding to 8 ms
    hbr.fields.pmc_occ_heartbeat_en   = true; // enable heartbeat timer

    out32(OCB_OCCHBR, hbr.value);             // Enable heartbeat register, and set it


// 2. Reset memory deadman timer
    if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS)
    {
        manage_mem_deadman_task();
    }

// 3. Verify PGPE Beacon is not frozen for 8 ms
    if(true == L_check_pgpe_beacon)
    {
        // Examine pgpe Beacon every other call (every 4ms)
        //@TODO: remove when PGPE code is integrated, RTC: 163934
        if(!G_simics_environment) // PGPE Beacon is not implemented in simics
        {
            check_pgpe_beacon();
        }
    }

    // toggle pgpe beacon check flag, check only once every other call (every 4ms)
    L_check_pgpe_beacon = !L_check_pgpe_beacon;

}

// Function Specification
//
// Name: manage_mem_deadman_task
//
// Description: Verify that if a memory deadman_task was scheduled on GPE1 last cycle
//              then it is completed. Then if there is a new task to be scheduled
//              for this cycle, then schedule it on the GPE1 engine.
//              Called every 2ms.
//
//              Per call, in order:
//              1. If the previous request is not idle yet, count a timeout
//                 tick against its MCA and stop.
//              2. If a request was scheduled last time and it did not
//                 complete, or the IPC task reported a non-zero rc, count a
//                 failure against its MCA; stop once the count reaches
//                 MEM_DEADMAN_TASK_TIMEOUT. Otherwise clear the count.
//              3. Advance to the next MCA, cycling through
//                 0 .. NUM_NIMBUS_MCAS-1, and store it in the IPC arguments.
//              4. If that MCA is configured, schedule the request; a
//                 scheduling failure is logged and a reset is requested.
//              After these steps, a predictive error is committed once (and
//              only once) when the timeout count of the current MCA has
//              reached MEM_DEADMAN_TASK_TIMEOUT.
//
// End Function Specification

// MAX number of timeout cycles allowed for memory deadman IPC task
// before logging an error
#define MEM_DEADMAN_TASK_TIMEOUT 2

void manage_mem_deadman_task(void)
{
    //if a task is scheduled, verify that it is completed ...
    //track # of consecutive failures on a specific RDIMM
    // NOTE(review): the entries are uint8_t and keep being incremented while
    // a request stays stuck, so they wrap back to 0 after 256 increments --
    // confirm whether saturating at the maximum is wanted instead.
    static uint8_t L_scom_timeout[NUM_NIMBUS_MCAS] = {0};

    errlHndl_t     l_err     = NULL; // Error handler
    int            rc        = 0;    // Return code
    uint8_t        mca;              // MCA of last memory deadman task (scheduled/not-configured)
    static bool    L_gpe_scheduled      = false; // a request was scheduled by the previous pass
    static bool    L_gpe_idle_traced    = false; // "still running" has been traced
    static bool    L_gpe_timeout_logged = false; // timeout error log already committed
    static bool    L_gpe_had_1_tick     = false; // request was already busy on the previous call

    uint32_t       gpe_rc = G_gpe_reset_mem_deadman_args.error.rc;  // IPC task rc

    do
    {   // mca of last memory deadman task (either not-configured or scheduled).
        mca = G_gpe_reset_mem_deadman_args.mca;

        //First, check to see if the previous GPE request still running
        if( !(async_request_is_idle(&G_reset_mem_deadman_request.request)) )
        {
            L_scom_timeout[mca]++;
            //This can happen due to variability in when the task runs,
            //so only trace it when the request was busy on two calls in a row
            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
            {
                TRAC_INFO("manage_mem_deadman_task: GPE is still running. mca[%d]", mca);
                L_gpe_idle_traced = true;
            }
            L_gpe_had_1_tick = true;
            break;
        }
        else
        {
            //Request is idle
            L_gpe_had_1_tick = false;
            if(L_gpe_idle_traced)
            {
                TRAC_INFO("manage_mem_deadman_task: GPE completed. mca[%d]", mca);
                L_gpe_idle_traced = false;
            }
        }

        //check scom status
        if(L_gpe_scheduled)
        {
            if(!async_request_completed(&G_reset_mem_deadman_request.request) || gpe_rc)
            {
                //Request failed. Keep count of failures and log an error if we reach a
                //max retry count
                L_scom_timeout[mca]++;
                if(L_scom_timeout[mca] >= MEM_DEADMAN_TASK_TIMEOUT)
                {
                    break;
                }

            }
            else // A Task was scheduled last cycle, completed successfully, no errors
            {
                //Reset the timeout.
                L_scom_timeout[mca] = 0;
            }
        }

        //The previous GPE job completed. Now get ready for the next job.
        L_gpe_scheduled = false;


        //We didn't fail, update mca (irrespective of whether it will be scheduled)
        //Valid MCA numbers are 0 .. NUM_NIMBUS_MCAS-1. Wrap after the last
        //valid one, so that mca never becomes NUM_NIMBUS_MCAS; that value
        //would index one element past the end of L_scom_timeout below.
        if ( mca >= (NUM_NIMBUS_MCAS - 1) )
        {
            mca  = 0;
        }
        else
        {
            mca++;
        }
        G_gpe_reset_mem_deadman_args.mca = mca;


        // If the MCA is not configured, break
        if(!NIMBUS_DIMM_INDEX_THROTTLING_CONFIGURED(mca))
        {
            break;
        }

        // The MCA is configured, and the previous IPC task completed successfully
        rc = gpe_request_schedule(&G_reset_mem_deadman_request);

        // Always log an error if gpe request schedule fails
        if( rc )
        {
            //Error in schedule gpe memory deadman reset task
            TRAC_ERR("manage_mem_deadman_task: Failed to schedule memory deadman reset task rc=%x",
                     rc);

            /* @
             * @errortype
             * @moduleid    POKE_WD_TIMERS
             * @reasoncode  GPE_REQUEST_SCHEDULE_FAILURE
             * @userdata1   rc - gpe_request_schedule return code
             * @userdata2   0
             * @userdata4   OCC_NO_EXTENDED_RC
             * @devdesc     OCC Failed to schedule a GPE job for memory deadman reset
             */
            l_err = createErrl(
                POKE_WD_TIMERS,                         // modId
                GPE_REQUEST_SCHEDULE_FAILURE,           // reasoncode
                OCC_NO_EXTENDED_RC,                     // Extended reason code
                ERRL_SEV_UNRECOVERABLE,                 // Severity
                NULL,                                   // Trace Buf
                DEFAULT_TRACE_SIZE,                     // Trace Size
                rc,                                     // userdata1
                0                                       // userdata2
                );

            // Attach the request's FFDC as binary user details
            addUsrDtlsToErrl(
                l_err,                                            //io_err
                (uint8_t *) &(G_reset_mem_deadman_request.ffdc),  //i_dataPtr,
                sizeof(G_reset_mem_deadman_request.ffdc),         //i_size
                ERRL_USR_DTL_STRUCT_VERSION_1,                    //version
                ERRL_USR_DTL_BINARY_DATA);                        //type

            REQUEST_RESET(l_err);   //This will add a firmware callout for us
            break;

        }

        // Successfully scheduled a new memory deadman timer gpe IPC request
        L_gpe_scheduled = true;

    } while(0);


    // mca is either the MCA whose request is still outstanding / failed, or
    // the MCA that was just selected above.
    if(L_scom_timeout[mca] >= MEM_DEADMAN_TASK_TIMEOUT && L_gpe_timeout_logged == false)
    {
        TRAC_ERR("manage_mem_deadman_task: Timeout scomming MCA[%d]", mca);

        /* @
         * @errortype
         * @moduleid    POKE_WD_TIMERS
         * @reasoncode  GPE_REQUEST_TASK_TIMEOUT
         * @userdata1   mca number
         * @userdata2   0
         * @userdata4   OCC_NO_EXTENDED_RC
         * @devdesc     Timed out trying to reset the memory deadman timer.
         */

        l_err = createErrl(
                POKE_WD_TIMERS,                         // modId
                GPE_REQUEST_TASK_TIMEOUT,               // reasoncode
                OCC_NO_EXTENDED_RC,                     // Extended reason code
                ERRL_SEV_PREDICTIVE,                    // Severity
                NULL,                                   // Trace Buf
                DEFAULT_TRACE_SIZE,                     // Trace Size
                mca,                                    // userdata1
                0                                       // userdata2
                );

        addUsrDtlsToErrl(l_err,                                   //io_err
                (uint8_t *) &(G_reset_mem_deadman_request.ffdc),  //i_dataPtr,
                sizeof(G_reset_mem_deadman_request.ffdc),         //i_size
                ERRL_USR_DTL_STRUCT_VERSION_1,                    //version
                ERRL_USR_DTL_BINARY_DATA);                        //type

        // Commit Error Log
        commitErrl(&l_err);

        // Only ever log this timeout once
        L_gpe_timeout_logged = true;
    }

    return;
}

// Function Specification
//
// Name: check_pgpe_beacon
//
// Description: Checks the PGPE Beacon every 4ms
//              logs an error and resets if it
//              doesn't change for 8 ms
//
//              The beacon is read from G_pgpe_beacon_address and compared
//              with the value seen by the previous call. The first call only
//              records the value. One unchanged reading sets a flag; a second
//              consecutive unchanged reading creates an unrecoverable error
//              log and requests a reset (done at most once). A changed
//              reading clears the flag and becomes the new reference value.
//
// End Function Specification

void check_pgpe_beacon(void)
{
    uint32_t        pgpe_beacon;                         // PGPE Beacon value now
    static uint32_t L_prev_pgpe_beacon          = 0;     // PGPE Beacon value 4 ms ago
    static bool     L_first_pgpe_beacon_check   = true;  // First time examining Beacon?
    static bool     L_pgpe_beacon_unchanged_4ms = false; // pgpe beacon unchanged once (4ms)
    static bool     L_error_logged              = false; // trace and error log only once
    errlHndl_t      l_err                       = NULL;  // Error handler
    do
    {
        // return PGPE Beacon
        pgpe_beacon = in32(G_pgpe_beacon_address);

        // in first invocation, just initialize L_prev_pgpe_beacon
        // don't check if the PGPE Beacon value changed
        if(L_first_pgpe_beacon_check)
        {
            L_prev_pgpe_beacon = pgpe_beacon;
            L_first_pgpe_beacon_check = false;
            break;
        }

        // L_prev_pgpe_beacon has been initialized; Every 4ms verify
        // that PGPE Beacon has changed relative to previous reading
        if(pgpe_beacon == L_prev_pgpe_beacon)
        {
            if(false == L_pgpe_beacon_unchanged_4ms)
            {
                // First time beacon unchanged (4ms), mark flag
                L_pgpe_beacon_unchanged_4ms = true;
                break;
            }
            else if (false == L_error_logged)
            {
                L_error_logged = true;

                // Second time beacon unchanged (8ms), log timeout error
                TRAC_ERR("Error PGPE Beacon didn't change for 8 ms: %d",
                         pgpe_beacon);

                /*
                 * @errortype
                 * @moduleid    POKE_WD_TIMERS
                 * @reasoncode  PGPE_FAILURE
                 * @userdata1   PGPE Beacon Value
                 * @userdata2   PGPE Beacon Address
                 * @userdata4   ERC_PGPE_BEACON_TIMEOUT
                 * @devdesc     PGPE Beacon timeout
                 */
                l_err = createErrl(POKE_WD_TIMERS,             // mod id
                                   PGPE_FAILURE,               // reason code
                                   ERC_PGPE_BEACON_TIMEOUT,    // Extended reason code
                                   ERRL_SEV_UNRECOVERABLE,     // severity
                                   NULL,                       // trace buffer
                                   0,                          // trace size
                                   pgpe_beacon,                // userdata1
                                   G_pgpe_beacon_address);     // userdata2

                // Commit error log and request reset
                REQUEST_RESET(l_err);
            }
        }
        else
        {
            // pgpe beacon changed over the last 4 ms
            L_pgpe_beacon_unchanged_4ms = false;

            // Remember this reading so that the next check compares against
            // the value from 4 ms ago. Without this the comparison would
            // always be made against the very first beacon value ever read,
            // and a beacon frozen at any other value would go unnoticed.
            L_prev_pgpe_beacon = pgpe_beacon;
        }
    } while(0);

}


// Function Specification
//
// Name: ppc405WDTHndlerFull
//
// Description: PPC405 watchdog interrupt handler.
//              Clears TSR[ENW] and TSR[WIS] on every invocation. While the
//              watchdog is enabled (WDOG_ENABLED), every third interrupt
//              checks G_mainThreadLoopCounter: a non-zero counter is cleared
//              back to 0, a zero counter halts the OCC with
//              ERRL_RC_WDOG_TIMER.
//
// Parameters:  i_arg, i_irq, i_priority - not used by this handler.
//
// End Function Specification
void ppc405WDTHndlerFull(void * i_arg, SsxIrqId i_irq, int i_priority)
{
    // Watchdog interrupts seen since the main thread was last checked
    static uint8_t L_irq_count = 0;

    // The watchdog state machine must be reset on every interrupt by clearing
    // TSR[ENW,WIS]. Were we to halt with TSR[WIS]=1 left set, the watchdog
    // counter would eventually set TSR[ENW]=1, and when the following
    // watchdog period expires the 405 would take the action configured in
    // TCR[WRC] -- possibly resetting the OCC while it sits in a halted state,
    // which is not wanted.
    mtspr(SPRN_TSR, (TSR_ENW | TSR_WIS));

    // Nothing is counted or checked while the watchdog is disabled
    if (!(WDOG_ENABLED))
    {
        return;
    }

    // The hardware timer should be set to around a second; the main thread
    // is only examined on every third interrupt.
    L_irq_count++;
    if (L_irq_count != 3)
    {
        return;
    }
    L_irq_count = 0;

    // Third interrupt: the main thread loop counter must have moved off zero
    // since it was last cleared here, otherwise the OCC is halted.
    if (0 == G_mainThreadLoopCounter)
    {
        OCC_HALT(ERRL_RC_WDOG_TIMER);
        TRAC_ERR("Should have halted here due to WDOG");
    }
    else
    {
        // The main thread has run at least once since the previous check.
        // NOTE(review): the original comment said "in the last ~6 seconds",
        // which does not match three interrupts at ~1s each -- confirm the
        // actual watchdog period.
        G_mainThreadLoopCounter = 0;
    }
}

// Function Specification
//
// Name: ocbTHndlerFull
//
// Description: OCB timer interrupt handler.
//              Halts the OCC with exception code ERRL_RC_OCB_TIMER. The
//              trace that follows is only emitted if the halt returns.
//
// Parameters:  i_arg, i_irq, i_priority - not used by this handler.
//
// End Function Specification
void ocbTHndlerFull(void * i_arg, SsxIrqId i_irq, int i_priority)
{
    // None of the handler arguments are needed
    (void)i_arg;
    (void)i_irq;
    (void)i_priority;

    // Halt the OCC, reporting the OCB timer as the reason
    OCC_HALT(ERRL_RC_OCB_TIMER);
    TRAC_ERR("Should have halted here due to THndlerFull");
}
OpenPOWER on IntegriCloud