1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
|
/* IBM_PROLOG_BEGIN_TAG */
/* This is an automatically generated prolog. */
/* */
/* $Source: src/occ_405/timer/timer.c $ */
/* */
/* OpenPOWER OnChipController Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2011,2016 */
/* [+] International Business Machines Corp. */
/* */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
/* implied. See the License for the specific language governing */
/* permissions and limitations under the License. */
/* */
/* IBM_PROLOG_END_TAG */
//*************************************************************************/
// Includes
//*************************************************************************/
#include <timer.h> // timer defines
#include "ssx.h"
#include <trac.h> // Trace macros
#include <occhw_common.h> // PGP common defines
#include <occhw_ocb.h> // OCB timer interfaces
#include <occ_service_codes.h> // Reason codes
#include <timer_service_codes.h> // Module Id
#include <cmdh_fsp.h> // for RCs in the checkpoint macros
#include <dimm_structs.h>
#include <occ_sys_config.h>
#include <pgpe_shared.h>
//*************************************************************************/
// Externs
//*************************************************************************/
// Variable holding main thread loop count (defined outside this file).
// ppc405WDTHndlerFull() in this file requires it to be non-zero on every
// third watchdog interrupt and clears it after each successful check.
extern uint32_t G_mainThreadLoopCounter;
// Running in simics? When true, task_poke_watchdogs() skips the PGPE beacon
// check.
extern bool G_simics_environment;
//*************************************************************************/
// Macros
//*************************************************************************/
// PPC405 watchdog timer handler. ppc405WDTHndler is the name handed to
// ppc405_watchdog_setup() in initWatchdogTimers(); ppc405WDTHndlerFull is the
// C handler defined in this file.
// NOTE(review): SSX_IRQ_FAST2FULL presumably emits the first name as a
// fast-mode stub that promotes to the full-mode handler given as the second
// name - confirm against the SSX headers.
SSX_IRQ_FAST2FULL(ppc405WDTHndler, ppc405WDTHndlerFull);
// OCB timer handler. ocbTHndlerFull is defined in this file; ocbTHndler is not
// referenced anywhere else in this file.
SSX_IRQ_FAST2FULL(ocbTHndler, ocbTHndlerFull);
//*************************************************************************/
// Defines/Enums
//*************************************************************************/
// Change watchdog reset control to take no action on state TSR[WIS]=1
// and TSR[ENW]=1
// Watchdog reset control set to "No reset".
// Passed to ppc405_watchdog_setup() as the watchdog reset control.
#define OCC_TCR_WRC 0
// Bump up wdog period to ~1s.
// Passed to ppc405_watchdog_setup() as the watchdog period selector.
#define OCC_TCR_WP 3
// 4ms represented in nanoseconds (4,000,000 ns).
// Not referenced anywhere else in this file.
#define OCB_TIMER_TIMOUT 4000000
//*************************************************************************/
// Structures
//*************************************************************************/
//*************************************************************************/
// Globals
//*************************************************************************/
// Watchdog enable flag; starts out false and is not referenced by name
// anywhere else in this file.
// NOTE(review): presumably read through the WDOG_ENABLED macro used by
// ppc405WDTHndlerFull() - confirm in timer.h.
bool G_wdog_enabled = false;
// The memory deadman is a per-port timer that the MCU uses to verify that
// the memory's power and thermal are properly monitored. The memory deadman
// timers can be programmed from 100 ms to 28 s. Reading the deadman timer's
// SCOM register resets its value. If the OCC fails to reset the deadman SCOM
// and the timer expires, emergency throttle mode will be enforced.
GpeRequest G_reset_mem_deadman_request; // IPC request (created in init_mem_deadman_reset_task)
GPE_BUFFER(reset_mem_deadman_args_t G_gpe_reset_mem_deadman_args); // IPC args (error status + target mca)
// Address read with in32() by check_pgpe_beacon(); it is not assigned
// anywhere in this file.
uint32_t G_pgpe_beacon_address; // PGPE Beacon Address
//*************************************************************************/
// Function Prototypes
//*************************************************************************/
//*************************************************************************/
// Functions
//*************************************************************************/
// Function Specification
//
// Name: initWatchdogTimers
//
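// Description: Sets up the PPC405 watchdog timer (period OCC_TCR_WP, reset
//              control OCC_TCR_WRC) with ppc405WDTHndler as its interrupt
//              handler, and commits an unrecoverable error log if that
//              setup fails. When the memory type is Nimbus it also
//              initializes the GPE1 IPC task that resets the memory
//              deadman timer.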
//
// End Function Specification
void initWatchdogTimers()
{
int l_rc = SSX_OK;
errlHndl_t l_err = NULL;
TRAC_IMP("Initializing ppc405 watchdog. period=%d, reset_ctrl=%d",
OCC_TCR_WP,
OCC_TCR_WRC);
// set up PPC405 watchdog timer
l_rc = ppc405_watchdog_setup(OCC_TCR_WP, // watchdog period
OCC_TCR_WRC, // watchdog reset control
ppc405WDTHndler, // interrupt handler
NULL); // argument to handler
if (SSX_OK != l_rc)
{
TRAC_ERR("Error setting up ppc405 watchdog timer: l_rc: %d",l_rc);
/*
* @errortype
* @moduleid INIT_WD_TIMERS
* @reasoncode INTERNAL_HW_FAILURE
* @userdata1 Return code of PPC405 watchdog timer setup
* @userdata4 ERC_PPC405_WD_SETUP_FAILURE
* @devdesc Failure on hardware related function
*/
l_err = createErrl(INIT_WD_TIMERS, // mod id
INTERNAL_HW_FAILURE, // reason code
ERC_PPC405_WD_SETUP_FAILURE, // Extended reason code
ERRL_SEV_UNRECOVERABLE, // severity
NULL, // trace buffer
0, // trace size
l_rc, // userdata1
0); // userdata2
// Callout firmware
addCalloutToErrl(l_err,
ERRL_CALLOUT_TYPE_COMPONENT_ID,
ERRL_COMPONENT_ID_FIRMWARE,
ERRL_CALLOUT_PRIORITY_HIGH);
// Commit error log
commitErrl(&l_err);
}
// initialize memory deadman timer's IPC task
if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS)
{
// Initialize the GPE1 IPC task that resets the deadman timer.
init_mem_deadman_reset_task();
}
}
// Function Specification
//
// Name: init_mem_deadman_reset_task
//
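// Description: Clears the memory deadman reset task arguments (error, ffdc
//              and mca) and creates the GpeRequest that runs the
//              IPC_ST_RESET_MEM_DEADMAN function on the GPE1 queue with a
//              5 second timeout. If the request cannot be created, an
//              error is logged and the OCC is halted.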
//
// End Function Specification
void init_mem_deadman_reset_task(void)
{
    errlHndl_t l_errl      = NULL;
    int        l_create_rc = 0;

    // Start from a clean argument block: no error, no FFDC, first MCA.
    G_gpe_reset_mem_deadman_args.mca         = 0;
    G_gpe_reset_mem_deadman_args.error.error = 0;
    G_gpe_reset_mem_deadman_args.error.ffdc  = 0;

    TRAC_INFO("init_mem_deadman_reset_task: Creating request for GPE deadman reset task");

    l_create_rc = gpe_request_create(&G_reset_mem_deadman_request,  // request
                                     &G_async_gpe_queue1,           // GPE1 queue
                                     IPC_ST_RESET_MEM_DEADMAN,      // Function ID
                                     &G_gpe_reset_mem_deadman_args, // GPE argument_ptr
                                     SSX_SECONDS(5),                // timeout
                                     NULL,                          // callback
                                     NULL,                          // callback arg
                                     ASYNC_CALLBACK_IMMEDIATE);     // options

    // Normal case: the request object exists and there is nothing more to do.
    if (0 == l_create_rc)
    {
        return;
    }

    // Being unable to create the GpeRequest object points at a major
    // problem, so log an error and halt the OCC.
    TRAC_ERR("Failed to create memory deadman GpeRequest object[0x%x]", l_create_rc);

    /* @
     * @errortype
     * @moduleid    INIT_WD_TIMERS
     * @reasoncode  GPE_REQUEST_CREATE_FAILURE
     * @userdata1   gpe_request_create return code
     * @userdata2   0
     * @userdata4   OCC_NO_EXTENDED_RC
     * @devdesc     Failure to create GpeRequest object for
     *              memory deadman reset IPC task.
     *
     */
    l_errl = createErrl(INIT_WD_TIMERS,             // modId
                        GPE_REQUEST_CREATE_FAILURE, // reasoncode
                        OCC_NO_EXTENDED_RC,         // Extended reason code
                        ERRL_SEV_PREDICTIVE,        // Severity
                        NULL,                       // Trace Buf
                        DEFAULT_TRACE_SIZE,         // Trace Size
                        l_create_rc,                // userdata1
                        0);                         // userdata2

    CHECKPOINT_FAIL_AND_HALT(l_errl);
}
// Function Specification
//
// Name: task_poke_watchdogs
//
// Description: Called every 2ms on both master and slaves while in observation
// and active state. It performs the following:
// 1. Enable/Reset the OCC heartbeat, setting the count to 8ms.
// 2. Reset memory deadman timer for 1 MCA (by a GPE1 IPC task).
// 3. Every 4ms (every other time called):
// Verify PGPE is still functional by reading PGPE Beacon from
// SRAM if after 8ms (2 consecutive checks) there is no change
// to the PGPE Beacon count then log an error and request reset.
//
// End Function Specification
void task_poke_watchdogs(struct task * i_self)
{
    // i_self is not used by this task.
    pmc_occ_heartbeat_reg_t hbr;             // OCC heart beat register
    static bool L_check_pgpe_beacon = false; // Check GPE beacon this time?

    // 1. Enable OCC heartbeat
    // Zero the whole register image first: hbr is an automatic variable and
    // only two of its bit-fields are assigned below, so any other bits of
    // hbr.value would otherwise be whatever was left on the stack and would
    // be written to the hardware register as-is.
    hbr.value = 0;
    hbr.fields.pmc_occ_heartbeat_time = 8000; // count corresponding to 8 ms
    hbr.fields.pmc_occ_heartbeat_en = true;   // enable heartbeat timer
    out32(OCB_OCCHBR, hbr.value);             // Enable heartbeat register, and set it

    // 2. Reset memory deadman timer (Nimbus memory only)
    if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS)
    {
        manage_mem_deadman_task();
    }

    // 3. Verify PGPE Beacon is not frozen for 8 ms
    if(true == L_check_pgpe_beacon)
    {
        // Examine pgpe Beacon every other call (every 4ms)
        //@TODO: remove when PGPE code is integrated, RTC: 163934
        if(!G_simics_environment) // PGPE Beacon is not implemented in simics
        {
            check_pgpe_beacon();
        }
    }

    // toggle pgpe beacon check flag, check only once every other call (every 4ms)
    L_check_pgpe_beacon = !L_check_pgpe_beacon;
}
// Function Specification
//
// Name: manage_mem_deadman_task
//
// Description: Verify that if a memory deadman_task was scheduled on GPE1 last cycle
// then it is completed. Then if there is a new task to be scheduled
// for this cycle, then schedule it on the GPE1 engine.
// Called every 2ms.
//
// End Function Specification
// MAX number of timeout cycles allowed for memory deadman IPC task
// before logging an error
#define MEM_DEADMAN_TASK_TIMEOUT 2
// Per-call flow:
//   1. If the previous GPE request is still not idle, count a failure against
//      its MCA, trace once, and do nothing else this call.
//   2. If a request was scheduled last time, check that it completed without
//      a GPE error; otherwise count a failure and, once the count reaches
//      MEM_DEADMAN_TASK_TIMEOUT, stop without advancing to the next MCA.
//   3. Advance to the next MCA (wrapping within 0..NUM_NIMBUS_MCAS-1) and, if
//      that MCA is configured, schedule the reset request on GPE1.
//   4. If the failure count of the current MCA has reached
//      MEM_DEADMAN_TASK_TIMEOUT, commit a predictive error log (only once).
void manage_mem_deadman_task(void)
{
    //if a task is scheduled, verify that it is completed ...
    //track # of consecutive failures on a specific RDIMM
    static uint8_t L_scom_timeout[NUM_NIMBUS_MCAS] = {0};
    errlHndl_t l_err = NULL;  // Error handler
    int rc = 0;               // Return code
    uint8_t mca;              // MCA of last memory deadman task (scheduled/not-configured)
    static bool L_gpe_scheduled = false;      // a request was scheduled by the previous pass
    static bool L_gpe_idle_traced = false;    // "still running" already traced
    static bool L_gpe_timeout_logged = false; // timeout error already committed
    static bool L_gpe_had_1_tick = false;     // request was already busy on the previous call
    uint32_t gpe_rc = G_gpe_reset_mem_deadman_args.error.rc; // IPC task rc

    do
    {
        // mca of last memory deadman task (either not-configured or scheduled).
        mca = G_gpe_reset_mem_deadman_args.mca;

        //First, check to see if the previous GPE request still running
        if( !(async_request_is_idle(&G_reset_mem_deadman_request.request)) )
        {
            L_scom_timeout[mca]++;

            //This can happen due to variability in when the task runs,
            //so only trace it after the request has been busy for more
            //than one consecutive call.
            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
            {
                TRAC_INFO("manage_mem_deadman_task: GPE is still running. mca[%d]", mca);
                L_gpe_idle_traced = true;
            }
            L_gpe_had_1_tick = true;
            break;
        }
        else
        {
            //Request is idle
            L_gpe_had_1_tick = false;
            if(L_gpe_idle_traced)
            {
                TRAC_INFO("manage_mem_deadman_task: GPE completed. mca[%d]", mca);
                L_gpe_idle_traced = false;
            }
        }

        //check scom status
        if(L_gpe_scheduled)
        {
            if(!async_request_completed(&G_reset_mem_deadman_request.request) || gpe_rc)
            {
                //Request failed. Keep count of failures and log an error if we reach a
                //max retry count
                L_scom_timeout[mca]++;
                if(L_scom_timeout[mca] >= MEM_DEADMAN_TASK_TIMEOUT)
                {
                    break;
                }
            }
            else // A Task was scheduled last cycle, completed successfully, no errors
            {
                //Reset the timeout.
                L_scom_timeout[mca] = 0;
            }
        }

        //The previous GPE job completed. Now get ready for the next job.
        L_gpe_scheduled = false;

        //We didn't fail, update mca (irrespective of whether it will be scheduled).
        //Valid MCA indexes are 0 .. NUM_NIMBUS_MCAS-1, so wrap after the last
        //valid index. Wrapping only once mca has already reached NUM_NIMBUS_MCAS
        //would let an out-of-range value index L_scom_timeout[] and be handed
        //to the configuration check below.
        if ( mca >= (NUM_NIMBUS_MCAS - 1) )
        {
            mca = 0;
        }
        else
        {
            mca++;
        }
        G_gpe_reset_mem_deadman_args.mca = mca;

        // If the MCA is not configured, break
        if(!NIMBUS_DIMM_INDEX_THROTTLING_CONFIGURED(mca))
        {
            break;
        }

        // The MCA is configured, and the previous IPC task completed successfully
        rc = gpe_request_schedule(&G_reset_mem_deadman_request);

        // Always log an error if gpe request schedule fails
        if( rc )
        {
            //Error in schedule gpe memory deadman reset task
            TRAC_ERR("manage_mem_deadman_task: Failed to schedule memory deadman reset task rc=%x",
                     rc);

            /* @
             * @errortype
             * @moduleid    POKE_WD_TIMERS
             * @reasoncode  GPE_REQUEST_SCHEDULE_FAILURE
             * @userdata1   rc - gpe_request_schedule return code
             * @userdata2   0
             * @userdata4   OCC_NO_EXTENDED_RC
             * @devdesc     OCC Failed to schedule a GPE job for memory deadman reset
             */
            l_err = createErrl(
                POKE_WD_TIMERS,               // modId
                GPE_REQUEST_SCHEDULE_FAILURE, // reasoncode
                OCC_NO_EXTENDED_RC,           // Extended reason code
                ERRL_SEV_UNRECOVERABLE,       // Severity
                NULL,                         // Trace Buf
                DEFAULT_TRACE_SIZE,           // Trace Size
                rc,                           // userdata1
                0                             // userdata2
                );

            addUsrDtlsToErrl(
                l_err,                                            //io_err
                (uint8_t *) &(G_reset_mem_deadman_request.ffdc),  //i_dataPtr,
                sizeof(G_reset_mem_deadman_request.ffdc),         //i_size
                ERRL_USR_DTL_STRUCT_VERSION_1,                    //version
                ERRL_USR_DTL_BINARY_DATA);                        //type

            REQUEST_RESET(l_err);   //This will add a firmware callout for us
            break;
        }

        // Successfully scheduled a new memory deadman timer gpe IPC request
        L_gpe_scheduled = true;

    } while(0);

    // Report a timeout at most once, no matter which MCA triggered it.
    if(L_scom_timeout[mca] >= MEM_DEADMAN_TASK_TIMEOUT && L_gpe_timeout_logged == false)
    {
        TRAC_ERR("manage_mem_deadman_task: Timeout scomming MCA[%d]", mca);

        /* @
         * @errortype
         * @moduleid    POKE_WD_TIMERS
         * @reasoncode  GPE_REQUEST_TASK_TIMEOUT
         * @userdata1   mca number
         * @userdata2   0
         * @userdata4   OCC_NO_EXTENDED_RC
         * @devdesc     Timed out trying to reset the memory deadman timer.
         */
        l_err = createErrl(
            POKE_WD_TIMERS,            // modId
            GPE_REQUEST_TASK_TIMEOUT,  // reasoncode
            OCC_NO_EXTENDED_RC,        // Extended reason code
            ERRL_SEV_PREDICTIVE,       // Severity
            NULL,                      // Trace Buf
            DEFAULT_TRACE_SIZE,        // Trace Size
            mca,                       // userdata1
            0                          // userdata2
            );

        addUsrDtlsToErrl(l_err,                                            //io_err
                         (uint8_t *) &(G_reset_mem_deadman_request.ffdc),  //i_dataPtr,
                         sizeof(G_reset_mem_deadman_request.ffdc),         //i_size
                         ERRL_USR_DTL_STRUCT_VERSION_1,                    //version
                         ERRL_USR_DTL_BINARY_DATA);                        //type

        // Commit Error Log
        commitErrl(&l_err);
        L_gpe_timeout_logged = true;
    }

    return;
}
// Function Specification
//
// Name: check_pgpe_beacon
//
// Description: Checks the PGPE Beacon every 4ms
// logs an error and resets if it
// doesn't change for 8 ms
//
// End Function Specification
void check_pgpe_beacon(void)
{
    // Reads the PGPE beacon word at G_pgpe_beacon_address and compares it
    // with the value seen on the previous call. The first call only records
    // a baseline. Two consecutive unchanged readings produce a single
    // unrecoverable error log plus a reset request; any change clears the
    // "unchanged" state and becomes the new reference value.
    uint32_t pgpe_beacon;                             // PGPE Beacon value now
    static uint32_t L_prev_pgpe_beacon = 0;           // PGPE Beacon value at the previous check
    static bool L_first_pgpe_beacon_check = true;     // First time examining Beacon?
    static bool L_pgpe_beacon_unchanged_4ms = false;  // pgpe beacon unchanged once (4ms)
    static bool L_error_logged = false;               // trace and error log only once
    errlHndl_t l_err = NULL;                          // Error handler

    do
    {
        // return PGPE Beacon
        pgpe_beacon = in32(G_pgpe_beacon_address);

        // in first invocation, just initialize L_prev_pgpe_beacon
        // don't check if the PGPE Beacon value changed
        if(L_first_pgpe_beacon_check)
        {
            L_prev_pgpe_beacon = pgpe_beacon;
            L_first_pgpe_beacon_check = false;
            break;
        }

        // L_prev_pgpe_beacon has been initialized; Every 4ms verify
        // that PGPE Beacon has changed relative to previous reading
        if(pgpe_beacon == L_prev_pgpe_beacon)
        {
            if(false == L_pgpe_beacon_unchanged_4ms)
            {
                // First time beacon unchanged (4ms), mark flag
                L_pgpe_beacon_unchanged_4ms = true;
                break;
            }
            else if (false == L_error_logged)
            {
                L_error_logged = true;

                // Second time beacon unchanged (8ms), log timeout error
                TRAC_ERR("Error PGPE Beacon didn't change for 8 ms: %d",
                         pgpe_beacon);

                /*
                 * @errortype
                 * @moduleid    POKE_WD_TIMERS
                 * @reasoncode  PGPE_FAILURE
                 * @userdata1   PGPE Beacon Value
                 * @userdata2   PGPE Beacon Address
                 * @userdata4   ERC_PGPE_BEACON_TIMEOUT
                 * @devdesc     PGPE Beacon timeout
                 */
                l_err = createErrl(POKE_WD_TIMERS,          // mod id
                                   PGPE_FAILURE,            // reason code
                                   ERC_PGPE_BEACON_TIMEOUT, // Extended reason code
                                   ERRL_SEV_UNRECOVERABLE,  // severity
                                   NULL,                    // trace buffer
                                   0,                       // trace size
                                   pgpe_beacon,             // userdata1
                                   G_pgpe_beacon_address);  // userdata2

                // Commit error log and request reset
                REQUEST_RESET(l_err);
            }
        }
        else
        {
            // pgpe beacon changed over the last 4 ms
            L_pgpe_beacon_unchanged_4ms = false;

            // Remember the new value so the next check compares against this
            // reading. Without this update every later reading would be
            // compared with the very first sample, and a beacon that froze
            // at any other value would never be detected.
            L_prev_pgpe_beacon = pgpe_beacon;
        }
    } while(0);
}
// Function Specification
//
// Name: ppc405WDTHndlerFull
//
// Description: PPC405 watchdog interrupt handler
//
// End Function Specification
void ppc405WDTHndlerFull(void * i_arg, SsxIrqId i_irq, int i_priority)
{
    // Number of watchdog interrupts seen since the last main-thread check.
    static uint8_t L_irq_count = 0;

    // None of the handler arguments are needed.
    (void)i_arg;
    (void)i_irq;
    (void)i_priority;

    // Unconditionally clear TSR[ENW,WIS] so the watchdog state machine starts
    // over. If we were to halt with TSR[WIS]=1 still set, the watchdog would
    // eventually set TSR[ENW]=1 and, when the following period expired, the
    // 405 would perform the action selected by TCR[WRC] - possibly resetting
    // the OCC while it sits in a halted state, which is not wanted.
    mtspr(SPRN_TSR, (TSR_ENW | TSR_WIS));

    // Nothing is counted or checked while the watchdog is disabled.
    if (!(WDOG_ENABLED))
    {
        return;
    }

    // Only every third interrupt leads to a main-thread check.
    L_irq_count++;
    if (L_irq_count != 3)
    {
        return;
    }
    L_irq_count = 0;

    // Three watchdog interrupts have gone by: the main thread loop counter
    // must have moved off zero in the meantime, otherwise the OCC is halted.
    if (G_mainThreadLoopCounter > 0)
    {
        // Main thread ran at least once since the previous check; re-arm.
        G_mainThreadLoopCounter = 0;
        return;
    }

    OCC_HALT(ERRL_RC_WDOG_TIMER);
    TRAC_ERR("Should have halted here due to WDOG");
}
// Function Specification
//
// Name: ocbTHndlerFull
//
// Description: OCB timer interrupt handler
//
// End Function Specification
void ocbTHndlerFull(void * i_arg, SsxIrqId i_irq, int i_priority)
{
    // The interrupt arguments carry nothing this handler needs.
    (void)i_arg;
    (void)i_irq;
    (void)i_priority;

    // Halt the OCC, reporting the OCB timer as the reason. The trace below
    // is only expected to be reached if the halt did not take effect.
    OCC_HALT(ERRL_RC_OCB_TIMER);
    TRAC_ERR("Should have halted here due to THndlerFull");
}
|