summary | refs | log | tree | commit | diff | stats
path: root/src/occ_405/cent
diff options
context:
space:
mode:
author: mbroyles <mbroyles@us.ibm.com> 2018-07-10 13:43:15 -0500
committer: Martha Broyles <mbroyles@us.ibm.com> 2018-07-12 11:14:57 -0400
commit: b8a8037ca194fc690ff1a859b5c0ddf08e708b81 (patch)
tree: 217876bae0867bb9374cfc04c44a71b19242a10c /src/occ_405/cent
parent: 6d556b9b95fd84ca8d4a652cf1a08ffb3b613d07 (diff)
download: talos-occ-b8a8037ca194fc690ff1a859b5c0ddf08e708b81.tar.gz
          talos-occ-b8a8037ca194fc690ff1a859b5c0ddf08e708b81.zip
Prevent calling out Centaurs on clock failover
CQ: SW437405
Change-Id: I1057d70bc6673b9d08a95573f00c9268f00dd126
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/62157
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Reviewed-by: Douglas R. Gilbert <dgilbert@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Diffstat (limited to 'src/occ_405/cent')
-rwxr-xr-x  src/occ_405/cent/centaur_control.c  65
-rwxr-xr-x  src/occ_405/cent/centaur_control.h   2
-rwxr-xr-x  src/occ_405/cent/centaur_data.c     13
3 files changed, 9 insertions, 71 deletions
diff --git a/src/occ_405/cent/centaur_control.c b/src/occ_405/cent/centaur_control.c
index e002bed..407bbdd 100755
--- a/src/occ_405/cent/centaur_control.c
+++ b/src/occ_405/cent/centaur_control.c
@@ -440,69 +440,4 @@ void centaur_control_init( void )
return;
}
-bool check_centaur_checkstop(memory_control_task_t * i_memControlTask )
-{
- errlHndl_t l_err = NULL;
- int cent = i_memControlTask->curMemIndex;
- // Check if the centaur has a channel checkstop. If it does,
- // then do not log any errors. We also don't want to throttle
- // a centaur that is in this condition.
- if(G_centaur_control_reg_parms.error.rc != CENTAUR_CHANNEL_CHECKSTOP)
- {
- TRAC_ERR("task_memory_control: IPC_ST_CENTAUR_SCOM failed. "
- "cent=%d rc=%x, index=0x%08x",
- cent, G_centaur_control_reg_parms.error.rc,
- G_centaur_control_reg_parms.error.addr);
-
- /* @
- * @errortype
- * @moduleid CENT_CONTROL_MOD
- * @reasoncode CENT_SCOM_ERROR
- * @userdata1 rc - Return code of scom operation
- * @userdata2 index of scom operation that failed
- * @userdata4 OCC_NO_EXTENDED_RC
- * @devdesc OCC access to centaur failed
- */
- l_err = createErrl(
- CENT_CONTROL_MOD, // modId
- CENT_SCOM_ERROR, // reasoncode
- OCC_NO_EXTENDED_RC, // Extended reason code
- ERRL_SEV_PREDICTIVE, // Severity
- NULL, // Trace Buf
- DEFAULT_TRACE_SIZE, // Trace Size
- G_centaur_control_reg_parms.error.rc, // userdata1
- G_centaur_control_reg_parms.error.addr // userdata2
- );
-
- addUsrDtlsToErrl(l_err, //io_err
- (uint8_t *) &(i_memControlTask->gpe_req.ffdc), //i_dataPtr,
- sizeof(GpeFfdc), //i_size
- ERRL_USR_DTL_STRUCT_VERSION_1, //version
- ERRL_USR_DTL_BINARY_DATA); //type
-
- //callout the centaur
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_HUID,
- G_sysConfigData.centaur_huids[cent],
- ERRL_CALLOUT_PRIORITY_MED);
-
- //callout the processor
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_HUID,
- G_sysConfigData.proc_huid,
- ERRL_CALLOUT_PRIORITY_MED);
-
- commitErrl(&l_err);
-
- return FALSE; // error was not a channel checkstop
- }
- else
- {
- // Remove the centaur sensor and all dimm sensors behind it.
- cent_chan_checkstop(cent);
- }
- return TRUE; // Centaur channel checkstop
-
-}
-
diff --git a/src/occ_405/cent/centaur_control.h b/src/occ_405/cent/centaur_control.h
index b6bb817..08832de 100755
--- a/src/occ_405/cent/centaur_control.h
+++ b/src/occ_405/cent/centaur_control.h
@@ -67,6 +67,4 @@ bool centaur_control( memory_control_task_t * i_memControlTask );
//void centaur_control_init( void ) INIT_SECTION;
void centaur_control_init( void );
-bool check_centaur_checkstop( memory_control_task_t * i_memControlTask );
-
#endif //_CENTAUR_CONTROL_H
diff --git a/src/occ_405/cent/centaur_data.c b/src/occ_405/cent/centaur_data.c
index 8b08030..ec27b9d 100755
--- a/src/occ_405/cent/centaur_data.c
+++ b/src/occ_405/cent/centaur_data.c
@@ -328,7 +328,9 @@ void cent_recovery(uint32_t i_cent)
ERRL_CALLOUT_TYPE_HUID,
G_sysConfigData.proc_huid,
ERRL_CALLOUT_PRIORITY_MED);
- commitErrl(&l_err);
+
+ // recovery is failing, ask for OCC reset to try to recover
+ REQUEST_RESET(l_err);
}
}
@@ -671,9 +673,9 @@ void centaur_data( void )
else // log the error if it was not a CENTAUR_CHANNEL_CHECKSTOP
{
//log an error the first time this happens but keep on running.
+ //This should be informational (except mfg) since we are going to retry
//eventually, we will timeout on the dimm & centaur temps not being updated
- //and fans will go to max speed (probably won't be able to throttle for
- //same reason we can't access the centaur here).
+ //if this is a hard failure which will call out the Centaur at that point.
if(!L_gpe_error_logged)
{
L_gpe_error_logged = TRUE;
@@ -696,13 +698,16 @@ void centaur_data( void )
CENT_TASK_DATA_MOD, //modId
CENT_SCOM_ERROR, //reasoncode
OCC_NO_EXTENDED_RC, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
+ ERRL_SEV_INFORMATIONAL, //Severity
NULL, //Trace Buf
DEFAULT_TRACE_SIZE, //Trace Size
l_parms->error.rc, //userdata1
0 //userdata2
);
+ //force severity to predictive if mfg ipl (allows callout to be added to info error)
+ setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR);
+
addUsrDtlsToErrl(l_err, //io_err
(uint8_t *) &(l_centaur_data_ptr->gpe_req.ffdc), //i_dataPtr,
sizeof(GpeFfdc), //i_size
OpenPOWER on IntegriCloud