summary | refs | log | tree | commit | diff | stats
path: root/src/occ_405
diff options
context:
space:
mode:
author: Douglas Gilbert <dgilbert@us.ibm.com> 2018-04-09 11:49:10 -0500
committer: Martha Broyles <mbroyles@us.ibm.com> 2018-04-20 15:46:18 -0400
commit: 61cd385caa634b5d8d63d3a21138c25230227d89 (patch)
tree: 4b382398597cbda4f1603af6a9637aa915449752 /src/occ_405
parent: 4f49f6351fa3908703b91e0dea9f3680c77a47a6 (diff)
download: talos-occ-61cd385caa634b5d8d63d3a21138c25230227d89.tar.gz
download: talos-occ-61cd385caa634b5d8d63d3a21138c25230227d89.zip
OCC Centaur: Check for channel checkstop
Change-Id: I2df9675d655b0391b249e49f7fc036788268e36c
RTC: 191164
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57280
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Diffstat (limited to 'src/occ_405')
-rwxr-xr-x src/occ_405/cent/centaur_control.c | 9
-rwxr-xr-x src/occ_405/cent/centaur_data.c | 45
-rwxr-xr-x src/occ_405/cent/centaur_data.h | 6
3 files changed, 47 insertions, 13 deletions
diff --git a/src/occ_405/cent/centaur_control.c b/src/occ_405/cent/centaur_control.c
index e5f37ef..283a22b 100755
--- a/src/occ_405/cent/centaur_control.c
+++ b/src/occ_405/cent/centaur_control.c
@@ -491,9 +491,14 @@ bool check_centaur_checkstop(memory_control_task_t * i_memControlTask )
commitErrl(&l_err);
- return TRUE; // a centaur channel checkstop error occured
+ return FALSE; // error was not a channel checkstop
}
- return FALSE; // No centaur channel checkstop errors
+ else
+ {
+ // Remove the centaur sensor and all dimm sensors behind it.
+ cent_chan_checkstop(cent);
+ }
+ return TRUE; // Centaur channel checkstop
}
diff --git a/src/occ_405/cent/centaur_data.c b/src/occ_405/cent/centaur_data.c
index 902f2c7..158937f 100755
--- a/src/occ_405/cent/centaur_data.c
+++ b/src/occ_405/cent/centaur_data.c
@@ -192,9 +192,26 @@ uint8_t G_centaur_nest_lfir6 = 0;
//number of SC polls to wait between i2c recovery attempts
#define CENT_SC_MAX_INTERVAL 256
-//determine scom address of MCIFIR register for given Centaur n
-#define MCS0_MCIFIR_N(n) \
- ( (n<4)? (MCS0_MCIFIR + ((MCS1_MCIFIR - MCS0_MCIFIR) * (n))) : (MCS4_MCIFIR + ((MCS5_MCIFIR - MCS4_MCIFIR) * (n-4))) )
+// There was a centaur channel checkstop, remove the centaur from the enabled bitmask.
+void cent_chan_checkstop(uint32_t i_cent)
+{
+ if(CENTAUR_PRESENT(i_cent))
+ {
+ //remove checkstopped centaur from presence bitmap
+ G_present_centaurs &= ~(CENTAUR_BY_MASK(i_cent));
+
+ // remove the dimm temperature sensors behind this centaur
+ G_dimm_enabled_sensors.bytes[i_cent] = 0;
+
+ TRAC_IMP("Channel checkstop detected on Centaur[%d] G_present_centaurs[0x%08X]",
+ i_cent,
+ G_present_centaurs);
+
+ TRAC_IMP("Updated bitmap of enabled dimm temperature sensors: 0x%08X %08X",
+ G_dimm_enabled_sensors.words[0],
+ G_dimm_enabled_sensors.words[1]);
+ }
+}
void cent_recovery(uint32_t i_cent)
{
@@ -255,9 +272,13 @@ void cent_recovery(uint32_t i_cent)
G_cent_scom_gpe_parms.error.rc) &&
(!(L_cent_callouts & l_cent_mask)))
{
- // Check if the centaur has a channel checkstop. If it does, then do not
- // log any errors
- if(G_cent_scom_gpe_parms.error.rc != CENTAUR_CHANNEL_CHECKSTOP)
+ // Check if the centaur has a channel checkstop. If it does then remove the centaur
+ // from the enabled sensor bit map and do not log any errors
+ if(G_cent_scom_gpe_parms.error.rc == CENTAUR_CHANNEL_CHECKSTOP)
+ {
+ cent_chan_checkstop(l_prev_cent);
+ }
+ else // Make error log for inband scom errors
{
//Mark the centaur as being called out
L_cent_callouts |= l_cent_mask;
@@ -641,9 +662,13 @@ void centaur_data( void )
//(as long as the request was scheduled).
if(!async_request_completed(&l_centaur_data_ptr->gpe_req.request) || l_parms->error.rc )
{
- // Check if the centaur has a channel checkstop. If it does, then do not
- // log any errors
- if(G_cent_scom_gpe_parms.error.rc != CENTAUR_CHANNEL_CHECKSTOP)
+ // Check if the centaur has a channel checkstop. If it does then do not
+ // log any errors, but remove the centaur from the config
+ if(l_parms->error.rc == CENTAUR_CHANNEL_CHECKSTOP)
+ {
+ cent_chan_checkstop(l_centaur_data_ptr->prev_centaur);
+ }
+ else // log the error if it was not a CENTAUR_CHANNEL_CHECKSTOP
{
//log an error the first time this happens but keep on running.
//eventually, we will timeout on the dimm & centaur temps not being updated
@@ -868,7 +893,7 @@ void centaur_data( void )
}
while(0);
- //handle centaur i2c recovery requests and centaur workaround - Needed for P9??
+ //handle centaur i2c recovery requests and centaur workaround.
if(CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur))
{
cent_recovery(l_centaur_data_ptr->current_centaur);
diff --git a/src/occ_405/cent/centaur_data.h b/src/occ_405/cent/centaur_data.h
index 0208265..30c0e0d 100755
--- a/src/occ_405/cent/centaur_data.h
+++ b/src/occ_405/cent/centaur_data.h
@@ -176,7 +176,11 @@ void cent_recovery(uint32_t i_cent);
//associated with the specified OCC centaur id.
CentaurMemData * cent_get_centaur_data_ptr( const uint8_t i_centaur_id );
-
+// Create the centaur configuration object
uint32_t centaur_configuration_create( CentaurConfiguration_t * i_centaurConfiguration );
+
+// Remove centaur from enabled sensor list due to channel checkstop
+void cent_chan_checkstop(uint32_t i_cent);
+
#endif //_CENTAUR_DATA_H
OpenPOWER on IntegriCloud