summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x src/occ_405/amec/amec_health.c 293
-rwxr-xr-x src/occ_405/amec/amec_health.h 4
-rwxr-xr-x src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c 6
-rw-r--r-- src/occ_405/dcom/dcomMasterTx.c 6
-rwxr-xr-x src/occ_405/dimm/dimm.c 7
-rwxr-xr-x src/occ_405/occ_sys_config.h 1
-rwxr-xr-x src/occ_gpe1/gpe1.h 8
-rwxr-xr-x src/occ_gpe1/gpe1_dimm.h 6
-rw-r--r-- src/occ_gpe1/gpe1_dimm_control.c 8
-rw-r--r-- src/occ_gpe1/gpe1_dimm_read.c 15
10 files changed, 172 insertions, 182 deletions
diff --git a/src/occ_405/amec/amec_health.c b/src/occ_405/amec/amec_health.c
index 132dc30..0495a92 100755
--- a/src/occ_405/amec/amec_health.c
+++ b/src/occ_405/amec/amec_health.c
@@ -87,12 +87,19 @@ uint64_t amec_mem_get_huid(uint8_t i_cent, uint8_t i_dimm)
{
//we're being asked for a dimm huid
l_huid = G_sysConfigData.dimm_huids[i_cent][i_dimm];
- if((l_huid == 0) && (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type))
+ if(l_huid == 0)
{
- //if we don't have a valid dimm huid, use the centaur huid.
- l_huid = G_sysConfigData.centaur_huids[i_cent];
+ if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
+ {
+ //if we don't have a valid dimm huid, use the centaur huid.
+ l_huid = G_sysConfigData.centaur_huids[i_cent];
+ }
+ else
+ {
+ // else NIMBUS huid of 0 indicates not present (should never get called)
+ TRAC_ERR("amec_mem_get_huid: DIMM%04X did not have a HUID to call out!", (i_cent<<8)|i_dimm);
+ }
}
- // else NIMBUS huid of 0 indicates not present (should never get called)
}
return l_huid;
}
@@ -123,37 +130,31 @@ void amec_mem_mark_logged(uint8_t i_cent,
*
* Name: amec_health_check_dimm_temp
*
- * Description: Check if centaur's-dimm/rdimm-modules temperature exceeds the
- * error temperature as defined in thermal control thresholds
- * (ERROR field for Centaur/DIMM FRU Type)
+ * Description: Check if DIMM temperature exceeds the error temperature
+ * as defined in thermal control thresholds
+ * (ERROR field for DIMM FRU Type)
*
* End Function Specification
*/
void amec_health_check_dimm_temp()
{
- /*------------------------------------------------------------------------*/
- /* Local Variables */
- /*------------------------------------------------------------------------*/
uint16_t l_ot_error, l_cur_temp, l_max_temp;
sensor_t *l_sensor;
- uint8_t l_dimm; // per centaur/port dimms in cumulus/nimbus
- uint8_t l_index; // tracks centaurs/ports in cumulus/nimbus
- uint8_t l_max_index; // #centaurs/ports in cumulus/nimbus
+ uint8_t l_dimm;
+ uint8_t l_port;
+ uint8_t l_max_port; // #ports in nimbus/#centaurs in cumulus
uint32_t l_callouts_count = 0;
uint8_t l_new_callouts;
uint64_t l_huid;
errlHndl_t l_err = NULL;
- /*------------------------------------------------------------------------*/
- /* Code */
- /*------------------------------------------------------------------------*/
if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS)
{
- l_max_index = NUM_I2C_PORTS;
+ l_max_port = NUM_DIMM_PORTS;
}
else // MEM_TYPE_CUMULUS
{
- l_max_index = MAX_NUM_CENTAURS;
+ l_max_port = MAX_NUM_CENTAURS;
}
// Check to see if any dimms have reached the error temperature that
@@ -173,92 +174,88 @@ void amec_health_check_dimm_temp()
l_max_temp);
//iterate over all dimms
- for(l_index = 0; l_index < l_max_index; l_index++)
+ for(l_port = 0; l_port < l_max_port; l_port++)
{
//only callout a dimm if it hasn't been called out already
- l_new_callouts = G_dimm_overtemp_bitmap.bytes[l_index] ^
- G_dimm_overtemp_logged_bitmap.bytes[l_index];
+ l_new_callouts = G_dimm_overtemp_bitmap.bytes[l_port] ^
+ G_dimm_overtemp_logged_bitmap.bytes[l_port];
- //skip to next centaur if no new callouts for this one
+ //skip to next port if no new callouts for this one
if(!l_new_callouts)
{
continue;
}
- //find the dimm(s) that need to be called out behind this centaur
+ //find the dimm(s) that need to be called out for this port
for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++)
{
if(!(l_new_callouts & (DIMM_SENSOR0 >> l_dimm)) &&
- G_dimm_overtemp_bitmap.bytes[l_index])
+ G_dimm_overtemp_bitmap.bytes[l_port])
{
continue;
}
- l_huid = amec_mem_get_huid(l_index, l_dimm);
-
- amec_mem_mark_logged(l_index,
+ fru_temp_t* l_fru;
+ l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm];
+ amec_mem_mark_logged(l_port,
l_dimm,
&G_cent_overtemp_logged_bitmap,
- &G_dimm_overtemp_logged_bitmap.bytes[l_index]);
+ &G_dimm_overtemp_logged_bitmap.bytes[l_port]);
+ TRAC_ERR("amec_health_check_dimm_temp: DIMM%04X overtemp - %dC",
+ (l_port<<8)|l_dimm, l_fru->cur_temp);
- //If we don't have an error log for the callout, create one
- if(!l_err)
+ // Create single elog with up to MAX_CALLOUTS
+ if(l_callouts_count < ERRL_MAX_CALLOUTS)
{
- /* @
- * @errortype
- * @moduleid AMEC_HEALTH_CHECK_DIMM_TEMP
- * @reasoncode DIMM_ERROR_TEMP
- * @userdata1 Maximum dimm temperature
- * @userdata2 Dimm temperature threshold
- * @userdata4 OCC_NO_EXTENDED_RC
- * @devdesc Memory DIMM(s) exceeded maximum safe
- * temperature.
- */
- l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TEMP, //modId
- DIMM_ERROR_TEMP, //reasoncode
- OCC_NO_EXTENDED_RC, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- l_max_temp, //userdata1
- l_ot_error); //userdata2
-
- // Callout the "over temperature" procedure
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_COMPONENT_ID,
- ERRL_COMPONENT_ID_OVER_TEMPERATURE,
- ERRL_CALLOUT_PRIORITY_HIGH);
- l_callouts_count = 1;
- }
-
- // Callout dimm
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_HUID,
- l_huid,
- ERRL_CALLOUT_PRIORITY_MED);
-
- l_callouts_count++;
+ //If we don't have an error log for the callout, create one
+ if(!l_err)
+ {
+ /* @
+ * @errortype
+ * @moduleid AMEC_HEALTH_CHECK_DIMM_TEMP
+ * @reasoncode DIMM_ERROR_TEMP
+ * @userdata1 Maximum dimm temperature
+ * @userdata2 Dimm temperature threshold
+ * @userdata4 OCC_NO_EXTENDED_RC
+ * @devdesc Memory DIMM(s) exceeded maximum safe
+ * temperature.
+ */
+ l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TEMP, //modId
+ DIMM_ERROR_TEMP, //reasoncode
+ OCC_NO_EXTENDED_RC, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ l_max_temp, //userdata1
+ l_ot_error); //userdata2
+
+ // Callout the "over temperature" procedure
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_OVER_TEMPERATURE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+ l_callouts_count = 1;
+ }
- //If we've reached the max # of callouts for an error log
- //commit the error log
- if(l_callouts_count == ERRL_MAX_CALLOUTS)
- {
- commitErrl(&l_err);
- }
+ // Callout dimm
+ l_huid = amec_mem_get_huid(l_port, l_dimm);
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ l_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
- //If we found all of the callouts for this centaur, go to the next one
- if(!l_new_callouts)
- {
- break;
+ l_callouts_count++;
}
}//iterate over dimms
- }//iterate over centaurs
+ }//iterate over ports
if(l_err)
{
commitErrl(&l_err);
}
-}
+
+} // end amec_health_check_dimm_temp()
+
/*
* Function Specification
@@ -275,7 +272,7 @@ void amec_health_check_dimm_timeout()
{
static dimm_sensor_flags_t L_temp_update_bitmap_prev = {0};
dimm_sensor_flags_t l_need_inc, l_need_clr, l_temp_update_bitmap;
- uint8_t l_dimm, l_cent;
+ uint8_t l_dimm, l_port;
fru_temp_t* l_fru;
errlHndl_t l_err = NULL;
uint32_t l_callouts_count = 0;
@@ -295,7 +292,7 @@ void amec_health_check_dimm_timeout()
G_dimm_temp_updated_bitmap.bigword = 0;
//check if we need to increment any timers (haven't been updated in the last second)
- l_need_inc.bigword = G_dimm_enabled_sensors.bigword & ~l_temp_update_bitmap.bigword;
+ l_need_inc.bigword = G_dimm_enabled_sensors.bigword & ~l_temp_update_bitmap.bigword;
//check if we need to clear any timers (updated now but not updated previously)
l_need_clr.bigword = l_temp_update_bitmap.bigword & ~L_temp_update_bitmap_prev.bigword;
@@ -310,18 +307,18 @@ void amec_health_check_dimm_timeout()
break;
}
- //iterate across all centaurs/ports incrementing dimm sensor timers as needed
- for(l_cent = 0; l_cent < MAX_NUM_CENTAURS; l_cent++)
+ //iterate across all ports incrementing dimm sensor timers as needed
+ for(l_port = 0; l_port < NUM_DIMM_PORTS; l_port++)
{
- //any dimm timers behind this centaur need incrementing?
- if(!l_need_inc.bytes[l_cent])
+ //any dimm timers on this port need incrementing?
+ if(!l_need_inc.bytes[l_port])
{
- // All dimm sensors were updated for this centaur/port
- // Trace this fact and clear the expired byte for all DIMMs on this centaur/port
- if(G_dimm_temp_expired_bitmap.bytes[l_cent])
+ // All dimm sensors were updated for this port
+ // Trace this fact and clear the expired byte for all DIMMs on this port
+ if(G_dimm_temp_expired_bitmap.bytes[l_port])
{
- G_dimm_temp_expired_bitmap.bytes[l_cent] = 0;
- TRAC_INFO("All dimm sensors for centaur %d have been updated", l_cent);
+ G_dimm_temp_expired_bitmap.bytes[l_port] = 0;
+ TRAC_INFO("All dimm sensors for centaur %d have been updated", l_port);
}
continue;
}
@@ -330,18 +327,18 @@ void amec_health_check_dimm_timeout()
for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++)
{
//not this one, check if we need to clear the dimm timeout and go to the next one
- if(!(l_need_inc.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm)))
+ if(!(l_need_inc.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)))
{
// Clear this one if needed
- if(G_dimm_temp_expired_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm))
+ if(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))
{
- G_dimm_temp_expired_bitmap.bytes[l_cent] &= ~(DIMM_SENSOR0 >> l_dimm);
+ G_dimm_temp_expired_bitmap.bytes[l_port] &= ~(DIMM_SENSOR0 >> l_dimm);
}
continue;
}
//we found one.
- l_fru = &g_amec->proc[0].memctl[l_cent].centaur.dimm_temps[l_dimm];
+ l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm];
//increment timer
l_fru->sample_age++;
@@ -357,8 +354,8 @@ void amec_health_check_dimm_timeout()
// meet the DIMM MAX_READ_TIMEOUT.)
if((l_fru->sample_age == 1) && (!G_simics_environment))
{
- TRAC_INFO("No new DIMM temperature available on cent[%d] dimm[%d] temp[%d] flags[0x%02X]",
- l_cent, l_dimm, l_fru->cur_temp, l_fru->flags);
+ TRAC_INFO("No new DIMM temperature available for DIMM%04X (cur_temp[%d] flags[0x%02X])",
+ (l_port<<8)|l_dimm, l_fru->cur_temp, l_fru->flags);
}
//check if the temperature reading is still useable
@@ -369,72 +366,63 @@ void amec_health_check_dimm_timeout()
}
//temperature has expired. Notify control algorithms which DIMM
- if(!(G_dimm_temp_expired_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm)))
+ if(!(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)))
{
- G_dimm_temp_expired_bitmap.bytes[l_cent] |= (DIMM_SENSOR0 >> l_dimm);
- TRAC_ERR("Timed out reading dimm temperature sensor on cent %d dimm %d.",
- l_cent, l_dimm);
+ G_dimm_temp_expired_bitmap.bytes[l_port] |= (DIMM_SENSOR0 >> l_dimm);
+ TRAC_ERR("Timed out reading DIMM%04X temperature sensor", (l_port<<8)|l_dimm);
}
//If we've already logged an error for this FRU go to the next one.
- if(G_dimm_timeout_logged_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm))
+ if(G_dimm_timeout_logged_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))
{
continue;
}
- TRAC_ERR("Timed out reading dimm temperature on cent/port[%d] dimm[%d] temp[%d] flags[0x%02X]",
- l_cent, l_dimm, l_fru->cur_temp, l_fru->flags);
-
- if(!l_err)
- {
- /* @
- * @errortype
- * @moduleid AMEC_HEALTH_CHECK_DIMM_TIMEOUT
- * @reasoncode FRU_TEMP_TIMEOUT
- * @userdata1 timeout value in seconds
- * @userdata2 0
- * @userdata4 OCC_NO_EXTENDED_RC
- * @devdesc Failed to read a memory DIMM temperature
- *
- */
- l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TIMEOUT, //modId
- FRU_TEMP_TIMEOUT, //reasoncode
- OCC_NO_EXTENDED_RC, //Extended reason code
- ERRL_SEV_PREDICTIVE, //Severity
- NULL, //Trace Buf
- DEFAULT_TRACE_SIZE, //Trace Size
- g_amec->thermaldimm.temp_timeout, //userdata1
- 0); //userdata2
-
- l_callouts_count = 0;
- }
+ TRAC_ERR("Timed out reading DIMM%04X temperature (cur_temp[%d] flags[0x%02X])",
+ (l_port<<8)|l_dimm, l_fru->cur_temp, l_fru->flags);
- //Get the HUID for the dimm
- l_huid = amec_mem_get_huid(l_cent, l_dimm);
+ //Mark DIMM as logged so we don't log it more than once
+ amec_mem_mark_logged(l_port,
+ l_dimm,
+ &G_cent_timeout_logged_bitmap,
+ &G_dimm_timeout_logged_bitmap.bytes[l_port]);
- // Callout dimm
- addCalloutToErrl(l_err,
- ERRL_CALLOUT_TYPE_HUID,
- l_huid,
- ERRL_CALLOUT_PRIORITY_MED);
+ // Create single elog with up to MAX_CALLOUTS
+ if(l_callouts_count < ERRL_MAX_CALLOUTS)
+ {
+ if(!l_err)
+ {
+ /* @
+ * @errortype
+ * @moduleid AMEC_HEALTH_CHECK_DIMM_TIMEOUT
+ * @reasoncode FRU_TEMP_TIMEOUT
+ * @userdata1 timeout value in seconds
+ * @userdata2 0
+ * @userdata4 OCC_NO_EXTENDED_RC
+ * @devdesc Failed to read a memory DIMM temperature
+ *
+ */
+ l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TIMEOUT, //modId
+ FRU_TEMP_TIMEOUT, //reasoncode
+ OCC_NO_EXTENDED_RC, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ g_amec->thermaldimm.temp_timeout, //userdata1
+ 0); //userdata2
+ }
- l_callouts_count++;
+ //Get the HUID for the DIMM and add callout
+ l_huid = amec_mem_get_huid(l_port, l_dimm);
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ l_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
- //If we've reached the max # of callouts for an error log
- //commit the error log
- if(l_callouts_count == ERRL_MAX_CALLOUTS)
- {
- commitErrl(&l_err);
+ l_callouts_count++;
}
-
- //Mark dimm as logged so we don't log it more than once
- amec_mem_mark_logged(l_cent,
- l_dimm,
- &G_cent_timeout_logged_bitmap,
- &G_dimm_timeout_logged_bitmap.bytes[l_cent]);
} //iterate over all dimms
-
- } //iterate over all centaurs/ports
+ } //iterate over all ports
if(l_err)
{
@@ -448,10 +436,10 @@ void amec_health_check_dimm_timeout()
}
//iterate across all centaurs/ports clearing dimm sensor timers as needed
- for(l_cent = 0; l_cent < MAX_NUM_CENTAURS; l_cent++)
+ for(l_port = 0; l_port < MAX_NUM_CENTAURS; l_port++)
{
- if(!l_need_clr.bytes[l_cent])
+ if(!l_need_clr.bytes[l_port])
{
continue;
}
@@ -460,13 +448,13 @@ void amec_health_check_dimm_timeout()
for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++)
{
//not this one, go to next one
- if(!(l_need_clr.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm)))
+ if(!(l_need_clr.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)))
{
continue;
}
//we found one.
- l_fru = &g_amec->proc[0].memctl[l_cent].centaur.dimm_temps[l_dimm];
+ l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm];
//clear timer
l_fru->sample_age = 0;
@@ -475,15 +463,16 @@ void amec_health_check_dimm_timeout()
// complete on each call. Skip the "recovery" trace in Simics.
if((L_ran_once) && (!G_simics_environment))
{
- TRAC_INFO("DIMM temperature collection has resumed on cent/port[%d] dimm[%d] temp[%d]",
- l_cent, l_dimm, l_fru->cur_temp);
+ TRAC_INFO("DIMM temperature collection has resumed for DIMM%04X temp[%d]",
+ (l_port<<8)|l_dimm, l_fru->cur_temp);
}
}//iterate over all dimms
}//iterate over all centaurs/ports
}while(0);
L_ran_once = TRUE;
-}
+
+} // end amec_health_check_dimm_timeout()
diff --git a/src/occ_405/amec/amec_health.h b/src/occ_405/amec/amec_health.h
index 0f5baf0..11d8fb0 100755
--- a/src/occ_405/amec/amec_health.h
+++ b/src/occ_405/amec/amec_health.h
@@ -47,5 +47,9 @@ void amec_health_check_cent_temp(void);
void amec_health_check_cent_timeout(void);
void amec_health_check_dimm_temp(void);
void amec_health_check_dimm_timeout(void);
+void amec_mem_mark_logged(uint8_t i_cent,
+ uint8_t i_dimm,
+ uint8_t* i_clog_bitmap,
+ uint8_t* i_dlog_bitmap);
#endif
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
index 47bc2eb..443a967 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
@@ -1954,14 +1954,15 @@ errlHndl_t data_store_mem_throt(const cmdh_fsp_cmd_t * i_cmd_ptr,
memcpy(&l_temp_set, &(l_data_set->min_n_per_mba), sizeof(mem_throt_config_data_t));
// A 0 for any power or N value is an error
+ unsigned int l_index = 0;
for(l_n_ptr = &l_temp_set.min_n_per_mba; l_n_ptr <= &l_temp_set.nom_mem_power; l_n_ptr++)
{
if(!(*l_n_ptr))
{
if(MEM_TYPE_NIMBUS == G_sysConfigData.mem_type)
{
- CMDH_TRAC_ERR("data_store_mem_throt: RDIMM Throttle N value is 0!"
- " mc[%d] port[%d]", mc, port);
+ CMDH_TRAC_ERR("data_store_mem_throt: RDIMM Throttle value[%d] is 0!"
+ " mc[%d] port[%d]", l_index, mc, port);
}
else if(MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)
{
@@ -1971,6 +1972,7 @@ errlHndl_t data_store_mem_throt(const cmdh_fsp_cmd_t * i_cmd_ptr,
cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_DATA, &l_err);
break;
}
+ ++l_index;
}
if(l_err) // zero N Value?
diff --git a/src/occ_405/dcom/dcomMasterTx.c b/src/occ_405/dcom/dcomMasterTx.c
index 116b0e4..de88369 100644
--- a/src/occ_405/dcom/dcomMasterTx.c
+++ b/src/occ_405/dcom/dcomMasterTx.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2016 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -487,6 +487,8 @@ void task_dcom_tx_slv_inbox( task_t *i_self)
// Description: transmit doorbells to slaves
// from master
//
+// NOTE: runs at crit interrupt (adding traces will cause crash)
+//
// End Function Specification
void dcom_tx_slv_inbox_doorbell( void )
{
@@ -530,8 +532,6 @@ void dcom_tx_slv_inbox_doorbell( void )
l_delta : G_dcomTime.master.doorbellMaxDeltaTx;
G_dcomTime.master.doorbellSeq = G_dcom_slv_inbox_doorbell_tx.magic_counter;
G_dcomTime.master.doorbellNumSent++;
-
- DCOM_DBG("Sent multicast doorbell\n");
}
#endif //_DCOMMASTERTOSLAVE_C
diff --git a/src/occ_405/dimm/dimm.c b/src/occ_405/dimm/dimm.c
index fb62d6b..496814b 100755
--- a/src/occ_405/dimm/dimm.c
+++ b/src/occ_405/dimm/dimm.c
@@ -44,6 +44,7 @@
#include "common.h"
#include "memory.h"
#include "centaur_data.h"
+#include "amec_health.h"
extern bool G_mem_monitoring_allowed;
extern memory_control_task_t G_memory_control_task;
@@ -308,7 +309,7 @@ void mark_dimm_failed()
l_err = createErrl(DIMM_MID_MARK_DIMM_FAILED,
DIMM_GPE_FAILURE,
ERC_DIMM_COMPLETE_FAILURE,
- ERRL_SEV_INFORMATIONAL,
+ ERRL_SEV_PREDICTIVE,
NULL,
DEFAULT_TRACE_SIZE,
G_dimm_sm_args.error.rc,
@@ -322,6 +323,10 @@ void mark_dimm_failed()
ERRL_CALLOUT_TYPE_HUID,
G_sysConfigData.dimm_huids[port][dimm],
ERRL_CALLOUT_PRIORITY_HIGH);
+ //Mark DIMM as logged so we don't log it again
+ amec_mem_mark_logged(port, dimm,
+ &G_cent_timeout_logged_bitmap,
+ &G_dimm_timeout_logged_bitmap.bytes[port]);
commitErrl(&l_err);
}
diff --git a/src/occ_405/occ_sys_config.h b/src/occ_405/occ_sys_config.h
index fb45635..d89833c 100755
--- a/src/occ_405/occ_sys_config.h
+++ b/src/occ_405/occ_sys_config.h
@@ -57,7 +57,6 @@
#define NUM_NIMBUS_MCAS (MAX_NUM_MCU_PORTS * NUM_NIMBUS_MC_PAIRS)
#define NUM_DIMMS_PER_MEM_CONTROLLER 8
-#define NUM_I2C_PORTS 2
#define NUM_PROC_CHIPS_PER_OCC 1
#define NUM_CENTAURS_PER_MEM_CONTROLLER 1
diff --git a/src/occ_gpe1/gpe1.h b/src/occ_gpe1/gpe1.h
index af26829..4ce0823 100755
--- a/src/occ_gpe1/gpe1.h
+++ b/src/occ_gpe1/gpe1.h
@@ -48,14 +48,6 @@
#define I2C_FIFO4_REG_READ 0x000A0012
-// I2C Status Reigster masks
-#define STATUS_ERROR_MASK 0xFE80330000000000
-#define STATUS_ERROR_OR_COMPLETE_MASK 0xFF80330000000000
-#define STATUS_COMPLETE_MASK 0x0100000000000000
-#define PEEK_ERROR_MASK 0x00000000FC000000
-#define PEEK_MORE_DATA 0x0000000002000000
-
-
// Debug trace
#ifdef GPE1_DEBUG
#define GPE1_DIMM_DBG(frmt,args...) \
diff --git a/src/occ_gpe1/gpe1_dimm.h b/src/occ_gpe1/gpe1_dimm.h
index c3249c6..b151f87 100755
--- a/src/occ_gpe1/gpe1_dimm.h
+++ b/src/occ_gpe1/gpe1_dimm.h
@@ -44,9 +44,9 @@
#define SCOM_ENGINE_OFFSET(engine) (engine << 12)
-// I2C Status Reigster masks
-#define STATUS_ERROR_MASK 0xFE80330000000000
-#define STATUS_ERROR_OR_COMPLETE_MASK 0xFF80330000000000
+// I2C Status Register masks
+#define STATUS_ERROR_MASK 0xFC80000000000000
+#define STATUS_ERROR_OR_COMPLETE_MASK 0xFF80000000000000
#define STATUS_COMPLETE_MASK 0x0100000000000000
#define PEEK_ERROR_MASK 0x00000000FC000000
#define PEEK_MORE_DATA 0x0000000002000000
diff --git a/src/occ_gpe1/gpe1_dimm_control.c b/src/occ_gpe1/gpe1_dimm_control.c
index 65ca4b6..a87a464 100644
--- a/src/occ_gpe1/gpe1_dimm_control.c
+++ b/src/occ_gpe1/gpe1_dimm_control.c
@@ -146,15 +146,16 @@ void gpe_reset_mem_deadman(ipc_msg_t* cmd, void* arg)
ipc_async_cmd_t *async_cmd = (ipc_async_cmd_t*)cmd;
reset_mem_deadman_args_t *args = (reset_mem_deadman_args_t*)async_cmd->cmd_data;
- int mca = args->mca; // Nimbus MCA; mc_pair = mca >>2 and port = mca & 3
-
args->error.error = 0;
args->error.ffdc = 0;
do
{ // read Deadman timer's SCOM Register for specified MCA (MC pair and port numbers)
// @TODO: uncomment when deadman timer scom registers are definied in simics. RTC: 163713, RTC: 163934
- //rc = getscom_abs(DEADMAN_TIMER_MCA(mca), &regValue);
+#if 0
+ int mca = args->mca; // Nimbus MCA; mc_pair = mca >>2 and port = mca & 3
+
+ rc = getscom_abs(DEADMAN_TIMER_MCA(mca), &regValue);
if(rc)
{
PK_TRACE("gpe_reset_mem_deadman: Deadman timer read failed"
@@ -172,6 +173,7 @@ void gpe_reset_mem_deadman(ipc_msg_t* cmd, void* arg)
mca, DEADMAN_TIMER_MCA(mca), regValue);
}
+#endif
} while(0);
// send back a response, IPC success even if ffdc/rc are non zeros
diff --git a/src/occ_gpe1/gpe1_dimm_read.c b/src/occ_gpe1/gpe1_dimm_read.c
index e0ee84c..86d80b6 100644
--- a/src/occ_gpe1/gpe1_dimm_read.c
+++ b/src/occ_gpe1/gpe1_dimm_read.c
@@ -370,10 +370,12 @@ void dimm_initiate_read(ipc_msg_t* cmd, void* arg)
if ((regValue & STATUS_ERROR_OR_COMPLETE_MASK) == STATUS_COMPLETE_MASK)
{
// Status register indicates no errors and last command completed.
- // Write the I2C command register with a 2 byte read request
+ // Write the I2C command register with a 2 byte read request.
+ // Since FIFO4 can read 4 bytes in one operation, we will do a read of 4 bytes
+ // and only look at the first 2 bytes. (FIFO4 will hang if we only try to read 2 bytes)
scomAddr = I2C_COMMAND_REG | SCOM_ENGINE_OFFSET(args->i2cEngine);
- // start+address+stop + slave_address, rw=1=read, length=2
- regValue = 0xD001000200000000;
+ // start+address+stop + slave_address, rw=1=read, length=4
+ regValue = 0xD001000400000000;
regValue |= ((uint64_t)args->i2cAddr << 48);
rc = putscom_abs(scomAddr, regValue);
if(rc)
@@ -497,12 +499,7 @@ void dimm_read_temp(ipc_msg_t* cmd, void* arg)
WORD_HIGH(regValue), WORD_LOW(regValue));
gpe_set_ffdc(&(args->error), scomAddr, GPE_RC_I2C_ERROR, regValue);
}
- else if (regValue & PEEK_MORE_DATA)
- {
- // The data_request bit is non-zero, but no more data is needed!
- PK_TRACE("dimm_read_temp: Got data, but more data needs access??");
- gpe_set_ffdc(&(args->error), scomAddr, GPE_RC_NOT_COMPLETE, regValue);
- }
+ // PEEK_MORE_DATA will be set because we only read 2 of the 4 bytes (ignore this bit)
}
}
// else, all data not available yet (NOT_COMPLETE)
OpenPOWER on IntegriCloud