diff options
Diffstat (limited to 'src')
-rwxr-xr-x | src/occ_405/amec/amec_health.c | 293 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_health.h | 4 | ||||
-rwxr-xr-x | src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c | 6 | ||||
-rw-r--r-- | src/occ_405/dcom/dcomMasterTx.c | 6 | ||||
-rwxr-xr-x | src/occ_405/dimm/dimm.c | 7 | ||||
-rwxr-xr-x | src/occ_405/occ_sys_config.h | 1 | ||||
-rwxr-xr-x | src/occ_gpe1/gpe1.h | 8 | ||||
-rwxr-xr-x | src/occ_gpe1/gpe1_dimm.h | 6 | ||||
-rw-r--r-- | src/occ_gpe1/gpe1_dimm_control.c | 8 | ||||
-rw-r--r-- | src/occ_gpe1/gpe1_dimm_read.c | 15 |
10 files changed, 172 insertions, 182 deletions
diff --git a/src/occ_405/amec/amec_health.c b/src/occ_405/amec/amec_health.c index 132dc30..0495a92 100755 --- a/src/occ_405/amec/amec_health.c +++ b/src/occ_405/amec/amec_health.c @@ -87,12 +87,19 @@ uint64_t amec_mem_get_huid(uint8_t i_cent, uint8_t i_dimm) { //we're being asked for a dimm huid l_huid = G_sysConfigData.dimm_huids[i_cent][i_dimm]; - if((l_huid == 0) && (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type)) + if(l_huid == 0) { - //if we don't have a valid dimm huid, use the centaur huid. - l_huid = G_sysConfigData.centaur_huids[i_cent]; + if (MEM_TYPE_CUMULUS == G_sysConfigData.mem_type) + { + //if we don't have a valid dimm huid, use the centaur huid. + l_huid = G_sysConfigData.centaur_huids[i_cent]; + } + else + { + // else NIMBUS huid of 0 indicates not present (should never get called) + TRAC_ERR("amec_mem_get_huid: DIMM%04X did not have a HUID to call out!", (i_cent<<8)|i_dimm); + } } - // else NIMBUS huid of 0 indicates not present (should never get called) } return l_huid; } @@ -123,37 +130,31 @@ void amec_mem_mark_logged(uint8_t i_cent, * * Name: amec_health_check_dimm_temp * - * Description: Check if centaur's-dimm/rdimm-modules temperature exceeds the - * error temperature as defined in thermal control thresholds - * (ERROR field for Centaur/DIMM FRU Type) + * Description: Check if DIMM temperature exceeds the error temperature + * as defined in thermal control thresholds + * (ERROR field for DIMM FRU Type) * * End Function Specification */ void amec_health_check_dimm_temp() { - /*------------------------------------------------------------------------*/ - /* Local Variables */ - /*------------------------------------------------------------------------*/ uint16_t l_ot_error, l_cur_temp, l_max_temp; sensor_t *l_sensor; - uint8_t l_dimm; // per centaur/port dimms in cumulus/nimbus - uint8_t l_index; // tracks centaurs/ports in cumulus/nimbus - uint8_t l_max_index; // #centaurs/ports in cumulus/nimbus + uint8_t l_dimm; + uint8_t l_port; + uint8_t 
l_max_port; // #ports in nimbus/#centaurs in cumulus uint32_t l_callouts_count = 0; uint8_t l_new_callouts; uint64_t l_huid; errlHndl_t l_err = NULL; - /*------------------------------------------------------------------------*/ - /* Code */ - /*------------------------------------------------------------------------*/ if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS) { - l_max_index = NUM_I2C_PORTS; + l_max_port = NUM_DIMM_PORTS; } else // MEM_TYPE_CUMULUS { - l_max_index = MAX_NUM_CENTAURS; + l_max_port = MAX_NUM_CENTAURS; } // Check to see if any dimms have reached the error temperature that @@ -173,92 +174,88 @@ void amec_health_check_dimm_temp() l_max_temp); //iterate over all dimms - for(l_index = 0; l_index < l_max_index; l_index++) + for(l_port = 0; l_port < l_max_port; l_port++) { //only callout a dimm if it hasn't been called out already - l_new_callouts = G_dimm_overtemp_bitmap.bytes[l_index] ^ - G_dimm_overtemp_logged_bitmap.bytes[l_index]; + l_new_callouts = G_dimm_overtemp_bitmap.bytes[l_port] ^ + G_dimm_overtemp_logged_bitmap.bytes[l_port]; - //skip to next centaur if no new callouts for this one + //skip to next port if no new callouts for this one if(!l_new_callouts) { continue; } - //find the dimm(s) that need to be called out behind this centaur + //find the dimm(s) that need to be called out for this port for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++) { if(!(l_new_callouts & (DIMM_SENSOR0 >> l_dimm)) && - G_dimm_overtemp_bitmap.bytes[l_index]) + G_dimm_overtemp_bitmap.bytes[l_port]) { continue; } - l_huid = amec_mem_get_huid(l_index, l_dimm); - - amec_mem_mark_logged(l_index, + fru_temp_t* l_fru; + l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm]; + amec_mem_mark_logged(l_port, l_dimm, &G_cent_overtemp_logged_bitmap, - &G_dimm_overtemp_logged_bitmap.bytes[l_index]); + &G_dimm_overtemp_logged_bitmap.bytes[l_port]); + TRAC_ERR("amec_health_check_dimm_temp: DIMM%04X overtemp - %dC", + (l_port<<8)|l_dimm, l_fru->cur_temp); 
- //If we don't have an error log for the callout, create one - if(!l_err) + // Create single elog with up to MAX_CALLOUTS + if(l_callouts_count < ERRL_MAX_CALLOUTS) { - /* @ - * @errortype - * @moduleid AMEC_HEALTH_CHECK_DIMM_TEMP - * @reasoncode DIMM_ERROR_TEMP - * @userdata1 Maximum dimm temperature - * @userdata2 Dimm temperature threshold - * @userdata4 OCC_NO_EXTENDED_RC - * @devdesc Memory DIMM(s) exceeded maximum safe - * temperature. - */ - l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TEMP, //modId - DIMM_ERROR_TEMP, //reasoncode - OCC_NO_EXTENDED_RC, //Extended reason code - ERRL_SEV_PREDICTIVE, //Severity - NULL, //Trace Buf - DEFAULT_TRACE_SIZE, //Trace Size - l_max_temp, //userdata1 - l_ot_error); //userdata2 - - // Callout the "over temperature" procedure - addCalloutToErrl(l_err, - ERRL_CALLOUT_TYPE_COMPONENT_ID, - ERRL_COMPONENT_ID_OVER_TEMPERATURE, - ERRL_CALLOUT_PRIORITY_HIGH); - l_callouts_count = 1; - } - - // Callout dimm - addCalloutToErrl(l_err, - ERRL_CALLOUT_TYPE_HUID, - l_huid, - ERRL_CALLOUT_PRIORITY_MED); - - l_callouts_count++; + //If we don't have an error log for the callout, create one + if(!l_err) + { + /* @ + * @errortype + * @moduleid AMEC_HEALTH_CHECK_DIMM_TEMP + * @reasoncode DIMM_ERROR_TEMP + * @userdata1 Maximum dimm temperature + * @userdata2 Dimm temperature threshold + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc Memory DIMM(s) exceeded maximum safe + * temperature. 
+ */ + l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TEMP, //modId + DIMM_ERROR_TEMP, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + l_max_temp, //userdata1 + l_ot_error); //userdata2 + + // Callout the "over temperature" procedure + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_COMPONENT_ID, + ERRL_COMPONENT_ID_OVER_TEMPERATURE, + ERRL_CALLOUT_PRIORITY_HIGH); + l_callouts_count = 1; + } - //If we've reached the max # of callouts for an error log - //commit the error log - if(l_callouts_count == ERRL_MAX_CALLOUTS) - { - commitErrl(&l_err); - } + // Callout dimm + l_huid = amec_mem_get_huid(l_port, l_dimm); + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + l_huid, + ERRL_CALLOUT_PRIORITY_MED); - //If we found all of the callouts for this centaur, go to the next one - if(!l_new_callouts) - { - break; + l_callouts_count++; } }//iterate over dimms - }//iterate over centaurs + }//iterate over ports if(l_err) { commitErrl(&l_err); } -} + +} // end amec_health_check_dimm_temp() + /* * Function Specification @@ -275,7 +272,7 @@ void amec_health_check_dimm_timeout() { static dimm_sensor_flags_t L_temp_update_bitmap_prev = {0}; dimm_sensor_flags_t l_need_inc, l_need_clr, l_temp_update_bitmap; - uint8_t l_dimm, l_cent; + uint8_t l_dimm, l_port; fru_temp_t* l_fru; errlHndl_t l_err = NULL; uint32_t l_callouts_count = 0; @@ -295,7 +292,7 @@ void amec_health_check_dimm_timeout() G_dimm_temp_updated_bitmap.bigword = 0; //check if we need to increment any timers (haven't been updated in the last second) - l_need_inc.bigword = G_dimm_enabled_sensors.bigword & ~l_temp_update_bitmap.bigword; + l_need_inc.bigword = G_dimm_enabled_sensors.bigword & ~l_temp_update_bitmap.bigword; //check if we need to clear any timers (updated now but not updated previously) l_need_clr.bigword = l_temp_update_bitmap.bigword & ~L_temp_update_bitmap_prev.bigword; @@ -310,18 +307,18 @@ void 
amec_health_check_dimm_timeout() break; } - //iterate across all centaurs/ports incrementing dimm sensor timers as needed - for(l_cent = 0; l_cent < MAX_NUM_CENTAURS; l_cent++) + //iterate across all ports incrementing dimm sensor timers as needed + for(l_port = 0; l_port < NUM_DIMM_PORTS; l_port++) { - //any dimm timers behind this centaur need incrementing? - if(!l_need_inc.bytes[l_cent]) + //any dimm timers on this port need incrementing? + if(!l_need_inc.bytes[l_port]) { - // All dimm sensors were updated for this centaur/port - // Trace this fact and clear the expired byte for all DIMMs on this centaur/port - if(G_dimm_temp_expired_bitmap.bytes[l_cent]) + // All dimm sensors were updated for this port + // Trace this fact and clear the expired byte for all DIMMs on this port + if(G_dimm_temp_expired_bitmap.bytes[l_port]) { - G_dimm_temp_expired_bitmap.bytes[l_cent] = 0; - TRAC_INFO("All dimm sensors for centaur %d have been updated", l_cent); + G_dimm_temp_expired_bitmap.bytes[l_port] = 0; + TRAC_INFO("All dimm sensors for centaur %d have been updated", l_port); } continue; } @@ -330,18 +327,18 @@ void amec_health_check_dimm_timeout() for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++) { //not this one, check if we need to clear the dimm timeout and go to the next one - if(!(l_need_inc.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm))) + if(!(l_need_inc.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))) { // Clear this one if needed - if(G_dimm_temp_expired_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm)) + if(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)) { - G_dimm_temp_expired_bitmap.bytes[l_cent] &= ~(DIMM_SENSOR0 >> l_dimm); + G_dimm_temp_expired_bitmap.bytes[l_port] &= ~(DIMM_SENSOR0 >> l_dimm); } continue; } //we found one. 
- l_fru = &g_amec->proc[0].memctl[l_cent].centaur.dimm_temps[l_dimm]; + l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm]; //increment timer l_fru->sample_age++; @@ -357,8 +354,8 @@ void amec_health_check_dimm_timeout() // meet the DIMM MAX_READ_TIMEOUT.) if((l_fru->sample_age == 1) && (!G_simics_environment)) { - TRAC_INFO("No new DIMM temperature available on cent[%d] dimm[%d] temp[%d] flags[0x%02X]", - l_cent, l_dimm, l_fru->cur_temp, l_fru->flags); + TRAC_INFO("No new DIMM temperature available for DIMM%04X (cur_temp[%d] flags[0x%02X])", + (l_port<<8)|l_dimm, l_fru->cur_temp, l_fru->flags); } //check if the temperature reading is still useable @@ -369,72 +366,63 @@ void amec_health_check_dimm_timeout() } //temperature has expired. Notify control algorithms which DIMM - if(!(G_dimm_temp_expired_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm))) + if(!(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))) { - G_dimm_temp_expired_bitmap.bytes[l_cent] |= (DIMM_SENSOR0 >> l_dimm); - TRAC_ERR("Timed out reading dimm temperature sensor on cent %d dimm %d.", - l_cent, l_dimm); + G_dimm_temp_expired_bitmap.bytes[l_port] |= (DIMM_SENSOR0 >> l_dimm); + TRAC_ERR("Timed out reading DIMM%04X temperature sensor", (l_port<<8)|l_dimm); } //If we've already logged an error for this FRU go to the next one. 
- if(G_dimm_timeout_logged_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm)) + if(G_dimm_timeout_logged_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)) { continue; } - TRAC_ERR("Timed out reading dimm temperature on cent/port[%d] dimm[%d] temp[%d] flags[0x%02X]", - l_cent, l_dimm, l_fru->cur_temp, l_fru->flags); - - if(!l_err) - { - /* @ - * @errortype - * @moduleid AMEC_HEALTH_CHECK_DIMM_TIMEOUT - * @reasoncode FRU_TEMP_TIMEOUT - * @userdata1 timeout value in seconds - * @userdata2 0 - * @userdata4 OCC_NO_EXTENDED_RC - * @devdesc Failed to read a memory DIMM temperature - * - */ - l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TIMEOUT, //modId - FRU_TEMP_TIMEOUT, //reasoncode - OCC_NO_EXTENDED_RC, //Extended reason code - ERRL_SEV_PREDICTIVE, //Severity - NULL, //Trace Buf - DEFAULT_TRACE_SIZE, //Trace Size - g_amec->thermaldimm.temp_timeout, //userdata1 - 0); //userdata2 - - l_callouts_count = 0; - } + TRAC_ERR("Timed out reading DIMM%04X temperature (cur_temp[%d] flags[0x%02X])", + (l_port<<8)|l_dimm, l_fru->cur_temp, l_fru->flags); - //Get the HUID for the dimm - l_huid = amec_mem_get_huid(l_cent, l_dimm); + //Mark DIMM as logged so we don't log it more than once + amec_mem_mark_logged(l_port, + l_dimm, + &G_cent_timeout_logged_bitmap, + &G_dimm_timeout_logged_bitmap.bytes[l_port]); - // Callout dimm - addCalloutToErrl(l_err, - ERRL_CALLOUT_TYPE_HUID, - l_huid, - ERRL_CALLOUT_PRIORITY_MED); + // Create single elog with up to MAX_CALLOUTS + if(l_callouts_count < ERRL_MAX_CALLOUTS) + { + if(!l_err) + { + /* @ + * @errortype + * @moduleid AMEC_HEALTH_CHECK_DIMM_TIMEOUT + * @reasoncode FRU_TEMP_TIMEOUT + * @userdata1 timeout value in seconds + * @userdata2 0 + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc Failed to read a memory DIMM temperature + * + */ + l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TIMEOUT, //modId + FRU_TEMP_TIMEOUT, //reasoncode + OCC_NO_EXTENDED_RC, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, 
//Trace Size + g_amec->thermaldimm.temp_timeout, //userdata1 + 0); //userdata2 + } - l_callouts_count++; + //Get the HUID for the DIMM and add callout + l_huid = amec_mem_get_huid(l_port, l_dimm); + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + l_huid, + ERRL_CALLOUT_PRIORITY_MED); - //If we've reached the max # of callouts for an error log - //commit the error log - if(l_callouts_count == ERRL_MAX_CALLOUTS) - { - commitErrl(&l_err); + l_callouts_count++; } - - //Mark dimm as logged so we don't log it more than once - amec_mem_mark_logged(l_cent, - l_dimm, - &G_cent_timeout_logged_bitmap, - &G_dimm_timeout_logged_bitmap.bytes[l_cent]); } //iterate over all dimms - - } //iterate over all centaurs/ports + } //iterate over all ports if(l_err) { @@ -448,10 +436,10 @@ void amec_health_check_dimm_timeout() } //iterate across all centaurs/ports clearing dimm sensor timers as needed - for(l_cent = 0; l_cent < MAX_NUM_CENTAURS; l_cent++) + for(l_port = 0; l_port < MAX_NUM_CENTAURS; l_port++) { - if(!l_need_clr.bytes[l_cent]) + if(!l_need_clr.bytes[l_port]) { continue; } @@ -460,13 +448,13 @@ void amec_health_check_dimm_timeout() for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++) { //not this one, go to next one - if(!(l_need_clr.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm))) + if(!(l_need_clr.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))) { continue; } //we found one. - l_fru = &g_amec->proc[0].memctl[l_cent].centaur.dimm_temps[l_dimm]; + l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm]; //clear timer l_fru->sample_age = 0; @@ -475,15 +463,16 @@ void amec_health_check_dimm_timeout() // complete on each call. Skip the "recovery" trace in Simics. 
if((L_ran_once) && (!G_simics_environment)) { - TRAC_INFO("DIMM temperature collection has resumed on cent/port[%d] dimm[%d] temp[%d]", - l_cent, l_dimm, l_fru->cur_temp); + TRAC_INFO("DIMM temperature collection has resumed for DIMM%04X temp[%d]", + (l_port<<8)|l_dimm, l_fru->cur_temp); } }//iterate over all dimms }//iterate over all centaurs/ports }while(0); L_ran_once = TRUE; -} + +} // end amec_health_check_dimm_timeout() diff --git a/src/occ_405/amec/amec_health.h b/src/occ_405/amec/amec_health.h index 0f5baf0..11d8fb0 100755 --- a/src/occ_405/amec/amec_health.h +++ b/src/occ_405/amec/amec_health.h @@ -47,5 +47,9 @@ void amec_health_check_cent_temp(void); void amec_health_check_cent_timeout(void); void amec_health_check_dimm_temp(void); void amec_health_check_dimm_timeout(void); +void amec_mem_mark_logged(uint8_t i_cent, + uint8_t i_dimm, + uint8_t* i_clog_bitmap, + uint8_t* i_dlog_bitmap); #endif diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c index 47bc2eb..443a967 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c +++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c @@ -1954,14 +1954,15 @@ errlHndl_t data_store_mem_throt(const cmdh_fsp_cmd_t * i_cmd_ptr, memcpy(&l_temp_set, &(l_data_set->min_n_per_mba), sizeof(mem_throt_config_data_t)); // A 0 for any power or N value is an error + unsigned int l_index = 0; for(l_n_ptr = &l_temp_set.min_n_per_mba; l_n_ptr <= &l_temp_set.nom_mem_power; l_n_ptr++) { if(!(*l_n_ptr)) { if(MEM_TYPE_NIMBUS == G_sysConfigData.mem_type) { - CMDH_TRAC_ERR("data_store_mem_throt: RDIMM Throttle N value is 0!" - " mc[%d] port[%d]", mc, port); + CMDH_TRAC_ERR("data_store_mem_throt: RDIMM Throttle value[%d] is 0!" 
+ " mc[%d] port[%d]", l_index, mc, port); } else if(MEM_TYPE_CUMULUS == G_sysConfigData.mem_type) { @@ -1971,6 +1972,7 @@ errlHndl_t data_store_mem_throt(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_DATA, &l_err); break; } + ++l_index; } if(l_err) // zero N Value? diff --git a/src/occ_405/dcom/dcomMasterTx.c b/src/occ_405/dcom/dcomMasterTx.c index 116b0e4..de88369 100644 --- a/src/occ_405/dcom/dcomMasterTx.c +++ b/src/occ_405/dcom/dcomMasterTx.c @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2016 */ +/* Contributors Listed Below - COPYRIGHT 2011,2017 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -487,6 +487,8 @@ void task_dcom_tx_slv_inbox( task_t *i_self) // Description: transmit doorbells to slaves // from master // +// NOTE: runs at crit interrupt (adding traces will cause crash) +// // End Function Specification void dcom_tx_slv_inbox_doorbell( void ) { @@ -530,8 +532,6 @@ void dcom_tx_slv_inbox_doorbell( void ) l_delta : G_dcomTime.master.doorbellMaxDeltaTx; G_dcomTime.master.doorbellSeq = G_dcom_slv_inbox_doorbell_tx.magic_counter; G_dcomTime.master.doorbellNumSent++; - - DCOM_DBG("Sent multicast doorbell\n"); } #endif //_DCOMMASTERTOSLAVE_C diff --git a/src/occ_405/dimm/dimm.c b/src/occ_405/dimm/dimm.c index fb62d6b..496814b 100755 --- a/src/occ_405/dimm/dimm.c +++ b/src/occ_405/dimm/dimm.c @@ -44,6 +44,7 @@ #include "common.h" #include "memory.h" #include "centaur_data.h" +#include "amec_health.h" extern bool G_mem_monitoring_allowed; extern memory_control_task_t G_memory_control_task; @@ -308,7 +309,7 @@ void mark_dimm_failed() l_err = createErrl(DIMM_MID_MARK_DIMM_FAILED, DIMM_GPE_FAILURE, ERC_DIMM_COMPLETE_FAILURE, - ERRL_SEV_INFORMATIONAL, + ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, G_dimm_sm_args.error.rc, @@ -322,6 +323,10 @@ void mark_dimm_failed() ERRL_CALLOUT_TYPE_HUID, G_sysConfigData.dimm_huids[port][dimm], 
ERRL_CALLOUT_PRIORITY_HIGH); + //Mark DIMM as logged so we don't log it again + amec_mem_mark_logged(port, dimm, + &G_cent_timeout_logged_bitmap, + &G_dimm_timeout_logged_bitmap.bytes[port]); commitErrl(&l_err); } diff --git a/src/occ_405/occ_sys_config.h b/src/occ_405/occ_sys_config.h index fb45635..d89833c 100755 --- a/src/occ_405/occ_sys_config.h +++ b/src/occ_405/occ_sys_config.h @@ -57,7 +57,6 @@ #define NUM_NIMBUS_MCAS (MAX_NUM_MCU_PORTS * NUM_NIMBUS_MC_PAIRS) #define NUM_DIMMS_PER_MEM_CONTROLLER 8 -#define NUM_I2C_PORTS 2 #define NUM_PROC_CHIPS_PER_OCC 1 #define NUM_CENTAURS_PER_MEM_CONTROLLER 1 diff --git a/src/occ_gpe1/gpe1.h b/src/occ_gpe1/gpe1.h index af26829..4ce0823 100755 --- a/src/occ_gpe1/gpe1.h +++ b/src/occ_gpe1/gpe1.h @@ -48,14 +48,6 @@ #define I2C_FIFO4_REG_READ 0x000A0012 -// I2C Status Reigster masks -#define STATUS_ERROR_MASK 0xFE80330000000000 -#define STATUS_ERROR_OR_COMPLETE_MASK 0xFF80330000000000 -#define STATUS_COMPLETE_MASK 0x0100000000000000 -#define PEEK_ERROR_MASK 0x00000000FC000000 -#define PEEK_MORE_DATA 0x0000000002000000 - - // Debug trace #ifdef GPE1_DEBUG #define GPE1_DIMM_DBG(frmt,args...) 
\ diff --git a/src/occ_gpe1/gpe1_dimm.h b/src/occ_gpe1/gpe1_dimm.h index c3249c6..b151f87 100755 --- a/src/occ_gpe1/gpe1_dimm.h +++ b/src/occ_gpe1/gpe1_dimm.h @@ -44,9 +44,9 @@ #define SCOM_ENGINE_OFFSET(engine) (engine << 12) -// I2C Status Reigster masks -#define STATUS_ERROR_MASK 0xFE80330000000000 -#define STATUS_ERROR_OR_COMPLETE_MASK 0xFF80330000000000 +// I2C Status Register masks +#define STATUS_ERROR_MASK 0xFC80000000000000 +#define STATUS_ERROR_OR_COMPLETE_MASK 0xFF80000000000000 #define STATUS_COMPLETE_MASK 0x0100000000000000 #define PEEK_ERROR_MASK 0x00000000FC000000 #define PEEK_MORE_DATA 0x0000000002000000 diff --git a/src/occ_gpe1/gpe1_dimm_control.c b/src/occ_gpe1/gpe1_dimm_control.c index 65ca4b6..a87a464 100644 --- a/src/occ_gpe1/gpe1_dimm_control.c +++ b/src/occ_gpe1/gpe1_dimm_control.c @@ -146,15 +146,16 @@ void gpe_reset_mem_deadman(ipc_msg_t* cmd, void* arg) ipc_async_cmd_t *async_cmd = (ipc_async_cmd_t*)cmd; reset_mem_deadman_args_t *args = (reset_mem_deadman_args_t*)async_cmd->cmd_data; - int mca = args->mca; // Nimbus MCA; mc_pair = mca >>2 and port = mca & 3 - args->error.error = 0; args->error.ffdc = 0; do { // read Deadman timer's SCOM Register for specified MCA (MC pair and port numbers) // @TODO: uncomment when deadman timer scom registers are definied in simics. 
RTC: 163713, RTC: 163934 - //rc = getscom_abs(DEADMAN_TIMER_MCA(mca), &regValue); +#if 0 + int mca = args->mca; // Nimbus MCA; mc_pair = mca >>2 and port = mca & 3 + + rc = getscom_abs(DEADMAN_TIMER_MCA(mca), &regValue); if(rc) { PK_TRACE("gpe_reset_mem_deadman: Deadman timer read failed" @@ -172,6 +173,7 @@ void gpe_reset_mem_deadman(ipc_msg_t* cmd, void* arg) mca, DEADMAN_TIMER_MCA(mca), regValue); } +#endif } while(0); // send back a response, IPC success even if ffdc/rc are non zeros diff --git a/src/occ_gpe1/gpe1_dimm_read.c b/src/occ_gpe1/gpe1_dimm_read.c index e0ee84c..86d80b6 100644 --- a/src/occ_gpe1/gpe1_dimm_read.c +++ b/src/occ_gpe1/gpe1_dimm_read.c @@ -370,10 +370,12 @@ void dimm_initiate_read(ipc_msg_t* cmd, void* arg) if ((regValue & STATUS_ERROR_OR_COMPLETE_MASK) == STATUS_COMPLETE_MASK) { // Status register indicates no errors and last command completed. - // Write the I2C command register with a 2 byte read request + // Write the I2C command register with a 2 byte read request. + // Since FIFO4 can read 4 bytes in one operation, we will do a read of 4 bytes + // and only look at first 2 bytes. (FIFO4 will hang if only try to read 2 bytes) scomAddr = I2C_COMMAND_REG | SCOM_ENGINE_OFFSET(args->i2cEngine); - // start+address+stop + slave_address, rw=1=read, length=2 - regValue = 0xD001000200000000; + // start+address+stop + slave_address, rw=1=read, length=4 + regValue = 0xD001000400000000; regValue |= ((uint64_t)args->i2cAddr << 48); rc = putscom_abs(scomAddr, regValue); if(rc) @@ -497,12 +499,7 @@ void dimm_read_temp(ipc_msg_t* cmd, void* arg) WORD_HIGH(regValue), WORD_LOW(regValue)); gpe_set_ffdc(&(args->error), scomAddr, GPE_RC_I2C_ERROR, regValue); } - else if (regValue & PEEK_MORE_DATA) - { - // The data_request bit is non-zero, but no more data is needed! 
- PK_TRACE("dimm_read_temp: Got data, but more data needs access??"); - gpe_set_ffdc(&(args->error), scomAddr, GPE_RC_NOT_COMPLETE, regValue); - } + // PEEK_MORE_DATA will be set because we only read 2 of the 4 bytes (ignore this bit) } } // else, all data not available yet (NOT_COMPLETE) |