summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-x  src/occ_405/amec/amec_data.c  31
-rwxr-xr-x  src/occ_405/amec/amec_health.c  197
-rwxr-xr-x  src/occ_405/amec/amec_health.h  2
-rwxr-xr-x  src/occ_405/amec/amec_service_codes.h  40
-rwxr-xr-x  src/occ_405/amec/amec_sys.h  3
-rwxr-xr-x  src/occ_405/cmdh/cmdh_fsp_cmds.c  2
-rwxr-xr-x  src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c  59
-rwxr-xr-x  src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h  13
-rwxr-xr-x  src/occ_405/main.c  3
-rw-r--r--  src/occ_405/occ_service_codes.h  10
-rwxr-xr-x  src/occ_405/occ_sys_config.h  3
-rw-r--r--  src/occ_405/pss/avsbus.c  2
-rwxr-xr-x  src/occ_405/sensor/sensor_enum.h  1
-rwxr-xr-x  src/occ_405/sensor/sensor_info.c  35
-rwxr-xr-x  src/occ_405/sensor/sensor_table.c  2
15 files changed, 332 insertions, 71 deletions
diff --git a/src/occ_405/amec/amec_data.c b/src/occ_405/amec/amec_data.c
index 4c553d6..9294c1d 100755
--- a/src/occ_405/amec/amec_data.c
+++ b/src/occ_405/amec/amec_data.c
@@ -261,11 +261,40 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for DIMM",
l_dvfs_temp);
- g_amec->vrhotproc.setpoint = l_frudata[DATA_FRU_VRM].error_count;
+ g_amec->vrhotproc.setpoint = l_frudata[DATA_FRU_VRM_OT_STATUS].error_count;
TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for VRHOT",
g_amec->vrhotproc.setpoint);
+ // Store the VRM Vdd thermal data
+ if(i_mode == OCC_MODE_NOMINAL)
+ {
+ l_dvfs_temp = l_frudata[DATA_FRU_VRM_VDD].dvfs;
+ l_error = l_frudata[DATA_FRU_VRM_VDD].error;
+ }
+ else
+ {
+ l_dvfs_temp = l_frudata[DATA_FRU_VRM_VDD].pm_dvfs;
+ if(i_mode == OCC_MODE_TURBO)
+ {
+ //Need to log an error if we dvfs in static turbo mode (for mfg)
+ l_error = l_dvfs_temp;
+ }
+ else
+ {
+ l_error = l_frudata[DATA_FRU_VRM_VDD].pm_error;
+ }
+ }
+ // Store the DVFS thermal setpoint in 0.1 degrees C
+ g_amec->thermalvdd.setpoint = l_dvfs_temp * 10;
+ // Store the error temperature for OT detection
+ g_amec->thermalvdd.ot_error = l_error;
+ // Store the temperature timeout value
+ g_amec->thermalvdd.temp_timeout = l_frudata[DATA_FRU_VRM_VDD].max_read_timeout;
+
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for VRM Vdd",
+ l_dvfs_temp);
+
} while(0);
return l_err;
diff --git a/src/occ_405/amec/amec_health.c b/src/occ_405/amec/amec_health.c
index 1d026d2..12c348d 100755
--- a/src/occ_405/amec/amec_health.c
+++ b/src/occ_405/amec/amec_health.c
@@ -67,6 +67,9 @@ uint8_t G_cent_temp_expired_bitmap = 0;
// Array to store the update tag of each core's temperature sensor
uint32_t G_core_temp_update_tag[MAX_NUM_CORES] = {0};
+// TRUE once reading the VRM Vdd temperature has timed out; set and cleared by amec_health_check_vrm_vdd_temp_timeout()
+bool G_vrm_vdd_temp_expired = false;
+
//*************************************************************************/
// Function Declarations
//*************************************************************************/
@@ -398,13 +401,13 @@ void amec_health_check_dimm_timeout()
* @reasoncode FRU_TEMP_TIMEOUT
* @userdata1 timeout value in seconds
* @userdata2 0
- * @userdata4 OCC_NO_EXTENDED_RC
+ * @userdata4 ERC_AMEC_DIMM_TEMP_TIMEOUT
* @devdesc Failed to read a memory DIMM temperature
*
*/
l_err = createErrl(AMEC_HEALTH_CHECK_DIMM_TIMEOUT, //modId
FRU_TEMP_TIMEOUT, //reasoncode
- OCC_NO_EXTENDED_RC, //Extended reason code
+ ERC_AMEC_DIMM_TEMP_TIMEOUT, //Extended reason code
ERRL_SEV_PREDICTIVE, //Severity
NULL, //Trace Buf
DEFAULT_TRACE_SIZE, //Trace Size
@@ -706,14 +709,14 @@ void amec_health_check_cent_timeout()
* @reasoncode FRU_TEMP_TIMEOUT
* @userdata1 timeout value in seconds
* @userdata2 0
- * @userdata4 OCC_NO_EXTENDED_RC
+ * @userdata4 ERC_AMEC_CENT_TEMP_TIMEOUT
* @devdesc Failed to read a centaur memory controller
* temperature
*
*/
l_err = createErrl(AMEC_HEALTH_CHECK_CENT_TIMEOUT, //modId
FRU_TEMP_TIMEOUT, //reasoncode
- OCC_NO_EXTENDED_RC, //Extended reason code
+ ERC_AMEC_CENT_TEMP_TIMEOUT, //Extended reason code
ERRL_SEV_PREDICTIVE, //Severity
NULL, //Trace Buf
DEFAULT_TRACE_SIZE, //Trace Size
@@ -999,6 +1002,192 @@ void amec_health_check_proc_timeout()
}while(0);
}
+// Function Specification
+//
+// Name: amec_health_check_vrm_vdd_temp
+//
+// Description: Checks whether the TEMPVDD sensor sample exceeds the VRM Vdd error temperature (g_amec->thermalvdd.ot_error, sent in data format 0x13).
+// A predictive error is logged at most once, when the sample has been above the error temperature for AMEC_HEALTH_ERROR_TIMER consecutive calls.
+//
+// End Function Specification
+void amec_health_check_vrm_vdd_temp()
+{
+ /*------------------------------------------------------------------------*/
+ /* Local Variables */
+ /*------------------------------------------------------------------------*/
+ uint16_t l_ot_error; // error temperature threshold read from g_amec->thermalvdd
+ static uint32_t L_error_count = 0; // consecutive calls with the sample above the threshold
+ static BOOLEAN L_ot_error_logged = FALSE; // set once an OT error has been logged; never cleared here
+ sensor_t *l_sensor;
+ errlHndl_t l_err = NULL;
+
+ /*------------------------------------------------------------------------*/
+ /* Code */
+ /*------------------------------------------------------------------------*/
+ do
+ {
+ // Get TEMPVDD sensor and the configured error temperature
+ l_sensor = getSensorByGsid(TEMPVDD);
+ l_ot_error = g_amec->thermalvdd.ot_error;
+
+ // Check to see if we exceeded our error temperature
+ if (l_sensor->sample > l_ot_error)
+ {
+ // Increment the error counter for this FRU
+ L_error_count++;
+
+ // Trace and log an error once the condition has persisted for AMEC_HEALTH_ERROR_TIMER consecutive calls
+ if (L_error_count == AMEC_HEALTH_ERROR_TIMER)
+ {
+ // Have we logged an OT error for this FRU already? (only one OT error is ever logged)
+ if (L_ot_error_logged == TRUE)
+ {
+ break;
+ }
+
+ L_ot_error_logged = TRUE;
+
+ TRAC_ERR("amec_health_check_vrm_vdd_temp: VRM vdd has exceeded OT error! temp[%u] ot_error[%u]",
+ l_sensor->sample,
+ l_ot_error);
+
+ // Log an OT error
+ /* @
+ * @errortype
+ * @moduleid AMEC_HEALTH_CHECK_VRM_VDD_TEMP
+ * @reasoncode VRM_VDD_ERROR_TEMP
+ * @userdata1 0
+ * @userdata2 Fru peak temperature sensor
+ * @userdata4 ERC_AMEC_PROC_ERROR_OVER_TEMPERATURE
+ * @devdesc VRM Vdd has reached error temperature
+ * threshold and is called out in this error log.
+ *
+ */
+ l_err = createErrl(AMEC_HEALTH_CHECK_VRM_VDD_TEMP,
+ VRM_VDD_ERROR_TEMP,
+ ERC_AMEC_PROC_ERROR_OVER_TEMPERATURE, // NOTE(review): processor OT extended reason code reused for a VRM Vdd error - confirm this is intended
+ ERRL_SEV_PREDICTIVE,
+ NULL,
+ DEFAULT_TRACE_SIZE,
+ 0,
+ l_sensor->sample_max);
+
+ // Callout the Ambient procedure (over temperature component ID), high priority
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_COMPONENT_ID,
+ ERRL_COMPONENT_ID_OVER_TEMPERATURE,
+ ERRL_CALLOUT_PRIORITY_HIGH);
+
+ // Callout VRM Vdd by HUID, medium priority
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.vrm_vdd_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
+
+ // Commit Error
+ commitErrl(&l_err);
+ }
+ }
+ else
+ {
+ // Trace that we have now dropped below the error threshold (only if the error timer had been reached)
+ if (L_error_count >= AMEC_HEALTH_ERROR_TIMER)
+ {
+ TRAC_INFO("amec_health_check_vrm_vdd_temp: VRM Vdd temp [%u] now below error temp [%u] after error_count [%u]",
+ l_sensor->sample, l_ot_error, L_error_count);
+ }
+
+ // Reset the error counter for this FRU
+ L_error_count = 0;
+ }
+ }while (0);
+
+}
+
+// Function Specification
+//
+// Name: amec_health_check_vrm_vdd_temp_timeout
+//
+// Description: Detects a stalled VRM Vdd temperature reading by watching the TEMPVDD sensor update tag. When the tag has not changed for
+// g_amec->thermalvdd.temp_timeout consecutive calls, G_vrm_vdd_temp_expired is set and, the first time only, an error is logged and a reset is requested.
+//
+// End Function Specification
+void amec_health_check_vrm_vdd_temp_timeout()
+{
+ /*------------------------------------------------------------------------*/
+ /* Local Variables */
+ /*------------------------------------------------------------------------*/
+ errlHndl_t l_err = NULL;
+ uint32_t l_update_tag = 0;
+ static uint32_t L_read_fail_cnt = 0; // consecutive calls in which the update tag did not change
+ static BOOLEAN L_error_logged = FALSE; // set once the timeout error has been logged; never cleared here
+ static uint32_t L_vdd_temp_update_tag = 0; // update tag seen on the last successful read
+
+ /*------------------------------------------------------------------------*/
+ /* Code */
+ /*------------------------------------------------------------------------*/
+
+ // Check if VRM Vdd temperature sensor has been updated by checking the sensor update tag
+ // If the update tag is not changing, then temperature sensor is not being updated.
+ l_update_tag = AMECSENSOR_PTR(TEMPVDD)->update_tag;
+ if (l_update_tag != L_vdd_temp_update_tag)
+ {
+ // We were able to read VRM Vdd temperature: clear the failure count and the expired flag
+ L_read_fail_cnt = 0;
+ G_vrm_vdd_temp_expired = false;
+ L_vdd_temp_update_tag = l_update_tag;
+ }
+ else
+ {
+ // Failed to read VRM Vdd temperature sensor
+ L_read_fail_cnt++;
+
+ // Check if we have reached the maximum read time allowed (a temp_timeout of 0xFF never times out)
+ if((L_read_fail_cnt == g_amec->thermalvdd.temp_timeout) &&
+ (g_amec->thermalvdd.temp_timeout != 0xFF))
+ {
+ //temperature has expired. Notify control algorithms
+ G_vrm_vdd_temp_expired = true;
+
+ // Log error one time
+ if (L_error_logged == FALSE)
+ {
+ L_error_logged = TRUE;
+
+ TRAC_ERR("Timed out reading VRM Vdd temperature for timeout[%u]",
+ g_amec->thermalvdd.temp_timeout);
+
+ /* @
+ * @errortype
+ * @moduleid AMEC_HEALTH_CHECK_VRM_VDD_TIMEOUT
+ * @reasoncode FRU_TEMP_TIMEOUT
+ * @userdata1 timeout value in seconds
+ * @userdata2 0
+ * @userdata4 ERC_AMEC_VRM_VDD_TEMP_TIMEOUT
+ * @devdesc Failed to read VRM Vdd temperature.
+ *
+ */
+ l_err = createErrl(AMEC_HEALTH_CHECK_VRM_VDD_TIMEOUT, //modId
+ FRU_TEMP_TIMEOUT, //reasoncode
+ ERC_AMEC_VRM_VDD_TEMP_TIMEOUT, //Extended reason code
+ ERRL_SEV_PREDICTIVE, //Severity
+ NULL, //Trace Buf
+ DEFAULT_TRACE_SIZE, //Trace Size
+ g_amec->thermalvdd.temp_timeout, //userdata1
+ 0); //userdata2
+
+ // Callout the VRM Vdd by HUID, medium priority
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.vrm_vdd_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
+
+ // Commit error log and request reset
+ REQUEST_RESET(l_err);
+ }
+ } // if reached timeout
+ } // else failed to read temp
+}
+
/*----------------------------------------------------------------------------*/
/* End */
/*----------------------------------------------------------------------------*/
diff --git a/src/occ_405/amec/amec_health.h b/src/occ_405/amec/amec_health.h
index 11d8fb0..7992f26 100755
--- a/src/occ_405/amec/amec_health.h
+++ b/src/occ_405/amec/amec_health.h
@@ -51,5 +51,7 @@ void amec_mem_mark_logged(uint8_t i_cent,
uint8_t i_dimm,
uint8_t* i_clog_bitmap,
uint8_t* i_dlog_bitmap);
+void amec_health_check_vrm_vdd_temp(void);
+void amec_health_check_vrm_vdd_temp_timeout(void);
#endif
diff --git a/src/occ_405/amec/amec_service_codes.h b/src/occ_405/amec/amec_service_codes.h
index f206daf..8c87e5f 100755
--- a/src/occ_405/amec/amec_service_codes.h
+++ b/src/occ_405/amec/amec_service_codes.h
@@ -48,25 +48,27 @@
/*----------------------------------------------------------------------------*/
enum occAmecModuleId // Module IDs passed as the modId argument when AMEC code creates an error log
{
- AMEC_INITIALIZE_FW_SENSORS = AMEC_COMP_ID | 0x00,
- AMEC_UPDATE_FW_SENSORS = AMEC_COMP_ID | 0x01,
- AMEC_VECTORIZE_FW_SENSORS = AMEC_COMP_ID | 0x02,
- AMEC_AMESTER_INTERFACE = AMEC_COMP_ID | 0x03,
- AMEC_PCAP_CONN_OC_CONTROLLER = AMEC_COMP_ID | 0x04,
- AMEC_MST_CHECK_PCAPS_MATCH = AMEC_COMP_ID | 0x05,
- AMEC_MST_CHECK_UNDER_PCAP = AMEC_COMP_ID | 0x06,
- AMEC_SLAVE_CHECK_PERFORMANCE = AMEC_COMP_ID | 0x07,
- AMEC_HEALTH_CHECK_PROC_TEMP = AMEC_COMP_ID | 0x08,
- AMEC_HEALTH_CHECK_DIMM_TEMP = AMEC_COMP_ID | 0x09,
- AMEC_HEALTH_CHECK_CENT_TEMP = AMEC_COMP_ID | 0x10,
- AMEC_HEALTH_CHECK_DIMM_TIMEOUT = AMEC_COMP_ID | 0x11,
- AMEC_HEALTH_CHECK_CENT_TIMEOUT = AMEC_COMP_ID | 0x12,
- AMEC_HEALTH_CHECK_VRFAN_TIMEOUT = AMEC_COMP_ID | 0x13,
- AMEC_HEALTH_CHECK_PROC_TIMEOUT = AMEC_COMP_ID | 0x14,
- AMEC_CALC_DTS_SENSORS = AMEC_COMP_ID | 0x16,
- AMEC_SET_FREQ_RANGE = AMEC_COMP_ID | 0x17,
- AMEC_UPDATE_APSS_GPIO = AMEC_COMP_ID | 0x18,
- AMEC_GPU_PCAP_MID = AMEC_COMP_ID | 0x19,
+ AMEC_INITIALIZE_FW_SENSORS = AMEC_COMP_ID | 0x00,
+ AMEC_UPDATE_FW_SENSORS = AMEC_COMP_ID | 0x01,
+ AMEC_VECTORIZE_FW_SENSORS = AMEC_COMP_ID | 0x02,
+ AMEC_AMESTER_INTERFACE = AMEC_COMP_ID | 0x03,
+ AMEC_PCAP_CONN_OC_CONTROLLER = AMEC_COMP_ID | 0x04,
+ AMEC_MST_CHECK_PCAPS_MATCH = AMEC_COMP_ID | 0x05,
+ AMEC_MST_CHECK_UNDER_PCAP = AMEC_COMP_ID | 0x06,
+ AMEC_SLAVE_CHECK_PERFORMANCE = AMEC_COMP_ID | 0x07,
+ AMEC_HEALTH_CHECK_PROC_TEMP = AMEC_COMP_ID | 0x08,
+ AMEC_HEALTH_CHECK_DIMM_TEMP = AMEC_COMP_ID | 0x09,
+ AMEC_HEALTH_CHECK_CENT_TEMP = AMEC_COMP_ID | 0x10, // note: values 0x0A-0x0F are not assigned in this enum
+ AMEC_HEALTH_CHECK_DIMM_TIMEOUT = AMEC_COMP_ID | 0x11,
+ AMEC_HEALTH_CHECK_CENT_TIMEOUT = AMEC_COMP_ID | 0x12,
+ AMEC_HEALTH_CHECK_VRFAN_TIMEOUT = AMEC_COMP_ID | 0x13,
+ AMEC_HEALTH_CHECK_PROC_TIMEOUT = AMEC_COMP_ID | 0x14,
+ AMEC_CALC_DTS_SENSORS = AMEC_COMP_ID | 0x16, // note: value 0x15 is not assigned in this enum
+ AMEC_SET_FREQ_RANGE = AMEC_COMP_ID | 0x17,
+ AMEC_UPDATE_APSS_GPIO = AMEC_COMP_ID | 0x18,
+ AMEC_GPU_PCAP_MID = AMEC_COMP_ID | 0x19,
+ AMEC_HEALTH_CHECK_VRM_VDD_TEMP = AMEC_COMP_ID | 0x1A, // used by amec_health_check_vrm_vdd_temp()
+ AMEC_HEALTH_CHECK_VRM_VDD_TIMEOUT = AMEC_COMP_ID | 0x1B, // used by amec_health_check_vrm_vdd_temp_timeout()
};
/*----------------------------------------------------------------------------*/
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index 3f1d333..e86a000 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -362,6 +362,7 @@ typedef struct
sensor_t vrhot_mem_proc;
sensor_t vrfan;
+ sensor_t tempvdd;
// Chip Sensors
sensor_t todclock0;
@@ -687,6 +688,8 @@ typedef struct
amec_controller_t thermaldimm;
// Thermal Controller based on VRHOT signal from processor VRM
amec_controller_t vrhotproc;
+ // Thermal Controller based on VRM Vdd temperatures
+ amec_controller_t thermalvdd;
// Oversubscription Status
oversub_status_t oversub_status;
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.c b/src/occ_405/cmdh/cmdh_fsp_cmds.c
index 422dc38..28fe8bb 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds.c
@@ -349,7 +349,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
if (vrfan != NULL)
{
l_tempSensorList[l_sensorHeader.count].id = 0;
- l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_VRM;
+ l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_VRM_OT_STATUS;
l_tempSensorList[l_sensorHeader.count].value = vrfan->sample & 0xFF;
l_sensorHeader.count++;
}
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
index c35f1b7..5982e7f 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
@@ -57,6 +57,7 @@
#define DATA_PCAP_VERSION_20 0x20
#define DATA_SYS_VERSION_20 0x20
+#define DATA_SYS_VERSION_21 0x21
#define DATA_APSS_VERSION20 0x20
@@ -1565,7 +1566,7 @@ errlHndl_t data_store_sys_config(const cmdh_fsp_cmd_t * i_cmd_ptr,
errlHndl_t l_err = NULL;
// Cast the command to the struct for this format
- cmdh_sys_config_v20_t * l_cmd_ptr = (cmdh_sys_config_v20_t *)i_cmd_ptr;
+ cmdh_sys_config_v21_t * l_cmd_ptr = (cmdh_sys_config_v21_t *)i_cmd_ptr;
uint16_t l_data_length = 0;
uint32_t l_sys_data_sz = 0;
bool l_invalid_input = TRUE; //Assume bad input
@@ -1582,6 +1583,14 @@ errlHndl_t data_store_sys_config(const cmdh_fsp_cmd_t * i_cmd_ptr,
l_invalid_input = FALSE;
}
}
+ else if(l_cmd_ptr->version == DATA_SYS_VERSION_21)
+ {
+ l_sys_data_sz = sizeof(cmdh_sys_config_v21_t) - sizeof(cmdh_fsp_cmd_header_t);
+ if(l_sys_data_sz == l_data_length)
+ {
+ l_invalid_input = FALSE;
+ }
+ }
if(l_invalid_input)
{
@@ -1613,28 +1622,32 @@ errlHndl_t data_store_sys_config(const cmdh_fsp_cmd_t * i_cmd_ptr,
ERRL_COMPONENT_ID_FIRMWARE,
ERRL_CALLOUT_PRIORITY_HIGH);
}
- else
+ else // version and length is valid, store the data
{
- if(l_cmd_ptr->version == DATA_SYS_VERSION_20)
+ // Copy data that is common to all versions
+ G_sysConfigData.system_type.byte = l_cmd_ptr->sys_config.system_type;
+ G_sysConfigData.backplane_huid = l_cmd_ptr->sys_config.backplane_sid;
+ G_sysConfigData.apss_huid = l_cmd_ptr->sys_config.apss_sid;
+ G_sysConfigData.proc_huid = l_cmd_ptr->sys_config.proc_sid;
+ CNFG_DBG("data_store_sys_config: SystemType[0x%02X] BPSID[0x%08X] APSSSID[0x%08X] ProcSID[0x%08X]",
+ G_sysConfigData.system_type.byte, G_sysConfigData.backplane_huid, G_sysConfigData.apss_huid,
+ G_sysConfigData.proc_huid);
+
+ //Write core temp and freq sensor ids
+ //Core Temp and Freq sensors are always in sequence in the table
+ for (l_coreIndex = 0; l_coreIndex < MAX_CORES; l_coreIndex++)
{
- // Copy data
- G_sysConfigData.system_type.byte = l_cmd_ptr->sys_config.system_type;
- G_sysConfigData.backplane_huid = l_cmd_ptr->sys_config.backplane_sid;
- G_sysConfigData.apss_huid = l_cmd_ptr->sys_config.apss_sid;
- G_sysConfigData.proc_huid = l_cmd_ptr->sys_config.proc_sid;
- CNFG_DBG("data_store_sys_config: SystemType[0x%02X] BPSID[0x%08X] APSSSID[0x%08X] ProcSID[0x%08X]",
- G_sysConfigData.system_type.byte, G_sysConfigData.backplane_huid, G_sysConfigData.apss_huid,
- G_sysConfigData.proc_huid);
-
- //Write core temp and freq sensor ids
- //Core Temp and Freq sensors are always in sequence in the table
- for (l_coreIndex = 0; l_coreIndex < MAX_CORES; l_coreIndex++)
- {
- AMECSENSOR_PTR(TEMPPROCTHRMC0 + l_coreIndex)->ipmi_sid = l_cmd_ptr->sys_config.core_sid[(l_coreIndex * 2)];
- AMECSENSOR_PTR(FREQAC0 + l_coreIndex)->ipmi_sid = l_cmd_ptr->sys_config.core_sid[(l_coreIndex * 2) + 1];
- CNFG_DBG("data_store_sys_config: Core[%d] TempSID[0x%08X] FreqSID[0x%08X]", l_coreIndex,
- AMECSENSOR_PTR(TEMPPROCTHRMC0 + l_coreIndex)->ipmi_sid, AMECSENSOR_PTR(FREQAC0 + l_coreIndex)->ipmi_sid);
- }
+ AMECSENSOR_PTR(TEMPPROCTHRMC0 + l_coreIndex)->ipmi_sid = l_cmd_ptr->sys_config.core_sid[(l_coreIndex * 2)];
+ AMECSENSOR_PTR(FREQAC0 + l_coreIndex)->ipmi_sid = l_cmd_ptr->sys_config.core_sid[(l_coreIndex * 2) + 1];
+ CNFG_DBG("data_store_sys_config: Core[%d] TempSID[0x%08X] FreqSID[0x%08X]", l_coreIndex,
+ AMECSENSOR_PTR(TEMPPROCTHRMC0 + l_coreIndex)->ipmi_sid, AMECSENSOR_PTR(FREQAC0 + l_coreIndex)->ipmi_sid);
+ }
+
+ if(l_cmd_ptr->version == DATA_SYS_VERSION_21)
+ {
+ // Copy the additional data for version 21
+ G_sysConfigData.vrm_vdd_huid = l_cmd_ptr->vrm_vdd_sid;
+ AMECSENSOR_PTR(TEMPVDD)->ipmi_sid = l_cmd_ptr->vrm_vdd_temp_sid;
}
// Change Data Request Mask to indicate we got this data
@@ -1723,7 +1736,7 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr,
l_cmd_ptr->data[i].max_read_timeout;
// Set a local flag if we get data for VRM FRU type
- if(l_frutype == DATA_FRU_VRM)
+ if(l_frutype == DATA_FRU_VRM_OT_STATUS)
{
l_vrm_frutype = TRUE;
}
@@ -1759,7 +1772,7 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr,
// Also, make the error count very high so that the health
// monitor doesn't complain about VRHOT being asserted.
G_vrm_thermal_monitoring = FALSE;
- G_data_cnfg->thrm_thresh.data[DATA_FRU_VRM].error_count = 0xFF;
+ G_data_cnfg->thrm_thresh.data[DATA_FRU_VRM_OT_STATUS].error_count = 0xFF;
CMDH_TRAC_IMP("data_store_thrm_thresholds: No VRM limits received. OCC will not monitor AVS bus status");
}
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
index fcb4893..dbeb768 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
@@ -76,9 +76,10 @@ typedef enum
DATA_FRU_PROC = 0x00,
DATA_FRU_CENTAUR = 0x01,
DATA_FRU_DIMM = 0x02,
- DATA_FRU_VRM = 0x03,
+ DATA_FRU_VRM_OT_STATUS = 0x03, // this is just a bit indicating OT or not
DATA_FRU_GPU = 0x04,
DATA_FRU_GPU_MEM = 0x05,
+ DATA_FRU_VRM_VDD = 0x06, // this is an actual temperature reading for VRM Vdd
DATA_FRU_MAX,
} eConfigDataFruType;
@@ -218,6 +219,16 @@ typedef struct __attribute__ ((packed))
cmdh_sys_config_data_v20_t sys_config;
}cmdh_sys_config_v20_t;
+typedef struct __attribute__ ((packed)) // System config command layout for version 0x21: the v20 layout plus two VRM Vdd sensor IDs
+{
+ struct cmdh_fsp_cmd_header;
+ uint8_t format;
+ uint8_t version; // layout selector; data_store_sys_config() validates the length against this struct when it is 0x21
+ cmdh_sys_config_data_v20_t sys_config; // same payload type as in cmdh_sys_config_v20_t
+ uint32_t vrm_vdd_sid; // VRM Vdd Sensor ID for hw callout
+ uint32_t vrm_vdd_temp_sid; // VRM Vdd Temperature sensor ID
+}cmdh_sys_config_v21_t;
+
// Used by TMGT to send OCC the IPS config data.
typedef struct __attribute__ ((packed))
{
diff --git a/src/occ_405/main.c b/src/occ_405/main.c
index ad09280..f80486e 100755
--- a/src/occ_405/main.c
+++ b/src/occ_405/main.c
@@ -1343,10 +1343,11 @@ void hmon_routine()
}
//if we are in observation, characterization, or activate state, then monitor the processor
- //temperature for timeout conditions and the processor VRHOT signal.
+ //and VRM Vdd temperatures for timeout conditions
if (IS_OCC_STATE_OBSERVATION() || IS_OCC_STATE_ACTIVE() || IS_OCC_STATE_CHARACTERIZATION())
{
amec_health_check_proc_timeout();
+// enable with VRM Vdd read support amec_health_check_vrm_vdd_temp_timeout();
}
//if we are in observation, characterization, or active state with memory temperature data
diff --git a/src/occ_405/occ_service_codes.h b/src/occ_405/occ_service_codes.h
index c740c05..fd0fbec 100644
--- a/src/occ_405/occ_service_codes.h
+++ b/src/occ_405/occ_service_codes.h
@@ -61,8 +61,8 @@ enum occReasonCode
EXTERNAL_INTERFACE_FAILURE = 0x18,
/// Incorrect number of active quads reported
INVALID_ACTIVE_QUAD_COUNT = 0x19,
- /// VRM reached error threshold (VR_HOT asserted)
- VRM_ERROR_TEMP = 0x20,
+ /// VRM Vdd reached error temperature threshold
+ VRM_VDD_ERROR_TEMP = 0x20,
/// VR_FAN - AVS Bus over-temperature reported
VRM_VRFAN_WARNING = 0x22,
/// GPIO_VR_HOT_MEM_PROC signal from APSS asserted
@@ -210,9 +210,9 @@ enum occExtReasonCode
ERC_AMEC_PROC_ERROR_OVER_TEMPERATURE = 0x002F,
- ERC_APLT_INIT_FAILURE = 0x0030,
- ERC_APLT_START_VERSION_MISMATCH = 0x0031,
- ERC_APLT_START_CHECKSUM_MISMATCH = 0x0032,
+ ERC_AMEC_VRM_VDD_TEMP_TIMEOUT = 0x0030,
+ ERC_AMEC_DIMM_TEMP_TIMEOUT = 0x0031,
+ ERC_AMEC_CENT_TEMP_TIMEOUT = 0x0032,
ERC_CMDH_MBOX_REQST_FAILURE = 0x0040,
ERC_CMDH_INTERNAL_FAILURE = 0x0041,
diff --git a/src/occ_405/occ_sys_config.h b/src/occ_405/occ_sys_config.h
index 9e5afcd..e398a6c 100755
--- a/src/occ_405/occ_sys_config.h
+++ b/src/occ_405/occ_sys_config.h
@@ -340,6 +340,9 @@ typedef struct
// DPSS HUID - Used by OCC for DPSS error call out
uint32_t dpss_huid;
+ // VRM Vdd HUID - Used by OCC for VRM Vdd error call out
+ uint32_t vrm_vdd_huid;
+
// Contains how many OCCs & how many proc modules are present.
uint8_t sys_num_proc_present;
diff --git a/src/occ_405/pss/avsbus.c b/src/occ_405/pss/avsbus.c
index d6252c5..7c155f9 100644
--- a/src/occ_405/pss/avsbus.c
+++ b/src/occ_405/pss/avsbus.c
@@ -768,7 +768,7 @@ uint16_t avsbus_read_status(const avsbus_type_e i_type)
uint16_t o_reading = 0;
bool l_failure = FALSE;
- const uint8_t max_read_attempts = G_data_cnfg->thrm_thresh.data[DATA_FRU_VRM].max_read_timeout;
+ const uint8_t max_read_attempts = G_data_cnfg->thrm_thresh.data[DATA_FRU_VRM_OT_STATUS].max_read_timeout;
// Static error counters for each type (Vdd/Vdn)
static uint32_t L_error_count[ERRORCOUNT_MAXTYPES] = {0};
diff --git a/src/occ_405/sensor/sensor_enum.h b/src/occ_405/sensor/sensor_enum.h
index f3fe743..433530e 100755
--- a/src/occ_405/sensor/sensor_enum.h
+++ b/src/occ_405/sensor/sensor_enum.h
@@ -145,6 +145,7 @@ enum e_gsid
CURVDD,
CURVDN,
VRMPROCOT,
+ TEMPVDD,
// ------------------------------------------------------
// Core Sensors
diff --git a/src/occ_405/sensor/sensor_info.c b/src/occ_405/sensor/sensor_info.c
index 23bda41..06f3ff1 100755
--- a/src/occ_405/sensor/sensor_info.c
+++ b/src/occ_405/sensor/sensor_info.c
@@ -29,7 +29,9 @@
#define AMEEFP_250US_IN_HZ AMEFP(4,3) // 4000 Hz
#define AMEEFP_500US_IN_HZ AMEFP(2,3) // 2000 Hz
#define AMEEFP_1MS_IN_HZ AMEFP(1,3) // 1000 Hz
+#define AMEEFP_1500US_IN_HZ AMEFP(66667,-2) // 666.67 Hz (1 / 1.5 ms; was 750 Hz, which is not the reciprocal of the period)
#define AMEEFP_2MS_IN_HZ AMEFP(5,2) // 500 Hz
+#define AMEEFP_3MS_IN_HZ AMEFP(33333,-2) // 333.33 Hz (1 / 3 ms; was 375 Hz, which is not the reciprocal of the period)
#define AMEEFP_4MS_IN_HZ AMEFP(25,1) // 250 Hz
#define AMEEFP_8MS_IN_HZ AMEFP(125,0) // 125 Hz
#define AMEEFP_16MS_IN_HZ AMEFP(625,-1) // 62.5 Hz
@@ -40,14 +42,16 @@
#define AMEFP_SCALE_0_16384 AMEFP(610352,-8) // scalar so that digital 16384=100%
// constants to allow fewer changes if tick time changes
-#define AMEEFP_EVERY_TICK_HZ AMEEFP_500US_IN_HZ // tick time 500us
-#define AMEEFP_EVERY_2ND_TICK_HZ AMEEFP_1MS_IN_HZ // 1ms
-#define AMEEFP_EVERY_4TH_TICK_HZ AMEEFP_2MS_IN_HZ // 2ms
-#define AMEEFP_EVERY_8TH_TICK_HZ AMEEFP_4MS_IN_HZ // 4ms
-#define AMEEFP_EVERY_16TH_TICK_HZ AMEEFP_8MS_IN_HZ // 8ms
-#define AMEEFP_EVERY_32ND_TICK_HZ AMEEFP_16MS_IN_HZ // 16ms
-#define AMEEFP_EVERY_64TH_TICK_HZ AMEEFP_32MS_IN_HZ // 32ms
-#define AMEEFP_EVERY_128TH_TICK_HZ AMEEFP_64MS_IN_HZ // 64ms
+#define AMEEFP_EVERY_TICK_HZ AMEEFP_500US_IN_HZ // tick time 500us
+#define AMEEFP_EVERY_2ND_TICK_HZ AMEEFP_1MS_IN_HZ // 1ms
+#define AMEEFP_EVERY_3RD_TICK_HZ AMEEFP_1500US_IN_HZ // 1.5ms
+#define AMEEFP_EVERY_4TH_TICK_HZ AMEEFP_2MS_IN_HZ // 2ms
+#define AMEEFP_EVERY_6TH_TICK_HZ AMEEFP_3MS_IN_HZ // 3ms
+#define AMEEFP_EVERY_8TH_TICK_HZ AMEEFP_4MS_IN_HZ // 4ms
+#define AMEEFP_EVERY_16TH_TICK_HZ AMEEFP_8MS_IN_HZ // 8ms
+#define AMEEFP_EVERY_32ND_TICK_HZ AMEEFP_16MS_IN_HZ // 16ms
+#define AMEEFP_EVERY_64TH_TICK_HZ AMEEFP_32MS_IN_HZ // 32ms
+#define AMEEFP_EVERY_128TH_TICK_HZ AMEEFP_64MS_IN_HZ // 64ms
// This will get the string when given the GSID
#define SENSOR_GSID_TO_STRING(gsid) G_sensor_list[gsid].name;
@@ -320,8 +324,8 @@ const sensor_info_t G_sensor_info[] =
SENSOR_INFO_T_ENTRY( TEMPPROCTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_16TH_TICK_HZ, AMEFP( 1, 0) ),
SENSOR_INFO_T_ENTRY( UTIL, "%\0", AMEC_SENSOR_TYPE_UTIL, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_16TH_TICK_HZ, AMEFP( 1,-2) ),
SENSOR_INFO_T_ENTRY( TEMPNEST, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_16TH_TICK_HZ, AMEFP( 1, 0) ),
- SENSOR_INFO_T_ENTRY( VOLTVDDSENSE, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1, -1) ),
- SENSOR_INFO_T_ENTRY( VOLTVDNSENSE, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1, -1) ),
+ SENSOR_INFO_T_ENTRY( VOLTVDDSENSE, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_3RD_TICK_HZ, AMEFP( 1, -1) ),
+ SENSOR_INFO_T_ENTRY( VOLTVDNSENSE, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_3RD_TICK_HZ, AMEFP( 1, -1) ),
SENSOR_INFO_T_ENTRY( PWRVDD, "W\0", AMEC_SENSOR_TYPE_POWER, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1, 0) ),
SENSOR_INFO_T_ENTRY( PWRVDN, "W\0", AMEC_SENSOR_TYPE_POWER, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1, 0) ),
SENSOR_INFO_T_ENTRY( PROCPWRTHROT, "#\0", AMEC_SENSOR_TYPE_PERF, AMEC_SENSOR_LOC_PROC, AMEC_SENSOR_NONUM, AMEEFP_EVERY_TICK_HZ, AMEFP( 1, 0) ),
@@ -331,11 +335,12 @@ const sensor_info_t G_sensor_info[] =
SENS_QUAD_ENTRY_SET( VOLTDROOPCNTQ, "#\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_QUAD, AMEC_SENSOR_NONUM, AMEEFP_EVERY_16TH_TICK_HZ, AMEFP( 1, 0) ),
/* ==ReguSensors== NameString Units Type Location Number Freq ScaleFactor */
- SENSOR_INFO_T_ENTRY( VOLTVDD, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1, -1) ),
- SENSOR_INFO_T_ENTRY( VOLTVDN, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1, -1) ),
- SENSOR_INFO_T_ENTRY( CURVDD, "A\0", AMEC_SENSOR_TYPE_CURRENT, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1,-2) ),
- SENSOR_INFO_T_ENTRY( CURVDN, "A\0", AMEC_SENSOR_TYPE_CURRENT, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_2ND_TICK_HZ, AMEFP( 1,-2) ),
- SENSOR_INFO_T_ENTRY( VRMPROCOT, "#\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_4TH_TICK_HZ, AMEFP( 1, 0) ),
+ SENSOR_INFO_T_ENTRY( VOLTVDD, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_3RD_TICK_HZ, AMEFP( 1, -1) ),
+ SENSOR_INFO_T_ENTRY( VOLTVDN, "mV\0", AMEC_SENSOR_TYPE_VOLTAGE, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_3RD_TICK_HZ, AMEFP( 1, -1) ),
+ SENSOR_INFO_T_ENTRY( CURVDD, "A\0", AMEC_SENSOR_TYPE_CURRENT, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_3RD_TICK_HZ, AMEFP( 1,-2) ),
+ SENSOR_INFO_T_ENTRY( CURVDN, "A\0", AMEC_SENSOR_TYPE_CURRENT, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_3RD_TICK_HZ, AMEFP( 1,-2) ),
+ SENSOR_INFO_T_ENTRY( VRMPROCOT, "#\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_6TH_TICK_HZ, AMEFP( 1, 0) ),
+ SENSOR_INFO_T_ENTRY( TEMPVDD, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_VRM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_6TH_TICK_HZ, AMEFP( 1, 0) ),
/* ==CoreSensors== NameString Units Type Location Number Freq ScaleFactor */
SENS_CORE_ENTRY_SET( FREQREQC, "MHz\0", AMEC_SENSOR_TYPE_FREQ, AMEC_SENSOR_LOC_CORE, AMEC_SENSOR_NONUM, AMEEFP_EVERY_TICK_HZ, AMEFP( 1, 0) ),
diff --git a/src/occ_405/sensor/sensor_table.c b/src/occ_405/sensor/sensor_table.c
index c2dabdb..1b7aef0 100755
--- a/src/occ_405/sensor/sensor_table.c
+++ b/src/occ_405/sensor/sensor_table.c
@@ -377,6 +377,7 @@ const sensor_ptr_t G_amec_sensor_list[] =
SENSOR_PTR( CURVDD, &g_amec_sys.proc[0].curvdd),
SENSOR_PTR( CURVDN, &g_amec_sys.proc[0].curvdn),
SENSOR_PTR( VRMPROCOT, &g_amec_sys.sys.vrfan),
+ SENSOR_PTR( TEMPVDD, &g_amec_sys.sys.tempvdd),
// ------------------------------------------------------
@@ -558,6 +559,7 @@ const minisensor_ptr_t G_amec_mini_sensor_list[] INIT_SECTION =
MINI_SENSOR_PTR( CURVDD, NULL),
MINI_SENSOR_PTR( CURVDN, NULL),
MINI_SENSOR_PTR( VRMPROCOT, NULL),
+ MINI_SENSOR_PTR( TEMPVDD, NULL),
// ------------------------------------------------------
// Core Sensors (24 of each)
OpenPOWER on IntegriCloud