summaryrefslogtreecommitdiffstats
path: root/src/occ_405
diff options
context:
space:
mode:
authorChris Cain <cjcain@us.ibm.com>2018-03-27 17:50:57 -0500
committerChristopher J. Cain <cjcain@us.ibm.com>2018-03-28 14:43:25 -0400
commitb3a2f75d837fd671f13dacb2464c36a5fc8fc69d (patch)
treeaf9d67a1bac2b5fce6b81030ab7c0ba98bea4490 /src/occ_405
parentbd605ba0a030b3490f0edebd8fb704722b6eab0d (diff)
downloadtalos-occ-b3a2f75d837fd671f13dacb2464c36a5fc8fc69d.tar.gz
talos-occ-b3a2f75d837fd671f13dacb2464c36a5fc8fc69d.zip
Fix DIMM overtemp bitmap and trace updates
Change-Id: Ia0f998573316280f253eb3bc495f5c414c092461
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/56344
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com>
Reviewed-by: Douglas R. Gilbert <dgilbert@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Diffstat (limited to 'src/occ_405')
-rwxr-xr-xsrc/occ_405/amec/amec_data.c28
-rwxr-xr-xsrc/occ_405/amec/amec_health.c25
-rwxr-xr-xsrc/occ_405/dimm/dimm.c4
3 files changed, 31 insertions, 26 deletions
diff --git a/src/occ_405/amec/amec_data.c b/src/occ_405/amec/amec_data.c
index 5857373..6a51503 100755
--- a/src/occ_405/amec/amec_data.c
+++ b/src/occ_405/amec/amec_data.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2017 */
+/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -157,6 +157,7 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
cmdh_thrm_thresholds_set_t *l_frudata = NULL;
uint8_t l_dvfs_temp = 0;
uint8_t l_error = 0;
+ bool l_pm_limits = false;
/*------------------------------------------------------------------------*/
/* Code */
@@ -182,6 +183,9 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
}
else
{
+ l_pm_limits = true;
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: Using PM limits");
+
l_dvfs_temp = l_frudata[DATA_FRU_PROC].pm_dvfs;
if(i_mode == OCC_MODE_TURBO)
{
@@ -200,11 +204,11 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
// Store the temperature timeout value
g_amec->thermalproc.temp_timeout = l_frudata[DATA_FRU_PROC].max_read_timeout;
- TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for processor",
- l_dvfs_temp);
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: Processor setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
// Store the Centaur thermal data
- if ((i_mode == OCC_MODE_NOMINAL) || (G_sysConfigData.system_type.kvm))
+ if (!l_pm_limits)
{
// use normal thresholds for Nominal or OPAL
l_dvfs_temp = l_frudata[DATA_FRU_CENTAUR].dvfs;
@@ -231,11 +235,11 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
// Store the temperature timeout value
g_amec->thermalcent.temp_timeout = l_frudata[DATA_FRU_CENTAUR].max_read_timeout;
- TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for Centaur",
- l_dvfs_temp);
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: Centaur setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
// Store the DIMM thermal data
- if ((i_mode == OCC_MODE_NOMINAL) || (G_sysConfigData.system_type.kvm))
+ if (!l_pm_limits)
{
// use normal thresholds for Nominal or OPAL
l_dvfs_temp = l_frudata[DATA_FRU_DIMM].dvfs;
@@ -261,8 +265,8 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
// Store the temperature timeout value
g_amec->thermaldimm.temp_timeout = l_frudata[DATA_FRU_DIMM].max_read_timeout;
- TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for DIMM",
- l_dvfs_temp);
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: DIMM setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
g_amec->vrhotproc.setpoint = l_frudata[DATA_FRU_VRM_OT_STATUS].error_count;
@@ -270,7 +274,7 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
g_amec->vrhotproc.setpoint);
// Store the VRM Vdd thermal data
- if ((i_mode == OCC_MODE_NOMINAL) || (G_sysConfigData.system_type.kvm))
+ if (!l_pm_limits)
{
// use normal thresholds for Nominal or OPAL
l_dvfs_temp = l_frudata[DATA_FRU_VRM_VDD].dvfs;
@@ -296,8 +300,8 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
// Store the temperature timeout value
g_amec->thermalvdd.temp_timeout = l_frudata[DATA_FRU_VRM_VDD].max_read_timeout;
- TRAC_INFO("AMEC_data_write_thrm_thresholds: Setting %u as DVFS setpoint for VRM Vdd",
- l_dvfs_temp);
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: VRM Vdd setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
} while(0);
diff --git a/src/occ_405/amec/amec_health.c b/src/occ_405/amec/amec_health.c
index 60d5a81..b80d043 100755
--- a/src/occ_405/amec/amec_health.c
+++ b/src/occ_405/amec/amec_health.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2017 */
+/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -171,10 +171,6 @@ void amec_health_check_dimm_temp()
l_sensor = getSensorByGsid(TEMPDIMMTHRM);
l_cur_temp = l_sensor->sample;
l_max_temp = l_sensor->sample_max;
- TRAC_ERR("amec_health_check_dimm_temp: DIMM reached error temp[%d]. cur_max[%d], hist_max[%d]",
- l_ot_error,
- l_cur_temp,
- l_max_temp);
//iterate over all dimms
for(l_port = 0; l_port < l_max_port; l_port++)
@@ -184,16 +180,21 @@ void amec_health_check_dimm_temp()
G_dimm_overtemp_logged_bitmap.bytes[l_port];
//skip to next port if no new callouts for this one
- if(!l_new_callouts)
+ if (!l_new_callouts || (G_dimm_overtemp_bitmap.bytes[l_port] == 0))
{
continue;
}
+ TRAC_ERR("amec_health_check_dimm_temp: DIMM reached error temp[%d]. current[%d], hist_max[%d], port[%d]",
+ l_ot_error,
+ l_cur_temp,
+ l_max_temp,
+ l_port);
+
//find the dimm(s) that need to be called out for this port
for(l_dimm = 0; l_dimm < NUM_DIMMS_PER_CENTAUR; l_dimm++)
{
- if(!(l_new_callouts & (DIMM_SENSOR0 >> l_dimm)) &&
- G_dimm_overtemp_bitmap.bytes[l_port])
+ if (!(l_new_callouts & (DIMM_SENSOR0 >> l_dimm)))
{
continue;
}
@@ -217,8 +218,8 @@ void amec_health_check_dimm_temp()
* @errortype
* @moduleid AMEC_HEALTH_CHECK_DIMM_TEMP
* @reasoncode DIMM_ERROR_TEMP
- * @userdata1 Maximum dimm temperature
- * @userdata2 Dimm temperature threshold
+ * @userdata1 Maximum DIMM temperature
+ * @userdata2 DIMM temperature threshold
* @userdata4 OCC_NO_EXTENDED_RC
* @devdesc Memory DIMM(s) exceeded maximum safe
* temperature.
@@ -321,7 +322,7 @@ void amec_health_check_dimm_timeout()
if(G_dimm_temp_expired_bitmap.bytes[l_port])
{
G_dimm_temp_expired_bitmap.bytes[l_port] = 0;
- TRAC_INFO("All dimm sensors for centaur %d have been updated", l_port);
+ TRAC_INFO("All DIMM sensors for port %d have been updated", l_port);
}
continue;
}
@@ -519,7 +520,7 @@ void amec_health_check_cent_temp()
l_sensor = getSensorByGsid(TEMPCENT);
l_cur_temp = l_sensor->sample;
l_max_temp = l_sensor->sample_max;
- TRAC_ERR("amec_health_check_cent_temp: Centaur reached error temp[%d]. cur_max[%d], hist_max[%d] bitmap[0x%02X]",
+ TRAC_ERR("amec_health_check_cent_temp: Centaur reached error temp[%d]. current[%d], hist_max[%d], bitmap[0x%02X]",
l_ot_error,
l_cur_temp,
l_max_temp,
diff --git a/src/occ_405/dimm/dimm.c b/src/occ_405/dimm/dimm.c
index bcdfb6c..40af9de 100755
--- a/src/occ_405/dimm/dimm.c
+++ b/src/occ_405/dimm/dimm.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2017 */
+/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -684,7 +684,7 @@ void process_dimm_temp()
if (l_dimm_temp >= g_amec->thermaldimm.ot_error)
{
//Set a bit so that this dimm can be called out by the thermal thread
- G_dimm_overtemp_bitmap.bytes[port] |= 1 << dimm;
+ G_dimm_overtemp_bitmap.bytes[port] |= DIMM_SENSOR0 >> dimm;
}
l_fru->cur_temp = l_dimm_temp;
OpenPOWER on IntegriCloud