summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/occ_405/amec/amec_controller.c 236
-rw-r--r-- src/occ_405/amec/amec_controller.h 4
-rwxr-xr-x src/occ_405/amec/amec_data.c 92
-rwxr-xr-x src/occ_405/amec/amec_freq.c 24
-rw-r--r-- src/occ_405/amec/amec_freq.h 5
-rwxr-xr-x src/occ_405/amec/amec_health.c 126
-rw-r--r-- src/occ_405/amec/amec_init.c 15
-rw-r--r-- src/occ_405/amec/amec_sensors_ocmb.c 364
-rwxr-xr-x src/occ_405/amec/amec_sys.h 27
-rwxr-xr-x src/occ_405/cent/centaur_data.c 49
-rwxr-xr-x src/occ_405/cent/centaur_data.h 1
-rw-r--r-- src/occ_405/cent/ocmb_data.c 6
-rwxr-xr-x src/occ_405/cmdh/cmdh_fsp_cmds.c 11
-rwxr-xr-x src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c 87
-rwxr-xr-x src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h 10
-rwxr-xr-x src/occ_405/occbuildname.c 2
-rwxr-xr-x src/occ_405/sensor/sensor_enum.h 3
-rwxr-xr-x src/occ_405/sensor/sensor_info.c 3
-rwxr-xr-x src/occ_405/sensor/sensor_table.c 6
19 files changed, 802 insertions, 269 deletions
diff --git a/src/occ_405/amec/amec_controller.c b/src/occ_405/amec/amec_controller.c
index 1f02eda..a74ad2b 100644
--- a/src/occ_405/amec/amec_controller.c
+++ b/src/occ_405/amec/amec_controller.c
@@ -36,6 +36,8 @@
//*************************************************************************
extern dimm_sensor_flags_t G_dimm_temp_expired_bitmap;
extern uint16_t G_cent_temp_expired_bitmap;
+extern uint8_t G_ocm_dts_type_expired_bitmap;
+
//*************************************************************************
// Macros
//*************************************************************************
@@ -246,8 +248,8 @@ void amec_controller_vrm_vdd_thermal()
// Description: This function implements the Proportional Controller for the
// DIMM thermal control. Although it doesn't return any
// results, it populates the thermal vote in the field
-// g_amec->thermaldimm.speed_request.
-//
+// g_amec->thermaldimm.speed_request, g_amec->thermalmcdimm.speed_request,
+// g_amec->thermalpmic.speed_request and g_amec->thermalmcext.speed_request.
// Task Flags:
//
// End Function Specification
@@ -256,82 +258,197 @@ void amec_controller_dimm_thermal()
/*------------------------------------------------------------------------*/
/* Local Variables */
/*------------------------------------------------------------------------*/
+ uint8_t i = 0;
+ uint8_t l_max_dimm_types = 0;
+ const uint16_t l_dimm_types[4] = {DATA_FRU_DIMM,
+ DATA_FRU_MEMCTRL_DRAM,
+ DATA_FRU_PMIC,
+ DATA_FRU_MEMCTRL_EXT};
uint16_t l_thermal_winner = 0;
uint16_t l_residue = 0;
uint16_t l_old_residue = 0;
+ uint16_t l_throttle_temp = 0;
+ uint16_t l_Pgain = 0;
+ uint16_t * l_speed_request = NULL;
+ uint16_t * l_total_res = NULL;
int16_t l_error = 0;
int16_t l_mem_speed = 0;
int16_t l_throttle_chg = 0;
int32_t l_throttle = 0;
sensor_t * l_sensor = NULL;
+ bool l_timeout = false;
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
- // Get TEMPDIMMTHRM sensor value
- l_sensor = getSensorByGsid(TEMPDIMMTHRM);
-
- if(G_dimm_temp_expired_bitmap.dw[0] ||
- G_dimm_temp_expired_bitmap.dw[1])
- {
- //we were not able to read one or more dimm temperatures.
- //Assume temperature is at the setpoint plus 1 degree C.
- l_thermal_winner = g_amec->thermaldimm.setpoint + 10;
- }
- else
- {
- // Use the highest temperature of all DIMMs in 0.1 degrees C
- l_thermal_winner = l_sensor->sample * 10;
- }
-
- // Check if there is an error
- if (g_amec->thermaldimm.setpoint == l_thermal_winner)
- return;
-
- // Calculate the thermal control error
- l_error = g_amec->thermaldimm.setpoint - l_thermal_winner;
-
- // Proportional Controller for the thermal control loop based on DIMM
- // temperatures
- l_throttle = (int32_t) l_error * g_amec->thermaldimm.Pgain;
- l_residue = (uint16_t) l_throttle;
- l_throttle_chg = (int16_t) (l_throttle >> 16);
-
- if ((int16_t) l_throttle_chg > AMEC_MEMORY_SPEED_CHANGE_LIMIT)
+ // loop for the number of different fru types the "dimm" sensors can be
+ // to determine memory throttle based on each type
+ if(MEM_TYPE_OCM == G_sysConfigData.mem_type)
{
- l_throttle_chg = AMEC_MEMORY_SPEED_CHANGE_LIMIT;
+ // all 4 types are possible:
+ l_max_dimm_types = 4;
}
else
{
- if ((int16_t) l_throttle_chg < (-AMEC_MEMORY_SPEED_CHANGE_LIMIT))
- {
- l_throttle_chg = -AMEC_MEMORY_SPEED_CHANGE_LIMIT;
- }
+ // can only be the one DATA_FRU_DIMM type which must be listed first in l_dimm_types
+ l_max_dimm_types = 1;
}
- // Calculate the new thermal speed request for DIMMs
- l_mem_speed = g_amec->thermaldimm.speed_request +
- (int16_t) l_throttle_chg * AMEC_MEMORY_STEP_SIZE;
-
- // Proceed with residue summation to correctly follow set-point
- l_old_residue = g_amec->thermaldimm.total_res;
- g_amec->thermaldimm.total_res += l_residue;
- if (g_amec->thermaldimm.total_res < l_old_residue)
+ for(i= 0; i < l_max_dimm_types; i++)
{
- l_mem_speed += AMEC_MEMORY_STEP_SIZE;
- }
-
- // Enforce actuator saturation limits
- if (l_mem_speed > AMEC_MEMORY_MAX_STEP)
- l_mem_speed = AMEC_MEMORY_MAX_STEP;
- if (l_mem_speed < AMEC_MEMORY_MIN_STEP)
- l_mem_speed = AMEC_MEMORY_MIN_STEP;
-
- // Generate the new thermal speed request
- g_amec->thermaldimm.speed_request = (uint16_t) l_mem_speed;
-
- // Update the Memory OT Throttle Sensor
- if(g_amec->thermaldimm.speed_request < AMEC_MEMORY_MAX_STEP)
+ l_timeout = false; // default this type did not timeout
+
+ // setup vars specific for type being processed
+ if(l_dimm_types[i] == DATA_FRU_DIMM)
+ {
+ // use control values for DATA_FRU_DIMM type
+ l_throttle_temp = g_amec->thermaldimm.setpoint;
+ l_Pgain = g_amec->thermaldimm.Pgain;
+ l_speed_request = &g_amec->thermaldimm.speed_request;
+ l_total_res = &g_amec->thermaldimm.total_res;
+
+ // Get the highest DIMM temperature in 0.1 degrees C
+ l_sensor = getSensorByGsid(TEMPDIMMTHRM);
+ l_thermal_winner = l_sensor->sample * 10;
+
+ // check for time out
+ if(G_dimm_temp_expired_bitmap.dw[0] || G_dimm_temp_expired_bitmap.dw[1])
+ {
+ if(MEM_TYPE_OCM != G_sysConfigData.mem_type)
+ {
+ // non-OCM can only have DIMM type so timeout must be for DIMM
+ l_timeout = true;
+ }
+ else if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_DIMM_MASK) // MEM_TYPE_OCM
+ {
+ l_timeout = true;
+ }
+ }
+ } // end if DATA_FRU_DIMM
+ else if(l_dimm_types[i] == DATA_FRU_MEMCTRL_DRAM)
+ {
+ // use control values for DATA_FRU_MEMCTRL_DRAM type
+ l_throttle_temp = g_amec->thermalmcdimm.setpoint;
+ l_Pgain = g_amec->thermalmcdimm.Pgain;
+ l_speed_request = &g_amec->thermalmcdimm.speed_request;
+ l_total_res = &g_amec->thermalmcdimm.total_res;
+
+ // Get the highest Memctrl+DRAM temperature in 0.1 degrees C
+ l_sensor = getSensorByGsid(TEMPMCDIMMTHRM);
+ l_thermal_winner = l_sensor->sample * 10;
+
+ // check if this type timed out
+ if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_MEMCTRL_DRAM_MASK)
+ {
+ l_timeout = true;
+ }
+ }
+ else if(l_dimm_types[i] == DATA_FRU_PMIC)
+ {
+ // use control values for DATA_FRU_PMIC type
+ l_throttle_temp = g_amec->thermalpmic.setpoint;
+ l_Pgain = g_amec->thermalpmic.Pgain;
+ l_speed_request = &g_amec->thermalpmic.speed_request;
+ l_total_res = &g_amec->thermalpmic.total_res;
+
+ // Get the highest PMIC temperature in 0.1 degrees C
+ l_sensor = getSensorByGsid(TEMPPMICTHRM);
+ l_thermal_winner = l_sensor->sample * 10;
+
+ // check if this type timed out
+ if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_PMIC_MASK)
+ {
+ l_timeout = true;
+ }
+ }
+ else if(l_dimm_types[i] == DATA_FRU_MEMCTRL_EXT)
+ {
+ // use control values for DATA_FRU_MEMCTRL_EXT type
+ l_throttle_temp = g_amec->thermalmcext.setpoint;
+ l_Pgain = g_amec->thermalmcext.Pgain;
+ l_speed_request = &g_amec->thermalmcext.speed_request;
+ l_total_res = &g_amec->thermalmcext.total_res;
+
+ // Get the highest external mem controller temperature in 0.1 degrees C
+ l_sensor = getSensorByGsid(TEMPMCEXTTHRM);
+ l_thermal_winner = l_sensor->sample * 10;
+
+ // check if this type timed out
+ if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_MEMCTRL_EXT_MASK)
+ {
+ l_timeout = true;
+ }
+ }
+ else
+ {
+ // should never happen -- code bug
+ TRAC_ERR("amec_controller_dimm_thermal: Invalid DIMM sensor type[0x%02X] at idx[%d]",
+ l_dimm_types[i],
+ i);
+ continue;
+ }
+
+ // start common code for all types to determine throttle level
+ // Adjust the temperature if there was a time out reading this sensor fru type
+ if(l_timeout)
+ {
+ //Assume temperature is at the throttle temp plus 1 degree C.
+ l_thermal_winner = l_throttle_temp + 10;
+ }
+
+ // Check if this type is being used and the temp differs from the throttle point
+ if( (!l_thermal_winner) || (l_throttle_temp == l_thermal_winner) )
+ continue;
+
+ // Calculate the thermal control error
+ l_error = l_throttle_temp - l_thermal_winner;
+
+ // Proportional Controller for the thermal control loop based on memory temperatures
+ l_throttle = (int32_t) l_error * l_Pgain;
+ l_residue = (uint16_t) l_throttle;
+ l_throttle_chg = (int16_t) (l_throttle >> 16);
+
+ if ((int16_t) l_throttle_chg > AMEC_MEMORY_SPEED_CHANGE_LIMIT)
+ {
+ l_throttle_chg = AMEC_MEMORY_SPEED_CHANGE_LIMIT;
+ }
+ else
+ {
+ if ((int16_t) l_throttle_chg < (-AMEC_MEMORY_SPEED_CHANGE_LIMIT))
+ {
+ l_throttle_chg = -AMEC_MEMORY_SPEED_CHANGE_LIMIT;
+ }
+ }
+
+ // Calculate the new thermal speed request
+ l_mem_speed = *l_speed_request +
+ (int16_t) l_throttle_chg * AMEC_MEMORY_STEP_SIZE;
+
+ // Proceed with residue summation to correctly follow set-point
+ l_old_residue = *l_total_res;
+ *l_total_res += l_residue;
+ if (*l_total_res < l_old_residue)
+ {
+ l_mem_speed += AMEC_MEMORY_STEP_SIZE;
+ }
+
+ // Enforce actuator saturation limits
+ if (l_mem_speed > AMEC_MEMORY_MAX_STEP)
+ l_mem_speed = AMEC_MEMORY_MAX_STEP;
+ if (l_mem_speed < AMEC_MEMORY_MIN_STEP)
+ l_mem_speed = AMEC_MEMORY_MIN_STEP;
+
+ // Save the new thermal speed request for this memory sensor type
+ *l_speed_request = (uint16_t) l_mem_speed;
+
+ } // end for loop processing each memory sensor type
+
+ // Done processing all types now determine if any of them are driving throttling
+ // and update the Memory OT Throttle Sensor
+ if( (g_amec->thermaldimm.speed_request < AMEC_MEMORY_MAX_STEP) ||
+ (g_amec->thermalmcdimm.speed_request < AMEC_MEMORY_MAX_STEP) ||
+ (g_amec->thermalpmic.speed_request < AMEC_MEMORY_MAX_STEP) ||
+ (g_amec->thermalmcext.speed_request < AMEC_MEMORY_MAX_STEP) )
{
// Memory speed is less than max indicate throttle due to OT
sensor_update(AMECSENSOR_PTR(MEMOTTHROT), 1);
@@ -340,7 +457,6 @@ void amec_controller_dimm_thermal()
{
sensor_update(AMECSENSOR_PTR(MEMOTTHROT), 0);
}
-
}
diff --git a/src/occ_405/amec/amec_controller.h b/src/occ_405/amec/amec_controller.h
index 97e379c..8808a03 100644
--- a/src/occ_405/amec/amec_controller.h
+++ b/src/occ_405/amec/amec_controller.h
@@ -117,8 +117,8 @@ void amec_controller_membuf_thermal();
* This function implements a Proportional Controller for the
* thermal control loop based on DIMM temperatures. Although it
* doesn't return any results, it populates the thermal vote in
- * the field g_amec->thermaldimm.speed_request.
- *
+ * the fields g_amec->thermaldimm.speed_request, g_amec->thermalmcdimm.speed_request,
+ * g_amec->thermalpmic.speed_request and g_amec->thermalmcext.speed_request.
*/
void amec_controller_dimm_thermal();
diff --git a/src/occ_405/amec/amec_data.c b/src/occ_405/amec/amec_data.c
index 43ff5b3..6aa6b78 100755
--- a/src/occ_405/amec/amec_data.c
+++ b/src/occ_405/amec/amec_data.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2018 */
+/* Contributors Listed Below - COPYRIGHT 2011,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -268,6 +268,96 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode)
TRAC_INFO("AMEC_data_write_thrm_thresholds: DIMM setpoints - DVFS: %u, Error: %u",
l_dvfs_temp, l_error);
+ // Store the Memctrl+DIMM thermal data (all values come from the DATA_FRU_MEMCTRL_DRAM entry)
+ if (!l_pm_limits)
+ {
+ // use normal thresholds for Nominal or OPAL
+ l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_DRAM].dvfs;
+ l_error = l_frudata[DATA_FRU_MEMCTRL_DRAM].error;
+ }
+ else
+ {
+ l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_DRAM].pm_dvfs;
+ if(i_mode == OCC_MODE_TURBO)
+ {
+ //Need to log an error if we throttle in static turbo mode (for mfg)
+ l_error = l_dvfs_temp;
+ }
+ else
+ {
+ l_error = l_frudata[DATA_FRU_MEMCTRL_DRAM].pm_error;
+ }
+ }
+ // Store the DVFS thermal setpoint in 0.1 degrees C
+ g_amec->thermalmcdimm.setpoint = l_dvfs_temp * 10;
+ // Store the error temperature for OT detection
+ g_amec->thermalmcdimm.ot_error = l_error;
+ // Store the temperature timeout value from this type's own FRU entry (not the DATA_FRU_DIMM one)
+ g_amec->thermalmcdimm.temp_timeout = l_frudata[DATA_FRU_MEMCTRL_DRAM].max_read_timeout;
+
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: MC+DIMM setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
+
+ // Store the PMIC thermal data
+ if (!l_pm_limits)
+ {
+ // use normal thresholds for Nominal or OPAL
+ l_dvfs_temp = l_frudata[DATA_FRU_PMIC].dvfs;
+ l_error = l_frudata[DATA_FRU_PMIC].error;
+ }
+ else
+ {
+ l_dvfs_temp = l_frudata[DATA_FRU_PMIC].pm_dvfs;
+ if(i_mode == OCC_MODE_TURBO)
+ {
+ //Need to log an error if we throttle in static turbo mode (for mfg)
+ l_error = l_dvfs_temp;
+ }
+ else
+ {
+ l_error = l_frudata[DATA_FRU_PMIC].pm_error;
+ }
+ }
+ // Store the DVFS thermal setpoint in 0.1 degrees C
+ g_amec->thermalpmic.setpoint = l_dvfs_temp * 10;
+ // Store the error temperature for OT detection
+ g_amec->thermalpmic.ot_error = l_error;
+ // Store the temperature timeout value
+ g_amec->thermalpmic.temp_timeout = l_frudata[DATA_FRU_PMIC].max_read_timeout;
+
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: PMIC setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
+
+ // Store the external mem ctrl thermal data
+ if (!l_pm_limits)
+ {
+ // use normal thresholds for Nominal or OPAL
+ l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_EXT].dvfs;
+ l_error = l_frudata[DATA_FRU_MEMCTRL_EXT].error;
+ }
+ else
+ {
+ l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_EXT].pm_dvfs;
+ if(i_mode == OCC_MODE_TURBO)
+ {
+ //Need to log an error if we throttle in static turbo mode (for mfg)
+ l_error = l_dvfs_temp;
+ }
+ else
+ {
+ l_error = l_frudata[DATA_FRU_MEMCTRL_EXT].pm_error;
+ }
+ }
+ // Store the DVFS thermal setpoint in 0.1 degrees C
+ g_amec->thermalmcext.setpoint = l_dvfs_temp * 10;
+ // Store the error temperature for OT detection
+ g_amec->thermalmcext.ot_error = l_error;
+ // Store the temperature timeout value
+ g_amec->thermalmcext.temp_timeout = l_frudata[DATA_FRU_MEMCTRL_EXT].max_read_timeout;
+
+ TRAC_INFO("AMEC_data_write_thrm_thresholds: External MC setpoints - DVFS: %u, Error: %u",
+ l_dvfs_temp, l_error);
+
// Store the VRM Vdd thermal data
if (!l_pm_limits)
{
diff --git a/src/occ_405/amec/amec_freq.c b/src/occ_405/amec/amec_freq.c
index 4275b68..ea33a3e 100755
--- a/src/occ_405/amec/amec_freq.c
+++ b/src/occ_405/amec/amec_freq.c
@@ -760,6 +760,30 @@ void amec_slv_mem_voting_box(void)
kvm_reason = MEMORY_OVER_TEMP;
}
+ // Check vote from Mem ctrl+DIMM thermal control loop
+ if (l_vote > g_amec->thermalmcdimm.speed_request)
+ {
+ l_vote = g_amec->thermalmcdimm.speed_request;
+ l_reason = AMEC_MEM_VOTING_REASON_MCDIMM;
+ kvm_reason = MEMORY_OVER_TEMP;
+ }
+
+ // Check vote from Pmic thermal control loop
+ if (l_vote > g_amec->thermalpmic.speed_request)
+ {
+ l_vote = g_amec->thermalpmic.speed_request;
+ l_reason = AMEC_MEM_VOTING_REASON_PMIC;
+ kvm_reason = MEMORY_OVER_TEMP;
+ }
+
+ // Check vote from external mem controller thermal control loop
+ if (l_vote > g_amec->thermalmcext.speed_request)
+ {
+ l_vote = g_amec->thermalmcext.speed_request;
+ l_reason = AMEC_MEM_VOTING_REASON_MC_EXT;
+ kvm_reason = MEMORY_OVER_TEMP;
+ }
+
// Check if memory autoslewing is enabled
if (g_amec->mnfg_parms.mem_autoslew)
{
diff --git a/src/occ_405/amec/amec_freq.h b/src/occ_405/amec/amec_freq.h
index 7fe8a26..8df060a 100644
--- a/src/occ_405/amec/amec_freq.h
+++ b/src/occ_405/amec/amec_freq.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2015 */
+/* Contributors Listed Below - COPYRIGHT 2011,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -100,6 +100,9 @@ typedef enum
AMEC_MEM_VOTING_REASON_CENT = 0x01,
AMEC_MEM_VOTING_REASON_DIMM = 0x02,
AMEC_MEM_VOTING_REASON_SLEW = 0x03,
+ AMEC_MEM_VOTING_REASON_MCDIMM = 0x04,
+ AMEC_MEM_VOTING_REASON_PMIC = 0x05,
+ AMEC_MEM_VOTING_REASON_MC_EXT = 0x06,
}amec_mem_voting_reason_t;
// This is memory throttle reason code encoded in OPAL dynamic data
diff --git a/src/occ_405/amec/amec_health.c b/src/occ_405/amec/amec_health.c
index 0d99f20..cdb7d6b 100755
--- a/src/occ_405/amec/amec_health.c
+++ b/src/occ_405/amec/amec_health.c
@@ -56,6 +56,9 @@ dimm_sensor_flags_t G_dimm_timeout_logged_bitmap = {{0}};
// Are any dimms currently in the timedout state (bitmap of dimm)?
dimm_sensor_flags_t G_dimm_temp_expired_bitmap = {{0}};
+// Timedout state of OCMB "DIMM" sensors by fru type (bitmap of DTS type)
+uint8_t G_ocm_dts_type_expired_bitmap = 0;
+
// Have we already called out the centaur for timeout (bitmap of centaurs)?
uint16_t G_cent_timeout_logged_bitmap = 0;
@@ -177,10 +180,6 @@ void amec_health_check_dimm_temp()
return;
}
- l_ot_error = g_amec->thermaldimm.ot_error;
- l_sensor = getSensorByGsid(TEMPDIMMTHRM);
- l_max_temp = l_sensor->sample_max;
-
//iterate over all dimms
for(l_port = 0; l_port < l_max_port; l_port++)
{
@@ -211,14 +210,51 @@ void amec_health_check_dimm_temp()
fru_temp_t* l_fru;
l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm];
+ switch(l_fru->temp_fru_type)
+ {
+ case DATA_FRU_DIMM:
+ l_ot_error = g_amec->thermaldimm.ot_error;
+ l_sensor = getSensorByGsid(TEMPDIMMTHRM);
+ l_max_temp = l_sensor->sample_max;
+ break;
+
+ case DATA_FRU_MEMCTRL_DRAM:
+ l_ot_error = g_amec->thermalmcdimm.ot_error;
+ l_sensor = getSensorByGsid(TEMPMCDIMMTHRM);
+ l_max_temp = l_sensor->sample_max;
+ break;
+
+ case DATA_FRU_PMIC:
+ l_ot_error = g_amec->thermalpmic.ot_error;
+ l_sensor = getSensorByGsid(TEMPPMICTHRM);
+ l_max_temp = l_sensor->sample_max;
+ break;
+
+ case DATA_FRU_MEMCTRL_EXT:
+ l_ot_error = g_amec->thermalmcext.ot_error;
+ l_sensor = getSensorByGsid(TEMPMCEXTTHRM);
+ l_max_temp = l_sensor->sample_max;
+ break;
+
+ default:
+ // this is a code bug: trace it and let the error be logged for debug
+ TRAC_ERR("amec_health_check_dimm_temp: sensor[%04X] marked as OT has invalid type[%d]",
+ (l_port<<8)|l_dimm, l_fru->temp_fru_type);
+ l_ot_error = 0xff;
+ l_max_temp = 0xff;
+ break;
+ }
+ TRAC_ERR("amec_health_check_dimm_temp: sensor[%04X] type[0x%02X] reached error temp[%d] current[%d]",
+ (l_port<<8)|l_dimm, l_fru->temp_fru_type, l_ot_error, l_fru->cur_temp);
+
amec_mem_mark_logged(l_port,
l_dimm,
&G_cent_overtemp_logged_bitmap,
&G_dimm_overtemp_logged_bitmap.bytes[l_port]);
- TRAC_ERR("amec_health_check_dimm_temp: DIMM%04X being called out for overtemp - %dC",
- (l_port<<8)|l_dimm, l_fru->cur_temp);
- // Create single elog with up to MAX_CALLOUTS for this port
+ // Create single elog with up to MAX_CALLOUTS
+ // this will be generic regardless of temperature sensor type, the callouts will be correct
+ // and the traces will point to specific types/thresholds
if(l_callouts_count < ERRL_MAX_CALLOUTS)
{
//If we don't have an error log for the callout, create one
@@ -290,12 +326,15 @@ void amec_health_check_dimm_timeout()
{
static dimm_sensor_flags_t L_temp_update_bitmap_prev = {{0}};
dimm_sensor_flags_t l_need_inc, l_need_clr, l_temp_update_bitmap;
- uint8_t l_dimm, l_port;
+ uint8_t l_dimm, l_port, l_temp_timeout;
fru_temp_t* l_fru;
errlHndl_t l_err = NULL;
uint32_t l_callouts_count = 0;
uint64_t l_huid;
static bool L_ran_once = FALSE;
+ uint8_t l_max_port = 0; // #ports in nimbus/#mem buffs in cumulus/OCM
+ uint8_t l_max_dimm_per_port = 0; // per port in nimbus/per mem buff in cumulus/OCM
+ uint8_t l_ocm_dts_type_expired_bitmap = 0;
do
{
@@ -331,8 +370,6 @@ void amec_health_check_dimm_timeout()
break;
}
- uint8_t l_max_port; // #ports in nimbus/#mem buffs in cumulus/OCM
- uint8_t l_max_dimm_per_port; // per port in nimbus/per mem buff in cumulus/OCM
if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS)
{
l_max_port = NUM_DIMM_PORTS;
@@ -400,17 +437,42 @@ void amec_health_check_dimm_timeout()
}
//check if the temperature reading is still useable
- if(g_amec->thermaldimm.temp_timeout == 0xff ||
- l_fru->sample_age < g_amec->thermaldimm.temp_timeout)
+ if(l_fru->temp_fru_type == DATA_FRU_DIMM)
+ {
+ l_temp_timeout = g_amec->thermaldimm.temp_timeout;
+ }
+
+ else if(l_fru->temp_fru_type == DATA_FRU_MEMCTRL_DRAM)
+ {
+ l_temp_timeout = g_amec->thermalmcdimm.temp_timeout;
+ }
+
+ else if(l_fru->temp_fru_type == DATA_FRU_PMIC)
+ {
+ l_temp_timeout = g_amec->thermalpmic.temp_timeout;
+ }
+
+ else if(l_fru->temp_fru_type == DATA_FRU_MEMCTRL_EXT)
+ {
+ l_temp_timeout = g_amec->thermalmcext.temp_timeout;
+ }
+
+ else // invalid type or not used, ignore
+ l_temp_timeout = 0xff;
+
+ if(l_temp_timeout == 0xff ||
+ l_fru->sample_age < l_temp_timeout)
{
continue;
}
- //temperature has expired. Notify control algorithms which DIMM
+ //temperature has expired. Notify control algorithms which DIMM DTS and type
if(!(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)))
{
G_dimm_temp_expired_bitmap.bytes[l_port] |= (DIMM_SENSOR0 >> l_dimm);
- TRAC_ERR("Timed out reading DIMM%04X temperature sensor", (l_port<<8)|l_dimm);
+ TRAC_ERR("Timed out reading DIMM%04X temperature sensor type[0x%02X]",
+ (l_port<<8)|l_dimm,
+ l_fru->temp_fru_type);
}
//If we've already logged an error for this FRU go to the next one.
@@ -421,7 +483,7 @@ void amec_health_check_dimm_timeout()
// To prevent DIMMs from incorrectly being called out, don't log errors if there have
// been timeouts with GPE1 tasks not finishing
- if(G_error_history[ERRH_GPE1_NOT_IDLE] > g_amec->thermaldimm.temp_timeout)
+ if(G_error_history[ERRH_GPE1_NOT_IDLE] > l_temp_timeout)
{
TRAC_ERR("Timed out reading DIMM temperature due to GPE1 issues");
// give notification that GPE1 error should now be logged which will reset the OCC
@@ -460,7 +522,7 @@ void amec_health_check_dimm_timeout()
ERRL_SEV_PREDICTIVE, //Severity
NULL, //Trace Buf
DEFAULT_TRACE_SIZE, //Trace Size
- g_amec->thermaldimm.temp_timeout, //userdata1
+ l_temp_timeout, //userdata1
0); //userdata2
}
@@ -527,6 +589,38 @@ void amec_health_check_dimm_timeout()
}//iterate over all dimms
}//iterate over all centaurs/ports
}while(0);
+
+ // For OCM the "DIMM" dts are used for different types. Need to determine what type the
+ // "DIMM" DTS readings are for so the control loop will handle timeout based on correct type
+ if(MEM_TYPE_OCM == G_sysConfigData.mem_type)
+ {
+ if(G_dimm_temp_expired_bitmap.dw[0] || G_dimm_temp_expired_bitmap.dw[1])
+ {
+ // at least one sensor expired. Set type for each expired sensor
+ //iterate across all OCMBs
+ for(l_port = 0; l_port < l_max_port; l_port++)
+ {
+ //iterate over all "dimm" DTS readings
+ for(l_dimm = 0; l_dimm < l_max_dimm_per_port; l_dimm++)
+ {
+ if(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))
+ {
+ // found an expired sensor
+ l_ocm_dts_type_expired_bitmap |= g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm].dts_type_mask;
+ }
+ }//iterate over all dimms
+ }//iterate over all OCMBs
+ } // if temp expired
+
+ // check if there is a change to any type expired
+ if(G_ocm_dts_type_expired_bitmap != l_ocm_dts_type_expired_bitmap)
+ {
+ TRAC_INFO("DIMM DTS type expired bitmap changed from[0x%04X] to[0x%04X]",
+ G_ocm_dts_type_expired_bitmap, l_ocm_dts_type_expired_bitmap);
+ G_ocm_dts_type_expired_bitmap = l_ocm_dts_type_expired_bitmap;
+ }
+ } // if mem type OCM
+
L_ran_once = TRUE;
} // end amec_health_check_dimm_timeout()
diff --git a/src/occ_405/amec/amec_init.c b/src/occ_405/amec/amec_init.c
index 3d3faa6..6b51b1d 100644
--- a/src/occ_405/amec/amec_init.c
+++ b/src/occ_405/amec/amec_init.c
@@ -267,6 +267,21 @@ void amec_init_gamec_struct(void)
g_amec->thermalcent.Pgain = 30000;
g_amec->thermalcent.speed_request = AMEC_MEMORY_MAX_STEP;
+ // Initialize thermal controller based on temperature sensor covering both mem ctrl and DIMM
+ g_amec->thermalmcdimm.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C
+ g_amec->thermalmcdimm.Pgain = 30000;
+ g_amec->thermalmcdimm.speed_request = AMEC_MEMORY_MAX_STEP;
+
+ // Initialize thermal controller based on PMIC temperatures
+ g_amec->thermalpmic.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C
+ g_amec->thermalpmic.Pgain = 30000;
+ g_amec->thermalpmic.speed_request = AMEC_MEMORY_MAX_STEP;
+
+ // Initialize thermal controller based on external mem controller temperatures
+ g_amec->thermalmcext.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C
+ g_amec->thermalmcext.Pgain = 30000;
+ g_amec->thermalmcext.speed_request = AMEC_MEMORY_MAX_STEP;
+
// Initialize component power caps
g_amec->pcap.active_proc_pcap = 0;
g_amec->pcap.active_mem_level = 0;
diff --git a/src/occ_405/amec/amec_sensors_ocmb.c b/src/occ_405/amec/amec_sensors_ocmb.c
index a2cd4a6..8ffbefa 100644
--- a/src/occ_405/amec/amec_sensors_ocmb.c
+++ b/src/occ_405/amec/amec_sensors_ocmb.c
@@ -110,21 +110,21 @@ void amec_update_ocmb_sensors(uint8_t i_membuf)
// End Function Specification
void amec_update_ocmb_dimm_dts_sensors(OcmbMemData * i_sensor_cache, uint8_t i_membuf)
{
+// confirmed ok to use same values for all types (internal mc, dimm, external mc, pmic...)
#define MIN_VALID_DIMM_TEMP 1
#define MAX_VALID_DIMM_TEMP 125 //according to Mike Pardiek 04/23/2019
-#define MAX_MEM_TEMP_CHANGE 2
+#define MAX_MEM_TEMP_CHANGE 4
- uint32_t k, l_hottest_dimm_temp;
+ uint32_t k;
uint16_t l_dts[NUM_DIMMS_PER_OCMB] = {0};
- uint32_t l_hottest_dimm_loc = NUM_DIMMS_PER_OCMB;
int32_t l_dimm_temp, l_prev_temp;
static uint8_t L_ran_once[MAX_NUM_OCMBS] = {FALSE};
- static bool L_ot_traced[MAX_NUM_OCMBS][NUM_DIMMS_PER_OCMB] = {{false}};
- // Harvest thermal data for all dimms
+ // Harvest thermal data for memory thermal sensors that are enabled and being used
for(k=0; k < NUM_DIMMS_PER_OCMB; k++)
{
- if(!CENTAUR_SENSOR_ENABLED(i_membuf, k))
+ if( (!CENTAUR_SENSOR_ENABLED(i_membuf, k)) ||
+ (g_amec->proc[0].memctl[i_membuf].centaur.dimm_temps[k].temp_fru_type == DATA_FRU_NOT_USED) )
{
continue;
}
@@ -250,48 +250,17 @@ void amec_update_ocmb_dimm_dts_sensors(OcmbMemData * i_sensor_cache, uint8_t i_m
}
}
- //Check if at or above the error temperature
- if(l_dts[k] >= g_amec->thermaldimm.ot_error)
- {
- //Set a bit so that this dimm can be called out by the thermal thread
- G_dimm_overtemp_bitmap.bytes[i_membuf] |= (DIMM_SENSOR0 >> k);
- // trace first time OT per DIMM
- if( !L_ot_traced[i_membuf][k] )
- {
- TRAC_ERR("amec_update_ocmb_dimm_dts_sensors: Mem Buf[%d] DIMM[%d] reached error temp[%d]. current[%d]",
- i_membuf,
- k,
- g_amec->thermaldimm.ot_error,
- l_dts[k]);
- L_ot_traced[i_membuf][k] = true;
- }
- }
+ //Check for over temperature must be done by type and will be checked
+ // in amec_update_ocmb_temp_sensors() which happens after all OCMBs have been read
}
- // Find hottest temperature from all DIMMs for this centaur
- for(l_hottest_dimm_temp = 0, k = 0; k < NUM_DIMMS_PER_OCMB; k++)
+ // update the current temperatures
+ for(k = 0; k < NUM_DIMMS_PER_OCMB; k++)
{
- if(l_dts[k] > l_hottest_dimm_temp)
- {
- l_hottest_dimm_temp = l_dts[k];
- l_hottest_dimm_loc = k;
- }
g_amec->proc[0].memctl[i_membuf].centaur.dimm_temps[k].cur_temp = l_dts[k];
}
- amec_centaur_t* l_centaur_ptr = &g_amec->proc[0].memctl[i_membuf].centaur;
-
- //only update location if hottest dimm temp is greater than previous maximum
- if(l_hottest_dimm_temp > l_centaur_ptr->tempdimmax.sample_max)
- {
- sensor_update(&l_centaur_ptr->locdimmax, l_hottest_dimm_loc);
- }
-
- //update the max dimm temperature sensor for this centaur
- sensor_update(&l_centaur_ptr->tempdimmax, l_hottest_dimm_temp);
-
L_ran_once[i_membuf] = TRUE;
- AMEC_DBG("Centaur[%d]: HotDimm=%d\n",i_membuf,l_hottest_dimm_temp);
}
// Function Specification
@@ -317,129 +286,223 @@ void amec_update_ocmb_dts_sensors(OcmbMemData * i_sensor_cache, uint8_t i_membuf
fru_temp_t* l_fru = &g_amec->proc[0].memctl[i_membuf].centaur.centaur_hottest;
- l_prev_temp = l_fru->cur_temp;
- if(!l_prev_temp)
+ // Internal DTS sensor is either for internal memctrl or not being used
+ // ignore the internal sensor if it isn't marked for internal memctrl
+ if(l_fru->temp_fru_type == DATA_FRU_CENTAUR)
{
- l_prev_temp = l_sens_temp;
- }
-
- //Check DTS status bits
- if(i_sensor_cache->status.fields.ubdts0_valid &&
- (!i_sensor_cache->status.fields.ubdts0_err))
- {
- //make sure temperature is within a 'reasonable' range.
- if(l_sens_temp < MIN_VALID_MEMBUF_TEMP ||
- l_sens_temp > MAX_VALID_MEMBUF_TEMP)
- {
- //set a flag so that if we end up logging an error we have something to debug why
- l_fru->flags |= FRU_TEMP_OUT_OF_RANGE;
- l_dts = l_prev_temp;
- }
- else
- {
- //don't allow temp to change more than is reasonable since last read
- if(l_sens_temp > (l_prev_temp + MAX_MEM_TEMP_CHANGE))
- {
- l_dts = l_prev_temp + MAX_MEM_TEMP_CHANGE;
- if(!l_fru->flags)
- {
- TRAC_INFO("membuf temp rose faster than reasonable: membuf[%d] prev[%d] cur[%d]",
- i_membuf, l_prev_temp, l_sens_temp);
- l_fru->flags |= FRU_TEMP_FAST_CHANGE;
- }
- }
- else if (l_sens_temp < (l_prev_temp - MAX_MEM_TEMP_CHANGE))
- {
- l_dts = l_prev_temp - MAX_MEM_TEMP_CHANGE;
- if(!l_fru->flags)
- {
- TRAC_INFO("membuf temp fell faster than reasonable: cent[%d] prev[%d] cur[%d]",
- i_membuf, l_prev_temp, l_sens_temp);
- l_fru->flags |= FRU_TEMP_FAST_CHANGE;
- }
- }
- else //reasonable amount of change occurred
- {
- l_dts = l_sens_temp;
- l_fru->flags &= ~FRU_TEMP_FAST_CHANGE;
- }
-
- //Notify thermal thread that temperature has been updated
- G_cent_temp_updated_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf);
-
- //clear error flags
- l_fru->flags &= FRU_TEMP_FAST_CHANGE;
- }
+ l_prev_temp = l_fru->cur_temp;
+ if(!l_prev_temp)
+ {
+ l_prev_temp = l_sens_temp;
+ }
+
+ //Check DTS status bits
+ if(i_sensor_cache->status.fields.ubdts0_valid &&
+ (!i_sensor_cache->status.fields.ubdts0_err))
+ {
+ //make sure temperature is within a 'reasonable' range.
+ if(l_sens_temp < MIN_VALID_MEMBUF_TEMP ||
+ l_sens_temp > MAX_VALID_MEMBUF_TEMP)
+ {
+ //set a flag so that if we end up logging an error we have something to debug why
+ l_fru->flags |= FRU_TEMP_OUT_OF_RANGE;
+ l_dts = l_prev_temp;
+ }
+ else
+ {
+ //don't allow temp to change more than is reasonable since last read
+ if(l_sens_temp > (l_prev_temp + MAX_MEM_TEMP_CHANGE))
+ {
+ l_dts = l_prev_temp + MAX_MEM_TEMP_CHANGE;
+ if(!l_fru->flags)
+ {
+ TRAC_INFO("membuf temp rose faster than reasonable: membuf[%d] prev[%d] cur[%d]",
+ i_membuf, l_prev_temp, l_sens_temp);
+ l_fru->flags |= FRU_TEMP_FAST_CHANGE;
+ }
+ }
+ else if (l_sens_temp < (l_prev_temp - MAX_MEM_TEMP_CHANGE))
+ {
+ l_dts = l_prev_temp - MAX_MEM_TEMP_CHANGE;
+ if(!l_fru->flags)
+ {
+ TRAC_INFO("membuf temp fell faster than reasonable: cent[%d] prev[%d] cur[%d]",
+ i_membuf, l_prev_temp, l_sens_temp);
+ l_fru->flags |= FRU_TEMP_FAST_CHANGE;
+ }
+ }
+ else //reasonable amount of change occurred
+ {
+ l_dts = l_sens_temp;
+ l_fru->flags &= ~FRU_TEMP_FAST_CHANGE;
+ }
+
+ //Notify thermal thread that temperature has been updated
+ G_cent_temp_updated_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf);
+
+ //clear error flags
+ l_fru->flags &= FRU_TEMP_FAST_CHANGE;
+ }
+ }
+ else //status was INVALID
+ {
+ if(L_ran_once[i_membuf])
+ {
+ //Trace the error if we haven't traced it already for this sensor
+ if(!(l_fru->flags & FRU_SENSOR_STATUS_INVALID) &&
+ i_sensor_cache->status.fields.ubdts0_err)
+ {
+ TRAC_ERR("Membuf %d temp sensor error.", i_membuf);
+ }
+
+ l_fru->flags |= FRU_SENSOR_STATUS_INVALID;
+ }
+
+ //use last temperature
+ l_dts = l_prev_temp;
+ }
+
+ L_ran_once[i_membuf] = TRUE;
+
+ //Check if at or above the error temperature
+ if(l_dts >= g_amec->thermalcent.ot_error)
+ {
+ //Set a bit so that this membuf can be called out by the thermal thread
+ G_cent_overtemp_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf);
+ }
+
+ // Update Interim Data - later this will get picked up to form centaur sensor
+ l_fru->cur_temp = l_dts;
+
+ AMEC_DBG("Membuf[%d]: HotMembuf=%d\n",i_membuf,l_dts);
}
- else //status was INVALID
+ else // internal sensor not being used
{
- if(L_ran_once[i_membuf])
- {
- //Trace the error if we haven't traced it already for this sensor
- if(!(l_fru->flags & FRU_SENSOR_STATUS_INVALID) &&
- i_sensor_cache->status.fields.ubdts0_err)
- {
- TRAC_ERR("Membuf %d temp sensor error.", i_membuf);
- }
+ // make sure temperature is 0 indicating not present
+ l_fru->cur_temp = 0;
- l_fru->flags |= FRU_SENSOR_STATUS_INVALID;
- }
+ //Notify thermal thread that temperature has been updated so no timeout error is logged
+ G_cent_temp_updated_bitmap |= CENTAUR0_PRESENT_MASK >> i_membuf;
- //use last temperature
- l_dts = l_prev_temp;
+ //clear error flags
+ l_fru->flags = 0;
}
-
- L_ran_once[i_membuf] = TRUE;
-
- //Check if at or above the error temperature
- if(l_dts >= g_amec->thermalcent.ot_error)
- {
- //Set a bit so that this dimm can be called out by the thermal thread
- G_cent_overtemp_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf);
- }
-
- // Update Interim Data - later this will get picked up to form centaur sensor
- g_amec->proc[0].memctl[i_membuf].centaur.centaur_hottest.cur_temp = l_dts;
-
- AMEC_DBG("Membuf[%d]: HotMembuf=%d\n",i_membuf,l_dts);
}
// Function Specification
//
// Name: amec_update_ocmb_temp_sensors
//
-// Description: Updates thermal sensors that have data grabbed by the centaur.
+// Description: Updates thermal sensors to give summary (across all OCMBs) for each mem type
//
// Thread: RealTime Loop
//
// End Function Specification
void amec_update_ocmb_temp_sensors(void)
{
- uint32_t k;
+ uint32_t k, l_dimm;
uint32_t l_hot_dimm = 0;
uint32_t l_hot_mb = 0;
+ uint32_t l_hot_mb_dimm = 0;
+ uint32_t l_hot_pmic = 0;
+ uint32_t l_hot_ext_mb = 0;
+ uint8_t l_ot_error = 0;
+ uint8_t l_cur_temp = 0;
+ uint8_t l_fru_type = DATA_FRU_NOT_USED;
+ static bool L_ot_traced[MAX_NUM_OCMBS][NUM_DIMMS_PER_OCMB] = {{false}};
- // -----------------------------------------------------------
- // Find hottest temperature from all membufs for this Proc chip
- // Find hottest temperature from all DIMMs for this Proc chip
- // -----------------------------------------------------------
for(k=0; k < MAX_NUM_OCMBS; k++)
{
- if(g_amec->proc[0].memctl[k].centaur.centaur_hottest.cur_temp > l_hot_mb)
+ // Find hottest temperature from all internal membufs for this Proc chip
+ // make sure the type is "CENTAUR" i.e. internal memory controller temp
+ if( (g_amec->proc[0].memctl[k].centaur.centaur_hottest.temp_fru_type == DATA_FRU_CENTAUR) &&
+ (g_amec->proc[0].memctl[k].centaur.centaur_hottest.cur_temp > l_hot_mb) )
{
l_hot_mb = g_amec->proc[0].memctl[k].centaur.centaur_hottest.cur_temp;
}
- if(g_amec->proc[0].memctl[k].centaur.tempdimmax.sample > l_hot_dimm)
+
+ // process each of the thermal sensors (stored as "dimm" temps)
+ // based on what type they are for and finding the hottest for each type
+ for(l_dimm=0; l_dimm < NUM_DIMMS_PER_OCMB; l_dimm++)
{
- l_hot_dimm = g_amec->proc[0].memctl[k].centaur.tempdimmax.sample;
- }
- }
+ l_fru_type = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].temp_fru_type;
+ l_cur_temp = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp;
+
+ switch(l_fru_type)
+ {
+ case DATA_FRU_DIMM:
+ l_ot_error = g_amec->thermaldimm.ot_error;
+ if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_dimm)
+ {
+ l_hot_dimm = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp;
+ }
+ break;
+
+ case DATA_FRU_MEMCTRL_DRAM:
+ l_ot_error = g_amec->thermalmcdimm.ot_error;
+ if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_mb_dimm)
+ {
+ l_hot_mb_dimm = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp;
+ }
+ break;
+
+ case DATA_FRU_PMIC:
+ l_ot_error = g_amec->thermalpmic.ot_error;
+ if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_pmic)
+ {
+ l_hot_pmic = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp;
+ }
+ break;
+
+ case DATA_FRU_MEMCTRL_EXT:
+ l_ot_error = g_amec->thermalmcext.ot_error;
+ if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_ext_mb)
+ {
+ l_hot_ext_mb = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp;
+ }
+ break;
+
+ case DATA_FRU_NOT_USED:
+ default:
+ // ignore reading
+ l_ot_error = 0;
+ break;
+ } // end switch fru type
+
+ // check if this "DIMM" sensor is over its error temperature
+ if( l_ot_error && (l_cur_temp >= l_ot_error) )
+ {
+ //Set a bit so that this sensor can be called out by the thermal thread
+ G_dimm_overtemp_bitmap.bytes[k] |= (DIMM_SENSOR0 >> l_dimm);
+ // trace first time OT per DIMM DTS sensor
+ if( !L_ot_traced[k][l_dimm] )
+ {
+ TRAC_ERR("amec_update_ocmb_temp_sensors: OCMB[%d] DTS[%d] type[0x%02X] reached error temp[%d]. current[%d]",
+ k,
+ l_dimm,
+ l_fru_type,
+ l_ot_error,
+ l_cur_temp);
+ L_ot_traced[k][l_dimm] = true;
+ }
+ }
+ } // end for each "dimm" thermal sensor
+ } // end for each OCMB
+
sensor_update(&g_amec->proc[0].tempcent,l_hot_mb);
AMEC_DBG("HotMembuf=%d\n",l_hot_mb);
sensor_update(&g_amec->proc[0].tempdimmthrm,l_hot_dimm);
AMEC_DBG("HotDimm=%d\n",l_hot_dimm);
+ sensor_update(&g_amec->proc[0].tempmcdimmthrm,l_hot_mb_dimm);
+ AMEC_DBG("HotMCDimm=%d\n",l_hot_mb_dimm);
+
+ sensor_update(&g_amec->proc[0].temppmicthrm,l_hot_pmic);
+ AMEC_DBG("HotPmic=%d\n",l_hot_pmic);
+
+ sensor_update(&g_amec->proc[0].tempmcextthrm,l_hot_ext_mb);
+ AMEC_DBG("HotExternalMembuf=%d\n",l_hot_ext_mb);
}
@@ -510,24 +573,27 @@ void amec_perfcount_ocmb_getmc( OcmbMemData * i_sensor_cache,
g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memread2ms = tempreg;
- // Go after second MC performance counter (power ups and activations)
- tempu = l_sensor_cache->mba_act;
- templ = l_sensor_cache->mba_powerups;
-
- // ------------------------------------------------------------
- // Sensor: MRDMx (0.01 Mrps) Memory read requests per sec
- // ------------------------------------------------------------
- tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memread2ms;
- tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memread2ms;
- sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mrd)), tempreg);
-
- // -------------------------------------------------------------
- // Sensor: MWRMx (0.01 Mrps) Memory write requests per sec
- // -------------------------------------------------------------
- tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memwrite2ms;
- tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memwrite2ms;
- sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mwr)), tempreg);
-
+ // Due to limited SRAM only have sensor support for first 12 mem buffs
+ if(i_membuf < 12)
+ {
+ // Go after second MC performance counter (power ups and activations)
+ tempu = l_sensor_cache->mba_act;
+ templ = l_sensor_cache->mba_powerups;
+
+ // ------------------------------------------------------------
+ // Sensor: MRDMx (0.01 Mrps) Memory read requests per sec
+ // ------------------------------------------------------------
+ tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memread2ms;
+ tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memread2ms;
+ sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mrd)), tempreg);
+
+ // -------------------------------------------------------------
+ // Sensor: MWRMx (0.01 Mrps) Memory write requests per sec
+ // -------------------------------------------------------------
+ tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memwrite2ms;
+ tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memwrite2ms;
+ sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mwr)), tempreg);
+ }
return;
}
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index c19cd53..f17b42b 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -146,6 +146,7 @@ typedef struct
amec_cent_mem_speed_t last_mem_speed_sent;
} amec_portpair_t;
+// bit masks for fru_temp_t flags
#define FRU_SENSOR_STATUS_STALLED 0x01
#define FRU_SENSOR_STATUS_ERROR 0x02
#define FRU_SENSOR_STATUS_VALID_OLD 0x04
@@ -154,6 +155,12 @@ typedef struct
#define FRU_TEMP_FAST_CHANGE 0x20
#define FRU_SENSOR_CENT_NEST_FIR6 0x40 //centaur only
+// OpenCAPI memory only bit masks for fru_temp_t dts_type_mask
+#define OCM_DTS_TYPE_DIMM_MASK 0x01
+#define OCM_DTS_TYPE_MEMCTRL_DRAM_MASK 0x02
+#define OCM_DTS_TYPE_PMIC_MASK 0x04
+#define OCM_DTS_TYPE_MEMCTRL_EXT_MASK 0x08
+
typedef struct
{
uint8_t cur_temp;
@@ -161,6 +168,10 @@ typedef struct
uint8_t flags;
// Sensor ID for reporting temperature to BMC and FSP
uint32_t temp_sid;
+ // Indicates what eConfigDataFruType this temperature is for
+ uint8_t temp_fru_type;
+ // OCM only: OCM_DTS_TYPE_*_MASK bit identifying which sensor type this temperature is for
+ uint8_t dts_type_mask;
}fru_temp_t;
typedef struct
@@ -191,7 +202,6 @@ typedef struct
// Sensor ID for reporting temperature to BMC and FSP
uint32_t temp_sid;
-
} amec_centaur_t;
typedef struct
@@ -469,8 +479,11 @@ typedef struct
vectorSensor_t util_vector;
// Memory Summary Sensors
- sensor_t tempcent;
- sensor_t tempdimmthrm;
+ sensor_t tempcent; // hottest of all DATA_FRU_CENTAUR monitored by this OCC
+ sensor_t tempdimmthrm; // hottest of all DATA_FRU_DIMM monitored by this OCC
+ sensor_t tempmcdimmthrm; // hottest of all DATA_FRU_MEMCTRL_DRAM monitored by this OCC
+ sensor_t temppmicthrm; // hottest of all DATA_FRU_PMIC monitored by this OCC
+ sensor_t tempmcextthrm; // hottest of all DATA_FRU_MEMCTRL_EXT monitored by this OCC
sensor_t mempwrthrot;
sensor_t memotthrot;
@@ -628,10 +641,16 @@ typedef struct
//---------------------------------------------------------
// Thermal Controller based on processor temperatures
amec_controller_t thermalproc;
- // Thermal Controller based on Centaur temperatures
+ // Thermal Controller based on Centaur (internal mc) temperatures
amec_controller_t thermalcent;
// Thermal Controller based on DIMM temperatures
amec_controller_t thermaldimm;
+ // Thermal Controller based on temperature sensors covering both Memctrl+DIMM
+ amec_controller_t thermalmcdimm;
+ // Thermal Controller based on PMIC temperatures
+ amec_controller_t thermalpmic;
+ // Thermal Controller based on external mem controller temperatures
+ amec_controller_t thermalmcext;
// Thermal Controller based on VRM Vdd temperatures
amec_controller_t thermalvdd;
diff --git a/src/occ_405/cent/centaur_data.c b/src/occ_405/cent/centaur_data.c
index 0e6f17d..3f6f255 100755
--- a/src/occ_405/cent/centaur_data.c
+++ b/src/occ_405/cent/centaur_data.c
@@ -104,8 +104,8 @@ extern gpe_shared_data_t G_shared_gpe_data;
// MemDataMcs mcs; // not used
// MemDataSensorCache scache;
// } MemData;
-//Global array of centaur data buffers
-GPE_BUFFER(CentaurMemData G_centaur_data[NUM_CENTAUR_DATA_BUFF +
+//Global array of centaur data buffers common with OCMB
+GPE_BUFFER(CentaurMemData G_centaur_data[MAX_NUM_MEM_CONTROLLERS +
NUM_CENTAUR_DOUBLE_BUF +
NUM_CENTAUR_DATA_EMPTY_BUF]);
@@ -118,18 +118,19 @@ GPE_BUFFER(MemBufScomParms_t G_cent_scom_gpe_parms);
//scom command list entry
GPE_BUFFER(scomList_t G_cent_scom_list_entry[NUM_CENT_OPS]);
-//buffer for storing output from running IPC_ST_MEMBUF_SCOM()
+//buffer for storing output from running IPC_ST_MEMBUF_SCOM() Centaur only
GPE_BUFFER(uint64_t G_cent_scom_data[MAX_NUM_CENTAURS]) = {0};
// parms for call to IPC_ST_MEMBUF_INIT_FUNCID
GPE_BUFFER(MemBufConfigParms_t G_gpe_centaur_config_args);
GPE_BUFFER(MemBufConfiguration_t G_membufConfiguration);
-//Global array of centaur data pointers
-CentaurMemData * G_centaur_data_ptrs[MAX_NUM_CENTAURS] = { &G_centaur_data[0],
- &G_centaur_data[1], &G_centaur_data[2], &G_centaur_data[3],
- &G_centaur_data[4], &G_centaur_data[5], &G_centaur_data[6],
- &G_centaur_data[7]};
+//Global array of centaur data pointers, shared with OCMB; sized by MAX_NUM_MEM_CONTROLLERS to cover the max number of OCMBs
+CentaurMemData * G_centaur_data_ptrs[MAX_NUM_MEM_CONTROLLERS] =
+ { &G_centaur_data[0], &G_centaur_data[1], &G_centaur_data[2], &G_centaur_data[3],
+ &G_centaur_data[4], &G_centaur_data[5], &G_centaur_data[6], &G_centaur_data[7],
+ &G_centaur_data[8], &G_centaur_data[9], &G_centaur_data[10], &G_centaur_data[11],
+ &G_centaur_data[12], &G_centaur_data[13], &G_centaur_data[14], &G_centaur_data[15] };
//Global structures for gpe get mem data parms
GPE_BUFFER(MemBufGetMemDataParms_t G_membuf_data_parms);
@@ -146,6 +147,15 @@ membuf_data_task_t G_membuf_data_task = {
.membuf_data_ptr = &G_centaur_data[MAX_NUM_CENTAURS]
};
+//OCMB structures used for task data pointers.
+membuf_data_task_t G_ocmb_data_task = {
+ .start_membuf = 0,
+ .current_membuf = 0,
+ .end_membuf = 15,
+ .prev_membuf = 15,
+ .membuf_data_ptr = &G_centaur_data[MAX_NUM_OCMBS]
+};
+
dimm_sensor_flags_t G_dimm_enabled_sensors = {{0}};
dimm_sensor_flags_t G_dimm_present_sensors = {{0}};
@@ -600,10 +610,18 @@ void centaur_data( void )
membuf_data_task_t * l_centaur_data_ptr = &G_membuf_data_task;
MemBufGetMemDataParms_t * l_parms =
(MemBufGetMemDataParms_t *)(l_centaur_data_ptr->gpe_req.cmd_data);
+ uint8_t l_empty_buf_idx = MAX_NUM_CENTAURS + 1; // array index for empty buffer
static bool L_gpe_scheduled = FALSE;
static bool L_gpe_error_logged = FALSE;
static bool L_gpe_had_1_tick = FALSE;
+ // local inits are for Centaur, need to change some that are different for OCM
+ if(G_sysConfigData.mem_type == MEM_TYPE_OCM)
+ {
+ l_centaur_data_ptr = &G_ocmb_data_task;
+ l_empty_buf_idx = MAX_NUM_OCMBS + 1;
+ }
+
do
{
// ------------------------------------------
@@ -806,7 +824,7 @@ void centaur_data( void )
// (this is very handy for debug...)
if( !CENTAUR_PRESENT(l_centaur_data_ptr->current_membuf))
{
- G_centaur_data_ptrs[l_centaur_data_ptr->current_membuf] = &G_centaur_data[9];
+ G_centaur_data_ptrs[l_centaur_data_ptr->current_membuf] = &G_centaur_data[l_empty_buf_idx];
}
//Update current centaur
@@ -1266,7 +1284,7 @@ void centaur_init( void )
//
// Description: Returns a pointer to the most up-to-date centaur data for
// the centaur associated with the specified OCC centaur id.
-// Returns NULL for centaur ID outside the range of 0 to 7.
+// Returns NULL for mem buf ID outside range.
//
// End Function Specification
CentaurMemData * cent_get_centaur_data_ptr( const uint8_t i_occ_centaur_id )
@@ -1274,14 +1292,21 @@ CentaurMemData * cent_get_centaur_data_ptr( const uint8_t i_occ_centaur_id )
//The caller needs to send in a valid OCC centaur id. Since type is uchar
//so there is no need to check for case less than 0.
//If centaur id is invalid then returns NULL.
- if( i_occ_centaur_id < MAX_NUM_CENTAURS )
+ if( (G_sysConfigData.mem_type == MEM_TYPE_CUMULUS) &&
+ (i_occ_centaur_id < MAX_NUM_CENTAURS) )
+ {
+ //Returns a pointer to the most up-to-date centaur data.
+ return G_centaur_data_ptrs[i_occ_centaur_id];
+ }
+ else if( (G_sysConfigData.mem_type == MEM_TYPE_OCM) &&
+ (i_occ_centaur_id < MAX_NUM_OCMBS) )
{
//Returns a pointer to the most up-to-date centaur data.
return G_centaur_data_ptrs[i_occ_centaur_id];
}
else
{
- //Core id outside the range
+ //Mem buf id outside the range
TRAC_ERR("cent_get_centaur_data_ptr: Invalid OCC centaur id [0x%x]", i_occ_centaur_id);
return( NULL );
}
diff --git a/src/occ_405/cent/centaur_data.h b/src/occ_405/cent/centaur_data.h
index b5177b1..d6c2cea 100755
--- a/src/occ_405/cent/centaur_data.h
+++ b/src/occ_405/cent/centaur_data.h
@@ -121,6 +121,7 @@ typedef struct membuf_data_task membuf_data_task_t;
//Global centaur structures used for task data pointers
extern membuf_data_task_t G_membuf_data_task;
+extern membuf_data_task_t G_ocmb_data_task;
//Global is bitmask of centaurs
extern uint32_t G_present_centaurs;
diff --git a/src/occ_405/cent/ocmb_data.c b/src/occ_405/cent/ocmb_data.c
index fc3f8fa..e78e66d 100644
--- a/src/occ_405/cent/ocmb_data.c
+++ b/src/occ_405/cent/ocmb_data.c
@@ -42,7 +42,7 @@ extern gpe_shared_data_t G_shared_gpe_data;
* @post G_membufConfiguration populated
* @post G_present_centaurs populated
* @post G_dimm_present_sensors
- * @post G_membuf_data_task populated
+ * @post G_ocmb_data_task populated
* @post GPE request to call for recover created ?
* @post GPE request to call for throttle conttrol created
* @note HW Deadman timer enabled and set to max value
@@ -109,7 +109,7 @@ void ocmb_init(void)
G_membuf_data_parms.data = 0;
rc = gpe_request_create(
- &G_membuf_data_task.gpe_req, //gpe_req for the task
+ &G_ocmb_data_task.gpe_req, //gpe_req for the task
&G_async_gpe_queue1, //queue
IPC_ST_MEMBUF_DATA_FUNCID, //Function ID
&G_membuf_data_parms, //parm for the task
@@ -120,7 +120,7 @@ void ocmb_init(void)
if( rc )
{
TRAC_ERR("ocmb_init: gpe_request_create failed for "
- "G_membuf_data_task.gpe_req. rc = 0x%08x", rc);
+ "G_ocmb_data_task.gpe_req. rc = 0x%08x", rc);
break;
}
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.c b/src/occ_405/cmdh/cmdh_fsp_cmds.c
index 493d7c5..e8ee2d6 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds.c
@@ -205,8 +205,11 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
}
//If memory is being throttled due to OverTemp or due to Failure to read sensors set mthrot_due_to_ot bit.
- if (((g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_DIMM) ||
- (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_CENT)))
+ if ( (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_DIMM) ||
+ (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_CENT) ||
+ (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_MCDIMM) ||
+ (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_PMIC) ||
+ (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_MC_EXT) )
{
l_poll_rsp->ext_status.mthrot_due_to_ot = 1;
}
@@ -384,7 +387,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
//Add entry for centaurs.
uint32_t l_temp_sid = g_amec->proc[0].memctl[l_cent].centaur.temp_sid;
l_tempSensorList[l_sensorHeader.count].id = l_temp_sid;
- l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_CENTAUR;
+ l_tempSensorList[l_sensorHeader.count].fru_type = g_amec->proc[0].memctl[l_cent].centaur.centaur_hottest.temp_fru_type;
if (G_cent_timeout_logged_bitmap & (CENTAUR0_PRESENT_MASK >> l_cent))
{
l_tempSensorList[l_sensorHeader.count].value = 0xFF;
@@ -414,7 +417,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
if (l_temp_sid != 0)
{
l_tempSensorList[l_sensorHeader.count].id = l_temp_sid;
- l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_DIMM;
+ l_tempSensorList[l_sensorHeader.count].fru_type = g_amec->proc[0].memctl[l_cent].centaur.dimm_temps[l_dimm].temp_fru_type;
//If a dimm timed out long enough, we should return 0xFFFF for that sensor.
if (G_dimm_temp_expired_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm))
{
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
index 0ca1e04..e976309 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
@@ -2104,13 +2104,6 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr,
G_data_cnfg->thrm_thresh.data[l_frutype].max_read_timeout =
l_cmd_ptr->data[i].max_read_timeout;
- // VRM OT status is no longer supported since the OCC supports reading Vdd temperature
- // Trace if VRM OT status FRU type is received and just ignore it
- if(l_frutype == DATA_FRU_VRM_OT_STATUS)
- {
- CMDH_TRAC_IMP("data_store_thrm_thresholds: Received deprecated VRM OT STATUS type will be ignored");
- }
-
// Useful trace for debugging
//CMDH_TRAC_INFO("data_store_thrm_thresholds: FRU_type[0x%.2X] T_control[%u] DVFS[%u] Error[%u]",
// G_data_cnfg->thrm_thresh.data[l_frutype].fru_type,
@@ -2352,12 +2345,15 @@ errlHndl_t data_store_mem_cfg(const cmdh_fsp_cmd_t * i_cmd_ptr,
if (IS_OCM_MEM_TYPE(l_data_set->memory_type))
{
- // Get the physical location from type
- l_membuf_num &= OCMB_TYPE_LOCATION_MASK;
if (G_sysConfigData.mem_type != MEM_TYPE_OCM)
{
l_type_mismatch = TRUE;
}
+ else
+ {
+ // Get the physical location from type
+ l_membuf_num &= OCMB_TYPE_LOCATION_MASK;
+ }
}
else if (G_sysConfigData.mem_type != MEM_TYPE_CUMULUS)
{
@@ -2383,13 +2379,34 @@ errlHndl_t data_store_mem_cfg(const cmdh_fsp_cmd_t * i_cmd_ptr,
// Store the temperature sensor ID
g_amec->proc[0].memctl[l_membuf_num].centaur.temp_sid = l_data_set->temp_sensor_id;
- if (G_sysConfigData.mem_type == MEM_TYPE_OCM)
+ // Specific handling for OCMB vs Centaur
+ if(G_sysConfigData.mem_type == MEM_TYPE_OCM)
{
- // Both OCMB and Centaur code use this global to idicate which MBs
- // are present, but Centaur sets this up later in centaur_init()
- G_present_centaurs |= (CENTAUR0_PRESENT_MASK >> l_membuf_num);
+ // Both OCMB and Centaur code use this global to indicate which MBs
+ // are present, but Centaur sets this up later in centaur_init()
+ G_present_centaurs |= (CENTAUR0_PRESENT_MASK >> l_membuf_num);
+
+ // Store the temperature sensor fru type
+ // The internal sensor is either for internal memctrl ("centaur" fru type)
+ // or it is not being used due to hw bug
+ if( (l_data_set->dimm_info2 == DATA_FRU_CENTAUR) ||
+ (l_data_set->dimm_info2 == DATA_FRU_NOT_USED) )
+ {
+ g_amec->proc[0].memctl[l_membuf_num].centaur.centaur_hottest.temp_fru_type = l_data_set->dimm_info2;
+ }
+ else
+ {
+ // not a valid fru type for the internal sensor, trace and don't use it
+ CMDH_TRAC_ERR("data_store_mem_cfg: Got invalid fru type[0x%02X] for mem buf[%d]",
+ l_data_set->dimm_info2, l_membuf_num);
+ g_amec->proc[0].memctl[l_membuf_num].centaur.centaur_hottest.temp_fru_type = DATA_FRU_NOT_USED;
+ }
+ }
+ else // centaur
+ {
+ // must be type centaur
+ g_amec->proc[0].memctl[l_membuf_num].centaur.centaur_hottest.temp_fru_type = DATA_FRU_CENTAUR;
}
-
l_num_mem_bufs++;
}
else // individual DIMM
@@ -2401,6 +2418,48 @@ errlHndl_t data_store_mem_cfg(const cmdh_fsp_cmd_t * i_cmd_ptr,
g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_sid =
l_data_set->temp_sensor_id;
+ // Store the temperature sensor fru type
+ if(G_sysConfigData.mem_type == MEM_TYPE_OCM)
+ {
+ // The 2 external temp sensors may be used for non-dimm fru type i.e. PMIC, mem controller...
+ // this fru type is coming from attributes setup by HWP during IPL and then read by (H)TMGT
+ if(l_data_set->dimm_info2 == DATA_FRU_DIMM)
+ {
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_DIMM;
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_DIMM_MASK;
+ }
+ else if(l_data_set->dimm_info2 == DATA_FRU_MEMCTRL_DRAM)
+ {
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_MEMCTRL_DRAM;
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_MEMCTRL_DRAM_MASK;
+ }
+ else if(l_data_set->dimm_info2 == DATA_FRU_PMIC)
+ {
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_PMIC;
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_PMIC_MASK;
+ }
+ else if(l_data_set->dimm_info2 == DATA_FRU_MEMCTRL_EXT)
+ {
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_MEMCTRL_EXT;
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_MEMCTRL_EXT_MASK;
+ }
+ else // sensor not used
+ {
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_NOT_USED;
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = 0;
+ if (l_data_set->dimm_info2 != DATA_FRU_NOT_USED)
+ {
+ // not a valid fru type
+ CMDH_TRAC_ERR("data_store_mem_cfg: Got invalid fru type[0x%02X] for mem buf[%d] dimm[%d]",
+ l_data_set->dimm_info2, l_membuf_num, l_dimm_num);
+ }
+ }
+ }
+ else // centaur
+ {
+ // must be type DIMM
+ g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_DIMM;
+ }
l_num_dimms++;
}
} // end CENTAUR/OCMB
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
index e37e114..ae76cc7 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
@@ -77,15 +77,21 @@ typedef enum
typedef enum
{
DATA_FRU_PROC = 0x00,
- DATA_FRU_CENTAUR = 0x01,
+ DATA_FRU_CENTAUR = 0x01, // memory controller (Centaur/Explorer) sensor location is internal
DATA_FRU_DIMM = 0x02,
- DATA_FRU_VRM_OT_STATUS = 0x03, // this is just for the bit and is no longer being supported
+ DATA_FRU_MEMCTRL_DRAM = 0x03, // OCM only. external sensor covers both mem controller and DRAM
DATA_FRU_GPU = 0x04,
DATA_FRU_GPU_MEM = 0x05,
DATA_FRU_VRM_VDD = 0x06, // this is an actual temperature reading for VRM Vdd
+ DATA_FRU_PMIC = 0x07, // OCM only
+ DATA_FRU_MEMCTRL_EXT = 0x08, // OCM only. memory controller sensor location is external
DATA_FRU_MAX,
} eConfigDataFruType;
+// For OCM the mapping of sensors to fru type comes in mem config data from (H)TMGT and some may not be used
+// OCC should ignore readings from all sensors marked as not used
+#define DATA_FRU_NOT_USED 0xFF
+
typedef enum
{
PWR_READING_TYPE_APSS = 0x00,
diff --git a/src/occ_405/occbuildname.c b/src/occ_405/occbuildname.c
index 54dcc23..74bd96e 100755
--- a/src/occ_405/occbuildname.c
+++ b/src/occ_405/occbuildname.c
@@ -34,6 +34,6 @@ volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) =
#else
-volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /*<BuildName>*/ "op_occ_190723b\0" /*</BuildName>*/ ;
+volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /*<BuildName>*/ "op_occ_190726a\0" /*</BuildName>*/ ;
#endif
diff --git a/src/occ_405/sensor/sensor_enum.h b/src/occ_405/sensor/sensor_enum.h
index e56fea2..0133fec 100755
--- a/src/occ_405/sensor/sensor_enum.h
+++ b/src/occ_405/sensor/sensor_enum.h
@@ -511,6 +511,9 @@ enum e_gsid
// ------------------------------------------------------
TEMPCENT,
TEMPDIMMTHRM,
+ TEMPMCDIMMTHRM,
+ TEMPPMICTHRM,
+ TEMPMCEXTTHRM,
// ------------------------------------------------------
// GPU Sensors
diff --git a/src/occ_405/sensor/sensor_info.c b/src/occ_405/sensor/sensor_info.c
index e57e4e4..a9bc7c8 100755
--- a/src/occ_405/sensor/sensor_info.c
+++ b/src/occ_405/sensor/sensor_info.c
@@ -336,6 +336,9 @@ const sensor_info_t G_sensor_info[] =
/* ==MemSummarySensors== NameString Units Type Location Number Freq ScaleFactor */
SENSOR_INFO_T_ENTRY( TEMPCENT, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_8TH_TICK_HZ, AMEFP( 1, 0) ),
SENSOR_INFO_T_ENTRY( TEMPDIMMTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ),
+ SENSOR_INFO_T_ENTRY( TEMPMCDIMMTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ),
+ SENSOR_INFO_T_ENTRY( TEMPPMICTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ),
+ SENSOR_INFO_T_ENTRY( TEMPMCEXTTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ),
/* ==GPUSensors== NameString Units Type Location Number Freq ScaleFactor */
SENSOR_INFO_T_ENTRY( TEMPGPU0, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_GPU, AMEC_SENSOR_NONUM, AMEEFP_1S_IN_HZ, AMEFP( 1, 0) ),
diff --git a/src/occ_405/sensor/sensor_table.c b/src/occ_405/sensor/sensor_table.c
index fe66b16..6237c5f 100755
--- a/src/occ_405/sensor/sensor_table.c
+++ b/src/occ_405/sensor/sensor_table.c
@@ -385,6 +385,9 @@ const sensor_ptr_t G_amec_sensor_list[] =
SENSOR_PTR(TEMPCENT, &g_amec_sys.proc[0].tempcent),
SENSOR_PTR(TEMPDIMMTHRM, &g_amec_sys.proc[0].tempdimmthrm),
+ SENSOR_PTR(TEMPMCDIMMTHRM, &g_amec_sys.proc[0].tempmcdimmthrm),
+ SENSOR_PTR(TEMPPMICTHRM, &g_amec_sys.proc[0].temppmicthrm),
+ SENSOR_PTR(TEMPMCEXTTHRM, &g_amec_sys.proc[0].tempmcextthrm),
// ------------------------------------------------------
// GPU Sensors
@@ -558,6 +561,9 @@ const minisensor_ptr_t G_amec_mini_sensor_list[] INIT_SECTION =
MINI_SENSOR_PTR( TEMPCENT, &G_dcom_slv_outbox_tx.tempcent),
MINI_SENSOR_PTR( TEMPDIMMTHRM, &G_dcom_slv_outbox_tx.tempdimmthrm),
+ MINI_SENSOR_PTR( TEMPMCDIMMTHRM, NULL), // $todo RTC: 213569 add 3 new summary sensors to call home
+ MINI_SENSOR_PTR( TEMPPMICTHRM, NULL),
+ MINI_SENSOR_PTR( TEMPMCEXTTHRM, NULL),
// ------------------------------------------------------
// GPU Sensors
OpenPOWER on IntegriCloud