summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/occ_405/amec/amec_controller.c82
-rw-r--r--src/occ_405/amec/amec_controller.h10
-rwxr-xr-xsrc/occ_405/amec/amec_freq.c56
-rw-r--r--src/occ_405/amec/amec_freq.h7
-rw-r--r--src/occ_405/amec/amec_init.c7
-rwxr-xr-xsrc/occ_405/amec/amec_master_smh.c39
-rwxr-xr-xsrc/occ_405/amec/amec_slave_smh.c3
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_fsp_cmds.c22
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_fsp_cmds.h16
-rwxr-xr-xsrc/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c11
-rwxr-xr-xsrc/occ_405/occ_sys_config.h11
11 files changed, 224 insertions, 40 deletions
diff --git a/src/occ_405/amec/amec_controller.c b/src/occ_405/amec/amec_controller.c
index 2154d07..f310208 100644
--- a/src/occ_405/amec/amec_controller.c
+++ b/src/occ_405/amec/amec_controller.c
@@ -156,6 +156,88 @@ void amec_controller_proc_thermal()
}
}
+// Function Specification
+//
+// Name: amec_controller_vrm_vdd_thermal
+//
+// Description: Proportional Controller for the VRM Vdd thermal control loop.
+// Reads the TEMPVDD sensor, compares it (in 0.1 degrees C) with
+// g_amec->thermalvdd.setpoint and updates the thermal vote in
+// g_amec->thermalvdd.speed_request and .freq_request. Returns nothing.
+//
+// Task Flags:
+//
+// End Function Specification
+void amec_controller_vrm_vdd_thermal()
+{
+ /*------------------------------------------------------------------------*/
+ /* Local Variables */
+ /*------------------------------------------------------------------------*/
+ uint16_t l_vdd_temp_tenthsC = 0; // sensor sample multiplied by 10
+ uint16_t l_residue = 0; // l_throttle truncated to 16 bits
+ uint16_t l_old_residue = 0; // total_res before this pass, used to detect a decrease
+ int16_t l_thermal_diff = 0; // setpoint minus measured temperature
+ int16_t l_cpu_speed = 0; // candidate value for the new speed request
+ int16_t l_throttle_chg = 0; // l_throttle >> 16, later clamped to the step limit
+ int32_t l_throttle = 0; // product of l_thermal_diff and Pgain
+ sensor_t * l_sensor = NULL;
+
+ /*------------------------------------------------------------------------*/
+ /* Code */
+ /*------------------------------------------------------------------------*/
+ // Get VRM Vdd temperature sensor
+ l_sensor = getSensorByGsid(TEMPVDD); // NOTE(review): dereferenced below without a NULL check - confirm this lookup cannot return NULL
+
+ // Convert current Vdd temperature to 0.1 degrees C
+ l_vdd_temp_tenthsC = l_sensor->sample * 10;
+
+ // Calculate the temperature difference from the DVFS setpoint (positive when the reading is below the setpoint)
+ l_thermal_diff = g_amec->thermalvdd.setpoint - l_vdd_temp_tenthsC;
+
+ // Proportional term: 32-bit product of diff and Pgain, split into a 16-bit residue and a step count (product >> 16)
+ l_throttle = (int32_t) l_thermal_diff * g_amec->thermalvdd.Pgain;
+ l_residue = (uint16_t) l_throttle;
+ l_throttle_chg = (int16_t) (l_throttle >> 16);
+
+ // Clamp the throttle change to +/- g_amec->sys.speed_step_limit
+ if ((int16_t) l_throttle_chg > (int16_t) g_amec->sys.speed_step_limit)
+ {
+ l_throttle_chg = g_amec->sys.speed_step_limit;
+ }
+ else
+ {
+ if ((int16_t) l_throttle_chg < ((int16_t) (-g_amec->sys.speed_step_limit)))
+ {
+ l_throttle_chg = (int16_t)(-g_amec->sys.speed_step_limit);
+ }
+ }
+
+ // New thermal CPU speed request: previous request plus the clamped change scaled by speed_step
+ l_cpu_speed = g_amec->thermalvdd.speed_request +
+ (int16_t)(l_throttle_chg * g_amec->sys.speed_step);
+
+ // Accumulate the residue; a new total below the old one (presumably unsigned wrap-around - confirm total_res type) adds one extra speed_step
+ l_old_residue = g_amec->thermalvdd.total_res;
+ g_amec->thermalvdd.total_res += l_residue;
+ if (g_amec->thermalvdd.total_res < l_old_residue)
+ {
+ l_cpu_speed += g_amec->sys.speed_step;
+ }
+
+ // Enforce actuator saturation limits: cap at sys.max_speed first, then floor at sys.min_speed
+ if (l_cpu_speed > g_amec->sys.max_speed)
+ l_cpu_speed = g_amec->sys.max_speed;
+ if (l_cpu_speed < g_amec->sys.min_speed)
+ l_cpu_speed = g_amec->sys.min_speed;
+
+ // Store the new thermal speed request
+ g_amec->thermalvdd.speed_request = l_cpu_speed;
+ // Derive the frequency request from the stored speed request and g_amec->sys.fmax
+ g_amec->thermalvdd.freq_request = amec_controller_speed2freq(
+ g_amec->thermalvdd.speed_request,
+ g_amec->sys.fmax);
+}
+
//*************************************************************************
// Function Specification
//
diff --git a/src/occ_405/amec/amec_controller.h b/src/occ_405/amec/amec_controller.h
index 76e70ef..29a8061 100644
--- a/src/occ_405/amec/amec_controller.h
+++ b/src/occ_405/amec/amec_controller.h
@@ -91,6 +91,16 @@ typedef struct amec_controller
void amec_controller_proc_thermal();
/**
+ * Thermal Control Loop based on VRM Vdd temperature
+ *
+ * This function implements the Proportional Controller for the VRM Vdd thermal
+ * control loop. It returns nothing; it populates the thermal vote in
+ * g_amec->thermalvdd.speed_request and the frequency request derived from it
+ * in g_amec->thermalvdd.freq_request.
+ */
+void amec_controller_vrm_vdd_thermal();
+
+/**
* Thermal Control Loop based on Centaur temperatures.
*
* This function implements a Proportional Controller for the
diff --git a/src/occ_405/amec/amec_freq.c b/src/occ_405/amec/amec_freq.c
index 03bc22c..9150328 100755
--- a/src/occ_405/amec/amec_freq.c
+++ b/src/occ_405/amec/amec_freq.c
@@ -282,6 +282,9 @@ void amec_slv_proc_voting_box(void)
amec_proc_voting_reason_t l_kvm_throt_reason = NO_THROTTLE;
amec_part_t *l_part = NULL;
+ // frequency threshold for reporting throttling
+ uint16_t l_report_throttle_freq = G_sysConfigData.system_type.report_dvfs_nom ? G_sysConfigData.sys_mode_freq.table[OCC_MODE_NOMINAL] : G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO];
+
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
@@ -307,9 +310,9 @@ void amec_slv_proc_voting_box(void)
l_chip_fmax = g_amec->proc[0].pwr_votes.ppb_fmax;
l_chip_reason = AMEC_VOTING_REASON_PPB;
- if( G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_chip_fmax)
+ if(l_report_throttle_freq <= l_chip_fmax)
{
- l_kvm_throt_reason = PCAP_EXCEED_PTURBO;
+ l_kvm_throt_reason = PCAP_EXCEED_REPORT;
}
else
{
@@ -340,9 +343,9 @@ void amec_slv_proc_voting_box(void)
l_chip_fmax = g_amec->thermalproc.freq_request;
l_chip_reason = AMEC_VOTING_REASON_PROC_THRM;
- if( G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_chip_fmax)
+ if( l_report_throttle_freq <= l_chip_fmax)
{
- l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_PTURBO;
+ l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_REPORT;
}
else
{
@@ -350,15 +353,31 @@ void amec_slv_proc_voting_box(void)
}
}
+ //Thermal controller input based on VRM Vdd temperature
+ if(g_amec->thermalvdd.freq_request < l_chip_fmax)
+ {
+ l_chip_fmax = g_amec->thermalvdd.freq_request;
+ l_chip_reason = AMEC_VOTING_REASON_VDD_THRM;
+
+ if( l_report_throttle_freq <= l_chip_fmax)
+ {
+ l_kvm_throt_reason = VDD_OVERTEMP_EXCEED_REPORT;
+ }
+ else
+ {
+ l_kvm_throt_reason = VDD_OVERTEMP;
+ }
+ }
+
// Controller request based on VRHOT signal from processor regulator
if(g_amec->vrhotproc.freq_request < l_chip_fmax)
{
l_chip_fmax = g_amec->vrhotproc.freq_request;
l_chip_reason = AMEC_VOTING_REASON_VRHOT_THRM;
- if(G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_chip_fmax)
+ if(l_report_throttle_freq <= l_chip_fmax)
{
- l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_PTURBO;
+ l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_REPORT;
}
else
{
@@ -393,6 +412,7 @@ void amec_slv_proc_voting_box(void)
// Before enforcing a soft Fmin, make sure we don't
// have a thermal or power emergency
if(!(l_chip_reason & (AMEC_VOTING_REASON_PROC_THRM |
+ AMEC_VOTING_REASON_VDD_THRM |
AMEC_VOTING_REASON_VRHOT_THRM |
AMEC_VOTING_REASON_PPB |
AMEC_VOTING_REASON_PMAX |
@@ -428,9 +448,9 @@ void amec_slv_proc_voting_box(void)
l_core_freq = g_amec->proc[0].pwr_votes.proc_pcap_vote;
l_core_reason = AMEC_VOTING_REASON_PWR;
- if(G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_core_freq)
+ if(l_report_throttle_freq <= l_core_freq)
{
- l_kvm_throt_reason = PCAP_EXCEED_PTURBO;
+ l_kvm_throt_reason = PCAP_EXCEED_REPORT;
}
else
{
@@ -544,19 +564,17 @@ void amec_slv_proc_voting_box(void)
}
}//End of for loop
- //convert POWERCAP reason to POWER_SUPPLY_FAILURE if ovs is asserted
- if((l_kvm_throt_reason == POWERCAP) && AMEC_INTF_GET_OVERSUBSCRIPTION())
+ //check if there was a throttle reason change
+ if(l_kvm_throt_reason != G_amec_opal_proc_throt_reason)
{
- l_kvm_throt_reason = POWER_SUPPLY_FAILURE;
- }
-
- //check if we need to update the throttle reason in homer
- if(G_sysConfigData.system_type.kvm &&
- (l_kvm_throt_reason != G_amec_opal_proc_throt_reason))
- {
- //Notify dcom thread to update the table
+ //Always update G_amec_opal_proc_throt_reason, this is used to set poll rsp bits for all system types
G_amec_opal_proc_throt_reason = l_kvm_throt_reason;
- ssx_semaphore_post(&G_dcomThreadWakeupSem);
+
+ // Only if running OPAL need to notify dcom thread to update the table in HOMER for OPAL
+ if(G_sysConfigData.system_type.kvm)
+ {
+ ssx_semaphore_post(&G_dcomThreadWakeupSem);
+ }
}
}
diff --git a/src/occ_405/amec/amec_freq.h b/src/occ_405/amec/amec_freq.h
index d2b9c90..2cf3c85 100644
--- a/src/occ_405/amec/amec_freq.h
+++ b/src/occ_405/amec/amec_freq.h
@@ -81,6 +81,7 @@ typedef enum
AMEC_VOTING_REASON_OVERRIDE_CORE = 0x00020000,
AMEC_VOTING_REASON_IPS = 0x00040000,
AMEC_VOTING_REASON_APSS_PMAX = 0x00080000,
+ AMEC_VOTING_REASON_VDD_THRM = 0x00100000,
}amec_freq_voting_reason_t;
@@ -116,8 +117,10 @@ typedef enum {
POWER_SUPPLY_FAILURE = 0x03,
OVERCURRENT = 0x04,
OCC_RESET = 0x05,
- PCAP_EXCEED_PTURBO = 0x06,
- PROC_OVERTEMP_EXCEED_PTURBO = 0x07,
+ PCAP_EXCEED_REPORT = 0x06,
+ PROC_OVERTEMP_EXCEED_REPORT = 0x07,
+ VDD_OVERTEMP = 0x08,
+ VDD_OVERTEMP_EXCEED_REPORT = 0x09,
MANUFACTURING_OVERRIDE = 0xAA,
}amec_proc_voting_reason_t;
diff --git a/src/occ_405/amec/amec_init.c b/src/occ_405/amec/amec_init.c
index 5ca4e66..4ba89fc 100644
--- a/src/occ_405/amec/amec_init.c
+++ b/src/occ_405/amec/amec_init.c
@@ -250,6 +250,13 @@ void amec_init_gamec_struct(void)
g_amec->thermalproc.freq_request = -1; //unconstrained frequency vote
g_amec->thermalproc.total_res = 0;
+ // Initialize thermal controller for VRM Vdd
+ g_amec->thermalvdd.setpoint = 850; // 850 = 85.0 C
+ g_amec->thermalvdd.Pgain = 1000;
+ g_amec->thermalvdd.speed_request = 1000;
+ g_amec->thermalvdd.freq_request = -1; //unconstrained frequency vote
+ g_amec->thermalvdd.total_res = 0;
+
// Initialize thermal controller based on DIMM temperatures
g_amec->thermaldimm.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C
g_amec->thermaldimm.Pgain = 30000;
diff --git a/src/occ_405/amec/amec_master_smh.c b/src/occ_405/amec/amec_master_smh.c
index ce84749..ab2a3af 100755
--- a/src/occ_405/amec/amec_master_smh.c
+++ b/src/occ_405/amec/amec_master_smh.c
@@ -59,8 +59,8 @@
//Power cap failure threshold with no GPUs set to 32 ticks
#define PCAP_FAILURE_THRESHOLD 32
-//Power cap failure threshold with GPUs set to number of ticks for 100ms
-#define PCAP_GPU_FAILURE_THRESHOLD (100000 / MICS_PER_TICK)
+//Power cap failure threshold with GPUs set to number of ticks for 2s
+#define PCAP_GPU_FAILURE_THRESHOLD (2000000 / MICS_PER_TICK)
//*************************************************************************/
// Structures
@@ -398,6 +398,9 @@ void amec_mst_check_under_pcap(void)
errlHndl_t l_err = NULL;
uint8_t i = 0;
uint8_t l_apss_func_id = 0;
+ uint32_t l_trace[MAX_APSS_ADC_CHANNELS] = {0}; // used to trace per channel data
+ uint8_t l_trace_idx = 0;
+
/*------------------------------------------------------------------------*/
/* Code */
@@ -424,6 +427,7 @@ void amec_mst_check_under_pcap(void)
AMECSENSOR_PTR(PWRSYS)->sample);
// Trace power per APSS channel to have the best breakdown for debug
+ // compress traces to 4 max to save space on OP systems
for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++)
{
l_apss_func_id = G_apss_ch_to_function[i];
@@ -433,10 +437,37 @@ void amec_mst_check_under_pcap(void)
(l_apss_func_id != ADC_GND_REMOTE_SENSE) &&
(l_apss_func_id != ADC_12V_STANDBY_CURRENT) )
{
- TRAC_ERR("APSS channel %d Function ID = %d Power = %dW", i, l_apss_func_id,
- AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
+ l_trace[l_trace_idx] = (i << 24) | (l_apss_func_id << 16) | (AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
+ l_trace_idx++;
}
}
+ while(l_trace_idx != 0)
+ {
+ if(l_trace_idx >=4)
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X], [%08X]",
+ l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3], l_trace[l_trace_idx-4]);
+ l_trace_idx -= 4;
+ }
+ else if(l_trace_idx == 3)
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X]",
+ l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3]);
+ l_trace_idx = 0;
+ }
+ else if(l_trace_idx == 2)
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X]",
+ l_trace[l_trace_idx-1], l_trace[l_trace_idx-2]);
+ l_trace_idx = 0;
+ }
+ else // l_trace_idx == 1
+ {
+ TRAC_ERR("APSS channel/FuncID/Power: [%08X]",
+ l_trace[l_trace_idx-1]);
+ l_trace_idx = 0;
+ }
+ }
/* @
* @errortype
diff --git a/src/occ_405/amec/amec_slave_smh.c b/src/occ_405/amec/amec_slave_smh.c
index 9142caa..4750336 100755
--- a/src/occ_405/amec/amec_slave_smh.c
+++ b/src/occ_405/amec/amec_slave_smh.c
@@ -547,6 +547,9 @@ void amec_slv_state_2(void)
//-------------------------------------------------------
amec_update_centaur_sensors(CENTAUR_2);
*/
+
+ // Call VRM Vdd thermal controller
+ amec_controller_vrm_vdd_thermal();
}
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.c b/src/occ_405/cmdh/cmdh_fsp_cmds.c
index 86ee360..5f27ba8 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds.c
@@ -60,6 +60,7 @@ extern bool G_vrm_vdd_temp_expired;
#include <gpe_export.h>
extern gpe_shared_data_t G_shared_gpe_data;
+extern opal_proc_voting_reason_t G_amec_opal_proc_throt_reason;
// This table contains tunable parameter information that can be exposed to
// customers (only Master OCC should access/control this table)
@@ -170,12 +171,29 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
uint32_t l_freq_reason = g_amec->proc[0].core[k].f_reason;
if ( l_freq_reason & (AMEC_VOTING_REASON_PROC_THRM | AMEC_VOTING_REASON_VRHOT_THRM) )
{
- l_poll_rsp->ext_status.dvfs_due_to_ot = 1;
+ // only set DVFS bit if throttling below frequency to report throttling
+ if(G_amec_opal_proc_throt_reason == CPU_OVERTEMP)
+ {
+ l_poll_rsp->ext_status.dvfs_due_to_ot = 1;
+ }
+ }
+
+ if ( l_freq_reason & AMEC_VOTING_REASON_VDD_THRM )
+ {
+ // only set DVFS bit if throttling below frequency to report throttling
+ if(G_amec_opal_proc_throt_reason == VDD_OVERTEMP)
+ {
+ l_poll_rsp->ext_status.dvfs_due_to_vdd_ot = 1;
+ }
}
if ( l_freq_reason & (AMEC_VOTING_REASON_PPB | AMEC_VOTING_REASON_PMAX | AMEC_VOTING_REASON_PWR) )
{
- l_poll_rsp->ext_status.dvfs_due_to_pwr = 1;
+ // only set DVFS bit if throttling below frequency to report throttling
+ if(G_amec_opal_proc_throt_reason == POWERCAP)
+ {
+ l_poll_rsp->ext_status.dvfs_due_to_pwr = 1;
+ }
}
}
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.h b/src/occ_405/cmdh/cmdh_fsp_cmds.h
index 9dda8dc..a6c7083 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds.h
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds.h
@@ -116,14 +116,14 @@ typedef struct __attribute__ ((packed)) cmdh_poll_resp_v20
{
struct
{
- uint8_t dvfs_due_to_ot : 1; // 1 => OCC clipped max Pstate due to an over temp.
- uint8_t dvfs_due_to_pwr : 1; // 1 => OCC clipped max Psate due to reaching pcap limit.
- uint8_t mthrot_due_to_ot: 1; // 1 => OCC throttled memory due to an over temp.
- uint8_t n_power : 1; // 1 => Server running without redundant power.
- uint8_t _reserved_3 : 1;
- uint8_t sync_request : 1; // 1 => OCC needs to restart snapshot buffers
- uint8_t _reserved_1 : 1;
- uint8_t _reserved_0 : 1;
+ uint8_t dvfs_due_to_ot : 1; // 1 => OCC clipped max Pstate due to a processor over temp.
+ uint8_t dvfs_due_to_pwr : 1; // 1 => OCC clipped max Pstate due to reaching pcap limit.
+ uint8_t mthrot_due_to_ot : 1; // 1 => OCC throttled memory due to an over temp.
+ uint8_t n_power : 1; // 1 => Server running without redundant power.
+ uint8_t dvfs_due_to_vdd_ot : 1; // 1 => OCC clipped max Pstate due to VRM Vdd over temp.
+ uint8_t sync_request : 1; // 1 => OCC needs to restart snapshot buffers
+ uint8_t _reserved_1 : 1;
+ uint8_t _reserved_0 : 1;
};
uint8_t word;
} ext_status;
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
index 52cb637..e60f3c7 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
@@ -1849,6 +1849,17 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr,
break;
}
+ // reset the thresholds of every FRU type to 0xFF (not defined) to prevent errors when (H)TMGT doesn't send thresholds for a FRU type
+ for(i=0; i<DATA_FRU_MAX; i++)
+ {
+ G_data_cnfg->thrm_thresh.data[i].fru_type = i;
+ G_data_cnfg->thrm_thresh.data[i].dvfs = 0xFF;
+ G_data_cnfg->thrm_thresh.data[i].error = 0xFF;
+ G_data_cnfg->thrm_thresh.data[i].pm_dvfs = 0xFF;
+ G_data_cnfg->thrm_thresh.data[i].pm_error = 0xFF;
+ G_data_cnfg->thrm_thresh.data[i].max_read_timeout = 0xFF;
+ }
+
// Store the base data
G_data_cnfg->thrm_thresh.version = l_cmd_ptr->version;
G_data_cnfg->thrm_thresh.proc_core_weight = l_cmd_ptr->proc_core_weight;
diff --git a/src/occ_405/occ_sys_config.h b/src/occ_405/occ_sys_config.h
index f393256..6cfe2b1 100755
--- a/src/occ_405/occ_sys_config.h
+++ b/src/occ_405/occ_sys_config.h
@@ -74,10 +74,11 @@ typedef union
{
struct
{
- uint8_t kvm: 1;
- uint8_t reserved: 5;
- uint8_t ite: 1;
- uint8_t single: 1;
+ uint8_t kvm: 1;
+ uint8_t reserved: 3;
+ uint8_t report_dvfs_nom: 1;
+ uint8_t reserved_2: 2;
+ uint8_t single: 1;
};
uint8_t byte;
} eSystemType;
@@ -332,7 +333,7 @@ typedef struct
// Instead of system-type, lets try to send all system attributes
// that matter instead of having tables in OCC code.
- eSystemType system_type; // OCC usage of this byte is TBD
+ eSystemType system_type;
// Processor HUID - HUID for this OCC processor, used by OCC for processor error call out
uint32_t proc_huid;
OpenPOWER on IntegriCloud