diff options
-rw-r--r-- | src/occ_405/amec/amec_controller.c | 82 | ||||
-rw-r--r-- | src/occ_405/amec/amec_controller.h | 10 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_freq.c | 56 | ||||
-rw-r--r-- | src/occ_405/amec/amec_freq.h | 7 | ||||
-rw-r--r-- | src/occ_405/amec/amec_init.c | 7 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_master_smh.c | 39 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_slave_smh.c | 3 | ||||
-rwxr-xr-x | src/occ_405/cmdh/cmdh_fsp_cmds.c | 22 | ||||
-rwxr-xr-x | src/occ_405/cmdh/cmdh_fsp_cmds.h | 16 | ||||
-rwxr-xr-x | src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c | 11 | ||||
-rwxr-xr-x | src/occ_405/occ_sys_config.h | 11 |
11 files changed, 224 insertions, 40 deletions
diff --git a/src/occ_405/amec/amec_controller.c b/src/occ_405/amec/amec_controller.c index 2154d07..f310208 100644 --- a/src/occ_405/amec/amec_controller.c +++ b/src/occ_405/amec/amec_controller.c @@ -156,6 +156,88 @@ void amec_controller_proc_thermal() } } +// Function Specification +// +// Name: amec_controller_vrm_vdd_thermal +// +// Description: This function implements the Proportional Controller for the +// VRM Vdd thermal control. Although it doesn't return any +// results, it populates the thermal vote in the field +// g_amec->thermalvdd.speed_request. +// +// Task Flags: +// +// End Function Specification +void amec_controller_vrm_vdd_thermal() +{ + /*------------------------------------------------------------------------*/ + /* Local Variables */ + /*------------------------------------------------------------------------*/ + uint16_t l_vdd_temp_tenthsC = 0; + uint16_t l_residue = 0; + uint16_t l_old_residue = 0; + int16_t l_thermal_diff = 0; + int16_t l_cpu_speed = 0; + int16_t l_throttle_chg = 0; + int32_t l_throttle = 0; + sensor_t * l_sensor = NULL; + + /*------------------------------------------------------------------------*/ + /* Code */ + /*------------------------------------------------------------------------*/ + // Get VRM Vdd temperature sensor + l_sensor = getSensorByGsid(TEMPVDD); + + // Convert current Vdd temperature to 0.1 degrees C + l_vdd_temp_tenthsC = l_sensor->sample * 10; + + // Calculate the temperature difference from the DVFS setpoint + l_thermal_diff = g_amec->thermalvdd.setpoint - l_vdd_temp_tenthsC; + + // Proportional Controller for the thermal control loop + l_throttle = (int32_t) l_thermal_diff * g_amec->thermalvdd.Pgain; + l_residue = (uint16_t) l_throttle; + l_throttle_chg = (int16_t) (l_throttle >> 16); + + // don't allow a throttle change more than step limit + if ((int16_t) l_throttle_chg > (int16_t) g_amec->sys.speed_step_limit) + { + l_throttle_chg = g_amec->sys.speed_step_limit; + } + else + { + if ((int16_t) l_throttle_chg < ((int16_t) (-g_amec->sys.speed_step_limit))) + { + l_throttle_chg = (int16_t)(-g_amec->sys.speed_step_limit); + } + } + + // Calculate the new thermal CPU speed request + l_cpu_speed = g_amec->thermalvdd.speed_request + + (int16_t)(l_throttle_chg * g_amec->sys.speed_step); + + // Proceed with residue summation to correctly follow set-point + l_old_residue = g_amec->thermalvdd.total_res; + g_amec->thermalvdd.total_res += l_residue; + if (g_amec->thermalvdd.total_res < l_old_residue) + { + l_cpu_speed += g_amec->sys.speed_step; + } + + // Enforce actuator saturation limits + if (l_cpu_speed > g_amec->sys.max_speed) + l_cpu_speed = g_amec->sys.max_speed; + if (l_cpu_speed < g_amec->sys.min_speed) + l_cpu_speed = g_amec->sys.min_speed; + + // Generate the new thermal speed request + g_amec->thermalvdd.speed_request = l_cpu_speed; + // Calculate frequency request based on thermal speed request + g_amec->thermalvdd.freq_request = amec_controller_speed2freq( + g_amec->thermalvdd.speed_request, + g_amec->sys.fmax); +} + //************************************************************************* // Function Specification // diff --git a/src/occ_405/amec/amec_controller.h b/src/occ_405/amec/amec_controller.h index 76e70ef..29a8061 100644 --- a/src/occ_405/amec/amec_controller.h +++ b/src/occ_405/amec/amec_controller.h @@ -91,6 +91,16 @@ typedef struct amec_controller void amec_controller_proc_thermal(); /** + * Thermal Control Loop based on VRM Vdd temperature + * + * This function implements the Proportional Controller for the VRM Vdd thermal + * control loop. Although it doesn't return any results, it populates the + * thermal vote in the field g_amec->thermalvdd.speed_request. + * + */ +void amec_controller_vrm_vdd_thermal(); + +/** * Thermal Control Loop based on Centaur temperatures. * * This function implements a Proportional Controller for the diff --git a/src/occ_405/amec/amec_freq.c b/src/occ_405/amec/amec_freq.c index 03bc22c..9150328 100755 --- a/src/occ_405/amec/amec_freq.c +++ b/src/occ_405/amec/amec_freq.c @@ -282,6 +282,9 @@ void amec_slv_proc_voting_box(void) amec_proc_voting_reason_t l_kvm_throt_reason = NO_THROTTLE; amec_part_t *l_part = NULL; + // frequency threshold for reporting throttling + uint16_t l_report_throttle_freq = G_sysConfigData.system_type.report_dvfs_nom ? G_sysConfigData.sys_mode_freq.table[OCC_MODE_NOMINAL] : G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO]; + /*------------------------------------------------------------------------*/ /* Code */ /*------------------------------------------------------------------------*/ @@ -307,9 +310,9 @@ void amec_slv_proc_voting_box(void) l_chip_fmax = g_amec->proc[0].pwr_votes.ppb_fmax; l_chip_reason = AMEC_VOTING_REASON_PPB; - if( G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_chip_fmax) + if(l_report_throttle_freq <= l_chip_fmax) { - l_kvm_throt_reason = PCAP_EXCEED_PTURBO; + l_kvm_throt_reason = PCAP_EXCEED_REPORT; } else { @@ -340,9 +343,9 @@ void amec_slv_proc_voting_box(void) l_chip_fmax = g_amec->thermalproc.freq_request; l_chip_reason = AMEC_VOTING_REASON_PROC_THRM; - if( G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_chip_fmax) + if( l_report_throttle_freq <= l_chip_fmax) { - l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_PTURBO; + l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_REPORT; } else { @@ -350,15 +353,31 @@ void amec_slv_proc_voting_box(void) } } + //Thermal controller input based on VRM Vdd temperature + if(g_amec->thermalvdd.freq_request < l_chip_fmax) + { + l_chip_fmax = g_amec->thermalvdd.freq_request; + l_chip_reason = AMEC_VOTING_REASON_VDD_THRM; + + if( l_report_throttle_freq <= l_chip_fmax) + { + l_kvm_throt_reason = VDD_OVERTEMP_EXCEED_REPORT; + } + else + { + l_kvm_throt_reason = VDD_OVERTEMP; + } + } + // Controller request based on VRHOT signal from processor regulator if(g_amec->vrhotproc.freq_request < l_chip_fmax) { l_chip_fmax = g_amec->vrhotproc.freq_request; l_chip_reason = AMEC_VOTING_REASON_VRHOT_THRM; - if(G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_chip_fmax) + if(l_report_throttle_freq <= l_chip_fmax) { - l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_PTURBO; + l_kvm_throt_reason = PROC_OVERTEMP_EXCEED_REPORT; } else { @@ -393,6 +412,7 @@ void amec_slv_proc_voting_box(void) // Before enforcing a soft Fmin, make sure we don't // have a thermal or power emergency if(!(l_chip_reason & (AMEC_VOTING_REASON_PROC_THRM | + AMEC_VOTING_REASON_VDD_THRM | AMEC_VOTING_REASON_VRHOT_THRM | AMEC_VOTING_REASON_PPB | AMEC_VOTING_REASON_PMAX | @@ -428,9 +448,9 @@ void amec_slv_proc_voting_box(void) l_core_freq = g_amec->proc[0].pwr_votes.proc_pcap_vote; l_core_reason = AMEC_VOTING_REASON_PWR; - if(G_sysConfigData.sys_mode_freq.table[OCC_MODE_TURBO] < l_core_freq) + if(l_report_throttle_freq <= l_core_freq) { - l_kvm_throt_reason = PCAP_EXCEED_PTURBO; + l_kvm_throt_reason = PCAP_EXCEED_REPORT; } else { @@ -544,19 +564,17 @@ void amec_slv_proc_voting_box(void) } }//End of for loop - //convert POWERCAP reason to POWER_SUPPLY_FAILURE if ovs is asserted - if((l_kvm_throt_reason == POWERCAP) && AMEC_INTF_GET_OVERSUBSCRIPTION()) + //check if there was a throttle reason change + if(l_kvm_throt_reason != G_amec_opal_proc_throt_reason) { - l_kvm_throt_reason = POWER_SUPPLY_FAILURE; - } - - //check if we need to update the throttle reason in homer - if(G_sysConfigData.system_type.kvm && - (l_kvm_throt_reason != G_amec_opal_proc_throt_reason)) - { - //Notify dcom thread to update the table + //Always update G_amec_opal_proc_throt_reason, this is used to set poll rsp bits for all system types G_amec_opal_proc_throt_reason = l_kvm_throt_reason; - ssx_semaphore_post(&G_dcomThreadWakeupSem); + + // Only if running OPAL need to notify dcom thread to update the table in HOMER for OPAL + if(G_sysConfigData.system_type.kvm) + { + ssx_semaphore_post(&G_dcomThreadWakeupSem); + } } } diff --git a/src/occ_405/amec/amec_freq.h b/src/occ_405/amec/amec_freq.h index d2b9c90..2cf3c85 100644 --- a/src/occ_405/amec/amec_freq.h +++ b/src/occ_405/amec/amec_freq.h @@ -81,6 +81,7 @@ typedef enum AMEC_VOTING_REASON_OVERRIDE_CORE = 0x00020000, AMEC_VOTING_REASON_IPS = 0x00040000, AMEC_VOTING_REASON_APSS_PMAX = 0x00080000, + AMEC_VOTING_REASON_VDD_THRM = 0x00100000, }amec_freq_voting_reason_t; @@ -116,8 +117,10 @@ typedef enum { POWER_SUPPLY_FAILURE = 0x03, OVERCURRENT = 0x04, OCC_RESET = 0x05, - PCAP_EXCEED_PTURBO = 0x06, - PROC_OVERTEMP_EXCEED_PTURBO = 0x07, + PCAP_EXCEED_REPORT = 0x06, + PROC_OVERTEMP_EXCEED_REPORT = 0x07, + VDD_OVERTEMP = 0x08, + VDD_OVERTEMP_EXCEED_REPORT = 0x09, MANUFACTURING_OVERRIDE = 0xAA, }amec_proc_voting_reason_t; diff --git a/src/occ_405/amec/amec_init.c b/src/occ_405/amec/amec_init.c index 5ca4e66..4ba89fc 100644 --- a/src/occ_405/amec/amec_init.c +++ b/src/occ_405/amec/amec_init.c @@ -250,6 +250,13 @@ void amec_init_gamec_struct(void) g_amec->thermalproc.freq_request = -1; //unconstrained frequency vote g_amec->thermalproc.total_res = 0; + // Initialize thermal controller for VRM Vdd + g_amec->thermalvdd.setpoint = 850; // 850 = 85.0 C + g_amec->thermalvdd.Pgain = 1000; + g_amec->thermalvdd.speed_request = 1000; + g_amec->thermalvdd.freq_request = -1; //unconstrained frequency vote + g_amec->thermalvdd.total_res = 0; + // Initialize thermal controller based on DIMM temperatures g_amec->thermaldimm.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C g_amec->thermaldimm.Pgain = 30000; diff --git a/src/occ_405/amec/amec_master_smh.c b/src/occ_405/amec/amec_master_smh.c index ce84749..ab2a3af 100755 --- a/src/occ_405/amec/amec_master_smh.c +++ b/src/occ_405/amec/amec_master_smh.c @@ -59,8 +59,8 @@ //Power cap failure threshold with no GPUs set to 32 ticks #define PCAP_FAILURE_THRESHOLD 32 -//Power cap failure threshold with GPUs set to number of ticks for 100ms -#define PCAP_GPU_FAILURE_THRESHOLD (100000 / MICS_PER_TICK) +//Power cap failure threshold with GPUs set to number of ticks for 2s +#define PCAP_GPU_FAILURE_THRESHOLD (2000000 / MICS_PER_TICK) //*************************************************************************/ // Structures @@ -398,6 +398,9 @@ void amec_mst_check_under_pcap(void) errlHndl_t l_err = NULL; uint8_t i = 0; uint8_t l_apss_func_id = 0; + uint32_t l_trace[MAX_APSS_ADC_CHANNELS] = {0}; // used to trace per channel data + uint8_t l_trace_idx = 0; + /*------------------------------------------------------------------------*/ /* Code */ @@ -424,6 +427,7 @@ void amec_mst_check_under_pcap(void) AMECSENSOR_PTR(PWRSYS)->sample); // Trace power per APSS channel to have the best breakdown for debug + // compress traces to 4 max to save space on OP systems for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++) { l_apss_func_id = G_apss_ch_to_function[i]; @@ -433,10 +437,37 @@ void amec_mst_check_under_pcap(void) (l_apss_func_id != ADC_GND_REMOTE_SENSE) && (l_apss_func_id != ADC_12V_STANDBY_CURRENT) ) { - TRAC_ERR("APSS channel %d Function ID = %d Power = %dW", i, l_apss_func_id, - AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample); + l_trace[l_trace_idx] = (i << 24) | (l_apss_func_id << 16) | (AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample); + l_trace_idx++; } } + while(l_trace_idx != 0) + { + if(l_trace_idx >=4) + { + TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X], [%08X]", + l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3], l_trace[l_trace_idx-4]); + l_trace_idx -= 4; + } + else if(l_trace_idx == 3) + { + TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X], [%08X]", + l_trace[l_trace_idx-1], l_trace[l_trace_idx-2], l_trace[l_trace_idx-3]); + l_trace_idx = 0; + } + else if(l_trace_idx == 2) + { + TRAC_ERR("APSS channel/FuncID/Power: [%08X], [%08X]", + l_trace[l_trace_idx-1], l_trace[l_trace_idx-2]); + l_trace_idx = 0; + } + else // l_trace_idx == 1 + { + TRAC_ERR("APSS channel/FuncID/Power: [%08X]", + l_trace[l_trace_idx-1]); + l_trace_idx = 0; + } + } /* @ * @errortype diff --git a/src/occ_405/amec/amec_slave_smh.c b/src/occ_405/amec/amec_slave_smh.c index 9142caa..4750336 100755 --- a/src/occ_405/amec/amec_slave_smh.c +++ b/src/occ_405/amec/amec_slave_smh.c @@ -547,6 +547,9 @@ void amec_slv_state_2(void) //------------------------------------------------------- amec_update_centaur_sensors(CENTAUR_2); */ + + // Call VRM Vdd thermal controller + amec_controller_vrm_vdd_thermal(); } diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.c b/src/occ_405/cmdh/cmdh_fsp_cmds.c index 86ee360..5f27ba8 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds.c +++ b/src/occ_405/cmdh/cmdh_fsp_cmds.c @@ -60,6 +60,7 @@ extern bool G_vrm_vdd_temp_expired; #include <gpe_export.h> extern gpe_shared_data_t G_shared_gpe_data; +extern opal_proc_voting_reason_t G_amec_opal_proc_throt_reason; // This table contains tunable parameter information that can be exposed to // customers (only Master OCC should access/control this table) @@ -170,12 +171,29 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr) uint32_t l_freq_reason = g_amec->proc[0].core[k].f_reason; if ( l_freq_reason & (AMEC_VOTING_REASON_PROC_THRM | AMEC_VOTING_REASON_VRHOT_THRM) ) { - l_poll_rsp->ext_status.dvfs_due_to_ot = 1; + // only set DVFS bit if throttling below frequency to report throttling + if(G_amec_opal_proc_throt_reason == CPU_OVERTEMP) + { + l_poll_rsp->ext_status.dvfs_due_to_ot = 1; + } + } + + if ( l_freq_reason & AMEC_VOTING_REASON_VDD_THRM ) + { + // only set DVFS bit if throttling below frequency to report throttling + if(G_amec_opal_proc_throt_reason == VDD_OVERTEMP) + { + l_poll_rsp->ext_status.dvfs_due_to_vdd_ot = 1; + } } if ( l_freq_reason & (AMEC_VOTING_REASON_PPB | AMEC_VOTING_REASON_PMAX | AMEC_VOTING_REASON_PWR) ) { - l_poll_rsp->ext_status.dvfs_due_to_pwr = 1; + // only set DVFS bit if throttling below frequency to report throttling + if(G_amec_opal_proc_throt_reason == POWERCAP) + { + l_poll_rsp->ext_status.dvfs_due_to_pwr = 1; + } } } diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.h b/src/occ_405/cmdh/cmdh_fsp_cmds.h index 9dda8dc..a6c7083 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds.h +++ b/src/occ_405/cmdh/cmdh_fsp_cmds.h @@ -116,14 +116,14 @@ typedef struct __attribute__ ((packed)) cmdh_poll_resp_v20 { struct { - uint8_t dvfs_due_to_ot : 1; // 1 => OCC clipped max Pstate due to an over temp. - uint8_t dvfs_due_to_pwr : 1; // 1 => OCC clipped max Psate due to reaching pcap limit. - uint8_t mthrot_due_to_ot: 1; // 1 => OCC throttled memory due to an over temp. - uint8_t n_power : 1; // 1 => Server running without redundant power. - uint8_t _reserved_3 : 1; - uint8_t sync_request : 1; // 1 => OCC needs to restart snapshot buffers - uint8_t _reserved_1 : 1; - uint8_t _reserved_0 : 1; + uint8_t dvfs_due_to_ot : 1; // 1 => OCC clipped max Pstate due to a processor over temp. + uint8_t dvfs_due_to_pwr : 1; // 1 => OCC clipped max Psate due to reaching pcap limit. + uint8_t mthrot_due_to_ot : 1; // 1 => OCC throttled memory due to an over temp. + uint8_t n_power : 1; // 1 => Server running without redundant power. + uint8_t dvfs_due_to_vdd_ot : 1; // 1 => OCC clipped max Pstate due to VRM Vdd over temp. + uint8_t sync_request : 1; // 1 => OCC needs to restart snapshot buffers + uint8_t _reserved_1 : 1; + uint8_t _reserved_0 : 1; }; uint8_t word; } ext_status; diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c index 52cb637..e60f3c7 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c +++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c @@ -1849,6 +1849,17 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr, break; } + // clear all FRU types to 0xFF (not defined) to prevent errors when (H)TMGT doesn't send thresholds for a FRU type + for(i=0; i<DATA_FRU_MAX; i++) + { + G_data_cnfg->thrm_thresh.data[i].fru_type = i; + G_data_cnfg->thrm_thresh.data[i].dvfs = 0xFF; + G_data_cnfg->thrm_thresh.data[i].error = 0xFF; + G_data_cnfg->thrm_thresh.data[i].pm_dvfs = 0xFF; + G_data_cnfg->thrm_thresh.data[i].pm_error = 0xFF; + G_data_cnfg->thrm_thresh.data[i].max_read_timeout = 0xFF; + } + // Store the base data G_data_cnfg->thrm_thresh.version = l_cmd_ptr->version; G_data_cnfg->thrm_thresh.proc_core_weight = l_cmd_ptr->proc_core_weight; diff --git a/src/occ_405/occ_sys_config.h b/src/occ_405/occ_sys_config.h index f393256..6cfe2b1 100755 --- a/src/occ_405/occ_sys_config.h +++ b/src/occ_405/occ_sys_config.h @@ -74,10 +74,11 @@ typedef union { struct { - uint8_t kvm: 1; - uint8_t reserved: 5; - uint8_t ite: 1; - uint8_t single: 1; + uint8_t kvm: 1; + uint8_t reserved: 3; + uint8_t report_dvfs_nom: 1; + uint8_t reserved_2: 2; + uint8_t single: 1; }; uint8_t byte; } eSystemType; @@ -332,7 +333,7 @@ typedef struct // Instead of system-type, lets try to send all system attributes // that matter instead of having tables in OCC code. - eSystemType system_type; // OCC usage of this byte is TBD + eSystemType system_type; // Processor HUID - HUID for this OCC processor, used by OCC for processor error call out uint32_t proc_huid; |