diff options
author | William Bryan <wilbryan@us.ibm.com> | 2017-09-28 13:32:29 -0500 |
---|---|---|
committer | William A. Bryan <wilbryan@us.ibm.com> | 2017-10-03 16:03:05 -0400 |
commit | 74f721c90235a18821b97782d98349cf51e0f12d (patch) | |
tree | 1f2fd59b41db514c0273632dd2dd7926e25a2030 /src/occ_405/amec | |
parent | 76b91d0038d59b30de14108e908bc78c6d988796 (diff) | |
download | talos-occ-74f721c90235a18821b97782d98349cf51e0f12d.tar.gz talos-occ-74f721c90235a18821b97782d98349cf51e0f12d.zip |
GPU 405 Enable Memory Temperatures
Change-Id: Id50d12a50a05b8b3a6a6f1ce3ce4512d3299caa7
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46882
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Diffstat (limited to 'src/occ_405/amec')
-rwxr-xr-x | src/occ_405/amec/amec_data.c | 6 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_pcap.c | 81 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_sys.h | 38 |
3 files changed, 93 insertions, 32 deletions
diff --git a/src/occ_405/amec/amec_data.c b/src/occ_405/amec/amec_data.c index 3a82bb3..4c553d6 100755 --- a/src/occ_405/amec/amec_data.c +++ b/src/occ_405/amec/amec_data.c @@ -458,12 +458,6 @@ void amec_data_write_pcap(void) g_amec->pcap.ovs_node_pcap = G_sysConfigData.pcap.hard_min_pcap; } - //Oversubscription pcap can NOT be higher than a customer set pcap. - if(g_amec->pcap.ovs_node_pcap > l_customer) - { - g_amec->pcap.ovs_node_pcap = l_customer; - } - //for all new pcap data setting: If KVM, update the OPAL dynamic data if(G_sysConfigData.system_type.kvm) { diff --git a/src/occ_405/amec/amec_pcap.c b/src/occ_405/amec/amec_pcap.c index 7584ddf..995324d 100755 --- a/src/occ_405/amec/amec_pcap.c +++ b/src/occ_405/amec/amec_pcap.c @@ -95,14 +95,16 @@ extern uint32_t G_first_num_gpus_sys; // Thread: Real Time Loop // // End Function Specification -void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power) +void amec_gpu_pcap(bool i_oversubscription, bool i_active_pcap_changed, int32_t i_avail_power) { /*------------------------------------------------------------------------*/ /* Local Variables */ /*------------------------------------------------------------------------*/ uint8_t i = 0; uint32_t l_gpu_cap_mw = 0; + uint16_t l_system_gpu_total_pcap = 0; // total GPU pcap required by system based on if currently in oversub or not static uint16_t L_total_gpu_pcap = 0; // Current total GPU pcap in effect + static uint16_t L_n_plus_1_mode_gpu_total_pcap = 0; // Total GPU pcap required for N+1 (not in oversubscription) static uint16_t L_n_mode_gpu_total_pcap = 0; // Total GPU pcap required for oversubscription static uint16_t L_active_psr_gpu_total_pcap = 0; // Total GPU pcap for the currently set pcap and PSR static uint16_t L_per_gpu_pcap = 0; // Amount of L_total_gpu_pcap for each GPU @@ -112,10 +114,12 @@ void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power) /*------------------------------------------------------------------------*/ /* Code */ /*------------------------------------------------------------------------*/ - // If this is the first time running calculate the total GPU power cap for oversubscription + // If this is the first time running calculate the total GPU power cap for system power caps (N and N+1) if(L_first_run) { + // calculate total GPU power cap for oversubscription if(g_amec->pcap.ovs_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts) + { // Take all non-GPU power away from the oversubscription power cap L_n_mode_gpu_total_pcap = g_amec->pcap.ovs_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts; @@ -157,6 +161,50 @@ void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power) ERRL_CALLOUT_PRIORITY_HIGH); commitErrl(&l_err); } + + // calculate total GPU power cap for N+1 (not in oversubscription) + if(G_sysConfigData.pcap.system_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts) + { + // Take all non-GPU power away from the N+1 power cap + L_n_plus_1_mode_gpu_total_pcap = G_sysConfigData.pcap.system_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts; + // Add back in the power that will be dropped by processor DVFS and memory throttling and give to GPUs + L_n_plus_1_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts; + } + else + { + // This should not happen, the total non GPU power should never be higher than the N+1 mode cap + // Log error and set GPUs to minimum power cap + L_n_plus_1_mode_gpu_total_pcap = 0; // this will set minimum GPU power cap + + TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N+1 mode pwr limit %dW", + G_sysConfigData.total_non_gpu_max_pwr_watts, G_sysConfigData.pcap.system_pcap); + + /* @ + * @errortype + * @moduleid AMEC_GPU_PCAP_MID + * @reasoncode GPU_FAILURE + * @userdata1 N+1 mode Power Cap watts + * @userdata2 Total non-GPU power watts + * @userdata4 ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE + * @devdesc Total non-GPU power more than N+1 mode power cap + * + */ + errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID, + GPU_FAILURE, + ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE, + ERRL_SEV_PREDICTIVE, + NULL, + DEFAULT_TRACE_SIZE, + G_sysConfigData.pcap.system_pcap, + G_sysConfigData.total_non_gpu_max_pwr_watts); + + //Callout firmware + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_COMPONENT_ID, + ERRL_COMPONENT_ID_FIRMWARE, + ERRL_CALLOUT_PRIORITY_HIGH); + commitErrl(&l_err); + } } // if first run // Calculate the total GPU power cap for the current active limit and PSR @@ -180,12 +228,23 @@ void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power) G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.active_node_pcap); } - // Total GPU power cap is the lower of oversubscription and active power limit - // must always account for oversubscription to ensure when a power supply is lost the OCC - // can react fast enough, GPU power capping is too slow and must have GPU power cap already - // set to account for oversubscription case - L_total_gpu_pcap = (L_n_mode_gpu_total_pcap < L_active_psr_gpu_total_pcap) ? - L_n_mode_gpu_total_pcap : L_active_psr_gpu_total_pcap; + // Total GPU power cap is the lower of system (N+1 or oversubscription depending on if in oversub) + // and the active power limit. We do not need to always account for oversubscription since + // the automatic hw power brake will assert to the GPUs if there is a problem when oversub is + // entered from the time OCC can set and GPUs react to a new power limit + if(i_oversubscription) + { + // system in oversubscription use N mode cap + l_system_gpu_total_pcap = L_n_mode_gpu_total_pcap; + } + else + { + // system is not in oversubscription use N+1 mode cap + l_system_gpu_total_pcap = L_n_plus_1_mode_gpu_total_pcap; + } + + L_total_gpu_pcap = (l_system_gpu_total_pcap < L_active_psr_gpu_total_pcap) ? + l_system_gpu_total_pcap : L_active_psr_gpu_total_pcap; // Divide the total equally across all GPUs in the system if(G_first_num_gpus_sys) @@ -282,8 +341,8 @@ void amec_pcap_calc(void) l_oversub_state = AMEC_INTF_GET_OVERSUBSCRIPTION(); // Determine the active power cap. norm_node_pcap is set as lowest - // between sys and user in amec_data_write_pcap() - // when in oversub only use oversub pcap if lower than norm_node_pcap + // between sys (N+1 mode) and user in amec_data_write_pcap() + // when in oversub (N mode) only use oversub pcap if lower than norm_node_pcap // to handle user set power cap lower than the oversub power cap if( (TRUE == l_oversub_state) && (g_amec->pcap.ovs_node_pcap < g_amec->pcap.norm_node_pcap) ) @@ -312,7 +371,7 @@ void amec_pcap_calc(void) // Determine GPU power cap if there are GPUs present if(G_first_proc_gpu_config) { - amec_gpu_pcap(l_active_pcap_changed, l_avail_power); + amec_gpu_pcap(l_oversub_state, l_active_pcap_changed, l_avail_power); } if(l_node_pwr != 0) diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h index 803ca28..3f1d333 100755 --- a/src/occ_405/amec/amec_sys.h +++ b/src/occ_405/amec/amec_sys.h @@ -436,27 +436,35 @@ typedef struct //------------------------------------------------------------- typedef struct { - bool disabled; // GPU has been marked failed and no longer monitored - bool readOnce; // Comm has been established with GPU - bool overtempError; // Core OT error has been logged against GPU - bool memOvertempError; // Memory OT error has been logged against GPU + bool disabled; // GPU has been marked failed and no longer monitored + bool readOnce; // Comm has been established with GPU + bool commErrorLogged; // GPU has been called out due to comm error + bool overtempError; // Core OT error has been logged against GPU + bool memOvertempError; // Memory OT error has been logged against GPU bool checkDriverLoaded; // Indicates if need to check if driver is loaded - bool driverLoaded; // Indicates if GPU driver is loaded + bool driverLoaded; // Indicates if GPU driver is loaded bool checkMemTempSupport; // Indicates if need to check if mem monitoring is supported - bool memTempSupported; // Indicates if memory temperature monitoring is supported - uint8_t memErrorCount; // count of consecutive GPU mem temp read failures - uint8_t errorCount; // count of consecutive GPU core temp read failures + bool memTempSupported; // Indicates if memory temperature monitoring is supported + bool notReset; // '1' = GPU NOT in reset. Read from OCC FLAGS register + bool coreTempNotAvailable; // for fan control: '1' = core temp not available. (send 0 for fan control) + bool memTempNotAvailable; // for fan control: '1' = Mem temp not available. (send 0 for fan control) + bool coreTempFailure; // for fan control: '1' = timeout failure reading core temp (send 0xFF for fan control) + bool memTempFailure; // for fan control: '1' = timeout failure reading Mem temp (send 0xFF for fan control) + uint8_t memErrorCount; // count of consecutive GPU mem temp read failures when GPU not in reset + uint8_t errorCount; // count of consecutive GPU core temp read failures when GPU not in reset + uint8_t retryCount; // count of consecutive GPU core temp read failures before I2C reset } gpuStatus_t; typedef struct { - bool check_pwr_limit; // Indicates if need to read power limits from GPU - bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max - bool gpu_min_cap_required; // Indicates if power limits were read i.e. have min/max - uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU - uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU - uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set + bool check_pwr_limit; // Indicates if need to read power limits from GPU + bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max + bool set_failed; // Indicates if failed to set power limit + bool gpu_min_cap_required; // Indicates if GPU requires min cap + uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU + uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU + uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set uint32_t gpu_requested_pcap_mw; // Requested power cap in mW sent to GPU - uint32_t gpu_actual_pcap_mw; // Actual power cap in mW read back from the GPU + uint32_t gpu_default_pcap_mw; // Default power cap in mW read from the GPU } gpuPcap_t; |