diff options
author | mbroyles <mbroyles@us.ibm.com> | 2017-08-23 16:53:05 -0500 |
---|---|---|
committer | Christopher J. Cain <cjcain@us.ibm.com> | 2017-08-29 15:43:35 -0400 |
commit | e1a597e9f5bc8e7b193058ca32a8c8ba46ebf519 (patch) | |
tree | bcc3ccd11631980666d8085397818abeef2f8d4a /src/occ_405/amec | |
parent | c34f286542bb7fa5eccc9bfcc2a9473637d4a0e5 (diff) | |
download | talos-occ-e1a597e9f5bc8e7b193058ca32a8c8ba46ebf519.tar.gz talos-occ-e1a597e9f5bc8e7b193058ca32a8c8ba46ebf519.zip |
Calculate GPU Power Cap
Define GPU ID callout type
Change-Id: I99e691abe64fc0d706571fc7a128d565159e0461
RTC: 133823
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45077
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Diffstat (limited to 'src/occ_405/amec')
-rwxr-xr-x | src/occ_405/amec/amec_master_smh.c | 51 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_pcap.c | 176 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_service_codes.h | 1 | ||||
-rwxr-xr-x | src/occ_405/amec/amec_sys.h | 1 |
4 files changed, 209 insertions, 20 deletions
diff --git a/src/occ_405/amec/amec_master_smh.c b/src/occ_405/amec/amec_master_smh.c index 0fcee20..ce84749 100755 --- a/src/occ_405/amec/amec_master_smh.c +++ b/src/occ_405/amec/amec_master_smh.c @@ -39,6 +39,7 @@ #include "amec_service_codes.h" //For AMEC_MST_CHECK_PCAPS_MATCH #include "dcom.h" #include <amec_sensors_power.h> +#include <cmdh_fsp_cmds.h> // For G_apss_ch_to_function //*************************************************************************/ // Externs @@ -55,9 +56,12 @@ //Power cap mismatch threshold set to 8 ticks #define PCAPS_MISMATCH_THRESHOLD 8 -//Power cap failure threshold set to 32 ticks +//Power cap failure threshold with no GPUs set to 32 ticks #define PCAP_FAILURE_THRESHOLD 32 +//Power cap failure threshold with GPUs set to number of ticks for 100ms +#define PCAP_GPU_FAILURE_THRESHOLD (100000 / MICS_PER_TICK) + //*************************************************************************/ // Structures //*************************************************************************/ @@ -90,6 +94,8 @@ uint16_t G_mst_soft_fmax = 0xFFFF; //Counter of committed violations by the Slave OCCs uint8_t G_mst_violation_cnt[MAX_OCCS] = {0}; +extern uint32_t G_first_num_gpus_sys; + // -------------------------------------------------------- // AMEC Master State 5 Substate Table // -------------------------------------------------------- @@ -390,6 +396,8 @@ void amec_mst_check_under_pcap(void) /* Local Variables */ /*------------------------------------------------------------------------*/ errlHndl_t l_err = NULL; + uint8_t i = 0; + uint8_t l_apss_func_id = 0; /*------------------------------------------------------------------------*/ /* Code */ @@ -406,21 +414,29 @@ void amec_mst_check_under_pcap(void) G_over_cap_count++; - //Log error and reset OCC if count >= 32 (ticks) - if(G_over_cap_count >= PCAP_FAILURE_THRESHOLD) + // GPUs take longer for power limit to take effect if GPUs are present need to use + // a longer wait time before logging an error 
and resetting + if( ( (!G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_FAILURE_THRESHOLD) ) || + ( (G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_GPU_FAILURE_THRESHOLD) ) ) { TRAC_ERR("Failure to maintain power cap: Power Cap = %d ," - "PWRSYS = %d ,PWRPROC = %d ,PWRFAN = %d ," - "PWRMEM = %d",g_amec->pcap.active_node_pcap, - AMECSENSOR_PTR(PWRSYS)->sample, - AMECSENSOR_PTR(PWRPROC)->sample, - AMECSENSOR_PTR(PWRFAN)->sample, - AMECSENSOR_PTR(PWRMEM)->sample); - - TRAC_ERR("PWRIO = %d , PWRSTORE = %d, PWRGPU = %d", - AMECSENSOR_PTR(PWRIO)->sample, - AMECSENSOR_PTR(PWRSTORE)->sample, - AMECSENSOR_PTR(PWRGPU)->sample); + "PWRSYS = %d",g_amec->pcap.active_node_pcap, + AMECSENSOR_PTR(PWRSYS)->sample); + + // Trace power per APSS channel to have the best breakdown for debug + for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++) + { + l_apss_func_id = G_apss_ch_to_function[i]; + + if((l_apss_func_id != ADC_RESERVED) && + (l_apss_func_id != ADC_12V_SENSE) && + (l_apss_func_id != ADC_GND_REMOTE_SENSE) && + (l_apss_func_id != ADC_12V_STANDBY_CURRENT) ) + { + TRAC_ERR("APSS channel %d Function ID = %d Power = %dW", i, l_apss_func_id, + AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample); + } + } /* @ * @errortype @@ -458,11 +474,8 @@ void amec_mst_check_under_pcap(void) } else { - //Decrement count if node power under power cap value - if(G_over_cap_count > 0) - { - G_over_cap_count--; - } + // Clear counter + G_over_cap_count = 0; } return; diff --git a/src/occ_405/amec/amec_pcap.c b/src/occ_405/amec/amec_pcap.c index 286921d..7584ddf 100755 --- a/src/occ_405/amec/amec_pcap.c +++ b/src/occ_405/amec/amec_pcap.c @@ -58,7 +58,6 @@ extern PWR_READING_TYPE G_pwr_reading_type; //ppb_fmax #define PDROP_THRESH 0 //Number of MHz to raise the proc_pcap_vote for every watt of available power -//(DCM value should be less than SCM) #define PROC_MHZ_PER_WATT 28 //Number of MHz to raise ppb_fmax per watt of available power. Depends on //number of procs in node. 
@@ -75,6 +74,8 @@ uint32_t G_mhz_per_pstate=0;
 uint8_t G_over_pcap_count=0;
 
 extern uint16_t G_proc_fmax_mhz; // max(turbo,uturbo) frequencies
+extern uint32_t G_first_proc_gpu_config;
+extern uint32_t G_first_num_gpus_sys;
 
 //*************************************************************************/
 // Function Prototypes
@@ -84,6 +85,170 @@ extern uint16_t G_proc_fmax_mhz; // max(turbo,uturbo) frequencies
 // Functions
 //*************************************************************************/
 
+//////////////////////////
+// Function Specification
+//
+// Name: amec_gpu_pcap
+//
+// Description: Determine power cap for GPUs and store it as each GPU's desired pcap (mW)
+//   i_active_pcap_changed: TRUE forces the active-limit GPU total to be recalculated
+//   i_avail_power: active node pcap minus measured node power; negative means power must be reduced
+// Thread: Real Time Loop
+// End Function Specification
+void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power)
+{
+    /*------------------------------------------------------------------------*/
+    /* Local Variables */
+    /*------------------------------------------------------------------------*/
+    uint8_t i = 0;
+    uint32_t l_gpu_cap_mw = 0;
+    static uint16_t L_total_gpu_pcap = 0; // Current total GPU pcap in effect
+    static uint16_t L_n_mode_gpu_total_pcap = 0; // Total GPU pcap required for oversubscription
+    static uint16_t L_active_psr_gpu_total_pcap = 0; // Total GPU pcap for the currently set pcap and PSR
+    static uint16_t L_per_gpu_pcap = 0; // Amount of L_total_gpu_pcap for each GPU
+    static uint8_t L_psr = 100; // PSR value used in L_active_psr_gpu_total_pcap calculation
+    static bool L_first_run = TRUE; // for calculations done only 1 time
+
+    /*------------------------------------------------------------------------*/
+    /* Code */
+    /*------------------------------------------------------------------------*/
+    // If this is the first time running calculate the total GPU power cap for oversubscription
+    if(L_first_run)
+    {
+        if(g_amec->pcap.ovs_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
+        {
+            // Take all non-GPU power away from the oversubscription power cap
+            L_n_mode_gpu_total_pcap = g_amec->pcap.ovs_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
+            // Add back in the power that will be dropped by processor DVFS and memory throttling and give to GPUs
+            L_n_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts;
+        }
+        else
+        {
+            // This should not happen, the total non GPU power should never be higher than the N mode cap
+            // Log error and set GPUs to minimum power cap
+            L_n_mode_gpu_total_pcap = 0; // this will set minimum GPU power cap
+
+            TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N mode pwr limit %dW",
+                     G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.ovs_node_pcap);
+
+            /* @
+             * @errortype
+             * @moduleid AMEC_GPU_PCAP_MID
+             * @reasoncode GPU_FAILURE
+             * @userdata1 N mode Power Cap watts
+             * @userdata2 Total non-GPU power watts
+             * @userdata4 ERC_GPU_N_MODE_PCAP_CALC_FAILURE
+             * @devdesc Total non-GPU power more than N mode power cap
+             *
+             */
+            errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
+                                          GPU_FAILURE,
+                                          ERC_GPU_N_MODE_PCAP_CALC_FAILURE,
+                                          ERRL_SEV_PREDICTIVE,
+                                          NULL,
+                                          DEFAULT_TRACE_SIZE,
+                                          g_amec->pcap.ovs_node_pcap,
+                                          G_sysConfigData.total_non_gpu_max_pwr_watts);
+
+            //Callout firmware
+            addCalloutToErrl(l_err,
+                             ERRL_CALLOUT_TYPE_COMPONENT_ID,
+                             ERRL_COMPONENT_ID_FIRMWARE,
+                             ERRL_CALLOUT_PRIORITY_HIGH);
+            commitErrl(&l_err);
+        }
+    } // if first run
+
+    // Calculate the total GPU power cap for the current active limit and PSR
+    // this only needs to be calculated if either the active limit or PSR changed
+    if( (L_first_run) || (i_active_pcap_changed) || (L_psr != G_sysConfigData.psr) )
+    {
+        L_psr = G_sysConfigData.psr;
+        if(g_amec->pcap.active_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
+        {
+            // Take all non-GPU power away from the active power cap
+            L_active_psr_gpu_total_pcap = g_amec->pcap.active_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
+            // Add back in the PSR percentage of the power that will be dropped by processor DVFS and memory
+            // throttling to give to GPUs (multiply before dividing: L_psr / 100 truncates to 0 for any PSR < 100)
+            L_active_psr_gpu_total_pcap += ( ((uint32_t)L_psr * G_sysConfigData.total_proc_mem_pwr_drop_watts) / 100 );
+        }
+        else
+        {
+            // Set GPUs to minimum power cap
+            L_active_psr_gpu_total_pcap = 0;
+            TRAC_IMP("amec_gpu_pcap: non GPU max power %dW is more than active pwr limit %dW",
+                     G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.active_node_pcap);
+        }
+
+        // Total GPU power cap is the lower of oversubscription and active power limit
+        // must always account for oversubscription to ensure when a power supply is lost the OCC
+        // can react fast enough, GPU power capping is too slow and must have GPU power cap already
+        // set to account for oversubscription case
+        L_total_gpu_pcap = (L_n_mode_gpu_total_pcap < L_active_psr_gpu_total_pcap) ?
+                           L_n_mode_gpu_total_pcap : L_active_psr_gpu_total_pcap;
+
+        // Divide the total equally across all GPUs in the system
+        if(G_first_num_gpus_sys)
+        {
+            L_per_gpu_pcap = L_total_gpu_pcap / G_first_num_gpus_sys;
+        }
+        else
+        {
+            L_per_gpu_pcap = 0;
+            TRAC_ERR("amec_gpu_pcap: Called with no GPUs present!");
+        }
+    }
+
+    // Setup to send new power limit to GPUs. The actual sending of GPU power limit will be handled by task_gpu_sm()
+    for (i=0; i<MAX_NUM_GPU_PER_DOMAIN; i++)
+    {
+        // Before sending a GPU a power limit the power limits must be read from the GPU to know min/max GPU allows
+        if( GPU_PRESENT(i) && g_amec->gpu[i].pcap.pwr_limits_read )
+        {
+            l_gpu_cap_mw = L_per_gpu_pcap * 1000; // convert W to mW
+
+            // GPU is present and have min/max power limits from GPU
+            // clip the GPU power limit to min/max GPU limit if needed
+            if(l_gpu_cap_mw < g_amec->gpu[i].pcap.gpu_min_pcap_mw) // clip to min?
+            {
+                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_min_pcap_mw;
+            }
+            else if(l_gpu_cap_mw > g_amec->gpu[i].pcap.gpu_max_pcap_mw) // clip to max?
+            {
+                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_max_pcap_mw;
+            }
+
+            // If not already at the min then set to min if trying to reduce power and proc/memory are at min
+            if( (i_avail_power < 0) && (g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) &&
+                (g_amec->pcap.active_mem_level) && (l_gpu_cap_mw != g_amec->gpu[i].pcap.gpu_min_pcap_mw) )
+            {
+                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_min_pcap_mw;
+                if(g_amec->gpu[i].pcap.gpu_desired_pcap_mw != l_gpu_cap_mw)
+                {
+                    TRAC_ERR("amec_gpu_pcap: Forcing GPU%d to minimum pwr limit %dmW", i, l_gpu_cap_mw);
+                    g_amec->gpu[i].pcap.gpu_min_cap_required = TRUE;
+                }
+            }
+
+            // check if this is a new power limit
+            if(g_amec->gpu[i].pcap.gpu_desired_pcap_mw != l_gpu_cap_mw)
+            {
+                TRAC_IMP("amec_gpu_pcap: Updating GPU%d desired pcap %dmW to %dmW", i,
+                         g_amec->gpu[i].pcap.gpu_desired_pcap_mw, l_gpu_cap_mw);
+                g_amec->gpu[i].pcap.gpu_desired_pcap_mw = l_gpu_cap_mw;
+
+                if( (g_amec->gpu[i].pcap.gpu_min_cap_required) && (l_gpu_cap_mw != g_amec->gpu[i].pcap.gpu_min_pcap_mw) )
+                {
+                    TRAC_ERR("amec_gpu_pcap: GPU%d no longer requires minimum pwr limit %dmW", i, g_amec->gpu[i].pcap.gpu_min_pcap_mw);
+                    g_amec->gpu[i].pcap.gpu_min_cap_required = FALSE;
+                }
+            }
+        }
+    } // for each GPU
+
+    L_first_run = FALSE;
+}
+
 
 //////////////////////////
 // Function Specification
@@ -101,6 +266,7 @@ void amec_pcap_calc(void)
     /* Local Variables */
     /*------------------------------------------------------------------------*/
     bool l_oversub_state = 0;
+    bool l_active_pcap_changed = FALSE;
     uint16_t l_node_pwr = AMECSENSOR_PTR(PWRSYS)->sample;
     uint16_t l_p0_pwr = AMECSENSOR_PTR(PWRPROC)->sample;
     int32_t l_avail_power = 0;
@@ -138,9 +304,17 @@ void amec_pcap_calc(void)
 
         // set this pcap as valid (needed by master for comparison)
         g_amec->pcap_valid = 1;
+        l_active_pcap_changed = TRUE;
     }
 
     l_avail_power = g_amec->pcap.active_node_pcap - l_node_pwr;
+
+    // Determine GPU power cap if there are GPUs present
+    if(G_first_proc_gpu_config)
+    {
+        
amec_gpu_pcap(l_active_pcap_changed, l_avail_power); + } + if(l_node_pwr != 0) { l_proc_fraction = ((uint32_t)(l_p0_pwr) << 16)/l_node_pwr; diff --git a/src/occ_405/amec/amec_service_codes.h b/src/occ_405/amec/amec_service_codes.h index 47d6c09..f206daf 100755 --- a/src/occ_405/amec/amec_service_codes.h +++ b/src/occ_405/amec/amec_service_codes.h @@ -66,6 +66,7 @@ enum occAmecModuleId AMEC_CALC_DTS_SENSORS = AMEC_COMP_ID | 0x16, AMEC_SET_FREQ_RANGE = AMEC_COMP_ID | 0x17, AMEC_UPDATE_APSS_GPIO = AMEC_COMP_ID | 0x18, + AMEC_GPU_PCAP_MID = AMEC_COMP_ID | 0x19, }; /*----------------------------------------------------------------------------*/ diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h index a45fb42..c084a0c 100755 --- a/src/occ_405/amec/amec_sys.h +++ b/src/occ_405/amec/amec_sys.h @@ -449,6 +449,7 @@ typedef struct { typedef struct { bool check_pwr_limit; // Indicates if need to read power limits from GPU bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max + bool gpu_min_cap_required; // Indicates the GPU is being forced to its min power limit (set/cleared by amec_gpu_pcap) uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set |