summaryrefslogtreecommitdiffstats
path: root/src/occ_405/amec
diff options
context:
space:
mode:
authormbroyles <mbroyles@us.ibm.com>2017-08-23 16:53:05 -0500
committerChristopher J. Cain <cjcain@us.ibm.com>2017-08-29 15:43:35 -0400
commite1a597e9f5bc8e7b193058ca32a8c8ba46ebf519 (patch)
treebcc3ccd11631980666d8085397818abeef2f8d4a /src/occ_405/amec
parentc34f286542bb7fa5eccc9bfcc2a9473637d4a0e5 (diff)
downloadtalos-occ-e1a597e9f5bc8e7b193058ca32a8c8ba46ebf519.tar.gz
talos-occ-e1a597e9f5bc8e7b193058ca32a8c8ba46ebf519.zip
Calculate GPU Power Cap
Define GPU ID callout type Change-Id: I99e691abe64fc0d706571fc7a128d565159e0461 RTC: 133823 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45077 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: William A. Bryan <wilbryan@us.ibm.com> Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Diffstat (limited to 'src/occ_405/amec')
-rwxr-xr-xsrc/occ_405/amec/amec_master_smh.c51
-rwxr-xr-xsrc/occ_405/amec/amec_pcap.c176
-rwxr-xr-xsrc/occ_405/amec/amec_service_codes.h1
-rwxr-xr-xsrc/occ_405/amec/amec_sys.h1
4 files changed, 209 insertions, 20 deletions
diff --git a/src/occ_405/amec/amec_master_smh.c b/src/occ_405/amec/amec_master_smh.c
index 0fcee20..ce84749 100755
--- a/src/occ_405/amec/amec_master_smh.c
+++ b/src/occ_405/amec/amec_master_smh.c
@@ -39,6 +39,7 @@
#include "amec_service_codes.h" //For AMEC_MST_CHECK_PCAPS_MATCH
#include "dcom.h"
#include <amec_sensors_power.h>
+#include <cmdh_fsp_cmds.h> // For G_apss_ch_to_function
//*************************************************************************/
// Externs
@@ -55,9 +56,12 @@
//Power cap mismatch threshold set to 8 ticks
#define PCAPS_MISMATCH_THRESHOLD 8
-//Power cap failure threshold set to 32 ticks
+//Power cap failure threshold with no GPUs set to 32 ticks
#define PCAP_FAILURE_THRESHOLD 32
+//Power cap failure threshold with GPUs set to number of ticks for 100ms
+#define PCAP_GPU_FAILURE_THRESHOLD (100000 / MICS_PER_TICK)
+
//*************************************************************************/
// Structures
//*************************************************************************/
@@ -90,6 +94,8 @@ uint16_t G_mst_soft_fmax = 0xFFFF;
//Counter of committed violations by the Slave OCCs
uint8_t G_mst_violation_cnt[MAX_OCCS] = {0};
+extern uint32_t G_first_num_gpus_sys;
+
// --------------------------------------------------------
// AMEC Master State 5 Substate Table
// --------------------------------------------------------
@@ -390,6 +396,8 @@ void amec_mst_check_under_pcap(void)
/* Local Variables */
/*------------------------------------------------------------------------*/
errlHndl_t l_err = NULL;
+ uint8_t i = 0;
+ uint8_t l_apss_func_id = 0;
/*------------------------------------------------------------------------*/
/* Code */
@@ -406,21 +414,29 @@ void amec_mst_check_under_pcap(void)
G_over_cap_count++;
- //Log error and reset OCC if count >= 32 (ticks)
- if(G_over_cap_count >= PCAP_FAILURE_THRESHOLD)
+ // GPUs take longer for a power limit to take effect, so if GPUs are present we need to use
+ // a longer wait time before logging an error and resetting
+ if( ( (!G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_FAILURE_THRESHOLD) ) ||
+ ( (G_first_num_gpus_sys) && (G_over_cap_count >= PCAP_GPU_FAILURE_THRESHOLD) ) )
{
TRAC_ERR("Failure to maintain power cap: Power Cap = %d ,"
- "PWRSYS = %d ,PWRPROC = %d ,PWRFAN = %d ,"
- "PWRMEM = %d",g_amec->pcap.active_node_pcap,
- AMECSENSOR_PTR(PWRSYS)->sample,
- AMECSENSOR_PTR(PWRPROC)->sample,
- AMECSENSOR_PTR(PWRFAN)->sample,
- AMECSENSOR_PTR(PWRMEM)->sample);
-
- TRAC_ERR("PWRIO = %d , PWRSTORE = %d, PWRGPU = %d",
- AMECSENSOR_PTR(PWRIO)->sample,
- AMECSENSOR_PTR(PWRSTORE)->sample,
- AMECSENSOR_PTR(PWRGPU)->sample);
+ "PWRSYS = %d",g_amec->pcap.active_node_pcap,
+ AMECSENSOR_PTR(PWRSYS)->sample);
+
+ // Trace power per APSS channel to have the best breakdown for debug
+ for (i = 0; i < MAX_APSS_ADC_CHANNELS; i++)
+ {
+ l_apss_func_id = G_apss_ch_to_function[i];
+
+ if((l_apss_func_id != ADC_RESERVED) &&
+ (l_apss_func_id != ADC_12V_SENSE) &&
+ (l_apss_func_id != ADC_GND_REMOTE_SENSE) &&
+ (l_apss_func_id != ADC_12V_STANDBY_CURRENT) )
+ {
+ TRAC_ERR("APSS channel %d Function ID = %d Power = %dW", i, l_apss_func_id,
+ AMECSENSOR_PTR(PWRAPSSCH0 + i)->sample);
+ }
+ }
/* @
* @errortype
@@ -458,11 +474,8 @@ void amec_mst_check_under_pcap(void)
}
else
{
- //Decrement count if node power under power cap value
- if(G_over_cap_count > 0)
- {
- G_over_cap_count--;
- }
+ // Clear counter
+ G_over_cap_count = 0;
}
return;
diff --git a/src/occ_405/amec/amec_pcap.c b/src/occ_405/amec/amec_pcap.c
index 286921d..7584ddf 100755
--- a/src/occ_405/amec/amec_pcap.c
+++ b/src/occ_405/amec/amec_pcap.c
@@ -58,7 +58,6 @@ extern PWR_READING_TYPE G_pwr_reading_type;
//ppb_fmax
#define PDROP_THRESH 0
//Number of MHz to raise the proc_pcap_vote for every watt of available power
-//(DCM value should be less than SCM)
#define PROC_MHZ_PER_WATT 28
//Number of MHz to raise ppb_fmax per watt of available power. Depends on
//number of procs in node.
@@ -75,6 +74,8 @@ uint32_t G_mhz_per_pstate=0;
uint8_t G_over_pcap_count=0;
extern uint16_t G_proc_fmax_mhz; // max(turbo,uturbo) frequencies
+extern uint32_t G_first_proc_gpu_config;
+extern uint32_t G_first_num_gpus_sys;
//*************************************************************************/
// Function Prototypes
@@ -84,6 +85,170 @@ extern uint16_t G_proc_fmax_mhz; // max(turbo,uturbo) frequencies
// Functions
//*************************************************************************/
+//////////////////////////
+// Function Specification
+//
+// Name: amec_gpu_pcap
+//
+// Description: Determine power cap for GPUs.
+//              Computes a total GPU power budget as the lower of the
+//              oversubscription (N mode) budget and the budget for the active
+//              power cap and PSR, divides it equally across
+//              G_first_num_gpus_sys GPUs, clips the per-GPU value to the
+//              min/max limits read from each GPU and stores the result in
+//              gpu_desired_pcap_mw. The actual sending of the limit is
+//              handled by task_gpu_sm().
+//
+// Parameters:
+//   i_active_pcap_changed - TRUE when the active node power cap changed;
+//                           forces the active/PSR GPU budget to be recalculated
+//   i_avail_power         - available power; a negative value is treated as
+//                           "trying to reduce power" and may force GPUs to
+//                           their minimum power limit
+//
+// Thread: Real Time Loop
+//
+// End Function Specification
+void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power)
+{
+    /*------------------------------------------------------------------------*/
+    /* Local Variables */
+    /*------------------------------------------------------------------------*/
+    uint8_t i = 0;
+    uint32_t l_gpu_cap_mw = 0;
+    static uint16_t L_total_gpu_pcap = 0; // Current total GPU pcap in effect
+    static uint16_t L_n_mode_gpu_total_pcap = 0; // Total GPU pcap required for oversubscription
+    static uint16_t L_active_psr_gpu_total_pcap = 0; // Total GPU pcap for the currently set pcap and PSR
+    static uint16_t L_per_gpu_pcap = 0; // Amount of L_total_gpu_pcap for each GPU
+    static uint8_t L_psr = 100; // PSR value used in L_active_psr_gpu_total_pcap calculation
+    static bool L_first_run = TRUE; // for calculations done only 1 time
+
+    /*------------------------------------------------------------------------*/
+    /* Code */
+    /*------------------------------------------------------------------------*/
+    // If this is the first time running calculate the total GPU power cap for oversubscription
+    if(L_first_run)
+    {
+        if(g_amec->pcap.ovs_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
+        {
+            // Take all non-GPU power away from the oversubscription power cap
+            L_n_mode_gpu_total_pcap = g_amec->pcap.ovs_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
+            // Add back in the power that will be dropped by processor DVFS and memory throttling and give to GPUs
+            L_n_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts;
+        }
+        else
+        {
+            // This should not happen, the total non GPU power should never be higher than the N mode cap
+            // Log error and set GPUs to minimum power cap
+            L_n_mode_gpu_total_pcap = 0; // this will set minimum GPU power cap
+
+            TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N mode pwr limit %dW",
+                     G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.ovs_node_pcap);
+
+            /* @
+             * @errortype
+             * @moduleid AMEC_GPU_PCAP_MID
+             * @reasoncode GPU_FAILURE
+             * @userdata1 N mode Power Cap watts
+             * @userdata2 Total non-GPU power watts
+             * @userdata4 ERC_GPU_N_MODE_PCAP_CALC_FAILURE
+             * @devdesc Total non-GPU power more than N mode power cap
+             *
+             */
+            errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
+                                          GPU_FAILURE,
+                                          ERC_GPU_N_MODE_PCAP_CALC_FAILURE,
+                                          ERRL_SEV_PREDICTIVE,
+                                          NULL,
+                                          DEFAULT_TRACE_SIZE,
+                                          g_amec->pcap.ovs_node_pcap,
+                                          G_sysConfigData.total_non_gpu_max_pwr_watts);
+
+            //Callout firmware
+            addCalloutToErrl(l_err,
+                             ERRL_CALLOUT_TYPE_COMPONENT_ID,
+                             ERRL_COMPONENT_ID_FIRMWARE,
+                             ERRL_CALLOUT_PRIORITY_HIGH);
+            commitErrl(&l_err);
+        }
+    } // if first run
+
+    // Calculate the total GPU power cap for the current active limit and PSR
+    // this only needs to be calculated if either the active limit or PSR changed
+    if( (L_first_run) || (i_active_pcap_changed) || (L_psr != G_sysConfigData.psr) )
+    {
+        L_psr = G_sysConfigData.psr;
+        if(g_amec->pcap.active_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
+        {
+            // Take all non-GPU power away from the active power cap
+            L_active_psr_gpu_total_pcap = g_amec->pcap.active_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
+            // Add back in the power that will be dropped by processor DVFS and memory throttling based on the PSR
+            // to give to GPUs.
+            // Multiply before dividing: L_psr is a uint8_t, so (L_psr / 100) is integer division that
+            // truncates to 0 for every PSR below 100 and would give the GPUs none of the dropped power.
+            L_active_psr_gpu_total_pcap += ( ((uint32_t)L_psr * G_sysConfigData.total_proc_mem_pwr_drop_watts) / 100 );
+        }
+        else
+        {
+            // Set GPUs to minimum power cap
+            L_active_psr_gpu_total_pcap = 0;
+            TRAC_IMP("amec_gpu_pcap: non GPU max power %dW is more than active pwr limit %dW",
+                     G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.active_node_pcap);
+        }
+
+        // Total GPU power cap is the lower of oversubscription and active power limit
+        // must always account for oversubscription to ensure when a power supply is lost the OCC
+        // can react fast enough, GPU power capping is too slow and must have GPU power cap already
+        // set to account for oversubscription case
+        L_total_gpu_pcap = (L_n_mode_gpu_total_pcap < L_active_psr_gpu_total_pcap) ?
+                           L_n_mode_gpu_total_pcap : L_active_psr_gpu_total_pcap;
+
+        // Divide the total equally across all GPUs in the system
+        if(G_first_num_gpus_sys)
+        {
+            L_per_gpu_pcap = L_total_gpu_pcap / G_first_num_gpus_sys;
+        }
+        else
+        {
+            // Guard against divide by zero; a per-GPU cap of 0 is clipped up to each GPU's minimum below
+            L_per_gpu_pcap = 0;
+            TRAC_ERR("amec_gpu_pcap: Called with no GPUs present!");
+        }
+    }
+
+    // Setup to send new power limit to GPUs. The actual sending of GPU power limit will be handled by task_gpu_sm()
+    for (i=0; i<MAX_NUM_GPU_PER_DOMAIN; i++)
+    {
+        // Before sending a GPU a power limit the power limits must be read from the GPU to know min/max GPU allows
+        if( GPU_PRESENT(i) && g_amec->gpu[i].pcap.pwr_limits_read )
+        {
+            l_gpu_cap_mw = L_per_gpu_pcap * 1000; // convert W to mW
+
+            // GPU is present and have min/max power limits from GPU
+            // clip the GPU power limit to min/max GPU limit if needed
+            if(l_gpu_cap_mw < g_amec->gpu[i].pcap.gpu_min_pcap_mw) // clip to min?
+            {
+                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_min_pcap_mw;
+            }
+            else if(l_gpu_cap_mw > g_amec->gpu[i].pcap.gpu_max_pcap_mw) // clip to max?
+            {
+                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_max_pcap_mw;
+            }
+
+            // If not already at the min then set to min if trying to reduce power and proc/memory are at min
+            if( (i_avail_power < 0) && (g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) &&
+                (g_amec->pcap.active_mem_level) && (l_gpu_cap_mw != g_amec->gpu[i].pcap.gpu_min_pcap_mw) )
+            {
+                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_min_pcap_mw;
+                // Only trace and flag on the transition, not on every tick the minimum stays in force
+                if(g_amec->gpu[i].pcap.gpu_desired_pcap_mw != l_gpu_cap_mw)
+                {
+                    TRAC_ERR("amec_gpu_pcap: Forcing GPU%d to minimum pwr limit %dmW", i, l_gpu_cap_mw);
+                    g_amec->gpu[i].pcap.gpu_min_cap_required = TRUE;
+                }
+            }
+
+            // check if this is a new power limit
+            if(g_amec->gpu[i].pcap.gpu_desired_pcap_mw != l_gpu_cap_mw)
+            {
+                TRAC_IMP("amec_gpu_pcap: Updating GPU%d desired pcap %dmW to %dmW", i,
+                         g_amec->gpu[i].pcap.gpu_desired_pcap_mw, l_gpu_cap_mw);
+                g_amec->gpu[i].pcap.gpu_desired_pcap_mw = l_gpu_cap_mw;
+
+                // Moving to a limit above the minimum ends any earlier forced-minimum condition
+                if( (g_amec->gpu[i].pcap.gpu_min_cap_required) && (l_gpu_cap_mw != g_amec->gpu[i].pcap.gpu_min_pcap_mw) )
+                {
+                    TRAC_ERR("amec_gpu_pcap: GPU%d no longer requires minimum pwr limit %dmW", i, g_amec->gpu[i].pcap.gpu_min_pcap_mw);
+                    g_amec->gpu[i].pcap.gpu_min_cap_required = FALSE;
+                }
+            }
+        }
+    } // for each GPU
+
+    L_first_run = FALSE;
+}
+
+
//////////////////////////
// Function Specification
@@ -101,6 +266,7 @@ void amec_pcap_calc(void)
/* Local Variables */
/*------------------------------------------------------------------------*/
bool l_oversub_state = 0;
+ bool l_active_pcap_changed = FALSE;
uint16_t l_node_pwr = AMECSENSOR_PTR(PWRSYS)->sample;
uint16_t l_p0_pwr = AMECSENSOR_PTR(PWRPROC)->sample;
int32_t l_avail_power = 0;
@@ -138,9 +304,17 @@ void amec_pcap_calc(void)
// set this pcap as valid (needed by master for comparison)
g_amec->pcap_valid = 1;
+ l_active_pcap_changed = TRUE;
}
l_avail_power = g_amec->pcap.active_node_pcap - l_node_pwr;
+
+ // Determine GPU power cap if there are GPUs present
+ if(G_first_proc_gpu_config)
+ {
+ amec_gpu_pcap(l_active_pcap_changed, l_avail_power);
+ }
+
if(l_node_pwr != 0)
{
l_proc_fraction = ((uint32_t)(l_p0_pwr) << 16)/l_node_pwr;
diff --git a/src/occ_405/amec/amec_service_codes.h b/src/occ_405/amec/amec_service_codes.h
index 47d6c09..f206daf 100755
--- a/src/occ_405/amec/amec_service_codes.h
+++ b/src/occ_405/amec/amec_service_codes.h
@@ -66,6 +66,7 @@ enum occAmecModuleId
AMEC_CALC_DTS_SENSORS = AMEC_COMP_ID | 0x16,
AMEC_SET_FREQ_RANGE = AMEC_COMP_ID | 0x17,
AMEC_UPDATE_APSS_GPIO = AMEC_COMP_ID | 0x18,
+ AMEC_GPU_PCAP_MID = AMEC_COMP_ID | 0x19,
};
/*----------------------------------------------------------------------------*/
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index a45fb42..c084a0c 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -449,6 +449,7 @@ typedef struct {
typedef struct {
bool check_pwr_limit; // Indicates if need to read power limits from GPU
bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max
+ bool gpu_min_cap_required; // Indicates if the GPU is being forced to its minimum power limit
uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU
uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU
uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set
OpenPOWER on IntegriCloud