summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author mbroyles <mbroyles@us.ibm.com> 2017-09-25 14:26:08 -0500
committer Martha Broyles <mbroyles@us.ibm.com> 2017-10-18 12:33:08 -0400
commit bcc1e17b10e570791acb64649bbb7bd5e4d9348d (patch)
tree c8967b8ceb72ce92e015301a0e09bd98b599d7cf
parent fa085f9b68802ec92a250eccca6ddb4152e7b61f (diff)
download talos-occ-bcc1e17b10e570791acb64649bbb7bd5e4d9348d.tar.gz
talos-occ-bcc1e17b10e570791acb64649bbb7bd5e4d9348d.zip
Support for GPU config format version 2
Change-Id: I14108b7a5ea7ce4e3649ab164a6e6c905274c635 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46765 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com> Reviewed-by: William A. Bryan <wilbryan@us.ibm.com> Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
-rwxr-xr-x src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c 181
-rwxr-xr-x src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h 32
-rwxr-xr-x src/occ_405/occ_sys_config.c 7
-rwxr-xr-x src/occ_405/occ_sys_config.h 11
4 files changed, 208 insertions, 23 deletions
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
index 5982e7f..52cb637 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c
@@ -75,6 +75,13 @@ extern uint8_t G_occ_interrupt_type;
extern uint16_t G_proc_fmax_mhz; // Maximum frequency (uturbo if WOF enabled, otherwise turbo)
extern OCCPstateParmBlock G_oppb; // OCC Pstate Parameters Block Structure
+extern uint32_t G_first_proc_gpu_config;
+extern uint32_t G_first_num_gpus_sys;
+extern uint32_t G_curr_num_gpus_sys;
+extern uint32_t G_curr_proc_gpu_config;
+extern bool G_gpu_config_done;
+extern bool G_gpu_monitoring_allowed;
+extern task_t G_task_table[TASK_END];
typedef struct data_req_table
{
@@ -1262,36 +1269,166 @@ errlHndl_t data_store_gpu(const cmdh_fsp_cmd_t * i_cmd_ptr,
cmdh_fsp_rsp_t * o_rsp_ptr)
{
errlHndl_t l_err = NULL;
- const uint8_t GPU_VERSION = 0x01;
- const uint16_t GPU_LENGTH = sizeof(cmdh_gpu_config_t) - sizeof(cmdh_fsp_cmd_header_t);
+ uint8_t i = 0;
+ uint8_t l_gpu_num = 0;
+ cmdh_gpu_config_v2_t *l_cmd_ptr = (cmdh_gpu_config_v2_t *)i_cmd_ptr;
+ uint16_t l_data_length = CMDH_DATALEN_FIELD_UINT16((&l_cmd_ptr->header));
+ uint16_t l_gpu_data_length = 0;
+ uint8_t l_present_bit_mask = 0; // Bit mask for present GPUs behind this OCC
+
+ // parse data based on version. Version byte is located at same offset for all versions
+ if(l_cmd_ptr->header.version == 1)
+ {
+ cmdh_gpu_config_t *l_cmd_ptr_v1 = (cmdh_gpu_config_t *)i_cmd_ptr;
+ l_data_length = CMDH_DATALEN_FIELD_UINT16(l_cmd_ptr_v1);
+ l_gpu_data_length = sizeof(cmdh_gpu_config_t) - sizeof(cmdh_fsp_cmd_header_t);
+ if(l_gpu_data_length == l_data_length)
+ {
+ G_sysConfigData.total_non_gpu_max_pwr_watts = l_cmd_ptr_v1->total_non_gpu_max_pwr_watts;
+ G_sysConfigData.total_proc_mem_pwr_drop_watts = l_cmd_ptr_v1->total_proc_mem_pwr_drop_watts;
- cmdh_gpu_config_t *l_cmd_ptr = (cmdh_gpu_config_t *)i_cmd_ptr;
- uint16_t l_data_length = CMDH_DATALEN_FIELD_UINT16(l_cmd_ptr);
+ AMECSENSOR_PTR(TEMPGPU0)->ipmi_sid = l_cmd_ptr_v1->gpu0_temp_sid;
+ AMECSENSOR_PTR(TEMPGPU0MEM)->ipmi_sid = l_cmd_ptr_v1->gpu0_mem_temp_sid;
+ G_sysConfigData.gpu_sensor_ids[0] = l_cmd_ptr_v1->gpu0_sid;
- if ((GPU_VERSION == l_cmd_ptr->version) && (GPU_LENGTH == l_data_length))
+ AMECSENSOR_PTR(TEMPGPU1)->ipmi_sid = l_cmd_ptr_v1->gpu1_temp_sid;
+ AMECSENSOR_PTR(TEMPGPU1MEM)->ipmi_sid = l_cmd_ptr_v1->gpu1_mem_temp_sid;
+ G_sysConfigData.gpu_sensor_ids[1] = l_cmd_ptr_v1->gpu1_sid;
+
+ AMECSENSOR_PTR(TEMPGPU2)->ipmi_sid = l_cmd_ptr_v1->gpu2_temp_sid;
+ AMECSENSOR_PTR(TEMPGPU2MEM)->ipmi_sid = l_cmd_ptr_v1->gpu2_mem_temp_sid;
+ G_sysConfigData.gpu_sensor_ids[2] = l_cmd_ptr_v1->gpu2_sid;
+
+ G_data_cnfg->data_mask |= DATA_MASK_GPU;
+ CMDH_TRAC_IMP("data_store_gpu: Got valid GPU data packet");
+ }
+ else
+ {
+ CMDH_TRAC_ERR("data_store_gpu: GPU version 1 invalid length Expected: 0x%04X Received: 0x%04X",
+ l_gpu_data_length, l_data_length);
+ cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_CMD_LEN, &l_err);
+ }
+ } // if version 1
+ else if(l_cmd_ptr->header.version == 2)
{
- G_sysConfigData.total_non_gpu_max_pwr_watts = l_cmd_ptr->total_non_gpu_max_pwr_watts;
- G_sysConfigData.total_proc_mem_pwr_drop_watts = l_cmd_ptr->total_proc_mem_pwr_drop_watts;
+ l_gpu_data_length = sizeof(cmdh_gpu_cfg_header_v2_t) - sizeof(cmdh_fsp_cmd_header_t);
+ l_gpu_data_length += (l_cmd_ptr->header.num_data_sets * sizeof(cmdh_gpu_set_v2_t));
+
+ if(l_gpu_data_length == l_data_length)
+ {
+ if( (l_cmd_ptr->header.gpu_i2c_engine == PIB_I2C_ENGINE_C) &&
+ ((l_cmd_ptr->header.gpu_i2c_bus_voltage == 0) || (l_cmd_ptr->header.gpu_i2c_bus_voltage == 18)) )
+ {
+ G_sysConfigData.gpu_i2c_engine = l_cmd_ptr->header.gpu_i2c_engine;
+ G_sysConfigData.gpu_i2c_bus_voltage = l_cmd_ptr->header.gpu_i2c_bus_voltage;
+ CMDH_TRAC_IMP("data_store_gpu: I2C engine = 0x%02X I2C bus voltage = %d deci volts",
+ G_sysConfigData.gpu_i2c_engine, G_sysConfigData.gpu_i2c_bus_voltage);
- AMECSENSOR_PTR(TEMPGPU0)->ipmi_sid = l_cmd_ptr->gpu0_temp_sid;
- AMECSENSOR_PTR(TEMPGPU0MEM)->ipmi_sid = l_cmd_ptr->gpu0_mem_temp_sid;
- G_sysConfigData.gpu_sensor_ids[0] = l_cmd_ptr->gpu0_sid;
+ G_sysConfigData.total_non_gpu_max_pwr_watts = l_cmd_ptr->header.total_non_gpu_max_pwr_watts;
+ G_sysConfigData.total_proc_mem_pwr_drop_watts = l_cmd_ptr->header.total_proc_mem_pwr_drop_watts;
- AMECSENSOR_PTR(TEMPGPU1)->ipmi_sid = l_cmd_ptr->gpu1_temp_sid;
- AMECSENSOR_PTR(TEMPGPU1MEM)->ipmi_sid = l_cmd_ptr->gpu1_mem_temp_sid;
- G_sysConfigData.gpu_sensor_ids[1] = l_cmd_ptr->gpu1_sid;
+ // Store the individual GPU data
+ for(i=0; i<l_cmd_ptr->header.num_data_sets; i++)
+ {
+ // Get the GPU number data is for
+ l_gpu_num = l_cmd_ptr->gpu_data[i].gpu_num;
- AMECSENSOR_PTR(TEMPGPU2)->ipmi_sid = l_cmd_ptr->gpu2_temp_sid;
- AMECSENSOR_PTR(TEMPGPU2MEM)->ipmi_sid = l_cmd_ptr->gpu2_mem_temp_sid;
- G_sysConfigData.gpu_sensor_ids[2] = l_cmd_ptr->gpu2_sid;
+ if( (l_gpu_num >= 0) && (l_gpu_num < MAX_NUM_GPU_PER_DOMAIN) )
+ {
+ G_sysConfigData.gpu_i2c_info[l_gpu_num].port = l_cmd_ptr->gpu_data[i].i2c_port;
+ G_sysConfigData.gpu_i2c_info[l_gpu_num].address = l_cmd_ptr->gpu_data[i].i2c_addr;
- G_data_cnfg->data_mask |= DATA_MASK_GPU;
- CMDH_TRAC_IMP("data_store_gpu: Got valid GPU data packet");
- }
+ // if port or i2c address is 0xFF the GPU will not be monitored
+ if( (G_sysConfigData.gpu_i2c_info[l_gpu_num].port != 0xFF) &&
+ (G_sysConfigData.gpu_i2c_info[l_gpu_num].address != 0xFF) )
+ {
+ CMDH_TRAC_IMP("data_store_gpu: GPU%d I2C port = 0x%02X address = 0x%02X", l_gpu_num,
+ G_sysConfigData.gpu_i2c_info[l_gpu_num].port,
+ G_sysConfigData.gpu_i2c_info[l_gpu_num].address);
+
+ AMECSENSOR_PTR(TEMPGPU0 + l_gpu_num)->ipmi_sid = l_cmd_ptr->gpu_data[i].gpu_temp_sid;
+ AMECSENSOR_PTR(TEMPGPU0MEM + l_gpu_num)->ipmi_sid = l_cmd_ptr->gpu_data[i].gpu_mem_temp_sid;
+ G_sysConfigData.gpu_sensor_ids[l_gpu_num] = l_cmd_ptr->gpu_data[i].gpu_sid;
+
+ // If there is no APSS this data is giving GPU presence, mark this GPU as present
+ if(G_pwr_reading_type != PWR_READING_TYPE_APSS)
+ {
+ l_present_bit_mask |= (0x01 << l_gpu_num);
+ }
+ }
+ else
+ {
+ CMDH_TRAC_ERR("data_store_gpu: GPU%d NOT monitored Invalid I2C port = 0x%02X I2C address = 0x%02X",
+ l_gpu_num, G_sysConfigData.gpu_i2c_info[l_gpu_num].port,
+ G_sysConfigData.gpu_i2c_info[l_gpu_num].address);
+ }
+ }
+ else
+ {
+ // We got an invalid GPU number
+ CMDH_TRAC_ERR("data_store_gpu: Received invalid GPU number %d", l_gpu_num);
+ cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_DATA, &l_err);
+ break;
+ }
+ } // for each GPU data set
+
+ // if there is no APSS for GPU presence then this data is the GPU presence
+ if(G_pwr_reading_type != PWR_READING_TYPE_APSS)
+ {
+ if(l_err == NULL)
+ {
+ G_first_num_gpus_sys = l_cmd_ptr->header.total_num_gpus_system;
+ G_curr_num_gpus_sys = G_first_num_gpus_sys;
+ G_first_proc_gpu_config = l_present_bit_mask;
+ G_curr_proc_gpu_config = G_first_proc_gpu_config;
+ G_gpu_config_done = TRUE;
+
+ if(G_first_proc_gpu_config)
+ {
+ // GPUs are present enable monitoring
+ G_gpu_monitoring_allowed = TRUE;
+ G_task_table[TASK_ID_GPU_SM].flags = GPU_RTL_FLAGS;
+ }
+
+ CMDH_TRAC_IMP("data_store_gpu: This OCC GPUs present mask = 0x%02X Total number GPUs present in system = %d",
+ G_first_proc_gpu_config, G_first_num_gpus_sys);
+
+ G_data_cnfg->data_mask |= DATA_MASK_GPU;
+ CMDH_TRAC_IMP("data_store_gpu: Got valid GPU data packet");
+ }
+ else
+ {
+ G_first_num_gpus_sys = 0;
+ G_curr_num_gpus_sys = 0;
+ G_first_proc_gpu_config = 0;
+ G_curr_proc_gpu_config = 0;
+ G_gpu_config_done = FALSE;
+ }
+ }
+ else if(l_err == NULL)
+ {
+ G_data_cnfg->data_mask |= DATA_MASK_GPU;
+ CMDH_TRAC_IMP("data_store_gpu: Got valid GPU data packet");
+ }
+ } // valid i2c engine and voltage
+ else
+ {
+ // We got an invalid I2C Engine and/or voltage
+ CMDH_TRAC_ERR("data_store_gpu: Received invalid I2C Engine/Voltage 0x%02X / %d",
+ l_cmd_ptr->header.gpu_i2c_engine, l_cmd_ptr->header.gpu_i2c_bus_voltage);
+ cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_DATA, &l_err);
+ }
+ } // if length valid
+ else
+ {
+ CMDH_TRAC_ERR("data_store_gpu: GPU version 2 invalid length Expected: 0x%04X Received: 0x%04X",
+ l_gpu_data_length, l_data_length);
+ cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_CMD_LEN, &l_err);
+ }
+ } //else if version 2
else
{
- CMDH_TRAC_ERR("data_store_gpu: Invalid GPU version/length (0x%02X/0x%04X))",
- l_cmd_ptr->version, l_data_length);
+ CMDH_TRAC_ERR("data_store_gpu: Invalid GPU version 0x%02X", l_cmd_ptr->header.version);
cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, ERRL_RC_INVALID_DATA, &l_err);
}
@@ -1735,7 +1872,7 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr,
G_data_cnfg->thrm_thresh.data[l_frutype].max_read_timeout =
l_cmd_ptr->data[i].max_read_timeout;
- // Set a local flag if we get data for VRM FRU type
+ // Set a local flag if we get data for VRM OT status FRU type
if(l_frutype == DATA_FRU_VRM_OT_STATUS)
{
l_vrm_frutype = TRUE;
diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
index dbeb768..5dac7fa 100755
--- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
+++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h
@@ -36,6 +36,7 @@
#include "cmdh_fsp.h"
#include "cmdh_fsp_cmds.h"
#include "apss.h"
+#include "occ_sys_config.h"
// Enum of the various CnfgData command formats that
// are sent to OCC over the TMGT<->OCC interface.
@@ -165,6 +166,37 @@ typedef struct __attribute__ ((packed))
}cmdh_avsbus_config_t;
// Used by TMGT to send OCC GPU data.
+// Header data for GPU version 2 cfg packet; data_store_gpu() validates and stores these fields
+typedef struct __attribute__ ((packed))
+{
+ struct cmdh_fsp_cmd_header;
+ uint8_t format; // NOTE(review): presumably the CnfgData format code - confirm
+ uint8_t version; // packet version, at the same offset in every GPU cfg version
+ uint16_t total_non_gpu_max_pwr_watts; // copied to G_sysConfigData.total_non_gpu_max_pwr_watts
+ uint16_t total_proc_mem_pwr_drop_watts; // copied to G_sysConfigData.total_proc_mem_pwr_drop_watts
+ uint8_t total_num_gpus_system; // system GPU count, recorded only when power readings are not from APSS
+ uint8_t gpu_i2c_engine; // rejected unless PIB_I2C_ENGINE_C
+ uint8_t gpu_i2c_bus_voltage; // deci volts; rejected unless 0 or 18
+ uint8_t num_data_sets; // number of cmdh_gpu_set_v2_t entries following this header
+}cmdh_gpu_cfg_header_v2_t;
+
+typedef struct __attribute__ ((packed)) // one per-GPU data set in the version 2 GPU cfg packet
+{
+ uint8_t gpu_num; // GPU this data set is for; must be < MAX_NUM_GPU_PER_DOMAIN
+ uint8_t i2c_port; // I2C port for this GPU (0xFF = GPU will not be monitored)
+ uint8_t i2c_addr; // I2C slave address for this GPU (0xFF = GPU will not be monitored)
+ uint8_t reserved;
+ uint32_t gpu_temp_sid; // GPU Core Temperature Sensor ID
+ uint32_t gpu_mem_temp_sid; // GPU Memory Temperature Sensor ID
+ uint32_t gpu_sid; // GPU Sensor ID for callout
+}cmdh_gpu_set_v2_t;
+
+typedef struct __attribute__ ((packed)) // complete version 2 GPU cfg packet: header plus per-GPU data sets
+{
+ cmdh_gpu_cfg_header_v2_t header;
+ cmdh_gpu_set_v2_t gpu_data[MAX_NUM_GPU_PER_DOMAIN]; // only the first header.num_data_sets entries are read
+}cmdh_gpu_config_v2_t;
+
typedef struct __attribute__ ((packed))
{
struct cmdh_fsp_cmd_header;
diff --git a/src/occ_405/occ_sys_config.c b/src/occ_405/occ_sys_config.c
index 20d5022..93f0805 100755
--- a/src/occ_405/occ_sys_config.c
+++ b/src/occ_405/occ_sys_config.c
@@ -230,6 +230,13 @@ occSysConfigData_t G_sysConfigData =
.total_non_gpu_max_pwr_watts = 0,
.total_proc_mem_pwr_drop_watts = 0,
.psr = 100, // default to 100% take all possible power away from CPU/memory first
+ .gpu_i2c_engine = PIB_I2C_ENGINE_C, // default to engine C
+ .gpu_i2c_bus_voltage = 18, // default to 1.8V
+ .gpu_i2c_info = {
+ [0] {.port = 4, .address = 0x98},
+ [1] {.port = 4, .address = 0x9A},
+ [2] {.port = 4, .address = 0x9C},
+ },
};
diff --git a/src/occ_405/occ_sys_config.h b/src/occ_405/occ_sys_config.h
index e398a6c..f393256 100755
--- a/src/occ_405/occ_sys_config.h
+++ b/src/occ_405/occ_sys_config.h
@@ -295,6 +295,12 @@ typedef struct
uint16_t reserved3; //reserved
} mem_throt_config_data_t;
+// Per GPU I2C Info (one entry per GPU in occSysConfigData_t.gpu_i2c_info)
+typedef struct
+{
+ uint8_t port; // I2C port; 0xFF means the GPU will not be monitored
+ uint8_t address; // I2C slave address; 0xFF means the GPU will not be monitored
+} gpuI2CInfo_t;
// Sys Config Structure
@@ -434,7 +440,10 @@ typedef struct
uint32_t gpu_sensor_ids[MAX_NUM_GPU_PER_DOMAIN];
uint16_t total_non_gpu_max_pwr_watts;
uint16_t total_proc_mem_pwr_drop_watts;
- uint8_t psr; // power shifting ratio for power capping between GPU/Proc&mem
+ uint8_t psr; // power shifting ratio for power capping between GPU/Proc&mem
+ uint8_t gpu_i2c_engine; // PIB I2CM engine for all GPUs
+ uint8_t gpu_i2c_bus_voltage; // GPU I2C bus voltage (1 = 0.1V)
+ gpuI2CInfo_t gpu_i2c_info[MAX_NUM_GPU_PER_DOMAIN]; // per GPU I2C info (port/address)
} occSysConfigData_t; __attribute__ ((__aligned__ (128)))
OpenPOWER on IntegriCloud