summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorWilliam Bryan <wilbryan@us.ibm.com>2017-04-17 14:04:13 -0500
committerWilliam A. Bryan <wilbryan@us.ibm.com>2017-05-31 12:57:18 -0400
commitc48de6ff1191fa3980b4c873758b9d20a425320e (patch)
tree38682c508b170ff2ec2a75e4c6a1142e137a42cd /src
parent25a3c4917a09bc60ee0dd2820f72d23fa1ccf018 (diff)
downloadtalos-occ-c48de6ff1191fa3980b4c873758b9d20a425320e.tar.gz
talos-occ-c48de6ff1191fa3980b4c873758b9d20a425320e.zip
APSS GPIO Controls
-- Adds the use of GPIO_VR_HOT_MEM_PROC_0/1 in manufacturing mode. -- Adds the ability to determine which GPUs are available in the system. Change-Id: Ib86bca7b8ac279b044025a67002dc9e60ecd7c07 RTC:172166 RTC:155565 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/39651 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Martha Broyles <mbroyles@us.ibm.com> Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com> Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Diffstat (limited to 'src')
-rwxr-xr-xsrc/occ_405/amec/amec_master_smh.c35
-rwxr-xr-xsrc/occ_405/amec/amec_sensors_power.c180
-rwxr-xr-xsrc/occ_405/amec/amec_sensors_power.h12
-rwxr-xr-xsrc/occ_405/amec/amec_service_codes.h3
-rwxr-xr-xsrc/occ_405/amec/amec_slave_smh.c1
-rwxr-xr-xsrc/occ_405/amec/amec_sys.h1
-rw-r--r--src/occ_405/occ_service_codes.h2
-rwxr-xr-xsrc/occ_405/proc/proc_data.c12
-rwxr-xr-xsrc/occ_405/pss/apss.c5
-rwxr-xr-xsrc/occ_405/sensor/sensor_enum.h3
-rwxr-xr-xsrc/occ_405/sensor/sensor_info.c2
-rwxr-xr-xsrc/occ_405/sensor/sensor_table.c3
12 files changed, 230 insertions, 29 deletions
diff --git a/src/occ_405/amec/amec_master_smh.c b/src/occ_405/amec/amec_master_smh.c
index 0b024ec..244bde5 100755
--- a/src/occ_405/amec/amec_master_smh.c
+++ b/src/occ_405/amec/amec_master_smh.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2016 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -23,9 +23,9 @@
/* */
/* IBM_PROLOG_END_TAG */
-//*************************************************************************
+//*************************************************************************/
// Includes
-//*************************************************************************
+//*************************************************************************/
#include <occ_common.h>
#include <ssx.h>
#include <errl.h> // Error logging
@@ -38,18 +38,19 @@
#include "amec_sys.h"
#include "amec_service_codes.h" //For AMEC_MST_CHECK_PCAPS_MATCH
#include "dcom.h"
+#include <amec_sensors_power.h>
-//*************************************************************************
+//*************************************************************************/
// Externs
-//*************************************************************************
+//*************************************************************************/
-//*************************************************************************
+//*************************************************************************/
// Macros
-//*************************************************************************
+//*************************************************************************/
-//*************************************************************************
+//*************************************************************************/
// Defines/Enums
-//*************************************************************************
+//*************************************************************************/
//Power cap mismatch threshold set to 8 ticks (2 milliseconds)
#define PCAPS_MISMATCH_THRESHOLD 8
@@ -57,13 +58,13 @@
//Power cap failure threshold set to 32 (ticks)
#define PCAP_FAILURE_THRESHOLD 32
-//*************************************************************************
+//*************************************************************************/
// Structures
-//*************************************************************************
+//*************************************************************************/
-//*************************************************************************
+//*************************************************************************/
// Globals
-//*************************************************************************
+//*************************************************************************/
smh_state_t G_amec_mst_state = {AMEC_INITIAL_STATE,
AMEC_INITIAL_STATE,
AMEC_INITIAL_STATE};
@@ -189,13 +190,13 @@ const smh_tbl_t amec_mst_state_table[AMEC_SMH_STATES_PER_LVL] =
// fw timings when the AMEC master State Machine finishes.
smh_state_timing_t G_amec_mst_state_timings = {amec_mst_update_smh_sensors};
-//*************************************************************************
+//*************************************************************************/
// Function Prototypes
-//*************************************************************************
+//*************************************************************************/
-//*************************************************************************
+//*************************************************************************/
// Functions
-//*************************************************************************
+//*************************************************************************/
// Function Specification
//
diff --git a/src/occ_405/amec/amec_sensors_power.c b/src/occ_405/amec/amec_sensors_power.c
index b8704ae..ab6f1ee 100755
--- a/src/occ_405/amec/amec_sensors_power.c
+++ b/src/occ_405/amec/amec_sensors_power.c
@@ -54,6 +54,17 @@
// This holds the converted ADC Reads
uint32_t G_lastValidAdcValue[MAX_APSS_ADC_CHANNELS] = {0};
+// Indicates if we have determined GPU presence
+bool G_gpu_config_done = FALSE;
+
+// Bitmap of GPUs present
+uint32_t G_first_proc_gpu_config = 0;
+uint32_t G_first_sys_gpu_config = 0;
+uint32_t G_first_num_gpus_sys = 0;
+uint32_t G_curr_proc_gpu_config = 0;
+uint32_t G_curr_sys_gpu_config = 0;
+uint32_t G_curr_num_gpus_sys = 0;
+
// There are only MAX_APSS_ADC_CHANNELS channels. Therefore if the channel value
// is greater than the MAX, then there was no channel associated with the function id.
#define ADC_CONVERTED_VALUE(i_chan) \
@@ -63,9 +74,9 @@ extern uint8_t G_occ_interrupt_type;
extern bool G_vrm_thermal_monitoring;
extern bool G_apss_present;
-//*************************************************************************
+//*************************************************************************/
// Code
-//*************************************************************************
+//*************************************************************************/
// Function Specification
//
@@ -231,6 +242,8 @@ void amec_update_apss_sensors(void)
sensor_update(AMECSENSOR_PTR(PWRAPSSCH0 + l_idx), (uint16_t) temp32);
}
}
+
+ amec_update_apss_gpio();
}
// ----------------------------------------------------------
@@ -396,6 +409,9 @@ void amec_update_apss_sensors(void)
//Count of number of updates.
g_pwr250us_over30sec.count++;
+ // Check the GPU presence signals
+ amec_update_gpu_configuration();
+
// ----------------------------------------------------
// Clear Flag to indicate that AMEC has received the data.
// ----------------------------------------------------
@@ -691,7 +707,167 @@ void amec_update_avsbus_sensors(void)
} // end amec_update_avsbus_sensors()
+// Function Specification
+//
+// Name: amec_update_apss_gpio
+//
+// Description: Reads the two GPIO_VR_HOT_MEM_PROC signals from the most
+//              recent APSS GPIO data and reflects their state in the
+//              VRHOTMEMPRCCNT sensor. The first time either signal is seen
+//              asserted, an informational manufacturing error is committed.
+//
+// Thread: RealTime Loop
+//
+// End Function Specification
+void amec_update_apss_gpio(void)
+{
+    // Only log once (static storage, so this starts out as zero)
+    static uint8_t L_err_logged;
+
+    // GPIO port numbers from system model
+    uint8_t * l_ports = G_sysConfigData.apss_gpio_map.vr_fan;
+
+    // Pin levels; default to 1, which is the de-asserted level (active low)
+    uint8_t l_pin0 = 1;
+    uint8_t l_pin1 = 1;
+
+    // Is the data for each pin valid?
+    uint8_t l_ok0 = FALSE;
+    uint8_t l_ok1 = FALSE;
+
+    // Get value from most recent APSS data
+    l_ok0 = apss_gpio_get(l_ports[0], &l_pin0); //GPIO_VR_HOT_MEM_PROC_0
+    l_ok1 = apss_gpio_get(l_ports[1], &l_pin1); //GPIO_VR_HOT_MEM_PROC_1
+
+    // A signal counts as asserted only when its reading is valid and low
+    uint8_t l_hot0 = (l_ok0 && !l_pin0);
+    uint8_t l_hot1 = (l_ok1 && !l_pin1);
+
+    if( !(l_hot0 || l_hot1) )
+    {
+        // Clear the sensor only when both readings are valid and high;
+        // if either reading is invalid the sensor is left untouched.
+        if( l_ok0 && l_pin0 && l_ok1 && l_pin1 )
+        {
+            sensor_update(AMECSENSOR_PTR(VRHOTMEMPRCCNT), 0);
+        }
+        return;
+    }
+
+    // Update the sensor indicating that one of the vrhot signals was asserted
+    sensor_update(AMECSENSOR_PTR(VRHOTMEMPRCCNT), 1);
+
+    // Nothing more to do if the error has already been logged
+    if(L_err_logged)
+    {
+        return;
+    }
+
+    INTR_TRAC_ERR("GPIO_VR_HOT_MEM_PROC_0[%d, valid=%d] GPIO_VR_HOT_MEM_PROC_1[%d, valid=%d]",
+                  l_pin0, l_ok0, l_pin1, l_ok1);
+
+    /*
+     * @errortype
+     * @moduleid AMEC_UPDATE_APSS_GPIO
+     * @reasoncode VR_HOT_MEM_PROC_ASSERTED
+     * @userdata1 0
+     * @userdata2 0
+     * @userdata4 OCC_NO_EXTENDED_RC
+     * @devdesc GPIO_VR_HOT_MEM_PROC_0/1 was asserted
+     */
+    errlHndl_t l_errl = createErrl(AMEC_UPDATE_APSS_GPIO,
+                                   VR_HOT_MEM_PROC_ASSERTED,
+                                   OCC_NO_EXTENDED_RC,
+                                   ERRL_SEV_INFORMATIONAL,
+                                   NULL,
+                                   DEFAULT_TRACE_SIZE,
+                                   0,
+                                   0);
+
+    // Manufacturing error only
+    setErrlActions(l_errl, ERRL_ACTIONS_MANUFACTURING_ERROR);
+
+    // Processor callout
+    addCalloutToErrl(l_errl,
+                     ERRL_CALLOUT_TYPE_HUID,
+                     G_sysConfigData.proc_huid,
+                     ERRL_CALLOUT_PRIORITY_HIGH);
+
+    // APSS callout
+    addCalloutToErrl(l_errl,
+                     ERRL_CALLOUT_TYPE_HUID,
+                     G_sysConfigData.apss_huid,
+                     ERRL_CALLOUT_PRIORITY_LOW);
+
+    commitErrl(&l_errl);
+
+    L_err_logged = TRUE;
+}
+
+// Function Specification
+//
+// Name: amec_update_gpu_configuration
+//
+// Description: Checks the APSS data to see which GPUs are present. Every
+//              read in which all presence signals are valid refreshes the
+//              G_curr_* globals; the first such read is also latched in the
+//              G_first_* globals and sets G_gpu_config_done. A later read
+//              whose system bitmap differs from the first is traced once per
+//              distinct mismatching configuration, not on every call, so a
+//              persistent mismatch cannot flood the trace buffer.
+//
+// Thread: RealTime Loop
+//
+// End Function Specification
+void amec_update_gpu_configuration(void)
+{
+    // Mismatch trace throttling: remember whether a mismatch has been traced
+    // and which system bitmap it was traced for.
+    static bool     L_mismatch_traced = FALSE;
+    static uint32_t L_traced_sys_gpu_config = 0;
+
+    // GPIO port numbers from system model
+    uint8_t * l_gpu_port_nums = G_sysConfigData.apss_gpio_map.gpu;
+
+    // Actual values of the GPIO
+    uint8_t l_gpu_pres = 1;
+
+    // Data is valid?
+    bool l_valid = FALSE;
+    bool l_all_valid = FALSE;
+
+    uint8_t i = 0;
+
+    // Index of the first presence signal belonging to this OCC's processor
+    uint8_t l_start_proc = (G_pbax_id.chip_id * GPU_PRES_SIGN_PER_OCC);
+
+    uint8_t l_valid_bitmask_proc = 0; // Bitmask for present GPUs behind just this proc
+    uint8_t l_valid_bitmask_sys = 0;  // Bitmask for present GPUs behind both procs
+    uint8_t l_num_gpus_sys = 0;       // Number of GPUs both procs
+
+    // Check which GPUs are present
+    for( i=0; i < MAX_GPU_PRES_SIGNALS; i++ )
+    {
+        l_valid = apss_gpio_get(l_gpu_port_nums[i], &l_gpu_pres);
+
+        if(!l_valid)
+        {
+            // One invalid signal invalidates the whole read; give up and
+            // leave the globals untouched for this call.
+            l_all_valid = FALSE;
+            break;
+        }
+
+        l_all_valid = TRUE;
+
+        // Presence signal is active low
+        l_gpu_pres = (l_gpu_pres ? 0 : 1);
+
+        // Keep track of number and configuration of GPUs behind both procs
+        l_num_gpus_sys += l_gpu_pres;
+        l_valid_bitmask_sys |= (l_gpu_pres << i);
+
+        // Also want to keep a separate tally of GPUs behind only this proc
+        if( (i >= l_start_proc) && (i < (l_start_proc + GPU_PRES_SIGN_PER_OCC)) )
+        {
+            l_valid_bitmask_proc |= (l_gpu_pres << (i - l_start_proc));
+        }
+    }
+
+    // If all GPU signals are valid, update the global if this is the first read.
+    // If this is not the first read, make sure that the signals match the first.
+    if(l_all_valid)
+    {
+        G_curr_proc_gpu_config = l_valid_bitmask_proc;
+        G_curr_sys_gpu_config = l_valid_bitmask_sys;
+        G_curr_num_gpus_sys = l_num_gpus_sys;
+
+        if(!G_gpu_config_done)
+        {
+            G_gpu_config_done = TRUE;
+            G_first_proc_gpu_config = l_valid_bitmask_proc;
+            G_first_sys_gpu_config = l_valid_bitmask_sys;
+            G_first_num_gpus_sys = l_num_gpus_sys;
+            TRAC_IMP("GPU presence detection completed. GPU configuration for this OCC: 0x%08X, total[%d]",
+                     G_curr_proc_gpu_config, G_curr_num_gpus_sys);
+        }
+        else if (G_curr_sys_gpu_config != G_first_sys_gpu_config)
+        {
+            // Trace only when the mismatch is new or has changed since the
+            // last trace; this function runs repeatedly and would otherwise
+            // emit the same error on every call.
+            if( !L_mismatch_traced ||
+                (L_traced_sys_gpu_config != G_curr_sys_gpu_config) )
+            {
+                TRAC_ERR("GPU presence has changed unexpectedly! Old:0x%02X, New:0x%02X",
+                         G_first_sys_gpu_config, l_valid_bitmask_sys);
+                L_mismatch_traced = TRUE;
+                L_traced_sys_gpu_config = G_curr_sys_gpu_config;
+            }
+        }
+        else
+        {
+            // Presence matches the first read again; re-arm the trace so a
+            // future change is reported.
+            L_mismatch_traced = FALSE;
+        }
+    }
+}
/*----------------------------------------------------------------------------*/
/* End */
/*----------------------------------------------------------------------------*/
diff --git a/src/occ_405/amec/amec_sensors_power.h b/src/occ_405/amec/amec_sensors_power.h
index 9fef1de..2cb55fb 100755
--- a/src/occ_405/amec/amec_sensors_power.h
+++ b/src/occ_405/amec/amec_sensors_power.h
@@ -1,11 +1,11 @@
/* IBM_PROLOG_BEGIN_TAG */
/* This is an automatically generated prolog. */
/* */
-/* $Source: src/occ/amec/amec_sensors_power.h $ */
+/* $Source: src/occ_405/amec/amec_sensors_power.h $ */
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2015 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -47,7 +47,15 @@
void amec_update_apss_sensors(void);
// Function that is called by AMEC State Machine that will update the AMEC
+// sensors for GPIO data collected from the APSS.
+void amec_update_apss_gpio(void);
+
+// Function that is called by AMEC State Machine that will update the AMEC
// sensors for data that comes from the AVS Bus (Voltage/Current)
void amec_update_avsbus_sensors(void);
+// Function that is called by AMEC State Machine that will determine which
+// GPUs are present from the APSS GPIO data and trace any later change in
+// GPU presence
+void amec_update_gpu_configuration(void);
+
#endif // _AMEC_SENSORS_POWER_H
diff --git a/src/occ_405/amec/amec_service_codes.h b/src/occ_405/amec/amec_service_codes.h
index 076bef9..47d6c09 100755
--- a/src/occ_405/amec/amec_service_codes.h
+++ b/src/occ_405/amec/amec_service_codes.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2016 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -65,6 +65,7 @@ enum occAmecModuleId
AMEC_HEALTH_CHECK_PROC_TIMEOUT = AMEC_COMP_ID | 0x14,
AMEC_CALC_DTS_SENSORS = AMEC_COMP_ID | 0x16,
AMEC_SET_FREQ_RANGE = AMEC_COMP_ID | 0x17,
+ AMEC_UPDATE_APSS_GPIO = AMEC_COMP_ID | 0x18,
};
/*----------------------------------------------------------------------------*/
diff --git a/src/occ_405/amec/amec_slave_smh.c b/src/occ_405/amec/amec_slave_smh.c
index 2137d35..e92308a 100755
--- a/src/occ_405/amec/amec_slave_smh.c
+++ b/src/occ_405/amec/amec_slave_smh.c
@@ -210,6 +210,7 @@ smh_state_timing_t G_amec_slv_state_timings = {amec_slv_update_smh_sensors};
//*************************************************************************/
// Globals
//*************************************************************************/
+extern bool G_gpu_config_done;
//*************************************************************************/
// Function Prototypes
diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h
index 66d1b84..40afd49 100755
--- a/src/occ_405/amec/amec_sys.h
+++ b/src/occ_405/amec/amec_sys.h
@@ -361,6 +361,7 @@ typedef struct
sensor_t pwr250usgpu;
sensor_t pwrapssch[MAX_APSS_ADC_CHANNELS];
sensor_t cur12Vstby;
+ sensor_t vrhot_mem_proc;
sensor_t vrfan;
diff --git a/src/occ_405/occ_service_codes.h b/src/occ_405/occ_service_codes.h
index 5719801..417aa2d 100644
--- a/src/occ_405/occ_service_codes.h
+++ b/src/occ_405/occ_service_codes.h
@@ -65,6 +65,8 @@ enum occReasonCode
VRM_ERROR_TEMP = 0x20,
/// VR_FAN - AVS Bus over-temperature reported
VRM_VRFAN_WARNING = 0x22,
+ /// GPIO_VR_HOT_MEM_PROC signal from APSS asserted
+ VR_HOT_MEM_PROC_ASSERTED = 0x23,
/// DIMM reached error threshold
DIMM_ERROR_TEMP = 0x30,
/// Frequency limited due to oversubscription condition
diff --git a/src/occ_405/proc/proc_data.c b/src/occ_405/proc/proc_data.c
index ac8ee9f..ab88c19 100755
--- a/src/occ_405/proc/proc_data.c
+++ b/src/occ_405/proc/proc_data.c
@@ -654,17 +654,27 @@ void nest_dts_init(void)
void task_24x7(task_t * i_task)
{
static uint8_t L_numTicks = 0x00; // never called since OCC started
+ static bool L_idle_trace = FALSE;
if (!G_24x7_disabled)
{
// Schedule 24x7 task if idle
if (!async_request_is_idle(&G_24x7_request.request))
{
- INTR_TRAC_ERR("task_24x7: request not idle");
+ if(!L_idle_trace)
+ {
+ INTR_TRAC_ERR("task_24x7: request not idle");
+ L_idle_trace = TRUE;
+ }
L_numTicks++;
}
else
{
+ if(L_idle_trace)
+ {
+ INTR_TRAC_INFO("task_24x7: previously was not idle and is now idle after %d ticks", L_numTicks);
+ L_idle_trace = FALSE;
+ }
// Clear errors and init parameters for GPE task
G_24x7_parms.error.error = 0;
G_24x7_parms.numTicksPassed = L_numTicks;
diff --git a/src/occ_405/pss/apss.c b/src/occ_405/pss/apss.c
index 83eff11..eb9c3d7 100755
--- a/src/occ_405/pss/apss.c
+++ b/src/occ_405/pss/apss.c
@@ -770,7 +770,6 @@ void task_apss_complete_pwr_meas(struct task *i_self)
APSS_DBG("task_apss_complete_pwr_meas: finished w/rc=0x%08X\n", G_gpe_complete_pwr_meas_read_args.error.rc);
APSS_DBG_HEXDUMP(&G_gpe_complete_pwr_meas_read_args, sizeof(G_gpe_complete_pwr_meas_read_args), "G_gpe_complete_pwr_meas_read_args");
-
} // end task_apss_complete_pwr_meas
bool apss_gpio_get(uint8_t i_pin_number, uint8_t *o_pin_value)
@@ -784,9 +783,9 @@ bool apss_gpio_get(uint8_t i_pin_number, uint8_t *o_pin_value)
bool l_dcom_data_valid = FALSE;
int i=0;
- for(;i<sizeof(G_dcom_slv_inbox_rx);i++)
+ for(;i < NUM_OF_APSS_GPIO_PORTS; i++ )
{
- if( ((char*)&G_dcom_slv_inbox_rx)[i] != 0 )
+ if( G_dcom_slv_inbox_rx.gpio[i] != 0 )
{
l_dcom_data_valid = TRUE;
break;
diff --git a/src/occ_405/sensor/sensor_enum.h b/src/occ_405/sensor/sensor_enum.h
index eb01943..b41e9e4 100755
--- a/src/occ_405/sensor/sensor_enum.h
+++ b/src/occ_405/sensor/sensor_enum.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2016 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -98,6 +98,7 @@ enum e_gsid
PWRAPSSCH14,
PWRAPSSCH15,
CUR12VSTBY, // 12V Standby Current
+ VRHOTMEMPRCCNT,
// ------------------------------------------------------
// Chip Sensors
diff --git a/src/occ_405/sensor/sensor_info.c b/src/occ_405/sensor/sensor_info.c
index 6ad923d..6ee5da4 100755
--- a/src/occ_405/sensor/sensor_info.c
+++ b/src/occ_405/sensor/sensor_info.c
@@ -290,7 +290,7 @@ const sensor_info_t G_sensor_info[] =
SENSOR_INFO_T_ENTRY( PWRAPSSCH14, "W\0", AMEC_SENSOR_TYPE_POWER, AMEC_SENSOR_LOC_SYS, AMEC_SENSOR_NONUM, AMEEFP_2MS_IN_HZ, AMEFP( 1, 0) ),
SENSOR_INFO_T_ENTRY( PWRAPSSCH15, "W\0", AMEC_SENSOR_TYPE_POWER, AMEC_SENSOR_LOC_SYS, AMEC_SENSOR_NONUM, AMEEFP_2MS_IN_HZ, AMEFP( 1, 0) ),
SENSOR_INFO_T_ENTRY( CUR12VSTBY, "A\0", AMEC_SENSOR_TYPE_CURRENT, AMEC_SENSOR_LOC_SYS, AMEC_SENSOR_NONUM, AMEEFP_250US_IN_HZ, AMEFP( 1,-2) ),
-
+ SENSOR_INFO_T_ENTRY( VRHOTMEMPRCCNT, "#\0", AMEC_SENSOR_TYPE_GENERIC, AMEC_SENSOR_LOC_SYS, AMEC_SENSOR_NONUM, AMEEFP_2MS_IN_HZ, AMEFP( 1, 0) ),
/* ==ChipSensors== NameString Units Type Location Number Freq ScaleFactor */
SENSOR_INFO_T_ENTRY( TODclock0, "us\0", AMEC_SENSOR_TYPE_TIME, AMEC_SENSOR_LOC_ALL, AMEC_SENSOR_NONUM, AMEEFP_2MS_IN_HZ, AMEFP( 16, 0) ),
diff --git a/src/occ_405/sensor/sensor_table.c b/src/occ_405/sensor/sensor_table.c
index 7d243ee..1cb2ac6 100755
--- a/src/occ_405/sensor/sensor_table.c
+++ b/src/occ_405/sensor/sensor_table.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2016 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -336,6 +336,7 @@ const sensor_ptr_t G_amec_sensor_list[] =
SENSOR_PTR( PWRAPSSCH14, &g_amec_sys.sys.pwrapssch[14]),
SENSOR_PTR( PWRAPSSCH15, &g_amec_sys.sys.pwrapssch[15]),
SENSOR_PTR( CUR12VSTBY, &g_amec_sys.sys.cur12Vstby),
+ SENSOR_PTR( VRHOTMEMPRCCNT, &g_amec_sys.sys.vrhot_mem_proc),
// ------------------------------------------------------
// Chip Sensors
OpenPOWER on IntegriCloud