summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndres Lugo-Reyes <aalugore@us.ibm.com>2017-11-02 17:12:20 -0500
committerAndres A. Lugo-Reyes <aalugore@us.ibm.com>2017-12-18 15:20:54 -0500
commit4d2912987d234b0d77b69f0fd2bee8a8d71e1613 (patch)
tree54cf763c42cf46703c86809586488fb006714d8a
parent0bf193d8d4f98934d4f539b1950e8b46c03a2716 (diff)
downloadtalos-occ-4d2912987d234b0d77b69f0fd2bee8a8d71e1613.tar.gz
talos-occ-4d2912987d234b0d77b69f0fd2bee8a8d71e1613.zip
OCC: Call Home Data Log
-CPU temp PER proc -DIMM Temp changes -Memory Bandwidth changes -VRM VDD -Error History counts Change-Id: Ie30f373982a5f3327975d433d508ad2fb27f4fc3 RTC:133944 CMVC-Prereq: 1040415 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/49395 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Martha Broyles <mbroyles@us.ibm.com> Reviewed-by: William A. Bryan <wilbryan@us.ibm.com> Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com> Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com>
-rw-r--r--src/common/apss_structs.h4
-rwxr-xr-xsrc/occ_405/dcom/dcom.h19
-rwxr-xr-xsrc/occ_405/dcom/dcomSlaveTx.c25
-rwxr-xr-xsrc/occ_405/incl/occ_common.h7
-rwxr-xr-xsrc/occ_405/sensor/sensor_table.c2
-rwxr-xr-xsrc/occ_405/thread/chom.c235
-rwxr-xr-xsrc/occ_405/thread/chom.h145
7 files changed, 259 insertions, 178 deletions
diff --git a/src/common/apss_structs.h b/src/common/apss_structs.h
index 99242fa..416f27a 100644
--- a/src/common/apss_structs.h
+++ b/src/common/apss_structs.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2016 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -35,7 +35,7 @@
// List of supported APSS Modes set in occ_gpe0/apss_init.c
#define APSS_MODE_COMPOSITE 0
#define APSS_MODE_AUTO2 1
-
+#define MAX_APSS_ADC_CHANNELS 16
/* This data structure holds the common args data structures between the */
/* 405 and the GPE0, used in IPC communications. */
diff --git a/src/occ_405/dcom/dcom.h b/src/occ_405/dcom/dcom.h
index 3bbfcda..e730a76 100755
--- a/src/occ_405/dcom/dcom.h
+++ b/src/occ_405/dcom/dcom.h
@@ -103,12 +103,13 @@
#define DCOM_TRACE_NOT_IDLE_AFTER_CONSEC_TIMES 3
// general defines
-#define TOD_SIZE 6
-#define NUM_TOD_SENSORS 3
-#define SLV_INBOX_RSV_SIZE 150
-#define SLV_MAILBOX_SIZE 32
-#define SLV_OUTBOX_RSV_SIZE 618
-#define DOORBELL_RSV_SIZE 1
+#define TOD_SIZE 6
+#define NUM_TOD_SENSORS 3
+#define SLV_INBOX_RSV_SIZE 150
+#define SLV_MAILBOX_SIZE 32
+#define SLV_OUTBOX_RSV_SIZE 602
+#define DOORBELL_RSV_SIZE 1
+#define DCOM_MAX_ERRH_ENTRIES 8
#define DCOM_250us_GAP 1
#define DCOM_4MS_GAP 8
@@ -212,7 +213,7 @@ typedef struct __attribute__ ((packed))
uint16_t tempprocavg; // [172]
uint16_t tempprocthermal; // [174]
uint16_t utilcy[MAX_CORES]; // [176]
- uint16_t vrfan; // [224]
+ uint16_t tempvdd; // [224]
uint16_t reserved2; // [226]
uint16_t mrd2msp0mx[MAX_NUM_MEM_CONTROLLERS]; // [228]
uint16_t mwr2msp0mx[MAX_NUM_MEM_CONTROLLERS]; // [244]
@@ -230,10 +231,12 @@ typedef struct __attribute__ ((packed))
// Factual (i.e., actual frequency requested by this OCC slave)
uint16_t factual; // [372]
+ // Error history counts
+ error_history_count_t errhCount[DCOM_MAX_ERRH_ENTRIES]; // [374] - 16 bytes
// Reserved Bytes
union
{
- uint8_t reserved[SLV_OUTBOX_RSV_SIZE]; // [374] - 618 bytes
+ uint8_t reserved[SLV_OUTBOX_RSV_SIZE]; // [390] - 602 bytes
struct __attribute__ ((packed))
{
uint8_t _reserved_1;
diff --git a/src/occ_405/dcom/dcomSlaveTx.c b/src/occ_405/dcom/dcomSlaveTx.c
index 837608d..7abf2b1 100755
--- a/src/occ_405/dcom/dcomSlaveTx.c
+++ b/src/occ_405/dcom/dcomSlaveTx.c
@@ -51,6 +51,9 @@ dcom_slv_outbox_doorbell_t G_dcom_slv_outbox_doorbell_tx;
STATIC_ASSERT( (NUM_BYTES_IN_SLAVE_OUTBOX != (sizeof(G_dcom_slv_outbox_tx))) );
uint32_t G_slave_pbax_rc = 0;
+// Access to the global error history count array
+extern uint8_t G_error_history[ERR_HISTORY_SIZE];
+
// Function Specification
//
// Name: dcom_build_slv_outbox
@@ -60,12 +63,12 @@ uint32_t G_slave_pbax_rc = 0;
// Build the slave outboxes so slave can send to master
//
// End Function Specification
-
uint32_t dcom_build_slv_outbox(void)
{
// Locals
uint32_t l_addr_of_slv_outbox_in_main_mem = 0;
-
+ uint32_t l_errh_idx = 0;
+ uint8_t l_num_errh_added = 0;
static uint8_t L_seq = 0xFF;
L_seq++;
@@ -89,6 +92,24 @@ uint32_t dcom_build_slv_outbox(void)
G_dcom_slv_outbox_doorbell_tx.active_node_pcap = g_amec->pcap.active_node_pcap;
G_dcom_slv_outbox_doorbell_tx.addr_slv_outbox_buffer = l_addr_of_slv_outbox_in_main_mem;
+ // Collect the error history counts to send to master
+ for( l_errh_idx = 0; l_errh_idx < ERR_HISTORY_SIZE; l_errh_idx++ )
+ {
+ if( G_error_history[l_errh_idx] > 0 )
+ {
+ G_dcom_slv_outbox_tx.errhCount[l_num_errh_added].error_id =
+ l_errh_idx;
+ G_dcom_slv_outbox_tx.errhCount[l_num_errh_added].error_count =
+ G_error_history[l_errh_idx];
+ l_num_errh_added++;
+
+ // make sure we only add up to 8 error histories
+ if( l_num_errh_added == DCOM_MAX_ERRH_ENTRIES )
+ {
+ break;
+ }
+ }
+ }
return l_addr_of_slv_outbox_in_main_mem;
}
diff --git a/src/occ_405/incl/occ_common.h b/src/occ_405/incl/occ_common.h
index 39fb2ba..b21b244 100755
--- a/src/occ_405/incl/occ_common.h
+++ b/src/occ_405/incl/occ_common.h
@@ -331,5 +331,12 @@ extern const char G_occ_buildname[16];
int memcmp ( const void * ptr1, const void * ptr2, size_t num );
+// struct to hold error history data
+typedef struct __attribute__ ((packed))
+{
+ uint8_t error_id;
+ uint8_t error_count;
+} error_history_count_t;
+
#endif //_OCC_COMMON_H
diff --git a/src/occ_405/sensor/sensor_table.c b/src/occ_405/sensor/sensor_table.c
index 1b7aef0..9538958 100755
--- a/src/occ_405/sensor/sensor_table.c
+++ b/src/occ_405/sensor/sensor_table.c
@@ -559,7 +559,7 @@ const minisensor_ptr_t G_amec_mini_sensor_list[] INIT_SECTION =
MINI_SENSOR_PTR( CURVDD, NULL),
MINI_SENSOR_PTR( CURVDN, NULL),
MINI_SENSOR_PTR( VRMPROCOT, NULL),
- MINI_SENSOR_PTR( TEMPVDD, NULL),
+ MINI_SENSOR_PTR( TEMPVDD, &G_dcom_slv_outbox_tx.tempvdd),
// ------------------------------------------------------
// Core Sensors (24 of each)
diff --git a/src/occ_405/thread/chom.c b/src/occ_405/thread/chom.c
index 572941d..dbb3a87 100755
--- a/src/occ_405/thread/chom.c
+++ b/src/occ_405/thread/chom.c
@@ -40,10 +40,13 @@ extern amec_sys_t g_amec_sys;
// chom timer
uint32_t g_chom_gen_periodic_log_timer;
+
// track which power mode has been during the polling period
uint8_t g_chom_pwr_modes[OCC_INTERNAL_MODE_MAX_NUM]; // Nominal, SPS, DPS, DPS-MP, FFO
+
// force immediate chom log flag
uint8_t g_chom_force;
+
// chom data log
ChomLogData_t g_chom_log;
ChomLogData_t * g_chom = &g_chom_log;
@@ -54,57 +57,47 @@ STATIC_ASSERT( sizeof(ChomLogData_t) > CHOM_LOG_DATA_MAX );
// Chom Sensors Table
// Some of the chom sensors need multiple mini-sensor to calculate
-// the max, summstion of temperature or bandwidth
+// the max, summation of temperature or bandwidth
// mark those mini-sensor "NULL" and will be updated
// from "chom_update_sensors()"
const uint16_t * g_chom_sensor_table[CHOM_NUM_OF_SENSORS] =
{ // Node total power (DC)
&g_amec_sys.sys.pwrsys.sample,
- // Socket power
- &g_amec_sys.proc[0].pwrproc.sample,
- &G_dcom_slv_outbox_rx[1].pwrproc,
- &G_dcom_slv_outbox_rx[2].pwrproc,
- &G_dcom_slv_outbox_rx[3].pwrproc,
- &G_dcom_slv_outbox_rx[4].pwrproc,
- &G_dcom_slv_outbox_rx[5].pwrproc,
- &G_dcom_slv_outbox_rx[6].pwrproc,
- &G_dcom_slv_outbox_rx[7].pwrproc,
- // Memory power
- &G_dcom_slv_outbox_rx[0].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[1].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[2].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[3].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[4].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[5].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[6].pwr250usmemp0,
- &G_dcom_slv_outbox_rx[7].pwr250usmemp0,
- // Fan power
- &g_amec_sys.fan.pwr250usfan.sample,
+ // APSS sensors 1 per channel (16 total)
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// Processor frequency
&G_dcom_slv_outbox_rx[0].freqa,
&G_dcom_slv_outbox_rx[1].freqa,
&G_dcom_slv_outbox_rx[2].freqa,
&G_dcom_slv_outbox_rx[3].freqa,
- &G_dcom_slv_outbox_rx[4].freqa,
- &G_dcom_slv_outbox_rx[5].freqa,
- &G_dcom_slv_outbox_rx[6].freqa,
- &G_dcom_slv_outbox_rx[7].freqa,
// Processor utilization sensor
&G_dcom_slv_outbox_rx[0].util,
&G_dcom_slv_outbox_rx[1].util,
&G_dcom_slv_outbox_rx[2].util,
&G_dcom_slv_outbox_rx[3].util,
- &G_dcom_slv_outbox_rx[4].util,
- &G_dcom_slv_outbox_rx[5].util,
- &G_dcom_slv_outbox_rx[6].util,
- &G_dcom_slv_outbox_rx[7].util,
- // Max Core temperature for all processors in the node
- NULL,
- // Max Centaur temperature for all Centaurs in the node
- NULL,
- // Max Dimm temperature for all Dimms in the node
- NULL,
+ // Processor temperature sensors
+ &G_dcom_slv_outbox_rx[0].tempprocthermal,
+ &G_dcom_slv_outbox_rx[1].tempprocthermal,
+ &G_dcom_slv_outbox_rx[2].tempprocthermal,
+ &G_dcom_slv_outbox_rx[3].tempprocthermal,
+ // Centaur temperature sensors
+ &G_dcom_slv_outbox_rx[0].temp2mscent,
+ &G_dcom_slv_outbox_rx[1].temp2mscent,
+ &G_dcom_slv_outbox_rx[2].temp2mscent,
+ &G_dcom_slv_outbox_rx[3].temp2mscent,
+ // DIMM temperature sensors
+ &G_dcom_slv_outbox_rx[0].tempdimmthrm,
+ &G_dcom_slv_outbox_rx[1].tempdimmthrm,
+ &G_dcom_slv_outbox_rx[2].tempdimmthrm,
+ &G_dcom_slv_outbox_rx[3].tempdimmthrm,
+ // VRM VDD temperatures
+ // TEMPVDDP0 ~ TEMPVDDP3
+ &G_dcom_slv_outbox_rx[0].tempvdd,
+ &G_dcom_slv_outbox_rx[1].tempvdd,
+ &G_dcom_slv_outbox_rx[2].tempvdd,
+ &G_dcom_slv_outbox_rx[3].tempvdd,
// Instructions per second sensor
NULL,
// Memory bandwidth for process memory controller
@@ -116,16 +109,9 @@ const uint16_t * g_chom_sensor_table[CHOM_NUM_OF_SENSORS] =
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// P3M0 ~ P3M7
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- // P4M0 ~ P4M7
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- // P5M0 ~ P5M7
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- // P6M0 ~ P6M7
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- // P7M0 ~ P7M7
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
+
// Function Specification
//
// Name: chom_data_init
@@ -176,13 +162,16 @@ void chom_data_reset()
// End Function Specification
void chom_update_sensors()
{
- uint16_t l_max_core_temp = 0;
- uint16_t l_max_cent_temp = 0;
- uint16_t l_max_dimm_temp = 0;
uint32_t l_mips = 0;
uint16_t l_mem_rw = 0;
uint16_t l_sample = 0;
- uint16_t i = 0, j = 0, k = 0;
+
+ static uint32_t L_memBWNumSamples[NUM_CHOM_MODES][MAX_NUM_MEM_CONTROLLERS] = {{0}};
+
+ // Use FMF as default
+ static uint32_t * L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_FMF];
+
+ uint16_t i = 0, j = 0;
// Is the current mode different than previous poll
if (g_chom->nodeData.curPwrMode != CURRENT_MODE())
@@ -207,39 +196,48 @@ void chom_update_sensors()
{
case OCC_MODE_NOMINAL:
g_chom_pwr_modes[OCC_INTERNAL_MODE_NOM] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_NOMINAL];
break;
case OCC_MODE_PWRSAVE:
g_chom_pwr_modes[OCC_INTERNAL_MODE_SPS] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_SPS];
break;
case OCC_MODE_DYN_POWER_SAVE:
g_chom_pwr_modes[OCC_INTERNAL_MODE_DPS] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_DPS];
break;
case OCC_MODE_DYN_POWER_SAVE_FP:
g_chom_pwr_modes[OCC_INTERNAL_MODE_DPS_MP] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_DPS_MP];
break;
case OCC_MODE_FFO:
g_chom_pwr_modes[OCC_INTERNAL_MODE_FFO] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_FFO];
break;
case OCC_MODE_NOM_PERFORMANCE:
g_chom_pwr_modes[OCC_INTERNAL_MODE_NOM_PERF] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_NOM_PERF];
break;
case OCC_MODE_MAX_PERFORMANCE:
g_chom_pwr_modes[OCC_INTERNAL_MODE_MAX_PERF] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_MAX_PERF];
break;
case OCC_MODE_FMF:
g_chom_pwr_modes[OCC_INTERNAL_MODE_FMF] = 1;
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_FMF];
break;
default:
TRAC_INFO("chom_update_sensors: Cannot record chom data for mode 0x%02X",
g_chom->sensorData[0].pwrMode.mode);
+ L_curNumSamplePtr = L_memBWNumSamples[CHOM_MODE_FMF];
break;
}
}
@@ -247,58 +245,87 @@ void chom_update_sensors()
// update number of samples
g_chom->sensorData[0].pwrMode.numOfSamples++;
- // update chom sensors which has multiple mini-sensor source
- for (i = 0 ; i<MAX_OCCS ; i++)
+ // update APSS
+ uint16_t l_apss_idx = CHOMPWRAPSSCH0;
+ uint16_t l_current_channel = 0;
+ for( i = 0; i < MAX_APSS_ADC_CHANNELS; i++ )
{
- if (G_dcom_slv_outbox_rx[i].tempprocthermal > l_max_core_temp)
- {
- l_max_core_temp = G_dcom_slv_outbox_rx[i].tempprocthermal;
- }
-
- if (G_dcom_slv_outbox_rx[i].temp2mscent > l_max_cent_temp)
- {
- l_max_cent_temp = G_dcom_slv_outbox_rx[i].temp2mscent;
- }
-
- if (G_dcom_slv_outbox_rx[i].tempdimmthrm > l_max_dimm_temp)
- {
- l_max_dimm_temp = G_dcom_slv_outbox_rx[i].tempdimmthrm;
- }
+ // Transfer data from AMEC sensor to CHOM sensor
+ l_current_channel = AMECSENSOR_ARRAY_PTR( PWRAPSSCH0, i )->sample;
+ // TRAC_INFO("channel %d reading: %d", i, l_current_channel);
+ g_chom->sensorData[0].sensor[l_apss_idx].sample = l_current_channel;
+
+ // Send the corresponding function ID in the node data
+ g_chom->nodeData.channelFuncIds[i] =
+ G_apss_ch_to_function[i];
+ l_apss_idx++;
}
- g_chom->sensorData[0].sensor[CHOMTEMPPROC].sample = l_max_core_temp;
- g_chom->sensorData[0].sensor[CHOMTEMPCENT].sample = l_max_cent_temp;
- g_chom->sensorData[0].sensor[CHOMTEMPDIMM].sample = l_max_dimm_temp;
+
// update MIPS
- k = 0;
- for (i=0 ; i<MAX_OCCS ; i++)
+ uint16_t l_mips_count = 0;
+ uint16_t l_mem_idx = CHOMBWP0M0;
+
+ // Loop through OCCs updating chom sensors
+ for ( i = 0; i < CHOM_MAX_OCCS; i++ )
{
- if (0 != G_dcom_slv_outbox_rx[i].ips4msp0)
+ // count MIPS
+ if ( 0 != G_dcom_slv_outbox_rx[i].ips4msp0 )
{
l_mips += G_dcom_slv_outbox_rx[i].ips4msp0;
- k++;
+ l_mips_count++;
+ }
+
+ // update memory bandwidth
+ for ( j = 0; j < MAX_NUM_MEM_CONTROLLERS; j++)
+ {
+ l_mem_rw = G_dcom_slv_outbox_rx[i].mrd2msp0mx[j] +
+ G_dcom_slv_outbox_rx[i].mwr2msp0mx[j];
+
+ // If l_mem_rw == 0, do not add to sensor
+ if(l_mem_rw != 0)
+ {
+ g_chom->sensorData[0].sensor[l_mem_idx].sample = l_mem_rw;
+ L_curNumSamplePtr[j]++;
+
+ // Calculate the averages/min/max for the memory bandwidth sensors
+ l_sample = g_chom->sensorData[0].sensor[l_mem_idx].sample;
+
+ if (g_chom->sensorData[0].sensor[l_mem_idx].sampleMin > l_sample)
+ {
+ g_chom->sensorData[0].sensor[l_mem_idx].sampleMin = l_sample;
+ }
+ if (g_chom->sensorData[0].sensor[l_mem_idx].sampleMax < l_sample)
+ {
+ g_chom->sensorData[0].sensor[l_mem_idx].sampleMax = l_sample;
+ }
+
+ g_chom->sensorData[0].sensor[l_mem_idx].accumulator += l_sample;
+ g_chom->sensorData[0].sensor[l_mem_idx].average =
+ (g_chom->sensorData[0].sensor[l_mem_idx].accumulator /
+ L_curNumSamplePtr[j]);
+ }
+ l_mem_idx++;
}
}
- if (k != 0)
+
+ // Update MIPS
+ if (l_mips_count != 0)
{
- g_chom->sensorData[0].sensor[CHOMIPS].sample = (l_mips/k);
+ g_chom->sensorData[0].sensor[CHOMIPS].sample = (l_mips/l_mips_count);
}
- // update memory bandwidth
- k = CHOMBWP0M0;
- for (i=0 ; i<MAX_OCCS ; i++)
+ // loop through all sensors and update data from mini-sensors
+ for (i = 0 ; i<CHOM_NUM_OF_SENSORS ; i++)
{
- for (j=0 ; j<MAX_NUM_MEM_CONTROLLERS ; j++)
+ // Skip memory bandwidth controllers since handled above
+ if( i == CHOMBWP0M0 )
{
- l_mem_rw = G_dcom_slv_outbox_rx[i].mrd2msp0mx[j]+G_dcom_slv_outbox_rx[i].mwr2msp0mx[j];
- g_chom->sensorData[0].sensor[k].sample = l_mem_rw;
- k++;
+ i += (MAX_NUM_MEMORY_SENSORS-1);
+ continue;
}
- }
- // loop through all sensors and update data from mini-sensors
- for (i = 0 ; i<CHOM_NUM_OF_SENSORS ; i++)
- { // update sample, min, max, average sensor data
+ // update sample, min, max, average sensor data
if (NULL != g_chom_sensor_table[i])
{
// directly mapping to mini-sensor
@@ -319,6 +346,46 @@ void chom_update_sensors()
g_chom->sensorData[0].sensor[i].average = (g_chom->sensorData[0].sensor[i].accumulator/
g_chom->sensorData[0].pwrMode.numOfSamples);
}
+
+ // Collect the error history data
+ int proc_idx = 0, errh_idx = 0, slv_idx = 0, entry_idx = 0;
+
+ // get the master proc index
+ uint8_t master_id = G_pbax_id.chip_id;
+
+ // Iterate through procs
+ for( proc_idx = 0; proc_idx < CHOM_MAX_OCCS; proc_idx++ )
+ {
+ // If we are on the master proc, skip it since it is already
+ // present in the call home log
+ if( proc_idx == master_id )
+ {
+ continue;
+ }
+ else
+ {
+ // Iterate through each proc's error history counts
+ for( errh_idx = 0; errh_idx < DCOM_MAX_ERRH_ENTRIES; errh_idx++)
+ {
+ // If the entry id is 0, we have reached the end of error history
+ // counts for this proc
+ // If entry_idx is 4, we have reached our limit of entries to collect
+ if((G_dcom_slv_outbox_rx[proc_idx].errhCount[errh_idx].error_id == 0) ||
+ (entry_idx >= CHOM_MAX_ERRH_ENTRIES))
+ {
+ break;
+ }
+ else
+ {
+ // Add the error history to the chom data
+ g_chom->nodeData.errhCounts[slv_idx][entry_idx] =
+ G_dcom_slv_outbox_rx[proc_idx].errhCount[errh_idx];
+ slv_idx++;
+ entry_idx++;
+ }
+ }
+ }
+ }
}
@@ -336,7 +403,7 @@ void chom_gen_periodic_log()
errlHndl_t l_errlHndl = NULL;
TRAC_INFO("Enter chom_gen_periodic_log");
-
+ TRAC_INFO("chom size = %d", sizeof(*g_chom));
// update total time
g_chom->nodeData.totalTime = g_chom_gen_periodic_log_timer;
diff --git a/src/occ_405/thread/chom.h b/src/occ_405/thread/chom.h
index b899e9e..8f03809 100755
--- a/src/occ_405/thread/chom.h
+++ b/src/occ_405/thread/chom.h
@@ -1,11 +1,11 @@
/* IBM_PROLOG_BEGIN_TAG */
/* This is an automatically generated prolog. */
/* */
-/* $Source: src/occ/thread/chom.h $ */
+/* $Source: src/occ_405/thread/chom.h $ */
/* */
/* OpenPOWER OnChipController Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2011,2015 */
+/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -28,61 +28,70 @@
#include <occ_common.h>
#include <trac_interface.h>
+#include <apss.h>
#define CHOM_GEN_LOG_PERIODIC_TIME 86400 // seconds in a day
-#define CHOM_VERSION 0x00
+#define CHOM_VERSION 0x01
// Max size of chom data log
#define CHOM_LOG_DATA_MAX 3072
-
-// List of call home sensors
+// Max number of memory bandwidth CHOM sensors
+#define MAX_NUM_MEMORY_SENSORS 32
+// Max number of procs Call Home will get data for
+#define CHOM_MAX_OCCS 4
+// Max number of error history entries to add to the call home log
+#define CHOM_MAX_ERRH_ENTRIES 4
+// List of call home sensors (Max 126)
enum
{
// Node total power (DC)
CHOMPWR = 0,
- // Socket power
- CHOMPWRS0,
- CHOMPWRS1,
- CHOMPWRS2,
- CHOMPWRS3,
- CHOMPWRS4,
- CHOMPWRS5,
- CHOMPWRS6,
- CHOMPWRS7,
- // Memory power
- CHOMPWRM0,
- CHOMPWRM1,
- CHOMPWRM2,
- CHOMPWRM3,
- CHOMPWRM4,
- CHOMPWRM5,
- CHOMPWRM6,
- CHOMPWRM7,
- // Fan power
- CHOMPWRFAN,
+ // APSS Channels
+ CHOMPWRAPSSCH0,
+ CHOMPWRAPSSCH1,
+ CHOMPWRAPSSCH2,
+ CHOMPWRAPSSCH3,
+ CHOMPWRAPSSCH4,
+ CHOMPWRAPSSCH5,
+ CHOMPWRAPSSCH6,
+ CHOMPWRAPSSCH7,
+ CHOMPWRAPSSCH8,
+ CHOMPWRAPSSCH9,
+ CHOMPWRAPSSCH10,
+ CHOMPWRAPSSCH11,
+ CHOMPWRAPSSCH12,
+ CHOMPWRAPSSCH13,
+ CHOMPWRAPSSCH14,
+ CHOMPWRAPSSCH15,
// Processor frequency
CHOMFREQP0,
CHOMFREQP1,
CHOMFREQP2,
CHOMFREQP3,
- CHOMFREQP4,
- CHOMFREQP5,
- CHOMFREQP6,
- CHOMFREQP7,
// Processor utilization sensor
CHOMUTILP0,
CHOMUTILP1,
CHOMUTILP2,
CHOMUTILP3,
- CHOMUTILP4,
- CHOMUTILP5,
- CHOMUTILP6,
- CHOMUTILP7,
- // Max core temperature for all processors in the node
- CHOMTEMPPROC,
- // Max Centaur temperature for all Centaurs in the node
- CHOMTEMPCENT,
- // Max Dimm temperature for all Dimms in the node
- CHOMTEMPDIMM,
+ // Proc temperature for each processor in the node
+ CHOMTEMPPROC0,
+ CHOMTEMPPROC1,
+ CHOMTEMPPROC2,
+ CHOMTEMPPROC3,
+ // Centaur temperature for all Centaurs in the node
+ CHOMTEMPCENTP0,
+ CHOMTEMPCENTP1,
+ CHOMTEMPCENTP2,
+ CHOMTEMPCENTP3,
+ // Dimm temperature for all Dimms in the node
+ CHOMTEMPDIMMP0,
+ CHOMTEMPDIMMP1,
+ CHOMTEMPDIMMP2,
+ CHOMTEMPDIMMP3,
+ // VRM VDD temperature per proc
+ CHOMTEMPVDDP0,
+ CHOMTEMPVDDP1,
+ CHOMTEMPVDDP2,
+ CHOMTEMPVDDP3,
// Instructions per second sensor
CHOMIPS,
// Memory bandwidth for process memory controller
@@ -118,43 +127,23 @@ enum
CHOMBWP3M5,
CHOMBWP3M6,
CHOMBWP3M7,
- CHOMBWP4M0,
- CHOMBWP4M1,
- CHOMBWP4M2,
- CHOMBWP4M3,
- CHOMBWP4M4,
- CHOMBWP4M5,
- CHOMBWP4M6,
- CHOMBWP4M7,
- CHOMBWP5M0,
- CHOMBWP5M1,
- CHOMBWP5M2,
- CHOMBWP5M3,
- CHOMBWP5M4,
- CHOMBWP5M5,
- CHOMBWP5M6,
- CHOMBWP5M7,
- CHOMBWP6M0,
- CHOMBWP6M1,
- CHOMBWP6M2,
- CHOMBWP6M3,
- CHOMBWP6M4,
- CHOMBWP6M5,
- CHOMBWP6M6,
- CHOMBWP6M7,
- CHOMBWP7M0,
- CHOMBWP7M1,
- CHOMBWP7M2,
- CHOMBWP7M3,
- CHOMBWP7M4,
- CHOMBWP7M5,
- CHOMBWP7M6,
- CHOMBWP7M7,
-
// The number of chom sensors reported
CHOM_NUM_OF_SENSORS
};
+enum chom_supported_modes
+{
+ CHOM_MODE_NOMINAL,
+ CHOM_MODE_SPS,
+ CHOM_MODE_DPS,
+ CHOM_MODE_DPS_MP,
+ CHOM_MODE_FFO,
+ CHOM_MODE_NOM_PERF,
+ CHOM_MODE_MAX_PERF,
+ CHOM_MODE_FMF,
+ // number of modes required to run Call home
+ NUM_CHOM_MODES
+};
// Call home sensor Structure
struct ChomSensor
{
@@ -167,14 +156,6 @@ struct ChomSensor
typedef struct ChomSensor ChomSensor_t;
-// CPI data structure
-struct ChomCpiData
-{
- uint8_t proc;
- uint32_t cpi;
-}__attribute__ ((__packed__));
-
-typedef struct ChomCpiData ChomCpiData_t;
// Power mode structure
struct ChomPwrMode
@@ -193,8 +174,10 @@ struct ChomNodeData
uint8_t curPwrMode; // the current power mode at the time of the polling event
uint32_t totalTime; // duration of the polling period
uint8_t modeInLog; // the number of different power mode in the polling period
- ChomCpiData_t cpiData[MAX_OCCS]; // Chip Packing Interface data, 5 bytes per processor
+ uint8_t channelFuncIds[MAX_APSS_ADC_CHANNELS];
uint16_t numSensors; // the number of sensors for which call home data was collected
+ // error history counts. Only collect on up to 3 slaves, excluding master
+ error_history_count_t errhCounts[CHOM_MAX_OCCS-1][CHOM_MAX_ERRH_ENTRIES];
} __attribute__ ((__packed__));
typedef struct ChomNodeData ChomNodeData_t;
OpenPOWER on IntegriCloud