diff options
author | Chris Cain <cjcain@us.ibm.com> | 2017-01-24 15:56:29 -0600 |
---|---|---|
committer | Christopher J. Cain <cjcain@us.ibm.com> | 2017-01-27 15:15:32 -0500 |
commit | 6610ab230252ce3a96adbc5ddfeca7cf45ab69b2 (patch) | |
tree | 50a9ec2766c2c0f0e1a7f2eedacc049c44aed697 /src/occ_405/pss | |
parent | 360934dea9355e488206267d7f9fd9b1c753cf16 (diff) | |
download | talos-occ-6610ab230252ce3a96adbc5ddfeca7cf45ab69b2.tar.gz talos-occ-6610ab230252ce3a96adbc5ddfeca7cf45ab69b2.zip |
Add VR Fan and OC support via AVSBUS
- monitor VR Fan (over-temperature) and OC (over-current)
- add VR Fan sensor to poll response (Temperature FRU type: VRM)
- log mfg (manufacturing) error the first time OT or OC is asserted
- add error history counters for each condition (OT and OC, on both Vdd and Vdn)
Change-Id: Ia552aa2cc2db8adebcbbd928c146a057bb120c73
RTC: 132561
RTC: 132560
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/35358
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Diffstat (limited to 'src/occ_405/pss')
-rw-r--r-- | src/occ_405/pss/avsbus.c | 456 |
-rwxr-xr-x | src/occ_405/pss/avsbus.h | 14 |
2 files changed, 464 insertions, 6 deletions
diff --git a/src/occ_405/pss/avsbus.c b/src/occ_405/pss/avsbus.c index 6b76379..6e65b2f 100644 --- a/src/occ_405/pss/avsbus.c +++ b/src/occ_405/pss/avsbus.c @@ -34,6 +34,7 @@ #include "pss_service_codes.h" #include "ssx.h" #include "occ_util.h" +#include "cmdh_fsp_cmds_datacnfg.h" //#define AVSDEBUG @@ -49,12 +50,16 @@ bool G_avsbus_vdn_monitoring = FALSE; extern uint32_t G_nest_frequency_mhz; #define AVSBUS_FREQUENCY_MHZ 10 +extern bool G_vrm_thermal_monitoring; // Number of read failures allowed before elog is created and reset requested. // This should be no longer than 4ms (or it will impact WOF calculations) // (readings are taken every 500us => 500us * 8 = 4ms) const uint8_t MAX_READ_ATTEMPTS = 8; +const uint16_t AVSBUS_STATUS_READ_ERROR = 0xFFFF; +extern data_cnfg_t * G_data_cnfg; + // NOTE: OCC must use Bridge B, because Bridge A is reserved for PGPE // AVS Bus setup that must be done once (common between read/write operations) @@ -275,12 +280,12 @@ void avsbus_read_start(const avsbus_type_e i_type, // HW: Wait for bus op to complete // HW: arbitration between two bridges - // HW: o2s_ongoning: 0 -> 1 + // HW: o2s_ongoing: 0 -> 1 // HW: execution completes // HW: o2s_ongoing 1 -> 0 #ifdef AVSDEBUG - ++*l_trace_count; // DEBUG + ++*l_trace_count; #endif } // end avsbus_read_start() @@ -293,7 +298,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type, { if (isSafeStateRequested()) { - // No need to attempt read if OCC will be reset + // No need to process data if OCC will be reset return 0; } @@ -342,7 +347,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type, // HW: Wait for bus op to complete // HW: arbitration between two bridges - // HW: o2s_ongoning: 0 -> 1 + // HW: o2s_ongoing: 0 -> 1 // HW: execution completes // HW: o2s_ongoing 1 -> 0 @@ -379,7 +384,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type, rc = AVSBUS_ERROR; } } - else if (1 == (l_status >> 63)) // o2s_ongoing + else if (l_status & 0x8000000000000000) // o2s_ongoing { // o2s_ongoing bit 
was still set (operation did not complete) l_failure = TRUE; @@ -508,6 +513,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type, *l_error_count, l_trace_type, l_trace_cmd); G_avsbus_vdd_monitoring = FALSE; G_avsbus_vdn_monitoring = FALSE; + G_vrm_thermal_monitoring = FALSE; errlHndl_t l_err = createErrl(PSS_MID_AVSBUS_READ, rc, exrc, @@ -526,7 +532,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type, } #ifdef AVSDEBUG - ++*l_trace_count; // DEBUG + ++*l_trace_count; #endif return o_reading; @@ -553,3 +559,441 @@ void initiate_avsbus_reads(avsbus_cmdtype_e i_cmdType) } // end initiate_avsbus_reads() + +// Initiate read for vr fan +void initiate_avsbus_read_status() +{ + if (isSafeStateRequested() || (G_vrm_thermal_monitoring == FALSE)) + { + // No need to attempt read if OCC will be reset + return; + } + +#ifdef AVSDEBUG + static uint32_t L_trace_count = 0; + uint32_t DEBUG_TRACE_MAX = 2; +#endif + + unsigned int index; + for (index = 0; index <= 1; ++index) + { + // Determine busses that are being monitored + uint8_t bus = 0xFF; + if ((index == 0) && G_avsbus_vdd_monitoring) + { + bus = G_sysConfigData.avsbus_vdd.bus; + } + else if ((index == 1) && G_avsbus_vdn_monitoring) + { + bus = G_sysConfigData.avsbus_vdn.bus; + } + if (bus != 0xFF) + { + // Determine register based on the bus number + uint32_t o2scmd_reg = OCB_O2SCMD0B; + uint32_t o2swd_reg = OCB_O2SWD0B; + if (0 == bus) + { + o2scmd_reg = OCB_O2SCMD0B; + o2swd_reg = OCB_O2SWD0B; + } + else + { + o2scmd_reg = OCB_O2SCMD1B; + o2swd_reg = OCB_O2SWD1B; + } + +#ifdef AVSDEBUG + if (L_trace_count < DEBUG_TRACE_MAX) + { + TRAC_INFO("initiate_avsbus_read_status: read Status - bus[%d], rail[broadcast]", bus); + } +#endif + + // Write O2SCMD[a][n] + // o2s_clear_sticky_bits = 1 + uint64_t value; + value = 0x4000000000000000; + out64(o2scmd_reg, value); + + // Write O2SWD[a][n] - write commands and initiate hardware operation + // o2s_wdata with content + // AVS Bus command (read staus): + // 0:1 StartCode = 0b01 + 
// 2:3 Cmd = 0b11 (read) + // 4 CmdGroup = 0b0 (AVSBus) + // 5:8 CmdDataType (STATUS = 01110b) + // 9:12 Select (All rails / broadcast = 01111b ) + // 13:28 CmdData (reserved / must be 1s) + // 29:31 CRC + // 01110DDD DRRRR111 11111111 11111CCC + // 01110111 01111111 11111111 11111CCC + value = 0x777FFFF800000000; + // Calculate/add CRC + value |= avs_crc_calculate(value); + out64(o2swd_reg, value); + } + } + + // Read has been started so now just wait for HW to complete + + // HW: Wait for bus op to complete + // HW: arbitration between two bridges + // HW: o2s_ongoing: 0 -> 1 + // HW: execution completes + // HW: o2s_ongoing 1 -> 0 + +#ifdef AVSDEBUG + ++L_trace_count; +#endif + +} // end initiate_avsbus_read_status() + + +// Process AVS Bus read status results (or errors) +// Predictive error will be logged after "VRMs: max_read_timeout" failures +// and a mfg error will be committed. +// Returns the status data or AVSBUS_STATUS_READ_ERROR on error +uint16_t avsbus_read_status(const avsbus_type_e i_type) +{ + if (isSafeStateRequested() || (G_vrm_thermal_monitoring == FALSE)) + { + // No need to process data if OCC will be reset + return 0; + } + + uint16_t o_reading = 0; + bool l_failure = FALSE; + const uint8_t max_read_attempts = G_data_cnfg->thrm_thresh.data[DATA_FRU_VRM].max_read_timeout; + + // Static error counters for each type (Vdd/Vdn) + static uint32_t L_error_count[ERRORCOUNT_MAXTYPES] = {0}; + uint32_t * l_error_count = &L_error_count[i_type]; + + char l_trace_type = 'd'; + avsbusData_t l_data = G_sysConfigData.avsbus_vdd; + if (AVSBUS_VDN == i_type) + { + l_trace_type = 'n'; + l_data = G_sysConfigData.avsbus_vdn; + } + +#ifdef AVSDEBUG + static uint32_t L_trace_count = 0; + uint32_t DEBUG_TRACE_MAX = 2; + if (L_trace_count < DEBUG_TRACE_MAX) + { + TRAC_INFO("avsbus_read_status: Vd%c - bus[%d] rail[%d]", + l_trace_type, l_data.bus, l_data.rail); + } +#endif + + // Determine register based on the bus number + uint32_t o2sst_reg = OCB_O2SST0B; + 
uint32_t o2srd_reg = OCB_O2SRD0B; + if (1 == l_data.bus) + { + o2sst_reg = OCB_O2SST1B; + o2srd_reg = OCB_O2SRD1B; + } + + // HW: Wait for bus op to complete + // HW: arbitration between two bridges + // HW: o2s_ongoing: 0 -> 1 + // HW: execution completes + // HW: o2s_ongoing 1 -> 0 + + // Since read was started in previous tick, it should have already completed + // (no need to poll/wait on o2s_ongoing) + enum occReasonCode rc = OCC_SUCCESS_REASON_CODE; + uint64_t l_status = in64(o2sst_reg); + // OCC O2S Status Register + // 0 o2s_ongoing + // 1:4 reserved + // 5 write_while_bridge_busy_error + // 6 reserved + // 7 FSM error + // 8:63 reserved + // GrrrrBrF rrrrrrrr rrrrrrrr rrrrrrrr + + if (0 != (l_status & 0x0500000000000000)) + { + // error bit was set + l_failure = TRUE; + (*l_error_count)++; + if ((*l_error_count == 1) || (*l_error_count == max_read_attempts)) + { + TRAC_ERR("avsbus_read_status: Error found in Vd%c O2SST[0x%08X] = [0x%08X]", + l_trace_type, o2sst_reg, WORD_HIGH(l_status)); + /* + * @errortype + * @moduleid PSS_MID_AVSBUS_READ + * @reasoncode AVSBUS_ERROR + * @userdata1 AVS Bus type/bus/rail + * @userdata2 status + * @devdesc Error encountered when reading AVS Bus + */ + rc = AVSBUS_ERROR; + } + } + else if (l_status & 0x8000000000000000) // o2s_ongoing + { + // o2s_ongoing bit was still set (operation did not complete) + l_failure = TRUE; + (*l_error_count)++; + if ((*l_error_count == 1) || (*l_error_count == max_read_attempts)) + { + TRAC_ERR("avsbus_read_status: Vd%c timeout waiting for o2s_ongoing change O2SST[0x%08X] = [0x%08X]", + l_trace_type, o2sst_reg, WORD_HIGH(l_status)); + /* + * @errortype + * @moduleid PSS_MID_AVSBUS_READ + * @reasoncode AVSBUS_TIMEOUT + * @userdata1 AVS Bus type/bus/rail + * @userdata2 status + * @devdesc Timeout when reading AVS Bus + */ + rc = AVSBUS_TIMEOUT; + } + } + + if (FALSE == l_failure) + { + // Read the response data + uint64_t value = in64(o2srd_reg); + // AVS Bus response (read status): + // 0:1 
SlaveAck (0b00 from slave indicates good CRC and action was taken) + // 2 0 + // 3:7 StatusResp + // 8:23 CmdData (LSB = 1mV or 10mA) + // 24:28 Reserved (must be all 1s) + // 29:31 CRC + // AA0SSSSS VVVVVVVV VVVVVVVV 11111CCC + + // Validate CRC + const uint64_t crc = avs_crc_calculate(value); + if (crc != (value & AVS_CRC_MASK)) + { + l_failure = TRUE; + (*l_error_count)++; + if ((*l_error_count == 1) || (*l_error_count == max_read_attempts)) + { + TRAC_ERR("avsbus_read_status: CRC mismatch in Vd%c rsp O2SRD[0x%08X] = [0x%08X] (calculated CRC 0x%08X)", + l_trace_type, o2srd_reg, WORD_HIGH(value), WORD_HIGH(crc)); + /* + * @errortype + * @moduleid PSS_MID_AVSBUS_READ + * @reasoncode AVSBUS_CRC_ERROR + * @userdata1 AVS Bus type/bus/rail + * @userdata2 status + * @devdesc CRC error reading AVS Bus + */ + rc = AVSBUS_CRC_ERROR; + } + } + // Check for valid command operation and extract read data + else if (0 == (value & 0xC000000000000000)) + { + // AVS Bus Status: + // 0 VDone + // 1 IOUT_OC_WARNING (over-current) + // 2 VOUT_UV_WARNING (under-voltage) + // 3 IOUT_OT_WARNING (over-temperature) + // 4 POUT_OP_WARNING (over power) + // 5-7 reserved + // 8-15 reserved + o_reading = (value >> 40) & 0x0000FFFF; + +#ifdef AVSDEBUG + static uint16_t L_lastReading = 0; + if ((L_trace_count < DEBUG_TRACE_MAX) || (o_reading != L_lastReading)) + { + TRAC_INFO("avsbus_read_status: Successfully read Vd%c status 0x%04X [0x%08X]", + l_trace_type, o_reading, WORD_HIGH(value)); + L_lastReading = o_reading; + } +#endif + if (*l_error_count) + { + // Trace and clear the error count + TRAC_INFO("avsbus_read_status: Successfully read Vd%c status [0x%08X] (error count=%d)", + l_trace_type, WORD_HIGH(value), *l_error_count); + *l_error_count = 0; + } + } + else + { + l_failure = TRUE; + (*l_error_count)++; + if ((*l_error_count == 1) || (*l_error_count == max_read_attempts)) + { + TRAC_ERR("avsbus_read_status: SlaveAck reported no action taken[0x%08X]", WORD_HIGH(value)); + rc = 
AVSBUS_ERROR; + } + } + } + + if (l_failure) + { + if (*l_error_count == max_read_attempts) + { + TRAC_ERR("avsbus_read_status: Reached %d consecutive Vd%c errors reading status", + *l_error_count, l_trace_type); + G_avsbus_vdd_monitoring = FALSE; + G_avsbus_vdn_monitoring = FALSE; + G_vrm_thermal_monitoring = FALSE; + errlHndl_t l_err = createErrl(PSS_MID_AVSBUS_READ, + rc, + ERC_AVSBUS_STATUS_FAILURE, + ERRL_SEV_PREDICTIVE, + NULL, + DEFAULT_TRACE_SIZE, + (i_type << 16) | (l_data.bus << 8) | l_data.rail, + WORD_HIGH(l_status)); + setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR); + // add processor callout + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_MED); + commitErrl(&l_err); + } + o_reading = AVSBUS_STATUS_READ_ERROR; + } + +#ifdef AVSDEBUG + ++L_trace_count; // DEBUG +#endif + + return o_reading; + +} // end avsbus_read_status() + + +// Read the status from AVS Bus and return 1 if over-temperature was found for either bus +// or 0 if no OT was found. 0xFF will be returned if there was an error reading status. +// Error history counters will be incremented for any over-temp/over-current condition. +// Mfg error will be logged for the first OT or first OC condition. 
+uint8_t process_avsbus_status() +{ + uint8_t foundOT = 0; + uint8_t foundOC = 0; + uint16_t vdd_status = 0; + uint16_t vdn_status = 0; + + if (G_vrm_thermal_monitoring) + { + if (G_avsbus_vdd_monitoring) + { + vdd_status = avsbus_read_status(AVSBUS_VDD); + if (vdd_status != AVSBUS_STATUS_READ_ERROR) + { + if (vdd_status & AVSBUS_STATUS_OVER_TEMPERATURE_MASK) + { + foundOT = 1; + INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDD_OVER_TEMPERATURE); + } + + if (vdd_status & AVSBUS_STATUS_OVER_CURRENT_MASK) + { + foundOC = 1; + INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDD_OVER_CURRENT); + } + } + else + { + // 0xFF indicates error reading status + foundOT = 0xFF; + } + } + if (G_avsbus_vdn_monitoring) + { + vdn_status = avsbus_read_status(AVSBUS_VDN); + if (vdn_status != AVSBUS_STATUS_READ_ERROR) + { + if (vdn_status & AVSBUS_STATUS_OVER_TEMPERATURE_MASK) + { + // if no Vdd error, update OT result + if (foundOT != 0xFF) + { + foundOT = 1; + } + INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDN_OVER_TEMPERATURE); + } + + if (vdn_status & AVSBUS_STATUS_OVER_CURRENT_MASK) + { + foundOC = 1; + INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDN_OVER_CURRENT); + } + } + else + { + // 0xFF indicates error reading status + foundOT = 0xFF; + } + } + + // Log an error the first time either condition is asserted + static bool loggedOT = FALSE; + static bool loggedOC = FALSE; + errlHndl_t l_err; + if ((foundOT == 1) && !loggedOT) + { + loggedOT = TRUE; + /* @ + * @errortype + * @moduleid PSS_MID_AVSBUS_READ + * @reasoncode VRM_VRFAN_WARNING + * @userdata1 Vdd Status + * @userdata2 Vdn Status + * @devdesc VRFAN / over-temperature asserted + */ + l_err = createErrl(PSS_MID_AVSBUS_READ, + VRM_VRFAN_WARNING, + OCC_NO_EXTENDED_RC, + ERRL_SEV_INFORMATIONAL, + NULL, + DEFAULT_TRACE_SIZE, + vdd_status, vdn_status); + setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR); + // add processor callout + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_MED); + commitErrl(&l_err); + } 
+ if ((foundOC == 1) && !loggedOC) + { + loggedOC = TRUE; + /* @ + * @errortype + * @moduleid PSS_MID_AVSBUS_READ + * @reasoncode VRM_OVER_CURRENT_WARNING + * @userdata1 Vdd Status + * @userdata2 Vdn Status + * @devdesc Output over-current asserted + */ + l_err = createErrl(PSS_MID_AVSBUS_READ, + VRM_OVER_CURRENT_WARNING, + OCC_NO_EXTENDED_RC, + ERRL_SEV_INFORMATIONAL, + NULL, + DEFAULT_TRACE_SIZE, + vdd_status, vdn_status); + setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR); + // add processor callout + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.proc_huid, + ERRL_CALLOUT_PRIORITY_MED); + commitErrl(&l_err); + } + } + + return foundOT; + +} // end process_avsbus_status() + + diff --git a/src/occ_405/pss/avsbus.h b/src/occ_405/pss/avsbus.h index 80b91cf..49a2af9 100755 --- a/src/occ_405/pss/avsbus.h +++ b/src/occ_405/pss/avsbus.h @@ -31,6 +31,11 @@ extern bool G_avsbus_vdd_monitoring; extern bool G_avsbus_vdn_monitoring; +#define AVSBUS_STATUS_OVER_CURRENT_MASK 0x4000 +#define AVSBUS_STATUS_UNDER_VOLTAGE_MASK 0x2000 +#define AVSBUS_STATUS_OVER_TEMPERATURE_MASK 0x1000 +#define AVSBUS_STATUS_OVER_POWER_MASK 0x0800 + typedef enum { AVSBUS_VDD = 0x00, @@ -59,5 +64,14 @@ void initiate_avsbus_reads(avsbus_cmdtype_e i_cmdType); uint16_t avsbus_read(const avsbus_type_e i_type, const avsbus_cmdtype_e i_cmdtype); +// Initiate read of AVS Bus Status +// (results can then be read on the next tick) +void initiate_avsbus_read_status(); + +// Read the status from AVS Bus and return 1 if over-temperature was found for either bus +// or 0 if no OT was found. 0xFF will be returned if there was an error reading status +// on either bus. Mfg error will be logged for the first OT or first OC condition. +// Error history counters will be incremented for any over-temp/over-current condition. +uint8_t process_avsbus_status(); #endif //_AVSBUS_H |