summaryrefslogtreecommitdiffstats
path: root/src/occ_405/pss
diff options
context:
space:
mode:
authorChris Cain <cjcain@us.ibm.com>2017-01-24 15:56:29 -0600
committerChristopher J. Cain <cjcain@us.ibm.com>2017-01-27 15:15:32 -0500
commit6610ab230252ce3a96adbc5ddfeca7cf45ab69b2 (patch)
tree50a9ec2766c2c0f0e1a7f2eedacc049c44aed697 /src/occ_405/pss
parent360934dea9355e488206267d7f9fd9b1c753cf16 (diff)
downloadtalos-occ-6610ab230252ce3a96adbc5ddfeca7cf45ab69b2.tar.gz
talos-occ-6610ab230252ce3a96adbc5ddfeca7cf45ab69b2.zip
Add VR Fan and OC support via AVSBUS
- monitor VR Fan (over-temperature) and OC (over-current) - add VR Fan sensor to poll response (Temperature FRU type: VRM) - log mfg error for OC - add error history counters for each Change-Id: Ia552aa2cc2db8adebcbbd928c146a057bb120c73 RTC: 132561 RTC: 132560 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/35358 Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: Martha Broyles <mbroyles@us.ibm.com> Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Diffstat (limited to 'src/occ_405/pss')
-rw-r--r--src/occ_405/pss/avsbus.c456
-rwxr-xr-xsrc/occ_405/pss/avsbus.h14
2 files changed, 464 insertions, 6 deletions
diff --git a/src/occ_405/pss/avsbus.c b/src/occ_405/pss/avsbus.c
index 6b76379..6e65b2f 100644
--- a/src/occ_405/pss/avsbus.c
+++ b/src/occ_405/pss/avsbus.c
@@ -34,6 +34,7 @@
#include "pss_service_codes.h"
#include "ssx.h"
#include "occ_util.h"
+#include "cmdh_fsp_cmds_datacnfg.h"
//#define AVSDEBUG
@@ -49,12 +50,16 @@ bool G_avsbus_vdn_monitoring = FALSE;
extern uint32_t G_nest_frequency_mhz;
#define AVSBUS_FREQUENCY_MHZ 10
+extern bool G_vrm_thermal_monitoring;
// Number of read failures allowed before elog is created and reset requested.
// This should be no longer than 4ms (or it will impact WOF calculations)
// (readings are taken every 500us => 500us * 8 = 4ms)
const uint8_t MAX_READ_ATTEMPTS = 8;
+const uint16_t AVSBUS_STATUS_READ_ERROR = 0xFFFF;
+extern data_cnfg_t * G_data_cnfg;
+
// NOTE: OCC must use Bridge B, because Bridge A is reserved for PGPE
// AVS Bus setup that must be done once (common between read/write operations)
@@ -275,12 +280,12 @@ void avsbus_read_start(const avsbus_type_e i_type,
// HW: Wait for bus op to complete
// HW: arbitration between two bridges
- // HW: o2s_ongoning: 0 -> 1
+ // HW: o2s_ongoing: 0 -> 1
// HW: execution completes
// HW: o2s_ongoing 1 -> 0
#ifdef AVSDEBUG
- ++*l_trace_count; // DEBUG
+ ++*l_trace_count;
#endif
} // end avsbus_read_start()
@@ -293,7 +298,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type,
{
if (isSafeStateRequested())
{
- // No need to attempt read if OCC will be reset
+ // No need to process data if OCC will be reset
return 0;
}
@@ -342,7 +347,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type,
// HW: Wait for bus op to complete
// HW: arbitration between two bridges
- // HW: o2s_ongoning: 0 -> 1
+ // HW: o2s_ongoing: 0 -> 1
// HW: execution completes
// HW: o2s_ongoing 1 -> 0
@@ -379,7 +384,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type,
rc = AVSBUS_ERROR;
}
}
- else if (1 == (l_status >> 63)) // o2s_ongoing
+ else if (l_status & 0x8000000000000000) // o2s_ongoing
{
// o2s_ongoing bit was still set (operation did not complete)
l_failure = TRUE;
@@ -508,6 +513,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type,
*l_error_count, l_trace_type, l_trace_cmd);
G_avsbus_vdd_monitoring = FALSE;
G_avsbus_vdn_monitoring = FALSE;
+ G_vrm_thermal_monitoring = FALSE;
errlHndl_t l_err = createErrl(PSS_MID_AVSBUS_READ,
rc,
exrc,
@@ -526,7 +532,7 @@ uint16_t avsbus_read(const avsbus_type_e i_type,
}
#ifdef AVSDEBUG
- ++*l_trace_count; // DEBUG
+ ++*l_trace_count;
#endif
return o_reading;
@@ -553,3 +559,441 @@ void initiate_avsbus_reads(avsbus_cmdtype_e i_cmdType)
} // end initiate_avsbus_reads()
+
+// Initiate read of AVS Bus status on each monitored bus (used for VR Fan / over-current monitoring)
+void initiate_avsbus_read_status()
+{
+ if (isSafeStateRequested() || (G_vrm_thermal_monitoring == FALSE))
+ {
+ // No need to attempt read if OCC will be reset or VRM thermal monitoring is disabled
+ return;
+ }
+
+#ifdef AVSDEBUG
+ static uint32_t L_trace_count = 0;
+ uint32_t DEBUG_TRACE_MAX = 2;
+#endif
+
+ unsigned int index;
+ for (index = 0; index <= 1; ++index)
+ {
+ // Determine busses that are being monitored
+ uint8_t bus = 0xFF;
+ if ((index == 0) && G_avsbus_vdd_monitoring)
+ {
+ bus = G_sysConfigData.avsbus_vdd.bus;
+ }
+ else if ((index == 1) && G_avsbus_vdn_monitoring)
+ {
+ bus = G_sysConfigData.avsbus_vdn.bus;
+ }
+ if (bus != 0xFF)
+ {
+ // Determine register based on the bus number
+ uint32_t o2scmd_reg = OCB_O2SCMD0B;
+ uint32_t o2swd_reg = OCB_O2SWD0B;
+ if (0 == bus)
+ {
+ o2scmd_reg = OCB_O2SCMD0B;
+ o2swd_reg = OCB_O2SWD0B;
+ }
+ else
+ {
+ o2scmd_reg = OCB_O2SCMD1B;
+ o2swd_reg = OCB_O2SWD1B;
+ }
+
+#ifdef AVSDEBUG
+ if (L_trace_count < DEBUG_TRACE_MAX)
+ {
+ TRAC_INFO("initiate_avsbus_read_status: read Status - bus[%d], rail[broadcast]", bus);
+ }
+#endif
+
+ // Write O2SCMD[a][n]
+ // o2s_clear_sticky_bits = 1
+ uint64_t value;
+ value = 0x4000000000000000;
+ out64(o2scmd_reg, value);
+
+ // Write O2SWD[a][n] - write commands and initiate hardware operation
+ // o2s_wdata with content
+ // AVS Bus command (read status):
+ // 0:1 StartCode = 0b01
+ // 2:3 Cmd = 0b11 (read)
+ // 4 CmdGroup = 0b0 (AVSBus)
+ // 5:8 CmdDataType (STATUS = 0b1110)
+ // 9:12 Select (All rails / broadcast = 0b1111)
+ // 13:28 CmdData (reserved / must be 1s)
+ // 29:31 CRC
+ // 01110DDD DRRRR111 11111111 11111CCC
+ // 01110111 01111111 11111111 11111CCC
+ value = 0x777FFFF800000000;
+ // Calculate/add CRC
+ value |= avs_crc_calculate(value);
+ out64(o2swd_reg, value);
+ }
+ }
+
+ // Read has been started so now just wait for HW to complete
+
+ // HW: Wait for bus op to complete
+ // HW: arbitration between two bridges
+ // HW: o2s_ongoing: 0 -> 1
+ // HW: execution completes
+ // HW: o2s_ongoing 1 -> 0
+
+#ifdef AVSDEBUG
+ ++L_trace_count;
+#endif
+
+} // end initiate_avsbus_read_status()
+
+
+// Process AVS Bus read status results (or errors)
+// Predictive error will be logged after "VRMs: max_read_timeout" failures
+// and a mfg error will be committed.
+// Returns the status data, AVSBUS_STATUS_READ_ERROR on error, or 0 when nothing is processed (OCC reset pending or VRM thermal monitoring disabled)
+uint16_t avsbus_read_status(const avsbus_type_e i_type)
+{
+ if (isSafeStateRequested() || (G_vrm_thermal_monitoring == FALSE))
+ {
+ // No need to process data if OCC will be reset or VRM thermal monitoring is disabled
+ return 0;
+ }
+
+ uint16_t o_reading = 0;
+ bool l_failure = FALSE;
+ const uint8_t max_read_attempts = G_data_cnfg->thrm_thresh.data[DATA_FRU_VRM].max_read_timeout;
+
+ // Static error counters for each type (Vdd/Vdn)
+ static uint32_t L_error_count[ERRORCOUNT_MAXTYPES] = {0};
+ uint32_t * l_error_count = &L_error_count[i_type];
+
+ char l_trace_type = 'd';
+ avsbusData_t l_data = G_sysConfigData.avsbus_vdd;
+ if (AVSBUS_VDN == i_type)
+ {
+ l_trace_type = 'n';
+ l_data = G_sysConfigData.avsbus_vdn;
+ }
+
+#ifdef AVSDEBUG
+ static uint32_t L_trace_count = 0;
+ uint32_t DEBUG_TRACE_MAX = 2;
+ if (L_trace_count < DEBUG_TRACE_MAX)
+ {
+ TRAC_INFO("avsbus_read_status: Vd%c - bus[%d] rail[%d]",
+ l_trace_type, l_data.bus, l_data.rail);
+ }
+#endif
+
+ // Determine register based on the bus number
+ uint32_t o2sst_reg = OCB_O2SST0B;
+ uint32_t o2srd_reg = OCB_O2SRD0B;
+ if (1 == l_data.bus)
+ {
+ o2sst_reg = OCB_O2SST1B;
+ o2srd_reg = OCB_O2SRD1B;
+ }
+
+ // HW: Wait for bus op to complete
+ // HW: arbitration between two bridges
+ // HW: o2s_ongoing: 0 -> 1
+ // HW: execution completes
+ // HW: o2s_ongoing 1 -> 0
+
+ // Since read was started in previous tick, it should have already completed
+ // (no need to poll/wait on o2s_ongoing)
+ enum occReasonCode rc = OCC_SUCCESS_REASON_CODE;
+ uint64_t l_status = in64(o2sst_reg);
+ // OCC O2S Status Register
+ // 0 o2s_ongoing
+ // 1:4 reserved
+ // 5 write_while_bridge_busy_error
+ // 6 reserved
+ // 7 FSM error
+ // 8:63 reserved
+ // GrrrrBrF rrrrrrrr rrrrrrrr rrrrrrrr
+
+ if (0 != (l_status & 0x0500000000000000))
+ {
+ // error bit was set
+ l_failure = TRUE;
+ (*l_error_count)++;
+ if ((*l_error_count == 1) || (*l_error_count == max_read_attempts))
+ {
+ TRAC_ERR("avsbus_read_status: Error found in Vd%c O2SST[0x%08X] = [0x%08X]",
+ l_trace_type, o2sst_reg, WORD_HIGH(l_status));
+ /*
+ * @errortype
+ * @moduleid PSS_MID_AVSBUS_READ
+ * @reasoncode AVSBUS_ERROR
+ * @userdata1 AVS Bus type/bus/rail
+ * @userdata2 status
+ * @devdesc Error encountered when reading AVS Bus
+ */
+ rc = AVSBUS_ERROR;
+ }
+ }
+ else if (l_status & 0x8000000000000000) // o2s_ongoing
+ {
+ // o2s_ongoing bit was still set (operation did not complete)
+ l_failure = TRUE;
+ (*l_error_count)++;
+ if ((*l_error_count == 1) || (*l_error_count == max_read_attempts))
+ {
+ TRAC_ERR("avsbus_read_status: Vd%c timeout waiting for o2s_ongoing change O2SST[0x%08X] = [0x%08X]",
+ l_trace_type, o2sst_reg, WORD_HIGH(l_status));
+ /*
+ * @errortype
+ * @moduleid PSS_MID_AVSBUS_READ
+ * @reasoncode AVSBUS_TIMEOUT
+ * @userdata1 AVS Bus type/bus/rail
+ * @userdata2 status
+ * @devdesc Timeout when reading AVS Bus
+ */
+ rc = AVSBUS_TIMEOUT;
+ }
+ }
+
+ if (FALSE == l_failure)
+ {
+ // Read the response data
+ uint64_t value = in64(o2srd_reg);
+ // AVS Bus response (read status):
+ // 0:1 SlaveAck (0b00 from slave indicates good CRC and action was taken)
+ // 2 0
+ // 3:7 StatusResp
+ // 8:23 CmdData (AVS Bus Status bits - see bit definitions below)
+ // 24:28 Reserved (must be all 1s)
+ // 29:31 CRC
+ // AA0SSSSS VVVVVVVV VVVVVVVV 11111CCC
+
+ // Validate CRC
+ const uint64_t crc = avs_crc_calculate(value);
+ if (crc != (value & AVS_CRC_MASK))
+ {
+ l_failure = TRUE;
+ (*l_error_count)++;
+ if ((*l_error_count == 1) || (*l_error_count == max_read_attempts))
+ {
+ TRAC_ERR("avsbus_read_status: CRC mismatch in Vd%c rsp O2SRD[0x%08X] = [0x%08X] (calculated CRC 0x%08X)",
+ l_trace_type, o2srd_reg, WORD_HIGH(value), WORD_HIGH(crc));
+ /*
+ * @errortype
+ * @moduleid PSS_MID_AVSBUS_READ
+ * @reasoncode AVSBUS_CRC_ERROR
+ * @userdata1 AVS Bus type/bus/rail
+ * @userdata2 status
+ * @devdesc CRC error reading AVS Bus
+ */
+ rc = AVSBUS_CRC_ERROR;
+ }
+ }
+ // Check for valid command operation and extract read data
+ else if (0 == (value & 0xC000000000000000))
+ {
+ // AVS Bus Status:
+ // 0 VDone
+ // 1 IOUT_OC_WARNING (over-current)
+ // 2 VOUT_UV_WARNING (under-voltage)
+ // 3 IOUT_OT_WARNING (over-temperature)
+ // 4 POUT_OP_WARNING (over power)
+ // 5-7 reserved
+ // 8-15 reserved
+ o_reading = (value >> 40) & 0x0000FFFF;
+
+#ifdef AVSDEBUG
+ static uint16_t L_lastReading = 0;
+ if ((L_trace_count < DEBUG_TRACE_MAX) || (o_reading != L_lastReading))
+ {
+ TRAC_INFO("avsbus_read_status: Successfully read Vd%c status 0x%04X [0x%08X]",
+ l_trace_type, o_reading, WORD_HIGH(value));
+ L_lastReading = o_reading;
+ }
+#endif
+ if (*l_error_count)
+ {
+ // Trace and clear the error count
+ TRAC_INFO("avsbus_read_status: Successfully read Vd%c status [0x%08X] (error count=%d)",
+ l_trace_type, WORD_HIGH(value), *l_error_count);
+ *l_error_count = 0;
+ }
+ }
+ else
+ {
+ l_failure = TRUE;
+ (*l_error_count)++;
+ if ((*l_error_count == 1) || (*l_error_count == max_read_attempts))
+ {
+ TRAC_ERR("avsbus_read_status: SlaveAck reported no action taken[0x%08X]", WORD_HIGH(value));
+ rc = AVSBUS_ERROR;
+ }
+ }
+ }
+
+ if (l_failure)
+ {
+ if (*l_error_count == max_read_attempts)
+ {
+ TRAC_ERR("avsbus_read_status: Reached %d consecutive Vd%c errors reading status",
+ *l_error_count, l_trace_type);
+ G_avsbus_vdd_monitoring = FALSE;
+ G_avsbus_vdn_monitoring = FALSE;
+ G_vrm_thermal_monitoring = FALSE;
+ errlHndl_t l_err = createErrl(PSS_MID_AVSBUS_READ,
+ rc,
+ ERC_AVSBUS_STATUS_FAILURE,
+ ERRL_SEV_PREDICTIVE,
+ NULL,
+ DEFAULT_TRACE_SIZE,
+ (i_type << 16) | (l_data.bus << 8) | l_data.rail,
+ WORD_HIGH(l_status));
+ setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR);
+ // add processor callout
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.proc_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
+ commitErrl(&l_err);
+ }
+ o_reading = AVSBUS_STATUS_READ_ERROR;
+ }
+
+#ifdef AVSDEBUG
+ ++L_trace_count; // DEBUG
+#endif
+
+ return o_reading;
+
+} // end avsbus_read_status()
+
+
+// Read the status from AVS Bus and return 1 if over-temperature was found for either bus
+// or 0 if no OT was found. 0xFF will be returned if there was an error reading status.
+// Error history counters will be incremented for any over-temp/over-current condition.
+// Mfg error will be logged for the first OT or first OC condition.
+uint8_t process_avsbus_status()
+{
+ uint8_t foundOT = 0;
+ uint8_t foundOC = 0;
+ uint16_t vdd_status = 0;
+ uint16_t vdn_status = 0;
+
+ if (G_vrm_thermal_monitoring)
+ {
+ if (G_avsbus_vdd_monitoring)
+ {
+ vdd_status = avsbus_read_status(AVSBUS_VDD);
+ if (vdd_status != AVSBUS_STATUS_READ_ERROR)
+ {
+ if (vdd_status & AVSBUS_STATUS_OVER_TEMPERATURE_MASK)
+ {
+ foundOT = 1;
+ INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDD_OVER_TEMPERATURE);
+ }
+
+ if (vdd_status & AVSBUS_STATUS_OVER_CURRENT_MASK)
+ {
+ foundOC = 1;
+ INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDD_OVER_CURRENT);
+ }
+ }
+ else
+ {
+ // 0xFF indicates error reading status
+ foundOT = 0xFF;
+ }
+ }
+ if (G_avsbus_vdn_monitoring)
+ {
+ vdn_status = avsbus_read_status(AVSBUS_VDN);
+ if (vdn_status != AVSBUS_STATUS_READ_ERROR)
+ {
+ if (vdn_status & AVSBUS_STATUS_OVER_TEMPERATURE_MASK)
+ {
+ // if no Vdd error, update OT result
+ if (foundOT != 0xFF)
+ {
+ foundOT = 1;
+ }
+ INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDN_OVER_TEMPERATURE);
+ }
+
+ if (vdn_status & AVSBUS_STATUS_OVER_CURRENT_MASK)
+ {
+ foundOC = 1;
+ INCREMENT_ERR_HISTORY(ERR_AVSBUS_VDN_OVER_CURRENT);
+ }
+ }
+ else
+ {
+ // 0xFF indicates error reading status
+ foundOT = 0xFF;
+ }
+ }
+
+ // Log an error the first time either condition is asserted
+ static bool loggedOT = FALSE;
+ static bool loggedOC = FALSE;
+ errlHndl_t l_err;
+ if ((foundOT == 1) && !loggedOT)
+ {
+ loggedOT = TRUE;
+ /* @
+ * @errortype
+ * @moduleid PSS_MID_AVSBUS_READ
+ * @reasoncode VRM_VRFAN_WARNING
+ * @userdata1 Vdd Status
+ * @userdata2 Vdn Status
+ * @devdesc VRFAN / over-temperature asserted
+ */
+ l_err = createErrl(PSS_MID_AVSBUS_READ,
+ VRM_VRFAN_WARNING,
+ OCC_NO_EXTENDED_RC,
+ ERRL_SEV_INFORMATIONAL,
+ NULL,
+ DEFAULT_TRACE_SIZE,
+ vdd_status, vdn_status);
+ setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR);
+ // add processor callout
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.proc_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
+ commitErrl(&l_err);
+ }
+ if ((foundOC == 1) && !loggedOC)
+ {
+ loggedOC = TRUE;
+ /* @
+ * @errortype
+ * @moduleid PSS_MID_AVSBUS_READ
+ * @reasoncode VRM_OVER_CURRENT_WARNING
+ * @userdata1 Vdd Status
+ * @userdata2 Vdn Status
+ * @devdesc Output over-current asserted
+ */
+ l_err = createErrl(PSS_MID_AVSBUS_READ,
+ VRM_OVER_CURRENT_WARNING,
+ OCC_NO_EXTENDED_RC,
+ ERRL_SEV_INFORMATIONAL,
+ NULL,
+ DEFAULT_TRACE_SIZE,
+ vdd_status, vdn_status);
+ setErrlActions(l_err, ERRL_ACTIONS_MANUFACTURING_ERROR);
+ // add processor callout
+ addCalloutToErrl(l_err,
+ ERRL_CALLOUT_TYPE_HUID,
+ G_sysConfigData.proc_huid,
+ ERRL_CALLOUT_PRIORITY_MED);
+ commitErrl(&l_err);
+ }
+ }
+
+ return foundOT;
+
+} // end process_avsbus_status()
+
+
diff --git a/src/occ_405/pss/avsbus.h b/src/occ_405/pss/avsbus.h
index 80b91cf..49a2af9 100755
--- a/src/occ_405/pss/avsbus.h
+++ b/src/occ_405/pss/avsbus.h
@@ -31,6 +31,11 @@
extern bool G_avsbus_vdd_monitoring;
extern bool G_avsbus_vdn_monitoring;
+#define AVSBUS_STATUS_OVER_CURRENT_MASK 0x4000
+#define AVSBUS_STATUS_UNDER_VOLTAGE_MASK 0x2000
+#define AVSBUS_STATUS_OVER_TEMPERATURE_MASK 0x1000
+#define AVSBUS_STATUS_OVER_POWER_MASK 0x0800
+
typedef enum
{
AVSBUS_VDD = 0x00,
@@ -59,5 +64,14 @@ void initiate_avsbus_reads(avsbus_cmdtype_e i_cmdType);
uint16_t avsbus_read(const avsbus_type_e i_type,
const avsbus_cmdtype_e i_cmdtype);
+// Initiate read of AVS Bus Status
+// (results can then be read on the next tick)
+void initiate_avsbus_read_status();
+
+// Read the status from AVS Bus and return 1 if over-temperature was found for either bus
+// or 0 if no OT was found. 0xFF will be returned if there was an error reading status
+// on either bus. Mfg error will be logged for the first OT or first OC condition.
+// Error history counters will be incremented for any over-temp/over-current condition.
+uint8_t process_avsbus_status();
#endif //_AVSBUS_H
OpenPOWER on IntegriCloud