diff options
-rw-r--r-- | src/include/usr/htmgt/htmgt_reasoncodes.H | 8 | ||||
-rw-r--r-- | src/usr/htmgt/occError.C | 493 |
2 files changed, 295 insertions, 206 deletions
diff --git a/src/include/usr/htmgt/htmgt_reasoncodes.H b/src/include/usr/htmgt/htmgt_reasoncodes.H index 6d523e752..bd56282ed 100644 --- a/src/include/usr/htmgt/htmgt_reasoncodes.H +++ b/src/include/usr/htmgt/htmgt_reasoncodes.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2014,2016 */ +/* Contributors Listed Below - COPYRIGHT 2014,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -43,9 +43,8 @@ namespace HTMGT HTMGT_MOD_PASS_THRU = 0x09, HTMGT_MOD_CFG_FREQ_POINTS = 0x0A, HTMGT_MOD_WAIT_FOR_CHECKPOINT = 0x0B, + HTMGT_MODID_PROCESS_ELOG = 0x10, HTMGT_MOD_OCC_POLL = 0x76, - HTMGT_MOD_BAD_FRU_CALLOUTS = 0x7D, - HTMGT_MOD_MISMATCHING_SEVERITY = 0x7F, HTMGT_MOD_READ_OCC_SRAM = 0x85, HTMGT_MOD_SEND_OCC_CMD = 0x90, HTMGT_MOD_WRITE_OCC_CMD = 0x91, @@ -89,6 +88,9 @@ namespace HTMGT HTMGT_RC_TARGET_NOT_FUNCTIONAL = HTMGT_COMP_ID | 0x54, HTMGT_RC_OCC_MASTER_NOT_FOUND = HTMGT_COMP_ID | 0x55, HTMGT_RC_OCC_RESET_THREHOLD = HTMGT_COMP_ID | 0x56, + HTMGT_RC_INVALID_OCC_ELOG = HTMGT_COMP_ID | 0x63, + HTMGT_RC_BAD_FRU_CALLOUTS = HTMGT_COMP_ID | 0x7D, + HTMGT_RC_MISMATCHING_SEVERITY = HTMGT_COMP_ID | 0x7F, HTMGT_RC_PCAP_CALC_COMPLETE = HTMGT_COMP_ID | 0xB9, HTMGT_RC_ENERGYSCALE_FFDC = HTMGT_COMP_ID | 0xFD, }; diff --git a/src/usr/htmgt/occError.C b/src/usr/htmgt/occError.C index 14d890a22..0b785a643 100644 --- a/src/usr/htmgt/occError.C +++ b/src/usr/htmgt/occError.C @@ -95,263 +95,350 @@ namespace HTMGT { errlHndl_t l_errlHndl = nullptr; + compId_t l_comp_id = OCCC_COMP_ID; + if (i_source == OCC_ERRSRC_PGPE) + { + l_comp_id = PGPE_COMP_ID; + } + else if (i_source == OCC_ERRSRC_XGPE) + { + l_comp_id = XGPE_COMP_ID; + } + else if (i_source != OCC_ERRSRC_405) + { + TMGT_ERR("occProcessElog: Invalid elog source specified 0x%02X", + i_source); + } + // Read data from SRAM (length must be multiple of 8 bytes) const uint16_t l_length = (i_length) & 0xFFF8; - fapi2::variable_buffer l_buffer(l_length*8); //convert to bits + if ((l_length > 0) && (i_address != 0)) + { + fapi2::variable_buffer l_buffer(l_length*8); //convert to bits // HBOCC is only defined for HTMGT #ifdef CONFIG_HTMGT - l_errlHndl = HBOCC::readSRAM( iv_target, - i_address, - reinterpret_cast<uint64_t*>(l_buffer.pointer()), - l_length ); + l_errlHndl = HBOCC::readSRAM( iv_target, + i_address, + reinterpret_cast<uint64_t*> + (l_buffer.pointer()), + l_length ); #endif - if (nullptr == l_errlHndl) - { - compId_t l_comp_id = OCCC_COMP_ID; - if (i_source == OCC_ERRSRC_PGPE) - { - l_comp_id = PGPE_COMP_ID; - } - else if (i_source == OCC_ERRSRC_XGPE) + if (nullptr == l_errlHndl) { - l_comp_id = XGPE_COMP_ID; - } - - const occErrlEntry_t * l_occElog= reinterpret_cast<occErrlEntry_t*> - (l_buffer.pointer()); + const occErrlEntry_t * l_occElog = + reinterpret_cast<occErrlEntry_t*> (l_buffer.pointer()); - TMGT_BIN("OCC ELOG", l_occElog, 256); + TMGT_BIN("OCC ELOG", l_occElog, 256); - // Get user details section - const occErrlUsrDtls_t *l_usrDtls_ptr = (occErrlUsrDtls_t *) - ((uint8_t*)l_occElog + sizeof(occErrlEntry_t)); + // Get user details section + const occErrlUsrDtls_t *l_usrDtls_ptr = (occErrlUsrDtls_t *) + ((uint8_t*)l_occElog + sizeof(occErrlEntry_t)); - const uint32_t l_occSrc = l_comp_id | l_occElog->reasonCode; - ERRORLOG::errlSeverity_t severity = - ERRORLOG::ERRL_SEV_INFORMATIONAL; + const uint32_t l_occSrc = l_comp_id | l_occElog->reasonCode; + ERRORLOG::errlSeverity_t severity = + ERRORLOG::ERRL_SEV_INFORMATIONAL; - // Translate Severity - const uint8_t l_occSeverity = l_occElog->severity; - if (l_occSeverity < OCC_SEV_ACTION_XLATE_SIZE) - { - severity = - occSeverityErrorActionXlate[l_occSeverity].occErrlSeverity; - } - else - { - TMGT_ERR("occProcessElog: Severity translate failure" - " (severity = 0x%02X)", l_occElog->severity); - } + if (l_occSrc == 0x2A01) + { + // 2A01 is Periodic OCC Telemetry / Call Home data + TMGT_ERR("OCC is reporting Periodic Telemetry Data (0x2A01)" + " - NOT AN ERROR"); + } - // Process Actions - bool l_occReset = false; - elogProcessActions(l_occElog->actions, l_occReset, severity); + // Translate Severity + const uint8_t l_occSeverity = l_occElog->severity; + if (l_occSeverity < OCC_SEV_ACTION_XLATE_SIZE) + { + severity = + occSeverityErrorActionXlate[l_occSeverity].occErrlSeverity; + } + else + { + TMGT_ERR("occProcessElog: Severity translate failure" + " (severity = 0x%02X)", l_occElog->severity); + } - // Need to add WOF reason code to OCC object regardless of - // whether WOF resets are disabled. - if( l_occElog->actions & TMGT_ERRL_ACTIONS_WOF_RESET_REQUIRED ) - { - iv_wofResetReasons |= l_usrDtls_ptr->userData1; - TMGT_ERR("WOF Reset Reasons for OCC%d = 0x%08x", - iv_instance, - iv_wofResetReasons); + // Process Actions + bool l_occReset = false; + elogProcessActions(l_occElog->actions, l_occReset, severity); - } + // Need to add WOF reason code to OCC object regardless of + // whether WOF resets are disabled. + if( l_occElog->actions & TMGT_ERRL_ACTIONS_WOF_RESET_REQUIRED ) + { + iv_wofResetReasons |= l_usrDtls_ptr->userData1; + TMGT_ERR("WOF Reset Reasons for OCC%d = 0x%08x", + iv_instance, + iv_wofResetReasons); - // Check if we need a WOF requested reset - if(iv_needsWofReset == true) - { - TMGT_ERR("WOF Reset detected! SRC = 0x%X", - l_occSrc); + } - // We compare against one less than the threshold because - // the WOF reset count doesn't get incremented until resetPrep - if( iv_wofResetCount < (WOF_RESET_COUNT_THRESHOLD-1) ) + // Check if we need a WOF requested reset + if(iv_needsWofReset == true) { - // Not at WOF reset threshold yet. Set sev to INFO - severity = ERRORLOG::ERRL_SEV_INFORMATIONAL; + TMGT_ERR("WOF Reset detected! SRC = 0x%X", + l_occSrc); + + // We compare against one less than the threshold because + // the WOF reset count doesn't get incremented until + // the resetPrep + if( iv_wofResetCount < (WOF_RESET_COUNT_THRESHOLD-1) ) + { + // Not at WOF reset threshold yet. Set sev to INFO + severity = ERRORLOG::ERRL_SEV_INFORMATIONAL; + } } - } - if (l_occReset == true) - { - iv_needsReset = true; - OccManager::updateSafeModeReason(l_occSrc, iv_instance); - } + if (l_occReset == true) + { + iv_needsReset = true; + OccManager::updateSafeModeReason(l_occSrc, iv_instance); + } - // Create OCC error log - // NOTE: word 4 (used by extended reason code) to save off OCC - // sub component value which is needed to correctly parse - // srcs which have similar uniqueness - // NOTE: SRC tags are NOT required here as these logs will get - // parsed with the OCC src tags - bldErrLog(l_errlHndl, - (htmgtModuleId)(l_usrDtls_ptr->modId & 0x00FF), - (htmgtReasonCode)l_occSrc, // occ reason code - l_usrDtls_ptr->userData1, - l_usrDtls_ptr->userData2, - l_usrDtls_ptr->userData3, - (l_usrDtls_ptr->modId << 16 ) | - l_occElog->extendedRC, // extended reason code - severity); - - // Add callout information - const uint8_t l_max_callouts = l_occElog->maxCallouts; - bool l_bad_fru_data = false; - uint8_t numCallouts = 0; - uint8_t calloutIndex = 0; - while (calloutIndex < l_max_callouts) - { - const occErrlCallout_t callout = - l_occElog->callout[calloutIndex]; - if (callout.type != 0) + // Create OCC error log + // NOTE: word 4 (used by extended reason code) to save off OCC + // sub component value which is needed to correctly parse + // srcs which have similar uniqueness + // NOTE: SRC tags are NOT required here as these logs will get + // parsed with the OCC src tags + bldErrLog(l_errlHndl, + (htmgtModuleId)(l_usrDtls_ptr->modId & 0x00FF), + (htmgtReasonCode)l_occSrc, // occ reason code + l_usrDtls_ptr->userData1, + l_usrDtls_ptr->userData2, + l_usrDtls_ptr->userData3, + (l_usrDtls_ptr->modId << 16 ) | + l_occElog->extendedRC, // extended reason code + severity); + + // Add callout information + const uint8_t l_max_callouts = l_occElog->maxCallouts; + bool l_bad_fru_data = false; + uint8_t numCallouts = 0; + uint8_t calloutIndex = 0; + while (calloutIndex < l_max_callouts) { - HWAS::callOutPriority priority; - bool l_success = true; - l_success = elogXlateSrciPriority(callout.priority, - priority); - if (l_success == true) + const occErrlCallout_t callout = + l_occElog->callout[calloutIndex]; + if (callout.type != 0) { - l_success = elogAddCallout(l_errlHndl, - priority, - callout, - numCallouts); - if (l_success == false) + HWAS::callOutPriority priority; + bool l_success = true; + l_success = elogXlateSrciPriority(callout.priority, + priority); + if (l_success == true) + { + l_success = elogAddCallout(l_errlHndl, + priority, + callout, + numCallouts); + if (l_success == false) + { + l_bad_fru_data = true; + } + } + else { l_bad_fru_data = true; + TMGT_ERR("occProcessElog: Priority translate" + " failure (priority = 0x%02X)", + callout.priority); } } else - { - l_bad_fru_data = true; - TMGT_ERR("occProcessElog: Priority translate" - " failure (priority = 0x%02X)", - callout.priority); - } - } - else - { // make sure all the remaining callout data are zeros, - // otherwise mark bad fru data - const occErrlCallout_t zeros = { 0 }; - while (calloutIndex < l_max_callouts) - { - if (memcmp(&l_occElog->callout[calloutIndex], - &zeros, sizeof(occErrlCallout_t))) + { // make sure all the remaining callout data are zeros, + // otherwise mark bad fru data + const occErrlCallout_t zeros = { 0 }; + while (calloutIndex < l_max_callouts) { - TMGT_ERR("occProcessElog: The remaining" - " callout data should be all zeros"); - l_bad_fru_data = true; - break; + if (memcmp(&l_occElog->callout[calloutIndex], + &zeros, sizeof(occErrlCallout_t))) + { + TMGT_ERR("occProcessElog: The remaining" + " callout data should be all zeros"); + l_bad_fru_data = true; + break; + } + ++calloutIndex; } - ++calloutIndex; + break; } - break; + ++calloutIndex; } - ++calloutIndex; - } - // Any bad fru data found ? - errlHndl_t err2 = nullptr; - if (l_bad_fru_data == true) - { - TMGT_BIN("Callout Data", &l_occElog->callout[0], - sizeof(occErrlCallout)*ERRL_MAX_CALLOUTS); - /*@ - * @errortype - * @refcode LIC_REFCODE - * @subsys EPUB_FIRMWARE_SP - * @reasoncode HTMGT_RC_OCC_ERROR_LOG - * @moduleid HTMGT_MOD_BAD_FRU_CALLOUTS - * @userdata1[0-15] OCC elog id - * @userdata1[16-31] Bad callout index - * @devdesc Bad FRU data received in OCC error log - */ - bldErrLog(err2, HTMGT_MOD_BAD_FRU_CALLOUTS, - HTMGT_RC_OCC_ERROR_LOG, - i_id, calloutIndex, 0, 0, - ERRORLOG::ERRL_SEV_INFORMATIONAL); - ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); - } - // Check callout number and severity - if ((numCallouts == 0) && - (severity != ERRORLOG::ERRL_SEV_INFORMATIONAL)) - { - if (i_source == OCC_ERRSRC_405) + // Any bad fru data found ? + errlHndl_t err2 = nullptr; + if (l_bad_fru_data == true) { - TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d" - " elog_id:0x%02X, severity:0x%0X", - iv_instance, i_id, severity); + TMGT_BIN("Callout Data", &l_occElog->callout[0], + sizeof(occErrlCallout)*ERRL_MAX_CALLOUTS); /*@ * @errortype * @refcode LIC_REFCODE * @subsys EPUB_FIRMWARE_SP - * @reasoncode HTMGT_RC_OCC_ERROR_LOG - * @moduleid HTMGT_MOD_MISMATCHING_SEVERITY + * @reasoncode HTMGT_RC_BAD_FRU_CALLOUTS + * @moduleid HTMGT_MODID_PROCESS_ELOG * @userdata1[0-15] OCC elog id - * @userdata1[16-31] OCC severity - * @devdesc No FRU callouts found for non-info OCC Error Log + * @userdata1[16-31] Bad callout index + * @devdesc Bad FRU data received in OCC error log */ - bldErrLog(err2, HTMGT_MOD_MISMATCHING_SEVERITY, - HTMGT_RC_OCC_ERROR_LOG, - i_id, severity, 0, 0, + bldErrLog(err2, HTMGT_MODID_PROCESS_ELOG, + HTMGT_RC_BAD_FRU_CALLOUTS, + i_id, calloutIndex, 0, 0, ERRORLOG::ERRL_SEV_INFORMATIONAL); ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); } - else + // Check callout number and severity + if ((numCallouts == 0) && + (severity != ERRORLOG::ERRL_SEV_INFORMATIONAL)) { - // Add Processor callout for PGPE/SGPE/XGPE - TMGT_ERR("occProcessElog: Adding processor callout for" - " OCC%d", iv_instance); - TARGETING::ConstTargetHandle_t l_proc_target = - TARGETING::getParentChip(iv_target); - l_errlHndl->addHwCallout(l_proc_target, - HWAS::SRCI_PRIORITY_MED, - HWAS::NO_DECONFIG, - HWAS::GARD_NULL); + if (i_source == OCC_ERRSRC_405) + { + TMGT_ERR("occProcessElog: No FRU callouts found for " + "OCC%d elog_id:0x%02X, severity:0x%0X", + iv_instance, i_id, severity); + /*@ + * @errortype + * @refcode LIC_REFCODE + * @subsys EPUB_FIRMWARE_SP + * @reasoncode HTMGT_RC_MISMATCHING_SEVERITY + * @moduleid HTMGT_MODID_PROCESS_ELOG + * @userdata1[0-15] OCC elog id + * @userdata1[16-31] OCC severity + * @devdesc No FRU callouts found for non-info OCC Error + */ + bldErrLog(err2, HTMGT_MODID_PROCESS_ELOG, + HTMGT_RC_MISMATCHING_SEVERITY, + i_id, severity, 0, 0, + ERRORLOG::ERRL_SEV_INFORMATIONAL); + ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); + } + else + { + // Add Processor callout for PGPE/SGPE/XGPE + TMGT_ERR("occProcessElog: Adding processor callout for" + " OCC%d", iv_instance); + TARGETING::ConstTargetHandle_t l_proc_target = + TARGETING::getParentChip(iv_target); + l_errlHndl->addHwCallout(l_proc_target, + HWAS::SRCI_PRIORITY_MED, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL); + } } - } - if (int_flags_set(FLAG_HALT_ON_OCC_SRC)) - { - // Check if OCC SRC matches our trigger SRC - if ((l_occSrc & 0xFF) == (get_int_flags() >> 24)) + if (int_flags_set(FLAG_HALT_ON_OCC_SRC)) { - TMGT_ERR("occProcessElog: OCC%d reported 0x%04X and " - "HALT_ON_SRC is set. Resets will be disabled", - iv_instance, l_occSrc); - set_int_flags(get_int_flags() | FLAG_RESET_DISABLED); + // Check if OCC SRC matches our trigger SRC + if ((l_occSrc & 0xFF) == (get_int_flags() >> 24)) + { + TMGT_ERR("occProcessElog: OCC%d reported 0x%04X and " + "HALT_ON_SRC is set. Resets will be disabled", + iv_instance, l_occSrc); + set_int_flags(get_int_flags() | FLAG_RESET_DISABLED); + } } - } - // Process force error log to be sent to BMC. - if( (l_occElog->actions & TMGT_ERRL_ACTIONS_FORCE_ERROR_POSTED ) || - (l_occSrc == (OCCC_COMP_ID | 0x01 ) ) ) //GEN_CALLHOME_LOG - { - l_errlHndl->setEselCallhomeInfoEvent(true); - } + // Process force error log to be sent to BMC. + if((l_occElog->actions & TMGT_ERRL_ACTIONS_FORCE_ERROR_POSTED)|| + (l_occSrc == (OCCC_COMP_ID | 0x01 ) ) ) //GEN_CALLHOME_LOG + { + l_errlHndl->setEselCallhomeInfoEvent(true); + } #ifdef CONFIG_CONSOLE_OUTPUT_OCC_COMM - char header[64]; - sprintf(header, "OCC%d ELOG: (0x%04X bytes)", iv_instance, i_length); - dumpToConsole(header, (const uint8_t *)l_occElog, - std::min(i_length,(uint16_t)512)); + char header[64]; + sprintf(header, "OCC%d ELOG: (0x%04X bytes)", iv_instance, + i_length); + dumpToConsole(header, (const uint8_t *)l_occElog, + std::min(i_length,(uint16_t)512)); #endif - // Add full OCC error log data as a User Details section - l_errlHndl->addFFDC(l_comp_id, - l_occElog, - i_length, - 1, // version - 0); // subsection - ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); + // Add full OCC error log data as a User Details section + l_errlHndl->addFFDC(l_comp_id, + l_occElog, + i_length, + 1, // version + 0); // subsection + ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); + } + else + { + TMGT_ERR("occProcessElog: Unable to read elog %d from source " + "0x%02X on OCC%d, SRAM address (0x%08X) length " + "(0x%04X), rc=0x%04X", + i_id, i_source, iv_instance, i_address, i_length, + l_errlHndl->reasonCode()); + ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); + } } else { - TMGT_ERR("occProcessElog: Unable to read elog %d from source " - "0x%02X on OCC%d, SRAM address (0x%08X) length (0x%04X), " - "rc=0x%04X", - i_id, i_source, iv_instance, i_address, i_length, - l_errlHndl->reasonCode()); + TMGT_ERR("occProcessElog: Invalid OCC%d elog data: ID: 0x%02X, " + "Source: 0x%02X, Length: 0x%04X, Address: 0x%08X", + iv_instance, i_id, i_source, i_length, i_address); + + occErrlEntry_t *l_sram_data = NULL; + uint16_t l_rc = 0; + uint8_t l_sev = 0; + uint8_t l_actions = 0; + const uint16_t l_length = 2048; + if (0 != i_address) + { + // Try to read some data from SRAM + fapi2::variable_buffer l_buffer(l_length*8); //convert to bits + // HBOCC is only defined for HTMGT +#ifdef CONFIG_HTMGT + l_errlHndl = HBOCC::readSRAM(iv_target, + i_address, + reinterpret_cast<uint64_t*> + (l_buffer.pointer()), + l_length); +#endif + if (NULL == l_errlHndl) + { + const occErrlEntry_t * l_occElog = + reinterpret_cast<occErrlEntry_t*> (l_buffer.pointer()); + if (l_occElog) + { + l_rc = l_comp_id | l_occElog->reasonCode; + l_sev = l_occElog->severity; + l_actions = l_occElog->actions; + } + } + else + { + TMGT_ERR("occ_process_elog: Failed to read OCC SRAM, " + "rc=0x%04X", l_errlHndl->reasonCode()); + l_errlHndl->collectTrace("HTMGT"); + ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); + } + } + + /*@ + * @errortype + * @refcode LIC_REFCODE + * @subsys EPUB_FIRMWARE_SP + * @reasoncode HTMGT_RC_INVALID_OCC_ELOG + * @moduleid HTMGT_MODID_PROCESS_ELOG + * @userdata1 instance + * @userdata2 error source + * @userdata3 OCC RC + * @userdata4 OCC severity / actions + * @devdesc Invalid OCC error log data + */ + bldErrLog(l_errlHndl, HTMGT_MODID_PROCESS_ELOG, + HTMGT_RC_INVALID_OCC_ELOG, + iv_instance, i_source, l_rc, (l_sev<<16) | (l_actions), + ERRORLOG::ERRL_SEV_UNRECOVERABLE); + if (NULL != l_sram_data) + { + l_errlHndl->addFFDC(l_comp_id, + l_sram_data, + l_length, + 1, // version + 0); // subsection + } ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); } |