diff options
author | Matt Derksen <mderkse1@us.ibm.com> | 2017-10-23 15:17:24 -0500 |
---|---|---|
committer | William G. Hoffa <wghoffa@us.ibm.com> | 2017-11-09 17:47:52 -0500 |
commit | 2b4e2315094efa8eb3e5b45480418bd86806a25f (patch) | |
tree | ed2223d8cb9e5323e337c9c97c2f8bafcb98041f /src | |
parent | 9fec69bc023ce50d718f4430e5dd7c6f7b2cd810 (diff) | |
download | talos-hostboot-2b4e2315094efa8eb3e5b45480418bd86806a25f.tar.gz talos-hostboot-2b4e2315094efa8eb3e5b45480418bd86806a25f.zip |
Send down OCC info logs to BMC for call-home
This creates a new eSEL record type (0xDD) to display
informational callhome logs. The OCC will send
down telemetry information to track the general health
of the system using this new log.
Change-Id: I0319798554c4e48c287953bd2d0de8352bfc4909
RTC:180324
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/48776
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian E. Bakke <bbakke@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/include/usr/errl/errlentry.H | 37 | ||||
-rw-r--r-- | src/include/usr/errl/errlmanager.H | 12 | ||||
-rw-r--r-- | src/include/usr/ipmi/ipmisel.H | 23 | ||||
-rw-r--r-- | src/usr/errl/errlentry.C | 9 | ||||
-rw-r--r-- | src/usr/errl/errlmanager_common.C | 43 | ||||
-rw-r--r-- | src/usr/ipmi/ipmisel.C | 44 | ||||
-rw-r--r-- | src/usr/targeting/common/xmltohb/attribute_types_openpower.xml | 12 | ||||
-rw-r--r-- | src/usr/targeting/common/xmltohb/target_types_openpower.xml | 1 | ||||
-rw-r--r-- | src/usr/util/runtime/rt_cmds.C | 57 |
9 files changed, 204 insertions, 34 deletions
diff --git a/src/include/usr/errl/errlentry.H b/src/include/usr/errl/errlentry.H index 555bcb4c1..4159d867d 100644 --- a/src/include/usr/errl/errlentry.H +++ b/src/include/usr/errl/errlentry.H @@ -612,6 +612,24 @@ public: */ std::vector<void*> getUDSections(compId_t i_compId, uint8_t i_subSect); + /** + * @brief set iv_eselCallhomeInfoEvent + * + * When true, send this error as a special callhome + * type of eSEL to the BMC + * + * @return nothing + */ + void setEselCallhomeInfoEvent(bool i_valid); + + /** + * @brief get iv_eselCallhomeInfoEvent + * + * @return true if this log should result in a callhome event type eSEL, + * false otherwise + */ + bool getEselCallhomeInfoEvent(); + private: /** @@ -788,6 +806,7 @@ private: */ bool getSkipShowingLog(); + /** * @brief Sets internal flag to indicate if this log should be * saved to PNOR and sent to the BMC @@ -838,6 +857,9 @@ private: //BMC, or displayed in the console bool iv_skipShowingLog; + // when true, send this special type of eSEL to the BMC + // This is used to send OCC informational errors up to BMC + bool iv_eselCallhomeInfoEvent; }; @@ -984,6 +1006,21 @@ inline bool ErrlEntry::getSkipShowingLog() return iv_skipShowingLog; } +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +inline void ErrlEntry::setEselCallhomeInfoEvent(bool i_valid) +{ + iv_eselCallhomeInfoEvent = i_valid; + return; +} + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +inline bool ErrlEntry::getEselCallhomeInfoEvent() +{ + return iv_eselCallhomeInfoEvent; +} + } // End namespace diff --git a/src/include/usr/errl/errlmanager.H b/src/include/usr/errl/errlmanager.H index 174b55725..93165a821 100644 --- a/src/include/usr/errl/errlmanager.H +++ b/src/include/usr/errl/errlmanager.H @@ -607,6 +607,18 @@ private: * @param[in/out] 
io_err - errorlog that's being checked and updatd */ void setErrlSkipFlag(errlHndl_t io_err); + +#ifdef CONFIG_BMC_IPMI + /** + * @brief Helper function to grab the value of + * ATTR_ALLOW_CALLHOME_ESELS_TO_BMC and + * return it as a boolean value + * + * @return true if these ESELs are allowed, false otherwise + */ + bool allowCallHomeEselsToBmc(void); +#endif + }; diff --git a/src/include/usr/ipmi/ipmisel.H b/src/include/usr/ipmi/ipmisel.H index caf62f6e4..961f56f5f 100644 --- a/src/include/usr/ipmi/ipmisel.H +++ b/src/include/usr/ipmi/ipmisel.H @@ -76,15 +76,28 @@ namespace IPMISEL * @param[in] size of eSEL data * @param[in] eid of errorlog for this eSEL (for ack) * @param[in] callout list,which has sel event details + * @param[in] is eSEL for informational call home error */ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize, - uint32_t i_eid,std::vector<sel_info_t*>&i_calloutList); + uint32_t i_eid,std::vector<sel_info_t*>&i_calloutList, + bool i_infoCallHome); // per IPMI Spec, section 32.1 SEL Event Records enum sel_record_type { record_type_system_event = 0x02, + + // Used to send callhome informational eSEL to BMC + // currently used to send OCC telemetry information to the BMC + record_type_oem_call_home_info_event = 0xDD, + + // This is a procedure callout + // byte 0 = procedure ID + // bytes 4,5 = record ID of associated eSEL record_type_oem_sel_for_procedure_callout = 0xDE, + + // Normal flattened PEL, often just called the eSEL + // bytes 4-6 = 040020 record_type_ami_esel = 0xDF, }; @@ -301,17 +314,19 @@ namespace IPMISEL * @brief parse the msg and call send_esel to send the esel (handles if * the SEL reservation is lost) * @param[in] i_msg + * @param[in] i_infoCallHome - informational call-home log */ - void process_esel(msg_t *i_msg); + void process_esel(msg_t *i_msg, bool i_infoCallHome); /** * @brief do the actual ipmi calls to send the esel data to the bmc * @param[in] i_data esel data * @param[in] o_err any error generated during the send * 
@param[in] o_cc ipmi completion code from last sendrecv + * @param[in] i_infoCallHome - informational call-home log */ - void send_esel(IPMISEL::eselInitData * i_data, - errlHndl_t &o_err, IPMI::completion_code &o_cc); + void send_esel(IPMISEL::eselInitData * i_data, errlHndl_t &o_err, + IPMI::completion_code &o_cc, bool i_infoCallHome); /** * @brief read the SEL time diff --git a/src/usr/errl/errlentry.C b/src/usr/errl/errlentry.C index 1ebbb2671..bfeadb9b8 100644 --- a/src/usr/errl/errlentry.C +++ b/src/usr/errl/errlentry.C @@ -87,7 +87,8 @@ ErrlEntry::ErrlEntry(const errlSeverity_t i_sev, iv_Src( SRC_ERR_INFO, i_modId, i_reasonCode, i_user1, i_user2 ), iv_termState(TERM_STATE_UNKNOWN), iv_sevFinal(false), - iv_skipShowingLog(true) + iv_skipShowingLog(true), + iv_eselCallhomeInfoEvent(false) { #ifdef CONFIG_ERRL_ENTRY_TRACE TRACFCOMP( g_trac_errl, ERR_MRK"Error created : PLID=%.8X, RC=%.4X, Mod=%.2X, Userdata=%.16X %.16X", plid(), i_reasonCode, i_modId, i_user1, i_user2 ); @@ -780,7 +781,11 @@ void ErrlEntry::commit( compId_t i_committerComponent ) // User header contains the component ID of the committer. 
iv_User.setComponentId( i_committerComponent ); - setSubSystemIdBasedOnCallouts(); + // Avoid adding a callout to informational callhome "error" + if (!getEselCallhomeInfoEvent()) + { + setSubSystemIdBasedOnCallouts(); + } // Add the captured backtrace to the error log if (iv_pBackTrace) diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C index 8bd058fac..abcde7883 100644 --- a/src/usr/errl/errlmanager_common.C +++ b/src/usr/errl/errlmanager_common.C @@ -530,6 +530,24 @@ inline bool SensorModifier::modifySensor(uint8_t i_sensorType, return l_retval; } +// Retrieve if informational/call-home eSELs are allowed to the BMC +bool ErrlManager::allowCallHomeEselsToBmc(void) +{ + bool l_allowed = false; + uint8_t flag = 0; + TARGETING::Target* sys = nullptr; + TARGETING::targetService().getTopLevelTarget(sys); + if (sys) + { + flag = sys->getAttr<TARGETING::ATTR_ALLOW_CALLHOME_ESELS_TO_BMC>(); + } + if (flag) + { + l_allowed = true; + } + + return l_allowed; +} /////////////////////////////////////////////////////////////////////////////// // ErrlManager::sendErrLogToBmc() @@ -540,12 +558,24 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) ENTER_MRK "sendErrLogToBmc errlogId 0x%.8x, i_sendSels %d", io_err->eid(), i_sendSels); + + bool l_send_eSel_only = !i_sendSels; // don't send callout sensor SEL + bool l_callhome_type = false; // Is this a callhome type eSEL? 
+ if (io_err->getEselCallhomeInfoEvent() && allowCallHomeEselsToBmc()) + { + TRACFCOMP( g_trac_errl, INFO_MRK + "sendErrLogToBmc: setting l_callhome_type" ); + l_callhome_type = true; + l_send_eSel_only = true; // just send eSEL without any callout SELs + } + do { + // keep track of procedure callouts that modify hardware callouts SensorModifier l_modifier; // Decide whether we want to skip the error log - if( io_err->getSkipShowingLog() ) + if( io_err->getSkipShowingLog() && !l_callhome_type ) { TRACFCOMP( g_trac_errl, INFO_MRK "sendErrLogToBmc: %.8X is INFORMATIONAL/RECOVERED; skipping", @@ -558,7 +588,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) std::vector< HWAS::callout_ud_t* > l_callouts; HWAS::callout_ud_t l_calloutToAdd; // used for EIBUS error HWAS::callOutPriority l_priority = HWAS::SRCI_PRIORITY_NONE; - if (i_sendSels) + if (!l_send_eSel_only) { bool l_busCalloutEncountered = false; // flag bus callout @@ -663,7 +693,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) // bool default constructor initializes to false as per C++ standard std::map<uint8_t, bool> l_sensorNumberEncountered; - if (i_sendSels) + if (!l_send_eSel_only) { l_selEventList.clear(); std::vector<HWAS::callout_ud_t*>::const_iterator i; @@ -775,7 +805,8 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) { IPMISEL::sendESEL(l_pelData, l_pelSize, io_err->eid(), - l_selEventList); + l_selEventList, + l_callhome_type); TRACFCOMP(g_trac_errl, INFO_MRK "sendErrLogToBmc callout size %d", l_selEventList.size()); @@ -799,8 +830,8 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) l_selEventList.push_back(l_selEvent); - IPMISEL::sendESEL(l_pelData, l_pelSize, - io_err->eid(), l_selEventList); + IPMISEL::sendESEL(l_pelData, l_pelSize, io_err->eid(), + l_selEventList, l_callhome_type); } // free the buffer diff --git a/src/usr/ipmi/ipmisel.C b/src/usr/ipmi/ipmisel.C index de7feec18..9bfb802ab 100644 
--- a/src/usr/ipmi/ipmisel.C +++ b/src/usr/ipmi/ipmisel.C @@ -89,7 +89,8 @@ enum esel_retry namespace IPMISEL { void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize, - uint32_t i_eid, std::vector<sel_info_t*>&i_selEventList) + uint32_t i_eid, std::vector<sel_info_t*>&i_selEventList, + bool i_infoCallHome) { IPMI_TRAC(ENTER_MRK "sendESEL() %d",i_selEventList.size()); @@ -103,13 +104,13 @@ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize, #endif msg->type = MSG_SEND_ESEL; msg->data[0] = i_eid; - eselInitData *eselData = + eselInitData *eselData = new eselInitData(i_selEventList, i_eselData, i_dataSize); msg->extra_data = eselData; #ifdef __HOSTBOOT_RUNTIME - process_esel(msg); + process_esel(msg, i_infoCallHome); #else // one message queue to the SEL thread static msg_q_t mq = Singleton<IpmiSEL>::instance().msgQueue(); @@ -129,7 +130,7 @@ void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize, /* * @brief process esel msg */ -void process_esel(msg_t *i_msg) +void process_esel(msg_t *i_msg, bool i_infoCallHome) { errlHndl_t l_err = NULL; IPMI::completion_code l_cc = IPMI::CC_UNKBAD; @@ -144,7 +145,7 @@ void process_esel(msg_t *i_msg) { IPMI_TRAC(ENTER_MRK"sel list size %d", l_data->selInfoList.size()); std::vector<sel_info_t*>::iterator it; - for (it = l_data->selInfoList.begin(); it != l_data->selInfoList.end(); + for (it = l_data->selInfoList.begin(); it != l_data->selInfoList.end(); ++it) { sel_info_t *l_sel = *it; @@ -153,12 +154,12 @@ void process_esel(msg_t *i_msg) l_data->selEvent = true; //If sensor type is sys event then need to send the oem sel - //to handle procedure callout + //to handle procedure callout if (l_sel->sensorType == TARGETING::SENSOR_TYPE_SYS_EVENT) { //oem sel data l_data->selEvent = false; - l_oemSel.record_type = + l_oemSel.record_type = record_type_oem_sel_for_procedure_callout; l_oemSel.event_data1 = l_sel->eventOffset; l_sel->eventOffset = SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; @@ -174,13 +175,13 @@ void process_esel(msg_t 
*i_msg) l_eSel.event_dir_type = l_sel->eventDirType; l_eSel.event_data1 = l_sel->eventOffset; memcpy(l_data->eSel,&l_eSel,sizeof(selRecord)); - + uint32_t l_send_count = MAX_SEND_COUNT; while (l_send_count > 0) { - // try to send the eles to the bmc - send_esel(l_data, l_err, l_cc); + // try to send the esel to the bmc + send_esel(l_data, l_err, l_cc, i_infoCallHome); // if no error but last completion code was: if ((l_err == NULL) && @@ -244,7 +245,8 @@ void process_esel(msg_t *i_msg) * @brief Send esel data to bmc */ void send_esel(eselInitData * i_data, - errlHndl_t &o_err, IPMI::completion_code &o_cc) + errlHndl_t &o_err, IPMI::completion_code &o_cc, + bool i_infoCallHome) { IPMI_TRAC(ENTER_MRK "send_esel"); uint8_t* data = NULL; @@ -305,8 +307,18 @@ void send_esel(eselInitData * i_data, memcpy(&data[PARTIAL_ADD_ESEL_REQ], i_data->eSel, sizeof(selRecord)); // update to make this what AMI eSEL wants - data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] = record_type_ami_esel; - data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,event_data1)] = event_data1_ami; + if (i_infoCallHome) + { + data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] = + record_type_oem_call_home_info_event; + } + else + { + data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,record_type)] = + record_type_ami_esel; + } + data[PARTIAL_ADD_ESEL_REQ + offsetof(selRecord,event_data1)] = + event_data1_ami; o_cc = IPMI::CC_UNKBAD; TRACFBIN( g_trac_ipmi, INFO_MRK"1st partial_add_esel:", data, len); @@ -394,8 +406,8 @@ void send_esel(eselInitData * i_data, } }while(0); - // if eSEL wasn't created due to an error, we don't want to continue - if ((o_err == NULL) && (o_cc == IPMI::CC_OK)) + // if eSEL wasn't created due to an error or callhome, we don't want to continue + if ((o_err == NULL) && (o_cc == IPMI::CC_OK) && (!i_infoCallHome)) { // caller wants us to NOT create sensor SEL if ((i_data->eSel[offsetof(selRecord,sensor_type)] == SENSOR::INVALID_TYPE) && @@ -541,7 +553,7 @@ void 
IpmiSEL::execute(void) switch(msg_type) { case IPMISEL::MSG_SEND_ESEL: - IPMISEL::process_esel(msg); + IPMISEL::process_esel(msg, false); //done with msg msg_free(msg); break; diff --git a/src/usr/targeting/common/xmltohb/attribute_types_openpower.xml b/src/usr/targeting/common/xmltohb/attribute_types_openpower.xml index 2904f6c1b..6f7bc56bd 100644 --- a/src/usr/targeting/common/xmltohb/attribute_types_openpower.xml +++ b/src/usr/targeting/common/xmltohb/attribute_types_openpower.xml @@ -1367,5 +1367,17 @@ ID for the sensor number returned with the elog. --> <writeable/> </attribute> +<attribute> + <id>ALLOW_CALLHOME_ESELS_TO_BMC</id> + <description>Flag used to allow sending informational call-home errors as ESELS to the BMC</description> + <simpleType> + <uint8_t> + <default>0</default> + </uint8_t> + </simpleType> + <persistency>non-volatile</persistency> + <readable/> +</attribute> + </attributes> diff --git a/src/usr/targeting/common/xmltohb/target_types_openpower.xml b/src/usr/targeting/common/xmltohb/target_types_openpower.xml index f470604f5..602eb2cc3 100644 --- a/src/usr/targeting/common/xmltohb/target_types_openpower.xml +++ b/src/usr/targeting/common/xmltohb/target_types_openpower.xml @@ -99,6 +99,7 @@ <attribute><id>CALCULATED_PROC_MEMORY_POWER_DROP</id></attribute> <attribute><id>PROC_SOCKET_POWER_WATTS</id></attribute> <attribute><id>PROC_MHZ_PER_WATT</id></attribute> + <attribute><id>ALLOW_CALLHOME_ESELS_TO_BMC</id></attribute> </targetTypeExtension> <targetTypeExtension> diff --git a/src/usr/util/runtime/rt_cmds.C b/src/usr/util/runtime/rt_cmds.C index beddbb5d4..ed7f5a8b8 100644 --- a/src/usr/util/runtime/rt_cmds.C +++ b/src/usr/util/runtime/rt_cmds.C @@ -37,6 +37,7 @@ #include <devicefw/driverif.H> #include <util/util_reasoncodes.H> #include <errl/errlmanager.H> +#include <errl/errlreasoncodes.H> #include <vector> namespace Util @@ -633,11 +634,12 @@ void cmd_putscom( char*& o_output, void cmd_errorlog( char*& o_output, uint64_t i_word1, uint64_t 
i_word2, - uint32_t i_callout ) + uint32_t i_callout, + uint32_t i_ffdcLength ) { - UTIL_FT( "cmd_errorlog> word1=%.8X%.8X, word2=%.8X%.8X, i_callout=%.8X", + UTIL_FT( "cmd_errorlog> word1=%.8X%.8X, word2=%.8X%.8X, i_callout=%.8X ffdcLength=%ld", (uint32_t)(i_word1>>32), (uint32_t)i_word1, - (uint32_t)(i_word2>>32), (uint32_t)i_word2, i_callout ); + (uint32_t)(i_word2>>32), (uint32_t)i_word2, i_callout, i_ffdcLength ); o_output = new char[100]; errlHndl_t l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, @@ -654,7 +656,44 @@ void cmd_errorlog( char*& o_output, HWAS::NO_DECONFIG, HWAS::GARD_NULL ); } + + if (i_ffdcLength > 0) + { + uint8_t data[256]; + + uint8_t l_count = 0; + uint16_t l_ffdc_length = 256; // break into 256 byte additions + do { + if (i_ffdcLength > l_ffdc_length) + { + i_ffdcLength -= l_ffdc_length; + } + else + { + l_ffdc_length = i_ffdcLength; + i_ffdcLength = 0; + } + memset(data, l_count, l_ffdc_length); + + l_err->addFFDC(UTIL_COMP_ID, + &data, + l_ffdc_length, + 0, // Version + ERRORLOG::ERRL_UDT_NOFORMAT, // parser ignores data + false ); // merge + l_count++; + } while (i_ffdcLength > 0); + + if (i_word1 == 1) + { + // mark error as dd type + l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); + l_err->setEselCallhomeInfoEvent(true); + } + } + l_err->collectTrace("UTIL", 1024); + uint32_t l_plid = l_err->plid(); errlCommit(l_err, UTIL_COMP_ID); sprintf( o_output, "Committed plid 0x%.8X", l_plid ); @@ -871,17 +910,23 @@ int hbrtCommand( int argc, else if( !strcmp( argv[0], "errorlog" ) ) { // errorlog <word1> <word2> <huid to callout> - if( (argc == 3) || (argc == 4) ) + if( (argc == 3) || (argc == 4) || (argc == 5) ) { uint32_t l_huid = 0; + uint32_t l_ffdcLength = 0; if( argc == 4 ) { l_huid = strtou64( argv[3], NULL, 16 ); } + if (argc == 5) + { + l_ffdcLength = strtou64( argv[4], NULL, 16 ); + } cmd_errorlog( *l_output, strtou64( argv[1], NULL, 16 ), strtou64( argv[2], NULL, 16 ), - l_huid ); + l_huid, + l_ffdcLength ); } else 
{ @@ -918,7 +963,7 @@ int hbrtCommand( int argc, strcat( *l_output, l_tmpstr ); sprintf( l_tmpstr, "putscom <huid> <address> <data>\n" ); strcat( *l_output, l_tmpstr ); - sprintf( l_tmpstr, "errorlog <word1> <word2> [<huid to callout>]\n" ); + sprintf( l_tmpstr, "errorlog <word1> <word2> [<huid to callout>] [size]\n" ); strcat( *l_output, l_tmpstr ); sprintf( l_tmpstr, "sbemsg <chipid>\n" ); strcat( *l_output, l_tmpstr ); |