diff options
-rw-r--r-- | src/include/usr/errl/errlentry.H | 22 | ||||
-rw-r--r-- | src/include/usr/errl/errlmanager.H | 169 | ||||
-rw-r--r-- | src/include/usr/errl/errlprvt.H | 5 | ||||
-rw-r--r-- | src/include/usr/errl/errlsrc.H | 11 | ||||
-rw-r--r-- | src/usr/errl/errlentry.C | 205 | ||||
-rw-r--r-- | src/usr/errl/errlmanager.C | 816 | ||||
-rw-r--r-- | src/usr/errl/errlsrc.C | 13 | ||||
-rw-r--r-- | src/usr/errl/test/errluserdetailtest.H | 13 | ||||
-rw-r--r-- | src/usr/mbox/mailboxsp.C | 4 | ||||
-rw-r--r-- | src/usr/pnor/pnorrp.C | 3 | ||||
-rw-r--r-- | src/usr/targeting/targetservicestart.C | 4 |
11 files changed, 986 insertions, 279 deletions
diff --git a/src/include/usr/errl/errlentry.H b/src/include/usr/errl/errlentry.H index 6e67bf55c..d00c9c54a 100644 --- a/src/include/usr/errl/errlentry.H +++ b/src/include/usr/errl/errlentry.H @@ -604,18 +604,32 @@ private: /** + * @brief sort compare function, to be used when flattening an + * error log + * + * @param[in] i_ud1 - first user-defined section to compare + * @param[in] i_ud2 - second user-defined section to compare + * + * @return true if i_ud1 should be first, false otherwise + */ + static bool udSort(ErrlUD *i_ud1, ErrlUD *i_ud2); + + /** * @brief Flatten the data to caller's buffer. The resulting flat * data will be in PEL (platform event log) format. * The ErrlManager is the primary user of this call. * * @param[in,out] o_buffer Points to data block to be filled - * @param[in] i_cbBuffer Count of bytes in buffer supplied + * @param[in] i_cbBuffer Count of bytes in buffer supplied + * @param[in] i_truncate true: truncate the data + * false: error if buffer too small + * NOTE: PH, UH and PS MUST fit * * @return Count of bytes copied to caller's - * buffer or else zero if it does not fit. + * buffer or else zero if it does not fit and i_truncate==false. */ - uint64_t flatten( void * o_buffer, const uint64_t i_cbBuffer ); - + uint64_t flatten( void * o_buffer, const uint64_t i_cbBuffer, + const bool i_truncate = false); /** * @brief Called by commit(), this fucntion creates a diff --git a/src/include/usr/errl/errlmanager.H b/src/include/usr/errl/errlmanager.H index ef51fc659..2d3a21edd 100644 --- a/src/include/usr/errl/errlmanager.H +++ b/src/include/usr/errl/errlmanager.H @@ -66,6 +66,16 @@ namespace ERRORLOG */ void errlCommit(errlHndl_t& io_err, compId_t i_committerComp ); +/** + * @brief Global enums used by static errlResourceReady function + */ +enum errlManagerNeeds +{ + PNOR, + TARG, + MBOX, +} ; + /*****************************************************************************/ // Forward class declarations @@ -125,6 +135,25 @@ public: */ static void setHwasProcessCalloutFn(HWAS::processCalloutFn i_fn); + /** + * @brief Sends msg to errlmanager telling what resources are ready + * + * This is called by resources that the ErrlManager needs, which start up + * AFTER ErrlManager starts. Currently, that's PNOR MBOX and TARGeting. + * + * It is a static function because a module cannot call an interface on a + * singleton in another module + */ + static void errlResourceReady(errlManagerNeeds i_needs); + + /** + * @brief Sends msg to errlmanager telling what resources are ready + * + * This is called by resources that the ErrlManager needs, which start up + * AFTER ErrlManager starts. Currently, that's PNOR MBOX and TARGeting. + * + */ + void sendResourcesMsg(errlManagerNeeds i_needs); /** * @brief Returns the HWAS ProcessCallout function pointer @@ -169,10 +198,13 @@ private: */ enum ERRLOG_MSG_TYPE { - ERRLOG_NEEDS_TO_BE_COMMITTED_TYPE = 0x00000030 | MBOX::FIRST_SECURE_MSG, - ERRLOG_SEND_TO_FSP_TYPE = 0x00000031 | MBOX::FIRST_SECURE_MSG, + ERRLOG_NEEDS_TO_BE_COMMITTED_TYPE = 0x00000030 | MBOX::FIRST_SECURE_MSG, + ERRLOG_SEND_TO_FSP_TYPE = 0x00000031 | MBOX::FIRST_SECURE_MSG, ERRLOG_COMMITTED_ACK_RESPONSE_TYPE = 0x00000032 | MBOX::FIRST_UNSECURE_MSG, - ERRLOG_SHUTDOWN = 0x00000033 | MBOX::FIRST_SECURE_MSG, + ERRLOG_SHUTDOWN_TYPE = 0x00000033 | MBOX::FIRST_SECURE_MSG, + ERRLOG_ACCESS_PNOR_TYPE = 0x00000034 | MBOX::FIRST_SECURE_MSG, + ERRLOG_ACCESS_MBOX_TYPE = 0x00000035 | MBOX::FIRST_SECURE_MSG, + ERRLOG_ACCESS_TARG_TYPE = 0x00000036 | MBOX::FIRST_SECURE_MSG, }; /** @@ -187,8 +219,10 @@ private: enum ERRORLOG_PLID_OFFSET { ERRLOG_PLID_BASE = 0x90000000,// Hostboot Base PLID Offset + ERRLOG_PLID_BASE_MASK = 0x9F000000,// mask of just the id ERRLOG_PLID_NODE_SHIFT = 24, // shift to put node number 0x9# ERRLOG_PLID_MASK = 0x00FFFFFF,// mask to find log number + ERRLOG_PLID_INITIAL = 0x00FF0000,// initial big id number }; /** @@ -198,19 +232,28 @@ private: ErrlManager& operator=(const ErrlManager& i_right); /** - * @brief Create and register the error log message queue + * @brief Access PNOR and get the address and size of the HBEL section in + * PNOR; sets the iv_pnorAddr, iv_maxErrlInPnor, iv_pnorOpenSlot variables; + * parsers the error logs in PNOR and determines the new iv_currLogId. * * @param[in/out] NONE * @return NONE. */ - void msgQueueInit ( void ); + void setupPnorInfo(); + /** + * @brief Create and register the error log message queue + * + * @param[in/out] NONE + * @return NONE. + */ + void msgQueueInit (); /** * @brief Performs startup of the error log processing thread. * - * @param[in/out] - * @return NONE + * @param[in/out] + * @return NONE */ static void * startup ( void* i_self ); @@ -219,19 +262,19 @@ private: * and send it to FSP. * * @param[in/out] NONE - * @return NONE + * @return NONE * */ - void errlogMsgHndlr ( void ); + void errlogMsgHndlr (); /** - * @brief Send Host boot error log to error message queue for committing. + * @brief Send Host boot error log to error message queue for committing. * * @param[in,out] io_err Error log handle to be committed * @param[in] i_committerComp Component id that committed the error * - * @return NONE - * + * @return NONE + * */ void sendErrlogToMessageQueue ( errlHndl_t& io_err, compId_t i_committerComp ); @@ -239,6 +282,16 @@ private: * @brief Create a mailbox message with the error log and send it to Fsp. * * @param[in,out] io_err Error log handle to be committed + * @return msg_t pointer - NULL if msg sent, allocated msg if + * couldn't send + * + */ + msg_t *sendErrLogToMbox ( errlHndl_t& io_err ); + + /** + * @brief Create a mailbox message with the error log and send it to Fsp. + * + * @param[in,out] io_err Error log handle to be committed * @return NONE * */ @@ -247,20 +300,20 @@ private: /** * @brief Save errlog entry in the memory * - * @param[in,out] io_err Error log handle to be committed - * @return NULL + * @param[in,out] io_err Error log handle to be committed + * @return NULL * */ void saveErrLogEntry( errlHndl_t& io_err ); /** - * @brief Shutdown error log manager + * @brief Shutdown error log manager * - * @param[in,out] None + * @param[in,out] None * @return NULL * */ - void errlogShutdown( void ); + void errlogShutdown(); /** * @brief Current log ID. As new error logs are created, @@ -287,6 +340,88 @@ private: */ msg_q_t iv_msgQ; + // functions relating to writing the errorlogs to PNOR + + /** + * @brief flatten and save the error log into PNOR + * + * @param[in] error log handle + * @return true if saved in PNOR, false if not + * + */ + bool saveErrLogToPnor( errlHndl_t& io_err); + + /** + * @brief find the flattened error log in PNOR and set it's ACK bit + * + * @param[in] i_errEid EID of the error log to look for + * @return NONE + * + */ + void ackErrLogInPnor( uint32_t i_errEid ); + + /** + * @brief check the state of the PNOR 'slot' + * + * @param[in] i_position - index into PNOR to check + * @return true if slot is empty + * (first 32bits == EMPTY_ERRLOG_IN_PNOR) + * + */ + bool isSlotEmpty(uint32_t i_position); + + /** + * @brief check the state of the PNOR 'slot' + * ONLY to be called on a slot that is NOT empty + * + * @param[in] i_position - index into PNOR to check + * @return true if errlog in this slot has been ACKed + * (ACK bit in word5 is 0x0) + * + */ + bool isSlotACKed(uint32_t i_position); + + /** + * @brief increment the iv_PnorOpenSlot variable to the next + * available slot, handling wrap and checking that the + * slot is either EMPTY or has an ACKed error log. + * + * @return true if there was an open slot, false if PNOR is full + * + */ + bool incrementPnorOpenSlot(); + + /** + * @brief read the Eid from the flattened error log in PNOR; + * MUST point to a non-empty slot + * + * @param[in] i_position - index into PNOR to check + * @return eid for that error log + * + */ + uint32_t readEidFromFlattened(uint32_t i_position); + + /** + * @brief set the ACK bit in the flattened error log in PNOR; + * MUST point to a non-empty slot + * + * @param[in] i_position - index into PNOR to check + * @return NONE + * + */ + void setACKInFlattened(uint32_t i_position); + + /** + * @brief errorlog-into-PNOR variables + */ + char *iv_pnorAddr; // HBEL section in PNOR + uint32_t iv_maxErrlInPnor; // max number of errorlogs that will fit + uint32_t iv_pnorOpenSlot; // current open slot available for an errorlog + std::list<errlHndl_t> iv_errlToSave; // errlogs still to be saved to PNOR + + bool iv_isSpBaseServices; // do we need to send to FSP + bool iv_isMboxEnabled; // are we able to send to FSP + std::list<msg_t *> iv_errlToSend; // msgs still to be sent to FSP }; } // End namespace diff --git a/src/include/usr/errl/errlprvt.H b/src/include/usr/errl/errlprvt.H index fbdf82588..1f4e1e401 100644 --- a/src/include/usr/errl/errlprvt.H +++ b/src/include/usr/errl/errlprvt.H @@ -53,7 +53,6 @@ private: SLEN = 40, // section length w/o sizeof(ErrlSctnHdr) SST = 0, // section type VER = 1 // section version - // CSS_VER = 8 // TODO unused, relates to iv_cssver[] }; /** @@ -119,10 +118,6 @@ private: uint8_t iv_sctns; // count of sections uint32_t iv_plid; // platform log id uint32_t iv_eid; // Error Log ID - - // uint8_t iv_cssver[CSS_VER]; // TODO unused now, do we need this? - - }; diff --git a/src/include/usr/errl/errlsrc.H b/src/include/usr/errl/errlsrc.H index 89fa9cbef..b6a98b519 100644 --- a/src/include/usr/errl/errlsrc.H +++ b/src/include/usr/errl/errlsrc.H @@ -46,6 +46,7 @@ class ErrlSrc : public ErrlSctn // you would expect to be part of ErrlEntry are actually instance data // in this class. friend class ErrlEntry; + friend class ErrlManager; private: @@ -57,8 +58,10 @@ private: SST = 1, // section type VER = 1, // section version SRCVER = 2, // SRC version (not section version) - WORDCOUNT = 9 // SRC word count - // CSS_VER = 8 // TODO unused, relates to iv_cssver[] + WORDCOUNT = 9, // SRC word count + DECONFIG_BIT = 0x02000000, // deconfig bit (6) in word 5 + GARD_BIT = 0x01000000, // gard bit (7) in word 5 + ACK_BIT = 0x00200000, // ack bit (10) in word 5 }; @@ -145,7 +148,7 @@ private: * @note return value is only valid if input is valid hex digit * '0'-'9', 'A'-'F', 'a'='f' */ - uint64_t aschex2bin(char c); + uint64_t aschex2bin(char c) const; // Instance data srcType_t iv_srcType : 8; // SRC type, the ?? in SRC ??xxxxxx @@ -156,8 +159,6 @@ private: uint64_t iv_user2; // user data 2 bool iv_deconfig; // true if there is a deconfigure callout bool iv_gard; // true if there is a gard callout - - }; diff --git a/src/usr/errl/errlentry.C b/src/usr/errl/errlentry.C index c60a0af61..d0e87f846 100644 --- a/src/usr/errl/errlentry.C +++ b/src/usr/errl/errlentry.C @@ -132,7 +132,7 @@ ErrlEntry::ErrlEntry(const errlSeverity_t i_sev, ErrlEntry::~ErrlEntry() { // Free memory of all sections - for (std::vector<ErrlUD*>::iterator l_itr = iv_SectionVector.begin(); + for (std::vector<ErrlUD*>::const_iterator l_itr = iv_SectionVector.begin(); l_itr != iv_SectionVector.end(); ++l_itr) { delete (*l_itr); @@ -500,7 +500,7 @@ void ErrlEntry::setSubSystemIdBasedOnCallouts() HWAS::callout_ud_t * highestPriorityCallout = NULL; // look thru the errlog for any Callout UserDetail sections - for( std::vector<ErrlUD*>::iterator it = iv_SectionVector.begin(); + for( std::vector<ErrlUD*>::const_iterator it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++ ) { @@ -530,7 +530,7 @@ void ErrlEntry::setSubSystemIdBasedOnCallouts() { // no callouts in log, add default callout for hb code and // add trace - TRACFCOMP(g_trac_errl, "WRN>> No callouts in elog 0x%.8X", eid()); + TRACFCOMP(g_trac_errl, "WRN>> No callouts in elog %.8X", eid()); TRACFCOMP(g_trac_errl, "Adding default callout EPUB_PRC_HB_CODE "); addProcedureCallout( HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW); @@ -734,7 +734,7 @@ void ErrlEntry::processCallout() if (pFn != NULL) { // look thru the errlog for any Callout UserDetail sections - for(std::vector<ErrlUD*>::iterator it = iv_SectionVector.begin(); + for(std::vector<ErrlUD*>::const_iterator it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++ ) { @@ -772,7 +772,7 @@ void ErrlEntry::deferredDeconfigure() { //check for defered deconfigure callouts // look thru the errlog for any Callout UserDetail sections - for(std::vector<ErrlUD*>::iterator it = iv_SectionVector.begin(); + for(std::vector<ErrlUD*>::const_iterator it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++ ) { @@ -810,7 +810,7 @@ uint64_t ErrlEntry::flattenedSize() // plus the sizes of the other optional sections - std::vector<ErrlUD*>::iterator it; + std::vector<ErrlUD*>::const_iterator it; for( it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++ ) { l_bytecount += (*it)->flatSize(); @@ -824,29 +824,35 @@ uint64_t ErrlEntry::flattenedSize() // for use by ErrlManager. Return how many bytes flattened to the output // buffer, or else zero on error. -uint64_t ErrlEntry::flatten( void * o_pBuffer, uint64_t i_bufsize ) +uint64_t ErrlEntry::flatten( void * o_pBuffer, + const uint64_t i_bufsize, + const bool i_truncate) { - uint64_t l_flatCount = 0; - uint64_t l_cb = 0; + uint64_t l_flatSize = 0; + uint64_t l_cb = 0; + uint64_t l_sizeRemaining = i_bufsize; + + // The CPPASSERT() macro will cause the compile to abend + // when the expression given evaluates to false. If ever + // these cause the compile to fail, then perhaps the size + // of enum'ed types has grown unexpectedly. + CPPASSERT( 2 == sizeof(iv_Src.iv_reasonCode)); + CPPASSERT( 2 == sizeof(compId_t)); + CPPASSERT( 1 == sizeof(iv_Src.iv_modId)); do { - l_flatCount = flattenedSize(); - if ( i_bufsize < l_flatCount ) + // check if the input buffer needs to be and is big enough + l_flatSize = flattenedSize(); + if (( l_sizeRemaining < l_flatSize ) && (!i_truncate)) { - // buffer is not big enough; return zero - TRACFCOMP( g_trac_errl, ERR_MRK"Invalid buffer size"); - l_flatCount = 0; + TRACFCOMP( g_trac_errl, + ERR_MRK"Buffer (%d) < flatSize (%d), aborting flatten", + l_sizeRemaining, l_flatSize); + l_flatSize = 0; // return zero break; } - // The CPPASSERT() macro will cause the compile to abend - // when the expression given evaluates to false. If ever - // these cause the compile to fail, then perhaps the size - // of enum'ed types has grown unexpectedly. - CPPASSERT( 2 == sizeof(iv_Src.iv_reasonCode)); - CPPASSERT( 2 == sizeof(compId_t)); - CPPASSERT( 1 == sizeof(iv_Src.iv_modId)); // Inform the private header how many sections there are, // counting the PH, UH, PS, and the optionals. @@ -854,41 +860,45 @@ uint64_t ErrlEntry::flatten( void * o_pBuffer, uint64_t i_bufsize ) // Flatten the PH private header section char * pBuffer = static_cast<char *>(o_pBuffer); - l_cb = iv_Private.flatten( pBuffer, i_bufsize ); + l_cb = iv_Private.flatten( pBuffer, l_sizeRemaining ); if( 0 == l_cb ) { - // Rare. TRACFCOMP( g_trac_errl, ERR_MRK"ph.flatten error"); - l_flatCount = 0; + l_flatSize = 0; + // don't check i_truncate - this section MUST fit. break; } + // save this location - if the number of sections that we flatten is + // reduced, we need to update this PH section. + char *pPHBuffer = pBuffer; + pBuffer += l_cb; - i_bufsize -= l_cb; + l_sizeRemaining -= l_cb; // flatten the UH user header section - l_cb = iv_User.flatten( pBuffer, i_bufsize ); + l_cb = iv_User.flatten( pBuffer, l_sizeRemaining ); if( 0 == l_cb ) { - // Rare. TRACFCOMP( g_trac_errl, ERR_MRK"uh.flatten error"); - l_flatCount = 0; + l_flatSize = 0; + // don't check i_truncate - this section MUST fit. break; } pBuffer += l_cb; - i_bufsize -= l_cb; + l_sizeRemaining -= l_cb; // flatten the PS primary SRC section - l_cb = iv_Src.flatten( pBuffer, i_bufsize ); + l_cb = iv_Src.flatten( pBuffer, l_sizeRemaining ); if( 0 == l_cb ) { - // Rare. TRACFCOMP( g_trac_errl, ERR_MRK"ps.flatten error"); - l_flatCount = 0; + l_flatSize = 0; + // don't check i_truncate - this section MUST fit. break; } pBuffer += l_cb; - i_bufsize -= l_cb; + l_sizeRemaining -= l_cb; // flatten the optional user-defined sections @@ -899,32 +909,48 @@ uint64_t ErrlEntry::flatten( void * o_pBuffer, uint64_t i_bufsize ) // any remaining user-defined sections. Therefore this order // preserves the callouts, and then gives priority to other // non-trace sections. - std::vector<ErrlUD*>::iterator it; - for(it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++) + // + // for saving errorlogs into PNOR, i_truncate will be set to true + // and sections which don't fit are not saved. + uint32_t l_sectionCount = iv_SectionVector.size(); + + std::vector<ErrlUD*>::const_iterator it; + for(it = iv_SectionVector.begin(); + (it != iv_SectionVector.end()) && (l_flatSize != 0); + it++) { // If UD section is a hardware callout. if( (ERRL_COMP_ID == (*it)->iv_header.iv_compId) && (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) ) { - l_cb = (*it)->flatten( pBuffer, i_bufsize ); + l_cb = (*it)->flatten( pBuffer, l_sizeRemaining ); if( 0 == l_cb ) { - // Rare. - TRACFCOMP( g_trac_errl, ERR_MRK"ud.flatten error"); - l_flatCount = 0; - break; + if (i_truncate) + { + // TODO: RTC 77560 - error if this happens during test + TRACFCOMP( g_trac_errl, + INFO_MRK"ud.flatten error, skipping"); + // won't fit - don't count it. + l_sectionCount--; + continue; + } + else + { + TRACFCOMP( g_trac_errl, + ERR_MRK"ud.flatten error, aborting"); + l_flatSize = 0; // return zero + break; + } } pBuffer += l_cb; - i_bufsize -= l_cb; + l_sizeRemaining -= l_cb; } - } + } // for - if( 0 == l_flatCount ) - { - break; - } - - for(it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++) + for(it = iv_SectionVector.begin(); + (it != iv_SectionVector.end()) && (l_flatSize != 0); + it++) { // If UD section is not a hardware callout and not a trace. if( !(((ERRL_COMP_ID == (*it)->iv_header.iv_compId) && @@ -932,53 +958,90 @@ uint64_t ErrlEntry::flatten( void * o_pBuffer, uint64_t i_bufsize ) ((FIPS_ERRL_COMP_ID == (*it)->iv_header.iv_compId) && (FIPS_ERRL_UDT_TRACE == (*it)->iv_header.iv_sst))) ) { - l_cb = (*it)->flatten( pBuffer, i_bufsize ); + l_cb = (*it)->flatten( pBuffer, l_sizeRemaining ); if( 0 == l_cb ) { - // Rare. - TRACFCOMP( g_trac_errl, ERR_MRK"ud.flatten error"); - l_flatCount = 0; - break; + if (i_truncate) + { + // TODO: RTC 77560 - error if this happens during test + TRACFCOMP( g_trac_errl, + INFO_MRK"ud.flatten error, skipping"); + // won't fit - don't count it. + l_sectionCount--; + continue; + } + else + { + TRACFCOMP( g_trac_errl, + ERR_MRK"ud.flatten error, aborting"); + l_flatSize = 0; // return zero + break; + } } pBuffer += l_cb; - i_bufsize -= l_cb; + l_sizeRemaining -= l_cb; } - } - - if( 0 == l_flatCount ) - { - break; - } + } // for - for(it = iv_SectionVector.begin(); it != iv_SectionVector.end(); it++) + for(it = iv_SectionVector.begin(); + (it != iv_SectionVector.end()) && (l_flatSize != 0); + it++) { // If UD section is a trace. if( (FIPS_ERRL_COMP_ID == (*it)->iv_header.iv_compId) && (FIPS_ERRL_UDT_TRACE == (*it)->iv_header.iv_sst) ) { - l_cb = (*it)->flatten( pBuffer, i_bufsize ); + l_cb = (*it)->flatten( pBuffer, l_sizeRemaining ); if( 0 == l_cb ) { - // Rare. - TRACFCOMP( g_trac_errl, ERR_MRK"ud.flatten error"); - l_flatCount = 0; - break; + if (i_truncate) + { + // TODO: RTC 77560 - error if this happens during test + TRACFCOMP( g_trac_errl, + INFO_MRK"ud.flatten error, skipping"); + // won't fit - don't count it. + l_sectionCount--; + continue; + } + else + { + TRACFCOMP( g_trac_errl, + ERR_MRK"ud.flatten error, aborting"); + l_flatSize = 0; // return zero + break; + } } pBuffer += l_cb; - i_bufsize -= l_cb; + l_sizeRemaining -= l_cb; } - } + } // for - if( 0 == l_flatCount ) + if( 0 == l_flatSize ) { break; } + if (l_sectionCount != iv_SectionVector.size()) + { + // some section was too big and didn't get flatten - update the + // section count in the PH section and re-flatten it. + iv_Private.iv_sctns = 3 + l_sectionCount; + l_cb = iv_Private.flatten( pPHBuffer, l_sizeRemaining ); + if( 0 == l_cb ) + { + TRACFCOMP( g_trac_errl, ERR_MRK"ph.flatten error"); + l_flatSize = 0; + // don't check i_truncate - this section MUST fit. + break; + } + } } while( 0 ); - return l_flatCount; -} + // if l_flatSize == 0, there was an error, return 0. + // else return actual size that we flattened into the buffer. + return (l_flatSize == 0) ? 0 : (i_bufsize - l_sizeRemaining); +} // flatten uint64_t ErrlEntry::unflatten( const void * i_buffer, uint64_t i_len ) diff --git a/src/usr/errl/errlmanager.C b/src/usr/errl/errlmanager.C index 269a5305f..48c188374 100644 --- a/src/usr/errl/errlmanager.C +++ b/src/usr/errl/errlmanager.C @@ -26,12 +26,15 @@ * @brief Implementation of ErrlManager class */ +#define STORE_ERRL_IN_L3 + /*****************************************************************************/ // I n c l u d e s /*****************************************************************************/ #include <errl/errlmanager.H> #include <trace/interface.H> #include <errl/errlentry.H> +#include <initservice/taskargs.H> #include <sys/task.h> #include <stdlib.h> #include <string.h> @@ -40,18 +43,17 @@ #include <initservice/initserviceif.H> #include <pnor/pnorif.H> #include <sys/mm.h> +#include <intr/interrupt.H> namespace ERRORLOG { extern trace_desc_t* g_trac_errl; - -// Scaffolding +#ifdef STORE_ERRL_IN_L3 // Store error logs in this memory buffer in L3 RAM. char* g_ErrlStorage = new char[ ERRL_STORAGE_SIZE ]; - /** * @brief * In storage, the flattened error logs are interspersed with "markers." @@ -73,15 +75,41 @@ char* g_ErrlStorage = new char[ ERRL_STORAGE_SIZE ]; */ #define POINTER2OFFSET(p) ((reinterpret_cast<char*>(p))-(g_ErrlStorage)) +#else + +char* g_ErrlStorage; + +#endif + +const uint32_t PNOR_ERROR_LENGTH = 4096; +const uint32_t EMPTY_ERRLOG_IN_PNOR = 0xFFFFFFFF; + +class AtLoadFunctions +{ + public: + AtLoadFunctions() + { + // call errlManager ctor so that we're ready and waiting for errors. + ERRORLOG::theErrlManager::instance(); + } +}; +// this causes the function to get run at module load. +AtLoadFunctions atLoadFunction; /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -ErrlManager::ErrlManager() +ErrlManager::ErrlManager() : + iv_hwasProcessCalloutFn(NULL), + iv_msgQ(NULL), + iv_pnorAddr(NULL), + iv_maxErrlInPnor(0), + iv_pnorOpenSlot(0), + iv_isSpBaseServices(true), // queue msgs for fsp until we find we shouldnt + iv_isMboxEnabled(false) // but mbox isn't ready yet.. { TRACFCOMP( g_trac_errl, ENTER_MRK "ErrlManager::ErrlManager constructor" ); - iv_hwasProcessCalloutFn = NULL; - +#ifdef STORE_ERRL_IN_L3 // Scaffolding. // For now, put error logs in a 64KB buffer in L3 RAM // This buffer has a header (storage_header_t) followed by @@ -100,46 +128,31 @@ ErrlManager::ErrlManager() marker_t* l_pMarker = OFFSET2MARKER( iv_pStorage->offsetStart ); l_pMarker->offsetNext = 0; l_pMarker->length = 0; +#endif - // to determine the starting log ID, we need to look thru PNOR and see - // what error records are there; ours will be 1 after the highest found. - + // to determine the starting log ID, we need to do this in 2 steps // first, determine our node - TARGETING::Target* l_pMasterProcChip = NULL; - TARGETING::targetService().masterProcChipTargetHandle(l_pMasterProcChip); - assert(l_pMasterProcChip != NULL); + // BYTE 0 of the PLID is the ID: 0x9# where # is the node instance. + // [0..3] for hostboot on master proc (chip==0) on node [0..3] + // [4..7] for hostboot on alternate proc on node [0..3] - const uint64_t l_node = - l_pMasterProcChip->getAttr<TARGETING::ATTR_PHYS_PATH>(). - pathElementOfType(TARGETING::TYPE_NODE).instance; + const INTR::PIR_t masterCpu = task_getcpuid(); + const uint32_t l_eid_id = (masterCpu.chipId == 0) ? + masterCpu.nodeId : + masterCpu.nodeId + 4; - // current log id is 0x9# where # is the node instance. - iv_currLogId = ERRLOG_PLID_BASE + (l_node << ERRLOG_PLID_NODE_SHIFT); + iv_currLogId = ERRLOG_PLID_BASE + ERRLOG_PLID_INITIAL + + (l_eid_id << ERRLOG_PLID_NODE_SHIFT); - // now walk thru memory, finding error logs and determine the highest ID - uint32_t l_maxId = 0; + // next, we need to look thru PNOR and see what error records are there; + // ours will be 1 after the highest found. + // BUT that can't happen until AFTER PNOR is up and running... so we'll do + // that in the daemon. if PNOR reports an error, then the EID will just be + // whatever it is. - // Follow the markers. The start-of-list marker: - marker_t* pMarker = OFFSET2MARKER(iv_pStorage->offsetStart); - while (pMarker->offsetNext) - { - pelPrivateHeaderSection_t * pPrivateHdr = - reinterpret_cast<pelPrivateHeaderSection_t*>(pMarker+1); - - if (pPrivateHdr->eid > l_maxId) - { - l_maxId = pPrivateHdr->eid; - } - - // next marker/error log - pMarker = OFFSET2MARKER(pMarker->offsetNext); - } // while - - // bump the current plid to 1 past the max eid found - iv_currLogId += (l_maxId & ERRLOG_PLID_MASK) + 1; - - TRACFCOMP( g_trac_errl, INFO_MRK"ErrlManager on proc %.8X, LogId 0x%X", - get_huid(l_pMasterProcChip), iv_currLogId); + TRACFCOMP( g_trac_errl, INFO_MRK"ErrlManager on node %d (%smaster proc), LogId 0x%X", + masterCpu.nodeId, (masterCpu.chipId == 0) ? "" : "alternate ", + iv_currLogId); // Create and register error log message queue. msgQueueInit(); @@ -169,43 +182,18 @@ ErrlManager::~ErrlManager() /////////////////////////////////////////////////////////////////////////////// // ErrlManager::msgQueueInit() /////////////////////////////////////////////////////////////////////////////// -void ErrlManager::msgQueueInit ( void ) +void ErrlManager::msgQueueInit () { - errlHndl_t l_err = NULL; - TRACFCOMP( g_trac_errl, ENTER_MRK "ErrlManager::msgQueueInit ..." ); - do - { - // Create error log message queue. - iv_msgQ = msg_q_create(); - - // Register messageQ with Mailbox to receive message. - l_err = MBOX::msgq_register( MBOX::HB_ERROR_MSGQ, - iv_msgQ ); - if( l_err ) - { - TRACFCOMP(g_trac_errl, ERR_MRK "Msg queue already registered"); - - delete( l_err ); - l_err = NULL; - - //If we got an error then it means the message queue is - //registered with mailbox. This should not happen. - //So assert here. - assert(0); - - break; - } + // Create error log message queue. + iv_msgQ = msg_q_create(); - // Register for error log manager shutdown event - INITSERVICE::registerShutdownEvent( iv_msgQ, ERRLOG_SHUTDOWN, + // Register for error log manager shutdown event + INITSERVICE::registerShutdownEvent( iv_msgQ, ERRLOG_SHUTDOWN_TYPE, INITSERVICE::NO_PRIORITY ); - } while (0); - TRACFCOMP( g_trac_errl, EXIT_MRK "ErrlManager::msgQueueInit" ); - return; } @@ -214,14 +202,12 @@ void ErrlManager::msgQueueInit ( void ) /////////////////////////////////////////////////////////////////////////////// void * ErrlManager::startup ( void* i_self ) { - TRACFCOMP( g_trac_errl, ENTER_MRK "ErrlManager::startup..." ); //Start a thread and let error log message handler running. reinterpret_cast<ErrlManager *>(i_self)->errlogMsgHndlr(); TRACFCOMP( g_trac_errl, EXIT_MRK "ErrlManager::startup" ); - return NULL; } @@ -229,42 +215,172 @@ void * ErrlManager::startup ( void* i_self ) /////////////////////////////////////////////////////////////////////////////// // ErrlManager::errlogMsgHndlr() /////////////////////////////////////////////////////////////////////////////// -void ErrlManager::errlogMsgHndlr ( void ) +void ErrlManager::errlogMsgHndlr () { - errlHndl_t l_err = NULL; - msg_t * theMsg = NULL; - TRACFCOMP( g_trac_errl, ENTER_MRK "Enter ErrlManager::errlogMsgHndlr" ); while( 1 ) { - theMsg = msg_wait( iv_msgQ ); + msg_t * theMsg = msg_wait( iv_msgQ ); TRACFCOMP( g_trac_errl, INFO_MRK"Got an error log Msg - Type: 0x%08x", theMsg->type ); //Process message just received switch( theMsg->type ) { - case ERRLOG_NEEDS_TO_BE_COMMITTED_TYPE: + case ERRLOG_ACCESS_PNOR_TYPE: + { + // PNOR is up and running now. + + setupPnorInfo(); + + //We are done with the msg + msg_free(theMsg); + + // go back and wait for a next msg + break; + } + case ERRLOG_ACCESS_TARG_TYPE: + { + // TARGETING is up and running now. + + // do we NOT need to send the error? + TARGETING::Target * sys = NULL; + TARGETING::targetService().getTopLevelTarget( sys ); + TARGETING::SpFunctions spfn; + + if (!(sys && + sys->tryGetAttr<TARGETING::ATTR_SP_FUNCTIONS>(spfn) && + spfn.baseServices)) + { + iv_isSpBaseServices = false; + + // if there are queued msgs, delete them + while (!iv_errlToSend.empty()) + { + msg_t * msg = iv_errlToSend.front(); + free( msg->extra_data ); + msg_free( msg ); + // delete from the list + iv_errlToSend.pop_front(); + } // while items on iv_errlToSend list + } + + //We are done with the msg + msg_free(theMsg); + + // go back and wait for a next msg + break; + } + case ERRLOG_ACCESS_MBOX_TYPE: { + // MBOX is up and running now. + + // do we need to send the errorlog + TARGETING::Target * sys = NULL; + TARGETING::targetService().getTopLevelTarget( sys ); + TARGETING::SpFunctions spfn; + + if (sys && + sys->tryGetAttr<TARGETING::ATTR_SP_FUNCTIONS>(spfn) && + spfn.mailboxEnabled) + { + iv_isMboxEnabled = true; + } + + // if we're supposed to and can now send msgs, do it. + if (iv_isSpBaseServices && iv_isMboxEnabled) + { + // Register messageQ with Mailbox to receive message. + errlHndl_t l_err = + MBOX::msgq_register( MBOX::HB_ERROR_MSGQ, + iv_msgQ ); + if( l_err ) + { + TRACFCOMP(g_trac_errl, ERR_MRK "Msg queue already registered"); + + delete( l_err ); + l_err = NULL; + + //If we got an error then it means the message queue + //is registered with mailbox. + //This should not happen. So assert here. + assert(0); + } + + // if error(s) came in before MBOX was ready, + // the msg(s) would be on this list. send it now. + while (!iv_errlToSend.empty()) + { + msg_t * msg = iv_errlToSend.front(); + + l_err = MBOX::send( MBOX::FSP_ERROR_MSGQ, msg ); + if( l_err ) + { + TRACFCOMP(g_trac_errl, ERR_MRK "Failed sending error log to FSP"); + + //Free the extra data due to the error + if( (msg != NULL) && (msg->extra_data != NULL) ) + { + free( msg->extra_data ); + msg_free( msg ); + } + + delete l_err; + l_err = NULL; + } + + // delete from the list + iv_errlToSend.pop_front(); + } // while items on list + } + else + { + // if there are queued msgs, delete them + while (!iv_errlToSend.empty()) + { + msg_t * msg = iv_errlToSend.front(); + free( msg->extra_data ); + msg_free( msg ); + // delete from the list + iv_errlToSend.pop_front(); + } // while items on iv_errlToSend list + } + + //We are done with the msg + msg_free(theMsg); + // go back and wait for a next msg + break; + } + case ERRLOG_NEEDS_TO_BE_COMMITTED_TYPE: + { //Extract error log handle from the message. We need the - // error log handle to pass along to saveErrlogEntry and - // sendMboxMsg - l_err = (errlHndl_t) theMsg->extra_data; + // error log handle to pass along to saveErrLogEntry and + // sendErrLogToMbox + errlHndl_t l_err = (errlHndl_t) theMsg->extra_data; //Ask the ErrlEntry to assign commit component, commit time l_err->commit( (compId_t) theMsg->data[0] ); - //Write the error log to L3 memory till PNOR is implemented - //RTC #47517 for future task to write error log to PNOR + //Save the error log to PNOR + bool l_savedToPnor = saveErrLogToPnor(l_err); + +#ifdef STORE_ERRL_IN_L3 + //Write the error log to L3 memory + //useful ONLY for the hb-errl tool saveErrLogEntry ( l_err ); +#endif - //Create a mbox message with the error log and send it to - // FSP. - // We only send error log to FSP when mailbox is enabled - if( MBOX::mailbox_enabled() ) + //Try to send the error log if someone is there to receive + if (iv_isSpBaseServices) { - sendMboxMsg ( l_err ); + msg_t *l_sentToMbox = sendErrLogToMbox ( l_err ); + if (l_sentToMbox != NULL) + { + // we were supposed to send it and couldn't; + // save it on the queue. + iv_errlToSend.push_back(l_sentToMbox); + } } //Ask the ErrlEntry to process any callouts @@ -273,7 +389,6 @@ void ErrlManager::errlogMsgHndlr ( void ) //Ask if it is a terminating log if( l_err->isTerminateLog() ) { - TRACFCOMP( g_trac_errl, INFO_MRK "Terminating error was commited" " errlmanager is reqesting a shutdown."); @@ -282,12 +397,20 @@ void ErrlManager::errlogMsgHndlr ( void ) TRACDCOMP( g_trac_errl, INFO_MRK"shutdown in progress" ); - } - //We are done with the error log handle so delete it. - delete l_err; - l_err = NULL; + // check if we actually saved the msg to PNOR + if (l_savedToPnor) + { + //done with the error log handle so delete it. + delete l_err; + l_err = NULL; + } + else + { // save didn't work - push into a list to do when + // the next ACK gets processed. + iv_errlToSave.push_back(l_err); + } //We are done with the msg msg_free(theMsg); @@ -296,18 +419,41 @@ void ErrlManager::errlogMsgHndlr ( void ) break; } case ERRLOG_COMMITTED_ACK_RESPONSE_TYPE: - //Hostboot must keep track and clean up hostboot error - //logs in PNOR after it is committed by FSP. + { + //Hostboot must keep track and clean up hostboot error + //logs in PNOR after it is committed by FSP. + uint32_t l_tmpPlid = theMsg->data[0]>>32; + TRACFCOMP( g_trac_errl, INFO_MRK"ack: %.8x", l_tmpPlid); - //TODO: We have an RTC 47517 for this work. New code need - //to be added to mark the error log in PNOR as committed. + ackErrLogInPnor(l_tmpPlid); - TRACFCOMP( g_trac_errl, INFO_MRK"Got a acked msg - Type: 0x%08x", - theMsg->type ); - msg_free(theMsg); - break; + msg_free(theMsg); + + if (!iv_errlToSave.empty()) + { + //we didn't have room before in PNOR to save an + // error log, so try now since we just ACKed one. + errlHndl_t l_err = iv_errlToSave.front(); + + bool l_savedToPnor = saveErrLogToPnor(l_err); + + // check if we actually saved the msg to PNOR + if (l_savedToPnor) + { // if so, we're done - clean up + + //done with the error log handle so delete it. + delete l_err; + l_err = NULL; - case ERRLOG_SHUTDOWN: + // delete from the list + iv_errlToSave.pop_front(); + } + // else, still couldn't save it (for some reason??) so + // it's still on the list. + } + break; + } + case ERRLOG_SHUTDOWN_TYPE: TRACFCOMP( g_trac_errl, INFO_MRK "Shutdown event received" ); //Start shutdown process for error log @@ -327,7 +473,7 @@ void ErrlManager::errlogMsgHndlr ( void ) msg_free(theMsg); break; - } + } // switch } //The errlogMsgHndlr should run all the time. It only @@ -338,57 +484,60 @@ void ErrlManager::errlogMsgHndlr ( void ) /////////////////////////////////////////////////////////////////////////////// -// ErrlManager::sendMboxMsg() +// ErrlManager::sendErrLogToMbox() /////////////////////////////////////////////////////////////////////////////// -void ErrlManager::sendMboxMsg ( errlHndl_t& io_err ) +msg_t *ErrlManager::sendErrLogToMbox ( errlHndl_t& io_err ) { - errlHndl_t l_err = NULL; - msg_t * msg = NULL; + msg_t *msg = NULL; - TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::sendMboxMsg" ); + TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::sendErrLogToMbox" ); do { - //Create a mailbox message to send to FSP - msg = msg_allocate(); - msg->type = ERRLOG_SEND_TO_FSP_TYPE; + //Create a mailbox message to send to FSP + msg = msg_allocate(); + msg->type = ERRLOG_SEND_TO_FSP_TYPE; - uint32_t l_msgSize = io_err->flattenedSize(); + uint32_t l_msgSize = io_err->flattenedSize(); - //Data[0] will be hostboot error log ID so Hostboot can - //keep track of the error log when FSP responses back. - //The error log ID is also the plid (platform log identify) + //Data[0] will be hostboot error log ID so Hostboot can + //keep track of the error log when FSP responses back. - msg->data[0] = io_err->plid(); - msg->data[1] = l_msgSize; + msg->data[0] = io_err->eid(); + msg->data[1] = l_msgSize; - void * temp_buff = malloc( l_msgSize ); - io_err->flatten ( temp_buff, l_msgSize ); - msg->extra_data = temp_buff; + void * temp_buff = malloc( l_msgSize ); + io_err->flatten ( temp_buff, l_msgSize ); + msg->extra_data = temp_buff; - TRACDCOMP( g_trac_errl, INFO_MRK"Send msg to FSP for errlogId [0x%08x]", - io_err->plid() ); + TRACDCOMP( g_trac_errl, INFO_MRK"Send msg to FSP for errlogId %.8x", + io_err->eid() ); - l_err = MBOX::send( MBOX::FSP_ERROR_MSGQ, msg ); - if( l_err ) + if (iv_isMboxEnabled) { - TRACFCOMP(g_trac_errl, ERR_MRK "Failed sending error log to FSP"); - - //Free the extra data due to the error - if( msg != NULL && msg->extra_data != NULL ) - { - free( msg->extra_data ); - msg_free( msg ); - } - - delete l_err; - l_err = NULL; - + errlHndl_t l_err = MBOX::send( MBOX::FSP_ERROR_MSGQ, msg ); + if( l_err ) + { + TRACFCOMP(g_trac_errl, ERR_MRK "Failed sending error log to FSP"); + + //Free the extra data due to the error + if( (msg != NULL) && (msg->extra_data != NULL) ) + { + free( msg->extra_data ); + msg_free( msg ); + msg = NULL; + } + + delete l_err; + l_err = NULL; + } } + // else, we created the msg, but couldn't send it - return it so that + // it can be saved and sent later when the MBOX is up. } while (0); - TRACFCOMP( g_trac_errl, EXIT_MRK"sendMboxMsg()" ); - return; -} + TRACFCOMP( g_trac_errl, EXIT_MRK"sendErrLogToMbox() returning %p", msg); + return msg; +} // sendErrLogToMbox /////////////////////////////////////////////////////////////////////////////// // Handling commit error log. @@ -406,9 +555,8 @@ void ErrlManager::commitErrLog(errlHndl_t& io_err, compId_t i_committerComp ) break; } - TRACFCOMP(g_trac_errl, "commitErrLog() called by %.4X for plid=0x%X," - "Reasoncode=%.4X", i_committerComp, - io_err->plid(), io_err->reasonCode() ); + TRACFCOMP(g_trac_errl, "commitErrLog() called by %.4X for eid=%.8x, Reasoncode=%.4X", + i_committerComp, io_err->eid(), io_err->reasonCode() ); //Ask ErrlEntry to check for any special deferred deconfigure callouts io_err->deferredDeconfigure(); @@ -424,12 +572,14 @@ void ErrlManager::commitErrLog(errlHndl_t& io_err, compId_t i_committerComp ) return; } +#ifdef STORE_ERRL_IN_L3 /////////////////////////////////////////////////////////////////////////////// // ErrlManager::saveErrLogEntry() /////////////////////////////////////////////////////////////////////////////// void ErrlManager::saveErrLogEntry( errlHndl_t& io_err ) { - TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::saveErrLogEntry" ); + TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::saveErrLogEntry eid %.8x", + io_err->eid()); do { // Get flattened count of bytes. @@ -470,7 +620,7 @@ void ErrlManager::saveErrLogEntry( errlHndl_t& io_err ) TRACFCOMP( g_trac_errl, EXIT_MRK"ErrlManager::saveErrLogEntry" ); return; } - +#endif /////////////////////////////////////////////////////////////////////////////// @@ -499,7 +649,51 @@ void errlCommit(errlHndl_t& io_err, compId_t i_committerComp ) return; } +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Global function (not a method on an object) to commit the error log. +void ErrlManager::errlResourceReady(errlManagerNeeds i_needs) +{ + ERRORLOG::theErrlManager::instance().sendResourcesMsg(i_needs); + return; +} + +void ErrlManager::sendResourcesMsg(errlManagerNeeds i_needs) +{ + TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::sendResourcesMsg %d", i_needs); + + //Create a message to send to Host boot error message queue. + msg_t *msg = msg_allocate(); + switch (i_needs) + { + case PNOR: + msg->type = ERRORLOG::ErrlManager::ERRLOG_ACCESS_PNOR_TYPE; + break; + case TARG: + msg->type = ERRORLOG::ErrlManager::ERRLOG_ACCESS_TARG_TYPE; + break; + case MBOX: + msg->type = ERRORLOG::ErrlManager::ERRLOG_ACCESS_MBOX_TYPE; + break; + default: + { + TRACFCOMP( g_trac_errl, ERR_MRK "bad msg!!"); + assert(0); + } + } + + //Send the msg asynchronously to error message queue to handle. + int rc = msg_send ( ERRORLOG::ErrlManager::iv_msgQ, msg ); + + //Return code is non-zero when the message queue is invalid + //or the message type is invalid. + if ( rc ) + { + TRACFCOMP( g_trac_errl, ERR_MRK "Failed (rc=%d) to send %d message.", rc, i_needs); + } + return; +} /////////////////////////////////////////////////////////////////////////////// // ErrlManager::sendErrlogToMessageQueue() /////////////////////////////////////////////////////////////////////////////// @@ -525,7 +719,7 @@ void ErrlManager::sendErrlogToMessageQueue ( errlHndl_t& io_err, msg->extra_data = io_err; TRACFCOMP( g_trac_errl, INFO_MRK"Send an error log to message queue" - " to commit. plid=0x%X", io_err->plid() ); + " to commit. eid=%.8X", io_err->eid() ); //Send the error log to error message queue to handle. //Message is sent as asynchronous. @@ -536,7 +730,7 @@ void ErrlManager::sendErrlogToMessageQueue ( errlHndl_t& io_err, if ( rc ) { TRACFCOMP( g_trac_errl, ERR_MRK "Failed to send mailbox message" - "to message queue. plid=0x%X", io_err->plid() ); + "to message queue. eid=%.8X", io_err->eid() ); break; } @@ -548,42 +742,53 @@ void ErrlManager::sendErrlogToMessageQueue ( errlHndl_t& io_err, /////////////////////////////////////////////////////////////////////////////// // ErrlManager::errlogShutdown() /////////////////////////////////////////////////////////////////////////////// -void ErrlManager::errlogShutdown(void) +void ErrlManager::errlogShutdown() { - errlHndl_t l_err = NULL; - PNOR::SectionInfo_t l_section; + // if there are errorlogs that didn't get sent via the MBOX to FSP, + // trace them and clean up + while (!iv_errlToSend.empty()) + { + msg_t * msg = iv_errlToSend.front(); + TRACDCOMP(g_trac_errl, INFO_MRK "Failed to send to FSP: eid %.8x", + msg->data[0]); + free( msg->extra_data ); + msg_free( msg ); + // delete from the list + iv_errlToSend.pop_front(); + } // while items on iv_errlToSend list + + // if there are errorlogs that didn't get stored in PNOR, + // trace them and clean up + while (!iv_errlToSave.empty()) + { + errlHndl_t l_err = iv_errlToSave.front(); + TRACFCOMP(g_trac_errl, ERR_MRK "Failed to store to PNOR: eid %.8x", + l_err->eid()); + delete l_err; + // delete from the list + iv_errlToSave.pop_front(); + } // while items on iv_errlToSave list // Ensure that all the error logs are pushed out to PNOR // prior to the PNOR resource provider shutting down. - - l_err = PNOR::getSectionInfo(PNOR::HB_ERRLOGS, PNOR::CURRENT_SIDE, - l_section); - - if(l_err) + int l_rc = mm_remove_pages(FLUSH, (void *) iv_pnorAddr, + iv_maxErrlInPnor * PNOR_ERROR_LENGTH); + if( l_rc ) { - TRACFCOMP(g_trac_errl, ERR_MRK "Error in getting PNOR section info"); - //We are shutting the error log manager so we can not commit - //error. So just log the error trace for the error. - delete l_err; - l_err = NULL; - } - else - { - int l_rc = mm_remove_pages(FLUSH, (void *) l_section.vaddr, - l_section.size); - if( l_rc ) - { - //If mm_remove_pages returns none zero for error then - //log an error trace in this case. - TRACFCOMP(g_trac_errl, ERR_MRK "Fail to flush the page"); - } + //If mm_remove_pages returns none zero for error then + //log an error trace in this case. + TRACFCOMP(g_trac_errl, ERR_MRK "Fail to flush the page %p size %d", + iv_pnorAddr, iv_maxErrlInPnor * PNOR_ERROR_LENGTH); } // Un-register error log message queue from the shutdown INITSERVICE::unregisterShutdownEvent( iv_msgQ); - // Un-register error log message queue from the mailbox service - MBOX::msgq_unregister( MBOX::HB_ERROR_MSGQ ); + if (iv_isMboxEnabled) + { + // Un-register error log message queue from the mailbox service + MBOX::msgq_unregister( MBOX::HB_ERROR_MSGQ ); + } // Do not destroy the queue... there are paths where the daemon thread // still has references to the queue or the unregisterShutdownEvent did @@ -596,4 +801,271 @@ void ErrlManager::errlogShutdown(void) return; } +// ------------------------------------------------------------------ +// setupPnorInfo +// ------------------------------------------------------------------ +void ErrlManager::setupPnorInfo() +{ + TRACFCOMP( g_trac_errl, ENTER_MRK"setupPnorInfo" ); + + do + { + // Get SPD PNOR section info from PNOR RP + PNOR::SectionInfo_t info; + errlHndl_t err = PNOR::getSectionInfo( PNOR::HB_ERRLOGS, + PNOR::CURRENT_SIDE, + info ); + + if (err) + { + TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo getSectionInfo failed"); + assert(err == NULL); + break; + } + + TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo sectionInfo id %d name \"%s\" size %d", + info.id, info.name, info.size ); + + // Set the globals appropriately + iv_pnorAddr = reinterpret_cast<char *> (info.vaddr); + iv_maxErrlInPnor = info.size / PNOR_ERROR_LENGTH; + + TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo iv_pnorAddr %p maxErrlInPnor %d", + iv_pnorAddr, iv_maxErrlInPnor ); + + // initial value, in case PNOR is empty - start at this end slot + // so that our first save will increment and wrap correctly + iv_pnorOpenSlot = (iv_maxErrlInPnor - 1); + + // walk thru memory, finding error logs and determine the highest ID + uint32_t l_maxId = 0; + for (uint32_t i = 0; i < iv_maxErrlInPnor; i++) + { + if (!isSlotEmpty(i)) + { + uint32_t l_eid = readEidFromFlattened(i); + if (l_eid > l_maxId ) + { + l_maxId = l_eid; + + // set this - start at this 'max' slot so that our first + // save will increment correctly + iv_pnorOpenSlot = i; + } + // also check if it's ACKed or not. and ACK it. + // for FSP system, this shouldn't ever happen. + // for non-FSP systems, this clears out all 'last IPL' logs + if (!isSlotACKed(i)) + { + TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo slot %d eid %.8X was not ACKed.", + i, l_eid); + setACKInFlattened(i); + } // not ACKed + } // not empty + } // for + + // bump the current eid to 1 past the max eid found + while (!__sync_bool_compare_and_swap(&iv_currLogId, iv_currLogId, + (iv_currLogId & ERRLOG_PLID_BASE_MASK) + + (l_maxId & ERRLOG_PLID_MASK) + 1)); + TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo reseting LogId 0x%X", iv_currLogId); + + // if error(s) came in before PNOR was ready, + // the error log(s) would be on this list. save now. + while (!iv_errlToSave.empty()) + { + errlHndl_t l_err = iv_errlToSave.front(); + + //ACK it if no one is there to receive + bool l_savedToPnor = saveErrLogToPnor(l_err); + + // check if we actually saved the msg to PNOR + if (l_savedToPnor) + { // if so, we're done - clean up + + //done with the error log handle so delete it. + delete l_err; + l_err = NULL; + + // delete from the list + iv_errlToSave.pop_front(); + } + else + { + // still couldn't save it (PNOR maybe full) so + // it's still on the list. + break; // get out of this while loop. + } + } // while entries on list + + } while (0); + + TRACFCOMP( g_trac_errl, EXIT_MRK"setupPnorInfo"); +} // setupPnorInfo + +/////////////////////////////////////////////////////////////////////////////// +// ErrlManager::incrementPnorOpenSlot() +/////////////////////////////////////////////////////////////////////////////// +bool ErrlManager::incrementPnorOpenSlot() +{ + uint32_t initialSlot = iv_pnorOpenSlot; // starting slot + do + { + iv_pnorOpenSlot++; + if (iv_pnorOpenSlot == iv_maxErrlInPnor) + { // wrap + iv_pnorOpenSlot = 0; + } + } while ( !isSlotEmpty(iv_pnorOpenSlot) && + !isSlotACKed(iv_pnorOpenSlot) && + (iv_pnorOpenSlot != initialSlot)); + + // if we got a different slot, return true; else false - no open slots + return (iv_pnorOpenSlot != initialSlot); +} // incrementPnorOpenSlot + +/////////////////////////////////////////////////////////////////////////////// +// ErrlManager::saveErrLogToPnor() +/////////////////////////////////////////////////////////////////////////////// +bool ErrlManager::saveErrLogToPnor( errlHndl_t& io_err) +{ + TRACFCOMP( g_trac_errl, ENTER_MRK"saveErrLogToPnor eid=%.8x", io_err->eid()); + + // save our current slot, and see if there's an open slot + uint32_t l_previousSlot = iv_pnorOpenSlot; // in case flatten fails + bool l_slotFound = (iv_pnorAddr != NULL) && incrementPnorOpenSlot(); + + if (l_slotFound) + { + // flatten into PNOR, truncate to the slot size + char *l_pnorAddr = iv_pnorAddr + (PNOR_ERROR_LENGTH * iv_pnorOpenSlot); + TRACDBIN( g_trac_errl, INFO_MRK"saveErrLogToPnor: l_pnorAddr before", + l_pnorAddr, 128); + int l_errSize = io_err->flatten(l_pnorAddr, PNOR_ERROR_LENGTH, true); + if (l_errSize !=0) + { + TRACFCOMP( g_trac_errl, INFO_MRK"saveErrLogToPnor: %d bytes flattened into %p, slot %d", + l_errSize, l_pnorAddr, iv_pnorOpenSlot ); + + TRACDBIN( g_trac_errl, INFO_MRK"saveErrLogToPnor: l_pnorAddr after", + l_pnorAddr, 128); + + // Ensure that this error log is pushed out to PNOR + int l_rc = mm_remove_pages(FLUSH, + (void *) l_pnorAddr, l_errSize); + if( l_rc ) + { + //If mm_remove_pages returns none zero for error then + //log an error trace in this case. + TRACFCOMP(g_trac_errl, ERR_MRK "Fail to flush the page %p size %d", + l_pnorAddr, PNOR_ERROR_LENGTH); + } + } + else + { + TRACFCOMP( g_trac_errl, ERR_MRK"saveErrLogToPnor: could not flatten data"); + // restore slot so that our next save will find this slot + iv_pnorOpenSlot = l_previousSlot; + } + } + else + { + TRACFCOMP( g_trac_errl, EXIT_MRK"saveErrLogToPnor: NOT SAVED"); + } + return l_slotFound; +} // saveErrLogToPnor + +/////////////////////////////////////////////////////////////////////////////// +// ErrlManager::ackErrLogInPnor() +/////////////////////////////////////////////////////////////////////////////// +void ErrlManager::ackErrLogInPnor( uint32_t i_errEid ) +{ + TRACFCOMP( g_trac_errl, ENTER_MRK"ackErrLogInPnor(%.8x)", i_errEid); + + // look for an un-ACKed log that matches this eid + uint32_t i; + for (i = 0; i < iv_maxErrlInPnor; i++) + { + if (!isSlotEmpty(i) && !isSlotACKed(i)) + { + uint32_t l_eid = readEidFromFlattened(i); + if (l_eid == i_errEid) + { + TRACDCOMP( g_trac_errl, INFO_MRK"ackErrLogInPnor: match in slot %d", i); + setACKInFlattened(i); + break; + } + } + } // for + + // if we made it through the loop w/out breaking early + if (i == iv_maxErrlInPnor) + { + //could not find the errorlog to mark for acknowledgment + TRACFCOMP( g_trac_errl, ERR_MRK"ackErrLogInPnor failed to find the error log" ); + } + + TRACFCOMP( g_trac_errl, EXIT_MRK"ackErrLogInPnor" ); + return; +} // ackErrLogInPnor + + +bool ErrlManager::isSlotEmpty(uint32_t i_position) +{ + // checks the first word of the flattened errlog, which should be a + // pelsectionheader - which will NEVER be 0xFFFFFFFF if it's valid. + char * l_pnorAddr = iv_pnorAddr + (PNOR_ERROR_LENGTH * i_position); + bool rc = (memcmp(l_pnorAddr, &EMPTY_ERRLOG_IN_PNOR, sizeof(uint32_t)) + == 0); + TRACDCOMP( g_trac_errl, "isSlotEmpty: slot %d @ %p is %s", + i_position, l_pnorAddr, rc ? "empty" : "not empty"); + return rc; +} + +// readEidFromFlattened() +// i_position MUST be valid errlog (not EMPTY_ERRLOG_IN_PNOR) +uint32_t ErrlManager::readEidFromFlattened(uint32_t i_position) +{ + const char * l_pnorAddr = iv_pnorAddr + (PNOR_ERROR_LENGTH * i_position); + const pelPrivateHeaderSection_t *pPH = + reinterpret_cast<const pelPrivateHeaderSection_t *>(l_pnorAddr); + TRACDCOMP(g_trac_errl, "readEid(%d): eid %.8x", i_position, pPH->eid); + + return pPH->eid; +} + +// isSlotACKed() +// i_position MUST be valid errlog (not EMPTY_ERRLOG_IN_PNOR) +bool ErrlManager::isSlotACKed(uint32_t i_position) +{ + const char * l_pnorAddr = iv_pnorAddr + (PNOR_ERROR_LENGTH * i_position); + l_pnorAddr += sizeof(pelPrivateHeaderSection_t); + l_pnorAddr += sizeof(pelUserHeaderSection_t); + const pelSRCSection_t *pSRC = + reinterpret_cast<const pelSRCSection_t *>(l_pnorAddr); + + TRACDCOMP(g_trac_errl, "isSlotACKed(%d): word5 %08x - %s", + i_position, pSRC->word5, + (pSRC->word5 & ErrlSrc::ACK_BIT) ? "not ACKed" : "ACKed"); + + return (pSRC->word5 & ErrlSrc::ACK_BIT) ? false : true; +} + +// setACKInFlattened() +void ErrlManager::setACKInFlattened(uint32_t i_position) +{ + char * l_pnorErrlAddr = iv_pnorAddr + (PNOR_ERROR_LENGTH * i_position); + char * l_pnorAddr = l_pnorErrlAddr + sizeof(pelPrivateHeaderSection_t); + l_pnorAddr += sizeof(pelUserHeaderSection_t); + pelSRCSection_t *pSRC = reinterpret_cast<pelSRCSection_t *>(l_pnorAddr); + + pSRC->word5 &= ~(ErrlSrc::ACK_BIT); + + TRACDCOMP(g_trac_errl, "setACKInFlattened(%d): word5 %08x - %s", + i_position, pSRC->word5, + (pSRC->word5 & ErrlSrc::ACK_BIT) ? "not ACKed" : "ACKed"); + + return; +} + } // End namespace diff --git a/src/usr/errl/errlsrc.C b/src/usr/errl/errlsrc.C index b9552680f..10f5834d2 100644 --- a/src/usr/errl/errlsrc.C +++ b/src/usr/errl/errlsrc.C @@ -142,13 +142,16 @@ uint64_t ErrlSrc::flatten( void * o_pBuffer, const uint64_t i_cbBuffer ) // set deconfigure and/or gard bits if (iv_deconfig) { - psrc->word5 |= 0x02000000; // deconfigure - bit 6 + psrc->word5 |= ErrlSrc::DECONFIG_BIT; // deconfigure } if (iv_gard) { - psrc->word5 |= 0x01000000; // GARD - bit 7 + psrc->word5 |= ErrlSrc::GARD_BIT; // GARD } + // set ACK bit - means unacknowledged + psrc->word5 |= ErrlSrc::ACK_BIT; // ACK + // Stash the Hostboot long long words into the hexwords of the SRC. psrc->word6 = iv_user1; // spans 6-7 psrc->word8 = iv_user2; // spans 8-9 @@ -184,11 +187,11 @@ uint64_t ErrlSrc::unflatten( const void * i_buf) iv_user1 = p->word6; iv_user2 = p->word8; - if(p->word5 & 0x02000000) // deconfigure - bit 6 + if(p->word5 & ErrlSrc::DECONFIG_BIT) // deconfigure { iv_deconfig = true; } - if(p->word5 & 0x01000000) // GARD - bit 7 + if(p->word5 & ErrlSrc::GARD_BIT) // GARD { iv_gard = true; } @@ -198,7 +201,7 @@ uint64_t ErrlSrc::unflatten( const void * i_buf) // Quick hexdigit to binary converter. // Hopefull someday to replaced by strtoul -uint64_t ErrlSrc::aschex2bin(char c) +uint64_t ErrlSrc::aschex2bin(char c) const { if(c >= 'a' && c <= 'f') { diff --git a/src/usr/errl/test/errluserdetailtest.H b/src/usr/errl/test/errluserdetailtest.H index 70b4669ca..f68bf41d4 100644 --- a/src/usr/errl/test/errluserdetailtest.H +++ b/src/usr/errl/test/errluserdetailtest.H @@ -90,6 +90,19 @@ public: ErrlUserDetailsString stringUD("String test - string 3"); stringUD.addToLog(errl); + // shove a lot of traces here, so that we test the truncate in the + // write to PNOR + errl->collectTrace("TARG", 1024); + errl->collectTrace("TARG", 48); + errl->collectTrace("TARG", 1024); + errl->collectTrace("TARG", 48); + errl->collectTrace("TARG", 1024); + errl->collectTrace("TARG", 48); + errl->collectTrace("TARG", 1024); + errl->collectTrace("TARG", 48); + errl->collectTrace("TARG", 1024); + errl->collectTrace("TARG", 48); + // commit the errorlog errlCommit(errl, CXXTEST_COMP_ID); } diff --git a/src/usr/mbox/mailboxsp.C b/src/usr/mbox/mailboxsp.C index 41fe57873..791919323 100644 --- a/src/usr/mbox/mailboxsp.C +++ b/src/usr/mbox/mailboxsp.C @@ -38,6 +38,7 @@ #include <targeting/common/commontargeting.H> #include <kernel/ipc.H> #include <arch/ppc.H> +#include <errl/errlmanager.H> #define MBOX_TRACE_NAME MBOX_COMP_NAME @@ -190,6 +191,9 @@ errlHndl_t MailboxSp::_init() // Start the the interprocessor communications message handler IPC::IpcSp::init(err); + // call ErrlManager function - tell him that MBOX is ready! + ERRORLOG::ErrlManager::errlResourceReady(ERRORLOG::MBOX); + return err; } diff --git a/src/usr/pnor/pnorrp.C b/src/usr/pnor/pnorrp.C index 0f212bce5..e4171e79f 100644 --- a/src/usr/pnor/pnorrp.C +++ b/src/usr/pnor/pnorrp.C @@ -246,6 +246,9 @@ void PnorRP::initDaemon() iv_startupRC = l_errhdl->reasonCode(); } + // call ErrlManager function - tell him that PNOR is ready! + ERRORLOG::ErrlManager::errlResourceReady(ERRORLOG::PNOR); + TRACUCOMP(g_trac_pnor, "< PnorRP::initDaemon" ); } diff --git a/src/usr/targeting/targetservicestart.C b/src/usr/targeting/targetservicestart.C index dc252eed5..43770aedf 100644 --- a/src/usr/targeting/targetservicestart.C +++ b/src/usr/targeting/targetservicestart.C @@ -47,6 +47,7 @@ // Others #include <errl/errlentry.H> +#include <errl/errlmanager.H> #include <devicefw/userif.H> //****************************************************************************** @@ -95,6 +96,9 @@ static void initTargeting(errlHndl_t& io_pError) (void)l_targetService.init(); initializeAttributes(l_targetService); + + // call ErrlManager function - tell him that TARG is ready! + ERRORLOG::ErrlManager::errlResourceReady(ERRORLOG::TARG); } TARG_EXIT(); |