diff options
| author | Brian Horton <brianh@linux.ibm.com> | 2015-08-19 13:41:30 -0500 |
|---|---|---|
| committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2015-09-03 14:40:03 -0500 |
| commit | 0579143faea0a19bd4133ce5179855ec7dc3c9b7 (patch) | |
| tree | bb79013be84055a49f57913a8aa814ea20c5d776 /src/usr/errl | |
| parent | 06e40c4aa3efe6f006dc7971ad58f08f42117ea3 (diff) | |
| download | blackbird-hostboot-0579143faea0a19bd4133ce5179855ec7dc3c9b7.tar.gz blackbird-hostboot-0579143faea0a19bd4133ce5179855ec7dc3c9b7.zip | |
fix issues with dup sel/esel
. call errlmanager ack inline/synchronously, so that we don't have
dup eSELs due to shutdown
. don't resend sensor SEL events for errorlogs read out of PNOR
Depends-On: Ia2b211d8ebf489e92f2bc3d281d92175a6b61f30
Change-Id: I32c8e49ef2e00567987cdf030a225b61e7077cf6
CQ: SW312094
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/19993
Tested-by: Jenkins Server
Tested-by: Jenkins OP Build CI
Tested-by: Jenkins OP HW
Tested-by: FSP CI Jenkins
Reviewed-by: WILLIAM G. HOFFA <wghoffa@us.ibm.com>
Reviewed-by: Richard J. Knight <rjknight@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src/usr/errl')
| -rw-r--r-- | src/usr/errl/errlmanager.C | 50 | ||||
| -rw-r--r-- | src/usr/errl/errlmanager_common.C | 250 | ||||
| -rw-r--r-- | src/usr/errl/runtime/rt_errlmanager.C | 6 |
3 files changed, 147 insertions, 159 deletions
diff --git a/src/usr/errl/errlmanager.C b/src/usr/errl/errlmanager.C index dea951ce4..dbca48014 100644 --- a/src/usr/errl/errlmanager.C +++ b/src/usr/errl/errlmanager.C @@ -386,6 +386,13 @@ void ErrlManager::errlogMsgHndlr () // Mark IPMI processing complete _clearFlag(*it, IPMI_FLAG); } + else if (_isFlagSet(*it, IPMI_NOSEL_FLAG)) + { + // send errorlog + sendErrLogToBmc(it->first, false); + // Mark IPMI processing complete + _clearFlag(*it, IPMI_NOSEL_FLAG); + } _updateErrlListIter(it); } #endif @@ -759,8 +766,6 @@ void ErrlManager::setHwasProcessCalloutFn(HWAS::processCalloutFn i_fn) ERRORLOG::theErrlManager::instance().iv_hwasProcessCalloutFn = i_fn; } -/////////////////////////////////////////////////////////////////////////////// -// Global function (not a method on an object) to commit the error log. void ErrlManager::errlResourceReady(errlManagerNeeds i_needs) { ERRORLOG::theErrlManager::instance().sendResourcesMsg(i_needs); @@ -810,39 +815,6 @@ void ErrlManager::sendResourcesMsg(errlManagerNeeds i_needs) return; } -/////////////////////////////////////////////////////////////////////////////// -// Global function (not a method on an object) to ack that the error log -// was sent to the BMC. -void ErrlManager::errlAckErrorlog(uint32_t i_eid) -{ - ERRORLOG::theErrlManager::instance().sendAckErrorlog(i_eid); - return; -} - -void ErrlManager::sendAckErrorlog(uint32_t i_eid) -{ - TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::sendAddErrorlog 0x%.8X", - i_eid); - - //Create a message to send to Host boot error message queue. - msg_t *msg = msg_allocate(); - msg->type = ERRLOG_COMMITTED_ACK_RESPONSE_TYPE; - //Pass along the eid of the error, shifted up to the first word - msg->data[0] = static_cast<uint64_t>(i_eid) << 32; - - //Send the msg asynchronously to error message queue to handle. - int rc = msg_send ( ERRORLOG::ErrlManager::iv_msgQ, msg ); - - //Return code is non-zero when the message queue is invalid - //or the message type is invalid. - if ( rc ) - { - TRACFCOMP( g_trac_errl, ERR_MRK "Failed (rc=%d) to send ack 0x%.8X message.", - rc, i_eid); - } - return; -} - bool ErrlManager::errlCommittedThisBoot() { @@ -918,10 +890,6 @@ void ErrlManager::errlogShutdown() iv_errlList.pop_front(); } // while items on iv_errlList list - // Ensure that all the error logs are pushed out to PNOR - // prior to the PNOR resource provider shutting down. - PNOR::flush(PNOR::HB_ERRLOGS); - // Un-register error log message queue from the shutdown INITSERVICE::unregisterShutdownEvent( iv_msgQ); @@ -939,6 +907,10 @@ void ErrlManager::errlogShutdown() // shutting down. // msg_q_destroy(iv_msgQ); + // Ensure that all the error logs are pushed out to PNOR + // prior to the PNOR resource provider shutting down. + PNOR::flush(PNOR::HB_ERRLOGS); + return; } diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C index 17422cb12..51ed8fa77 100644 --- a/src/usr/errl/errlmanager_common.C +++ b/src/usr/errl/errlmanager_common.C @@ -42,13 +42,6 @@ const uint32_t EMPTY_ERRLOG_IN_PNOR = 0xFFFFFFFF; const uint32_t FIRST_BYTE_ERRLOG = 0xF0000000; /////////////////////////////////////////////////////////////////////////////// -// Atomically increment log id and return it. -uint32_t ErrlManager::getUniqueErrId() -{ - return (__sync_add_and_fetch(&iv_currLogId, 1)); -} - -/////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Global function (not a method on an object) to commit the error log. void errlCommit(errlHndl_t& io_err, compId_t i_committerComp ) @@ -57,13 +50,30 @@ void errlCommit(errlHndl_t& io_err, compId_t i_committerComp ) return; } +/////////////////////////////////////////////////////////////////////////////// +// Global function (not a method on an object) to ack that the error log +// was sent to the BMC. +void ErrlManager::errlAckErrorlog(uint32_t i_eid) +{ + ERRORLOG::theErrlManager::instance().ackErrLogInPnor(i_eid); + return; +} + +/////////////////////////////////////////////////////////////////////////////// // Global function (not a method on an object) to get the hidden logs flag. uint8_t getHiddenLogsEnable( ) { return ERRORLOG::theErrlManager::instance().iv_hiddenErrLogsEnable; } +/////////////////////////////////////////////////////////////////////////////// +// Atomically increment log id and return it. +uint32_t ErrlManager::getUniqueErrId() +{ + return (__sync_add_and_fetch(&iv_currLogId, 1)); +} + // ------------------------------------------------------------------ // setupPnorInfo // ------------------------------------------------------------------ @@ -156,7 +166,8 @@ void ErrlManager::setupPnorInfo() if (iv_isIpmiEnabled) { // convert to SEL/eSEL and send to BMC over IPMI - sendErrLogToBmc(err); + sendErrLogToBmc(err, + false /* do not resend SELs */); delete err; } else @@ -164,9 +175,9 @@ void ErrlManager::setupPnorInfo() TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo pushing slot %d eid %.8X to iv_errList.", i, l_id); - // Pair with IPMI flag to add to the errlList + // Pair with IPMI_NOSEL flag to add to the errlList // so that it'll get sent down when IPMI is up - ErrlFlagPair_t l_pair(err, IPMI_FLAG + ErrlFlagPair_t l_pair(err, IPMI_NOSEL_FLAG #ifdef CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY | ERRLDISP_FLAG #endif @@ -436,16 +447,16 @@ void getSensorOffsetBasedOnSeverity(errlHndl_t & io_err, void getSensorInfo(HWAS::callout_ud_t *i_ud, uint8_t &o_sensorNumber, uint8_t &o_eventOffset, - HWAS::callOutPriority &io_priority, errlHndl_t& io_error ); /////////////////////////////////////////////////////////////////////////////// // ErrlManager::sendErrLogToBmc() /////////////////////////////////////////////////////////////////////////////// -void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) +void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) { TRACFCOMP(g_trac_errl, ENTER_MRK - "sendErrLogToBmc errlogId 0x%.8x", io_err->eid()); + "sendErrLogToBmc errlogId 0x%.8x, i_sendSels %d", + io_err->eid(), i_sendSels); do { @@ -458,70 +469,66 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) break; } - // look thru the errlog for any Callout UserDetail sections - // to determine the sensor information for the SEL - // create a vector of sensor numbers and offsets std::vector<std::pair<uint8_t, uint8_t> > l_sensorNumbers; HWAS::callOutPriority l_priority = HWAS::SRCI_PRIORITY_NONE; - - for(std::vector<ErrlUD*>::const_iterator - it = io_err->iv_SectionVector.begin(); - it != io_err->iv_SectionVector.end(); - it++ ) + if (i_sendSels) { - uint8_t l_sensorNumber = TARGETING::UTIL::INVALID_IPMI_SENSOR; - uint8_t l_eventOffset = IPMISEL::event_data1_invalid_offset; - - HWAS::callout_ud_t *l_ud = - reinterpret_cast<HWAS::callout_ud_t*>((*it)->iv_pData); - - // if this is a CALLOUT that will have a target - if ((ERRL_COMP_ID == (*it)->iv_header.iv_compId) && - (1 == (*it)->iv_header.iv_ver) && - (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) ) + // look thru the errlog for any Callout UserDetail sections + // to determine the sensor information for the SEL + // create a vector of sensor numbers and offsets + for(std::vector<ErrlUD*>::const_iterator + it = io_err->iv_SectionVector.begin(); + it != io_err->iv_SectionVector.end(); + it++ ) { - // if this callout is higher than any previous callout - if (l_ud->priority > l_priority) + // if this is a CALLOUT + if ((ERRL_COMP_ID == (*it)->iv_header.iv_compId) && + (1 == (*it)->iv_header.iv_ver) && + (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) ) { - TRACFCOMP(g_trac_errl, - "sendErrLogToBmc new priority picked 0x%x > 0x%x", - l_ud->priority, l_priority ); - - // get sensor number for the target. - // we found a higher priority callout, get the sensor - // information for it - getSensorInfo( l_ud, l_sensorNumber, l_eventOffset, - l_priority, io_err); - - TRACFCOMP(g_trac_errl, - "l_sensorNumber = 0x%x, l_eventOffset = 0x%x", - l_sensorNumber, l_eventOffset ); + HWAS::callout_ud_t *l_ud = + reinterpret_cast<HWAS::callout_ud_t*>((*it)->iv_pData); + // if this callout isn't higher than any previous callout + if (l_ud->priority < l_priority) + { + continue; // on to the next + } - //remove previous sensor data - l_sensorNumbers.clear(); + // if greater, than clear out the previous list + if (l_ud->priority > l_priority) + { + TRACFCOMP(g_trac_errl, + "sendErrLogToBmc new priority picked 0x%x > 0x%x", + l_ud->priority, l_priority ); - l_sensorNumbers.push_back(std::make_pair(l_sensorNumber, - l_eventOffset)); + //remove previous sensor data + l_sensorNumbers.clear(); // and update the priority - l_priority = l_ud->priority; + l_priority = l_ud->priority; + } + + // greater than or equal - save this sensor - } - // or if it has the same priority - else if(l_ud->priority == l_priority) - { //get the sensor number for the target + uint8_t l_sensorNumber = + TARGETING::UTIL::INVALID_IPMI_SENSOR; + uint8_t l_eventOffset = + IPMISEL::event_data1_invalid_offset; - getSensorInfo( l_ud, l_sensorNumber, - l_eventOffset, l_priority, io_err); + getSensorInfo( l_ud, l_sensorNumber, l_eventOffset, + io_err); - l_sensorNumbers.push_back(std::make_pair(l_sensorNumber, - l_eventOffset)); - } - } - } // for each SectionVector + TRACFCOMP(g_trac_errl, + "l_sensorNumber = 0x%x, l_eventOffset = 0x%x", + l_sensorNumber, l_eventOffset ); + l_sensorNumbers.push_back( + std::make_pair(l_sensorNumber, l_eventOffset)); + } // if callout + } // for each SectionVector + } // if i_sendSels // flatten into buffer, truncate to max eSEL size uint32_t l_pelSize = io_err->flattenedSize(); @@ -537,7 +544,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) uint32_t l_errSize = io_err->flatten (l_pelData, l_pelSize, true /* truncate */); - if (l_errSize ==0 ) + if (l_errSize == 0 ) { // flatten didn't work TRACFCOMP( g_trac_errl, ERR_MRK @@ -546,71 +553,87 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) break; } - for(size_t i = 0; i < l_sensorNumbers.size(); i++) + if (i_sendSels) { + for(size_t i = 0; i < l_sensorNumbers.size(); i++) + { + uint8_t l_eventDirType = IPMISEL::sensor_specific; - uint8_t l_eventDirType = IPMISEL::sensor_specific; - - // if the offset is unknown after this then it will - // be updated based on elog severity below - uint8_t l_eventOffset = l_sensorNumbers.at(i).second ; + // if the offset is unknown after this then it will + // be updated based on elog severity below + uint8_t l_eventOffset = l_sensorNumbers.at(i).second ; - // last ditch effort, if no sensor number is present at this - // point, just use the system event sensor - if( l_sensorNumbers.at(i).first == - TARGETING::UTIL::INVALID_IPMI_SENSOR ) - { - l_sensorNumbers.at(i).first = - TARGETING::UTIL::getSensorNumber(NULL, - TARGETING::SENSOR_NAME_SYSTEM_EVENT); + // last ditch effort, if no sensor number is present at this + // point, just use the system event sensor + if( l_sensorNumbers.at(i).first == + TARGETING::UTIL::INVALID_IPMI_SENSOR ) + { + l_sensorNumbers.at(i).first = + TARGETING::UTIL::getSensorNumber(NULL, + TARGETING::SENSOR_NAME_SYSTEM_EVENT); - l_sensorNumbers.at(i).second = - SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; + l_sensorNumbers.at(i).second = + SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; - } + } - // grab the sensor type so the bmc knows how to use the offset - uint8_t unused = 0; - uint8_t l_SensorType = 0; + // grab the sensor type so the bmc knows how to use the offset + uint8_t unused = 0; + uint8_t l_SensorType = 0; - errlHndl_t e = - SENSOR::SensorBase::getSensorType( - l_sensorNumbers.at(i).first, - l_SensorType,unused); + errlHndl_t e = + SENSOR::SensorBase::getSensorType( + l_sensorNumbers.at(i).first, + l_SensorType,unused); - if( e ) - { - TRACFCOMP(g_trac_errl, - ERR_MRK"Failed to get sensor type for sensor %d", - l_sensorNumbers.at(i).first); - - l_SensorType = 0; - // since we are in the commit path, lets just delete this - // error and move on. - delete e; - } + if( e ) + { + TRACFCOMP(g_trac_errl, + ERR_MRK"Failed to get sensor type for sensor %d", + l_sensorNumbers.at(i).first); - // if no offset has been configured set it based on the severity - if( l_eventOffset == IPMISEL::event_data1_invalid_offset ) - { - getSensorOffsetBasedOnSeverity(io_err, l_eventDirType, - l_eventOffset ); - } + l_SensorType = 0; + // since we are in the commit path, lets just delete this + // error and move on. + delete e; + } - // if we are sending the first sel then we will include the - // pel data, otherwise we send no data - uint32_t selSize = ( i == 0 ) ? l_pelSize:0; + // if no offset has been configured set it based on the severity + if( l_eventOffset == IPMISEL::event_data1_invalid_offset ) + { + getSensorOffsetBasedOnSeverity(io_err, l_eventDirType, + l_eventOffset ); + } + // if we are sending the first sel then we will include the + // pel data, otherwise we send no data + uint32_t selSize = ( i == 0 ) ? l_pelSize:0; + + TRACFCOMP(g_trac_errl, INFO_MRK + "sendErrLogToBmc: sensor %.2x/%.2x event %x/%x, size %d", + l_SensorType, l_sensorNumbers.at(i).first, + l_eventDirType, l_eventOffset, selSize ); + + IPMISEL::sendESEL(l_pelData, selSize, + io_err->eid(), + l_eventDirType, l_eventOffset, + l_SensorType, + l_sensorNumbers.at(i).first); + } // for l_sensorNumbers + } + else + { + // don't send sensor SELs TRACFCOMP(g_trac_errl, INFO_MRK - "sendErrLogToBmc: sensor %.2x/%.2x event %x/%x, size %d", - l_SensorType, l_sensorNumbers.at(i).first, - l_eventDirType, l_eventOffset, selSize ); + "sendErrLogToBmc: no sensor SELs, size %d", + l_pelSize ); - IPMISEL::sendESEL(l_pelData, selSize, + uint8_t l_eventDirType = IPMISEL::sensor_specific; + uint8_t l_eventOffset = IPMISEL::event_data1_invalid_offset; + IPMISEL::sendESEL(l_pelData, l_pelSize, io_err->eid(), l_eventDirType, l_eventOffset, - l_SensorType, - l_sensorNumbers.at(i).first); + SENSOR::INVALID_TYPE, TARGETING::UTIL::INVALID_IPMI_SENSOR); } // free the buffer @@ -623,7 +646,6 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) void getSensorInfo(HWAS::callout_ud_t *i_ud, uint8_t &o_sensorNumber, uint8_t &o_eventOffset, - HWAS::callOutPriority &io_priority, errlHndl_t &io_err ) { diff --git a/src/usr/errl/runtime/rt_errlmanager.C b/src/usr/errl/runtime/rt_errlmanager.C index f75844172..49d53b65f 100644 --- a/src/usr/errl/runtime/rt_errlmanager.C +++ b/src/usr/errl/runtime/rt_errlmanager.C @@ -304,10 +304,4 @@ bool rt_processCallout(errlHndl_t &io_errl, return true; } -void ErrlManager::errlAckErrorlog(uint32_t i_eid) -{ - ERRORLOG::theErrlManager::instance().ackErrLogInPnor(i_eid); - return; -} - } // End namespace |

