diff options
-rw-r--r-- | src/include/usr/errl/errlmanager.H | 21 | ||||
-rw-r--r-- | src/usr/errl/errlmanager.C | 50 | ||||
-rw-r--r-- | src/usr/errl/errlmanager_common.C | 250 | ||||
-rw-r--r-- | src/usr/errl/runtime/rt_errlmanager.C | 6 | ||||
-rw-r--r-- | src/usr/ipmi/ipmisel.C | 83 |
5 files changed, 213 insertions, 197 deletions
diff --git a/src/include/usr/errl/errlmanager.H b/src/include/usr/errl/errlmanager.H index 586b42221..be3445007 100644 --- a/src/include/usr/errl/errlmanager.H +++ b/src/include/usr/errl/errlmanager.H @@ -482,9 +482,11 @@ private: * @brief Create an ipmi message with the error log and send it to BMC * * @param[in,out] io_err Error log handle to be committed - * + * @param[in] i_sendSels true if sensor SELs should be sent to BMC, + * false if not (ie, on read from PNOR, do not + * (re)send the SEL to the BMC */ - void sendErrLogToBmc(errlHndl_t &io_err); + void sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels = true); #endif /** @@ -504,16 +506,17 @@ private: // Note: When adding a new flag, add to the trace in errlogShutdown() enum ERRLOG_FLAGS { - PNOR_FLAG = 0x01, - MBOX_FLAG = 0x02, - ERRLDISP_FLAG = 0x04, - IPMI_FLAG = 0x08, - ALL_FLAGS = PNOR_FLAG | MBOX_FLAG + PNOR_FLAG = 0x01, + MBOX_FLAG = 0x02, + ERRLDISP_FLAG = 0x04, + IPMI_FLAG = 0x08, + IPMI_NOSEL_FLAG = 0x10, + ALL_FLAGS = PNOR_FLAG | MBOX_FLAG #ifdef CONFIG_BMC_IPMI - | IPMI_FLAG + | IPMI_FLAG #endif #ifdef CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY - | ERRLDISP_FLAG + | ERRLDISP_FLAG #endif }; diff --git a/src/usr/errl/errlmanager.C b/src/usr/errl/errlmanager.C index dea951ce4..dbca48014 100644 --- a/src/usr/errl/errlmanager.C +++ b/src/usr/errl/errlmanager.C @@ -386,6 +386,13 @@ void ErrlManager::errlogMsgHndlr () // Mark IPMI processing complete _clearFlag(*it, IPMI_FLAG); } + else if (_isFlagSet(*it, IPMI_NOSEL_FLAG)) + { + // send errorlog + sendErrLogToBmc(it->first, false); + // Mark IPMI processing complete + _clearFlag(*it, IPMI_NOSEL_FLAG); + } _updateErrlListIter(it); } #endif @@ -759,8 +766,6 @@ void ErrlManager::setHwasProcessCalloutFn(HWAS::processCalloutFn i_fn) ERRORLOG::theErrlManager::instance().iv_hwasProcessCalloutFn = i_fn; } -/////////////////////////////////////////////////////////////////////////////// -// Global function (not a method on an object) to commit the error log. void ErrlManager::errlResourceReady(errlManagerNeeds i_needs) { ERRORLOG::theErrlManager::instance().sendResourcesMsg(i_needs); @@ -810,39 +815,6 @@ void ErrlManager::sendResourcesMsg(errlManagerNeeds i_needs) return; } -/////////////////////////////////////////////////////////////////////////////// -// Global function (not a method on an object) to ack that the error log -// was sent to the BMC. -void ErrlManager::errlAckErrorlog(uint32_t i_eid) -{ - ERRORLOG::theErrlManager::instance().sendAckErrorlog(i_eid); - return; -} - -void ErrlManager::sendAckErrorlog(uint32_t i_eid) -{ - TRACFCOMP( g_trac_errl, ENTER_MRK"ErrlManager::sendAddErrorlog 0x%.8X", - i_eid); - - //Create a message to send to Host boot error message queue. - msg_t *msg = msg_allocate(); - msg->type = ERRLOG_COMMITTED_ACK_RESPONSE_TYPE; - //Pass along the eid of the error, shifted up to the first word - msg->data[0] = static_cast<uint64_t>(i_eid) << 32; - - //Send the msg asynchronously to error message queue to handle. - int rc = msg_send ( ERRORLOG::ErrlManager::iv_msgQ, msg ); - - //Return code is non-zero when the message queue is invalid - //or the message type is invalid. - if ( rc ) - { - TRACFCOMP( g_trac_errl, ERR_MRK "Failed (rc=%d) to send ack 0x%.8X message.", - rc, i_eid); - } - return; -} - bool ErrlManager::errlCommittedThisBoot() { @@ -918,10 +890,6 @@ void ErrlManager::errlogShutdown() iv_errlList.pop_front(); } // while items on iv_errlList list - // Ensure that all the error logs are pushed out to PNOR - // prior to the PNOR resource provider shutting down. - PNOR::flush(PNOR::HB_ERRLOGS); - // Un-register error log message queue from the shutdown INITSERVICE::unregisterShutdownEvent( iv_msgQ); @@ -939,6 +907,10 @@ void ErrlManager::errlogShutdown() // shutting down. // msg_q_destroy(iv_msgQ); + // Ensure that all the error logs are pushed out to PNOR + // prior to the PNOR resource provider shutting down. + PNOR::flush(PNOR::HB_ERRLOGS); + return; } diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C index 17422cb12..51ed8fa77 100644 --- a/src/usr/errl/errlmanager_common.C +++ b/src/usr/errl/errlmanager_common.C @@ -42,13 +42,6 @@ const uint32_t EMPTY_ERRLOG_IN_PNOR = 0xFFFFFFFF; const uint32_t FIRST_BYTE_ERRLOG = 0xF0000000; /////////////////////////////////////////////////////////////////////////////// -// Atomically increment log id and return it. -uint32_t ErrlManager::getUniqueErrId() -{ - return (__sync_add_and_fetch(&iv_currLogId, 1)); -} - -/////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Global function (not a method on an object) to commit the error log. void errlCommit(errlHndl_t& io_err, compId_t i_committerComp ) @@ -57,13 +50,30 @@ void errlCommit(errlHndl_t& io_err, compId_t i_committerComp ) return; } +/////////////////////////////////////////////////////////////////////////////// +// Global function (not a method on an object) to ack that the error log +// was sent to the BMC. +void ErrlManager::errlAckErrorlog(uint32_t i_eid) +{ + ERRORLOG::theErrlManager::instance().ackErrLogInPnor(i_eid); + return; +} + +/////////////////////////////////////////////////////////////////////////////// // Global function (not a method on an object) to get the hidden logs flag. uint8_t getHiddenLogsEnable( ) { return ERRORLOG::theErrlManager::instance().iv_hiddenErrLogsEnable; } +/////////////////////////////////////////////////////////////////////////////// +// Atomically increment log id and return it. +uint32_t ErrlManager::getUniqueErrId() +{ + return (__sync_add_and_fetch(&iv_currLogId, 1)); +} + // ------------------------------------------------------------------ // setupPnorInfo // ------------------------------------------------------------------ @@ -156,7 +166,8 @@ void ErrlManager::setupPnorInfo() if (iv_isIpmiEnabled) { // convert to SEL/eSEL and send to BMC over IPMI - sendErrLogToBmc(err); + sendErrLogToBmc(err, + false /* do not resend SELs */); delete err; } else @@ -164,9 +175,9 @@ void ErrlManager::setupPnorInfo() TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo pushing slot %d eid %.8X to iv_errList.", i, l_id); - // Pair with IPMI flag to add to the errlList + // Pair with IPMI_NOSEL flag to add to the errlList // so that it'll get sent down when IPMI is up - ErrlFlagPair_t l_pair(err, IPMI_FLAG + ErrlFlagPair_t l_pair(err, IPMI_NOSEL_FLAG #ifdef CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY | ERRLDISP_FLAG #endif @@ -436,16 +447,16 @@ void getSensorOffsetBasedOnSeverity(errlHndl_t & io_err, void getSensorInfo(HWAS::callout_ud_t *i_ud, uint8_t &o_sensorNumber, uint8_t &o_eventOffset, - HWAS::callOutPriority &io_priority, errlHndl_t& io_error ); /////////////////////////////////////////////////////////////////////////////// // ErrlManager::sendErrLogToBmc() /////////////////////////////////////////////////////////////////////////////// -void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) +void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err, bool i_sendSels) { TRACFCOMP(g_trac_errl, ENTER_MRK - "sendErrLogToBmc errlogId 0x%.8x", io_err->eid()); + "sendErrLogToBmc errlogId 0x%.8x, i_sendSels %d", + io_err->eid(), i_sendSels); do { @@ -458,70 +469,66 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) break; } - // look thru the errlog for any Callout UserDetail sections - // to determine the sensor information for the SEL - // create a vector of sensor numbers and offsets std::vector<std::pair<uint8_t, uint8_t> > l_sensorNumbers; HWAS::callOutPriority l_priority = HWAS::SRCI_PRIORITY_NONE; - - for(std::vector<ErrlUD*>::const_iterator - it = io_err->iv_SectionVector.begin(); - it != io_err->iv_SectionVector.end(); - it++ ) + if (i_sendSels) { - uint8_t l_sensorNumber = TARGETING::UTIL::INVALID_IPMI_SENSOR; - uint8_t l_eventOffset = IPMISEL::event_data1_invalid_offset; - - HWAS::callout_ud_t *l_ud = - reinterpret_cast<HWAS::callout_ud_t*>((*it)->iv_pData); - - // if this is a CALLOUT that will have a target - if ((ERRL_COMP_ID == (*it)->iv_header.iv_compId) && - (1 == (*it)->iv_header.iv_ver) && - (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) ) + // look thru the errlog for any Callout UserDetail sections + // to determine the sensor information for the SEL + // create a vector of sensor numbers and offsets + for(std::vector<ErrlUD*>::const_iterator + it = io_err->iv_SectionVector.begin(); + it != io_err->iv_SectionVector.end(); + it++ ) { - // if this callout is higher than any previous callout - if (l_ud->priority > l_priority) + // if this is a CALLOUT + if ((ERRL_COMP_ID == (*it)->iv_header.iv_compId) && + (1 == (*it)->iv_header.iv_ver) && + (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) ) { - TRACFCOMP(g_trac_errl, - "sendErrLogToBmc new priority picked 0x%x > 0x%x", - l_ud->priority, l_priority ); - - // get sensor number for the target. - // we found a higher priority callout, get the sensor - // information for it - getSensorInfo( l_ud, l_sensorNumber, l_eventOffset, - l_priority, io_err); - - TRACFCOMP(g_trac_errl, - "l_sensorNumber = 0x%x, l_eventOffset = 0x%x", - l_sensorNumber, l_eventOffset ); + HWAS::callout_ud_t *l_ud = + reinterpret_cast<HWAS::callout_ud_t*>((*it)->iv_pData); + // if this callout isn't higher than any previous callout + if (l_ud->priority < l_priority) + { + continue; // on to the next + } - //remove previous sensor data - l_sensorNumbers.clear(); + // if greater, than clear out the previous list + if (l_ud->priority > l_priority) + { + TRACFCOMP(g_trac_errl, + "sendErrLogToBmc new priority picked 0x%x > 0x%x", + l_ud->priority, l_priority ); - l_sensorNumbers.push_back(std::make_pair(l_sensorNumber, - l_eventOffset)); + //remove previous sensor data + l_sensorNumbers.clear(); // and update the priority - l_priority = l_ud->priority; + l_priority = l_ud->priority; + } + + // greater than or equal - save this sensor - } - // or if it has the same priority - else if(l_ud->priority == l_priority) - { //get the sensor number for the target + uint8_t l_sensorNumber = + TARGETING::UTIL::INVALID_IPMI_SENSOR; + uint8_t l_eventOffset = + IPMISEL::event_data1_invalid_offset; - getSensorInfo( l_ud, l_sensorNumber, - l_eventOffset, l_priority, io_err); + getSensorInfo( l_ud, l_sensorNumber, l_eventOffset, + io_err); - l_sensorNumbers.push_back(std::make_pair(l_sensorNumber, - l_eventOffset)); - } - } - } // for each SectionVector + TRACFCOMP(g_trac_errl, + "l_sensorNumber = 0x%x, l_eventOffset = 0x%x", + l_sensorNumber, l_eventOffset ); + l_sensorNumbers.push_back( + std::make_pair(l_sensorNumber, l_eventOffset)); + } // if callout + } // for each SectionVector + } // if i_sendSels // flatten into buffer, truncate to max eSEL size uint32_t l_pelSize = io_err->flattenedSize(); @@ -537,7 +544,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) uint32_t l_errSize = io_err->flatten (l_pelData, l_pelSize, true /* truncate */); - if (l_errSize ==0 ) + if (l_errSize == 0 ) { // flatten didn't work TRACFCOMP( g_trac_errl, ERR_MRK @@ -546,71 +553,87 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) break; } - for(size_t i = 0; i < l_sensorNumbers.size(); i++) + if (i_sendSels) { + for(size_t i = 0; i < l_sensorNumbers.size(); i++) + { + uint8_t l_eventDirType = IPMISEL::sensor_specific; - uint8_t l_eventDirType = IPMISEL::sensor_specific; - - // if the offset is unknown after this then it will - // be updated based on elog severity below - uint8_t l_eventOffset = l_sensorNumbers.at(i).second ; + // if the offset is unknown after this then it will + // be updated based on elog severity below + uint8_t l_eventOffset = l_sensorNumbers.at(i).second ; - // last ditch effort, if no sensor number is present at this - // point, just use the system event sensor - if( l_sensorNumbers.at(i).first == - TARGETING::UTIL::INVALID_IPMI_SENSOR ) - { - l_sensorNumbers.at(i).first = - TARGETING::UTIL::getSensorNumber(NULL, - TARGETING::SENSOR_NAME_SYSTEM_EVENT); + // last ditch effort, if no sensor number is present at this + // point, just use the system event sensor + if( l_sensorNumbers.at(i).first == + TARGETING::UTIL::INVALID_IPMI_SENSOR ) + { + l_sensorNumbers.at(i).first = + TARGETING::UTIL::getSensorNumber(NULL, + TARGETING::SENSOR_NAME_SYSTEM_EVENT); - l_sensorNumbers.at(i).second = - SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; + l_sensorNumbers.at(i).second = + SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; - } + } - // grab the sensor type so the bmc knows how to use the offset - uint8_t unused = 0; - uint8_t l_SensorType = 0; + // grab the sensor type so the bmc knows how to use the offset + uint8_t unused = 0; + uint8_t l_SensorType = 0; - errlHndl_t e = - SENSOR::SensorBase::getSensorType( - l_sensorNumbers.at(i).first, - l_SensorType,unused); + errlHndl_t e = + SENSOR::SensorBase::getSensorType( + l_sensorNumbers.at(i).first, + l_SensorType,unused); - if( e ) - { - TRACFCOMP(g_trac_errl, - ERR_MRK"Failed to get sensor type for sensor %d", - l_sensorNumbers.at(i).first); - - l_SensorType = 0; - // since we are in the commit path, lets just delete this - // error and move on. - delete e; - } + if( e ) + { + TRACFCOMP(g_trac_errl, + ERR_MRK"Failed to get sensor type for sensor %d", + l_sensorNumbers.at(i).first); - // if no offset has been configured set it based on the severity - if( l_eventOffset == IPMISEL::event_data1_invalid_offset ) - { - getSensorOffsetBasedOnSeverity(io_err, l_eventDirType, - l_eventOffset ); - } + l_SensorType = 0; + // since we are in the commit path, lets just delete this + // error and move on. + delete e; + } - // if we are sending the first sel then we will include the - // pel data, otherwise we send no data - uint32_t selSize = ( i == 0 ) ? l_pelSize:0; + // if no offset has been configured set it based on the severity + if( l_eventOffset == IPMISEL::event_data1_invalid_offset ) + { + getSensorOffsetBasedOnSeverity(io_err, l_eventDirType, + l_eventOffset ); + } + // if we are sending the first sel then we will include the + // pel data, otherwise we send no data + uint32_t selSize = ( i == 0 ) ? l_pelSize:0; + + TRACFCOMP(g_trac_errl, INFO_MRK + "sendErrLogToBmc: sensor %.2x/%.2x event %x/%x, size %d", + l_SensorType, l_sensorNumbers.at(i).first, + l_eventDirType, l_eventOffset, selSize ); + + IPMISEL::sendESEL(l_pelData, selSize, + io_err->eid(), + l_eventDirType, l_eventOffset, + l_SensorType, + l_sensorNumbers.at(i).first); + } // for l_sensorNumbers + } + else + { + // don't send sensor SELs TRACFCOMP(g_trac_errl, INFO_MRK - "sendErrLogToBmc: sensor %.2x/%.2x event %x/%x, size %d", - l_SensorType, l_sensorNumbers.at(i).first, - l_eventDirType, l_eventOffset, selSize ); + "sendErrLogToBmc: no sensor SELs, size %d", + l_pelSize ); - IPMISEL::sendESEL(l_pelData, selSize, + uint8_t l_eventDirType = IPMISEL::sensor_specific; + uint8_t l_eventOffset = IPMISEL::event_data1_invalid_offset; + IPMISEL::sendESEL(l_pelData, l_pelSize, io_err->eid(), l_eventDirType, l_eventOffset, - l_SensorType, - l_sensorNumbers.at(i).first); + SENSOR::INVALID_TYPE, TARGETING::UTIL::INVALID_IPMI_SENSOR); } // free the buffer @@ -623,7 +646,6 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) void getSensorInfo(HWAS::callout_ud_t *i_ud, uint8_t &o_sensorNumber, uint8_t &o_eventOffset, - HWAS::callOutPriority &io_priority, errlHndl_t &io_err ) { diff --git a/src/usr/errl/runtime/rt_errlmanager.C b/src/usr/errl/runtime/rt_errlmanager.C index f75844172..49d53b65f 100644 --- a/src/usr/errl/runtime/rt_errlmanager.C +++ b/src/usr/errl/runtime/rt_errlmanager.C @@ -304,10 +304,4 @@ bool rt_processCallout(errlHndl_t &io_errl, return true; } -void ErrlManager::errlAckErrorlog(uint32_t i_eid) -{ - ERRORLOG::theErrlManager::instance().ackErrLogInPnor(i_eid); - return; -} - } // End namespace diff --git a/src/usr/ipmi/ipmisel.C b/src/usr/ipmi/ipmisel.C index 54512e606..26ef43771 100644 --- a/src/usr/ipmi/ipmisel.C +++ b/src/usr/ipmi/ipmisel.C @@ -32,6 +32,7 @@ #include <ipmi/ipmisel.H> #include "ipmiconfig.H" #include <ipmi/ipmi_reasoncodes.H> +#include <ipmi/ipmisensor.H> #include <sys/task.h> #include <initservice/taskargs.H> @@ -190,8 +191,8 @@ void process_esel(msg_t *i_msg) else if((l_cc == IPMI::CC_OK) && // no error (l_eid != 0)) // and it's an errorlog { - // eSEL successfully sent to the BMC - 'send' an ack to the errlmanager - IPMI_TRAC(INFO_MRK "Sending ack for eid 0x%.8X", l_eid); + // eSEL successfully sent to the BMC - have errlmanager do the ack + IPMI_TRAC(INFO_MRK "Doing ack for eid 0x%.8X", l_eid); ERRORLOG::ErrlManager::errlAckErrorlog(l_eid); } @@ -216,6 +217,14 @@ void send_esel(eselInitData * i_data, do{ const size_t l_eSELlen = i_data->dataSize; + + if (l_eSELlen == 0) + { + IPMI_TRAC(INFO_MRK "no eSEL data present, skipping to SEL"); + // sending sensor SELs only, not the eSEL + break; + } + uint8_t reserveID[2] = {0,0}; // we need to send down the extended sel data (eSEL), which is // longer than the protocol buffer, so we need to do a reservation and @@ -339,41 +348,57 @@ void send_esel(eselInitData * i_data, }while(0); // if eSEL wasn't created due to an error, we don't want to continue - if(o_err == NULL) + if (o_err == NULL) { - // if the eSEL wasn't created due to a bad completion code, we will - // still try to send down a SEL that we create, which will contain - // the eSEL recordID (if it was successful) - delete [] data; - len = sizeof(IPMISEL::selRecord); - data = new uint8_t[len]; - - // copy in the SEL event record data - memcpy(data, i_data->eSel, sizeof(IPMISEL::selRecord)); - // copy the eSEL recordID (if it was created) into the extra data area - data[offsetof(selRecord,event_data2)] = esel_recordID[1]; - data[offsetof(selRecord,event_data3)] = esel_recordID[0]; - - // use local cc so that we don't corrupt the esel from above - IPMI::completion_code l_cc = IPMI::CC_UNKBAD; - TRACFBIN( g_trac_ipmi, INFO_MRK"add_sel:", data, len); - o_err = IPMI::sendrecv(IPMI::add_sel(),l_cc,len,data); - if(o_err) + // caller wants us to NOT create sensor SEL + if ((i_data->eSel[offsetof(selRecord,sensor_type)] == SENSOR::INVALID_TYPE) && + (i_data->eSel[offsetof(selRecord,sensor_number)] == TARGETING::UTIL::INVALID_IPMI_SENSOR) + ) { - IPMI_TRAC(ERR_MRK "error from add_sel"); - } - else if (l_cc != IPMI::CC_OK) - { - IPMI_TRAC(ERR_MRK "failed add_sel, l_cc %02x", l_cc); + IPMI_TRAC(INFO_MRK "Invalid sensor type/number - NOT sending sensor SELs"); } else { - // if CC_OK, then len = 2 and data contains the recordID of the new SEL - storeReserveRecord(sel_recordID,data); + // if the eSEL wasn't created due to a bad completion code, we will + // still try to send down a SEL that we create, which will contain + // the eSEL recordID (if it was successful) + if (data) + { + delete [] data; + } + len = sizeof(IPMISEL::selRecord); + data = new uint8_t[len]; + + // copy in the SEL event record data + memcpy(data, i_data->eSel, sizeof(IPMISEL::selRecord)); + // copy the eSEL recordID (if it was created) into the extra data area + data[offsetof(selRecord,event_data2)] = esel_recordID[1]; + data[offsetof(selRecord,event_data3)] = esel_recordID[0]; + + // use local cc so that we don't corrupt the esel from above + IPMI::completion_code l_cc = IPMI::CC_UNKBAD; + TRACFBIN( g_trac_ipmi, INFO_MRK"add_sel:", data, len); + o_err = IPMI::sendrecv(IPMI::add_sel(),l_cc,len,data); + if(o_err) + { + IPMI_TRAC(ERR_MRK "error from add_sel"); + } + else if (l_cc != IPMI::CC_OK) + { + IPMI_TRAC(ERR_MRK "failed add_sel, l_cc %02x", l_cc); + } + else + { + // if CC_OK, then len=2 and data contains the recordID of the new SEL + storeReserveRecord(sel_recordID,data); + } } } - delete[] data; + if (data) + { + delete [] data; + } IPMI_TRAC(EXIT_MRK "send_esel o_err=%.8X, o_cc=x%.2x, sel recID=x%x%x, esel recID=x%x%x", |