From a85a8f8c44e76c0edfeca53835cebbc7d2fdd383 Mon Sep 17 00:00:00 2001 From: "Richard J. Knight" Date: Mon, 27 Apr 2015 12:10:21 -0500 Subject: Return valid fault sensors for all types of callouts - Update code to return sensor numbers for targets which represent replaceable parts. - Use System Event sensor for procedure callouts. - Update code to handle all hw callout types. Change-Id: I626bce5c8c0c8b7d0a44408280de178c7a86a83f CQ:SW306556 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/18097 Reviewed-by: WILLIAM G. HOFFA Reviewed-by: A. Patrick Williams III Tested-by: FSP CI Jenkins Reviewed-by: Daniel M. Crowell Tested-by: Jenkins Server Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW --- src/include/usr/ipmi/ipmisel.H | 8 +- src/include/usr/ipmi/ipmisensor.H | 19 +++ src/usr/errl/errlmanager_common.C | 317 ++++++++++++++++++++++++++------------ src/usr/ipmi/ipmisel.C | 2 +- src/usr/ipmi/ipmisensor.C | 51 ++++-- 5 files changed, 280 insertions(+), 117 deletions(-) diff --git a/src/include/usr/ipmi/ipmisel.H b/src/include/usr/ipmi/ipmisel.H index af808515d..13f44a84b 100644 --- a/src/include/usr/ipmi/ipmisel.H +++ b/src/include/usr/ipmi/ipmisel.H @@ -87,7 +87,9 @@ namespace IPMISEL format_ipmi_version_2_0 = 0x04, }; - // event_type, per section 42.1 of the IPMI spec + // sel_event_dir_type type, per section 42.1 of the IPMI spec + // bit = 0 -> 0 is an assertion event + // bits 1:7 describe the sensor type enum sel_event_dir_type { event_unspecified = 0x00, @@ -95,8 +97,9 @@ namespace IPMISEL event_state = 0x03, event_predictive = 0x04, event_limit = 0x05, - event_permformance = 0x06, + event_performance = 0x06, event_transition = 0x07, + sensor_specific = 0x6f, event_OEM = 0x70, }; @@ -114,6 +117,7 @@ namespace IPMISEL event_data1_trans_to_non_recoverable = 0x06, event_data1_trans_monitor = 0x07, event_data1_trans_informational = 0x08, + event_data1_invalid_offset = 0xFF, }; enum sel_generator_id diff --git a/src/include/usr/ipmi/ipmisensor.H b/src/include/usr/ipmi/ipmisensor.H index 86b9051ba..ba7d7bfd1 100644 --- a/src/include/usr/ipmi/ipmisensor.H +++ b/src/include/usr/ipmi/ipmisensor.H @@ -116,6 +116,19 @@ namespace SENSOR SYSTEM_FIRMWARE_PROGRESS = 0x02, }; + + /** + * @enum systemEventSensorOffsets + * offSets specific to the system event sensor. + * + */ + enum systemEventSensorOffsets + { + // offset 02h + UNDETERMINED_SYSTEM_HW_FAILURE = 0x02, + }; + + /** * @enum discrete09_Offsets * @@ -924,6 +937,12 @@ namespace SENSOR uint16_t getSensorOffsets(TARGETING::SENSOR_NAME i_name, sensorReadingType &o_readType ); + /** + * Helper function to return the backplane fault sensor + * + * @return sensor number + */ + uint8_t getBackPlaneFaultSensor(); }; // end namespace diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C index 3b3f3f03b..a87323813 100644 --- a/src/usr/errl/errlmanager_common.C +++ b/src/usr/errl/errlmanager_common.C @@ -35,6 +35,18 @@ namespace ERRORLOG extern trace_desc_t* g_trac_errl; #ifdef CONFIG_BMC_IPMI + +void getSensorOffsetBasedOnSeverity(errlHndl_t & io_err, + uint8_t &o_eventDirType, + uint8_t & o_offset ); + +// helper function to gather sensor information +void getSensorInfo(HWAS::callout_ud_t *i_ud, + uint8_t &o_sensorNumber, + uint8_t &o_eventOffset, + HWAS::callOutPriority &io_priority, + errlHndl_t& io_error ); + void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) { TRACFCOMP(g_trac_errl, ENTER_MRK @@ -52,86 +64,69 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) } // look thru the errlog for any Callout UserDetail sections - // to determine the sensor information for the SEL - std::vector l_sensorNumbers; - std::vector l_sensorTypes; + // to determine the sensor information for the SEL + // create a vector of sensor numbers and offsets + std::vector > l_sensorNumbers; HWAS::callOutPriority l_priority = HWAS::SRCI_PRIORITY_NONE; + for(std::vector::const_iterator it = io_err->iv_SectionVector.begin(); it != io_err->iv_SectionVector.end(); it++ ) { + uint8_t l_sensorNumber = TARGETING::UTIL::INVALID_IPMI_SENSOR; + uint8_t l_eventOffset = IPMISEL::event_data1_invalid_offset; + HWAS::callout_ud_t *l_ud = reinterpret_cast((*it)->iv_pData); // if this is a CALLOUT that will have a target if ((ERRL_COMP_ID == (*it)->iv_header.iv_compId) && (1 == (*it)->iv_header.iv_ver) && - (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) && - (HWAS::HW_CALLOUT == l_ud->type) - ) + (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) ) { // if this callout is higher than any previous callout if (l_ud->priority > l_priority) { - // get the sensor number for the target - uint8_t * l_uData = (uint8_t *)(l_ud + 1); - TARGETING::Target *l_target = NULL; - bool l_err = HWAS::retrieveTarget(l_uData, - l_target, io_err); - if (!l_err) - { - //remove previous sensor data - l_sensorNumbers.clear(); - - // got a target, now get the sensor number - l_sensorNumbers.push_back( - SENSOR::getFaultSensorNumber(l_target)); + TRACFCOMP(g_trac_errl, + "sendErrLogToBmc new priority picked 0x%x > 0x%x", + l_ud->priority, l_priority ); + + // get sensor number for the target. + // we found a higher priority callout, get the sensor + // information for it + getSensorInfo( l_ud, l_sensorNumber, l_eventOffset, + l_priority, io_err); + + TRACFCOMP(g_trac_errl, + "l_sensorNumber = 0x%x, l_eventOffset = 0x%x", + l_sensorNumber, l_eventOffset ); + + + //remove previous sensor data + l_sensorNumbers.clear(); + + l_sensorNumbers.push_back(std::make_pair(l_sensorNumber, + l_eventOffset)); // and update the priority - l_priority = l_ud->priority; - } - // if this callout is equal to any previous callout - }else if(l_ud->priority == l_priority) + l_priority = l_ud->priority; + + } + // or if it has the same priority + else if(l_ud->priority == l_priority) { //get the sensor number for the target - uint8_t * l_uData = (uint8_t *)(l_ud + 1); - TARGETING::Target *l_target = NULL; - bool l_err = HWAS::retrieveTarget(l_uData, - l_target, io_err); - if(!l_err) - { - //add sensor data to array - l_sensorNumbers.push_back( - SENSOR::getFaultSensorNumber(l_target)); - } + + getSensorInfo( l_ud, l_sensorNumber, + l_eventOffset, l_priority, io_err); + + l_sensorNumbers.push_back(std::make_pair(l_sensorNumber, + l_eventOffset)); } } } // for each SectionVector - std::vector::const_iterator l_sensorIter; - for(l_sensorIter = l_sensorNumbers.begin(); - l_sensorIter != l_sensorNumbers.end(); - l_sensorIter++) - { - uint8_t unused = 0; - uint8_t l_getSensorType; - errlHndl_t e = - SENSOR::SensorBase::getSensorType( - *l_sensorIter, - l_getSensorType,unused); - l_sensorTypes.push_back(l_getSensorType); - if( e ) - { - TRACFCOMP(g_trac_errl, - ERR_MRK"Failed to get sensor type for sensor %d", - *l_sensorIter); - // since we are in the commit path, lets just delete this - // error and move on. - delete e; - } - - } // flatten into buffer, truncate to max eSEL size uint32_t l_pelSize = io_err->flattenedSize(); @@ -147,7 +142,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) uint32_t l_errSize = io_err->flatten (l_pelData, l_pelSize, true /* truncate */); - if (l_errSize ==0) + if (l_errSize ==0 ) { // flatten didn't work TRACFCOMP( g_trac_errl, ERR_MRK @@ -156,52 +151,71 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) break; } - uint8_t l_eventDirType = IPMISEL::event_transition; - uint8_t l_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable; - switch (io_err->sev()) + for(size_t i = 0; i < l_sensorNumbers.size(); i++) { - case ERRORLOG::ERRL_SEV_INFORMATIONAL: - l_eventDirType = IPMISEL::event_transition; - l_eventOffset = IPMISEL::event_data1_trans_informational; - break; - case ERRL_SEV_RECOVERED: - l_eventDirType = IPMISEL::event_transition; - l_eventOffset = IPMISEL::event_data1_trans_to_ok; - break; - case ERRL_SEV_PREDICTIVE: - l_eventDirType = IPMISEL::event_predictive; - l_eventOffset = IPMISEL::event_data1_trans_to_noncrit_from_ok; - break; - case ERRL_SEV_UNRECOVERABLE: - l_eventDirType = IPMISEL::event_transition; - l_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable; - break; - case ERRL_SEV_CRITICAL_SYS_TERM: - l_eventDirType = IPMISEL::event_transition; - l_eventOffset = IPMISEL::event_data1_trans_to_crit_from_non_r; - break; - case ERRL_SEV_UNKNOWN: - l_eventDirType = IPMISEL::event_state; - l_eventOffset = IPMISEL::event_data1_asserted; - break; - } - // send it to the BMC over IPMI - for(uint8_t l_sendIdx=0;l_sendIdx < l_sensorNumbers.size();l_sendIdx++) - { - TRACFCOMP(g_trac_errl, INFO_MRK - "sendErrLogToBmc: creating ESEL for sensor #%d",l_sendIdx); + uint8_t l_eventDirType = IPMISEL::sensor_specific; + + // if the offset is unknown after this then it will + // be updated based on elog severity below + uint8_t l_eventOffset = l_sensorNumbers.at(i).second ; + + // last ditch effort, if no sensor number is present at this + // point, just use the system event sensor + if( l_sensorNumbers.at(i).first == + TARGETING::UTIL::INVALID_IPMI_SENSOR ) + { + l_sensorNumbers.at(i).first = + TARGETING::UTIL::getSensorNumber(NULL, + TARGETING::SENSOR_NAME_SYSTEM_EVENT); + + l_sensorNumbers.at(i).second = + SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; + + } + + // grab the sensor type so the bmc knows how to use the offset + uint8_t unused = 0; + uint8_t l_SensorType = 0; + + errlHndl_t e = + SENSOR::SensorBase::getSensorType( + l_sensorNumbers.at(i).first, + l_SensorType,unused); + + if( e ) + { + TRACFCOMP(g_trac_errl, + ERR_MRK"Failed to get sensor type for sensor %d", + l_sensorNumbers.at(i).first); + + l_SensorType = 0; + // since we are in the commit path, lets just delete this + // error and move on. + delete e; + } + + // if no offset has been configured set it based on the severity + if( l_eventOffset == IPMISEL::event_data1_invalid_offset ) + { + getSensorOffsetBasedOnSeverity(io_err, l_eventDirType, + l_eventOffset ); + } + + // if we are sending the first sel then we will include the + // pel data, otherwise we send no data + uint32_t selSize = ( i == 0 ) ? l_pelSize:0; + TRACFCOMP(g_trac_errl, INFO_MRK "sendErrLogToBmc: sensor %.2x/%.2x event %x/%x, size %d", - l_sensorTypes.at(l_sendIdx), l_sensorNumbers.at(l_sendIdx), - l_eventDirType, l_eventOffset, - ((l_sendIdx==0)?l_pelSize:0)); - - IPMISEL::sendESEL(l_pelData, ((l_sendIdx==0) ? l_pelSize:0), - io_err->eid(), - l_eventDirType, l_eventOffset, - l_sensorTypes.at(l_sendIdx), - l_sensorNumbers.at(l_sendIdx)); + l_SensorType, l_sensorNumbers.at(i).first, + l_eventDirType, l_eventOffset, selSize ); + + IPMISEL::sendESEL(l_pelData, selSize, + io_err->eid(), + l_eventDirType, l_eventOffset, + l_SensorType, + l_sensorNumbers.at(i).first); } // free the buffer @@ -212,6 +226,111 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err) TRACFCOMP(g_trac_errl, EXIT_MRK "sendErrLogToBmc"); } // sendErrLogToBmc + +void getSensorInfo(HWAS::callout_ud_t *i_ud, uint8_t + &o_sensorNumber, uint8_t &o_eventOffset, + HWAS::callOutPriority &io_priority, + errlHndl_t &io_err ) +{ + + // reset the offset, we will test and configure it later + o_eventOffset = IPMISEL::event_data1_invalid_offset; + + if( i_ud->type == HWAS::PROCEDURE_CALLOUT ) + { + // for procedure callouts generate sel using the system event + // sensor + o_sensorNumber = TARGETING::UTIL::getSensorNumber(NULL, + TARGETING::SENSOR_NAME_SYSTEM_EVENT); + + // use the generic offset to indicate there is more work + // required to figure out what went wrong, ie. follow + // the procedure in the elog + o_eventOffset = SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; + + } + // if its a clock callout or a its a part callout and its not + // the VPD part or the SBE EEPROM, then use the backplane fault + // sensor as these parts reside there. + else if((i_ud->type == HWAS::CLOCK_CALLOUT ) || + ((i_ud->type == HWAS::PART_CALLOUT ) && + !((i_ud->partType == HWAS::VPD_PART_TYPE ) || + (i_ud->partType == HWAS::SBE_SEEPROM_PART_TYPE)) + )) + { + o_sensorNumber = SENSOR::getBackPlaneFaultSensor(); + } + else + { + // for all other types there will be at least + // one target in the next user data section, we will use + // that target to find the fault sensor. For a + // bus callout, we will just use the first of the + // bus target endpoints provided. + // + // NOTE: if the provided target does not have a fault sensor, the + // physical path will be used to determine the parent FRU which has + // a fault sensor associated with it. + uint8_t * l_uData = (uint8_t *)(i_ud + 1); + TARGETING::Target *l_target = NULL; + bool l_err = HWAS::retrieveTarget(l_uData, + l_target, io_err); + + if (!l_err) + { + // got a target, now get the sensor number + o_sensorNumber = SENSOR::getFaultSensorNumber(l_target); + } + else + { + // couldnt expand the target so we are unable to get + // a sensor number - use the event sensor for this one + o_sensorNumber = TARGETING::UTIL::getSensorNumber(NULL, + TARGETING::SENSOR_NAME_SYSTEM_EVENT); + + o_eventOffset = SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE; + + } + } +} + +void getSensorOffsetBasedOnSeverity(errlHndl_t & io_err, + uint8_t &o_eventDirType, + uint8_t & o_eventOffset ) +{ + switch (io_err->sev()) + { + case ERRORLOG::ERRL_SEV_INFORMATIONAL: + o_eventDirType = IPMISEL::event_transition; + o_eventOffset = IPMISEL::event_data1_trans_informational; + break; + case ERRL_SEV_RECOVERED: + o_eventDirType = IPMISEL::event_transition; + o_eventOffset = IPMISEL::event_data1_trans_to_ok; + break; + case ERRL_SEV_PREDICTIVE: + o_eventDirType = IPMISEL::event_predictive; + o_eventOffset = IPMISEL::event_data1_trans_to_noncrit_from_ok; + break; + case ERRL_SEV_UNRECOVERABLE: + o_eventDirType = IPMISEL::event_transition; + o_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable; + break; + case ERRL_SEV_CRITICAL_SYS_TERM: + o_eventDirType = IPMISEL::event_transition; + o_eventOffset = IPMISEL::event_data1_trans_to_crit_from_non_r; + break; + case ERRL_SEV_UNKNOWN: + o_eventDirType = IPMISEL::event_state; + o_eventOffset = IPMISEL::event_data1_asserted; + break; + default: + o_eventDirType = IPMISEL::sensor_specific; + o_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable; + break; + } +} + #endif } // end namespace diff --git a/src/usr/ipmi/ipmisel.C b/src/usr/ipmi/ipmisel.C index 6ae3e4b63..d2381efad 100644 --- a/src/usr/ipmi/ipmisel.C +++ b/src/usr/ipmi/ipmisel.C @@ -84,7 +84,7 @@ namespace IPMISEL void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize, uint32_t i_eid, uint8_t i_eventDirType, uint8_t i_eventOffset, - uint8_t i_sensorType, uint8_t i_sensorNumber) + uint8_t i_sensorType, uint8_t i_sensorNumber ) { IPMI_TRAC(ENTER_MRK "sendESEL()"); diff --git a/src/usr/ipmi/ipmisensor.C b/src/usr/ipmi/ipmisensor.C index 094231152..621aa7cf1 100644 --- a/src/usr/ipmi/ipmisensor.C +++ b/src/usr/ipmi/ipmisensor.C @@ -1046,7 +1046,9 @@ namespace SENSOR updateBMCFaultSensorStatus(); }; - // returns a sensor number based on input target type + // returns a sensor number for the FRU based on input target type + // there are currently 4 frus defined system, backplane, DIMM, PROC + // uint32_t getFaultSensorNumber( TARGETING::ConstTargetHandle_t i_pTarget ) { TRACDCOMP(g_trac_ipmi,">>getFaultSensorNumber()"); @@ -1064,46 +1066,51 @@ namespace SENSOR l_sensor_number = TARGETING::UTIL::getSensorNumber( i_pTarget, TARGETING::SENSOR_NAME_SYSTEM_EVENT ); + + TRACDCOMP(g_trac_ipmi,"Sensor Number = 0x%x", l_sensor_number); break; } case TARGETING::TYPE_NODE: { - - TRACDCOMP(g_trac_ipmi, "return backplane fault sensor\n"); + TRACDCOMP(g_trac_ipmi, "returning the \"BACKPLANE_FAULT\" sensor\n"); l_sensor_number = TARGETING::UTIL::getSensorNumber( i_pTarget, TARGETING::SENSOR_NAME_BACKPLANE_FAULT ); + + TRACDCOMP(g_trac_ipmi,"Sensor Number = 0x%x", l_sensor_number); break; } + // these targets have specific status sensors case TARGETING::TYPE_DIMM: case TARGETING::TYPE_MEMBUF: case TARGETING::TYPE_PROC: - case TARGETING::TYPE_CORE: { l_sensor_number = StatusSensor(i_pTarget).getSensorNumber(); + + TRACDCOMP(g_trac_ipmi,"Sensor Number = 0x%x", l_sensor_number); break; } - case TARGETING::TYPE_EX: + default: { - // sensor number attribute is associated with the core - const TARGETING::Target * targ = getCoreChiplet(i_pTarget); - l_sensor_number = getFaultSensorNumber( targ ); + TARGETING::EntityPath l_targetPath = + i_pTarget->getAttr(); - break; - } + // chop off the last part and go again. + l_targetPath.removeLast(); - default: - { + TARGETING::TargetHandle_t l_target = NULL; + l_target = + TARGETING::targetService().toTarget(l_targetPath); - TARGETING::ConstTargetHandle_t targ = - getParentChip( i_pTarget); + l_sensor_number = getFaultSensorNumber( + static_cast(l_target)); - l_sensor_number = getFaultSensorNumber( targ ); + break; } } @@ -1216,4 +1223,18 @@ namespace SENSOR return offsets; } + uint8_t getBackPlaneFaultSensor() + { + TARGETING::TargetHandle_t sys = NULL; + TARGETING::TargetHandleList nodes; + TARGETING::targetService().getTopLevelTarget(sys); + assert(sys != NULL); + getChildAffinityTargets(nodes, sys, TARGETING::CLASS_ENC, + TARGETING::TYPE_NODE); + assert(!nodes.empty()); + + //Backplane sensor ID + return TARGETING::UTIL::getSensorNumber(nodes[0], + TARGETING::SENSOR_NAME_BACKPLANE_FAULT); + } }; // end name space -- cgit v1.2.1