summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard J. Knight <rjknight@us.ibm.com>2015-04-27 12:10:21 -0500
committerA. Patrick Williams III <iawillia@us.ibm.com>2015-08-18 18:10:19 -0500
commita85a8f8c44e76c0edfeca53835cebbc7d2fdd383 (patch)
treec56023e602d9b309e14c8eeaf85d186762af0abc
parent9f01b555b7eeced597c0ff658ac3412d85991d10 (diff)
downloadtalos-hostboot-a85a8f8c44e76c0edfeca53835cebbc7d2fdd383.tar.gz
talos-hostboot-a85a8f8c44e76c0edfeca53835cebbc7d2fdd383.zip
Return valid fault sensors for all types of callouts
- Update code to return sensor numbers for targets which represent replaceable parts. - Use System Event sensor for procedure callouts. - Update code to handle all hw callout types. Change-Id: I626bce5c8c0c8b7d0a44408280de178c7a86a83f CQ:SW306556 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/18097 Reviewed-by: WILLIAM G. HOFFA <wghoffa@us.ibm.com> Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com> Tested-by: FSP CI Jenkins Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com> Tested-by: Jenkins Server Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW
-rw-r--r--src/include/usr/ipmi/ipmisel.H8
-rw-r--r--src/include/usr/ipmi/ipmisensor.H19
-rw-r--r--src/usr/errl/errlmanager_common.C317
-rw-r--r--src/usr/ipmi/ipmisel.C2
-rw-r--r--src/usr/ipmi/ipmisensor.C51
5 files changed, 280 insertions, 117 deletions
diff --git a/src/include/usr/ipmi/ipmisel.H b/src/include/usr/ipmi/ipmisel.H
index af808515d..13f44a84b 100644
--- a/src/include/usr/ipmi/ipmisel.H
+++ b/src/include/usr/ipmi/ipmisel.H
@@ -87,7 +87,9 @@ namespace IPMISEL
format_ipmi_version_2_0 = 0x04,
};
- // event_type, per section 42.1 of the IPMI spec
+ // sel_event_dir_type type, per section 42.1 of the IPMI spec
+ // bit 0 = 0 -> assertion event (event direction)
+ // bits 1:7 describe the event/reading type
enum sel_event_dir_type
{
event_unspecified = 0x00,
@@ -95,8 +97,9 @@ namespace IPMISEL
event_state = 0x03,
event_predictive = 0x04,
event_limit = 0x05,
- event_permformance = 0x06,
+ event_performance = 0x06,
event_transition = 0x07,
+ sensor_specific = 0x6f,
event_OEM = 0x70,
};
@@ -114,6 +117,7 @@ namespace IPMISEL
event_data1_trans_to_non_recoverable = 0x06,
event_data1_trans_monitor = 0x07,
event_data1_trans_informational = 0x08,
+ event_data1_invalid_offset = 0xFF,
};
enum sel_generator_id
diff --git a/src/include/usr/ipmi/ipmisensor.H b/src/include/usr/ipmi/ipmisensor.H
index 86b9051ba..ba7d7bfd1 100644
--- a/src/include/usr/ipmi/ipmisensor.H
+++ b/src/include/usr/ipmi/ipmisensor.H
@@ -116,6 +116,19 @@ namespace SENSOR
SYSTEM_FIRMWARE_PROGRESS = 0x02,
};
+
+ /**
+ * @enum systemEventSensorOffsets
+ * Offsets specific to the system event sensor.
+ *
+ */
+ enum systemEventSensorOffsets
+ {
+ // offset 02h
+ UNDETERMINED_SYSTEM_HW_FAILURE = 0x02,
+ };
+
+
/**
* @enum discrete09_Offsets
*
@@ -924,6 +937,12 @@ namespace SENSOR
uint16_t getSensorOffsets(TARGETING::SENSOR_NAME i_name,
sensorReadingType &o_readType );
+ /**
+ * Helper function to return the backplane fault sensor
+ *
+ * @return sensor number
+ */
+ uint8_t getBackPlaneFaultSensor();
}; // end namespace
diff --git a/src/usr/errl/errlmanager_common.C b/src/usr/errl/errlmanager_common.C
index 3b3f3f03b..a87323813 100644
--- a/src/usr/errl/errlmanager_common.C
+++ b/src/usr/errl/errlmanager_common.C
@@ -35,6 +35,18 @@ namespace ERRORLOG
extern trace_desc_t* g_trac_errl;
#ifdef CONFIG_BMC_IPMI
+
+void getSensorOffsetBasedOnSeverity(errlHndl_t & io_err,
+ uint8_t &o_eventDirType,
+ uint8_t & o_offset );
+
+// helper function to gather sensor information
+void getSensorInfo(HWAS::callout_ud_t *i_ud,
+ uint8_t &o_sensorNumber,
+ uint8_t &o_eventOffset,
+ HWAS::callOutPriority &io_priority,
+ errlHndl_t& io_error );
+
void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
{
TRACFCOMP(g_trac_errl, ENTER_MRK
@@ -52,86 +64,69 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
}
// look thru the errlog for any Callout UserDetail sections
- // to determine the sensor information for the SEL
- std::vector<uint8_t> l_sensorNumbers;
- std::vector<uint8_t> l_sensorTypes;
+ // to determine the sensor information for the SEL
+ // create a vector of sensor numbers and offsets
+ std::vector<std::pair<uint8_t, uint8_t> > l_sensorNumbers;
HWAS::callOutPriority l_priority = HWAS::SRCI_PRIORITY_NONE;
+
for(std::vector<ErrlUD*>::const_iterator
it = io_err->iv_SectionVector.begin();
it != io_err->iv_SectionVector.end();
it++ )
{
+ uint8_t l_sensorNumber = TARGETING::UTIL::INVALID_IPMI_SENSOR;
+ uint8_t l_eventOffset = IPMISEL::event_data1_invalid_offset;
+
HWAS::callout_ud_t *l_ud =
reinterpret_cast<HWAS::callout_ud_t*>((*it)->iv_pData);
// if this is a CALLOUT that will have a target
if ((ERRL_COMP_ID == (*it)->iv_header.iv_compId) &&
(1 == (*it)->iv_header.iv_ver) &&
- (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) &&
- (HWAS::HW_CALLOUT == l_ud->type)
- )
+ (ERRL_UDT_CALLOUT == (*it)->iv_header.iv_sst) )
{
// if this callout is higher than any previous callout
if (l_ud->priority > l_priority)
{
- // get the sensor number for the target
- uint8_t * l_uData = (uint8_t *)(l_ud + 1);
- TARGETING::Target *l_target = NULL;
- bool l_err = HWAS::retrieveTarget(l_uData,
- l_target, io_err);
- if (!l_err)
- {
- //remove previous sensor data
- l_sensorNumbers.clear();
-
- // got a target, now get the sensor number
- l_sensorNumbers.push_back(
- SENSOR::getFaultSensorNumber(l_target));
+ TRACFCOMP(g_trac_errl,
+ "sendErrLogToBmc new priority picked 0x%x > 0x%x",
+ l_ud->priority, l_priority );
+
+ // get sensor number for the target.
+ // we found a higher priority callout, get the sensor
+ // information for it
+ getSensorInfo( l_ud, l_sensorNumber, l_eventOffset,
+ l_priority, io_err);
+
+ TRACFCOMP(g_trac_errl,
+ "l_sensorNumber = 0x%x, l_eventOffset = 0x%x",
+ l_sensorNumber, l_eventOffset );
+
+
+ //remove previous sensor data
+ l_sensorNumbers.clear();
+
+ l_sensorNumbers.push_back(std::make_pair(l_sensorNumber,
+ l_eventOffset));
// and update the priority
- l_priority = l_ud->priority;
- }
- // if this callout is equal to any previous callout
- }else if(l_ud->priority == l_priority)
+ l_priority = l_ud->priority;
+
+ }
+ // or if it has the same priority
+ else if(l_ud->priority == l_priority)
{
//get the sensor number for the target
- uint8_t * l_uData = (uint8_t *)(l_ud + 1);
- TARGETING::Target *l_target = NULL;
- bool l_err = HWAS::retrieveTarget(l_uData,
- l_target, io_err);
- if(!l_err)
- {
- //add sensor data to array
- l_sensorNumbers.push_back(
- SENSOR::getFaultSensorNumber(l_target));
- }
+
+ getSensorInfo( l_ud, l_sensorNumber,
+ l_eventOffset, l_priority, io_err);
+
+ l_sensorNumbers.push_back(std::make_pair(l_sensorNumber,
+ l_eventOffset));
}
}
} // for each SectionVector
- std::vector<uint8_t>::const_iterator l_sensorIter;
- for(l_sensorIter = l_sensorNumbers.begin();
- l_sensorIter != l_sensorNumbers.end();
- l_sensorIter++)
- {
- uint8_t unused = 0;
- uint8_t l_getSensorType;
- errlHndl_t e =
- SENSOR::SensorBase::getSensorType(
- *l_sensorIter,
- l_getSensorType,unused);
- l_sensorTypes.push_back(l_getSensorType);
- if( e )
- {
- TRACFCOMP(g_trac_errl,
- ERR_MRK"Failed to get sensor type for sensor %d",
- *l_sensorIter);
- // since we are in the commit path, lets just delete this
- // error and move on.
- delete e;
- }
-
- }
// flatten into buffer, truncate to max eSEL size
uint32_t l_pelSize = io_err->flattenedSize();
@@ -147,7 +142,7 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
uint32_t l_errSize = io_err->flatten (l_pelData,
l_pelSize, true /* truncate */);
- if (l_errSize ==0)
+ if (l_errSize ==0 )
{
// flatten didn't work
TRACFCOMP( g_trac_errl, ERR_MRK
@@ -156,52 +151,71 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
break;
}
- uint8_t l_eventDirType = IPMISEL::event_transition;
- uint8_t l_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable;
- switch (io_err->sev())
+ for(size_t i = 0; i < l_sensorNumbers.size(); i++)
{
- case ERRORLOG::ERRL_SEV_INFORMATIONAL:
- l_eventDirType = IPMISEL::event_transition;
- l_eventOffset = IPMISEL::event_data1_trans_informational;
- break;
- case ERRL_SEV_RECOVERED:
- l_eventDirType = IPMISEL::event_transition;
- l_eventOffset = IPMISEL::event_data1_trans_to_ok;
- break;
- case ERRL_SEV_PREDICTIVE:
- l_eventDirType = IPMISEL::event_predictive;
- l_eventOffset = IPMISEL::event_data1_trans_to_noncrit_from_ok;
- break;
- case ERRL_SEV_UNRECOVERABLE:
- l_eventDirType = IPMISEL::event_transition;
- l_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable;
- break;
- case ERRL_SEV_CRITICAL_SYS_TERM:
- l_eventDirType = IPMISEL::event_transition;
- l_eventOffset = IPMISEL::event_data1_trans_to_crit_from_non_r;
- break;
- case ERRL_SEV_UNKNOWN:
- l_eventDirType = IPMISEL::event_state;
- l_eventOffset = IPMISEL::event_data1_asserted;
- break;
- }
- // send it to the BMC over IPMI
- for(uint8_t l_sendIdx=0;l_sendIdx < l_sensorNumbers.size();l_sendIdx++)
- {
- TRACFCOMP(g_trac_errl, INFO_MRK
- "sendErrLogToBmc: creating ESEL for sensor #%d",l_sendIdx);
+ uint8_t l_eventDirType = IPMISEL::sensor_specific;
+
+ // if the offset is unknown after this then it will
+ // be updated based on elog severity below
+ uint8_t l_eventOffset = l_sensorNumbers.at(i).second ;
+
+ // last ditch effort, if no sensor number is present at this
+ // point, just use the system event sensor
+ if( l_sensorNumbers.at(i).first ==
+ TARGETING::UTIL::INVALID_IPMI_SENSOR )
+ {
+ l_sensorNumbers.at(i).first =
+ TARGETING::UTIL::getSensorNumber(NULL,
+ TARGETING::SENSOR_NAME_SYSTEM_EVENT);
+
+ l_sensorNumbers.at(i).second =
+ SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE;
+
+ }
+
+ // grab the sensor type so the bmc knows how to use the offset
+ uint8_t unused = 0;
+ uint8_t l_SensorType = 0;
+
+ errlHndl_t e =
+ SENSOR::SensorBase::getSensorType(
+ l_sensorNumbers.at(i).first,
+ l_SensorType,unused);
+
+ if( e )
+ {
+ TRACFCOMP(g_trac_errl,
+ ERR_MRK"Failed to get sensor type for sensor %d",
+ l_sensorNumbers.at(i).first);
+
+ l_SensorType = 0;
+ // since we are in the commit path, let's just delete this
+ // error and move on.
+ delete e;
+ }
+
+ // if no offset has been configured set it based on the severity
+ if( l_eventOffset == IPMISEL::event_data1_invalid_offset )
+ {
+ getSensorOffsetBasedOnSeverity(io_err, l_eventDirType,
+ l_eventOffset );
+ }
+
+ // if we are sending the first sel then we will include the
+ // pel data, otherwise we send no data
+ uint32_t selSize = ( i == 0 ) ? l_pelSize:0;
+
TRACFCOMP(g_trac_errl, INFO_MRK
"sendErrLogToBmc: sensor %.2x/%.2x event %x/%x, size %d",
- l_sensorTypes.at(l_sendIdx), l_sensorNumbers.at(l_sendIdx),
- l_eventDirType, l_eventOffset,
- ((l_sendIdx==0)?l_pelSize:0));
-
- IPMISEL::sendESEL(l_pelData, ((l_sendIdx==0) ? l_pelSize:0),
- io_err->eid(),
- l_eventDirType, l_eventOffset,
- l_sensorTypes.at(l_sendIdx),
- l_sensorNumbers.at(l_sendIdx));
+ l_SensorType, l_sensorNumbers.at(i).first,
+ l_eventDirType, l_eventOffset, selSize );
+
+ IPMISEL::sendESEL(l_pelData, selSize,
+ io_err->eid(),
+ l_eventDirType, l_eventOffset,
+ l_SensorType,
+ l_sensorNumbers.at(i).first);
}
// free the buffer
@@ -212,6 +226,111 @@ void ErrlManager::sendErrLogToBmc(errlHndl_t &io_err)
TRACFCOMP(g_trac_errl, EXIT_MRK "sendErrLogToBmc");
} // sendErrLogToBmc
+
+void getSensorInfo(HWAS::callout_ud_t *i_ud, uint8_t
+ &o_sensorNumber, uint8_t &o_eventOffset,
+ HWAS::callOutPriority &io_priority,
+ errlHndl_t &io_err )
+{
+
+ // reset the offset, we will test and configure it later
+ o_eventOffset = IPMISEL::event_data1_invalid_offset;
+
+ if( i_ud->type == HWAS::PROCEDURE_CALLOUT )
+ {
+ // for procedure callouts generate sel using the system event
+ // sensor
+ o_sensorNumber = TARGETING::UTIL::getSensorNumber(NULL,
+ TARGETING::SENSOR_NAME_SYSTEM_EVENT);
+
+ // use the generic offset to indicate there is more work
+ // required to figure out what went wrong, ie. follow
+ // the procedure in the elog
+ o_eventOffset = SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE;
+
+ }
+ // if it's a clock callout, or it's a part callout that is not
+ // the VPD part or the SBE SEEPROM, then use the backplane fault
+ // sensor as these parts reside there.
+ else if((i_ud->type == HWAS::CLOCK_CALLOUT ) ||
+ ((i_ud->type == HWAS::PART_CALLOUT ) &&
+ !((i_ud->partType == HWAS::VPD_PART_TYPE ) ||
+ (i_ud->partType == HWAS::SBE_SEEPROM_PART_TYPE))
+ ))
+ {
+ o_sensorNumber = SENSOR::getBackPlaneFaultSensor();
+ }
+ else
+ {
+ // for all other types there will be at least
+ // one target in the next user data section, we will use
+ // that target to find the fault sensor. For a
+ // bus callout, we will just use the first of the
+ // bus target endpoints provided.
+ //
+ // NOTE: if the provided target does not have a fault sensor, the
+ // physical path will be used to determine the parent FRU which has
+ // a fault sensor associated with it.
+ uint8_t * l_uData = (uint8_t *)(i_ud + 1);
+ TARGETING::Target *l_target = NULL;
+ bool l_err = HWAS::retrieveTarget(l_uData,
+ l_target, io_err);
+
+ if (!l_err)
+ {
+ // got a target, now get the sensor number
+ o_sensorNumber = SENSOR::getFaultSensorNumber(l_target);
+ }
+ else
+ {
+ // couldn't expand the target so we are unable to get
+ // a sensor number - use the event sensor for this one
+ o_sensorNumber = TARGETING::UTIL::getSensorNumber(NULL,
+ TARGETING::SENSOR_NAME_SYSTEM_EVENT);
+
+ o_eventOffset = SENSOR::UNDETERMINED_SYSTEM_HW_FAILURE;
+
+ }
+ }
+}
+
+void getSensorOffsetBasedOnSeverity(errlHndl_t & io_err,
+ uint8_t &o_eventDirType,
+ uint8_t & o_eventOffset )
+{
+ switch (io_err->sev())
+ {
+ case ERRORLOG::ERRL_SEV_INFORMATIONAL:
+ o_eventDirType = IPMISEL::event_transition;
+ o_eventOffset = IPMISEL::event_data1_trans_informational;
+ break;
+ case ERRL_SEV_RECOVERED:
+ o_eventDirType = IPMISEL::event_transition;
+ o_eventOffset = IPMISEL::event_data1_trans_to_ok;
+ break;
+ case ERRL_SEV_PREDICTIVE:
+ o_eventDirType = IPMISEL::event_predictive;
+ o_eventOffset = IPMISEL::event_data1_trans_to_noncrit_from_ok;
+ break;
+ case ERRL_SEV_UNRECOVERABLE:
+ o_eventDirType = IPMISEL::event_transition;
+ o_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable;
+ break;
+ case ERRL_SEV_CRITICAL_SYS_TERM:
+ o_eventDirType = IPMISEL::event_transition;
+ o_eventOffset = IPMISEL::event_data1_trans_to_crit_from_non_r;
+ break;
+ case ERRL_SEV_UNKNOWN:
+ o_eventDirType = IPMISEL::event_state;
+ o_eventOffset = IPMISEL::event_data1_asserted;
+ break;
+ default:
+ o_eventDirType = IPMISEL::sensor_specific;
+ o_eventOffset = IPMISEL::event_data1_trans_to_non_recoverable;
+ break;
+ }
+}
+
#endif
} // end namespace
diff --git a/src/usr/ipmi/ipmisel.C b/src/usr/ipmi/ipmisel.C
index 6ae3e4b63..d2381efad 100644
--- a/src/usr/ipmi/ipmisel.C
+++ b/src/usr/ipmi/ipmisel.C
@@ -84,7 +84,7 @@ namespace IPMISEL
void sendESEL(uint8_t* i_eselData, uint32_t i_dataSize,
uint32_t i_eid,
uint8_t i_eventDirType, uint8_t i_eventOffset,
- uint8_t i_sensorType, uint8_t i_sensorNumber)
+ uint8_t i_sensorType, uint8_t i_sensorNumber )
{
IPMI_TRAC(ENTER_MRK "sendESEL()");
diff --git a/src/usr/ipmi/ipmisensor.C b/src/usr/ipmi/ipmisensor.C
index 094231152..621aa7cf1 100644
--- a/src/usr/ipmi/ipmisensor.C
+++ b/src/usr/ipmi/ipmisensor.C
@@ -1046,7 +1046,9 @@ namespace SENSOR
updateBMCFaultSensorStatus();
};
- // returns a sensor number based on input target type
+ // returns a sensor number for the FRU based on input target type
+ // there are currently 4 FRUs defined: system, backplane, DIMM, PROC
+ //
uint32_t getFaultSensorNumber( TARGETING::ConstTargetHandle_t i_pTarget )
{
TRACDCOMP(g_trac_ipmi,">>getFaultSensorNumber()");
@@ -1064,46 +1066,51 @@ namespace SENSOR
l_sensor_number = TARGETING::UTIL::getSensorNumber(
i_pTarget,
TARGETING::SENSOR_NAME_SYSTEM_EVENT );
+
+ TRACDCOMP(g_trac_ipmi,"Sensor Number = 0x%x", l_sensor_number);
break;
}
case TARGETING::TYPE_NODE:
{
-
- TRACDCOMP(g_trac_ipmi, "return backplane fault sensor\n");
+ TRACDCOMP(g_trac_ipmi, "returning the \"BACKPLANE_FAULT\" sensor\n");
l_sensor_number = TARGETING::UTIL::getSensorNumber(
i_pTarget,
TARGETING::SENSOR_NAME_BACKPLANE_FAULT );
+
+ TRACDCOMP(g_trac_ipmi,"Sensor Number = 0x%x", l_sensor_number);
break;
}
+ // these targets have specific status sensors
case TARGETING::TYPE_DIMM:
case TARGETING::TYPE_MEMBUF:
case TARGETING::TYPE_PROC:
- case TARGETING::TYPE_CORE:
{
l_sensor_number =
StatusSensor(i_pTarget).getSensorNumber();
+
+ TRACDCOMP(g_trac_ipmi,"Sensor Number = 0x%x", l_sensor_number);
break;
}
- case TARGETING::TYPE_EX:
+ default:
{
- // sensor number attribute is associated with the core
- const TARGETING::Target * targ = getCoreChiplet(i_pTarget);
- l_sensor_number = getFaultSensorNumber( targ );
+ TARGETING::EntityPath l_targetPath =
+ i_pTarget->getAttr<TARGETING::ATTR_PHYS_PATH>();
- break;
- }
+ // chop off the last part and go again.
+ l_targetPath.removeLast();
- default:
- {
+ TARGETING::TargetHandle_t l_target = NULL;
+ l_target =
+ TARGETING::targetService().toTarget(l_targetPath);
- TARGETING::ConstTargetHandle_t targ =
- getParentChip( i_pTarget);
+ l_sensor_number = getFaultSensorNumber(
+ static_cast<TARGETING::ConstTargetHandle_t>(l_target));
- l_sensor_number = getFaultSensorNumber( targ );
+ break;
}
}
@@ -1216,4 +1223,18 @@ namespace SENSOR
return offsets;
}
+ uint8_t getBackPlaneFaultSensor()
+ {
+ TARGETING::TargetHandle_t sys = NULL;
+ TARGETING::TargetHandleList nodes;
+ TARGETING::targetService().getTopLevelTarget(sys);
+ assert(sys != NULL);
+ getChildAffinityTargets(nodes, sys, TARGETING::CLASS_ENC,
+ TARGETING::TYPE_NODE);
+ assert(!nodes.empty());
+
+ //Backplane sensor ID
+ return TARGETING::UTIL::getSensorNumber(nodes[0],
+ TARGETING::SENSOR_NAME_BACKPLANE_FAULT);
+ }
}; // end name space
OpenPOWER on IntegriCloud