diff options
author | Mike Jones <mjjones@us.ibm.com> | 2014-01-27 15:33:09 -0600 |
---|---|---|
committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2014-02-14 12:41:36 -0600 |
commit | 10a93afdbdd5bccc9a10b797d66b008580ba16bb (patch) | |
tree | caddb050384fe2eb4cd8844c59a28d9ea9e8b7c0 /src/usr/hwas | |
parent | 5850220077f29041920e83de66ec458dac9c82a7 (diff) | |
download | talos-hostboot-10a93afdbdd5bccc9a10b797d66b008580ba16bb.tar.gz talos-hostboot-10a93afdbdd5bccc9a10b797d66b008580ba16bb.zip |
Hostboot Serviceability Review Part 2
Resolving TODOs and ensuring error logs have correct callouts
Change-Id: Ic2e65427487fb91553ffe4ed6e3ed922004963ba
RTC: 92837
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/8374
Tested-by: Jenkins Server
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src/usr/hwas')
-rw-r--r-- | src/usr/hwas/common/deconfigGard.C | 26 | ||||
-rw-r--r-- | src/usr/hwas/hostbootIstep.C | 100 | ||||
-rw-r--r-- | src/usr/hwas/hwasPlatDeconfigGard.C | 19 |
3 files changed, 48 insertions, 97 deletions
diff --git a/src/usr/hwas/common/deconfigGard.C b/src/usr/hwas/common/deconfigGard.C index fe51d0914..522ba02b6 100644 --- a/src/usr/hwas/common/deconfigGard.C +++ b/src/usr/hwas/common/deconfigGard.C @@ -40,24 +40,6 @@ // Trace definition #define __COMP_TD__ g_trac_deconf -// TODO The DeconfigGard code needs to trace a target. The current recommended -// way is to get the Target's PHYS_PATH attribute and do a binary trace. -// However, the size of a EntityPath is more than 16 bytes. This code -// will trace only the first 16 bytes (which in most cases is enough) to avoid a -// multi-line binary trace. This all seems a little convoluted. Is there a -// better way to trace a Target -#define DG_DBG_TARGET(string, pPath) \ - HWAS_DBG_BIN(string, pPath, sizeof(EntityPath) - 1) -#define DG_INF_TARGET(string, pPath) \ - HWAS_INF_BIN(string, pPath, sizeof(EntityPath) - 1) -#define DG_ERR_TARGET(string, pPath) \ - HWAS_ERR_BIN(string, pPath, sizeof(EntityPath) - 1) - -// TODO There are a number of error logs created in this file. Most of them -// should include the target identifier (PHYS_PATH). There is a plan in RTC -// story 4110 to provide a way to easily add a target to an error log. When that -// is done need to update the error logs - namespace HWAS { @@ -266,8 +248,10 @@ errlHndl_t DeconfigGard::deconfigureTargetsFromGardRecordsForIpl( { // could be a platform specific target for the other // ie, we are hostboot and this is an FSP target, or vice-versa - DG_INF_TARGET("Could not find Target for", - &(l_gardRecord.iv_targetId)); + // Binary trace the iv_targetId (EntityPath) + HWAS_INF_BIN("Could not find Target for:", + &(l_gardRecord.iv_targetId), + sizeof(l_gardRecord.iv_targetId)); continue; } @@ -1343,7 +1327,7 @@ void DeconfigGard::_deconfigureTarget(Target & i_target, //****************************************************************************** void DeconfigGard::_doDeconfigureActions(Target & i_target) { - // TODO + // Placeholder for any necessary deconfigure actions } //****************************************************************************** diff --git a/src/usr/hwas/hostbootIstep.C b/src/usr/hwas/hostbootIstep.C index c7b5bff79..15d587357 100644 --- a/src/usr/hwas/hostbootIstep.C +++ b/src/usr/hwas/hostbootIstep.C @@ -108,50 +108,30 @@ void* host_discover_targets( void *io_pArgs ) // Check whether we're in MPIPL mode TARGETING::Target* l_pTopLevel = NULL; targetService().getTopLevelTarget( l_pTopLevel ); + HWAS_ASSERT(l_pTopLevel, "HWAS host_discover_targets: no TopLevelTarget"); - if( l_pTopLevel == NULL ) + if (l_pTopLevel->getAttr<ATTR_IS_MPIPL_HB>()) { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "Top level handle was NULL" ); - - /*@ - * @errortype - * @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE - * @moduleid HWAS::MOD_HOST_DISCOVER_TARGETS - * @reasoncode HWAS::RC_TOP_LEVEL_TARGET_NULL - * @devdesc Call to get top level targeting handle - * returned NULL - */ - errl = hwasError( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - HWAS::MOD_HOST_DISCOVER_TARGETS, - HWAS::RC_TOP_LEVEL_TARGET_NULL ); + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "MPIPL mode"); + + // Sync attributes from Fsp + errl = syncAllAttributesFromFsp(); } else { - if (l_pTopLevel->getAttr<ATTR_IS_MPIPL_HB>()) - { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "MPIPL mode"); + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "Normal IPL mode"); - // Sync attributes from Fsp - errl = syncAllAttributesFromFsp(); + errl = discoverTargets(); - } - else + // also if SP doesn't support change detection, call + // function to do it here. + if (!errl && + !l_pTopLevel->getAttr<ATTR_SP_FUNCTIONS>() + .hardwareChangeDetection) { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "Normal IPL mode"); - - errl = discoverTargets(); - - // also if SP doesn't support change detection, call - // function to do it here. - if (!errl && - !l_pTopLevel->getAttr<ATTR_SP_FUNCTIONS>() - .hardwareChangeDetection) - { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, - "calling hwasChangeDetection"); - errl = hwasChangeDetection(); - } + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, + "calling hwasChangeDetection"); + errl = hwasChangeDetection(); } } @@ -173,47 +153,27 @@ void* host_gard( void *io_pArgs ) // Check whether we're in MPIPL mode TARGETING::Target* l_pTopLevel = NULL; targetService().getTopLevelTarget( l_pTopLevel ); + HWAS_ASSERT(l_pTopLevel, "HWAS host_gard: no TopLevelTarget"); - if( l_pTopLevel == NULL ) + if (l_pTopLevel->getAttr<ATTR_IS_MPIPL_HB>()) { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "Top level handle was NULL" ); - - /*@ - * @errortype - * @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE - * @moduleid HWAS::MOD_HOST_GARD - * @reasoncode HWAS::RC_TOP_LEVEL_TARGET_NULL - * @devdesc Call to get top level targeting handle - * returned NULL - */ - errl = hwasError( ERRORLOG::ERRL_SEV_UNRECOVERABLE, - HWAS::MOD_HOST_GARD, - HWAS::RC_TOP_LEVEL_TARGET_NULL ); + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "MPIPL mode"); + + // we only want EX units to be processed + TARGETING::PredicateCTM l_exFilter(TARGETING::CLASS_UNIT, + TARGETING::TYPE_EX); + errl = collectGard(&l_exFilter); } else { - if (l_pTopLevel->getAttr<ATTR_IS_MPIPL_HB>()) - { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "MPIPL mode"); - - // we only want EX units to be processed - TARGETING::PredicateCTM l_exFilter(TARGETING::CLASS_UNIT, - TARGETING::TYPE_EX); - errl = collectGard(&l_exFilter); - } - else - { - TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "Normal IPL mode"); + TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace, "Normal IPL mode"); - errl = collectGard(); + errl = collectGard(); - // if no error, - if (errl == NULL) - { - // check and see if we still have enough hardware to continue - errl = checkMinimumHardware(); - } + if (errl == NULL) + { + // check and see if we still have enough hardware to continue + errl = checkMinimumHardware(); } // If targets are deconfigured as a result of host_gard, they are // done so using the PLID as the reason for deconfiguration. This diff --git a/src/usr/hwas/hwasPlatDeconfigGard.C b/src/usr/hwas/hwasPlatDeconfigGard.C index 551373a15..99c679bbb 100644 --- a/src/usr/hwas/hwasPlatDeconfigGard.C +++ b/src/usr/hwas/hwasPlatDeconfigGard.C @@ -213,12 +213,12 @@ errlHndl_t DeconfigGard::platCreateGardRecord( (static_cast<uint64_t>(get_huid(i_pTarget)) << 32) | i_errlEid; const uint64_t userdata2 = (static_cast<uint64_t>(lDeconfigGardable) << 32) | lPresent; - l_pErr = hwasError( - ERRL_SEV_UNRECOVERABLE, + const bool hbSwError = true; + l_pErr = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, HWAS::MOD_PLAT_DECONFIG_GARD, HWAS::RC_TARGET_NOT_GARDABLE, - userdata1, - userdata2); + userdata1, userdata2, hbSwError); break; } @@ -308,6 +308,13 @@ errlHndl_t DeconfigGard::platCreateGardRecord( { HWAS_ERR("GARD Record Repository full"); + // TODO RTC 96397 + // Hostboot will only write GARD Records to PNOR when it is the + // gardRecordMaster. An error will be logged if GARD Record storage + // exceeds 90% and the GARD Record will not be written if full. The + // error will have a new procedure callout requesting that the + // machine be serviced. Right now, this error log has no callouts. + /*@ * @errortype * @moduleid HWAS::MOD_PLAT_DECONFIG_GARD @@ -319,8 +326,8 @@ errlHndl_t DeconfigGard::platCreateGardRecord( const uint64_t userdata1 = (static_cast<uint64_t> (get_huid(i_pTarget)) << 32) | i_errlEid; - l_pErr = hwasError( - ERRL_SEV_UNRECOVERABLE, + l_pErr = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, HWAS::MOD_PLAT_DECONFIG_GARD, HWAS::RC_GARD_REPOSITORY_FULL, userdata1); |