diff options
author | Dhruvaraj S <dhruvaraj@in.ibm.com> | 2015-02-12 04:23:39 -0600 |
---|---|---|
committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2015-03-09 19:13:57 -0500 |
commit | 0481b521db49eb5161def9fe10824a07c9b5068b (patch) | |
tree | 95f0588b4045a77212e1a855d37d3d48ff1b83fb /src | |
parent | 8c22c53bfebf79a9e7548b9e32aad22901af3ca9 (diff) | |
download | talos-hostboot-0481b521db49eb5161def9fe10824a07c9b5068b.tar.gz talos-hostboot-0481b521db49eb5161def9fe10824a07c9b5068b.zip |
62392: HB Common Resource Recovery
HB Common Resource Recovery will recover any resources
that are not garded by a FATAL error and try to use them in the
system for the current IPL.
Change-Id: I6ea4114ba19f47f062ec04449c3e0209d85a1faf
RTC: 62392
CQ: SW291079
CMVC-Coreq: 948355
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/15678
Tested-by: Jenkins Server
Reviewed-by: Nicholas E. Bofferding <bofferdn@us.ibm.com>
Reviewed-by: Brian H. Horton <brianh@linux.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/include/usr/hwas/common/deconfigGard.H | 37 | ||||
-rw-r--r-- | src/include/usr/hwas/common/hwas.H | 7 | ||||
-rw-r--r-- | src/include/usr/hwas/common/hwasCommon.H | 6 | ||||
-rw-r--r-- | src/include/usr/hwas/common/hwas_reasoncodes.H | 4 | ||||
-rw-r--r-- | src/include/usr/targeting/common/predicates/predicatehwas.H | 23 | ||||
-rw-r--r-- | src/usr/hwas/common/deconfigGard.C | 473 | ||||
-rw-r--r-- | src/usr/hwas/common/hwas.C | 58 | ||||
-rw-r--r-- | src/usr/hwas/hwasPlat.C | 5 | ||||
-rw-r--r-- | src/usr/targeting/common/xmltohb/attribute_types.xml | 10 |
9 files changed, 493 insertions, 130 deletions
diff --git a/src/include/usr/hwas/common/deconfigGard.H b/src/include/usr/hwas/common/deconfigGard.H index b9fd4a754..374b6b44f 100644 --- a/src/include/usr/hwas/common/deconfigGard.H +++ b/src/include/usr/hwas/common/deconfigGard.H @@ -88,10 +88,11 @@ public: PREDICTIVE, // not applied due to predictive policy PREDICATE, // not applied due to predicate GARD_NOT_APPLIED, // not applied for other reason + RESOURCE_RECOVERED // Resource recovered }; - // enums to indicate to deconfigureTarget() specific RUNTIME behaviors - enum DeconfigureRuntime + // enums to indicate to deconfigureTarget() specific behaviors + enum DeconfigureFlags { NOT_AT_RUNTIME, // non-runtime behavior - if the system is at // runtime, no deconfigs happen; @@ -99,6 +100,7 @@ public: // runtime behaviors: FULLY_AT_RUNTIME, // - functional=false,dumpfunctional=false DUMP_AT_RUNTIME, // - functional=false,dumpfunctional=true + SPEC_DECONFIG, //speculative deconfig }; // enums to indicate non-error reason for a targets deconfiguration. @@ -146,6 +148,8 @@ public: // set by presentByAssoc() when a DIMM has no MBA DECONFIGURED_BY_NO_PARENT_MBA, // BASE | 0x0D + CONFIGURED_BY_RESOURCE_RECOVERY, // BASE | 0x0E + // mask - these bits mean it's a PLID and not an enum DECONFIGURED_BY_PLID_MASK = 0xFFFF0000, }; @@ -194,6 +198,23 @@ public: ~DeconfigGard(); /** + * @brief Apply gard record for a specific target. + * + * Called by deconfigureTargetsFromGardRecordsForIpl. + * + * @param i_pTarget Target to be garded + * + * @param i_gardRecord gard record to be applied + * + * @param i_deconfigRule Options for deconfigure + * + * @return errlHndl_t. Error log handle. + */ + errlHndl_t applyGardRecord(TARGETING::Target *i_pTarget, + GardRecord &i_gardRecord, + const DeconfigureFlags i_deconfigRule = NOT_AT_RUNTIME); + + /** * @brief Clears GARD Records for replaced Targets. * * Called by HWAS as part of initial IPL steps. 
@@ -260,14 +281,14 @@ public: * @param[in] i_errlEid Error log EID to store in Deconfigure Record. * @param[out] o_targetDeconfigured - if pointer is valid, bool is set * to true if target was deconfigured; NOT SET OTHERWISE - * @param[in] i_runTimeDeconfigRule Deconfigure Runtime options - + * @param[in] i_deconfigRule Deconfigure options - * default not at runtime * @return errlHndl_t. Error log handle. */ errlHndl_t deconfigureTarget(TARGETING::Target & i_target, const uint32_t i_errlEid, bool *o_targetDeconfigured = NULL, - const DeconfigureRuntime i_runTimeDeconfigRule = + const DeconfigureFlags i_deconfigRule = NOT_AT_RUNTIME); /** @@ -433,12 +454,12 @@ private: * * @param[in] i_target Reference to base Target. * @param[in] i_errlEid Error log EID to store in Deconfigure Record. - * @param[in] i_runTimeDeconfigRule Deconfigure Runtime options - + * @param[in] i_deconfigRule Deconfigure Runtime options - * default not at runtime */ void _deconfigureByAssoc(TARGETING::Target & i_target, const uint32_t i_errlEid, - const DeconfigureRuntime i_runTimeDeconfigRule = + const DeconfigureFlags i_deconfigRule = NOT_AT_RUNTIME); /** @@ -451,13 +472,13 @@ private: * @param[in] i_errlEid Error log EID to store in Deconfigure Record. 
* @param[out] o_targetDeconfigured - if pointer is valid, bool is set * to true if target was deconfigured; NOT SET OTHERWISE - * @param[in] i_runTimeDeconfigRule Deconfigure Runtime options - + * @param[in] i_deconfigRule Deconfigure Runtime options - * default not at runtime */ void _deconfigureTarget(TARGETING::Target & i_target, const uint32_t i_errlEid, bool *o_targetDeconfigured = NULL, - const DeconfigureRuntime i_runTimeDeconfigRule = + const DeconfigureFlags i_deconfigRule = NOT_AT_RUNTIME); /** diff --git a/src/include/usr/hwas/common/hwas.H b/src/include/usr/hwas/common/hwas.H index 0a1236679..caae04a79 100644 --- a/src/include/usr/hwas/common/hwas.H +++ b/src/include/usr/hwas/common/hwas.H @@ -121,11 +121,16 @@ errlHndl_t restrictEXunits( * running. * * @param[in] i_node node target to restrict hw check + * @param[out] o_bootable Indicate whether the system is + * is bootable with current configuration. + * if o_bootable is not NULL an error for + * system unavailability will not be logged * * @return error log handle */ errlHndl_t checkMinimumHardware( - const TARGETING::ConstTargetHandle_t i_node = NULL); + const TARGETING::ConstTargetHandle_t i_node = NULL, + bool *o_bootable = NULL); /** * @brief Struct representing a particular target. Used by diff --git a/src/include/usr/hwas/common/hwasCommon.H b/src/include/usr/hwas/common/hwasCommon.H index 5a3522bf9..f9ffc9752 100644 --- a/src/include/usr/hwas/common/hwasCommon.H +++ b/src/include/usr/hwas/common/hwasCommon.H @@ -253,9 +253,13 @@ void hwasErrorUpdatePlid(errlHndl_t & io_errl, * * @param[io] io_plid Reference to plid. * @param[in] i_node node target to restrict hw check + * @param[out] o_bootable Indicates whether system is bootable with current + * configuratio, if a non NULL pointer is passed erros will not + * be logged if system cannot ipl and just return a true or false. 
*/ void platCheckMinimumHardware(uint32_t & io_plid, - const TARGETING::ConstTargetHandle_t i_node = NULL); + const TARGETING::ConstTargetHandle_t i_node = NULL, + bool *o_bootable = NULL); } // namespace HWAS diff --git a/src/include/usr/hwas/common/hwas_reasoncodes.H b/src/include/usr/hwas/common/hwas_reasoncodes.H index c7b735fa8..0a8be88ac 100644 --- a/src/include/usr/hwas/common/hwas_reasoncodes.H +++ b/src/include/usr/hwas/common/hwas_reasoncodes.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2015 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -32,6 +32,7 @@ namespace HWAS MOD_DECONFIG_GARD = 0x01, MOD_PROCESS_CALLOUT = 0x02, MOD_CHECK_MIN_HW = 0x03, + MOD_DECONFIG_TARGETS_FROM_GARD = 0x04, }; enum HwasReasonCode @@ -50,6 +51,7 @@ namespace HWAS RC_SYSAVAIL_NO_PROCS_FUNC = HWAS_COMP_ID | 0x07, RC_SYSAVAIL_NO_NODES_FUNC = HWAS_COMP_ID | 0x08, RC_SYSAVAIL_NO_MEMBUFS_FUNC = HWAS_COMP_ID | 0x09, + RC_RESOURCE_RECOVERED = HWAS_COMP_ID | 0x0A, }; }; diff --git a/src/include/usr/targeting/common/predicates/predicatehwas.H b/src/include/usr/targeting/common/predicates/predicatehwas.H index d433c3748..523332646 100644 --- a/src/include/usr/targeting/common/predicates/predicatehwas.H +++ b/src/include/usr/targeting/common/predicates/predicatehwas.H @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2015 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. 
*/ @@ -125,6 +127,15 @@ class PredicateHwas : public PredicateBase */ PredicateHwas& dumpFunctional(const bool i_dumpFunctional); + /** + * @brief Configure predicate to look for the given speculative + * deconfig state + * + * @param[in] i_specdeconfig Desired speculative deconfig state + * + * @return Reference to the predicate, for chaining + */ + PredicateHwas& specdeconfig(const bool i_specdeconfig); /** * @brief Returns whether target matches the desired HWAS state * @@ -229,6 +240,16 @@ inline PredicateHwas& PredicateHwas::dumpFunctional( return *this; } +//****************************************************************************** +// PredicateHwas::specdeconfig +//****************************************************************************** +inline PredicateHwas& PredicateHwas::specdeconfig( + const bool i_specdeconfig) +{ + iv_desired.attribute.specdeconfig = i_specdeconfig; + iv_valid.attribute.specdeconfig = true; + return *this; +} #undef TARG_CLASS #undef TARG_NAMESPACE diff --git a/src/usr/hwas/common/deconfigGard.C b/src/usr/hwas/common/deconfigGard.C index a5b30be2c..cf24cab37 100644 --- a/src/usr/hwas/common/deconfigGard.C +++ b/src/usr/hwas/common/deconfigGard.C @@ -122,7 +122,61 @@ DeconfigGard::~DeconfigGard() HWAS_MUTEX_DESTROY(iv_mutex); free(iv_platDeconfigGard); } +//****************************************************************************** +errlHndl_t DeconfigGard::applyGardRecord(Target *i_pTarget, + GardRecord &i_gardRecord, + const DeconfigureFlags i_deconfigRule) +{ + HWAS_INF("Apply gard record for a target"); + errlHndl_t l_pErr = NULL; + do + { + // skip if not present + if (!i_pTarget->getAttr<ATTR_HWAS_STATE>().present) + { + HWAS_INF("skipping %.8X - target not present", + get_huid(i_pTarget)); + l_pErr = platLogEvent(i_pTarget, GARD_NOT_APPLIED); + if (l_pErr) + { + HWAS_ERR("platLogEvent returned an error"); + } + break; + } + + // special case - use errlogEid UNLESS it's a Manual Gard + uint32_t l_errlogEid = + 
(i_gardRecord.iv_errorType == GARD_User_Manual) ? + DECONFIGURED_BY_MANUAL_GARD : i_gardRecord.iv_errlogEid; + + // all ok - do the work + HWAS_MUTEX_LOCK(iv_mutex); + + // Deconfigure the Target + // don't need to check ATTR_DECONFIG_GARDABLE -- if we get + // here, it's because of a gard record on this target + _deconfigureTarget(*i_pTarget, l_errlogEid,NULL,i_deconfigRule); + + // Deconfigure other Targets by association + _deconfigureByAssoc(*i_pTarget, l_errlogEid,i_deconfigRule); + HWAS_MUTEX_UNLOCK(iv_mutex); + + if(i_deconfigRule == SPEC_DECONFIG) + { + break; + } + + l_pErr = platLogEvent(i_pTarget, GARD_APPLIED); + if (l_pErr) + { + HWAS_ERR("platLogEvent returned an error"); + break; + } + } + while(0); + return l_pErr; +}//applyGardRecord //****************************************************************************** errlHndl_t DeconfigGard::clearGardRecordsForReplacedTargets() { @@ -245,13 +299,22 @@ errlHndl_t DeconfigGard::deconfigureTargetsFromGardRecordsForIpl( HWAS_DBG("%d GARD Records found", l_gardRecords.size()); std::vector<uint32_t> errlLogEidList; - // For each GARD Record + //First apply all Unrecoverable or Fatal gard records and + //check whether system is bootable, if not bootable + //exit from this funtion or try to apply remaining records for (GardRecordsCItr_t l_itr = l_gardRecords.begin(); l_itr != l_gardRecords.end(); ++l_itr) { GardRecord l_gardRecord = *l_itr; + //Continue only with FATAL/UNRECOVERABLE gard errors. 
+ if((l_gardRecord.iv_errorType != GARD_Fatal)&& + (l_gardRecord.iv_errorType != GARD_Unrecoverable)) + { + //Skip recoverable gard records + continue; + } // Find the associated Target Target * l_pTarget = targetService().toTarget(l_gardRecord.iv_targetId); @@ -280,7 +343,78 @@ errlHndl_t DeconfigGard::deconfigureTargetsFromGardRecordsForIpl( } continue; } + l_pErr = applyGardRecord(l_pTarget, l_gardRecord); + if (l_pErr) + { + HWAS_ERR("applyGardRecord returned an error"); + break; + } + uint32_t l_errlogEid = l_gardRecord.iv_errlogEid; + //If the errlogEid is already in the errLogEidList, then + //don't need to log it again as a single error log can + //create multiple guard records and we only need to repost + //it once. + std::vector<uint32_t>::iterator low = + std::lower_bound(errlLogEidList.begin(), + errlLogEidList.end(), l_errlogEid); + if((low == errlLogEidList.end()) || ((*low) != l_errlogEid)) + { + errlLogEidList.insert(low, l_errlogEid); + l_pErr = platReLogGardError(l_gardRecord); + if (l_pErr) + { + HWAS_ERR("platReLogGardError returned an error"); + break; + } + } + + } // for + if (l_pErr) + { + break; + } + + bool l_isSystemBootable = false; + l_pErr = checkMinimumHardware(NULL,&l_isSystemBootable); + if (l_pErr) + { + HWAS_ERR("checkMinimumHardware returned an error"); + break; + } + + if(!l_isSystemBootable) + { + //Break here system is not bootable after applying + //non recoverable gard records. + HWAS_ERR("System is not bootable after applying gard record"); + break; + } + + //Now loop through all gard records and apply recoverable + //gard records(non Fatal and non Unrecoverable) check + //whether system can be booted after applying each gard record + //if system cant be booted after applying gard record that need + //to be rolled back and try with next one. 
+ for (GardRecordsCItr_t l_itr = l_gardRecords.begin(); + l_itr != l_gardRecords.end(); + ++l_itr) + { + GardRecord l_gardRecord = *l_itr; + // Find the associated Target + Target * l_pTarget = + targetService().toTarget(l_gardRecord.iv_targetId); + + if (l_pTarget == NULL) + { + // could be a platform specific target for the other + // ie, we are hostboot and this is an FSP target, or vice-versa + // Binary trace the iv_targetId (EntityPath) + HWAS_INF_BIN("Could not find Target for:", + &(l_gardRecord.iv_targetId), + sizeof(l_gardRecord.iv_targetId)); + continue; + } if ((l_sys_policy & CDM_POLICIES_PREDICTIVE_DISABLED) && (l_gardRecord.iv_errorType == GARD_Predictive)) { @@ -295,13 +429,20 @@ errlHndl_t DeconfigGard::deconfigureTargetsFromGardRecordsForIpl( } continue; } + //Continue only with recoverable gard errors. + if((l_gardRecord.iv_errorType == GARD_Fatal)|| + (l_gardRecord.iv_errorType == GARD_Unrecoverable)) + { + //Skip non-recoverable gard records + continue; + } - // skip if not present - if (!l_pTarget->getAttr<ATTR_HWAS_STATE>().present) + // if this does NOT match, continue to next in loop + if (i_pPredicate && ((*i_pPredicate)(l_pTarget) == false)) { - HWAS_INF("skipping %.8X - target not present", + HWAS_INF("skipping %.8X - predicate didn't match", get_huid(l_pTarget)); - l_pErr = platLogEvent(l_pTarget, GARD_NOT_APPLIED); + l_pErr = platLogEvent(l_pTarget, PREDICATE); if (l_pErr) { HWAS_ERR("platLogEvent returned an error"); @@ -309,32 +450,117 @@ errlHndl_t DeconfigGard::deconfigureTargetsFromGardRecordsForIpl( } continue; } + l_pErr = applyGardRecord(l_pTarget,l_gardRecord,SPEC_DECONFIG); + if (l_pErr) + { + HWAS_ERR("applyGardRecord returned an error"); + break; + } - // special case - use errlogEid UNLESS it's a Manual Gard - const uint32_t l_errlogEid = - (l_gardRecord.iv_errorType == GARD_User_Manual) ? 
- DECONFIGURED_BY_MANUAL_GARD : l_gardRecord.iv_errlogEid; - - // all ok - do the work - HWAS_MUTEX_LOCK(iv_mutex); + l_pErr = checkMinimumHardware(NULL,&l_isSystemBootable); + if (l_pErr) + { + HWAS_ERR("checkMinimumHardware returned an error"); + break; + } - // Deconfigure the Target - // don't need to check ATTR_DECONFIG_GARDABLE -- if we get - // here, it's because of a gard record on this target - _deconfigureTarget(*l_pTarget, l_errlogEid); + if(!l_isSystemBootable) + { + HWAS_INF("System cannot ipl, rolling back the gard Target 0x%08x", + get_huid(l_pTarget)); + HwasState l_state = l_pTarget->getAttr<ATTR_HWAS_STATE>(); + l_state.deconfiguredByEid = CONFIGURED_BY_RESOURCE_RECOVERY; + l_state.specdeconfig = 0; + l_pTarget->setAttr<ATTR_HWAS_STATE>(l_state); + + //Now go through all other targets which are speculatively + //deconfigured and roll back gard on that too. + PredicateHwas predSpecDeconfig; + predSpecDeconfig.specdeconfig(true); + TargetHandleList l_specDeconfgList; + targetService().getAssociated(l_specDeconfgList, pSys, + TargetService::CHILD, TargetService::ALL, + &predSpecDeconfig); + + for (TargetHandleList::const_iterator + l_sdIter = l_specDeconfgList.begin(); + l_sdIter != l_specDeconfgList.end(); + ++l_sdIter) + { + l_state = (*l_sdIter)->getAttr<ATTR_HWAS_STATE>(); + l_state.deconfiguredByEid = 0; + l_state.specdeconfig = 0; + (*l_sdIter)->setAttr<ATTR_HWAS_STATE>(l_state); + } - // Deconfigure other Targets by association - _deconfigureByAssoc(*l_pTarget, l_errlogEid); + /*@ + * @errortype + * @severity ERRL_SEV_INFORMATIONAL + * @moduleid MOD_DECONFIG_TARGETS_FROM_GARD + * @reasoncode RC_RESOURCE_RECOVERED + * @devdesc A gard record was not applied due to a + * lack of resources. + * @custdesc A previously discovered hardware issue is + * being ignored to allow the system to boot. + * @userdata1[00:31] HUID the resource + * @userdata2[00:31] EID from the gard record. 
+ */ + const uint64_t userdata1 = + (static_cast<uint64_t>(get_huid(l_pTarget)) << 32); + const uint64_t userdata2 = + (static_cast<uint64_t>(l_gardRecord.iv_errlogEid) << 32); + + l_pErr = hwasError(ERRL_SEV_INFORMATIONAL, + MOD_DECONFIG_TARGETS_FROM_GARD, + RC_RESOURCE_RECOVERED, + userdata1, + userdata2); + errlCommit(l_pErr, HWAS_COMP_ID); + l_pErr = platLogEvent(l_pTarget, RESOURCE_RECOVERED); + if (l_pErr) + { + HWAS_ERR("platLogEvent returned an error"); + break; + } - HWAS_MUTEX_UNLOCK(iv_mutex); + //Mark parent node as resource recovered + PredicateCTM predNode(CLASS_ENC, TYPE_NODE); + PredicateHwas predFunctional; + predFunctional.functional(true); + PredicatePostfixExpr nodeCheckExpr; + nodeCheckExpr.push(&predNode).push(&predFunctional).And(); + + TargetHandleList pNodeList; + targetService().getAssociated(pNodeList, l_pTarget, + TargetService::PARENT, TargetService::ALL, + &nodeCheckExpr); + if(!pNodeList.empty()) + { + HwasState l_state = + pNodeList[0]->getAttr<ATTR_HWAS_STATE>(); + l_state.deconfiguredByEid = + CONFIGURED_BY_RESOURCE_RECOVERY; + pNodeList[0]->setAttr<ATTR_HWAS_STATE>(l_state); + } + continue; + } + //The system can be booted even after gardingthis resource + //Apply the gard record. + l_pErr = applyGardRecord(l_pTarget, l_gardRecord); + if (l_pErr) + { + HWAS_ERR("applyGardRecord returned an error"); + break; + } + uint32_t l_errlogEid = l_gardRecord.iv_errlogEid; //If the errlogEid is already in the errLogEidList, then //don't need to log it again as a single error log can //create multiple guard records and we only need to repost //it once. 
std::vector<uint32_t>::iterator low = - std::lower_bound(errlLogEidList.begin(), - errlLogEidList.end(), l_errlogEid); + std::lower_bound(errlLogEidList.begin(), + errlLogEidList.end(), l_errlogEid); if((low == errlLogEidList.end()) || ((*low) != l_errlogEid)) { errlLogEidList.insert(low, l_errlogEid); @@ -345,16 +571,8 @@ errlHndl_t DeconfigGard::deconfigureTargetsFromGardRecordsForIpl( break; } } - - l_pErr = platLogEvent(l_pTarget, GARD_APPLIED); - if (l_pErr) - { - HWAS_ERR("platLogEvent returned an error"); - break; - } } // for - - if (l_pErr) + if(l_pErr) { break; } @@ -523,7 +741,7 @@ errlHndl_t DeconfigGard::deconfigureTarget( Target & i_target, const uint32_t i_errlEid, bool *o_targetDeconfigured, - const DeconfigureRuntime i_runTimeDeconfigRule) + const DeconfigureFlags i_deconfigRule) { HWAS_DBG("Deconfigure Target"); errlHndl_t l_pErr = NULL; @@ -532,7 +750,7 @@ errlHndl_t DeconfigGard::deconfigureTarget( { // Do not deconfig Target if we're NOT being asked to force AND // the is System is at runtime - if ((i_runTimeDeconfigRule == NOT_AT_RUNTIME) && + if ((i_deconfigRule == NOT_AT_RUNTIME) && platSystemIsAtRuntime()) { HWAS_INF("Skipping deconfigureTarget: at Runtime; target %.8X", @@ -542,7 +760,7 @@ errlHndl_t DeconfigGard::deconfigureTarget( // just to make sure that we haven't missed anything in development // AT RUNTIME: we should only be called to deconfigure these types. - if (i_runTimeDeconfigRule != NOT_AT_RUNTIME) + if (i_deconfigRule != NOT_AT_RUNTIME) { TYPE target_type = i_target.getAttr<ATTR_TYPE>(); // TODO RTC 88471: use attribute vs hardcoded list. 
@@ -596,10 +814,10 @@ errlHndl_t DeconfigGard::deconfigureTarget( // Deconfigure the Target _deconfigureTarget(i_target, i_errlEid, o_targetDeconfigured, - i_runTimeDeconfigRule); + i_deconfigRule); // Deconfigure other Targets by association - _deconfigureByAssoc(i_target, i_errlEid, i_runTimeDeconfigRule); + _deconfigureByAssoc(i_target, i_errlEid, i_deconfigRule); HWAS_MUTEX_UNLOCK(iv_mutex); } @@ -995,14 +1213,19 @@ errlHndl_t DeconfigGard::_invokeDeconfigureAssocProc( void DeconfigGard::_deconfigureByAssoc( Target & i_target, const uint32_t i_errlEid, - const DeconfigureRuntime i_runTimeDeconfigRule) + const DeconfigureFlags i_deconfigRule) { - HWAS_INF("_deconfigureByAssoc for %.8X (i_runTimeDeconfigRule %d)", - get_huid(&i_target), i_runTimeDeconfigRule); + HWAS_INF("_deconfigureByAssoc for %.8X (i_deconfigRule %d)", + get_huid(&i_target), i_deconfigRule); // some common variables used below TargetHandleList pChildList; - PredicateIsFunctional isFunctional; + PredicateHwas isFunctional; + isFunctional.functional(true); + if(i_deconfigRule == SPEC_DECONFIG) + { + isFunctional.specdeconfig(false); + } // note - ATTR_DECONFIG_GARDABLE is NOT checked for all 'by association' // deconfigures, as that attribute is only for direct deconfigure requests. @@ -1018,12 +1241,13 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc CHILD: %.8X", get_huid(pChild)); _deconfigureTarget(*pChild, i_errlEid, NULL, - i_runTimeDeconfigRule); + i_deconfigRule); // Deconfigure other Targets by association - _deconfigureByAssoc(*pChild, i_errlEid, i_runTimeDeconfigRule); + _deconfigureByAssoc(*pChild, i_errlEid, i_deconfigRule); } // for CHILD - if (i_runTimeDeconfigRule == NOT_AT_RUNTIME) + if ((i_deconfigRule == NOT_AT_RUNTIME)|| + (i_deconfigRule == SPEC_DECONFIG)) { // if the rule is NOT_AT_RUNTIME and we got here, then we are // not at runtime. 
@@ -1045,9 +1269,9 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc CHILD_BY_AFFINITY: %.8X", get_huid(pChild)); _deconfigureTarget(*pChild, i_errlEid, NULL, - i_runTimeDeconfigRule); + i_deconfigRule); // Deconfigure other Targets by association - _deconfigureByAssoc(*pChild, i_errlEid, i_runTimeDeconfigRule); + _deconfigureByAssoc(*pChild, i_errlEid, i_deconfigRule); } // for CHILD_BY_AFFINITY // Handles bus endpoint (TYPE_XBUS, TYPE_ABUS, TYPE_PSI) and @@ -1072,9 +1296,9 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc MEMBUF parent MCS: %.8X", get_huid(l_parentMcs)); _deconfigureTarget(const_cast<Target &> (*l_parentMcs), - i_errlEid, NULL, i_runTimeDeconfigRule); + i_errlEid, NULL, i_deconfigRule); _deconfigureByAssoc(const_cast<Target &> (*l_parentMcs), - i_errlEid, i_runTimeDeconfigRule); + i_errlEid, i_deconfigRule); } Target *pSys; @@ -1091,16 +1315,16 @@ void DeconfigGard::_deconfigureByAssoc( // find paired MCS / MEMBUF (Centaur) const Target *l_partnerMcs = findPartnerForMcs(l_parentMcs); - // If partner MCS is functional (NULL otherwise) - if (l_partnerMcs) + // If partner MCS is functional or not spec deconfigured + if ((l_partnerMcs)&&(isFunctional(l_partnerMcs))) { // deconfigure the paired MCS HWAS_INF("_deconfigureByAssoc MCS (& MEMBUF) paired: %.8X", get_huid(l_partnerMcs)); _deconfigureTarget(const_cast<Target &> (*l_partnerMcs), - i_errlEid, NULL,i_runTimeDeconfigRule); + i_errlEid, NULL,i_deconfigRule); _deconfigureByAssoc(const_cast<Target &> (*l_partnerMcs), - i_errlEid,i_runTimeDeconfigRule); + i_errlEid,i_deconfigRule); } break; } // TYPE_MEMBUF @@ -1112,10 +1336,14 @@ void DeconfigGard::_deconfigureByAssoc( // get children DIMM that are functional TargetHandleList pDimmList; - getChildAffinityTargetsByState(pDimmList,l_parentMembuf, - CLASS_LOGICAL_CARD, - TYPE_DIMM, - UTIL_FILTER_FUNCTIONAL); + PredicateCTM predDimm(CLASS_LOGICAL_CARD, TYPE_DIMM); + PredicatePostfixExpr funcDimms; + 
funcDimms.push(&predDimm).push(&isFunctional).And(); + targetService().getAssociated(pDimmList, + l_parentMembuf, + TargetService::CHILD_BY_AFFINITY, + TargetService::ALL, + &funcDimms); // if parent MEMBUF (Centaur) has no functional memory if (pDimmList.empty()) @@ -1124,9 +1352,9 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc MEMBUF parent with no memory: %.8X", get_huid(l_parentMembuf)); _deconfigureTarget(const_cast<Target &> (*l_parentMembuf), - i_errlEid, NULL, i_runTimeDeconfigRule); + i_errlEid, NULL, i_deconfigRule); _deconfigureByAssoc(const_cast<Target &> (*l_parentMembuf), - i_errlEid, i_runTimeDeconfigRule); + i_errlEid, i_deconfigRule); // and we're done, so break; break; @@ -1147,8 +1375,15 @@ void DeconfigGard::_deconfigureByAssoc( // find parent MCS TargetHandleList pParentMcsList; - getParentAffinityTargetsByState(pParentMcsList, l_parentMembuf, - CLASS_UNIT, TYPE_MCS, UTIL_FILTER_FUNCTIONAL); + PredicateCTM predMcs(CLASS_UNIT, TYPE_MCS); + PredicatePostfixExpr funcMcs; + funcMcs.push(&predMcs).push(&isFunctional).And(); + targetService().getAssociated(pParentMcsList, + l_parentMembuf, + TargetService::PARENT_BY_AFFINITY, + TargetService::ALL, + &funcMcs); + HWAS_ASSERT((pParentMcsList.size() <= 1), "HWAS _deconfigureByAssoc: pParentMcsList > 1"); @@ -1165,8 +1400,8 @@ void DeconfigGard::_deconfigureByAssoc( const Target *l_partnerMcs = findPartnerForMcs(l_parentMcs); // If partner MCS is non-functional - // (findPartnerForMcs returned NULL) - if (!l_partnerMcs) + // (findPartnerForMcs returned NULL) or speculated deconfig + if ((!l_partnerMcs)&&(!isFunctional(l_partnerMcs))) { // We're done. 
break; @@ -1174,10 +1409,15 @@ void DeconfigGard::_deconfigureByAssoc( // Obtain MBA targets related to paired MCS TargetHandleList pMbaList; - getChildAffinityTargetsByState(pMbaList,l_partnerMcs, - CLASS_UNIT, - TYPE_MBA, - UTIL_FILTER_FUNCTIONAL); + PredicateCTM predMba(CLASS_UNIT, TYPE_MBA); + PredicatePostfixExpr funcMba; + funcMba.push(&predMba).push(&isFunctional).And(); + targetService().getAssociated(pMbaList, + l_partnerMcs, + TargetService::CHILD_BY_AFFINITY, + TargetService::ALL, + &funcMba); + // Declare list to hold any MBA targets we need to deconfigure // as we look for matches. This list will be used to run @@ -1229,7 +1469,7 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc MBA matched: %.8X", get_huid(pMba)); _deconfigureTarget(*pMba, i_errlEid, - NULL, i_runTimeDeconfigRule); + NULL, i_deconfigRule); l_deconfigList.push_back(pMba); break; // only need to do 1 MBA - we're done. } @@ -1259,7 +1499,7 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc MBA matched: %.8X", get_huid(pMba)); _deconfigureTarget(*pMba, i_errlEid, - NULL, i_runTimeDeconfigRule); + NULL, i_deconfigRule); l_deconfigList.push_back(pMba); break; // only need to do 1 MBA - we're done. 
} @@ -1279,7 +1519,7 @@ void DeconfigGard::_deconfigureByAssoc( TargetHandle_t pMba = *pMba_it; HWAS_INF("_deconfigureByAssoc MBA matched (bA): %.8X", get_huid(pMba)); - _deconfigureByAssoc(*pMba, i_errlEid,i_runTimeDeconfigRule); + _deconfigureByAssoc(*pMba, i_errlEid,i_deconfigRule); } // for break; } // TYPE_MBA @@ -1288,8 +1528,15 @@ void DeconfigGard::_deconfigureByAssoc( { // get deconfigure parent MBA TargetHandleList pParentMbaList; - getParentAffinityTargets(pParentMbaList, &i_target, - CLASS_UNIT, TYPE_MBA, true /*functional*/); + PredicateCTM predMba(CLASS_UNIT, TYPE_MBA); + PredicatePostfixExpr funcMba; + funcMba.push(&predMba).push(&isFunctional).And(); + targetService().getAssociated(pParentMbaList, + &i_target, + TargetService::PARENT_BY_AFFINITY, + TargetService::ALL, + &funcMba); + HWAS_ASSERT((pParentMbaList.size() <= 1), "HWAS _deconfigureByAssoc: pParentMbaList > 1"); @@ -1300,9 +1547,9 @@ void DeconfigGard::_deconfigureByAssoc( HWAS_INF("_deconfigureByAssoc DIMM parent MBA: %.8X", get_huid(l_parentMba)); _deconfigureTarget(const_cast<Target &> (*l_parentMba), - i_errlEid, NULL, i_runTimeDeconfigRule); + i_errlEid, NULL, i_deconfigRule); _deconfigureByAssoc(const_cast<Target &> (*l_parentMba), - i_errlEid, i_runTimeDeconfigRule); + i_errlEid, i_deconfigRule); } break; } // TYPE_DIMM @@ -1323,7 +1570,7 @@ void DeconfigGard::_deconfigureByAssoc( get_huid(l_pDstTarget)); _deconfigureTarget(const_cast<Target &> (*l_pDstTarget), i_errlEid, NULL, - i_runTimeDeconfigRule); + i_deconfigRule); } break; } // TYPE_XBUS, TYPE_ABUS @@ -1336,9 +1583,9 @@ void DeconfigGard::_deconfigureByAssoc( get_huid(l_pParentProc)); _deconfigureTarget(const_cast<Target &> (*l_pParentProc), i_errlEid, NULL, - i_runTimeDeconfigRule); + i_deconfigRule); _deconfigureByAssoc(const_cast<Target &> (*l_pParentProc), - i_errlEid, i_runTimeDeconfigRule); + i_errlEid, i_deconfigRule); break; } // TYPE_PORE default: @@ -1355,7 +1602,7 @@ void DeconfigGard::_deconfigureTarget( Target & 
i_target, const uint32_t i_errlEid, bool *o_targetDeconfigured, - const DeconfigureRuntime i_runTimeDeconfigRule) + const DeconfigureFlags i_deconfigRule) { HWAS_INF("Deconfiguring Target %.8X, errlEid 0x%X", get_huid(&i_target), i_errlEid); @@ -1366,7 +1613,7 @@ void DeconfigGard::_deconfigureTarget( HwasState l_state = i_target.getAttr<ATTR_HWAS_STATE>(); // if the rule is DUMP_AT_RUNTIME and we got here, then we are at runtime. - if (i_runTimeDeconfigRule == DUMP_AT_RUNTIME) + if (i_deconfigRule == DUMP_AT_RUNTIME) { l_state.dumpfunctional = 1; } @@ -1381,7 +1628,7 @@ void DeconfigGard::_deconfigureTarget( "Target HWAS_STATE already has functional=0; deconfiguredByEid=0x%X", l_state.deconfiguredByEid); - if (i_runTimeDeconfigRule != NOT_AT_RUNTIME) + if (i_deconfigRule != NOT_AT_RUNTIME) { // if FULLY_AT_RUNTIME or DUMP_AT_RUNTIME, then the dumpfunctional // state changed, so do the setAttr @@ -1390,42 +1637,52 @@ void DeconfigGard::_deconfigureTarget( } else { - HWAS_INF( - "Setting Target HWAS_STATE: functional=0, deconfiguredByEid=0x%X", - i_errlEid); - l_state.functional = 0; - - l_state.deconfiguredByEid = i_errlEid; - i_target.setAttr<ATTR_HWAS_STATE>(l_state); - if (o_targetDeconfigured) + if(i_deconfigRule == SPEC_DECONFIG) { - *o_targetDeconfigured = true; + HWAS_INF("Setting speculative deconfig"); + l_state.specdeconfig = 1; + l_state.deconfiguredByEid = i_errlEid; + i_target.setAttr<ATTR_HWAS_STATE>(l_state); } - - // if this is a real error, trigger a reconfigure loop - if (i_errlEid & DECONFIGURED_BY_PLID_MASK) + else { - // Set RECONFIGURE_LOOP attribute to indicate it was caused by - // a hw deconfigure - TARGETING::Target* l_pTopLevel = NULL; - TARGETING::targetService().getTopLevelTarget(l_pTopLevel); - TARGETING::ATTR_RECONFIGURE_LOOP_type l_reconfigAttr = - l_pTopLevel->getAttr<ATTR_RECONFIGURE_LOOP>(); - // 'OR' values in case of multiple reasons for reconfigure - l_reconfigAttr |= TARGETING::RECONFIGURE_LOOP_DECONFIGURE; - 
l_pTopLevel->setAttr<ATTR_RECONFIGURE_LOOP>(l_reconfigAttr); - } + HWAS_INF( + "Setting Target HWAS_STATE: functional=0, deconfiguredByEid=0x%X", + i_errlEid); + l_state.functional = 0; + l_state.specdeconfig = 0; - // Do any necessary Deconfigure Actions - _doDeconfigureActions(i_target); - } + l_state.deconfiguredByEid = i_errlEid; + i_target.setAttr<ATTR_HWAS_STATE>(l_state); + if (o_targetDeconfigured) + { + *o_targetDeconfigured = true; + } - // If target being deconfigured is an x/a bus endpoint - if ((TYPE_XBUS == i_target.getAttr<ATTR_TYPE>()) || - (TYPE_ABUS == i_target.getAttr<ATTR_TYPE>())) - { - // Set flag indicating x/a bus endpoint deconfiguration - iv_XABusEndpointDeconfigured = true; + // if this is a real error, trigger a reconfigure loop + if (i_errlEid & DECONFIGURED_BY_PLID_MASK) + { + // Set RECONFIGURE_LOOP attribute to indicate it was caused by + // a hw deconfigure + TARGETING::Target* l_pTopLevel = NULL; + TARGETING::targetService().getTopLevelTarget(l_pTopLevel); + TARGETING::ATTR_RECONFIGURE_LOOP_type l_reconfigAttr = + l_pTopLevel->getAttr<ATTR_RECONFIGURE_LOOP>(); + // 'OR' values in case of multiple reasons for reconfigure + l_reconfigAttr |= TARGETING::RECONFIGURE_LOOP_DECONFIGURE; + l_pTopLevel->setAttr<ATTR_RECONFIGURE_LOOP>(l_reconfigAttr); + } + + // Do any necessary Deconfigure Actions + _doDeconfigureActions(i_target); + // If target being deconfigured is an x/a bus endpoint + if ((TYPE_XBUS == i_target.getAttr<ATTR_TYPE>()) || + (TYPE_ABUS == i_target.getAttr<ATTR_TYPE>())) + { + // Set flag indicating x/a bus endpoint deconfiguration + iv_XABusEndpointDeconfigured = true; + } + } } //HWAS_DBG("Deconfiguring Target %.8X exiting", get_huid(&i_target)); diff --git a/src/usr/hwas/common/hwas.C b/src/usr/hwas/common/hwas.C index c96abaf13..4c6790d10 100644 --- a/src/usr/hwas/common/hwas.C +++ b/src/usr/hwas/common/hwas.C @@ -667,7 +667,8 @@ errlHndl_t restrictEXunits( return errl; } // restrictEXunits -errlHndl_t 
checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) +errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node, + bool *o_bootable) { errlHndl_t l_errl = NULL; HWAS_INF("checkMinimumHardware entry"); @@ -679,9 +680,18 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) // Common present and functional hardware checks. //*********************************************************************/ + if(o_bootable) + { + *o_bootable = true; + } PredicateHwas l_present; l_present.present(true); - PredicateIsFunctional l_functional; + PredicateHwas l_functional; + if(o_bootable) + { + l_functional.specdeconfig(false); + } + l_functional.functional(true); // top 'starting' point - use first node if no i_node given (hostboot) Target *pTop; @@ -698,6 +708,12 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) if (l_nodes.empty()) { // no functional nodes, get out now + if(o_bootable) + { + *o_bootable = false; + break; + } + HWAS_ERR("Insufficient HW to continue IPL: (no func nodes)"); /*@ * @errortype @@ -748,6 +764,11 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) { HWAS_ERR("Insufficient HW to continue IPL: (no master proc)"); + if(o_bootable) + { + *o_bootable = false; + break; + } // determine some numbers to help figure out what's up.. 
PredicateCTM l_proc(CLASS_CHIP, TYPE_PROC); TargetHandleList l_plist; @@ -805,7 +826,13 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) // we have a Master Proc and it's functional // check for at least 1 functional ex/core on Master Proc TargetHandleList l_cores; - getChildChiplets(l_cores, l_pMasterProc, TYPE_EX, true); + PredicateCTM l_core(CLASS_UNIT, TYPE_EX); + PredicatePostfixExpr l_coresFunctional; + l_coresFunctional.push(&l_core).push(&l_functional).And(); + targetService().getAssociated(l_cores, l_pMasterProc, + TargetService::CHILD, TargetService::ALL, + &l_coresFunctional); + HWAS_DBG( "checkMinimumHardware: %d functional cores", l_cores.size() ); @@ -813,6 +840,11 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) { HWAS_ERR("Insufficient HW to continue IPL: (no func cores)"); + if(o_bootable) + { + *o_bootable = false; + break; + } // determine some numbers to help figure out what's up.. PredicateCTM l_ex(CLASS_UNIT, TYPE_EX); TargetHandleList l_plist; @@ -875,7 +907,12 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) if (l_dimms.empty()) { HWAS_ERR( "Insufficient hardware to continue IPL (func DIMM)"); - + + if(o_bootable) + { + *o_bootable = false; + break; + } // determine some numbers to help figure out what's up.. TargetHandleList l_plist; PredicatePostfixExpr l_checkExprPresent; @@ -934,7 +971,11 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) if (l_funcMembufTargetList.empty()) { HWAS_ERR( "Insufficient hardware to continue IPL (func membufs)"); - + if(o_bootable) + { + *o_bootable = false; + break; + } TargetHandleList l_presentMembufTargetList; PredicatePostfixExpr l_checkExprPresentMembufs; l_checkExprPresentMembufs.push(&l_membuf).push(&l_present).And(); @@ -988,14 +1029,14 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) // running on (ie, hostboot or fsp in hwsv). 
// if there is an issue, create and commit an error, and tie it to the // the rest of them with the common plid. - platCheckMinimumHardware(l_commonPlid, i_node); + platCheckMinimumHardware(l_commonPlid, i_node,o_bootable); } while (0); // --------------------------------------------------------------- // if the common plid got set anywhere above, we have an error. // --------------------------------------------------------------- - if (l_commonPlid) + if ((l_commonPlid)&&(o_bootable == NULL)) { /*@ @@ -1018,7 +1059,8 @@ errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_node) } HWAS_INF("checkMinimumHardware exit - minimum hardware %s", - (l_errl == NULL) ? "available" : "NOT available"); + ((l_errl != NULL)||((o_bootable!=NULL)&&(!*o_bootable))) ? + "NOT available" : "available"); return l_errl ; } // checkMinimumHardware diff --git a/src/usr/hwas/hwasPlat.C b/src/usr/hwas/hwasPlat.C index 57fabdee6..71ed3bbad 100644 --- a/src/usr/hwas/hwasPlat.C +++ b/src/usr/hwas/hwasPlat.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2015 */ /* [+] Google Inc. */ /* [+] International Business Machines Corp. 
*/ /* */ @@ -503,7 +503,8 @@ void markTargetChanged(TARGETING::TargetHandle_t i_target) // platCheckMinimumHardware() //****************************************************************************** void platCheckMinimumHardware(uint32_t & io_plid, - const TARGETING::ConstTargetHandle_t i_node) + const TARGETING::ConstTargetHandle_t i_node, + bool *o_bootable) { //errlHndl_t l_errl = NULL; diff --git a/src/usr/targeting/common/xmltohb/attribute_types.xml b/src/usr/targeting/common/xmltohb/attribute_types.xml index ad0ce83a5..c961ebe6a 100644 --- a/src/usr/targeting/common/xmltohb/attribute_types.xml +++ b/src/usr/targeting/common/xmltohb/attribute_types.xml @@ -930,6 +930,16 @@ <bits>1</bits> <default>0</default> </field> + <field> + <name>specdeconfig</name> + <description>Set for speculative deconfig; + 0b0: target not speculative deconfig; + 0b1: target is speculatively deconfigured; + </description> + <type>uint8_t</type> + <bits>1</bits> + <default>0</default> + </field> </complexType> <persistency>volatile</persistency> <readable/> |