diff options
| author | Brian Horton <brianh@linux.ibm.com> | 2012-05-01 11:29:51 -0500 |
|---|---|---|
| committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2012-05-10 13:20:08 -0500 |
| commit | 3d4b593321870fb646be9d86880a19bcc2d3453e (patch) | |
| tree | 6afcc538195f2831d9e63c9b08722c8706a39393 /src/usr | |
| parent | 59a58400b22274c58e3512e09468e45daebc1436 (diff) | |
| download | blackbird-hostboot-3d4b593321870fb646be9d86880a19bcc2d3453e.tar.gz blackbird-hostboot-3d4b593321870fb646be9d86880a19bcc2d3453e.zip | |
platReadIDEC killing IPL for single failure
Correct the HWAS code: if a target is detected as present but the read
of its ID/EC fails, mark the target as present but NOT functional,
commit the errl, and continue processing the remaining targets.
All physical children under that parent target will be marked as
present and NOT functional as well.
The same applies to the deviceRead used for presence detection: if it
fails, mark the target as not present, commit the errl, and continue.
Change-Id: I6d911c6e6aa3968395721dc2826bf860589dc737
RTC: 41123
Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/974
Tested-by: Jenkins Server
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
Diffstat (limited to 'src/usr')
| -rw-r--r-- | src/usr/hwas/hwas.C | 103 | ||||
| -rw-r--r-- | src/usr/hwas/plat/hwasPlat.C | 138 |
2 files changed, 115 insertions, 126 deletions
diff --git a/src/usr/hwas/hwas.C b/src/usr/hwas/hwas.C index ac7ac32c4..2baf602ec 100644 --- a/src/usr/hwas/hwas.C +++ b/src/usr/hwas/hwas.C @@ -33,18 +33,20 @@ /******************************************************************************/ // Includes /******************************************************************************/ -#include <stdint.h> -#include <assert.h> +#include <stdint.h> +#include <assert.h> -#include <targeting/common/commontargeting.H> +#include <initservice/taskargs.H> +#include <targeting/common/commontargeting.H> -#include <hwas/hwas.H> -#include <hwas/hwasCommon.H> +#include <hwas/hwas.H> +#include <hwas/hwasCommon.H> +#include <hwas/hwasError.H> -namespace HWAS +namespace HWAS { -using namespace TARGETING; +using namespace TARGETING; /** @@ -56,12 +58,12 @@ using namespace TARGETING; * @return none * */ -void enableHwasState(Target *i_target) +void enableHwasState(Target *i_target, bool i_functional) { HwasState hwasState = i_target->getAttr<ATTR_HWAS_STATE>(); hwasState.poweredOn = true; hwasState.present = true; - hwasState.functional = true; + hwasState.functional = i_functional; i_target->setAttr<ATTR_HWAS_STATE>( hwasState ); } @@ -101,10 +103,9 @@ errlHndl_t discoverTargets() assert(pSys, "HWAS discoverTargets: no CLASS_SYS TopLevelTarget found"); // mark this as present - enableHwasState(pSys); - HWAS_DBG("pSys %x (%p) %x/%x - marked present", - pSys->getAttr<ATTR_HUID>(), pSys, - pSys->getAttr<ATTR_CLASS>(), pSys->getAttr<ATTR_TYPE>()); + enableHwasState(pSys, true); + HWAS_DBG("pSys %x (%p) - marked present", + pSys->getAttr<ATTR_HUID>(), pSys); // find CLASS_ENC PredicateCTM predEnc(CLASS_ENC); @@ -119,10 +120,9 @@ errlHndl_t discoverTargets() TargetHandle_t pEnc = *pEnc_it; // mark it as present - enableHwasState(pEnc); - HWAS_DBG("pEnc %x (%p) %x/%x - marked present", - pEnc->getAttr<ATTR_HUID>(), pEnc, - pEnc->getAttr<ATTR_CLASS>(), pEnc->getAttr<ATTR_TYPE>()); + enableHwasState(pEnc, true); + HWAS_DBG("pEnc %x (%p) - 
marked present", + pEnc->getAttr<ATTR_HUID>(), pEnc); } // for pEnc_it // find TYPE_PROC, TYPE_MEMBUF and TYPE_DIMM @@ -148,22 +148,44 @@ errlHndl_t discoverTargets() } // no errors - keep going - // for each, mark them and their descendants as present + // for each, read their ID/EC level. if that works, + // mark them and their descendants as present and functional for (TargetHandleList::iterator pTarget_it = pCheckPres.begin(); pTarget_it != pCheckPres.end(); - ) // increment will be done in the loop below + pTarget_it++ + ) { TargetHandle_t pTarget = *pTarget_it; - // set HWAS state to show it's present - enableHwasState(pTarget); - HWAS_DBG("pTarget %x (%p) %x/%x - detected present", + // read Chip ID/EC data from these physical chips + if (pTarget->getAttr<ATTR_CLASS>() == CLASS_CHIP) + { + errl = platReadIDEC(pTarget); + } + + bool isFunctional; + if (!errl) + { // no error + isFunctional = true; + } + else + { // read of ID/EC failed even tho we were present.. + isFunctional = false; + + // commit the error but keep going + errlCommit(errl, HWAS_COMP_ID); + // errl is now NULL + } + + HWAS_DBG("pTarget %x (%p) - detected present %s functional", pTarget->getAttr<ATTR_HUID>(), pTarget, - pTarget->getAttr<ATTR_CLASS>(), - pTarget->getAttr<ATTR_TYPE>()); + isFunctional ? 
"and" : "NOT"); + + // set HWAS state to show it's present + enableHwasState(pTarget, isFunctional); // now need to mark all of this target's - // physical descendants as present + // physical descendants as present and NOT functional TargetHandleList pDescList; targetService().getAssociated( pDescList, pTarget, TargetService::CHILD, TargetService::ALL); @@ -172,38 +194,13 @@ errlHndl_t discoverTargets() pDesc_it++) { TargetHandle_t pDesc = *pDesc_it; - enableHwasState(pDesc); - HWAS_DBG("pDesc %x (%p) %x/%x - marked present", + enableHwasState(pDesc, isFunctional); + HWAS_DBG("pDesc %x (%p) - marked present %s functional", pDesc->getAttr<ATTR_HUID>(), pDesc, - pDesc->getAttr<ATTR_CLASS>(), - pDesc->getAttr<ATTR_TYPE>()); - } - - // if we're not a CHIP, remove us from the list, so that - // when we do the Chip ID/EC call after the loop, we have - // a list that is CHIPs only - if (pTarget->getAttr<ATTR_CLASS>() != CLASS_CHIP) - { - // erase this target, and 'increment' to next - pTarget_it = pCheckPres.erase(pTarget_it); - } - else - { - // advance to next entry in the list - pTarget_it++; + isFunctional ? 
"and" : "NOT"); } } // for pTarget_it - // at this point, pCheckPres only has present CLASS_CHIP targets - // read Chip ID/EC data from these physical chips - HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); - errl = platReadIDEC(pCheckPres); - - if (errl != NULL) - { - break; // break out of the do/while so that we can return - } - } while (0); if (errl != NULL) diff --git a/src/usr/hwas/plat/hwasPlat.C b/src/usr/hwas/plat/hwasPlat.C index 8611b6531..0af9a7ace 100644 --- a/src/usr/hwas/plat/hwasPlat.C +++ b/src/usr/hwas/plat/hwasPlat.C @@ -28,8 +28,10 @@ #include <hwas/hwas.H> #include <hwas/hwasCommon.H> +#include <hwas/hwasError.H> #include <devicefw/driverif.H> +#include <initservice/taskargs.H> // trace setup; used by HWAS_DBG and HWAS_ERR macros trace_desc_t *g_trac_dbg_hwas = NULL; // debug - fast @@ -46,52 +48,39 @@ using namespace TARGETING; //****************************************************************************** // platReadIDEC function //****************************************************************************** -errlHndl_t platReadIDEC(const TargetHandleList &i_targets) +errlHndl_t platReadIDEC(const TargetHandle_t &i_target) { + // we got a target - read the ID/EC + // and update the appropriate ATTR_ field. + uint64_t id_ec; + size_t op_size = sizeof(id_ec); errlHndl_t errl = NULL; - - // we got a list of targets - read the ID/EC for each - // and update the appropriate ATTR_ fields. 
- for (TargetHandleList::const_iterator pTarget_it = i_targets.begin(); - pTarget_it != i_targets.end(); - pTarget_it++) - { - TargetHandle_t pTarget = *pTarget_it; - - uint64_t id_ec; - size_t op_size = sizeof(id_ec); - errl = DeviceFW::deviceRead(pTarget, &id_ec, - op_size, DEVICE_SCOM_ADDRESS(0x000F000Full)); - - if (errl == NULL) - { // no error, so we got a valid ID/EC value back - // EC - nibbles 0,2 - // 01234567 - uint8_t ec = (((id_ec & 0xF000000000000000ull) >> 56) | - ((id_ec & 0x00F0000000000000ull) >> 52)); - pTarget->setAttr<ATTR_EC>(ec); - - // ID - nibbles 1,5,3,4 - // 01234567 - uint32_t id = (((id_ec & 0x0F00000000000000ull) >> 44) | - ((id_ec & 0x00000F0000000000ull) >> 32) | - ((id_ec & 0x000F000000000000ull) >> 44) | - ((id_ec & 0x0000F00000000000ull) >> 44)); - pTarget->setAttr<ATTR_CHIP_ID>(id); - HWAS_DBG( "pTarget %x (%p) id %x ec %x", - pTarget->getAttr<ATTR_HUID>(), pTarget, id, ec); - } - else - { // errl was set - this is an error condition. - HWAS_ERR( "pTarget %x (%p) %x/%x - failed ID/EC read", - pTarget->getAttr<ATTR_HUID>(), pTarget, - pTarget->getAttr<ATTR_CLASS>(), - pTarget->getAttr<ATTR_TYPE>()); - - // break out so that we can return an error - break; - } - } // for pTarget_it + errl = DeviceFW::deviceRead(i_target, &id_ec, + op_size, DEVICE_SCOM_ADDRESS(0x000F000Full)); + + if (errl == NULL) + { // no error, so we got a valid ID/EC value back + // EC - nibbles 0,2 + // 01234567 + uint8_t ec = (((id_ec & 0xF000000000000000ull) >> 56) | + ((id_ec & 0x00F0000000000000ull) >> 52)); + i_target->setAttr<ATTR_EC>(ec); + + // ID - nibbles 1,5,3,4 + // 01234567 + uint32_t id = (((id_ec & 0x0F00000000000000ull) >> 44) | + ((id_ec & 0x00000F0000000000ull) >> 32) | + ((id_ec & 0x000F000000000000ull) >> 44) | + ((id_ec & 0x0000F00000000000ull) >> 44)); + i_target->setAttr<ATTR_CHIP_ID>(id); + HWAS_DBG( "i_target %x (%p) - id %x ec %x", + i_target->getAttr<ATTR_HUID>(), i_target, id, ec); + } + else + { // errl was set - this is an error 
condition. + HWAS_ERR( "i_target %x (%p) - failed ID/EC read", + i_target->getAttr<ATTR_HUID>(), i_target); + } return errl; } // platReadIDEC @@ -117,38 +106,41 @@ errlHndl_t platPresenceDetect(TargetHandleList &io_targets) errl = deviceRead(pTarget, &present, presentSize, DEVICE_PRESENT_ADDRESS()); - if (errl == NULL) - { // no error, so we got a valid present value back - if (present == true) - { - HWAS_DBG( "pTarget %x (%p) %x/%x - detected present", - pTarget->getAttr<ATTR_HUID>(), pTarget, - pTarget->getAttr<ATTR_CLASS>(), - pTarget->getAttr<ATTR_TYPE>()); - - // advance to next entry in the list - pTarget_it++; - } - else - { // chip no present -- remove from list - HWAS_DBG( "pTarget %x (%p) %x/%x - no presence", - pTarget->getAttr<ATTR_HUID>(), pTarget, - pTarget->getAttr<ATTR_CLASS>(), - pTarget->getAttr<ATTR_TYPE>()); - - // erase this target, and 'increment' to next - pTarget_it = io_targets.erase(pTarget_it); - } + if (errl != NULL) + { // errl was set - this is an error condition. + HWAS_ERR( "pTarget %x (%p) - failed presence detect", + pTarget->getAttr<ATTR_HUID>(), pTarget); + + // commit the error but keep going + errlCommit(errl, HWAS_COMP_ID); + // errl is now NULL + + // chip not present -- remove from list + HWAS_DBG( "pTarget %x (%p) - no presence", + pTarget->getAttr<ATTR_HUID>(), pTarget); + + // erase this target, and 'increment' to next + pTarget_it = io_targets.erase(pTarget_it); + + // target is not present - fall thru + present = false; + } + + if (present == true) + { + HWAS_DBG( "pTarget %x (%p) - detected present", + pTarget->getAttr<ATTR_HUID>(), pTarget); + + // advance to next entry in the list + pTarget_it++; } else - { // errl was set - this is an error condition. 
- HWAS_ERR( "pTarget %x (%p) %x/%x - failed presence detect", - pTarget->getAttr<ATTR_HUID>(), pTarget, - pTarget->getAttr<ATTR_CLASS>(), - pTarget->getAttr<ATTR_TYPE>()); + { // chip not present -- remove from list + HWAS_DBG( "pTarget %x (%p) - no presence", + pTarget->getAttr<ATTR_HUID>(), pTarget); - // break out so that we can return an error - break; + // erase this target, and 'increment' to next + pTarget_it = io_targets.erase(pTarget_it); } } // for pTarget_it |

