summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Horton <brianh@linux.ibm.com>2012-05-01 11:29:51 -0500
committerA. Patrick Williams III <iawillia@us.ibm.com>2012-05-10 13:20:08 -0500
commit3d4b593321870fb646be9d86880a19bcc2d3453e (patch)
tree6afcc538195f2831d9e63c9b08722c8706a39393
parent59a58400b22274c58e3512e09468e45daebc1436 (diff)
downloadblackbird-hostboot-3d4b593321870fb646be9d86880a19bcc2d3453e.tar.gz
blackbird-hostboot-3d4b593321870fb646be9d86880a19bcc2d3453e.zip
platReadIDEC killing IPL for single failure
Correct HWAS code - if target is detected as present, but the read of the ID/EC fails, then mark the target as present but NOT functional, commit the errl, and continue the target processing. All physical children under that parent target will be marked as present and NOT functional as well. Same for the deviceRead for the Presence detect - if it fails, mark the target as not present, commit the errl, and continue. Change-Id: I6d911c6e6aa3968395721dc2826bf860589dc737 RTC: 41123 Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/974 Tested-by: Jenkins Server Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
-rw-r--r--src/include/usr/hwas/hwasCommon.H10
-rw-r--r--src/include/usr/hwas/hwasError.H39
-rw-r--r--src/include/usr/hwas/plat/error.H59
-rw-r--r--src/usr/hwas/hwas.C103
-rw-r--r--src/usr/hwas/plat/hwasPlat.C138
5 files changed, 218 insertions, 131 deletions
diff --git a/src/include/usr/hwas/hwasCommon.H b/src/include/usr/hwas/hwasCommon.H
index f6d5ce566..412fe81ef 100644
--- a/src/include/usr/hwas/hwasCommon.H
+++ b/src/include/usr/hwas/hwasCommon.H
@@ -59,16 +59,16 @@ namespace HWAS
errlHndl_t platPresenceDetect(TARGETING::TargetHandleList &io_targets);
/**
- * @brief platform specific code to determine the ID/EC of the targets
- * in the input list. The platform specific code is responsible for setting
- * the ATTR_EC and ATTR_CHIP_ID in each target.
+ * @brief platform specific code to determine the ID/EC of the input
+ * target. The platform specific code is responsible for setting the
+ * ATTR_EC and ATTR_CHIP_ID in that target.
*
- * @param[in] i_targets TargetHandleList of targets to check for chip ID/EC
+ * @param[in] i_target target to check for chip ID/EC
*
* @return errlHndl_t valid errlHndl_t handle if there was an error
* NULL if no errors;
*/
-errlHndl_t platReadIDEC(const TARGETING::TargetHandleList &i_targets);
+errlHndl_t platReadIDEC(const TARGETING::TargetHandle_t &i_target);
} // namespace HWAS
diff --git a/src/include/usr/hwas/hwasError.H b/src/include/usr/hwas/hwasError.H
new file mode 100644
index 000000000..884575093
--- /dev/null
+++ b/src/include/usr/hwas/hwasError.H
@@ -0,0 +1,39 @@
+// IBM_PROLOG_BEGIN_TAG
+// This is an automatically generated prolog.
+//
+// $Source: src/include/usr/hwas/hwasError.H $
+//
+// IBM CONFIDENTIAL
+//
+// COPYRIGHT International Business Machines Corp. 2012
+//
+// p1
+//
+// Object Code Only (OCO) source materials
+// Licensed Internal Code Source Materials
+// IBM HostBoot Licensed Internal Code
+//
+// The source code for this program is not published or other-
+// wise divested of its trade secrets, irrespective of what has
+// been deposited with the U.S. Copyright Office.
+//
+// Origin: 30
+//
+// IBM_PROLOG_END
+#ifndef __HWAS_ERROR_H
+#define __HWAS_ERROR_H
+
+/**
+* @file hwas/hwasError.H
+*
+* @brief Common header to abstract away error handling differences between
+* platforms
+*/
+
+//******************************************************************************
+// Includes
+//******************************************************************************
+
+#include <hwas/plat/error.H>
+
+#endif // __HWAS_ERROR_H
diff --git a/src/include/usr/hwas/plat/error.H b/src/include/usr/hwas/plat/error.H
new file mode 100644
index 000000000..c578f5efa
--- /dev/null
+++ b/src/include/usr/hwas/plat/error.H
@@ -0,0 +1,59 @@
+// IBM_PROLOG_BEGIN_TAG
+// This is an automatically generated prolog.
+//
+// $Source: src/include/usr/hwas/plat/error.H $
+//
+// IBM CONFIDENTIAL
+//
+// COPYRIGHT International Business Machines Corp. 2012
+//
+// p1
+//
+// Object Code Only (OCO) source materials
+// Licensed Internal Code Source Materials
+// IBM HostBoot Licensed Internal Code
+//
+// The source code for this program is not published or other-
+// wise divested of its trade secrets, irrespective of what has
+// been deposited with the U.S. Copyright Office.
+//
+// Origin: 30
+//
+// IBM_PROLOG_END
+#ifndef __HWAS_PLAT_ERROR_H
+#define __HWAS_PLAT_ERROR_H
+
+/**
+* @file hwas/plat/error.H
+*
+* @brief Adapts platform neutral error log requests to the platform
+*/
+
+//******************************************************************************
+// Includes
+//******************************************************************************
+
+// Include the platform specific errlentry.H file and pick up the platform
+// specific errlHndl_t typedef. This is enough for common code to pass
+// errlHndl_t pointers around and check for NULL
+#include <errl/errlentry.H>
+
+namespace HWAS
+{
+
+namespace COMMON
+{
+/**
+* @brief Import the ::ERRORLOG namespace into the ::HWAS::COMMON
+* namespace. This allows all the common hwas code to use the
+* same namespacing yet directly invoke platform specific error log
+* support. Since nothing else is declared in the namespace, there is
+* no danger of a namespace collision.
+*/
+using namespace ::ERRORLOG;
+
+} // End namespace COMMON
+
+} // End namespace HWAS
+
+#endif // __HWAS_PLAT_ERROR_H
diff --git a/src/usr/hwas/hwas.C b/src/usr/hwas/hwas.C
index ac7ac32c4..2baf602ec 100644
--- a/src/usr/hwas/hwas.C
+++ b/src/usr/hwas/hwas.C
@@ -33,18 +33,20 @@
/******************************************************************************/
// Includes
/******************************************************************************/
-#include <stdint.h>
-#include <assert.h>
+#include <stdint.h>
+#include <assert.h>
-#include <targeting/common/commontargeting.H>
+#include <initservice/taskargs.H>
+#include <targeting/common/commontargeting.H>
-#include <hwas/hwas.H>
-#include <hwas/hwasCommon.H>
+#include <hwas/hwas.H>
+#include <hwas/hwasCommon.H>
+#include <hwas/hwasError.H>
-namespace HWAS
+namespace HWAS
{
-using namespace TARGETING;
+using namespace TARGETING;
/**
@@ -56,12 +58,12 @@ using namespace TARGETING;
* @return none
*
*/
-void enableHwasState(Target *i_target)
+void enableHwasState(Target *i_target, bool i_functional)
{
HwasState hwasState = i_target->getAttr<ATTR_HWAS_STATE>();
hwasState.poweredOn = true;
hwasState.present = true;
- hwasState.functional = true;
+ hwasState.functional = i_functional;
i_target->setAttr<ATTR_HWAS_STATE>( hwasState );
}
@@ -101,10 +103,9 @@ errlHndl_t discoverTargets()
assert(pSys, "HWAS discoverTargets: no CLASS_SYS TopLevelTarget found");
// mark this as present
- enableHwasState(pSys);
- HWAS_DBG("pSys %x (%p) %x/%x - marked present",
- pSys->getAttr<ATTR_HUID>(), pSys,
- pSys->getAttr<ATTR_CLASS>(), pSys->getAttr<ATTR_TYPE>());
+ enableHwasState(pSys, true);
+ HWAS_DBG("pSys %x (%p) - marked present",
+ pSys->getAttr<ATTR_HUID>(), pSys);
// find CLASS_ENC
PredicateCTM predEnc(CLASS_ENC);
@@ -119,10 +120,9 @@ errlHndl_t discoverTargets()
TargetHandle_t pEnc = *pEnc_it;
// mark it as present
- enableHwasState(pEnc);
- HWAS_DBG("pEnc %x (%p) %x/%x - marked present",
- pEnc->getAttr<ATTR_HUID>(), pEnc,
- pEnc->getAttr<ATTR_CLASS>(), pEnc->getAttr<ATTR_TYPE>());
+ enableHwasState(pEnc, true);
+ HWAS_DBG("pEnc %x (%p) - marked present",
+ pEnc->getAttr<ATTR_HUID>(), pEnc);
} // for pEnc_it
// find TYPE_PROC, TYPE_MEMBUF and TYPE_DIMM
@@ -148,22 +148,44 @@ errlHndl_t discoverTargets()
}
// no errors - keep going
- // for each, mark them and their descendants as present
+ // for each, read their ID/EC level. if that works,
+ // mark them and their descendants as present and functional
for (TargetHandleList::iterator pTarget_it = pCheckPres.begin();
pTarget_it != pCheckPres.end();
- ) // increment will be done in the loop below
+ pTarget_it++
+ )
{
TargetHandle_t pTarget = *pTarget_it;
- // set HWAS state to show it's present
- enableHwasState(pTarget);
- HWAS_DBG("pTarget %x (%p) %x/%x - detected present",
+ // read Chip ID/EC data from these physical chips
+ if (pTarget->getAttr<ATTR_CLASS>() == CLASS_CHIP)
+ {
+ errl = platReadIDEC(pTarget);
+ }
+
+ bool isFunctional;
+ if (!errl)
+ { // no error
+ isFunctional = true;
+ }
+ else
+            { // read of ID/EC failed even though the target was present
+ isFunctional = false;
+
+ // commit the error but keep going
+ errlCommit(errl, HWAS_COMP_ID);
+ // errl is now NULL
+ }
+
+ HWAS_DBG("pTarget %x (%p) - detected present %s functional",
pTarget->getAttr<ATTR_HUID>(), pTarget,
- pTarget->getAttr<ATTR_CLASS>(),
- pTarget->getAttr<ATTR_TYPE>());
+ isFunctional ? "and" : "NOT");
+
+ // set HWAS state to show it's present
+ enableHwasState(pTarget, isFunctional);
// now need to mark all of this target's
- // physical descendants as present
+            // physical descendants as present, matching its functional state
TargetHandleList pDescList;
targetService().getAssociated( pDescList, pTarget,
TargetService::CHILD, TargetService::ALL);
@@ -172,38 +194,13 @@ errlHndl_t discoverTargets()
pDesc_it++)
{
TargetHandle_t pDesc = *pDesc_it;
- enableHwasState(pDesc);
- HWAS_DBG("pDesc %x (%p) %x/%x - marked present",
+ enableHwasState(pDesc, isFunctional);
+ HWAS_DBG("pDesc %x (%p) - marked present %s functional",
pDesc->getAttr<ATTR_HUID>(), pDesc,
- pDesc->getAttr<ATTR_CLASS>(),
- pDesc->getAttr<ATTR_TYPE>());
- }
-
- // if we're not a CHIP, remove us from the list, so that
- // when we do the Chip ID/EC call after the loop, we have
- // a list that is CHIPs only
- if (pTarget->getAttr<ATTR_CLASS>() != CLASS_CHIP)
- {
- // erase this target, and 'increment' to next
- pTarget_it = pCheckPres.erase(pTarget_it);
- }
- else
- {
- // advance to next entry in the list
- pTarget_it++;
+ isFunctional ? "and" : "NOT");
}
} // for pTarget_it
- // at this point, pCheckPres only has present CLASS_CHIP targets
- // read Chip ID/EC data from these physical chips
- HWAS_DBG("pCheckPres size: %d", pCheckPres.size());
- errl = platReadIDEC(pCheckPres);
-
- if (errl != NULL)
- {
- break; // break out of the do/while so that we can return
- }
-
} while (0);
if (errl != NULL)
diff --git a/src/usr/hwas/plat/hwasPlat.C b/src/usr/hwas/plat/hwasPlat.C
index 8611b6531..0af9a7ace 100644
--- a/src/usr/hwas/plat/hwasPlat.C
+++ b/src/usr/hwas/plat/hwasPlat.C
@@ -28,8 +28,10 @@
#include <hwas/hwas.H>
#include <hwas/hwasCommon.H>
+#include <hwas/hwasError.H>
#include <devicefw/driverif.H>
+#include <initservice/taskargs.H>
// trace setup; used by HWAS_DBG and HWAS_ERR macros
trace_desc_t *g_trac_dbg_hwas = NULL; // debug - fast
@@ -46,52 +48,39 @@ using namespace TARGETING;
//******************************************************************************
// platReadIDEC function
//******************************************************************************
-errlHndl_t platReadIDEC(const TargetHandleList &i_targets)
+errlHndl_t platReadIDEC(const TargetHandle_t &i_target)
{
+ // we got a target - read the ID/EC
+ // and update the appropriate ATTR_ field.
+ uint64_t id_ec;
+ size_t op_size = sizeof(id_ec);
errlHndl_t errl = NULL;
-
- // we got a list of targets - read the ID/EC for each
- // and update the appropriate ATTR_ fields.
- for (TargetHandleList::const_iterator pTarget_it = i_targets.begin();
- pTarget_it != i_targets.end();
- pTarget_it++)
- {
- TargetHandle_t pTarget = *pTarget_it;
-
- uint64_t id_ec;
- size_t op_size = sizeof(id_ec);
- errl = DeviceFW::deviceRead(pTarget, &id_ec,
- op_size, DEVICE_SCOM_ADDRESS(0x000F000Full));
-
- if (errl == NULL)
- { // no error, so we got a valid ID/EC value back
- // EC - nibbles 0,2
- // 01234567
- uint8_t ec = (((id_ec & 0xF000000000000000ull) >> 56) |
- ((id_ec & 0x00F0000000000000ull) >> 52));
- pTarget->setAttr<ATTR_EC>(ec);
-
- // ID - nibbles 1,5,3,4
- // 01234567
- uint32_t id = (((id_ec & 0x0F00000000000000ull) >> 44) |
- ((id_ec & 0x00000F0000000000ull) >> 32) |
- ((id_ec & 0x000F000000000000ull) >> 44) |
- ((id_ec & 0x0000F00000000000ull) >> 44));
- pTarget->setAttr<ATTR_CHIP_ID>(id);
- HWAS_DBG( "pTarget %x (%p) id %x ec %x",
- pTarget->getAttr<ATTR_HUID>(), pTarget, id, ec);
- }
- else
- { // errl was set - this is an error condition.
- HWAS_ERR( "pTarget %x (%p) %x/%x - failed ID/EC read",
- pTarget->getAttr<ATTR_HUID>(), pTarget,
- pTarget->getAttr<ATTR_CLASS>(),
- pTarget->getAttr<ATTR_TYPE>());
-
- // break out so that we can return an error
- break;
- }
- } // for pTarget_it
+ errl = DeviceFW::deviceRead(i_target, &id_ec,
+ op_size, DEVICE_SCOM_ADDRESS(0x000F000Full));
+
+ if (errl == NULL)
+ { // no error, so we got a valid ID/EC value back
+ // EC - nibbles 0,2
+ // 01234567
+ uint8_t ec = (((id_ec & 0xF000000000000000ull) >> 56) |
+ ((id_ec & 0x00F0000000000000ull) >> 52));
+ i_target->setAttr<ATTR_EC>(ec);
+
+ // ID - nibbles 1,5,3,4
+ // 01234567
+ uint32_t id = (((id_ec & 0x0F00000000000000ull) >> 44) |
+ ((id_ec & 0x00000F0000000000ull) >> 32) |
+ ((id_ec & 0x000F000000000000ull) >> 44) |
+ ((id_ec & 0x0000F00000000000ull) >> 44));
+ i_target->setAttr<ATTR_CHIP_ID>(id);
+ HWAS_DBG( "i_target %x (%p) - id %x ec %x",
+ i_target->getAttr<ATTR_HUID>(), i_target, id, ec);
+ }
+ else
+ { // errl was set - this is an error condition.
+ HWAS_ERR( "i_target %x (%p) - failed ID/EC read",
+ i_target->getAttr<ATTR_HUID>(), i_target);
+ }
return errl;
} // platReadIDEC
@@ -117,38 +106,41 @@ errlHndl_t platPresenceDetect(TargetHandleList &io_targets)
errl = deviceRead(pTarget, &present, presentSize,
DEVICE_PRESENT_ADDRESS());
- if (errl == NULL)
- { // no error, so we got a valid present value back
- if (present == true)
- {
- HWAS_DBG( "pTarget %x (%p) %x/%x - detected present",
- pTarget->getAttr<ATTR_HUID>(), pTarget,
- pTarget->getAttr<ATTR_CLASS>(),
- pTarget->getAttr<ATTR_TYPE>());
-
- // advance to next entry in the list
- pTarget_it++;
- }
- else
- { // chip no present -- remove from list
- HWAS_DBG( "pTarget %x (%p) %x/%x - no presence",
- pTarget->getAttr<ATTR_HUID>(), pTarget,
- pTarget->getAttr<ATTR_CLASS>(),
- pTarget->getAttr<ATTR_TYPE>());
-
- // erase this target, and 'increment' to next
- pTarget_it = io_targets.erase(pTarget_it);
- }
+ if (errl != NULL)
+ { // errl was set - this is an error condition.
+ HWAS_ERR( "pTarget %x (%p) - failed presence detect",
+ pTarget->getAttr<ATTR_HUID>(), pTarget);
+
+ // commit the error but keep going
+ errlCommit(errl, HWAS_COMP_ID);
+ // errl is now NULL
+
+ // chip not present -- remove from list
+ HWAS_DBG( "pTarget %x (%p) - no presence",
+ pTarget->getAttr<ATTR_HUID>(), pTarget);
+
+ // erase this target, and 'increment' to next
+ pTarget_it = io_targets.erase(pTarget_it);
+
+ // target is not present - fall thru
+ present = false;
+ }
+
+ if (present == true)
+ {
+ HWAS_DBG( "pTarget %x (%p) - detected present",
+ pTarget->getAttr<ATTR_HUID>(), pTarget);
+
+ // advance to next entry in the list
+ pTarget_it++;
}
else
- { // errl was set - this is an error condition.
- HWAS_ERR( "pTarget %x (%p) %x/%x - failed presence detect",
- pTarget->getAttr<ATTR_HUID>(), pTarget,
- pTarget->getAttr<ATTR_CLASS>(),
- pTarget->getAttr<ATTR_TYPE>());
+ { // chip not present -- remove from list
+ HWAS_DBG( "pTarget %x (%p) - no presence",
+ pTarget->getAttr<ATTR_HUID>(), pTarget);
- // break out so that we can return an error
- break;
+ // erase this target, and 'increment' to next
+ pTarget_it = io_targets.erase(pTarget_it);
}
} // for pTarget_it
OpenPOWER on IntegriCloud