summaryrefslogtreecommitdiffstats
path: root/src/usr/hwas
diff options
context:
space:
mode:
authorMatt Derksen <v2cibmd@us.ibm.com>2016-11-11 09:23:34 -0600
committerDaniel M. Crowell <dcrowell@us.ibm.com>2016-11-28 17:00:05 -0500
commit2f193c6b29f2c3bcbdeff7fd8c590fb12ddfaa4c (patch)
treecf84f6169768722f3f7a14f72b421061472acaf2 /src/usr/hwas
parenteaa6695c507e306f20f659ccdcc49e59191b7d31 (diff)
downloadtalos-hostboot-2f193c6b29f2c3bcbdeff7fd8c590fb12ddfaa4c.tar.gz
talos-hostboot-2f193c6b29f2c3bcbdeff7fd8c590fb12ddfaa4c.zip
Rollup non-functional EC to EQ level while processing PG vpd
An EQ has 2 EXs, each of which has 2 ECs. If the ECs are all bad, roll that non-functional status up to the EQ level.
Change-Id: I0ffcb1679bc00c435fcbd6c439cba84da0311472
RTC:163413
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/32546
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martin Gloff <mgloff@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/hwas')
-rw-r--r--src/usr/hwas/common/hwas.C93
-rw-r--r--src/usr/hwas/test/hwas1test.H93
2 files changed, 185 insertions, 1 deletions
diff --git a/src/usr/hwas/common/hwas.C b/src/usr/hwas/common/hwas.C
index 6973a4246..4b08ba6ac 100644
--- a/src/usr/hwas/common/hwas.C
+++ b/src/usr/hwas/common/hwas.C
@@ -155,6 +155,43 @@ bool areL3L2REFRtripletsValid(uint16_t i_pgData)
return l_valid;
}
+/**
+ * @brief Check whether every EC (core) PG entry in a range is not good, for rollup
+ *
+ * @param[in] i_firstCore First core to look at (offset from the EC00 entry)
+ * @param[in] i_numCoresToCheck number of cores to check from first one
+ * @param[in] i_pgData PG keyword VPD, indexed by VPD_CP00_PG_* entry index
+ *
+ * @return bool true if no examined ECxx entry equals VPD_CP00_PG_ECxx_GOOD
+ * (also true if no entry could be examined); false at the first good entry
+ */
+bool allCoresBad(const uint8_t & i_firstCore,
+ const uint8_t & i_numCoresToCheck,
+ const uint16_t i_pgData[])
+{
+ bool coresBad = true; // stays true until a good core entry is seen
+ uint8_t coreNum = 0;
+ do // body runs at least once, even when i_numCoresToCheck is 0
+ {
+ // don't look outside of EC core entries; stop with the result so far
+ if ((i_firstCore + coreNum) >= VPD_CP00_PG_ECxx_MAX_ENTRIES)
+ {
+ HWAS_INF("allCoresBad: requested %d cores beginning at %d, "
+ "but only able to check %d cores",
+ i_numCoresToCheck, i_firstCore, coreNum);
+ break; // coresBad is left unchanged by this early exit
+ }
+ if (i_pgData[VPD_CP00_PG_EC00_INDEX + i_firstCore + coreNum] ==
+ VPD_CP00_PG_ECxx_GOOD)
+ {
+ coresBad = false; // one good core ends the scan via the loop test
+ }
+ coreNum++;
+ }
+ while (coresBad && (coreNum < i_numCoresToCheck));
+
+ return coresBad;
+}
errlHndl_t discoverTargets()
{
@@ -655,6 +692,44 @@ bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc,
VPD_CP00_PG_EPx_GOOD);
l_descFunctional = false;
}
+ else
+ {
+ // Look for a rollup bad status
+ // Either both EXs are bad or all 4 EC's are bad
+
+ // index of first EX of 2 EXs under this EQ
+ uint8_t indexEX = (uint8_t)indexEP * 2;
+
+ // index of first EC of 4 ECs under this EQ
+ uint8_t indexEC = indexEX * 2;
+ uint8_t coresToCheck = 4;
+
+ // check if both EX's are bad
+ if (((i_pgData[VPD_CP00_PG_EP0_INDEX + indexEP] &
+ VPD_CP00_PG_EPx_L3L2REFR[0]) != 0) &&
+ ((i_pgData[VPD_CP00_PG_EP0_INDEX + indexEP] &
+ VPD_CP00_PG_EPx_L3L2REFR[1]) != 0))
+ {
+ HWAS_INF("pDesc %.8X - EQ%d marked bad because its EXs "
+ "(%d and %d) are both bad",
+ i_desc->getAttr<ATTR_HUID>(),
+ indexEP,
+ indexEX, indexEX+1);
+
+ l_descFunctional = false;
+ }
+ else
+ // check if child cores are bad
+ if (allCoresBad(indexEC, coresToCheck, i_pgData))
+ {
+ HWAS_INF("pDesc %.8X - EQ%d marked bad because its %d CORES "
+ "(EC%d - EC%d) are all bad",
+ i_desc->getAttr<ATTR_HUID>(),
+ indexEP,
+ coresToCheck, indexEC, indexEC+3);
+ l_descFunctional = false;
+ }
+ }
}
else
if (i_desc->getAttr<ATTR_TYPE>() == TYPE_EX)
@@ -665,6 +740,10 @@ bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc,
size_t indexEP = indexEX / 2;
// 2 L3/L2/REFR triplets per EX chiplet
size_t indexL3L2REFR = indexEX % 2;
+ // 2 EC children per EX
+ uint8_t indexEC = indexEX * 2;
+ uint8_t allCoresToCheck = 2; // 2 CORES per EX
+
// Check triplet of bits in EPx entry
if ((i_pgData[VPD_CP00_PG_EP0_INDEX + indexEP] &
VPD_CP00_PG_EPx_L3L2REFR[indexL3L2REFR]) != 0)
@@ -678,6 +757,20 @@ bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc,
~VPD_CP00_PG_EPx_L3L2REFR[indexL3L2REFR]));
l_descFunctional = false;
}
+ else
+ // Check that EX does not have 2 bad CORE children
+ if (allCoresBad(indexEC, allCoresToCheck, i_pgData))
+ {
+ HWAS_INF("pDesc %.8X - EX%d marked bad since it has no good cores",
+ i_desc->getAttr<ATTR_HUID>(), indexEX);
+ HWAS_INF("(core %d: actual 0x%04X, expected 0x%04X) "
+ "(core %d: actual 0x%04X, expected 0x%04X)",
+ indexEC, i_pgData[VPD_CP00_PG_EC00_INDEX + indexEC],
+ VPD_CP00_PG_ECxx_GOOD,
+ indexEC+1, i_pgData[VPD_CP00_PG_EC00_INDEX + indexEC+1],
+ VPD_CP00_PG_ECxx_GOOD);
+ l_descFunctional = false;
+ }
}
else
if (i_desc->getAttr<ATTR_TYPE>() == TYPE_CORE)
diff --git a/src/usr/hwas/test/hwas1test.H b/src/usr/hwas/test/hwas1test.H
index a85324d62..667d03a46 100644
--- a/src/usr/hwas/test/hwas1test.H
+++ b/src/usr/hwas/test/hwas1test.H
@@ -639,6 +639,7 @@ public:
pDesc->getAttr<ATTR_CHIP_UNIT>();
char l_type_str[9];
char l_pgData[] = "";
+ uint8_t core_idx;
TS_INFO("testHWASisDescFunctional: descendant functional - "
"attr type 0x%04X, chip unit %d", l_type, l_chipUnit);
@@ -923,7 +924,7 @@ public:
// going through loop, thus creating and
// testing mismatched pairs.
if (pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] &
- l_mask)
+ l_mask)
{
// Turn off a bit that should be on
pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] &=
@@ -952,6 +953,70 @@ public:
pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] =
(uint16_t)VPD_CP00_PG_EPx_GOOD;
}
+
+ //////////////////////////////////////
+ // make children bad to check rollup
+ //////////////////////////////////////
+
+ // mark bad EX children
+ TS_INFO("testHWASisDescFunctional>"
+ "Mark EQ%d's EX chiplets bad", l_chipUnit);
+
+ // Choosing a single failure for each EX out of
+ // multiple possibilities
+
+ // Mark l3x, l2x, and/or refrx bad (x = 0 or 1)
+
+ // 1st EX bad: mark refr0 as bad
+ pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] |=
+ 0x0008;
+
+ // 2nd EX bad: mark L21 as bad too
+ pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] |=
+ 0x0040;
+
+ if (isDescFunctional(pDesc, pgData))
+ {
+ TS_FAIL("testHWASisDescFunctional>"
+ "functional = 0x%x, should be false. "
+ "EQ/EP%d = 0x%04x (expected 0x%04X), "
+ "EX children were marked bad ",
+ isDescFunctional(pDesc, pgData),
+ l_chipUnit,
+ pgData[VPD_CP00_PG_EP0_INDEX
+ + l_chipUnit],
+ VPD_CP00_PG_EPx_GOOD);
+ }
+
+ // Restore the "all good" data
+ pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] =
+ (uint16_t)VPD_CP00_PG_EPx_GOOD;
+
+ // now try bad CORE rollup
+ core_idx = (uint8_t)l_chipUnit * 4;
+ TS_INFO("testHWASisDescFunctional>"
+ "Now try marking EQ%d cores EC%d - EC%d bad",
+ l_chipUnit, core_idx, core_idx+3);
+ for (int i = 0; i < 4; i++)
+ {
+ pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] |=
+ ~VPD_CP00_PG_ECxx_GOOD;
+ }
+ if (isDescFunctional(pDesc, pgData))
+ {
+ TS_FAIL("testHWASisDescFunctional>"
+ "EQ/EP%d functional = 0x%x, should be false. "
+ "All 4 EC children were marked bad",
+ l_chipUnit,
+ isDescFunctional(pDesc, pgData));
+ }
+ // Restore the "all good" core data
+ for (int i = 0; i < 4; i++)
+ {
+ pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] =
+ VPD_CP00_PG_ECxx_GOOD;
+ }
+
break;
case TYPE_EX:
@@ -973,6 +1038,32 @@ public:
pgData[VPD_CP00_PG_EP0_INDEX + (l_chipUnit / 2)] =
(uint16_t)VPD_CP00_PG_EPx_GOOD;
+
+ // Now mark its 2 EC cores as bad and check for rollup
+ core_idx = (uint8_t)l_chipUnit * 2;
+ TS_INFO("testHWASisDescFunctional>"
+ "Now try marking EX%d cores EC%d and EC%d bad",
+ l_chipUnit, core_idx, core_idx+1);
+ for (int i = 0; i < 2; i++)
+ {
+ pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] |=
+ ~VPD_CP00_PG_ECxx_GOOD;
+ }
+ if (isDescFunctional(pDesc, pgData))
+ {
+ TS_FAIL("testHWASisDescFunctional>"
+ "EX%d functional = 0x%x, should be false. "
+ "All 2 EC children were marked bad",
+ l_chipUnit,
+ isDescFunctional(pDesc, pgData));
+ }
+ // Restore the "all good" core data
+ for (int i = 0; i < 2; i++)
+ {
+ pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] =
+ VPD_CP00_PG_ECxx_GOOD;
+ }
+
break;
case TYPE_CORE:
OpenPOWER on IntegriCloud