diff options
author | Matt Derksen <v2cibmd@us.ibm.com> | 2016-11-11 09:23:34 -0600 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2016-11-28 17:00:05 -0500 |
commit | 2f193c6b29f2c3bcbdeff7fd8c590fb12ddfaa4c (patch) | |
tree | cf84f6169768722f3f7a14f72b421061472acaf2 /src/usr/hwas | |
parent | eaa6695c507e306f20f659ccdcc49e59191b7d31 (diff) | |
download | talos-hostboot-2f193c6b29f2c3bcbdeff7fd8c590fb12ddfaa4c.tar.gz talos-hostboot-2f193c6b29f2c3bcbdeff7fd8c590fb12ddfaa4c.zip |
Roll up non-functional EC status to the EX and EQ level while processing PG VPD
An EQ has 2 EXs, each of which has 2 ECs. If both ECs under an EX are bad,
mark that EX non-functional; if both EXs or all 4 ECs under an EQ are bad,
roll that non-functional status up to the EQ level.
Change-Id: I0ffcb1679bc00c435fcbd6c439cba84da0311472
RTC:163413
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/32546
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martin Gloff <mgloff@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/hwas')
-rw-r--r-- | src/usr/hwas/common/hwas.C | 93 | ||||
-rw-r--r-- | src/usr/hwas/test/hwas1test.H | 93 |
2 files changed, 185 insertions, 1 deletions
diff --git a/src/usr/hwas/common/hwas.C b/src/usr/hwas/common/hwas.C index 6973a4246..4b08ba6ac 100644 --- a/src/usr/hwas/common/hwas.C +++ b/src/usr/hwas/common/hwas.C @@ -155,6 +155,43 @@ bool areL3L2REFRtripletsValid(uint16_t i_pgData) return l_valid; } +/** + * @brief simple helper fn to check core data for rollup + * + * @param[in] i_firstCore First core to look at + * @param[in] i_numCoresToCheck number of cores to check from first one + * @param[in] i_pgData PG keyword VPD + * + * @return bool All ECxx domains were marked bad + * + */ +bool allCoresBad(const uint8_t & i_firstCore, + const uint8_t & i_numCoresToCheck, + const uint16_t i_pgData[]) +{ + bool coresBad = true; + uint8_t coreNum = 0; + do + { + // don't look outside of EC core entries + if ((i_firstCore + coreNum) >= VPD_CP00_PG_ECxx_MAX_ENTRIES) + { + HWAS_INF("allCoresBad: requested %d cores beginning at %d, " + "but only able to check %d cores", + i_numCoresToCheck, i_firstCore, coreNum); + break; + } + if (i_pgData[VPD_CP00_PG_EC00_INDEX + i_firstCore + coreNum] == + VPD_CP00_PG_ECxx_GOOD) + { + coresBad = false; + } + coreNum++; + } + while (coresBad && (coreNum < i_numCoresToCheck)); + + return coresBad; +} errlHndl_t discoverTargets() { @@ -655,6 +692,44 @@ bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc, VPD_CP00_PG_EPx_GOOD); l_descFunctional = false; } + else + { + // Look for a rollup bad status + // Either both EXs are bad or all 4 EC's are bad + + // index of first EX of 2 EXs under this EQ + uint8_t indexEX = (uint8_t)indexEP * 2; + + // index of first EC of 4 ECs under this EQ + uint8_t indexEC = indexEX * 2; + uint8_t coresToCheck = 4; + + // check if both EX's are bad + if (((i_pgData[VPD_CP00_PG_EP0_INDEX + indexEP] & + VPD_CP00_PG_EPx_L3L2REFR[0]) != 0) && + ((i_pgData[VPD_CP00_PG_EP0_INDEX + indexEP] & + VPD_CP00_PG_EPx_L3L2REFR[1]) != 0)) + { + HWAS_INF("pDesc %.8X - EQ%d marked bad because its EXs " + "(%d and %d) are both bad", + i_desc->getAttr<ATTR_HUID>(), + 
indexEP, + indexEX, indexEX+1); + + l_descFunctional = false; + } + else + // check if child cores are bad + if (allCoresBad(indexEC, coresToCheck, i_pgData)) + { + HWAS_INF("pDesc %.8X - EQ%d marked bad because its %d CORES " + "(EC%d - EC%d) are all bad", + i_desc->getAttr<ATTR_HUID>(), + indexEP, + coresToCheck, indexEC, indexEC+3); + l_descFunctional = false; + } + } } else if (i_desc->getAttr<ATTR_TYPE>() == TYPE_EX) @@ -665,6 +740,10 @@ bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc, size_t indexEP = indexEX / 2; // 2 L3/L2/REFR triplets per EX chiplet size_t indexL3L2REFR = indexEX % 2; + // 2 EC children per EX + uint8_t indexEC = indexEX * 2; + uint8_t allCoresToCheck = 2; // 2 CORES per EX + // Check triplet of bits in EPx entry if ((i_pgData[VPD_CP00_PG_EP0_INDEX + indexEP] & VPD_CP00_PG_EPx_L3L2REFR[indexL3L2REFR]) != 0) @@ -678,6 +757,20 @@ bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc, ~VPD_CP00_PG_EPx_L3L2REFR[indexL3L2REFR])); l_descFunctional = false; } + else + // Check that EX does not have 2 bad CORE children + if (allCoresBad(indexEC, allCoresToCheck, i_pgData)) + { + HWAS_INF("pDesc %.8X - EX%d marked bad since it has no good cores", + i_desc->getAttr<ATTR_HUID>(), indexEX); + HWAS_INF("(core %d: actual 0x%04X, expected 0x%04X) " + "(core %d: actual 0x%04X, expected 0x%04X)", + indexEC, i_pgData[VPD_CP00_PG_EC00_INDEX + indexEC], + VPD_CP00_PG_ECxx_GOOD, + indexEC+1, i_pgData[VPD_CP00_PG_EC00_INDEX + indexEC+1], + VPD_CP00_PG_ECxx_GOOD); + l_descFunctional = false; + } } else if (i_desc->getAttr<ATTR_TYPE>() == TYPE_CORE) diff --git a/src/usr/hwas/test/hwas1test.H b/src/usr/hwas/test/hwas1test.H index a85324d62..667d03a46 100644 --- a/src/usr/hwas/test/hwas1test.H +++ b/src/usr/hwas/test/hwas1test.H @@ -639,6 +639,7 @@ public: pDesc->getAttr<ATTR_CHIP_UNIT>(); char l_type_str[9]; char l_pgData[] = ""; + uint8_t core_idx; TS_INFO("testHWASisDescFunctional: descendant functional - " "attr type 0x%04X, chip unit 
%d", l_type, l_chipUnit); @@ -923,7 +924,7 @@ public: // going through loop, thus creating and // testing mismatched pairs. if (pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] & - l_mask) + l_mask) { // Turn off a bit that should be on pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] &= @@ -952,6 +953,70 @@ public: pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] = (uint16_t)VPD_CP00_PG_EPx_GOOD; } + + ////////////////////////////////////// + // make children bad to check rollup + ////////////////////////////////////// + + // mark bad EX children + TS_INFO("testHWASisDescFunctional>" + "Mark EQ%d's EX chiplets bad", l_chipUnit); + + // Choosing a single failure for each EX out of + // multiple possibilities + + // Mark l3x, l2x, and/or refrx bad (x = 0 or 1) + + // 1st EX bad: mark refr0 as bad + pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] |= + 0x0008; + + // 2nd EX bad: mark L21 as bad too + pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] |= + 0x0040; + + if (isDescFunctional(pDesc, pgData)) + { + TS_FAIL("testHWASisDescFunctional>" + "functional = 0x%x, should be false. " + "EQ/EP%d = 0x%04x (expected 0x%04X), " + "EX children were marked bad ", + isDescFunctional(pDesc, pgData), + l_chipUnit, + pgData[VPD_CP00_PG_EP0_INDEX + + l_chipUnit], + VPD_CP00_PG_EPx_GOOD); + } + + // Restore the "all good" data + pgData[VPD_CP00_PG_EP0_INDEX + l_chipUnit] = + (uint16_t)VPD_CP00_PG_EPx_GOOD; + + // now try bad CORE rollup + core_idx = (uint8_t)l_chipUnit * 4; + TS_INFO("testHWASisDescFunctional>" + "Now try marking EQ%d cores EC%d - EC%d bad", + l_chipUnit, core_idx, core_idx+3); + for (int i = 0; i < 4; i++) + { + pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] |= + ~VPD_CP00_PG_ECxx_GOOD; + } + if (isDescFunctional(pDesc, pgData)) + { + TS_FAIL("testHWASisDescFunctional>" + "EQ/EP%d functional = 0x%x, should be false. 
" + "All 4 EC children were marked bad", + l_chipUnit, + isDescFunctional(pDesc, pgData)); + } + // Restore the "all good" core data + for (int i = 0; i < 4; i++) + { + pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] = + VPD_CP00_PG_ECxx_GOOD; + } + break; case TYPE_EX: @@ -973,6 +1038,32 @@ public: pgData[VPD_CP00_PG_EP0_INDEX + (l_chipUnit / 2)] = (uint16_t)VPD_CP00_PG_EPx_GOOD; + + // Now mark its 2 EC cores as bad and check for rollup + core_idx = (uint8_t)l_chipUnit * 2; + TS_INFO("testHWASisDescFunctional>" + "Now try marking EX%d cores EC%d and EC%d bad", + l_chipUnit, core_idx, core_idx+1); + for (int i = 0; i < 2; i++) + { + pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] |= + ~VPD_CP00_PG_ECxx_GOOD; + } + if (isDescFunctional(pDesc, pgData)) + { + TS_FAIL("testHWASisDescFunctional>" + "EX%d functional = 0x%x, should be false. " + "All 2 EC children were marked bad", + l_chipUnit, + isDescFunctional(pDesc, pgData)); + } + // Restore the "all good" core data + for (int i = 0; i < 2; i++) + { + pgData[VPD_CP00_PG_EC00_INDEX + core_idx + i] = + VPD_CP00_PG_ECxx_GOOD; + } + break; case TYPE_CORE: |