diff options
author | Andrew Geissler <andrewg@us.ibm.com> | 2018-06-13 13:40:39 -0500 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2018-06-19 12:18:25 -0400 |
commit | ea86539a69de1f7415cf697fba5c35b8970a7db4 (patch) | |
tree | 9f2289fb9a51a2bf77a742c0639522a9c56869d1 /src/usr/hwas/test | |
parent | 74bfadb2ab8796c738c7d951041d890261aee293 (diff) | |
download | talos-hostboot-ea86539a69de1f7415cf697fba5c35b8970a7db4.tar.gz talos-hostboot-ea86539a69de1f7415cf697fba5c35b8970a7db4.zip |
Ensure hwas state reflects resource recovery actions
Once resource recovery is run to recover hardware resources to
allow the system to boot, it will not be run again until the
configuration of the hardware changes. An issue with that design
is that the HWAS state, deconfiguredByEid, will not be properly
updated in situations where resource recovery has already run.
This state is used by the gard command line tool to inform
the user of the state of their garded hardware.
This commit ensures that deconfiguredByEid is properly updated
for targets that are guarded, but have been resource recovered
to allow system boot.
Change-Id: Ib15c1e8402e7c13b6497915c5138831e5e591bbe
CQ: SW432846
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/60486
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: DHRUVARAJ SUBHASH CHANDRAN <dhruvaraj@in.ibm.com>
Reviewed-by: ARAVIND T. NAIR <aravindnair@in.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/hwas/test')
-rw-r--r-- | src/usr/hwas/test/hwasSysAvailSvcTest.H | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/src/usr/hwas/test/hwasSysAvailSvcTest.H b/src/usr/hwas/test/hwasSysAvailSvcTest.H index 7bd889cc5..cb266c279 100644 --- a/src/usr/hwas/test/hwasSysAvailSvcTest.H +++ b/src/usr/hwas/test/hwasSysAvailSvcTest.H @@ -121,6 +121,47 @@ public: } /** + * @brief Verify input resources are resource recovered and then restore + * them to original state + * + * @return True if all passes, false otherwise + */ + bool verifyRecoveredAndRestore(TargetHandleList &i_targetHList, + TARGETING::HwasState *i_origStates) + { + for (uint32_t i=0; i < i_targetHList.size(); i++) + { + TARGETING::HwasState l_hwasState = + i_targetHList[i]->getAttr<TARGETING::ATTR_HWAS_STATE>(); + TS_TRACE( "testVerifyResourceRecovery: After Recovery - " + "HwasState fakeState: " + "deconf=0x%x,PO=0x%x,pres=0x%x,func=0x%x,dumpf=0x%x," + "specdeconfig=0x%x", + l_hwasState.deconfiguredByEid, + l_hwasState.poweredOn, + l_hwasState.present, + l_hwasState.functional, + l_hwasState.dumpfunctional, + l_hwasState.specdeconfig + ); + + // Verify resources were brought back to life by resource recovery + if ((l_hwasState.deconfiguredByEid != + DeconfigGard::CONFIGURED_BY_RESOURCE_RECOVERY) || + (l_hwasState.specdeconfig != 0) || + (l_hwasState.functional != 1)) + { + TS_FAIL("testVerifyResourceRecovery: Incorrect resource " + "recovery hwas states"); + return false; + } + // Restore old state + i_targetHList[i]->setAttr<TARGETING::ATTR_HWAS_STATE>(i_origStates[i]); + } + return true; + } + + /** * @brief mark all cores nonfunctional and then call checkMinimumHardware() */ void testCheckMinimumHardwareCore() @@ -428,6 +469,140 @@ public: #endif } + /** + * @brief Speculative gard all cores and verify they are recovered + */ + void testVerifyResourceRecovery() + { +#if DISABLE_UNIT_TESTS + // NOTE: These tests mark targets nonfunctional and then + // restore them. Since all the unit tests run in parallel, this + // may cause other tests to fail. + // Do not leave this test enabled for normal operations. 
+ TS_TRACE(INFO_MRK "SKIPPING: other tests could be affected."); + HWAS_INF("testVerifyResourceRecovery: Skipped"); +#else + + do + { + errlHndl_t l_errl = NULL; + TS_TRACE(INFO_MRK "testVerifyResourceRecovery"); + HWAS_INF("testVerifyResourceRecovery: Started"); + + TargetHandleList l_cores; + getAllChiplets(l_cores, TYPE_CORE, true ); + TS_TRACE( "testVerifyResourceRecovery: %d functional cores", + l_cores.size() ); + + // Keep track of original hwas states + TARGETING::HwasState l_origStates[ l_cores.size() ] ; + HWAS_INF("testVerifyResourceRecovery: Create predictive gard record" + " for all cores"); + + for (uint32_t i=0; i < l_cores.size(); i++) + { + TARGETING::HwasState l_hwasState = + l_cores[i]->getAttr<TARGETING::ATTR_HWAS_STATE>(); + l_origStates[i] = l_hwasState; + TS_TRACE( "testVerifyResourceRecovery: HwasState fakeState: " + "deconf=0x%x,PO=0x%x,pres=0x%x,func=0x%x,dumpf=0x%x," + "specdeconfig=0x%x", + l_hwasState.deconfiguredByEid, + l_hwasState.poweredOn, + l_hwasState.present, + l_hwasState.functional, + l_hwasState.dumpfunctional, + l_hwasState.specdeconfig + ); + + // Create a GARD record for all cores + l_errl = theDeconfigGard().platCreateGardRecord(l_cores[i], + 0x12, + GARD_Predictive); + if (l_errl) + { + TS_FAIL("testVerifyResourceRecovery: Error from " + "platCreateGardRecord"); + break; + } + } + if (l_errl) + { + errlCommit( l_errl, HWAS_COMP_ID ); + break; + } + + HWAS_INF("testVerifyResourceRecovery: Enable spec deconfig"); + + // Ensure gard will execute speculative deconfig + Target * pSys; + targetService().getTopLevelTarget(pSys); + pSys->setAttr<ATTR_BLOCK_SPEC_DECONFIG>(0); + + // Apply gard records and run resource recovery + l_errl = theDeconfigGard().deconfigureTargetsFromGardRecordsForIpl(); + if ( l_errl ) + { + TS_FAIL("testVerifyResourceRecovery: Error from " + "deconfigureTargetsFromGardRecordsForIpl"); + errlCommit( l_errl, HWAS_COMP_ID ); + break; + } + + // Verify all cores have been "Recovered" and then restore + // 
old hwas state + if (!verifyRecoveredAndRestore(l_cores,l_origStates)) + { + // TS_FAIL logged in function so just exit out + HWAS_INF("testVerifyResourceRecovery: Verify failed"); + break; + } + + // Verify spec deconfig is disabled (since it's already been + // run). + if (pSys->getAttr<ATTR_BLOCK_SPEC_DECONFIG>() != 1) + { + TS_FAIL("testVerifyResourceRecovery: Spec deconfig is not " + "disabled"); + break; + } + + // Call the deconfigure interface again and verify even without + // spec deconfig enabled, that hwas states are still updated properly + l_errl = theDeconfigGard().deconfigureTargetsFromGardRecordsForIpl(); + if ( l_errl ) + { + TS_FAIL("testVerifyResourceRecovery: Error from " + "deconfigureTargetsFromGardRecordsForIpl"); + errlCommit( l_errl, HWAS_COMP_ID ); + break; + } + + // Verify cores are functional and resource recovered + if (!verifyRecoveredAndRestore(l_cores,l_origStates)) + { + // TS_FAIL logged in function so just exit out + HWAS_INF("testVerifyResourceRecovery: Verify phase 2 failed"); + break; + } + else + { + HWAS_INF("testVerifyResourceRecovery: PASSED"); + TS_TRACE("testVerifyResourceRecovery: PASSED"); + } + + // Reset to 0 + pSys->setAttr<ATTR_BLOCK_SPEC_DECONFIG>(0); + + // Clear all gard records + for (uint32_t i=0; i < l_cores.size(); i++) + { + theDeconfigGard().clearGardRecords(l_cores[i]); + } + } while(0); +#endif + } + }; // end class #undef DISABLE_UNIT_TESTS |