/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* $Source: src/usr/hwas/common/hwas.C $ */ /* */ /* OpenPOWER HostBoot Project */ /* */ /* Contributors Listed Below - COPYRIGHT 2012,2019 */ /* [+] Google Inc. */ /* [+] International Business Machines Corp. */ /* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ /* implied. See the License for the specific language governing */ /* permissions and limitations under the License. */ /* */ /* IBM_PROLOG_END_TAG */ /** * @file hwas.C * * HardWare Availability Service functions. * See hwas.H for doxygen documentation tags. * */ /******************************************************************************/ // Includes /******************************************************************************/ #include #include #include #include // sprintf #ifdef __HOSTBOOT_MODULE #include #endif #include #include #include #include #include #include #include #include #include namespace HWAS { using namespace TARGETING; using namespace HWAS::COMMON; // trace setup; used by HWAS_DBG and HWAS_ERR macros HWAS_TD_t g_trac_dbg_hwas = NULL; // debug - fast HWAS_TD_t g_trac_imp_hwas = NULL; // important - slow #ifdef __HOSTBOOT_MODULE TRAC_INIT(&g_trac_dbg_hwas, "HWAS", KILOBYTE ); TRAC_INIT(&g_trac_imp_hwas, "HWAS_I", KILOBYTE ); #else TRAC_INIT(&g_trac_dbg_hwas, "HWAS", 1024 ); TRAC_INIT(&g_trac_imp_hwas, "HWAS_I", 1024 ); #endif // SORT functions that we'll use for PR keyword processing bool compareProcGroup(procRestrict_t t1, procRestrict_t t2) { if (t1.group == t2.group) { return (t1.target->getAttr() < t2.target->getAttr()); } return (t1.group < t2.group); } bool compareAffinity(const TargetInfo t1, const TargetInfo t2) { return t1.affinityPath < t2.affinityPath; } /* * @brief This function takes in proc target and returns group/chip id * in the following bit format: GGGG CCC * where G = Group Id and C = Chip Id * * @param[in] i_proc: proc target * @retval: chip info including group and chip id */ uint64_t getGroupChipIdInfo (TargetHandle_t i_proc) { auto l_grp_id = i_proc->getAttr(); auto l_chip_id = i_proc->getAttr(); //Chip ID is three bits long, therefore, shift group id //by 3 and OR it with chip id return ((l_grp_id << 3) | l_chip_id); } /* * @brief This function takes in the value of ATTR_PROC_MEM_TO_USE * and extract out group and chip id * in the following bit format: GGGG CCC * where G = Group Id and C = Chip Id * * @param[in] i_proc_mem_to_use: Value of ATTR_PROC_MEM_TO_USE * @param[out] o_grp_id: groupd id * @param[out] o_chip_id: chip id */ void parseProcMemToUseIntoGrpChipId (uint8_t i_proc_mem_to_use, uint8_t & o_grp_id, uint8_t & o_chip_id) { o_grp_id = (i_proc_mem_to_use >> 3) & 0x0F; o_chip_id = i_proc_mem_to_use & 0x07; } /** * @brief simple helper fn to get and set hwas state to poweredOn, * present, functional * * @param[in] i_target pointer to target that we're looking at * @param[in] i_present boolean indicating present or not * @param[in] i_functional boolean indicating functional or not * @param[in] i_errlEid erreid that caused change to non-funcational; * 0 if not associated with an error or if * functional is true * * @return none * */ void enableHwasState(Target *i_target, bool i_present, bool i_functional, uint32_t i_errlEid) { HwasState hwasState = i_target->getAttr(); if (i_functional == false) { // record the EID as a reason that we're marking non-functional hwasState.deconfiguredByEid = i_errlEid; } hwasState.poweredOn = true; hwasState.present = i_present; hwasState.functional = i_functional; i_target->setAttr( hwasState ); } /** * @brief disable obuses in wrap config. * Due to fabric limitations, we can only have 2 parallel OBUS * connections at a time in wrap config.So, deconfigure appropriate * OBUSes using the following rule. If the value of * MFG_WRAP_TEST_ABUS_LINKS_SET_ENABLE (on the system target) does * not match with the value of MFG_WRAP_TEST_ABUS_LINKS_SET (on the * OBUS target), then deconfigure the OBUSes. * @return errlHndl_t * */ errlHndl_t disableOBUSes() { errlHndl_t l_err = nullptr; do { //get system target and figure out which links to enable Target* pSys; targetService().getTopLevelTarget(pSys); auto l_links_set_enable = pSys->getAttr(); //get all OBUS chiplets TargetHandleList l_obusList; getAllChiplets(l_obusList, TYPE_OBUS); for (const auto & l_obus : l_obusList) { //It fails to compile if you compare two different enum types //That's why, typecasting here. The underlying enum value //should be the same. ATTR_MFG_WRAP_TEST_ABUS_LINKS_SET_ENABLE_type l_link_set = (ATTR_MFG_WRAP_TEST_ABUS_LINKS_SET_ENABLE_type) l_obus->getAttr(); if (l_links_set_enable != l_link_set) { //deconfigure l_err = HWAS::theDeconfigGard().deconfigureTarget( *l_obus, HWAS::DeconfigGard::DECONFIGURED_BY_NO_MATCHING_LINK_SET); if (l_err) { HWAS_ERR("disableOBUSes: Unable to deconfigure %x OBUS", get_huid(l_obus)); break; } } } if (l_err) { break; } //Sanity Check to make sure each proc only has a max of 2 OBUSes //only if MFG_WRAP_TEST_ABUS_LINKS_SET_ENABLE was overidden //because that means we are trying to run in wrap mode. //Otherwise, it will be defaulted to SET_NONE if (l_links_set_enable) { TargetHandleList l_procList; getAllChips(l_procList, TYPE_PROC); for (const auto & l_proc : l_procList) { getChildChiplets(l_obusList, l_proc, TYPE_OBUS, true); if (l_obusList.size() > 2) { HWAS_ERR("disableOBUSes: Only 2 BUSes should be functional" " under %x, found %d", get_huid(l_proc), l_obusList.size()); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_DISABLE_OBUS * @reasoncode RC_ONLY_TWO_OBUS_SHOULD_BE_CONFIGURED * @devdesc Due to fabric limitations, only 2 * OBUSes should be configured, we found * too many * @custdesc A problem occurred during the IPL of * the system: Found too many obus links * @userdata1 HUID of proc * @userdata2 number of functional OBUSes */ l_err = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_DISABLE_OBUS, RC_ONLY_TWO_OBUS_SHOULD_BE_CONFIGURED, get_huid(l_proc), l_obusList.size()); break; } } } } while (0); return l_err; } errlHndl_t update_proc_mem_to_use (const Target* i_node) { errlHndl_t l_errl {nullptr}; TargetHandle_t l_masterProcTarget {nullptr}; do { //Get master proc l_errl = targetService().queryMasterProcChipTargetHandle(l_masterProcTarget, i_node); if (l_errl) { HWAS_ERR("update_proc_mem_to_use: unable to get master proc"); break; } //Check if this processor has missing memory //If yes, then get the HRMOR of the proc we want to use the mem of uint8_t l_proc_mem_to_use = l_masterProcTarget->getAttr (); uint8_t l_proc_mem_to_use_save = l_proc_mem_to_use; bool l_found_missing_mem = false; l_errl = check_for_missing_memory(i_node, l_proc_mem_to_use, l_found_missing_mem); if (l_errl) { HWAS_ERR("update_proc_mem_to_use: unable to check for missing mem"); break; } //We found missing memory behind master proc, but //check_for_missing_memory didn't update proc_mem_to_use //probably because there are no other procs with memory, //create an error. if (l_found_missing_mem && (l_proc_mem_to_use==l_proc_mem_to_use_save)) { HWAS_ERR("update_proc_mem_to_use: ATTR_PROC_MEM_TO_USE didn't get" " updated even though we were missing mem behind master proc"); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_UPDATE_PROC_MEM_TO_USE * @reasoncode RC_NO_UPDATE_WHEN_MEM_MISSING * @devdesc No procs found with valid memory * @custdesc A problem occurred during the IPL of * the system: No memory found * @userdata1 Saved value of ATTR_PROC_MEM_TO_USE * @userdata2 Updated value of ATTR_PROC_MEM_TO_USE */ l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_UPDATE_PROC_MEM_TO_USE, RC_NO_UPDATE_WHEN_MEM_MISSING, l_proc_mem_to_use_save, l_proc_mem_to_use); hwasErrorAddProcedureCallout(l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH); break; } //set PROC_MEM_TO_USE to the group/chip id of the proc we want to //use the mem of //get all procs behind the input node TargetHandleList l_procs; getChildAffinityTargetsByState( l_procs, i_node, CLASS_CHIP, TYPE_PROC, UTIL_FILTER_ALL); for (auto & l_proc : l_procs) { l_proc->setAttr(l_proc_mem_to_use); } } while (0); return l_errl; } errlHndl_t check_for_missing_memory (const Target* i_node, uint8_t & io_proc_mem_to_use, bool & o_found_missing_mem) { errlHndl_t l_errl {nullptr}; o_found_missing_mem = true; do { ///////////////////////////////////////////////////////////// //Step 1 -- Figure out the lowest group/chip id proc that has // memory ///////////////////////////////////////////////////////////// //get all procs behind the input node TargetHandleList l_procs; getChildAffinityTargetsByState( l_procs, i_node, CLASS_CHIP, TYPE_PROC, UTIL_FILTER_FUNCTIONAL); //sort based on group/chip id. So, we can deterministically //pick the processor with memory. This will also help guarantee //that we will attempt to use master (or altmaster) proc's memory //first before using slave proc's memory. std::sort(l_procs.begin(), l_procs.end(), [] (TargetHandle_t a, TargetHandle_t b) { return getGroupChipIdInfo(a) < getGroupChipIdInfo(b); }); uint8_t l_temp_proc_mem_to_use = io_proc_mem_to_use; //find a processor that has dimms for (auto & l_proc : l_procs) { TargetHandleList l_funcDimms; getChildAffinityTargetsByState( l_funcDimms, l_proc, CLASS_LOGICAL_CARD, TYPE_DIMM, UTIL_FILTER_FUNCTIONAL); //Pick the first proc we find with dimms if (l_funcDimms.size() > 0) { l_temp_proc_mem_to_use = getGroupChipIdInfo(l_proc); break; } } ///////////////////////////////////////////////////////////// //Step 2 -- Get the proc we are currently using the memory of // and check if it has memory ///////////////////////////////////////////////////////////// //get the proc pointed by PROC_MEM_TO_USE and check //if there is memory behind that proc. We rely on the current //value of PROC_MEM_TO_USE, so, we don't change our answer //unnecessarily (in cases when both master proc and altmaster //have memory) uint8_t l_grp = 0; uint8_t l_chip = 0; parseProcMemToUseIntoGrpChipId(io_proc_mem_to_use, l_grp, l_chip); PredicateAttrVal l_predGrp (l_grp); PredicateAttrVal l_predChip (l_chip); PredicateCTM l_predProc (CLASS_CHIP, TYPE_PROC); PredicateIsFunctional l_isFunctional; PredicatePostfixExpr l_procCheckExpr; l_procCheckExpr.push(&l_predProc).push(&l_isFunctional). push(&l_predGrp).push(&l_predChip).And().And().And(); TargetHandleList l_procMemUsedCurrently; targetService().getAssociated(l_procMemUsedCurrently, i_node, TargetService::CHILD_BY_AFFINITY, TargetService::IMMEDIATE, &l_procCheckExpr); HWAS_INF("check_for_missing_memory: looking for a proc with " "grp=0x%x chip=0x%x, found %d procs", l_grp, l_chip, l_procMemUsedCurrently.size()); if (l_procMemUsedCurrently.size() >= 1) { //found proc //Check if proc whose memory we are currently using has dimms TargetHandleList l_funcDimms; getChildAffinityTargetsByState( l_funcDimms, l_procMemUsedCurrently[0], CLASS_LOGICAL_CARD, TYPE_DIMM, UTIL_FILTER_FUNCTIONAL); if (l_funcDimms.size() > 0) { //we found dimms behind the proc we are currently using o_found_missing_mem = false; } } ///////////////////////////////////////////////////////////// //Step 3-- If proc picked in Step1 has lower group/chip id // than current proc_mem_to_use value or // there is no memory behind the currently used proc, // then we update the proc_mem_to_use //NOTE: This ensures that if someone replaces the dimm on a lowered // number proc, then we can fall back to that lowered number // proc. Also, it makes sure that we are updating only when // current proc_mem_to_use doesn't have memory or it's not // pointing to a valid proc. ///////////////////////////////////////////////////////////// if ((l_temp_proc_mem_to_use < io_proc_mem_to_use) || (o_found_missing_mem)) { HWAS_INF("check_for_missing_memory: found a need to switch" " PROC_MEM_TO_USE from 0x%x to 0x%x", io_proc_mem_to_use, l_temp_proc_mem_to_use); io_proc_mem_to_use = l_temp_proc_mem_to_use; } else { HWAS_INF("check_for_missing_memory: kept PROC_MEM_TO_USE same" " 0x%x", io_proc_mem_to_use); } } while (0); return l_errl; } errlHndl_t check_current_proc_mem_to_use_is_still_valid (bool o_match) { errlHndl_t l_err {nullptr}; o_match = true; do { //Get the master proc to get the current value of PROC_MEM_TO_USE TargetHandle_t l_mProc; l_err = targetService().queryMasterProcChipTargetHandle(l_mProc); if (l_err) { HWAS_ERR("ERROR: getting master proc"); break; } auto l_proc_mem_to_use = l_mProc->getAttr(); //Get the node target to pass to check_for_missing_memory TargetHandleList l_nodes; getEncResources(l_nodes, TYPE_NODE, UTIL_FILTER_FUNCTIONAL); HWAS_ASSERT((l_nodes.size() == 1), "Only expecting 1 functional node"); auto l_curr_proc_mem_to_use = l_proc_mem_to_use; bool l_found_missing_mem {false}; l_err = HWAS::check_for_missing_memory(l_nodes[0], l_proc_mem_to_use, l_found_missing_mem); if (l_err) { HWAS_ERR("ERROR: check_for_missing_memory"); break; } HWAS_INF("PROC_MEM_TO_USE currentVal=0x%x reComputedVal=0x%x", l_curr_proc_mem_to_use, l_proc_mem_to_use); if (l_curr_proc_mem_to_use != l_proc_mem_to_use) { HWAS_INF("check_current_proc_mem_to_use_is_still_valid: " "currentVal and reComputerVal don't match"); o_match = false; } } while (0); return l_err; } /** * @brief Do presence detect on only MUX targets and enable HWAS state * * @param[in] i_sysTarget the top level target (CLASS_SYS) * @return errlHndl_t return nullptr if no error, * else return a handle to an error entry * */ errlHndl_t discoverMuxTargetsAndEnable(const Target &i_sysTarget) { HWAS_DBG(ENTER_MRK"discoverMuxTargetsAndEnable"); errlHndl_t l_err{nullptr}; do { // Only get MUX targets const PredicateCTM l_muxPred(CLASS_CHIP, TYPE_I2C_MUX); TARGETING::PredicatePostfixExpr l_muxPredExpr; l_muxPredExpr.push(&l_muxPred); TargetHandleList l_pMuxCheckPres; targetService().getAssociated( l_pMuxCheckPres, (&i_sysTarget), TargetService::CHILD, TargetService::ALL, &l_muxPredExpr); // Do the presence detect on only MUX targets l_err = platPresenceDetect(l_pMuxCheckPres); // If an issue with platPresenceDetect, then exit, returning // error back to caller if (nullptr != l_err) { break; } // Enable the HWAS State for the MUXes const bool l_present(true); const bool l_functional(true); const uint32_t l_errlEid(0); for (TargetHandle_t pTarget : l_pMuxCheckPres) { // set HWAS state to show MUX is present and functional enableHwasState(pTarget, l_present, l_functional, l_errlEid); } } while (0); HWAS_DBG(EXIT_MRK"discoverMuxTargetsAndEnable exit with %s", (nullptr == l_err ? "no error" : "error")); return l_err; } /** * @brief Do presence detect on only PMIC targets and enable HWAS state * * @param[in] i_sysTarget the top level target (CLASS_SYS) * @return errlHndl_t return nullptr if no error, * else return a handle to an error entry * */ errlHndl_t discoverPmicTargetsAndEnable(const Target &i_sysTarget) { HWAS_INF(ENTER_MRK"discoverPmicTargetsAndEnable"); errlHndl_t l_err{nullptr}; do { // Only get PMIC targets const PredicateCTM l_pmicPred(CLASS_ASIC, TYPE_PMIC); TARGETING::PredicatePostfixExpr l_asicPredExpr; l_asicPredExpr.push(&l_pmicPred); TargetHandleList l_pPmicCheckPres; targetService().getAssociated( l_pPmicCheckPres, (&i_sysTarget), TargetService::CHILD, TargetService::ALL, &l_asicPredExpr); // Do the presence detect on only PMIC targets // NOTE: this function will remove any non-functional targets // from pPmicCheckPres l_err = platPresenceDetect(l_pPmicCheckPres); // If an issue with platPresenceDetect, then exit, returning // error back to caller if (nullptr != l_err) { break; } // Enable the HWAS State for the PMICs const bool l_present(true); const bool l_functional(true); const uint32_t l_errlEid(0); for (TargetHandle_t pTarget : l_pPmicCheckPres) { // set HWAS state to show PMIC is present and functional enableHwasState(pTarget, l_present, l_functional, l_errlEid); } } while (0); HWAS_INF(EXIT_MRK"discoverPmicTargetsAndEnable exit with %s", (nullptr == l_err ? "no error" : "error")); return l_err; } errlHndl_t discoverTargets() { HWAS_DBG("discoverTargets entry"); errlHndl_t errl = NULL; // loop through all the targets and set HWAS_STATE to a known default for (TargetIterator target = targetService().begin(); target != targetService().end(); ++target) { // TODO:RTC:151617 Need to find a better way // to initialize the target TARGETING::ATTR_INIT_TO_AVAILABLE_type initToAvailable = false; if( (target->tryGetAttr( initToAvailable)) && (initToAvailable)) { HwasState hwasState = target->getAttr(); hwasState.deconfiguredByEid = 0; hwasState.poweredOn = true; hwasState.present = true; hwasState.functional = true; hwasState.dumpfunctional = false; target->setAttr(hwasState); } else { HwasState hwasState = target->getAttr(); hwasState.deconfiguredByEid = 0; hwasState.poweredOn = false; hwasState.present = false; hwasState.functional = false; hwasState.dumpfunctional = false; target->setAttr(hwasState); } } // Assumptions and actions: // CLASS_SYS (exactly 1) - mark as present // CLASS_ENC, TYPE_PROC, TYPE_MCS, TYPE_MEMBUF, TYPE_DIMM // (ALL require hardware query) - call platPresenceDetect // \->children: CLASS_* (NONE require hardware query) - mark as present do { // find CLASS_SYS (the top level target) Target* pSys; targetService().getTopLevelTarget(pSys); HWAS_ASSERT(pSys, "HWAS discoverTargets: no CLASS_SYS TopLevelTarget found"); // mark this as present enableHwasState(pSys, true, true, 0); HWAS_DBG("pSys %.8X - marked present", pSys->getAttr()); // Certain targets have dependencies on the MUX, so it is best to // presence detect and enable the MUX before moving on to these targets. // Please take this into consideration if code needs to be rearranged // in the future. errl = discoverMuxTargetsAndEnable(*pSys); if (errl != NULL) { break; // break out of the do/while so that we can return } PredicateCTM predEnc(CLASS_ENC); PredicateCTM predChip(CLASS_CHIP); PredicateCTM predDimm(CLASS_LOGICAL_CARD, TYPE_DIMM); PredicateCTM predMcs(CLASS_UNIT, TYPE_MCS); PredicateCTM predPmic(CLASS_ASIC, TYPE_PMIC); // We can ignore chips of TYPE_I2C_MUX because they // were already detected above in discoverMuxTargetsAndEnable // Also we can ignore chips of type PMIC because they will be processed // below. PredicateCTM predMux(CLASS_CHIP, TYPE_I2C_MUX); PredicatePostfixExpr checkExpr; checkExpr.push(&predChip).push(&predDimm).Or().push(&predEnc).Or(). push(&predMcs).Or().push(&predMux).Not().And(). push(&predPmic).Not().And(); TargetHandleList pCheckPres; targetService().getAssociated( pCheckPres, pSys, TargetService::CHILD, TargetService::ALL, &checkExpr ); // pass this list to the hwas platform-specific api where // pCheckPres will be modified to only have present targets HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); errl = platPresenceDetect(pCheckPres); HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); if (errl != NULL) { break; // break out of the do/while so that we can return } // for each, read their ID/EC level. if that works, // mark them and their descendants as present // read the partialGood vector to determine if any are not functional // and read and store values from the PR keyword // list of procs and data that we'll need to look at when potentially // reducing the list of valid ECs later procRestrict_t l_procEntry; std::vector l_procRestrictList; // sort the list by ATTR_HUID to ensure that we // start at the same place each time std::sort(pCheckPres.begin(), pCheckPres.end(), compareTargetHuid); for (TargetHandleList::const_iterator pTarget_it = pCheckPres.begin(); pTarget_it != pCheckPres.end(); ++pTarget_it ) { TargetHandle_t pTarget = *pTarget_it; // if CLASS_ENC is still in this list, mark as present if (pTarget->getAttr() == CLASS_ENC) { enableHwasState(pTarget, true, true, 0); HWAS_DBG("pTarget %.8X - CLASS_ENC marked present", pTarget->getAttr()); // on to the next target continue; } bool chipPresent = true; bool chipFunctional = true; bool createInfoLog = false; uint32_t errlEid = 0; uint16_t pgData[VPD_CP00_PG_DATA_ENTRIES]; bzero(pgData, sizeof(pgData)); // Cache the target type auto l_targetType = pTarget->getAttr(); // This error is created preemptively to capture any targets that // were deemed non-functional for partial good reasons. If there are // no issues, then this error log is deleted. /*@ * @errortype * @severity ERRL_SEV_INFORMATIONAL * @moduleid MOD_DISCOVER_TARGETS * @reasoncode RC_PARTIAL_GOOD_INFORMATION * @devdesc Partial Good (PG) issues are present within the * system and this error log contains information * about which targets, procs, and entries in the * PG vector are problematic. * @custdesc An issue occured during IPL of the system: * Internal Firmware Error * @userdata1 None * @userdata2 None */ errlHndl_t infoErrl = hwasError(ERRL_SEV_INFORMATIONAL, MOD_DISCOVER_TARGETS, RC_PARTIAL_GOOD_INFORMATION); if( (pTarget->getAttr() == CLASS_CHIP) && (l_targetType != TYPE_TPM) && (l_targetType != TYPE_SP) && (l_targetType != TYPE_BMC) && (l_targetType != TYPE_I2C_MUX)) { // read Chip ID/EC data from these physical chips errl = platReadIDEC(pTarget); if (errl) { // read of ID/EC failed even tho we THOUGHT we were present. HWAS_INF("pTarget 0x%.8X - read IDEC failed " "(eid 0x%X) - bad", get_huid(pTarget), errl->eid()); // chip NOT present and NOT functional, so that FSP doesn't // include this for HB to process chipPresent = false; chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else if (l_targetType == TYPE_PROC) { // read partialGood vector from these as well. errl = platReadPartialGood(pTarget, pgData); if (errl) { // read of PG failed even tho we were present.. HWAS_INF("pTarget 0x%.8X - read PG failed " "(eid 0x%X) - bad", get_huid(pTarget), errl->eid()); chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else { // look at the 'nest' logic to override the // functionality of this proc chipFunctional = isChipFunctional(pTarget, pgData); if(!chipFunctional) { // Add this proc to the informational error log. platHwasErrorAddHWCallout(infoErrl, pTarget, SRCI_PRIORITY_HIGH, NO_DECONFIG, GARD_NULL); createInfoLog = true; } // Fill in a dummy restrict list l_procEntry.target = pTarget; // every proc is uniquely counted l_procEntry.group = pTarget->getAttr(); // just 1 proc per group l_procEntry.procs = 1; // indicates we should use all available ECs l_procEntry.maxECs = UINT32_MAX; l_procRestrictList.push_back(l_procEntry); } } // TYPE_PROC } // CLASS_CHIP HWAS_DBG("pTarget %.8X - detected present, %sfunctional", pTarget->getAttr(), chipFunctional ? "" : "NOT "); // Now determine if the descendants of this target are // present and/or functional checkPartialGoodForDescendants(pTarget, pgData, chipFunctional, errlEid, infoErrl, createInfoLog); // set HWAS state to show CHIP is present, functional per above enableHwasState(pTarget, chipPresent, chipFunctional, errlEid); // If there were partial good issues with the chip or its // descendents then create an info error log. Otherwise, delete // and move on. if (createInfoLog) { TargetHandle_t l_masterProc = nullptr; //Get master proc errl = targetService() .queryMasterProcChipTargetHandle(l_masterProc); if (errl) { HWAS_ERR("discoverTargets: unable to get master proc"); errlCommit(errl, HWAS_COMP_ID); errlCommit(infoErrl, HWAS_COMP_ID); break; } auto l_model = l_masterProc->getAttr(); // Setup model dependent all good data uint16_t l_modelPgData[MODEL_PG_DATA_ENTRIES] = {0}; l_modelPgData[0] = (MODEL_NIMBUS == l_model) ? (VPD_CP00_PG_XBUS_GOOD_NIMBUS | VPD_CP00_PG_XBUS_IOX[0]) : VPD_CP00_PG_XBUS_GOOD_CUMULUS; l_modelPgData[1] = (TARGETING::MODEL_NIMBUS == l_model) ? VPD_CP00_PG_RESERVED_GOOD : VPD_CP00_PG_OBUS_GOOD; hwasErrorAddPartialGoodFFDC(infoErrl, l_modelPgData, pgData); errlCommit(infoErrl, HWAS_COMP_ID); } else { delete infoErrl; infoErrl = nullptr; } } // for pTarget_it // After processing all other targets look at the pmics, // we must wait because we need the SPD cached from the OCMBs // which occurs when OCMBs go through presence detection above errl = discoverPmicTargetsAndEnable(*pSys); if (errl != NULL) { break; // break out of the do/while so that we can return } // Check for non-present Procs and if found, trigger // DeconfigGard::_invokeDeconfigureAssocProc() to run by setting // setXAOBusEndpointDeconfigured to true PredicateCTM predProc(CLASS_CHIP, TYPE_PROC); TargetHandleList l_procs; targetService().getAssociated(l_procs, pSys, TargetService::CHILD, TargetService::ALL, &predProc); for (TargetHandleList::const_iterator l_procsIter = l_procs.begin(); l_procsIter != l_procs.end(); ++l_procsIter) { if ( !(*l_procsIter)->getAttr().present ) { HWAS_INF("discoverTargets: Proc %.8X not present", (*l_procsIter)->getAttr()); HWAS::theDeconfigGard().setXAOBusEndpointDeconfigured(true); } } //Check all of the slave processor's EC levels to ensure they match master //processor's EC level. //function will return error log pointing to all error logs created //by this function as this function could detect multiple procs w/ //bad ECs and will make a log for each errl = validateProcessorEcLevels(); if (errl) { HWAS_ERR("discoverTargets: validateProcessorEcLevels failed"); break; } // Potentially reduce the number of ec/core units that are present // based on fused mode // marking bad units as present=false; // deconfigReason = 0 because present is false so this is not a // deconfigured event. errl = restrictECunits(l_procRestrictList, false, 0); if (errl) { HWAS_ERR("discoverTargets: restrictECunits failed"); break; } // Mark any MCA units that are present but have a disabled port // as non-functional errl = markDisabledMcas(); if (errl) { HWAS_ERR("discoverTargets: markDisabledMcas failed"); break; } // call invokePresentByAssoc() to obtain functional MCSs, MEMBUFs, and // DIMMs for non-direct memory or MCSs, MCAs, and DIMMs for direct // memory. Call algorithm function presentByAssoc() to determine // targets that need to be deconfigured invokePresentByAssoc(); #ifdef __HOSTBOOT_MODULE if (INITSERVICE::isSMPWrapConfig()) { //Due to fabric limitations, we can only have 2 parallel OBUS //connections at a time in wrap config. So, deconfigure appropriate //OBUSes using the following rule. If the value of //MFG_WRAP_TEST_ABUS_LINKS_SET_ENABLE (on the system target) does //not match with the value of MFG_WRAP_TEST_ABUS_LINKS_SET (on the //OBUS target), then deconfigure the OBUSes. errl = disableOBUSes(); if (errl) { HWAS_ERR ("discoverTargets:: disableOBUSes failed"); break; } } #endif } while (0); if (errl) { HWAS_INF("discoverTargets failed (plid 0x%X)", errl->plid()); } else { HWAS_INF("discoverTargets completed successfully"); } return errl; } // discoverTargets bool isChipFunctional(const TARGETING::TargetHandle_t &i_target, const uint16_t i_pgData[]) { bool l_chipFunctional = true; ATTR_MODEL_type l_model = i_target->getAttr(); uint16_t l_xbus = (l_model == MODEL_NIMBUS) ? VPD_CP00_PG_XBUS_GOOD_NIMBUS : VPD_CP00_PG_XBUS_GOOD_CUMULUS; uint16_t l_perv = (l_model == MODEL_AXONE) ? VPD_CP00_PG_PERVASIVE_GOOD_AXONE : VPD_CP00_PG_PERVASIVE_GOOD; uint16_t l_n2 = (l_model == MODEL_AXONE) ? VPD_CP00_PG_N2_GOOD_AXONE : VPD_CP00_PG_N2_GOOD; // Check all bits in FSI entry if (i_pgData[VPD_CP00_PG_FSI_INDEX] != VPD_CP00_PG_FSI_GOOD) { HWAS_INF("pTarget %.8X - FSI pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_FSI_INDEX, i_pgData[VPD_CP00_PG_FSI_INDEX], VPD_CP00_PG_FSI_GOOD); l_chipFunctional = false; } else // Check all bits in PRV entry if (i_pgData[VPD_CP00_PG_PERVASIVE_INDEX] != l_perv) { HWAS_INF("pTarget %.8X - Pervasive pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_PERVASIVE_INDEX, i_pgData[VPD_CP00_PG_PERVASIVE_INDEX], l_perv); l_chipFunctional = false; } else // Check all bits in N0 entry if (i_pgData[VPD_CP00_PG_N0_INDEX] != VPD_CP00_PG_N0_GOOD) { HWAS_INF("pTarget %.8X - N0 pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_N0_INDEX, i_pgData[VPD_CP00_PG_N0_INDEX], VPD_CP00_PG_N0_GOOD); l_chipFunctional = false; } else // Check bits in N1 entry except those in partial good region if ((i_pgData[VPD_CP00_PG_N1_INDEX] & ~VPD_CP00_PG_N1_PG_MASK) != VPD_CP00_PG_N1_GOOD) { HWAS_INF("pTarget %.8X - N1 pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_N1_INDEX, i_pgData[VPD_CP00_PG_N1_INDEX], VPD_CP00_PG_N1_GOOD); l_chipFunctional = false; } else // Check all bits in N2 entry if (i_pgData[VPD_CP00_PG_N2_INDEX] != l_n2) { HWAS_INF("pTarget %.8X - N2 pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_N2_INDEX, i_pgData[VPD_CP00_PG_N2_INDEX], l_n2); l_chipFunctional = false; } else // Check bits in N3 entry except those in partial good region if ((i_pgData[VPD_CP00_PG_N3_INDEX] & ~VPD_CP00_PG_N3_PG_MASK) != VPD_CP00_PG_N3_GOOD) { HWAS_INF("pTarget %.8X - N3 pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_N3_INDEX, i_pgData[VPD_CP00_PG_N3_INDEX], VPD_CP00_PG_N3_GOOD); l_chipFunctional = false; } else // Check bits in XBUS entry, ignoring individual xbus targets // Note that what is good is different bewteen Nimbus/Cumulus if (((i_pgData[VPD_CP00_PG_XBUS_INDEX] & ~VPD_CP00_PG_XBUS_PG_MASK) != l_xbus)) { HWAS_INF("pTarget %.8X - XBUS pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_target->getAttr(), VPD_CP00_PG_XBUS_INDEX, i_pgData[VPD_CP00_PG_XBUS_INDEX], l_xbus); l_chipFunctional = false; } return l_chipFunctional; } // isChipFunctional bool isDescFunctional(const TARGETING::TargetHandle_t &i_desc, const uint16_t (&i_pgData)[VPD_CP00_PG_DATA_ENTRIES], pgState_map &io_targetStates) { bool l_functional = true, l_previouslySeen = false; do { // Look in the targetStates map to see if the target has been given a // state already. If it's not in the map, then continue with the // algorithm. Otherwise, only continue if the state was marked as // functional. Since the list input into isDescFunctional is sorted // where all of the children are first in the array, then if the current // target is found in io_targetStates and it's not functional that means // that it has no functional children and we shouldn't do any further // checking on it. auto selfState_it = io_targetStates.find(i_desc); if ((selfState_it != io_targetStates.end()) && (selfState_it->second != true)) { // This target has been seen, return. l_previouslySeen = true; l_functional = selfState_it->second; break; } // Since the target has at least one functional child (or no children), // next we must apply the correct partial good rules to determine // functionality. // // Lookup the correct partial good logic for the given target. To avoid // errors of omission, the lookup must return at least one pg logic // struct. If a target has no associated rules then an NA rule will be // returned that was created by the default constructor which will cause // the next for loop to function as a no-op. PARTIAL_GOOD::pgLogic_t descPgLogic; errlHndl_t l_returnErrl = PARTIAL_GOOD::pgTable .findRulesForTarget(i_desc, descPgLogic); if (l_returnErrl != nullptr) { errlCommit(l_returnErrl, HWAS_COMP_ID); break; } // Iterate through the list of partial good logic for this target. If // any of them fail then the target is non-functional. for(PARTIAL_GOOD::pgLogic_t::const_iterator pgLogic_it = descPgLogic.begin(); pgLogic_it != descPgLogic.end(); ++pgLogic_it) { PARTIAL_GOOD::PartialGoodLogic pgLogic = *pgLogic_it; if ((i_pgData[pgLogic.iv_pgIndex] & pgLogic.iv_pgMask) != pgLogic.iv_agMask) { HWAS_INF("pDesc 0x%.8X - pgData[%d]: " "actual 0x%04X, expected 0x%04X - bad", i_desc->getAttr(), pgLogic.iv_pgIndex, i_pgData[pgLogic.iv_pgIndex] & pgLogic.iv_pgMask, pgLogic.iv_agMask); l_functional = false; break; } // The final check is to see if there is any additional logic // that cannot be generically included in this loop. If there is // special logic it will be included in a function that is // hard-coded for that particular target and a function pointer will // point to it. // // Any of the structs in the vector could have a pointer to a // special rule so this check is included in the iteration. if (pgLogic.iv_specialRule != nullptr) { // This pgLogic struct has a special rule. Call it to determine // functionality. l_functional = pgLogic.iv_specialRule(i_desc, i_pgData); if (!l_functional) { break; } } } } while(0); if (!l_previouslySeen) { // Record the result in the targetStates map for later use. io_targetStates[i_desc] = l_functional; } return l_functional; } void markChildrenNonFunctional(const TARGETING::TargetHandle_t &i_parent, pgState_map &io_targetStates) { // Get the state for the parent. auto parentState_it = io_targetStates.find(i_parent); if ((parentState_it != io_targetStates.end()) && !parentState_it->second) { // Parent is non-functional. So get the list of all children // and mark them non-functional as well. TargetHandleList pDescChildren; targetService().getAssociated(pDescChildren, i_parent, TargetService::CHILD, TargetService::IMMEDIATE); for(auto child : pDescChildren) { auto childState_it = io_targetStates.find(child); if (childState_it != io_targetStates.end()) { // Ignore children that are already non-functional because the // first part of the partial good algorithm was done by starting // at the bottom of the target hierarchy and working up to the // top. Since this function is called while operating top-down, // that means if there is a child of this target that is // non-functional which has functional children we don't have to // deal with it now since it will eventually be passed into this // function as the parent target and its functional children // will be taken care of at that time. if (childState_it->second == true) { // Child state is true so change it to false. childState_it->second = false; // Since this child's state is true it may have functional // children that need to be marked non-functional. So, we // should check this child's children. TargetHandleList pGrandChildren; targetService().getAssociated(pGrandChildren, child, TargetService::CHILD, TargetService::IMMEDIATE); if (!pGrandChildren.empty()) { markChildrenNonFunctional(child, io_targetStates); } } } else { // Child is missing from the targetStates map. Insert it and // mark it non-functional. // NOTE: This won't happen during the actual PG algorithm but // is left here to be used for testcases or other uses. io_targetStates[child] = false; // Since the child was missing and its state is unknown, check // if it has any children and make those non-functional as well. TargetHandleList pGrandChildren; targetService().getAssociated(pGrandChildren, child, TargetService::CHILD, TargetService::IMMEDIATE); if (!pGrandChildren.empty()) { markChildrenNonFunctional(child, io_targetStates); } } } } } errlHndl_t checkPartialGoodForDescendants( const TARGETING::TargetHandle_t &i_pTarget, const uint16_t (&i_pgData)[VPD_CP00_PG_DATA_ENTRIES], const bool i_chipFunctional, const uint32_t i_errlEid, errlHndl_t io_infoErrl, bool &io_createInfoLog, bool i_isTestcase /* = false */, bool* o_testResult /* = nullptr */ ) { errlHndl_t errl = nullptr; // A map that will keep track of what has already been checked to // eliminate re-checking targets. It also holds functional state. pgState_map targetStates; // by default, the descendant's functionality is 'inherited' bool descFunctional = i_chipFunctional; // Get a list of this target's physical descendants TargetHandleList pDescList; targetService().getAssociated( pDescList, i_pTarget, TargetService::CHILD, TargetService::ALL); if (i_isTestcase) { // If we are running a testcase then i_pTarget is the target to be // checked and the children of i_pTarget should be checked along with // it. So, add it to the list and the algorithm will check it too. pDescList.push_back(i_pTarget); } if (i_chipFunctional) { // Sort the list of descendants such that the largest affinity // paths are first in the list and targets are grouped by // parent. std::sort(pDescList.begin(), pDescList.end(), // Define a lambda comparator function for sorting // criteria. [](const TargetHandle_t a, const TargetHandle_t b) { EntityPath aPath = a->getAttr(); TargetHandle_t aParent = getImmediateParentByAffinity(a); EntityPath bPath = b->getAttr(); TargetHandle_t bParent = getImmediateParentByAffinity(b); // a goes before b if its affinity path is // greater than b's and its parent pointer // is different from b's. bool result = false; if ((aPath.size() > bPath.size()) || ((aPath.size() == bPath.size()) && (aParent > bParent))) { result = true; } return result; }); // A pointer to a descendant's parent. This will be updated as // the first pass of PG checking occurs. TargetHandle_t parent = nullptr; // Assume the parent has no functional children and the // descendant's state is false. bool parentState = false, descState = false; // =========== Partial Good Checking First Pass =========== // Now that the list of descendants has been sorted, we can // proceed with the PG algorithm in two passes. In this pass, // the target hierarchy is navigated from the bottom up to the // top. // // This pass will check all children of a parent and when it // encounters a new parent, it will set the previous parent's // state as true or false. // true: the parent has at least one functional child // false: the parent has no functional children. // By setting a parent's state false ahead of time // isDescFunctional is able to skip over that target since, // regardless of PG results, that target will still be // non-functional due to not having functional children. for (auto pDesc : pDescList) { // Check if the parent has changed during iteration. If it // has then all of the children of that parent have been // checked and we now know if it has any functional // children. So, add the parent to targetStates with the // result. if (getImmediateParentByAffinity(pDesc) != parent) { if (parent != nullptr) { // Add parent's state to the targetStates map. // Note: If parentState has remained non-functional then // that means that it had no functional children // which is not allowed. So, PG checks will be // skipped for it when it is passed into // isDescFunctional. targetStates[parent] = parentState; if (parentState == false) { // No functional children of this target were found. // Target is considered not functional. HWAS_INF("pDesc parent 0x%.8X - marked bad because " "all of its children were bad.", parent->getAttr()); } } // Update parent pointer to the new parent. parent = getImmediateParentByAffinity(pDesc); // Reset parentState to false. parentState = false; } descState = isDescFunctional(pDesc, i_pgData, targetStates); // If one descendant of the current parent is functional, // then the parent is functional and should be checked by // isDescFunctional for partial good issues. if (descState == true) { parentState = true; } } } // =========== Partial Good Checking Second Pass =========== // After the first pass completes, all targets have had PG checks // applied to them (if necessary) and all parents have been checked // to have at least one functional child. Now we iterate through the // list one final time in reverse and propagate all non-functional // parent states down to functional children, since functional // children must not have non-functional parents. // // As the algorithm works its way through the hierarchy in a // top-down fashion, the final hwasState of the current target is // known and can be set as it works through all of the targets this // time. // // NOTE: If the chip is not functional then the first pass will not execute // and this iteration will serve only to mark all descendants // non-functional. TargetHandleList::const_iterator pDescList_rbegin = pDescList.end() - 1; TargetHandleList::const_iterator pDescList_rend = pDescList.begin() - 1; for (TargetHandleList::const_iterator pDesc_it = pDescList_rbegin; pDesc_it != pDescList_rend; --pDesc_it) { TargetHandle_t pDesc = *pDesc_it; if (i_chipFunctional) { // If this descendant is non-functional then // propagate non-functional state down to its children. markChildrenNonFunctional(pDesc, targetStates); auto pDesc_mapIt = targetStates.find(pDesc); if (pDesc_mapIt != targetStates.end()) { descFunctional = pDesc_mapIt->second; } else { /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_PG_FOR_DESC * @reasoncode RC_PARTIAL_GOOD_MISSING_TARGET * @devdesc A target was not found in the map of * states kept by the PG checking * algorithm. Therefore, it did not have * PG checks run against it. * @custdesc An issue occured during IPL of the * system: Internal Firmware Error * @userdata1 huid of the target */ errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_PG_FOR_DESC, RC_PARTIAL_GOOD_MISSING_TARGET, pDesc->getAttr()); break; } if(!descFunctional && !i_isTestcase) { // Add this descendant to the error log. hwasErrorAddTargetInfo(io_infoErrl, pDesc); io_createInfoLog = true; } } // Don't mess with the state of the system if we are doing test cases. if (!i_isTestcase) { if (pDesc->getAttr() == TYPE_PERV) { // for sub-parts of PERV, it's always present. enableHwasState(pDesc, i_chipFunctional, descFunctional, i_errlEid); HWAS_DBG("pDesc %.8X - marked %spresent, %sfunctional", pDesc->getAttr(), i_chipFunctional ? "" : "NOT", descFunctional ? "" : "NOT "); } else { // for other sub-parts, if it's not functional, // it's not present. enableHwasState(pDesc, descFunctional, descFunctional, i_errlEid); HWAS_DBG("pDesc %.8X - marked %spresent, %sfunctional", pDesc->getAttr(), descFunctional ? "" : "NOT ", descFunctional ? "" : "NOT "); } } } // Before we return, if this was run in a testcase then we should set the // o_testResult parameter so the testcase is aware of i_pTarget's state. if (i_isTestcase && (o_testResult != nullptr)) { *o_testResult = targetStates[i_pTarget]; } return errl; } void forceEcExEqDeconfig(const TARGETING::TargetHandle_t i_core, const bool i_present, const uint32_t i_deconfigReason) { TargetHandleList pECList; TargetHandleList pEXList; //Deconfig the EC enableHwasState(i_core, i_present, false, i_deconfigReason); HWAS_INF("pEC %.8X - marked %spresent, NOT functional", i_core->getAttr(), i_present ? "" : "NOT "); //Get parent EX and see if any other cores, if none, deconfig auto exType = TARGETING::TYPE_EX; auto eqType = TARGETING::TYPE_EQ; TARGETING::Target* l_ex = getParent(i_core, exType); getChildChiplets(pECList, l_ex, TYPE_CORE, true); if(pECList.size() == 0) { enableHwasState(l_ex, i_present, false, i_deconfigReason); HWAS_INF("pEX %.8X - marked %spresent, NOT functional", l_ex->getAttr(), i_present ? "" : "NOT "); //Now get the parent EQ and check to see if it should be deconfigured TARGETING::Target* l_eq = getParent(l_ex, eqType); getChildChiplets(pEXList, l_eq, TYPE_EX, true); if(pEXList.size() == 0) { enableHwasState(l_eq, i_present, false, i_deconfigReason); HWAS_INF("pEQ %.8X - marked %spresent, NOT functional", l_eq->getAttr(), i_present ? "" : "NOT "); } } } errlHndl_t restrictECunits( std::vector &i_procList, const bool i_present, const uint32_t i_deconfigReason) { HWAS_INF("restrictECunits entry, %d elements", i_procList.size()); errlHndl_t errl = nullptr; TargetHandle_t l_masterProcTarget = nullptr; do { errl = targetService().queryMasterProcChipTargetHandle(l_masterProcTarget); if(errl) { HWAS_ERR( "restrictECunits:: Unable to find master proc"); break; } HWAS_DBG("master proc huid: 0x%X", TARGETING::get_huid(l_masterProcTarget)); // sort by group so PROC# are in the right groupings. std::sort(i_procList.begin(), i_procList.end(), compareProcGroup); // loop thru procs to handle restrict const uint32_t l_ProcCount = i_procList.size(); for (uint32_t procIdx = 0; procIdx < l_ProcCount; // the increment will happen in the loop to handle // groups covering more than 1 proc target ) { // determine the number of procs we should enable uint8_t procs = i_procList[procIdx].procs; int l_masterProc = -1; uint32_t maxECs = i_procList[procIdx].maxECs; // this procs number, used to determine groupings uint32_t thisGroup = i_procList[procIdx].group; HWAS_INF("procRestrictList[%d] - maxECs 0x%X, procs %d, group %d", procIdx, maxECs, procs, thisGroup); // exs, ecs, and iters for each proc in this vpd set TargetHandleList pEXList[procs]; TargetHandleList::const_iterator pEX_it[procs]; TargetHandleList pECList[procs][NUM_EX_PER_CHIP]; TargetHandleList::const_iterator pEC_it[procs][NUM_EX_PER_CHIP]; // find the proc's that we think are in this group uint32_t currentPairedECs = 0; uint32_t currentSingleECs = 0; for (uint32_t i = 0; i < procs; ) // increment in loop { TargetHandle_t pProc = i_procList[procIdx].target; // if this proc is past the last of the proc count // OR is NOT in the same group if ((procIdx >= l_ProcCount) || (thisGroup != i_procList[procIdx].group)) { HWAS_DBG("procRestrictList[%d] - group %d not in group %d", i, i_procList[procIdx].group, thisGroup); // change this to be how many we actually have here procs = i; // we're done - break so that we use procIdx as the // start index next time break; } // is this proc the master for this node? if (pProc == l_masterProcTarget) { l_masterProc = i; } // get this proc's (CHILD) EX units // Need to get all so we init the pEC_it array getChildChiplets(pEXList[i], pProc, TYPE_EX, false); if (!pEXList[i].empty()) { // sort the list by ATTR_HUID to ensure that we // start at the same place each time std::sort(pEXList[i].begin(), pEXList[i].end(), compareTargetHuid); // keep a pointer into that list pEX_it[i] = pEXList[i].begin(); for (uint32_t j = 0; (j < NUM_EX_PER_CHIP) && (pEX_it[i] != pEXList[i].end()); j++) { TargetHandle_t pEX = *(pEX_it[i]); // get this EX's (CHILD) functional EC/core units getChildChiplets(pECList[i][j], pEX, TYPE_CORE, true); // keep a pointer into that list pEC_it[i][j] = pECList[i][j].begin(); if (!pECList[i][j].empty()) { // sort the list by ATTR_HUID to ensure that we // start at the same place each time std::sort(pECList[i][j].begin(), pECList[i][j].end(), compareTargetHuid); // keep local count of current functional EC units if (pECList[i][j].size() == 2) { // track ECs that can make a fused-core pair currentPairedECs += pECList[i][j].size(); } else { // track ECs without a pair for a fused-core currentSingleECs += pECList[i][j].size(); } } // go to next EX (pEX_it[i])++; } // for j < NUM_EX_PER_CHIP // go to next proc i++; } else { // this one is bad, stay on this i but lower the end count procs--; } // advance the outer loop as well since we're doing these // procs together ++procIdx; } // for i < procs // adjust maxECs based on fused mode if( is_fused_mode() ) { // only allow complete pairs maxECs = std::min( currentPairedECs, maxECs ); } if ((currentPairedECs + currentSingleECs) <= maxECs) { // we don't need to restrict - we're done with this group. HWAS_INF("currentECs 0x%X <= maxECs 0x%X -- done", (currentPairedECs + currentSingleECs), maxECs); continue; } HWAS_INF("master proc idx: %d", l_masterProc); HWAS_DBG("currentECs 0x%X > maxECs 0x%X -- restricting!", (currentPairedECs + currentSingleECs), maxECs); // now need to find EC units that stay functional, going // across the list of units for each proc and EX we have, // until we get to the max or run out of ECs, giving // preference to paired ECs and if we are in fused mode // excluding single, non-paired ECs. // Use as many paired ECs as we can up to maxECs uint32_t pairedECs_remaining = (maxECs < currentPairedECs) ? maxECs : currentPairedECs; // If not in fused mode, use single ECs as needed to get to maxECs uint32_t singleECs_remaining = ((maxECs > currentPairedECs) && !is_fused_mode()) ? (maxECs - currentPairedECs) : 0; uint32_t goodECs = 0; HWAS_DBG("procs 0x%X maxECs 0x%X", procs, maxECs); // Keep track of when we allocate at least one core to the master chip // in order to avoid the situation of master not having any cores. bool l_allocatedToMaster = false; // Each pECList has ECs for a given EX and proc. Check each EC list to // determine if it has an EC pair or a single EC and if the remaining // count indicates the given EC from that list is to stay functional. // Cycle through the first EX of each proc, then the second EX of each // proc and so on as we decrement remaining ECs. We put procs as the // inner loop and EXs as the outer to distribute the functional ECs // evenly between procs. After we run out of ECs, we deconfigure the // remaining ones. // Mark the ECs that have been accounted for uint8_t EC_checkedList[procs][NUM_EX_PER_CHIP]; memset(EC_checkedList, 0, sizeof(EC_checkedList)); for (uint32_t l_EX = 0; l_EX < NUM_EX_PER_CHIP; l_EX++) { for (int l_proc = 0; l_proc < procs; l_proc++) { // Save l_EX value to current EX, this is to be restored later uint32_t currrentEX = l_EX; // If core doesn't exist or already checked, find the // next available core on this proc in order to balance // the core distribution. uint8_t nextEXwithCore = 0; if ( (!pECList[l_proc][l_EX].size()) || (EC_checkedList[l_proc][l_EX]) ) { HWAS_INF("Current EX = %d, PROC %d: Need to find next " "avail EX with cores.", l_EX, l_proc); for (nextEXwithCore = l_EX+1; nextEXwithCore < NUM_EX_PER_CHIP; nextEXwithCore++) { if ( (pECList[l_proc][nextEXwithCore].size()) && (!(EC_checkedList[l_proc][nextEXwithCore]) ) ) { l_EX = nextEXwithCore; HWAS_INF("Next avail EX with cores = %d", nextEXwithCore); break; } } // No more core in this proc if (nextEXwithCore == NUM_EX_PER_CHIP) { HWAS_INF("No more EX with cores in proc %d", l_proc); l_EX = currrentEX; continue; } } // Mark this core has been checked. EC_checkedList[l_proc][l_EX] = 1; // Walk through the EC list from this EX while (pEC_it[l_proc][l_EX] != pECList[l_proc][l_EX].end()) { // Check if EC pair for this EX if ((pECList[l_proc][l_EX].size() == 2) && (pairedECs_remaining != 0) && (l_proc==l_masterProc || // is master or l_allocatedToMaster || // was allocated to master pairedECs_remaining > 2)) // save 2 cores for master { // got a functional EC that is part of a pair goodECs++; pairedECs_remaining--; HWAS_DBG("pEC 0x%.8X - is good %d! (paired) pi:%d EXi:%d pairedECs_remaining %d", (*(pEC_it[l_proc][l_EX]))->getAttr(), goodECs, l_proc, l_EX, pairedECs_remaining); if (l_proc == l_masterProc) { HWAS_DBG("Allocated to master"); l_allocatedToMaster = true; } } // Check if single EC for this EX else if ((pECList[l_proc][l_EX].size() == 1) && (singleECs_remaining != 0) && (l_proc==l_masterProc || // is master or l_allocatedToMaster || // was allocated to master singleECs_remaining > 1)) // save core for master { // got a functional EC without a pair goodECs++; singleECs_remaining--; HWAS_DBG("pEC 0x%.8X - is good %d! (single) pi:%d EXi:%d singleECs_remaining %d", (*(pEC_it[l_proc][l_EX]))->getAttr(), goodECs, l_proc, l_EX, singleECs_remaining); if (l_proc == l_masterProc) { HWAS_DBG("Allocated to master"); l_allocatedToMaster = true; } } // Otherwise paired or single EC, but not needed for maxECs else { // got an EC to be restricted and marked not functional TargetHandle_t l_pEC = *(pEC_it[l_proc][l_EX]); forceEcExEqDeconfig(l_pEC, i_present, i_deconfigReason); HWAS_DBG("pEC 0x%.8X - deconfigured! (%s) pi:%d EXi:%d", (*(pEC_it[l_proc][l_EX]))->getAttr(), (pECList[l_proc][l_EX].size() == 1)? "single": "paired", l_proc, l_EX); } (pEC_it[l_proc][l_EX])++; // next ec in this ex's list } // while pEC_it[l_proc][l_EX] != pECList[l_proc][l_EX].end() // Restore current EX l_EX = currrentEX; } // for l_proc < procs } // for l_EX < NUM_EX_PER_CHIP } // for procIdx < l_ProcCount } while(0); // do { if (errl) { HWAS_INF("restrictECunits failed (plid 0x%X)", errl->plid()); } else { HWAS_INF("restrictECunits completed successfully"); } return errl; } // restrictECunits void checkCriticalResources(uint32_t & io_commonPlid, const Target * i_pTop) { errlHndl_t l_errl = NULL; PredicatePostfixExpr l_customPredicate; PredicateIsFunctional l_isFunctional; TargetHandleList l_plist; // filter for targets that are deemed critical by ATTR_RESOURCE_IS_CRITICAL uint8_t l_critical = 1; PredicateAttrVal l_isCritical(l_critical); l_customPredicate.push(&l_isFunctional).Not().push(&l_isCritical).And(); targetService().getAssociated( l_plist, i_pTop, TargetService::CHILD, TargetService::ALL, &l_customPredicate); //if this list has ANYTHING then something critical has been deconfigured if(l_plist.size()) { HWAS_ERR("Insufficient HW to continue IPL: (critical resource not functional)"); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_MISSING_CRITICAL_RESOURCE * @devdesc checkCriticalResources found a critical * resource to be deconfigured * @custdesc A problem occurred during the IPL of the * system: A critical resource was found * to be deconfigured * * @userdata1[00:31] Number of critical resources * @userdata1[32:63] HUID of first critical resource found * @userdata2[00:31] HUID of second critical resource found, if present * @userdata2[32:63] HUID of third critical resource found, if present */ uint64_t userdata1 = 0; uint64_t userdata2 = 0; switch(std::min(3,(int)l_plist.size())) { case 3: userdata2 = static_cast(get_huid(l_plist[2])); /*fall through*/ // keep BEAM quiet case 2: userdata2 |= static_cast(get_huid(l_plist[1])) << 32; /*fall through*/ // keep BEAM quiet case 1: userdata1 = (static_cast(l_plist.size()) << 32) | static_cast(get_huid(l_plist[0])); } l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_MISSING_CRITICAL_RESOURCE, userdata1, userdata2 ); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout(l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid(l_errl, io_commonPlid); errlCommit(l_errl, HWAS_COMP_ID); // errl is now NULL } } errlHndl_t checkMinimumHardware(const TARGETING::ConstTargetHandle_t i_nodeOrSys, bool *o_bootable) { errlHndl_t l_errl = NULL; HWAS_INF("checkMinimumHardware entry"); uint32_t l_commonPlid = 0; do { //*********************************************************************/ // Common present and functional hardware checks. //*********************************************************************/ if(o_bootable) { *o_bootable = true; } PredicateHwas l_present; l_present.present(true); PredicateHwas l_functional; if(o_bootable) { // Speculative deconfig sets the specdeconfig to true for the target // in question, so we want to filter out the targets that have been // speculatively deconfigured. Setting specdeconfig to false in this // predicate will ensure that those targets are left out of the list // of functional targets. l_functional.specdeconfig(false); } l_functional.functional(true); // top 'starting' point - use first node if no i_node given (hostboot) Target *pTop; if (i_nodeOrSys == NULL) { Target *pSys; targetService().getTopLevelTarget(pSys); PredicateCTM l_predEnc(CLASS_ENC); PredicatePostfixExpr l_nodeFilter; l_nodeFilter.push(&l_predEnc).push(&l_functional).And(); TargetHandleList l_nodes; targetService().getAssociated( l_nodes, pSys, TargetService::CHILD, TargetService::IMMEDIATE, &l_nodeFilter ); if (l_nodes.empty()) { // no functional nodes, get out now if(o_bootable) { *o_bootable = false; break; } HWAS_ERR("Insufficient HW to continue IPL: (no func nodes)"); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_NO_NODES_FUNC * @devdesc checkMinimumHardware found no functional * nodes on the system * @custdesc A problem occurred during the IPL of the * system: No functional nodes were found on * the system. */ l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_NO_NODES_FUNC); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout(l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid(l_errl, l_commonPlid); errlCommit(l_errl, HWAS_COMP_ID); // errl is now NULL break; } // top level has at least 1 node - and it's our node. pTop = l_nodes[0]; HWAS_INF("checkMinimumHardware: i_nodeOrSys = NULL, using %.8X", get_huid(pTop)); } else { pTop = const_cast(i_nodeOrSys); HWAS_INF("checkMinimumHardware: i_nodeOrSys %.8X", get_huid(pTop)); } // check for functional Master Proc on this node Target* l_pMasterProc = NULL; //Get master proc at system level or node level based on target type if(pTop->getAttr() == TYPE_SYS) { targetService().queryMasterProcChipTargetHandle(l_pMasterProc); } else { targetService().queryMasterProcChipTargetHandle(l_pMasterProc, pTop); } if ((l_pMasterProc == NULL) || (!l_functional(l_pMasterProc))) { HWAS_ERR("Insufficient HW to continue IPL: (no master proc)"); if(o_bootable) { *o_bootable = false; break; } // determine some numbers to help figure out what's up.. PredicateCTM l_proc(CLASS_CHIP, TYPE_PROC); TargetHandleList l_plist; PredicatePostfixExpr l_checkExprPresent; l_checkExprPresent.push(&l_proc).push(&l_present).And(); targetService().getAssociated(l_plist, pTop, TargetService::CHILD, TargetService::ALL, &l_checkExprPresent); uint32_t procs_present = l_plist.size(); PredicatePostfixExpr l_checkExprFunctional; l_checkExprFunctional.push(&l_proc).push(&l_functional).And(); targetService().getAssociated(l_plist, pTop, TargetService::CHILD, TargetService::ALL, &l_checkExprFunctional); uint32_t procs_functional = l_plist.size(); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_NO_PROCS_FUNC * @devdesc checkMinimumHardware found no functional * master processor on this node * @custdesc A problem occurred during the IPL of the * system: No functional master processor * was found on this node. * @userdata1[00:31] HUID of node * @userdata2[00:31] number of present procs * @userdata2[32:63] number of present functional non-master procs */ const uint64_t userdata1 = (static_cast(get_huid(pTop)) << 32); const uint64_t userdata2 = (static_cast(procs_present) << 32) | procs_functional; l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_NO_PROCS_FUNC, userdata1, userdata2); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout(l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid(l_errl, l_commonPlid); errlCommit(l_errl, HWAS_COMP_ID); // errl is now NULL } else { // we have a Master Proc and it's functional // check for at least 1 functional ec/core on Master Proc TargetHandleList l_cores; PredicateCTM l_core(CLASS_UNIT, TYPE_CORE); PredicatePostfixExpr l_coresFunctional; l_coresFunctional.push(&l_core).push(&l_functional).And(); targetService().getAssociated(l_cores, l_pMasterProc, TargetService::CHILD, TargetService::ALL, &l_coresFunctional); HWAS_DBG( "checkMinimumHardware: %d functional cores", l_cores.size() ); if (l_cores.empty()) { HWAS_ERR("Insufficient HW to continue IPL: (no func cores)"); if(o_bootable) { *o_bootable = false; break; } // determine some numbers to help figure out what's up.. PredicateCTM l_ex(CLASS_UNIT, TYPE_EX); TargetHandleList l_plist; PredicatePostfixExpr l_checkExprPresent; l_checkExprPresent.push(&l_ex).push(&l_present).And(); targetService().getAssociated(l_plist, l_pMasterProc, TargetService::CHILD, TargetService::IMMEDIATE, &l_checkExprPresent); uint32_t exs_present = l_plist.size(); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_NO_CORES_FUNC * @devdesc checkMinimumHardware found no functional * processor cores on the master proc * @custdesc A problem occurred during the IPL of the * system: No functional processor cores * were found on the master processor. * @userdata1[00:31] HUID of node * @userdata1[32:63] HUID of master proc * @userdata2[00:31] number of present, non-functional cores */ const uint64_t userdata1 = (static_cast(get_huid(pTop)) << 32) | get_huid(l_pMasterProc); const uint64_t userdata2 = (static_cast(exs_present) << 32); l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_NO_CORES_FUNC, userdata1, userdata2); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout( l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH ); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid( l_errl, l_commonPlid ); errlCommit(l_errl, HWAS_COMP_ID); // errl is now NULL } // if no cores } // check here for functional dimms TargetHandleList l_dimms; PredicateCTM l_dimm(CLASS_LOGICAL_CARD, TYPE_DIMM); PredicatePostfixExpr l_checkExprFunctional; l_checkExprFunctional.push(&l_dimm).push(&l_functional).And(); targetService().getAssociated(l_dimms, pTop, TargetService::CHILD, TargetService::ALL, &l_checkExprFunctional); HWAS_DBG( "checkMinimumHardware: %d functional dimms", l_dimms.size()); if (l_dimms.empty()) { HWAS_ERR( "Insufficient hardware to continue IPL (func DIMM)"); if(o_bootable) { *o_bootable = false; break; } // determine some numbers to help figure out what's up.. TargetHandleList l_plist; PredicatePostfixExpr l_checkExprPresent; l_checkExprPresent.push(&l_dimm).push(&l_present).And(); targetService().getAssociated(l_plist, pTop, TargetService::CHILD, TargetService::ALL, &l_checkExprPresent); uint32_t dimms_present = l_plist.size(); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_NO_MEMORY_FUNC * @devdesc checkMinimumHardware found no * functional dimm cards. * @custdesc A problem occurred during the IPL of the * system: Found no functional dimm cards. * @userdata1[00:31] HUID of node * @userdata2[00:31] number of present, non-functional dimms */ const uint64_t userdata1 = (static_cast(get_huid(pTop)) << 32); const uint64_t userdata2 = (static_cast(dimms_present) << 32); l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_NO_MEMORY_FUNC, userdata1, userdata2); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout( l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH ); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid( l_errl, l_commonPlid ); errlCommit(l_errl, HWAS_COMP_ID); // errl is now NULL } // if no dimms // check for functional NX chiplets // Take specdeconfig into account here TargetHandleList l_functionalNXChiplets; PredicateCTM l_nxChiplet(CLASS_UNIT, TYPE_NX); PredicatePostfixExpr l_checkExprFunctionalNxChiplets; l_checkExprFunctionalNxChiplets.push(&l_nxChiplet) .push(&l_functional) .And(); targetService().getAssociated(l_functionalNXChiplets, pTop, TargetService::CHILD, TargetService::ALL, &l_checkExprFunctionalNxChiplets); HWAS_DBG( "checkMinimumHardware: %d NX chiplets", l_functionalNXChiplets.size()); if (l_functionalNXChiplets.empty()) { HWAS_ERR( "Insufficient hardware to continue IPL (NX chiplets)"); if(o_bootable) { *o_bootable = false; break; } TargetHandleList l_presentNXChiplets; getChildChiplets(l_presentNXChiplets, pTop, TYPE_NX, false); uint32_t nx_present = l_presentNXChiplets.size(); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_NO_NX_FUNC * @devdesc checkMinimumHardware found no * functional NX chiplets * @custdesc Insufficient hardware to continue IPL * @userdata1[00:31] HUID of node * @userdata2[00:31] number of present nonfunctional NX chiplets */ const uint64_t userdata1 = (static_cast(get_huid(pTop)) << 32); const uint64_t userdata2 = (static_cast(nx_present) << 32); l_errl = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_NO_NX_FUNC, userdata1, userdata2); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout( l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH ); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid( l_errl, l_commonPlid ); errlCommit(l_errl, HWAS_COMP_ID); } // ------------------------------------------------------------ // Check for Mirrored memory - // If the user requests mirrored memory and we do not have it, // post an errorlog but do not return a terminating error. // ------------------------------------------------------------ // Need to read an attribute set by PHYP? // check for minimum hardware that is specific to platform that we're // running on (ie, hostboot or fsp in hwsv). // if there is an issue, create and commit an error, and tie it to the // the rest of them with the common plid. HWAS::checkCriticalResources(l_commonPlid, pTop); platCheckMinimumHardware(l_commonPlid, i_nodeOrSys, o_bootable); } while (0); // --------------------------------------------------------------- // if the common plid got set anywhere above, we have an error. // --------------------------------------------------------------- if ((l_commonPlid)&&(o_bootable == NULL)) { /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_CHECK_MIN_HW * @reasoncode RC_SYSAVAIL_INSUFFICIENT_HW * @devdesc Insufficient hardware to continue. */ l_errl = hwasError( ERRL_SEV_UNRECOVERABLE, MOD_CHECK_MIN_HW, RC_SYSAVAIL_INSUFFICIENT_HW); // call out the procedure to find the deconfigured part. hwasErrorAddProcedureCallout( l_errl, EPUB_PRC_FIND_DECONFIGURED_PART, SRCI_PRIORITY_HIGH ); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid( l_errl, l_commonPlid ); } HWAS_INF("checkMinimumHardware exit - minimum hardware %s", ((l_errl != NULL)||((o_bootable!=NULL)&&(!*o_bootable))) ? "NOT available" : "available"); if((l_errl != NULL)||((o_bootable!=NULL)&&(!*o_bootable))) { // Minimum hardware not available, block speculative deconfigs Target *pSys; targetService().getTopLevelTarget(pSys); pSys->setAttr(1); } return l_errl ; } // checkMinimumHardware /** * @brief Checks if both targets have the same paths up to a certain number * of path elements, determined by the smaller affinity path. For Axone, * if an OMIC and OMI target are given as the parameters then it will use * the OMI's special OMIC_PARENT relation and compare that to the OMIC * target. Otherwise, affinity path comparison between OMI and OMIC * targets will always fail erroneously. * * @param[in] i_t1 TargetInfo containing the first target's affinity path * @param[in] i_t2 TargetInfo containing the second target's affinity path */ bool isSameSubPath(TargetInfo i_t1, TargetInfo i_t2) { PredicateCTM isOmic(CLASS_NA, TYPE_OMIC), isOmi(CLASS_NA, TYPE_OMI); EntityPath l_t1Path = i_t1.affinityPath, l_t2Path = i_t2.affinityPath; // Due to the special OMIC_PARENT relation between OMI and OMIC targets // this function will only work correctly if we use the OMIC_PARENT path // instead of the OMI affinity path when comparing OMI and OMIC targets. if (((i_t1.pThisTarget != nullptr) && (i_t2.pThisTarget != nullptr)) && ((isOmi(i_t1.pThisTarget) && isOmic(i_t2.pThisTarget)) || (isOmi(i_t2.pThisTarget) && isOmic(i_t1.pThisTarget)))) { TargetHandleList l_pOmicParent; if (i_t1.type == TYPE_OMI) { targetService().getAssociated(l_pOmicParent, i_t1.pThisTarget, TargetService::OMIC_PARENT, TargetService::ALL); l_t1Path = l_pOmicParent[0]->getAttr(); } else { targetService().getAssociated(l_pOmicParent, i_t2.pThisTarget, TargetService::OMIC_PARENT, TargetService::ALL); l_t2Path = l_pOmicParent[0]->getAttr(); } } size_t l_size = std::min(l_t1Path.size(), l_t2Path.size()); return l_t1Path.equals(l_t2Path, l_size); } /** * @brief Deconfigures a target based on type * * Called by invokePresentByAssoc() after presentByAssoc() is called * * @param[in] i_targInfo TargetInfo for the target to be deconfigured */ void deconfigPresentByAssoc(TargetInfo i_targInfo) { TargetHandleList pChildList; // find all CHILD matches for this target and deconfigure them getChildChiplets(pChildList, i_targInfo.pThisTarget, TYPE_NA); for (TargetHandleList::const_iterator pChild_it = pChildList.begin(); pChild_it != pChildList.end(); ++pChild_it) { TargetHandle_t l_childTarget = *pChild_it; enableHwasState(l_childTarget, true, false, i_targInfo.reason); HWAS_INF("deconfigPresentByAssoc: Target %.8X" " marked present, not functional: reason %.x", l_childTarget->getAttr(), i_targInfo.reason); } // find all CHILD_BY_AFFINITY matches for this target and deconfigure them getChildAffinityTargets(pChildList, i_targInfo.pThisTarget, CLASS_NA ,TYPE_NA); for (TargetHandleList::const_iterator pChild_it = pChildList.begin(); pChild_it != pChildList.end(); ++pChild_it) { TargetHandle_t l_affinityTarget = *pChild_it; enableHwasState(l_affinityTarget,true,false, i_targInfo.reason); HWAS_INF("deconfigPresentByAssoc: Target %.8X" " marked present, not functional: reason %.x", l_affinityTarget->getAttr(), i_targInfo.reason); } // deconfigure the target itself enableHwasState(i_targInfo.pThisTarget,true,false,i_targInfo.reason); HWAS_INF("deconfigPresentByAssoc: Target %.8X" " marked present, not functional, reason %.x", i_targInfo.pThisTarget->getAttr(), i_targInfo.reason); } // deconfigPresentByAssoc void invokePresentByAssoc() { HWAS_DBG("invokePresentByAssoc enter"); // make one list TargetHandleList l_funcTargetList; // get the functional MCBISTs (for Nimbus based systems) TargetHandleList l_funcMCBISTTargetList; getAllChiplets(l_funcMCBISTTargetList, TYPE_MCBIST, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMCBISTTargetList.begin(), l_funcMCBISTTargetList.end()); // If VPO, dump targets (MCBIST) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MCBIST targets:"); for (auto l_MCBIST : l_funcMCBISTTargetList) { HWAS_INF(" MCBIST: HUID %.8x", TARGETING::get_huid(l_MCBIST)); } #endif // get the functional MCSs (for Nimbus based systems) TargetHandleList l_funcMCSTargetList; getAllChiplets(l_funcMCSTargetList, TYPE_MCS, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMCSTargetList.begin(), l_funcMCSTargetList.end()); // If VPO, dump targets (MCS) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MCS targets:"); for (auto l_MCS : l_funcMCSTargetList) { HWAS_INF(" MCS: HUID %.8x", TARGETING::get_huid(l_MCS)); } #endif // get the functional MCs (for Cumulus based systems) TargetHandleList l_funcMCTargetList; getAllChiplets(l_funcMCTargetList, TYPE_MC, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMCTargetList.begin(), l_funcMCTargetList.end()); // If VPO, dump targets (MC) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MC targets:"); for (auto l_MC : l_funcMCTargetList) { HWAS_INF(" MC: HUID %.8x", TARGETING::get_huid(l_MC)); } #endif // get the functional MIs (for Cumulus based systems) TargetHandleList l_funcMITargetList; getAllChiplets(l_funcMITargetList, TYPE_MI, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMITargetList.begin(), l_funcMITargetList.end()); // If VPO, dump targets (MI) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MI targets:"); for (auto l_MI : l_funcMITargetList) { HWAS_INF(" MI: HUID %.8x", TARGETING::get_huid(l_MI)); } #endif // get the functional DMIs (for Cumulus based systems) TargetHandleList l_funcDMITargetList; getAllChiplets(l_funcDMITargetList, TYPE_DMI, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcDMITargetList.begin(), l_funcDMITargetList.end()); // If VPO, dump targets (DMI) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MI targets:"); for (auto l_DMI : l_funcDMITargetList) { HWAS_INF(" DMI: HUID %.8x", TARGETING::get_huid(l_DMI)); } #endif PredicateCTM mccPred(CLASS_NA, TYPE_MCC), omiPred(CLASS_NA, TYPE_OMI), omicPred(CLASS_NA, TYPE_OMIC), ocmbPred(CLASS_CHIP, TYPE_OCMB_CHIP), memportPred(CLASS_NA, TYPE_MEM_PORT); PredicateHwas functionalPred; functionalPred.functional(true); Target *pSys; targetService().getTopLevelTarget(pSys); PredicatePostfixExpr l_funcAxoneMemoryUnits; l_funcAxoneMemoryUnits.push(&mccPred).push(&omiPred).Or() .push(&omicPred).Or().push(&ocmbPred).Or().push(&memportPred) .Or().push(&functionalPred).And(); TargetHandleList l_funcAxoneTargetList; targetService().getAssociated(l_funcAxoneTargetList, pSys, TargetService::CHILD, TargetService::ALL, &l_funcAxoneMemoryUnits); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcAxoneTargetList.begin(), l_funcAxoneTargetList.end()); // get the functional membufs // note: do not expect membufs for NIMBUS and AXONE TargetHandleList l_funcMembufTargetList; getAllChips(l_funcMembufTargetList, TYPE_MEMBUF, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMembufTargetList.begin(), l_funcMembufTargetList.end()); // If VPO, dump targets (MEMBUF) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MEMBUF targets:"); for (auto l_MEMBUF : l_funcMembufTargetList) { HWAS_INF(" MEMBUF: HUID %.8x", TARGETING::get_huid(l_MEMBUF)); } #endif // get the functional mbas // note: do not expect mbas for NIMBUS TargetHandleList l_funcMBATargetList; getAllChiplets(l_funcMBATargetList, TYPE_MBA, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMBATargetList.begin(), l_funcMBATargetList.end()); // If VPO, dump targets (MBA) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): MBA targets:"); for (auto l_MBA : l_funcMBATargetList) { HWAS_INF(" MBA: HUID %.8x", TARGETING::get_huid(l_MBA)); } #endif // get the functional MCAs // note: MCAs are expected for NIMBUS direct memory attach TargetHandleList l_funcMcaTargetList; getAllChiplets(l_funcMcaTargetList, TYPE_MCA, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcMcaTargetList.begin(), l_funcMcaTargetList.end()); // If VPO, dump targets (MCA) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssocDA(): MCA targets:"); for (auto l_MCA : l_funcMcaTargetList) { HWAS_INF(" MCA: HUID %.8x", TARGETING::get_huid(l_MCA)); } #endif // get the functional dimms TargetHandleList l_funcDIMMTargetList; getAllLogicalCards(l_funcDIMMTargetList, TYPE_DIMM, true ); l_funcTargetList.insert(l_funcTargetList.begin(), l_funcDIMMTargetList.begin(), l_funcDIMMTargetList.end()); // If VPO, dump targets (DIMM) for verification & debug purposes #ifdef CONFIG_VPO_COMPILE HWAS_INF("invokePresentByAssoc(): DIMM targets:"); for (auto l_DIMM : l_funcDIMMTargetList) { HWAS_INF(" DIMM: HUID %.8x", TARGETING::get_huid(l_DIMM)); } #endif // Define vectors of TargetInfo structs to be used in presentByAssoc TargetInfoVector l_targInfo; TargetInfoVector l_targToDeconfig; // Iterate through targets and populate l_targInfo vector for (TargetHandleList::const_iterator l_targIter = l_funcTargetList.begin(); l_targIter != l_funcTargetList.end(); ++l_targIter) { TargetHandle_t pTarg = *l_targIter; TargetInfo l_TargetInfo; l_TargetInfo.pThisTarget = pTarg; l_TargetInfo.affinityPath = pTarg->getAttr(); l_TargetInfo.type = pTarg->getAttr(); l_targInfo.push_back(l_TargetInfo); } // Call presentByAssoc to take the functional targets in l_targInfo // and determine which ones need to be deconfigured presentByAssoc(l_targInfo, l_targToDeconfig); // Deconfigure targets in l_targToDeconfig for (TargetInfoVector::const_iterator l_targIter = l_targToDeconfig.begin(); l_targIter != l_targToDeconfig.end(); ++l_targIter) { deconfigPresentByAssoc(*l_targIter); } } // invokePresentByAssoc void presentByAssoc(TargetInfoVector& io_funcTargets, TargetInfoVector& o_targToDeconfig) { HWAS_DBG("presentByAssoc entry"); // Sort entire vector by affinity path. This provides the algorithm with // an ordered vector of targets, making it easy to check if: // for NIMBUS direct attach memory - // MCS has child MCA // MCA has child DIMM and parent MCS // DIMM has parent MCA. // for CUMULUS non direct attach memory - // MC has child MI // MI has parent MC and child DMI // DMI has parent MI and child MEMBUF // MEMBUF has parent DMI and child MBA // MBA has parent MEMBUF and child DIMM // DIMM has parent MBA. // for AXONE // MC has child MI and child OMIC // MI has parent MC and child MCC // OMIC has parent MC and child OMI // MCC has parent MC and child OMI // OMI has parent MCC and OMIC and child OCMB // OCMB has parent OMI and child MEM_PORT // MEM_PORT has parent OCMB and child DIMM // DIMM has parent MEM_PORT std::sort(io_funcTargets.begin(), io_funcTargets.end(), compareAffinity); // Keep track of the most recently seen MCBIST, MCS & MCA for NIMBUS // MC, MI, DMI, MEMBUF and MBA for CUMULUS. This allows the // algorithm to quickly check if targets share a MCS or MEMBUF and used // for backtracking after deleting a target from the vector size_t l_MCBISTIndex = __INT_MAX__; size_t l_MCSIndex = __INT_MAX__; size_t l_MCAIndex = __INT_MAX__; size_t l_MCIndex = __INT_MAX__; size_t l_MIIndex = __INT_MAX__; size_t l_DMIIndex = __INT_MAX__; size_t l_MCCIndex = __INT_MAX__; size_t l_MEMBUFIndex = __INT_MAX__; size_t l_MEMPORTIndex = __INT_MAX__; size_t l_MBAIndex = __INT_MAX__; size_t l_OMIIndex = __INT_MAX__; size_t l_OCMBIndex = __INT_MAX__; size_t i = 0; // Perform presentByAssoc algorithm while ( i < io_funcTargets.size() ) { // INIT STEPS: // Reset iterator, check if the next taget in // the vector is valid or even needed // Get iterator to erase elements from vector when needed std::vector::iterator it = io_funcTargets.begin(); std::advance(it,i); TargetInfo& l_curTargetInfo = *it; // Check if there is a next target and set it // Don't need to check next target with a DIMM TargetInfo* l_nextTargetInfo = NULL; if ( ((i + 1) < io_funcTargets.size()) && (l_curTargetInfo.type != TYPE_DIMM) ) { l_nextTargetInfo = &(*(it + 1)); } switch (l_curTargetInfo.type) { case TYPE_MCBIST: //NIMBUS { // No Child MCSs // If next is not a MCS sharing the same MCAs, deconfig MCBIST if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_MCS) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable MCBIST - NO_CHILD_MCS l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_MCS; // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); //Just erased current MCBIST, MCA/MCS index invalid l_MCAIndex = __INT_MAX__; l_MCSIndex = __INT_MAX__; } // Update MCBIST Index else { l_MCBISTIndex = i; l_MCAIndex = __INT_MAX__; //New MCBIST, MCA index invalid l_MCSIndex = __INT_MAX__; //New MCBIST, MCS index invalid i++; continue; } break; }// MCBIST case TYPE_MCS: //NIMBUS { // No Child MCAs // If next is not an MCA sharing the same MCS, deconfig MCS if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_MCA) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable MCS - NO_CHILD_MCA l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_MCA; } // No Parent MCBIST // If MCS doesn't share the same MCBIST as MCSIndex, deconfig MCS else if ( (l_MCBISTIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MCBISTIndex])) { // Disable MCS - NO_PARENT_MCBIST l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MCBIST; } // Update MCS Index else { l_MCSIndex = i; l_MCAIndex = __INT_MAX__; //New MCS, MCA index invalid i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MCBIST if ( l_MCBISTIndex != __INT_MAX__ ) { i = l_MCBISTIndex; l_MCAIndex = __INT_MAX__; //New MCBIST, MCA index invalid l_MCSIndex = __INT_MAX__; //New MCBIST, MCS index invalid } // Backtrack to beginning if no MCS has been seen yet else { i = 0; } break; } // MCS case TYPE_MC: //CUMULUS and AXONE { // No child MIs and (Axone) no child OMICs // (Axone) Since OMIC targets are always sorted after the DIMMs // of the current MC, we need to look for them instead // of following the main algorithm's procedure of // checking the next target in the vector. std::vector::iterator searchIt = it; if (searchIt != io_funcTargets.end()) { std::advance(searchIt, 1); } while (searchIt != io_funcTargets.end()) { TargetInfo& searchTargetInfo = *searchIt; // Stop searching for an OMIC if we encounter another MC // target since that would mean we have finished searching // all descendants of the current target. If we find an OMIC // target then stop also, since only one child is required. if ((searchTargetInfo.type == TYPE_MC) || ((searchTargetInfo.type == TYPE_OMIC) && isSameSubPath(l_curTargetInfo, searchTargetInfo))) { break; } std::advance(searchIt, 1); } // If next is not a MI sharing the same MC, deconfig MC // (Axone) And if we encountered another MC or reached the end of // the vector then there are no child OMIC targets for this // MC and it should be deconfigured. // Note: LHS (Non-Axone) && RHS (Axone) // For Non-Axone, RHS is always true and for Axone LHS is // always true. if (((l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_MI) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo)) && ((searchIt == io_funcTargets.end()) || ((*searchIt).type == TYPE_MC))) { // Disable MC - NO_CHILD_MI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_MI; // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); //Just erased current MC, so MI/DMI index invalid l_MIIndex = __INT_MAX__; l_DMIIndex = __INT_MAX__; l_MCCIndex = __INT_MAX__; } else { // Update MC Index l_MCIndex = i; l_MIIndex = __INT_MAX__; //New MC,so MI index invalid l_DMIIndex = __INT_MAX__; //New MC,so DMI index invalid l_MCCIndex = __INT_MAX__; //New MC,so MCC index invalid i++; continue; } break; }// MC case TYPE_MI: //CUMULUS and AXONE { // No Child DMIs (for CUMULUS) or MCCs (for AXONE) // If next is not a DMI/MCC sharing the same MI, deconfig MI if ( (l_nextTargetInfo == NULL) || ((l_nextTargetInfo->type != TYPE_DMI) && (l_nextTargetInfo->type != TYPE_MCC)) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo)) { // Disable MI - NO_CHILD_DMI_OR_MCC l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_DMI; } // No Parent MC // If MI doesn't share the same MC as MIIndex, deconfig MI else if ( (l_MCIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MCIndex])) { // Disable MI - NO_PARENT_MC l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MC; } // Update MI Index else { l_MIIndex = i; l_DMIIndex = __INT_MAX__; //New MI, so DMI index invalid l_MCCIndex = __INT_MAX__; //New MI, so MCC index invalid i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MC if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; l_MIIndex = __INT_MAX__; //New MC, MI index invalid l_DMIIndex = __INT_MAX__; //New MC, DMI index invalid l_MCCIndex = __INT_MAX__; //New MC, MCC index invalid } // Backtrack to beginning if no MC has been seen yet else { i = 0; } break; } // MI case TYPE_OMIC: { // No Child OMIs // Since OMIC targets are sorted after all of the rest of the // targets in the same subpath, we must do a backward search for // the correct OMI child. The l_OMIIndex is unreliable because it // may be pointing at an OMI that isn't this OMICs child. std::vector::iterator searchIt = it; if (searchIt != io_funcTargets.begin()) { std::advance(searchIt, -1); } while (searchIt != io_funcTargets.begin()) { TargetInfo& searchTargetInfo = *searchIt; // Stop the search if we encounter an MC because, based on the // sorting of the vector, there are no more OMIs to check for // this subpath. Also, if a child of the OMIC is found stop. if ((searchTargetInfo.type == TYPE_MC) || ((searchTargetInfo.type == TYPE_OMI) && isSameSubPath(l_curTargetInfo, searchTargetInfo))) { break; } std::advance(searchIt, -1); } // We encountered an MC target indicating that no OMIs were seen in // the vector that are children of this OMIC or we backtracked all // the way to the beginning and didn't find any children. if (searchIt == io_funcTargets.begin() || ((*searchIt).type == TYPE_MC)) { // Disable OMIC - NO_CHILD_OMI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_OMI; } // No Parent MC // If OMIC doesn't share the same MC as MCIndex, deconfig OMIC else if ( (l_MCIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MCIndex])) { // Disable OMIC - NO_PARENT_MC l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MC; } else { // Advance to the next target in the vector. i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); //Backtrack to last MC if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; } // Backtrack to beginning if no MC has been seen yet else { i = 0; } break; } case TYPE_DMI: //CUMULUS { // No Child MEMBUFs // If next is not a MEMBUF sharing the same DMI, deconfig DMI if ( (l_nextTargetInfo == NULL) || ( l_nextTargetInfo->type != TYPE_MEMBUF) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable DMI - NO_CHILD_MEMBUF l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_MEMBUF; } // No Parent MI // If DMI doesn't share the same MI as DMIIndex, deconfig DMI else if ( (l_MIIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MIIndex])) { // Disable DMI - NO_PARENT_MI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MI; } // Update DMI Index else { l_DMIIndex = i; i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MI if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; l_DMIIndex = __INT_MAX__; //New MI, DMI index invalid } //Backtrack to last MC, if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; l_DMIIndex = __INT_MAX__; //New MC, DMI index invalid } // Backtrack to beginning if no MI has been seen yet else { i = 0; } break; } // DMI case TYPE_MCC: // AXONE { // No Child OMIs // If next is not a OMI sharing the same MCC, deconfig MCC if ( (l_nextTargetInfo == NULL) || ( l_nextTargetInfo->type != TYPE_OMI) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable MCC - NO_CHILD_OMI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_OMI; } // No Parent MI // If MCC doesn't share the same MI as MCCIndex, deconfig MCC else if ( (l_MIIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MIIndex])) { // Disable MCC - NO_PARENT_MI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MI; } // Update MCC Index else { l_MCCIndex = i; i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MI if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; l_MCCIndex = __INT_MAX__; //New MI, MCC index invalid } //Backtrack to last MC, if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; l_MCCIndex = __INT_MAX__; //New MC, MCC index invalid } // Backtrack to beginning if no MC has been seen yet else { i = 0; } break; } // MCC case TYPE_OMI: // AXONE { // No Child OCMBs // If next is not a OCMB sharing the same OMI, deconfig OMI if ( (l_nextTargetInfo == NULL) || ( l_nextTargetInfo->type != TYPE_OCMB_CHIP) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable OMI - NO_CHILD_OCMB_CHIP l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_OCMB_CHIP; } // No Parent MCC // If OMI doesn't share the same MCC as MCCIndex, deconfig OMI else if ( (l_MCCIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MCCIndex])) { // Disable OMI - NO_PARENT_MCC l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MCC; } // No Parent OMIC // If OMI doesn't share the same OMIC as OMICIndex, deconfig OMI else { // Since OMIC targets are always sorted after the DIMMs of // the current MC, we need to look for them instead of // following the main algorithm's procedure of checking the // next target in the vector. std::vector::iterator searchIt = it; if (searchIt != io_funcTargets.end()) { std::advance(searchIt, 1); } while (searchIt != io_funcTargets.end()) { TargetInfo& searchTargetInfo = *searchIt; // Stop the search when we encounter an MC since that means // no parent OMICs were seen at the end of this sorted // subpath and also stop searching when we found the OMIC // parent. if ((searchTargetInfo.type == TYPE_MC) || ((searchTargetInfo.type == TYPE_OMIC) && isSameSubPath(l_curTargetInfo, searchTargetInfo))) { break; } std::advance(searchIt, 1); } // Either an MC was encountered or the end of the vector was // reached meaning that no parent OMIC was found. Deconfigure // this OMI. if (searchIt == io_funcTargets.end() || ((*searchIt).type == TYPE_MC)) { // Disable OMI - NO_PARENT_OMIC l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_OMIC; } // Update OMI Index else { l_OMIIndex = i; i++; continue; } } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MCC if ( l_MCCIndex != __INT_MAX__ ) { i = l_MCCIndex; l_OMIIndex = __INT_MAX__; //New MCC, OMI index invalid } // Backtrack to last MI, if no MCC has been seen yet else if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; l_OMIIndex = __INT_MAX__; //New MI, OMI index invalid } // Backtrack to last MC, if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; l_OMIIndex = __INT_MAX__; //New MC, OMI index invalid } // Backtrack to beginning if no MC has been seen yet else { i = 0; } break; } // OMI case TYPE_OCMB_CHIP: // AXONE { // No Child MEMPORTs // If next is not a MEMPORT sharing the same OCMB, deconfig OCMB if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_MEM_PORT) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo)) { // Disable OCMB - NO_CHILD_MEM_PORT l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_MEM_PORT; } // No Parent OMI // If OCMB doesn't share the same OMI as OCMBIndex, deconfig OCMB else if ( (l_OMIIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_OMIIndex])) { // Disable OCMB - NO_PARENT_OMI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_OMI; } // Update OCMB Index else { l_OCMBIndex = i; i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last OMI if ( l_OMIIndex != __INT_MAX__ ) { i = l_OMIIndex; l_OCMBIndex = __INT_MAX__; //New OMI, OCMB index invalid } // Backtrack to last MCC, if no OMI has been seen yet else if ( l_MCCIndex != __INT_MAX__ ) { i = l_MCCIndex; l_OCMBIndex = __INT_MAX__; //New MCC, OCMB index invalid } //Backtrack to last MI, if no MCC has been seen yet else if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; l_OCMBIndex = __INT_MAX__; //New MI, OCMB index invalid } //Backtrack to last MC, if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; l_OCMBIndex = __INT_MAX__; //New MC, OCMB index invalid } // Backtrack to beginning if no MC has been seen yet else { i = 0; } break; } // OCMB case TYPE_MEM_PORT: // AXONE { // No Child DIMMs // If next is not a DIMM sharing the same MEMPORT, deconfig MEMPORT if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_DIMM) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo)) { // Disable MEMPORT - NO_CHILD_DIMM l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_DIMM; } // No Parent OCMB // If MEMPORT doesn't share the same OCMB as MEMPORTIndex, // deconfig MEMPORT else if ( (l_OCMBIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_OCMBIndex])) { // Disable MEMPORT - NO_PARENT_OCMB_CHIP l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_OCMB_CHIP; } // Update MEMPORT Index else { l_MEMPORTIndex = i; i++; continue; } // Add target to Deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last OCMB if ( l_OCMBIndex != __INT_MAX__ ) { i = l_OCMBIndex; l_MEMPORTIndex = __INT_MAX__; //New OCMB, MEMPORT index invalid } // Backtrack to last OMI, if no OCMB has been seen yet else if ( l_OMIIndex != __INT_MAX__ ) { i = l_OMIIndex; l_MEMPORTIndex = __INT_MAX__; //New OMI, MEMPORT index invalid } // Backtrack to last MCC, if no OMI has been seen yet else if ( l_MCCIndex != __INT_MAX__ ) { i = l_MCCIndex; l_MEMPORTIndex = __INT_MAX__; //New MCC, MEMPORT index invalid } //Backtrack to last MI, if no MCC has been seen yet else if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; l_MEMPORTIndex = __INT_MAX__; //New MI, MEMPORT index invalid } //Backtrack to last MC, if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; l_MEMPORTIndex = __INT_MAX__; //New MC, MEMPORT index invalid } // Backtrack to beginning if no MC has been seen yet else { i = 0; } break; } // MEMPORT case TYPE_MEMBUF: // CUMULUS { // No Child MBAs // If next is not a MBA sharing the same MEMBUF, deconfig MEMBUF if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_MBA) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable MEMBUF - NO_CHILD_MBA l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_MBA; } // No Parent DMI (CUMULUS) // If MEMBUF doesn't share the same same DMI as DMIIndex // (for CUMULUS), deconfig MEMBUF else if ((l_DMIIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_DMIIndex])) { // Disable MEMBUF - NO_PARENT_MCS_OR_DMI l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_DMI; } // Update MEMBUF Index else { l_MEMBUFIndex = i; i++; continue; } // Add target to deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); //Backtrack to last DMI (CUMULUS), if no MEMBUF has been seen yet if ( l_DMIIndex != __INT_MAX__ ) { i = l_DMIIndex; } //Backtrack to last MI (CUMULUS), if no DMI has been seen yet else if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; } //Backtrack to last MC (CUMULUS), if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; } // Backtrack to beginning if no MCS (NIMBUS) or DMI (CUMULUS) // has been seen yet else { i = 0; } break; } // MEMBUF case TYPE_MBA: //CUMULUS { // No Child DIMMs // If next is not a DIMM sharing the same MBA, deconfig MBA if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_DIMM) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable MBA - NO_CHILD_DIMM l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_DIMM; } // No Parent MEMBUF // If MBA doesn't share the same MEMBUF as MEMBUFIndex, deconfig MBA else if ( (l_MEMBUFIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MEMBUFIndex])) { // Disable MBA - NO_PARENT_MEMBUF l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MEMBUF; } // Update MBA Index else { l_MBAIndex = i; i++; continue; } // Add target to deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MEMBUF if ( l_MEMBUFIndex != __INT_MAX__ ) { i = l_MEMBUFIndex; } //Backtrack to last DMI (CUMULUS), if no MEMBUF has been seen yet else if ( l_DMIIndex != __INT_MAX__ ) { i = l_DMIIndex; } //Backtrack to last MI (CUMULUS), if no DMI has been seen yet else if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; } //Backtrack to last MC (CUMULUS), if no MI has been seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; } // Backtrack to beginning else { i = 0; } break; } // MBA case TYPE_MCA: { // No Child DIMMs // If next is not a DIMM sharing the same MCA, deconfig MCS if ( (l_nextTargetInfo == NULL) || (l_nextTargetInfo->type != TYPE_DIMM) || !isSameSubPath(l_curTargetInfo, *l_nextTargetInfo) ) { // Disable MCS - NO_CHILD_DIMM l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_CHILD_DIMM; } // No Parent MCS // If MCA doesn't share the same MCS as MCSIndex, deconfig MCA else if ( (l_MCSIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MCSIndex])) { // Disable MCA - NO_PARENT_MCS l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MCS; } // Update MCA Index else { l_MCAIndex = i; i++; continue; } // Add target to deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); l_MCAIndex = __INT_MAX__; //MCA removed, MCA index invalid // Backtrack to last MCS if ( l_MCSIndex != __INT_MAX__ ) { i = l_MCSIndex; } // Backtrack to last MCBIST else if ( l_MCBISTIndex != __INT_MAX__ ) { i = l_MCBISTIndex; } // Backtrack to beginning if no MCBIST has been seen yet else { i = 0; } break; } // MCS case TYPE_DIMM: { // No Parent MBA or MCA or MEMPORT // If DIMM does not share the same MBA as MBAIndex, // or if DIMM does not share the same MCA as MCAIndex, // or if DIMM does not share the same MEMPORT as MEMPORTIndex, // deconfig DIMM if ( ((l_MBAIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MBAIndex])) && ((l_MCAIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MCAIndex])) && ((l_MEMPORTIndex == __INT_MAX__) || !isSameSubPath(l_curTargetInfo, io_funcTargets[l_MEMPORTIndex]))) { // Disable DIMM l_curTargetInfo.reason = DeconfigGard::DECONFIGURED_BY_NO_PARENT_MBA_OR_MCA; // Add target to deconfig vector to be deconfigured later o_targToDeconfig.push_back(l_curTargetInfo); // Remove target from funcTargets io_funcTargets.erase(it); // Backtrack to last MEMPORT (AXONE) if ( l_MEMPORTIndex != __INT_MAX__ ) { i = l_MEMPORTIndex; } // Backtrack to last OCMB (AXONE) if no MEMPORT has been // seen yet else if ( l_OCMBIndex != __INT_MAX__ ) { i = l_OCMBIndex; } // Backtrack to last OMI (AXONE) if no OCMB has been seen yet else if ( l_OMIIndex != __INT_MAX__ ) { i = l_OMIIndex; } // Backtrack to last MCC (AXONE) if no OMI has been seen yet else if ( l_MCCIndex != __INT_MAX__ ) { i = l_MCCIndex; } // Backtrack to last MBA (CUMULUS) else if ( l_MBAIndex != __INT_MAX__ ) { i = l_MBAIndex; } // Backtrack to last MCA (NIMBUS) else if ( l_MCAIndex != __INT_MAX__) { i = l_MCAIndex; } // Backtrack to last MCS (NIMBUS) if no MCA has been seen yet else if ( l_MCSIndex != __INT_MAX__) { i = l_MCSIndex; } // Backtrack to last MCBIST (NIMBUS) if no MCS has been seen yet else if ( l_MCBISTIndex != __INT_MAX__) { i = l_MCBISTIndex; } // Backtrack to last MEMBUF (CUMULUS) if no MBA has been // seen yet else if ( l_MEMBUFIndex != __INT_MAX__) { i = l_MEMBUFIndex; } // Backtrack to last DMI (CUMULUS),if no MEMBUF has been // seen yet else if ( l_DMIIndex != __INT_MAX__ ) { i = l_DMIIndex; } // Backtrack to last MI (CUMULUS and AXONE), if no DMI has been // seen yet else if ( l_MIIndex != __INT_MAX__ ) { i = l_MIIndex; } // Backtrack to last MC (CUMULUS and AXONE), if no MI has been // seen yet else if ( l_MCIndex != __INT_MAX__ ) { i = l_MCIndex; } // Backtrack to beginning if no MCS has been seen yet else { i = 0; } } else { i++; } break; } // DIMM default: // no action break; } // switch } // while } // presentByAssoc void setChipletGardsOnProc(TARGETING::Target * i_procTarget) { TARGETING::TargetHandleList l_targetList; TARGETING::ATTR_EQ_GARD_type l_eqGard = 0xFF; TARGETING::ATTR_EC_GARD_type l_ecGard = 0xFFFFFFFF; TARGETING::PredicateCTM l_eqs(TARGETING::CLASS_UNIT, TARGETING::TYPE_EQ); TARGETING::PredicateCTM l_ecs(TARGETING::CLASS_UNIT, TARGETING::TYPE_CORE); TARGETING::PredicateIsFunctional l_isFunctional; TARGETING::PredicatePostfixExpr l_funcChipletFilter; l_funcChipletFilter.push(&l_eqs).push(&l_ecs).Or(). push(&l_isFunctional).And(); TARGETING::targetService().getAssociated(l_targetList, i_procTarget, TARGETING::TargetService::CHILD, TARGETING::TargetService::ALL, &l_funcChipletFilter); for(auto & l_targ : l_targetList) { TARGETING::ATTR_CHIP_UNIT_type l_chipUnit = l_targ->getAttr(); if((l_targ)->getAttr() == TARGETING::TYPE_EQ) { l_eqGard &= ~(0x80 >> l_chipUnit ); } else { l_ecGard &= ~(0x80000000 >> l_chipUnit ); } } HWAS_INF("EQ Gard Bit:0x%x EC Gard Bit:0x%08x on proc with HUID: 0x%lx ", l_eqGard,l_ecGard, i_procTarget->getAttr()); i_procTarget->setAttr(l_eqGard); i_procTarget->setAttr(l_ecGard); }//setChipletGardsOnProc bool mixedECsAllowed(TARGETING::ATTR_MODEL_type i_model, TARGETING::ATTR_EC_type i_baseEC, TARGETING::ATTR_EC_type i_compareEC) { bool l_mixOk = false; #ifdef __HOSTBOOT_MODULE //Only check risk level in HB, HWSV always allow //get risk level (used to know if in compat mode) Target* pSys; targetService().getTopLevelTarget(pSys); auto l_risk = pSys->getAttr(); #endif if(TARGETING::MODEL_NIMBUS == i_model) { //For P9N -- DD2.2 and DD2.3 can be run in mixed compat mode //Compat mode risk levels 0-3 (4+ are native). Only pass when //actually running compat mode if ((i_baseEC != i_compareEC) && #ifdef __HOSTBOOT_MODULE //Only check risk level in HB, HWSV always allow (l_risk < TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM) && #endif ((i_baseEC == 0x22) || (i_baseEC == 0x23)) && ((i_compareEC == 0x22) || (i_compareEC == 0x23))) { l_mixOk = true; } } else if (TARGETING::MODEL_CUMULUS == i_model) { //For P9C -- DD1.2 and DD1.3 can be run in mixed compat mode //Compat mode risk levels 0-3 (4+ are native). Only pass when //actually running compat mode if ((i_baseEC != i_compareEC) && #ifdef __HOSTBOOT_MODULE //Only check risk level in HB, HWSV always allow (l_risk < TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM) && #endif ((i_baseEC == 0x12) || (i_baseEC == 0x13)) && ((i_compareEC == 0x12) || (i_compareEC == 0x13))) { l_mixOk = true; } } //else no other compat mode chips return l_mixOk; } /** * @brief Upgrade the Compatibility Risk Level to Native Risk Level * @param io_risk - RISK_LEVEL that gets upgraded */ void upgradeRiskLevel( uint8_t & io_risk ) { if (io_risk <= TARGETING::UTIL::P9N22_P9C12_RUGBY_FAVOR_PERFORMANCE) { // 0,1 -> 4 io_risk = TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM; } else if (io_risk == TARGETING::UTIL::P9N22_NO_RUGBY_MITIGATIONS) { // 2 -> 5 io_risk = TARGETING::UTIL::P9N23_P9C13_NATIVE_SMF_RUGBY_FAVOR_PERFORMANCE; } // 3-5: stay same } /** * @brief Downgrade the Native Risk Level to Compatibility Risk Level * @param io_risk - RISK_LEVEL that gets downgraded */ void downgradeRiskLevel( uint8_t & io_risk ) { if (io_risk == TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM) { // Base level Native needs to go to base level Compatibilty // 4 -> 0 io_risk = TARGETING::UTIL::P9N22_P9C12_RUGBY_FAVOR_SECURITY; } else if (io_risk == TARGETING::UTIL::P9N23_P9C13_NATIVE_SMF_RUGBY_FAVOR_PERFORMANCE) { // 5 -> 2 io_risk = TARGETING::UTIL::P9N22_NO_RUGBY_MITIGATIONS; } // 0-3: stay same } /** * @brief Update ATTR_RISK_LEVEL of NIMBUS_ONLY systems to * a native or compatibility level if so directed via * MRW attribute setting * See README file for Compatibility Truth Tables that will * indicate what RISK_LEVEL should be after this function. * @return Error if not able to update setting, else nullptr */ errlHndl_t updateProcCompatibilityRiskLevel() { HWAS_INF("updateProcCompatibilityRiskLevel entry"); errlHndl_t l_err = nullptr; // First EC chip level on the system (first processor's checked level) TARGETING::ATTR_EC_type l_firstEc = 0; TARGETING::Target * pFirstEcChip = nullptr; // Last EC chip level on the system (last processor's checked level) TARGETING::ATTR_EC_type l_lastEc = 0; TARGETING::Target * pLastEcChip = nullptr; TARGETING::TargetHandleList l_procChips; bool mixedEc = false; do { //Get all functional chips getAllChips(l_procChips, TYPE_PROC); //Loop through all functional procs and //check for a mismatch of EC levels for(const auto & l_chip : l_procChips) { l_lastEc = l_chip->getAttr(); if (l_firstEc == 0) { // first chip, setup the last EC read to valid values l_firstEc = l_lastEc; pFirstEcChip = l_chip; } if (l_firstEc != l_lastEc) { // found a different EC level so mark ECs mixed mixedEc = true; pLastEcChip = l_chip; break; } } // Now update the RISK_LEVEL Target* pSys; targetService().getTopLevelTarget(pSys); auto l_risk = pSys->getAttr(); auto l_original_risk = l_risk; auto l_risk_origin = pSys->getAttr(); auto l_proc_compatibility_req = pSys->getAttr(); if (l_proc_compatibility_req == TARGETING::PROC_COMPATIBILITY_REQ_FORCED_COMPATIBILITY) { // If RISK_LEVEL is a Native setting (4 or more) if (l_risk >= TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM) { // Both PROC_COMPATIBILITY_REQ and DEFAULT_MRW_RISK_LEVEL are // MRW attributes that should make sense // Error if risk is Native and set by the MRW because // we don't know what the MRW really wants for RISK_LEVEL. if (l_risk_origin == TARGETING::RISK_LEVEL_ORIGIN_MRW) { HWAS_ERR("updateProcCompatibilityRiskLevel::Trying to " "force compatibility of invalid MRW risk level %d", l_risk); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_UPDATE_PROC_COMPAT_RISK_LEVEL * @reasoncode RC_FORCED_COMPAT_INVALID_LEVEL * @devdesc MRW setting of RISK_VALUE is invalid * for FORCED_COMPATIBILITY * @custdesc Incompatible Processor Chip Levels * @userdata1[00:31] 1st EC level * @userdata1[32:63] 2nd EC level * @userdata2[00:15] RISK_LEVEL * @userdata2[16:31] PROC_COMPATIBILITY_REQ * @userdata2[32:63] RISK_LEVEL_ORIGIN (0=USER, 1=MRW) * */ const uint64_t userdata1 = (static_cast(l_firstEc) << 32) | static_cast(l_lastEc); const uint64_t userdata2 = (static_cast(l_risk) << 48) | (static_cast(l_proc_compatibility_req) << 32) | (static_cast(l_risk_origin)); l_err = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_UPDATE_PROC_COMPAT_RISK_LEVEL, RC_FORCED_COMPAT_INVALID_LEVEL, userdata1, userdata2); // SW_CALLOUT - MRW setting error hwasErrorAddProcedureCallout( l_err, HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); break; } // All system types should be put in compatibility mode so // downgrade to force compatibility if risk is 4 or more downgradeRiskLevel(l_risk); } } else if (l_proc_compatibility_req == TARGETING::PROC_COMPATIBILITY_REQ_ALLOW_COMPATIBILITY) { // Upgrade DD2.3 system if risk is 2 or less and set by MRW if (!mixedEc && (l_risk <= TARGETING::UTIL::P9N22_NO_RUGBY_MITIGATIONS) && (l_risk_origin == TARGETING::RISK_LEVEL_ORIGIN_MRW) && (l_firstEc == 0x23)) { upgradeRiskLevel(l_risk); } // Downgrade Mixed or DD2.2 systems with risk = 4 or more else if ((mixedEc || (l_firstEc == 0x22)) && (l_risk >= TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM)) { downgradeRiskLevel(l_risk); } } else // TARGETING::MRW_COMPATIBILITY_RISK_FLAG_FORCE_NATIVE { // NATIVE mode does NOT allow mixed EC if (mixedEc) { HWAS_ERR("updateProcCompatibilityRiskLevel::Trying to " "force native compatibility of mixed processor levels", " (0x%02X and 0x%02X)", l_firstEc, l_lastEc ); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_UPDATE_PROC_COMPAT_RISK_LEVEL * @reasoncode RC_FORCED_NATIVE_INVALID_MIXED_EC * @devdesc Forced native compatibility not allowed * for mixed EC levels * @custdesc Incompatible Processor Chip Levels * @userdata1[00:31] 1st EC level * @userdata1[32:63] 2nd EC level * @userdata2[00:15] RISK_LEVEL * @userdata2[16:31] PROC_COMPATIBILITY_REQ * @userdata2[32:63] RISK_LEVEL_ORIGIN (0=USER, 1=MRW) * */ const uint64_t userdata1 = (static_cast(l_firstEc) << 32) | static_cast(l_lastEc); const uint64_t userdata2 = (static_cast(l_risk) << 48) | (static_cast(l_proc_compatibility_req) << 32) | (static_cast(l_risk_origin)); l_err = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_UPDATE_PROC_COMPAT_RISK_LEVEL, RC_FORCED_NATIVE_INVALID_MIXED_EC, userdata1, userdata2); // Callout the DD2.2 as high and DD2.3 as low if (l_firstEc == 0x22) { // pFirstEcChip is DD2.2 platHwasErrorAddHWCallout(l_err, pFirstEcChip, HWAS::SRCI_PRIORITY_HIGH, NO_DECONFIG, GARD_NULL); // pLastEcChip is DD2.3 platHwasErrorAddHWCallout(l_err, pLastEcChip, HWAS::SRCI_PRIORITY_LOW, NO_DECONFIG, GARD_NULL); } else { // pFirstEcChip is DD2.3 platHwasErrorAddHWCallout(l_err, pFirstEcChip, HWAS::SRCI_PRIORITY_LOW, NO_DECONFIG, GARD_NULL); // pLastEcChip is DD2.2 platHwasErrorAddHWCallout(l_err, pLastEcChip, HWAS::SRCI_PRIORITY_HIGH, NO_DECONFIG, GARD_NULL); } break; } // DD2.3 system does not support risk=3, if in NATIVE mode if ((l_firstEc == 0x23) && (l_risk == TARGETING::UTIL::P9N22_P9N23_JAVA_PERF)) { HWAS_ERR("updateProcCompatibilityRiskLevel::Trying to " "force native compatibility of DD2.3 for risk level %d", l_risk); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_UPDATE_PROC_COMPAT_RISK_LEVEL * @reasoncode RC_FORCED_NATIVE_OF_INCOMPATIBLE_RISK * @devdesc Risk level 3 is incompatible for forced * native setting of DD2.3 * @custdesc Incompatible Processor Chip Levels * @userdata1[00:31] 1st EC level * @userdata1[32:63] 2nd EC level * @userdata2[00:15] RISK_LEVEL * @userdata2[16:31] PROC_COMPATIBILITY_REQ * @userdata2[32:63] RISK_LEVEL_ORIGIN (0=USER, 1=MRW) * */ const uint64_t userdata1 = (static_cast(l_firstEc) << 32) | static_cast(l_lastEc); const uint64_t userdata2 = (static_cast(l_risk) << 48) | (static_cast(l_proc_compatibility_req) << 32) | (static_cast(l_risk_origin)); l_err = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_UPDATE_PROC_COMPAT_RISK_LEVEL, RC_FORCED_NATIVE_OF_INCOMPATIBLE_RISK, userdata1, userdata2); // SW_CALLOUT - MRW setting error (FORCED_NATIVE) hwasErrorAddProcedureCallout( l_err, HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW ); break; } // DD2.3 system should be upgraded to native level if // risk is 2 or less if ((l_firstEc == 0x23) && (l_risk <= TARGETING::UTIL::P9N22_NO_RUGBY_MITIGATIONS)) { upgradeRiskLevel(l_risk); } // DD2.2 system should be downgraded to run in its // native mode if risk is 4 or more if ((l_firstEc == 0x22) && (l_risk >= TARGETING::UTIL::P9N23_P9C13_NATIVE_MODE_MINIMUM)) { downgradeRiskLevel(l_risk); } } char ecLevelStr[20]; if (mixedEc) { sprintf(ecLevelStr,"Mixed"); } else { sprintf(ecLevelStr,"0x%02X", l_firstEc); } if (l_risk != l_original_risk) { HWAS_INF("updateProcCompatibilityRiskLevel: " "Update RISK_LEVEL from %d to %d " "(EC Level: %s, RISK_ORIGIN: %s, PROC_COMPATIBILITY_REQ: %d)", l_original_risk, l_risk, ecLevelStr, l_risk_origin==TARGETING::RISK_LEVEL_ORIGIN_MRW?"MRW":"User", l_proc_compatibility_req); pSys->setAttr(l_risk); } else { HWAS_DBG("updateProcCompatibilityRiskLevel: " "Keeping RISK_LEVEL %d " "(EC Level: %s, RISK_ORIGIN: %s, PROC_COMPATIBILITY_REQ: %d)", l_risk, ecLevelStr, l_risk_origin==TARGETING::RISK_LEVEL_ORIGIN_MRW?"MRW":"User", l_proc_compatibility_req); } } while (0); HWAS_INF("updateProcCompatibilityRiskLevel exit"); return l_err; } /** * @brief Normalize the RISK_LEVEL for Axone to use the upper range */ void normalizeRiskLevelForAxone( void ) { // Axone follows Nimbus DD2.3 settings except it can use // the low or high numbers. Let's normalize it to the // high range to make things less confusing. Target* pSys; targetService().getTopLevelTarget(pSys); auto l_risk = pSys->getAttr(); if( TARGETING::UTIL::P9A_RUGBY_FAVOR_SECURITY_LOWER == l_risk ) { l_risk = TARGETING::UTIL::P9A_RUGBY_FAVOR_SECURITY; } else if( TARGETING::UTIL::P9A_RUGBY_FAVOR_PERFORMANCE_LOWER == l_risk ) { l_risk = TARGETING::UTIL::P9A_RUGBY_FAVOR_PERFORMANCE; } else { // Nothing to change, just leave return; } pSys->setAttr(l_risk); } errlHndl_t validateProcessorEcLevels() { HWAS_INF("validateProcessorEcLevels entry"); errlHndl_t l_err = nullptr; uint32_t l_commonPlid = 0; TARGETING::ATTR_EC_type l_masterEc = 0; TARGETING::ATTR_EC_type l_ecToCompare = 0; TARGETING::ATTR_HUID_type l_masterHuid = 0; TARGETING::TargetHandleList l_procChips; Target* l_pMasterProc = NULL; TARGETING::ATTR_MODEL_type l_model; do { //Get all functional chips getAllChips(l_procChips, TYPE_PROC); // check for functional Master Proc on this node l_err = targetService().queryMasterProcChipTargetHandle(l_pMasterProc, NULL, true); //queryMasterProcChipTargetHandle will check for null, make sure //there was no problem finding the master proc if(l_err) { HWAS_ERR( "validateProcessorEcLevels:: Unable to find master proc"); //Don't commit the error just let it get returned from function break; } //Get master info and store it for comparing later l_masterEc = l_pMasterProc->getAttr(); l_masterHuid = get_huid(l_pMasterProc); l_model = l_pMasterProc->getAttr(); // Update the RISK_LEVEL attribute before checking EC level mismatch if(TARGETING::MODEL_NIMBUS == l_model) { l_err = updateProcCompatibilityRiskLevel(); if (l_err) { HWAS_ERR("validateProcessorEcLevels:: Unable to update RISK_LEVEL"); break; } } else if(TARGETING::MODEL_AXONE == l_model) { // Axone follows Nimbus DD2.3 settings except it can use // the low or high numbers, going to force one way. normalizeRiskLevelForAxone(); } //Loop through all functional procs and create error logs //for any processors whose EC does not match the master for(const auto & l_chip : l_procChips) { l_ecToCompare = l_chip->getAttr(); bool l_mixOk = mixedECsAllowed(l_model,l_masterEc, l_ecToCompare); if((l_ecToCompare != l_masterEc) && !l_mixOk) { HWAS_ERR("validateProcessorEcLevels:: Slave Proc EC level not does not match master, " "this is an unrecoverable error.. system will shut down"); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_VALIDATE_EC_LEVELS * @reasoncode RC_EC_MISMATCH * @devdesc Found a slave processor whose EC level * did not match the master * @custdesc Incompatible Processor Chip Levels * @userdata1[00:31] HUID of slave chip * @userdata1[32:63] EC level of slave chip * @userdata2[00:31] HUID of master chip * @userdata2[32:63] EC level of master chip */ const uint64_t userdata1 = (static_cast(get_huid(l_chip)) << 32) | static_cast(l_ecToCompare); const uint64_t userdata2 = (static_cast(l_masterHuid) << 32) | static_cast(l_masterEc); l_err = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_VALIDATE_EC_LEVELS, RC_EC_MISMATCH, userdata1, userdata2); // call out the procedure to find the deconfigured part. platHwasErrorAddHWCallout( l_err, l_chip, SRCI_PRIORITY_HIGH, NO_DECONFIG, GARD_NULL); // if we already have an error, link this one to the earlier; // if not, set the common plid hwasErrorUpdatePlid(l_err, l_commonPlid); errlCommit(l_err, HWAS_COMP_ID); //Do not break, we want to find all mismatches } } }while(0); if(l_commonPlid) { HWAS_ERR("validateProcessorEcLevels:: One or more slave processor's EC level did not match master, check error logs"); /*@ * @errortype * @severity ERRL_SEV_UNRECOVERABLE * @moduleid MOD_VALIDATE_EC_LEVELS * @reasoncode RC_FAILED_EC_VALIDATION * @devdesc Found one or more slave processor whose EC level * did not match the master * @custdesc Incompatible Processor Chip Levels * @userdata1[00:64] Number of Procs */ const uint64_t userdata1 = static_cast(l_procChips.size()); const uint64_t userdata2 = (static_cast(l_masterHuid) << 32) | static_cast(l_masterEc); l_err = hwasError(ERRL_SEV_UNRECOVERABLE, MOD_VALIDATE_EC_LEVELS, RC_FAILED_EC_VALIDATION, userdata1, userdata2); // link this error to the earlier errors; hwasErrorUpdatePlid(l_err, l_commonPlid); } HWAS_INF("validateProcessorEcLevels exit"); return l_err; } //validateProccesorEcLevels errlHndl_t markDisabledMcas() { errlHndl_t l_errl = nullptr; uint8_t lxData[HWAS::VPD_CRP0_LX_HDR_DATA_LENGTH]; HWAS_INF("markDisabledMcas entry"); do { //Get the functional MCAs TargetHandleList l_mcaList; getAllChiplets(l_mcaList, TYPE_MCA, true); for (auto l_mca : l_mcaList) { // fill the Lx data buffer with zeros memset(lxData, 0x00, VPD_CRP0_LX_HDR_DATA_LENGTH); //Read Lx keyword for associated proc and MCA l_errl = platReadLx(l_mca, lxData); if (l_errl) { // commit the error but keep going errlCommit(l_errl, HWAS_COMP_ID); } if (lxData[VPD_CRP0_LX_FREQ_INDEP_INDEX + VPD_CRP0_LX_PORT_DISABLED] != 0) { // Since port is disabled, MCA is not functional, but // it's present. enableHwasState(l_mca, true, // present false, // not functional DeconfigGard::DECONFIGURED_BY_DISABLED_PORT); HWAS_DBG("MCA %.8X - marked present, not functional", l_mca->getAttr()); TargetInfo l_TargetInfo; l_TargetInfo.affinityPath = l_mca->getAttr(); l_TargetInfo.pThisTarget = l_mca; l_TargetInfo.type = l_mca->getAttr(); l_TargetInfo.reason = DeconfigGard::DECONFIGURED_BY_DISABLED_PORT; // Deconfigure child targets for this MCA deconfigPresentByAssoc(l_TargetInfo); } } }while(0); HWAS_INF("markDisabledMcas exit"); return l_errl; } //markDisabledMcas }; // end namespace