summaryrefslogtreecommitdiffstats
path: root/src/usr
diff options
context:
space:
mode:
authorPrachi Gupta <pragupta@us.ibm.com>2018-05-21 15:29:40 -0500
committerWilliam G. Hoffa <wghoffa@us.ibm.com>2018-06-02 11:59:58 -0400
commit5815703c3be9f8830011f573a719e69553cb1b94 (patch)
tree46a7c93cb82a1736d52c7bef338519a444724677 /src/usr
parentc6916a42d34bdd1c9502056740ec3a819c082099 (diff)
downloadtalos-hostboot-5815703c3be9f8830011f573a719e69553cb1b94.tar.gz
talos-hostboot-5815703c3be9f8830011f573a719e69553cb1b94.zip
Add support for missing memory behind master proc
On a PHYP-based system, when we detect that memory is missing behind the master processor, we find a proc with memory and set ATTR_PROC_MEM_TO_USE to its HRMOR. This commit adds this support in the HWAS common code, since HWSV will call this function, update the SBE mbox registers, and IPL the system. Change-Id: I88a6cb69aa10147365c556f9cf31014066bd3d08 CQ:SW430015 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59159 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com> Reviewed-by: Sachin Gupta <sgupta2m@in.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r--src/usr/hwas/common/hwas.C235
-rw-r--r--src/usr/isteps/istep07/call_mss_attr_update.C222
2 files changed, 305 insertions, 152 deletions
diff --git a/src/usr/hwas/common/hwas.C b/src/usr/hwas/common/hwas.C
index a6fa872b5..c00d8cf78 100644
--- a/src/usr/hwas/common/hwas.C
+++ b/src/usr/hwas/common/hwas.C
@@ -87,6 +87,24 @@ bool compareAffinity(const TargetInfo t1, const TargetInfo t2)
return t1.affinityPath < t2.affinityPath;
}
+/*
+ * @brief This function takes in proc target and returns group/chip id
+ * in the following bit format: GGGG CCC
+ * where G = Group Id and C = Chip Id
+ *
+ * The value is built from the ATTR_FABRIC_GROUP_ID and
+ * ATTR_FABRIC_CHIP_ID attributes read from i_proc. Elsewhere in this
+ * file it is used as the sort key for procs and as the candidate value
+ * for io_proc_mem_to_use inside check_for_missing_memory.
+ *
+ * @param[in] i_proc: proc target (dereferenced without a null check)
+ * @retval: chip info including group and chip id
+ *
+ * NOTE(review): the return type is uint64_t, yet
+ * check_for_missing_memory stores the result in a uint8_t. Presumably
+ * the group id fits in 4 bits and the chip id in 3 bits so nothing is
+ * lost -- confirm against the attribute definitions.
+ */
+uint64_t getGroupChipIdInfo (TargetHandle_t i_proc)
+{
+ auto l_grp_id = i_proc->getAttr<ATTR_FABRIC_GROUP_ID>();
+ auto l_chip_id = i_proc->getAttr<ATTR_FABRIC_CHIP_ID>();
+
+ //Chip ID is three bits long, therefore, shift group id
+ //by 3 and OR it with chip id
+ return ((l_grp_id << 3) | l_chip_id);
+}
+
/**
* @brief simple helper fn to get and set hwas state to poweredOn,
* present, functional
@@ -290,6 +308,223 @@ errlHndl_t disableOBUSes()
return l_err;
}
+errlHndl_t update_proc_mem_to_use (const Target* i_node)
+{ /*
+ * Purpose: re-evaluate ATTR_PROC_MEM_TO_USE for the procs behind i_node.
+ *
+ * Flow, as implemented below:
+ * 1. Look up the master proc of i_node and read its current
+ * ATTR_PROC_MEM_TO_USE value.
+ * 2. Hand that value to check_for_missing_memory, which may replace
+ * it and reports whether memory is missing.
+ * 3. If memory is reported missing and the value was not changed,
+ * build an unrecoverable error (RC_NO_UPDATE_WHEN_MEM_MISSING)
+ * with a procedure callout and stop.
+ * 4. Otherwise write the resulting value to ATTR_PROC_MEM_TO_USE on
+ * every proc returned for i_node, even when it is unchanged.
+ *
+ * i_node: node target whose procs are examined and updated.
+ * Returns: nullptr on success, otherwise the error handle from the
+ * master proc lookup, from check_for_missing_memory, or the
+ * error created in step 3.
+ */
+ errlHndl_t l_errl {nullptr};
+ TargetHandle_t l_masterProcTarget {nullptr};
+
+ do
+ {
+ //Get master proc
+ l_errl =
+ targetService().queryMasterProcChipTargetHandle(l_masterProcTarget,
+ i_node);
+ if (l_errl)
+ {
+ HWAS_ERR("update_proc_mem_to_use: unable to get master proc");
+ break;
+ }
+
+
+ //Check if this processor has missing memory
+ //If yes, then get the HRMOR of the proc we want to use the mem of
+ // NOTE(review): the value handled here is the group/chip id produced by
+ // getGroupChipIdInfo (see check_for_missing_memory), not an HRMOR
+ // address -- confirm the intended wording.
+ uint8_t l_proc_mem_to_use = l_masterProcTarget->getAttr
+ <ATTR_PROC_MEM_TO_USE>();
+ uint8_t l_proc_mem_to_use_save = l_proc_mem_to_use; // snapshot used to detect a change
+ bool l_found_missing_mem = false;
+ l_errl = check_for_missing_memory(i_node, l_proc_mem_to_use,
+ l_found_missing_mem);
+ if (l_errl)
+ {
+ HWAS_ERR("update_proc_mem_to_use: unable to check for missing mem");
+ break;
+ }
+
+ //We found missing memory behind master proc, but
+ //check_for_missing_memory didn't update proc_mem_to_use
+ //probably because there are no other procs with memory,
+ //create an error.
+ if (l_found_missing_mem && (l_proc_mem_to_use==l_proc_mem_to_use_save))
+ {
+ HWAS_ERR("update_proc_mem_to_use: ATTR_PROC_MEM_TO_USE didn't get"
+ " updated even though we were missing mem behind master proc");
+
+ /*@
+ * @errortype
+ * @severity ERRL_SEV_UNRECOVERABLE
+ * @moduleid MOD_UPDATE_PROC_MEM_TO_USE
+ * @reasoncode RC_NO_UPDATE_WHEN_MEM_MISSING
+ * @devdesc No procs found with valid memory
+ * @custdesc A problem occurred during the IPL of
+ * the system: No memory found
+ * @userdata1 Saved value of ATTR_PROC_MEM_TO_USE
+ * @userdata2 Updated value of ATTR_PROC_MEM_TO_USE
+ */
+ l_errl = hwasError(ERRL_SEV_UNRECOVERABLE,
+ MOD_UPDATE_PROC_MEM_TO_USE,
+ RC_NO_UPDATE_WHEN_MEM_MISSING,
+ l_proc_mem_to_use_save,
+ l_proc_mem_to_use);
+
+ hwasErrorAddProcedureCallout(l_errl,
+ EPUB_PRC_FIND_DECONFIGURED_PART,
+ SRCI_PRIORITY_HIGH);
+ break; // the attribute is left untouched on this error path
+
+ }
+
+ //set PROC_MEM_TO_USE to the group/chip id of the proc we want to
+ //use the mem of
+ //get all procs behind the input node
+ TargetHandleList l_procs;
+ getChildAffinityTargetsByState( l_procs,
+ i_node,
+ CLASS_CHIP,
+ TYPE_PROC,
+ UTIL_FILTER_ALL); // presumably includes non-functional procs too -- TODO confirm
+ for (auto & l_proc : l_procs)
+ {
+ l_proc->setAttr<ATTR_PROC_MEM_TO_USE>(l_proc_mem_to_use);
+ }
+
+ } while (0);
+
+ return l_errl;
+}
+
+errlHndl_t check_for_missing_memory (const Target* i_node,
+ uint8_t & io_proc_mem_to_use,
+ bool & o_found_missing_mem)
+{ /*
+ * Purpose: decide which proc's memory should be used and report whether
+ * the currently selected proc is missing memory.
+ *
+ * i_node: node target whose procs are searched.
+ * io_proc_mem_to_use: on entry, the current selection encoded as
+ * (group id << 3) | chip id; on exit, either the same value or the
+ * encoding of the lowest group/chip id functional proc that has at
+ * least one functional dimm.
+ * o_found_missing_mem: set to true unless a functional proc matching the
+ * incoming io_proc_mem_to_use is found and has functional dimms.
+ * Returns: l_errl, which is never assigned after its nullptr
+ * initialization in the code shown here.
+ */
+
+ errlHndl_t l_errl {nullptr};
+ o_found_missing_mem = true; // pessimistic default, cleared only in Step 2
+
+ do
+ {
+ /////////////////////////////////////////////////////////////
+ //Step 1 -- Figure out the lowest group/chip id proc that has
+ // memory
+ /////////////////////////////////////////////////////////////
+ //get all procs behind the input node
+ TargetHandleList l_procs;
+ getChildAffinityTargetsByState( l_procs,
+ i_node,
+ CLASS_CHIP,
+ TYPE_PROC,
+ UTIL_FILTER_FUNCTIONAL);
+
+ //sort based on group/chip id. So, we can deterministically
+ //pick the processor with memory. This will also help guarantee
+ //that we will attempt to use master (or altmaster) proc's memory
+ //first before using slave proc's memory.
+ std::sort(l_procs.begin(), l_procs.end(),
+ [] (TargetHandle_t a, TargetHandle_t b)
+ {
+ return getGroupChipIdInfo(a) < getGroupChipIdInfo(b);
+ });
+
+ uint8_t l_temp_proc_mem_to_use = io_proc_mem_to_use; // stays equal to the input when no proc has dimms
+
+ //find a processor that has dimms
+ for (auto & l_proc : l_procs)
+ {
+ TargetHandleList l_funcDimms;
+ getChildAffinityTargetsByState( l_funcDimms,
+ l_proc,
+ CLASS_LOGICAL_CARD,
+ TYPE_DIMM,
+ UTIL_FILTER_FUNCTIONAL);
+
+ //Pick the first proc we find with dimms
+ if (l_funcDimms.size() > 0)
+ {
+ l_temp_proc_mem_to_use = getGroupChipIdInfo(l_proc); // uint64_t result stored in a uint8_t
+ break;
+ }
+
+ }
+
+ /////////////////////////////////////////////////////////////
+ //Step 2 -- Get the proc we are currently using the memory of
+ // and check if it has memory
+ /////////////////////////////////////////////////////////////
+ //get the proc pointed by PROC_MEM_TO_USE and check
+ //if there is memory behind that proc. We rely on the current
+ //value of PROC_MEM_TO_USE, so, we don't change our answer
+ //unnecessarily (in cases when both master proc and altmaster
+ //have memory)
+ auto l_grp = (io_proc_mem_to_use >> 3); // inverse of the packing done in getGroupChipIdInfo
+ auto l_chip = (io_proc_mem_to_use & 0x07); // last three bits are chipId
+ PredicateAttrVal<ATTR_FABRIC_GROUP_ID> l_predGrp (l_grp);
+ PredicateAttrVal<ATTR_FABRIC_CHIP_ID> l_predChip (l_chip);
+ PredicateCTM l_predProc (CLASS_CHIP, TYPE_PROC);
+ PredicateIsFunctional l_isFunctional;
+ PredicatePostfixExpr l_procCheckExpr;
+
+ // postfix form of: proc AND functional AND group matches AND chip matches
+ l_procCheckExpr.push(&l_predProc).push(&l_isFunctional).
+ push(&l_predGrp).push(&l_predChip).And().And().And();
+
+ TargetHandleList l_procMemUsedCurrently;
+ targetService().getAssociated(l_procMemUsedCurrently,
+ i_node,
+ TargetService::CHILD_BY_AFFINITY,
+ TargetService::IMMEDIATE,
+ &l_procCheckExpr);
+
+ HWAS_INF("check_for_missing_memory: looking for a proc with "
+ "grp=0x%x chip=0x%x, found %d procs",
+ l_grp, l_chip, l_procMemUsedCurrently.size()); // NOTE(review): size() is passed for a %d conversion
+
+ if (l_procMemUsedCurrently.size() >= 1)
+ {
+ //found proc
+ //Check if proc whose memory we are currently using has dimms
+ TargetHandleList l_funcDimms;
+ getChildAffinityTargetsByState( l_funcDimms,
+ l_procMemUsedCurrently[0], // only the first match is examined
+ CLASS_LOGICAL_CARD,
+ TYPE_DIMM,
+ UTIL_FILTER_FUNCTIONAL);
+ if (l_funcDimms.size() > 0)
+ {
+ //we found dimms behind the proc we are currently using
+ o_found_missing_mem = false;
+ }
+ }
+
+
+ /////////////////////////////////////////////////////////////
+ //Step 3-- If a proc with lower group/chip id has memory or
+ // there is no memory behind the currently used proc,
+ // then we update the proc_mem_to_use
+ //NOTE: This ensures that if someone replaces the dimm on a
+ // lower-numbered proc, then we can fall back to that
+ // lower-numbered proc. Also, it makes sure that we are updating
+ // only when current proc_mem_to_use doesn't have memory or it's
+ // not pointing to a valid proc.
+ /////////////////////////////////////////////////////////////
+ if ((l_temp_proc_mem_to_use < io_proc_mem_to_use)
+ || (o_found_missing_mem))
+ {
+ HWAS_INF("check_for_missing_memory: found a need to switch"
+ " PROC_MEM_TO_USE from 0x%x to 0x%x",
+ io_proc_mem_to_use, l_temp_proc_mem_to_use);
+ io_proc_mem_to_use = l_temp_proc_mem_to_use; // may be a no-op when Step 1 found no proc with dimms
+ }
+ else
+ {
+ HWAS_INF("check_for_missing_memory: kept PROC_MEM_TO_USE same"
+ " 0x%x", io_proc_mem_to_use);
+ }
+
+
+ } while (0);
+
+ return l_errl;
+}
+
+
errlHndl_t discoverTargets()
{
HWAS_DBG("discoverTargets entry");
diff --git a/src/usr/isteps/istep07/call_mss_attr_update.C b/src/usr/isteps/istep07/call_mss_attr_update.C
index f4eb9fb1d..eae864a7f 100644
--- a/src/usr/isteps/istep07/call_mss_attr_update.C
+++ b/src/usr/isteps/istep07/call_mss_attr_update.C
@@ -52,6 +52,9 @@
#include <targeting/common/commontargeting.H>
#include <targeting/common/utilFilter.H>
+// HWAS
+#include <hwas/common/hwas.H>
+
// fapi2 support
#include <fapi2.H>
#include <fapi2/target.H>
@@ -119,9 +122,6 @@ errlHndl_t check_proc0_memory_config(IStepError & io_istepErr)
TargetHandleList l_procsList;
getAllChips(l_procsList, TYPE_PROC);
- TARGETING::Target * l_sys = NULL;
- TARGETING::targetService().getTopLevelTarget(l_sys);
-
// Loop through all procs getting IDs
procIds_t l_procIds[l_procsList.size()];
uint8_t i = 0;
@@ -190,9 +190,6 @@ errlHndl_t check_proc0_memory_config(IStepError & io_istepErr)
TargetService::ALL,
&l_checkExprFunctional);
- TARGETING::ATTR_PAYLOAD_KIND_type payload_kind =
- l_sys->getAttr<TARGETING::ATTR_PAYLOAD_KIND>();
-
TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
"check_proc0_memory_config: %d functional dimms behind proc0 "
"%.8X",
@@ -232,42 +229,6 @@ errlHndl_t check_proc0_memory_config(IStepError & io_istepErr)
continue;
}
- // If our master proc doesn't have memory, and we're on a phyp
- // system, we want to use this proc's memory instead.
-#if 0
- // TODO RTC: 181139. This support can not be put into place
- // until we're able to use the Get Capabilities function
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "check_proc0_memory_config: Payload kind is %llx",
- payload_kind);
- if(payload_kind == TARGETING::PAYLOAD_KIND_PHYP)
- {
- TRACFCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "check_proc0_memory_config: We are in a PHYP system, "
- "setting master to use alt memory from proc %llx.",
- get_huid(l_procIds[i].proc));
-
- uint8_t l_chipID = l_procIds[i].chipId;
- uint8_t l_groupID = l_procIds[i].groupId;
-
- TargetHandle_t l_masterProc = NULL;
- targetService().masterProcChipTargetHandle(l_masterProc);
-
- uint8_t l_proc_memory = l_masterProc->getAttr<
- TARGETING::ATTR_PROC_MEM_TO_USE>();
-
- if( l_proc_memory != ((l_groupID <<3) | l_chipID))
- {
- l_masterProc->setAttr<TARGETING::ATTR_PROC_MEM_TO_USE>(
- ((l_groupID << 3) | l_chipID));
-
- l_updateNeeded = true;
- // Leave loop after switching memory
- break;
- }
- }else
- {
-#endif
// Use this proc for swapping memory with proc0
l_victim = i;
@@ -293,106 +254,50 @@ errlHndl_t check_proc0_memory_config(IStepError & io_istepErr)
// Leave loop after swapping memory
break;
-#if 0
- }
-#endif
- }
-
- if(payload_kind != TARGETING::PAYLOAD_KIND_PHYP)
- {
- // Check that a victim was found
- assert( l_victim < l_procsList.size(), "No swap match found" );
}
- }
-#if 0
- // TODO RTC: 181139. This support can not be put into place
- // until we're able to use the Get Capabilities function
- else if( !(l_dimms.empty()) &&
- (payload_kind == TARGETING::PAYLOAD_KIND_PHYP) )
- {
- // If the memory isn't empty, and we're on a phyp system,
- // we want to verify that we're set up to use the correct memory
- uint8_t l_chipID = l_procIds[i].chipId;
- uint8_t l_groupID = l_procIds[i].groupId;
-
- TargetHandle_t l_masterProc = NULL;
- targetService().masterProcChipTargetHandle(l_masterProc);
- uint8_t l_proc_memory =
- l_masterProc->getAttr<TARGETING::ATTR_PROC_MEM_TO_USE>();
-
- if( l_proc_memory != ((l_groupID <<3) | l_chipID))
- {
- l_masterProc->setAttr<TARGETING::ATTR_PROC_MEM_TO_USE>(
- ((l_groupID << 3) | l_chipID));
-
- l_updateNeeded = true;
- }
}
-#endif
- if(payload_kind != TARGETING::PAYLOAD_KIND_PHYP)
+ // Loop through all procs detecting that IDs are set correctly
+ for (i = 0; i < l_procsList.size(); i++)
{
-#if 0
- // TODO RTC: 181139. This support can not be put into place
- // until we're able to use the Get Capabilities function
- TargetHandle_t l_masterProc = NULL;
- targetService().masterProcChipTargetHandle(l_masterProc);
-
- // Check the attribute, and default it to proc0 if
- // it doesn't match.
- uint8_t l_proc_memory =
- l_masterProc->getAttr<TARGETING::ATTR_PROC_MEM_TO_USE>();
-
- if( l_proc_memory != 0)
+ TRACDCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
+ "check_proc0_memory_config: Compare settings for "
+ "Proc %.8X\n"
+ " groupIdEff = %d, groupId = %d\n"
+ " chipIdEff = %d, chipId = %d",
+ get_huid(l_procIds[i].proc),
+ l_procIds[i].groupIdEff,
+ l_procIds[i].groupId,
+ l_procIds[i].chipIdEff,
+ l_procIds[i].chipId);
+
+ if((l_procIds[i].groupId != l_procIds[i].groupIdEff) ||
+ (l_procIds[i].chipId != l_procIds[i].chipIdEff) )
{
- l_masterProc->setAttr<TARGETING::ATTR_PROC_MEM_TO_USE>(0);
+ // Update attributes
+ (l_procIds[i].proc)->
+ setAttr<ATTR_PROC_EFF_FABRIC_GROUP_ID>(l_procIds[i].groupId);
+ (l_procIds[i].proc)->
+ setAttr<ATTR_PROC_EFF_FABRIC_CHIP_ID>(l_procIds[i].chipId);
l_updateNeeded = true;
}
-#endif
- // Loop through all procs detecting that IDs are set correctly
- for (i = 0; i < l_procsList.size(); i++)
- {
- TRACDCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "check_proc0_memory_config: Compare settings for "
- "Proc %.8X\n"
- " groupIdEff = %d, groupId = %d\n"
- " chipIdEff = %d, chipId = %d",
- get_huid(l_procIds[i].proc),
- l_procIds[i].groupIdEff,
- l_procIds[i].groupId,
- l_procIds[i].chipIdEff,
- l_procIds[i].chipId);
-
- if((l_procIds[i].groupId != l_procIds[i].groupIdEff) ||
- (l_procIds[i].chipId != l_procIds[i].chipIdEff) )
- {
- // Update attributes
- (l_procIds[i].proc)->
- setAttr<ATTR_PROC_EFF_FABRIC_GROUP_ID>(l_procIds[i].groupId);
- (l_procIds[i].proc)->
- setAttr<ATTR_PROC_EFF_FABRIC_CHIP_ID>(l_procIds[i].chipId);
-
- l_updateNeeded = true;
- }
-
- TRACDCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
- "check_proc0_memory_config: Current attribute "
- "settings for Proc %.8X\n"
- " ATTR_PROC_EFF_FABRIC_GROUP_ID = %d\n"
- " ATTR_FABRIC_GROUP_ID = %d\n"
- " ATTR_PROC_EFF_FABRIC_CHIP_ID = %d\n"
- " ATTR_FABRIC_CHIP_ID = %d",
- get_huid(l_procIds[i].proc),
- (l_procIds[i].proc)->
- getAttr<ATTR_PROC_EFF_FABRIC_GROUP_ID>(),
- (l_procIds[i].proc)->getAttr<ATTR_FABRIC_GROUP_ID>(),
- (l_procIds[i].proc)->
- getAttr<ATTR_PROC_EFF_FABRIC_CHIP_ID>(),
- (l_procIds[i].proc)->getAttr<ATTR_FABRIC_CHIP_ID>());
- }
+ TRACDCOMP(ISTEPS_TRACE::g_trac_isteps_trace,
+ "check_proc0_memory_config: Current attribute "
+ "settings for Proc %.8X\n"
+ " ATTR_PROC_EFF_FABRIC_GROUP_ID = %d\n"
+ " ATTR_FABRIC_GROUP_ID = %d\n"
+ " ATTR_PROC_EFF_FABRIC_CHIP_ID = %d\n"
+ " ATTR_FABRIC_CHIP_ID = %d",
+ get_huid(l_procIds[i].proc),
+ (l_procIds[i].proc)->
+ getAttr<ATTR_PROC_EFF_FABRIC_GROUP_ID>(),
+ (l_procIds[i].proc)->getAttr<ATTR_FABRIC_GROUP_ID>(),
+ (l_procIds[i].proc)->
+ getAttr<ATTR_PROC_EFF_FABRIC_CHIP_ID>(),
+ (l_procIds[i].proc)->getAttr<ATTR_FABRIC_CHIP_ID>());
}
if(l_updateNeeded)
@@ -451,27 +356,41 @@ void* call_mss_attr_update( void *io_pArgs )
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, "call_mss_attr_update entry");
errlHndl_t l_err = NULL;
- // Check the memory on proc0 chip
- l_err = check_proc0_memory_config(l_StepError);
-
- if (l_err)
+ do
{
- TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
- "ERROR 0x%.8X: check_proc0_memory_config",
- l_err->reasonCode());
+ bool l_isPhyp = TARGETING::is_phyp_load();
+ bool l_spEnabled = INITSERVICE::spBaseServicesEnabled();
+
+ // Check the memory on master proc chip
+ // Use this mechanism for:
+ // non-phyp case or
+ // PHYP on OpenPower machine
+ if (!l_isPhyp || (l_isPhyp && !l_spEnabled))
+ {
+ l_err = check_proc0_memory_config(l_StepError);
- // Ensure istep error created and has same plid as this error
- l_StepError.addErrorDetails( l_err );
- errlCommit( l_err, HWPF_COMP_ID );
- }
- else
- {
- TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
- "SUCCESS: check_proc0_memory_config");
- }
+ if (l_err)
+ {
+ TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
+ "ERROR 0x%.8X: check_proc0_memory_config",
+ l_err->reasonCode());
+
+ // Ensure istep error created and has same plid as this error
+ l_StepError.addErrorDetails( l_err );
+ errlCommit( l_err, HWPF_COMP_ID );
+ break;
+ }
+ else
+ {
+ TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
+ "SUCCESS: check_proc0_memory_config");
+ }
+ }
+ else
+ {
+ //TODO -- next commit adds the logic for this case
+ }
- if (l_StepError.isNull())
- {
// Get all functional MCS chiplets
TARGETING::TargetHandleList l_mcsTargetList;
getAllChiplets(l_mcsTargetList, TYPE_MCS);
@@ -495,8 +414,7 @@ void* call_mss_attr_update( void *io_pArgs )
errlCommit( l_err, HWPF_COMP_ID );
}
}
- }
-
+ } while (0);
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, "call_mss_attr_update exit" );
return l_StepError.getErrorHandle();
OpenPOWER on IntegriCloud