author    Thi Tran <thi@us.ibm.com>  2018-05-07 16:22:44 -0500
committer Daniel M. Crowell <dcrowell@us.ibm.com>  2018-06-14 10:50:51 -0400
commit    d46f111a8f66830714e32d003982718d13abf66f (patch)
tree      b0baff7eda79e92c8ad26449f68dd9383ccb4602
parent    8d97caa96550ad4c884ce3cd8e8f2771d724cf2a (diff)
download  talos-hostboot-d46f111a8f66830714e32d003982718d13abf66f.tar.gz
          talos-hostboot-d46f111a8f66830714e32d003982718d13abf66f.zip
Fix unbalanced FCO distribution between procs
The current code does not account for the case where an EX has no cores under it. In that scenario, the code skips the current proc and takes cores out of the next proc instead, so the next proc loses more cores whenever the current proc has additional EXs without cores. The fix is to look for and take out the next available cores in the current proc until no cores remain there, and only then move on to the next proc.

Change-Id: I0e8650766c9f13c5616141b87e05967950d6fb56
CQ: SW424855
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58460
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Richard J. Knight <rjknight@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
-rw-r--r--  src/usr/hwas/common/hwas.C | 91
1 file changed, 70 insertions, 21 deletions
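
As context for the diff below, here is a minimal, standalone C++ sketch of the balancing idea the commit describes (this is not the Hostboot implementation; the per-EX core counts, the core budget, and all variable names are hypothetical): EXs drive the outer loop and procs the inner loop, and when a proc's current EX slot has no cores or has already been consumed, the next available EX on the same proc is used, so that proc keeps contributing to the round instead of pushing the shortfall onto the next proc.

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    // Hypothetical input: cores still present under each EX slot, per proc.
    // Proc 0 has EX slots with no cores; proc 1 is fully populated.
    std::vector<std::vector<int>> coresPerEX = {
        {2, 0, 0, 2, 2},   // proc 0
        {2, 2, 2, 2, 2}    // proc 1
    };

    const std::size_t procs = coresPerEX.size();
    const std::size_t numEX = coresPerEX[0].size();
    int coreBudget = 8;     // total functional cores allowed to remain

    // Analogue of EC_checkedList: which EX slots have already been consumed.
    std::vector<std::vector<bool>> checked(procs,
                                           std::vector<bool>(numEX, false));

    // EXs in the outer loop, procs in the inner loop, so each round takes
    // one EX's worth of cores from every proc.
    for (std::size_t ex = 0; ex < numEX && coreBudget > 0; ++ex)
    {
        for (std::size_t proc = 0; proc < procs && coreBudget > 0; ++proc)
        {
            std::size_t pick = ex;

            // The fix: if this proc's EX slot is empty or already used,
            // look for the next available EX on the SAME proc rather than
            // letting another proc absorb the difference.
            while (pick < numEX &&
                   (coresPerEX[proc][pick] == 0 || checked[proc][pick]))
            {
                ++pick;
            }
            if (pick == numEX)
            {
                continue;   // no cores left on this proc this round
            }

            checked[proc][pick] = true;
            int take = std::min(coresPerEX[proc][pick], coreBudget);
            coreBudget -= take;
            std::printf("proc %zu: keep %d core(s) from EX %zu\n",
                        proc, take, pick);
        }
    }
    return 0;
}

With the sample numbers above, each proc ends up keeping four cores; per the commit message, the pre-fix behavior would instead let the gaps on proc 0 shift the extra deconfigured cores onto the next proc.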
diff --git a/src/usr/hwas/common/hwas.C b/src/usr/hwas/common/hwas.C
index e9dabaae5..98908d1fc 100644
--- a/src/usr/hwas/common/hwas.C
+++ b/src/usr/hwas/common/hwas.C
@@ -1804,17 +1804,59 @@ errlHndl_t restrictECunits(
// inner loop and EXs as the outer to distribute the functional ECs
// evenly between procs. After we run out of ECs, we deconfigure the
// remaining ones.
- for (uint32_t j = 0; j < NUM_EX_PER_CHIP; j++)
+
+ // Mark the ECs that have been accounted for
+ uint8_t EC_checkedList[procs][NUM_EX_PER_CHIP];
+ memset(EC_checkedList, 0, sizeof(EC_checkedList));
+
+ for (uint32_t l_EX = 0; l_EX < NUM_EX_PER_CHIP; l_EX++)
{
- for (int i = 0; i < procs; i++)
+ for (int l_proc = 0; l_proc < procs; l_proc++)
{
+ // Save l_EX value to current EX, this is to be restored later
+ uint32_t currrentEX = l_EX;
+
+ // If core doesn't exist or already checked, find the
+ // next available core on this proc in order to balance
+ // the core distribution.
+ uint8_t nextEXwithCore = 0;
+ if ( (!pECList[l_proc][l_EX].size()) ||
+ (EC_checkedList[l_proc][l_EX]) )
+ {
+ HWAS_INF("Current EX = %d, PROC %d: Need to find next "
+ "avail EX with cores.", l_EX, l_proc);
+ for (nextEXwithCore = l_EX+1;
+ nextEXwithCore < NUM_EX_PER_CHIP;
+ nextEXwithCore++)
+ {
+ if ( (pECList[l_proc][nextEXwithCore].size()) &&
+ (!(EC_checkedList[l_proc][nextEXwithCore]) ) )
+ {
+ l_EX = nextEXwithCore;
+ HWAS_INF("Next avail EX with cores = %d",
+ nextEXwithCore);
+ break;
+ }
+ }
+ // No more core in this proc
+ if (nextEXwithCore == NUM_EX_PER_CHIP)
+ {
+ HWAS_INF("No more EX with cores in proc %d", l_proc);
+ l_EX = currrentEX;
+ continue;
+ }
+ }
+
+ // Mark this core has been checked.
+ EC_checkedList[l_proc][l_EX] = 1;
+
// Walk through the EC list from this EX
- while (pEC_it[i][j] != pECList[i][j].end())
+ while (pEC_it[l_proc][l_EX] != pECList[l_proc][l_EX].end())
{
// Check if EC pair for this EX
- if ((pECList[i][j].size() == 2) &&
+ if ((pECList[l_proc][l_EX].size() == 2) &&
(pairedECs_remaining != 0) &&
- (i==l_masterProc || // is master or
+ (l_proc==l_masterProc || // is master or
l_allocatedToMaster || // was allocated to master
pairedECs_remaining > 2)) // save 2 cores for master
{
@@ -1822,18 +1864,18 @@ errlHndl_t restrictECunits(
goodECs++;
pairedECs_remaining--;
HWAS_DBG("pEC 0x%.8X - is good %d! (paired) pi:%d EXi:%d pairedECs_remaining %d",
- (*(pEC_it[i][j]))->getAttr<ATTR_HUID>(),
- goodECs, i, j, pairedECs_remaining);
- if (i == l_masterProc)
+ (*(pEC_it[l_proc][l_EX]))->getAttr<ATTR_HUID>(),
+ goodECs, l_proc, l_EX, pairedECs_remaining);
+ if (l_proc == l_masterProc)
{
HWAS_DBG("Allocated to master");
l_allocatedToMaster = true;
}
}
// Check if single EC for this EX
- else if ((pECList[i][j].size() == 1) &&
+ else if ((pECList[l_proc][l_EX].size() == 1) &&
(singleECs_remaining != 0) &&
- (i==l_masterProc || // is master or
+ (l_proc==l_masterProc || // is master or
l_allocatedToMaster || // was allocated to master
singleECs_remaining > 1)) // save core for master
@@ -1842,9 +1884,9 @@ errlHndl_t restrictECunits(
goodECs++;
singleECs_remaining--;
HWAS_DBG("pEC 0x%.8X - is good %d! (single) pi:%d EXi:%d singleECs_remaining %d",
- (*(pEC_it[i][j]))->getAttr<ATTR_HUID>(),
- goodECs, i, j, singleECs_remaining);
- if (i == l_masterProc)
+ (*(pEC_it[l_proc][l_EX]))->getAttr<ATTR_HUID>(),
+ goodECs, l_proc, l_EX, singleECs_remaining);
+ if (l_proc == l_masterProc)
{
HWAS_DBG("Allocated to master");
l_allocatedToMaster = true;
@@ -1854,18 +1896,25 @@ errlHndl_t restrictECunits(
else
{
// got an EC to be restricted and marked not functional
- TargetHandle_t l_pEC = *(pEC_it[i][j]);
+ TargetHandle_t l_pEC = *(pEC_it[l_proc][l_EX]);
forceEcExEqDeconfig(l_pEC, i_present, i_deconfigReason);
HWAS_DBG("pEC 0x%.8X - deconfigured! (%s) pi:%d EXi:%d",
- (*(pEC_it[i][j]))->getAttr<ATTR_HUID>(),
- (pECList[i][j].size() == 1)? "single": "paired",
- i, j);
+ (*(pEC_it[l_proc][l_EX]))->getAttr<ATTR_HUID>(),
+ (pECList[l_proc][l_EX].size() == 1)? "single": "paired",
+ l_proc, l_EX);
}
- (pEC_it[i][j])++; // next ec in this ex's list
- } // while pEC_it[i][j] != pECList[i][j].end()
- } // for i < procs
- } // for j < NUM_EX_PER_CHIP
+ (pEC_it[l_proc][l_EX])++; // next ec in this ex's list
+
+ } // while pEC_it[l_proc][l_EX] != pECList[l_proc][l_EX].end()
+
+ // Restore current EX
+ l_EX = currrentEX;
+
+ } // for l_proc < procs
+
+ } // for l_EX < NUM_EX_PER_CHIP
+
} // for procIdx < l_ProcCount
} while(0); // do {