summaryrefslogtreecommitdiffstats
path: root/src/usr/pnor/pnorrp.C
diff options
context:
space:
mode:
authorDean Sanner <dsanner@us.ibm.com>2017-08-14 10:02:43 -0500
committerDaniel M. Crowell <dcrowell@us.ibm.com>2017-09-14 22:29:41 -0400
commit9acfce99596f12dcc60952f8506a77e542609cbf (patch)
treec0053f3d4c74e412f598c7d704da02c4c83bc0de /src/usr/pnor/pnorrp.C
parent16887e07aa54b19b64f8c754d41b6076fe72464f (diff)
downloadtalos-hostboot-9acfce99596f12dcc60952f8506a77e542609cbf.tar.gz
talos-hostboot-9acfce99596f12dcc60952f8506a77e542609cbf.zip
Clear ECC sections marked "clearOnEccErr" on error
- Add the capability for Hostboot to recover (with a reboot) when it consumes an ECC error. - The PNOR layout needs to be updated to flag the recoverable sections (generally cached or throw-away data such as the *VPD, HBEL, and GUARD partitions). - Upon detecting bad ECC, Hostboot will check the partition flag and, if it is set, clear the section and write good ECC to PNOR. It will then throw the normal error and terminate, waiting for the BMC to issue a reboot. Change-Id: Ie4f4c0637d3962e9d4871e84a0bda8c256a74440 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44608 Reviewed-by: Stephen M. Cprek <smcprek@us.ibm.com> Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Reviewed-by: Nicholas E. Bofferding <bofferdn@us.ibm.com> Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/usr/pnor/pnorrp.C')
-rw-r--r--src/usr/pnor/pnorrp.C45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/usr/pnor/pnorrp.C b/src/usr/pnor/pnorrp.C
index d138d5e39..162881830 100644
--- a/src/usr/pnor/pnorrp.C
+++ b/src/usr/pnor/pnorrp.C
@@ -649,6 +649,8 @@ errlHndl_t PnorRP::getSectionInfo( PNOR::SectionId i_section,
!= 0) ? true : false;
o_info.Volatile = ((iv_TOC[id].misc & FFS_MISC_VOLATILE)
!= 0) ? true : false;
+ o_info.clearOnEccErr = ((iv_TOC[id].misc & FFS_MISC_CLR_ECC_ERR)
+ != 0) ? true : false;
}
} while(0);
@@ -1360,9 +1362,22 @@ errlHndl_t PnorRP::readFromDevice( uint64_t i_offset,
// create an error if we couldn't correct things
if( ecc_stat == PNOR::ECC::UNCORRECTABLE )
{
+ PNOR::SectionId l_id = computeSectionPhys(i_offset);
TRACFCOMP( g_trac_pnor, "PnorRP::readFromDevice> Uncorrectable ECC error : chip=%d,offset=0x%.X", i_chip, i_offset );
CONSOLE::displayf( NULL, "ECC error in PNOR flash in section offset 0x%.8X\n", i_offset );
+ //Attempt to find the section and check if we can clear
+ //it to recover
+ if ((l_id != PNOR::INVALID_SECTION )
+ && ((iv_TOC[l_id].misc & FFS_MISC_CLR_ECC_ERR) != 0))
+ {
+ CONSOLE::displayf( nullptr, "Clearing section %s due to ECC error\n",
+ SectionIdToString(l_id));
+ clearSection(l_id); //shutting down -- ignore and leak errl
+
+ CONSOLE::displayf( nullptr, "Done\n");
+ }
+
// Need to shutdown here instead of creating an error log
// because the bad page could be critical to the regular
// error handling path and cause an infinite loop.
@@ -1589,6 +1604,36 @@ errlHndl_t PnorRP::computeSection( uint64_t i_vaddr,
return errhdl;
}
+/**
+ * @brief Figure out which section a PA belongs to
+ *
+ * "PA" is presumably a physical flash address, since the value is
+ * compared directly against the flashAddr field of each TOC entry.
+ *
+ * Walks the section ids from PNOR::FIRST_SECTION up to, but not
+ * including, PNOR::NUM_SECTIONS and stops at the first TOC entry whose
+ * range [flashAddr, flashAddr + size) contains i_offset. For entries
+ * flagged FFS_INTEG_ECC_PROTECT the size used in that range check is
+ * the TOC size scaled by 9/8.
+ *
+ * @param[in] i_offset  Offset to look up in the TOC
+ *
+ * @return The id field of the first matching TOC entry, or
+ *         PNOR::INVALID_SECTION if no entry's range contains i_offset
+ */
+PNOR::SectionId PnorRP::computeSectionPhys( uint64_t i_offset)
+{
+ PNOR::SectionId o_id = PNOR::INVALID_SECTION; // returned as-is when nothing matches
+
+ // Loop through all sections, in id order, to find the first one containing i_offset
+ for( PNOR::SectionId id = PNOR::FIRST_SECTION;
+ id < PNOR::NUM_SECTIONS;
+ id = static_cast<PNOR::SectionId>(id + 1) )
+ {
+ // Need to take ECC into account for the size (presumably the TOC size excludes the ECC bytes -- confirm against the TOC parser)
+ uint32_t l_size = iv_TOC[id].size;
+ if ((iv_TOC[id].integrity & FFS_INTEG_ECC_PROTECT) != 0) // section is ECC protected
+ {
+ l_size = (l_size / 8) * 9; // scale by 9/8; integer division truncates before the multiply
+ }
+
+ if( (i_offset >= iv_TOC[id].flashAddr)
+ && (i_offset < (iv_TOC[id].flashAddr + l_size)) ) // half-open range: end offset is excluded
+ {
+ o_id = iv_TOC[id].id; // report the id stored in the entry, not the loop index
+ break;
+ }
+ }
+
+ return o_id;
+}
+
errlHndl_t PnorRP::clearSection(PNOR::SectionId i_section)
{
TRACDCOMP(g_trac_pnor, "PnorRP::clearSection Section id = %d", i_section);
OpenPOWER on IntegriCloud