summaryrefslogtreecommitdiffstats
path: root/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C')
-rw-r--r--src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C467
1 files changed, 218 insertions, 249 deletions
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C b/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
index cfcf67afa..2f2d88ea5 100644
--- a/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
+++ b/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
@@ -34,9 +34,11 @@
#include "common/plat/pegasus/prdfCalloutUtil.H"
#include "common/plat/pegasus/prdfCenDqBitmap.H"
#include "common/plat/pegasus/prdfCenMarkstore.H"
+#include "common/plat/pegasus/prdfCenMbaExtraSig.H"
#include "common/plat/pegasus/prdfCenSymbol.H"
#include "common/plat/pegasus/prdfMemoryMru.H"
#include "framework/service/prdfPlatServices.H"
+#include "plat/pegasus/prdfPlatCalloutUtil.H"
using namespace HWAS;
using namespace std;
@@ -45,111 +47,76 @@ using namespace TARGETING;
namespace PRDF
{
-static const uint8_t INVALID_SYMBOL = 0xff;
+using namespace PlatServices;
-bool validSymbol(uint8_t i_symbol)
+namespace RDR // local utility functions to support PRDF::restoreDramRepairs()
{
- return i_symbol != INVALID_SYMBOL;
-}
-void commitRestoreCallout( void (*i_func)(errlHndl_t &, void *), void * i_data,
- TargetHandle_t i_mba )
+// Creates and returns an error log.
+errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_mba,
+ uint32_t i_signature )
{
- PRDF_DENTER("commitRestoreCallout");
-
- errlHndl_t err = NULL;
-
- PRDF_HW_CREATE_ERRL(
- err,
- ERRL_SEV_PREDICTIVE,
- ERRL_ETYPE_NOT_APPLICABLE,
- SRCI_MACH_CHECK,
- SRCI_NO_ATTR,
- PRDF_RESTORE_DRAM_REPAIR,
- FSP_DEFAULT_REFCODE,
- PRDF_DETECTED_FAIL_HARDWARE_PROBABLE,
- 0, 0, 0, 0, // user data
- HWSV_SYS_NO_TERMINATE,
- false); // no pld check
-
- // add the callout
-
- (*i_func)(err, i_data);
-
- bool term = false;
-
- CenMbaCaptureData::addDramRepairsData( i_mba, err );
+ uint64_t userdata12 = PRDF_GET_UINT64_FROM_UINT32( getHuid(i_mba), 0 );
+ uint64_t userdata34 = PRDF_GET_UINT64_FROM_UINT32( i_signature, 0 );
+
+ // Note that the error log tags are not needed because PRD uses its own
+ // signature parser.
+
+ return new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE, // severity
+ PRDF_RESTORE_DRAM_REPAIR, // module ID
+ i_reasonCode, // reason code
+ userdata12, // user data 1 & 2
+ userdata34 ); // user data 3 & 4
+}
- PRDF_HW_COMMIT_ERRL(
- term,
- err,
- HWSV::HWSV_DECONFIG_DEFER,
- ERRL_ACTION_REPORT,
- HWSV_CONTINUE);
+//------------------------------------------------------------------------------
- if(term)
+// If an error log is given, will add DRAM repairs FFDC and traces to error log,
+// then commit the error log.
+void commitErrl( errlHndl_t i_errl, TargetHandle_t i_mba )
+{
+ if ( NULL != i_errl )
{
- // FIXME...this is a little goofy.
- // Should be scrubbed with RTC 51552
+ // Add capture data
+ CenMbaCaptureData::addMemEccData( i_mba, i_errl );
- PRDF_COMMIT_ERRL(err, ERRL_ACTION_REPORT);
+ // Add traces
+ i_errl->collectTrace( PRDF_COMP_NAME, 512 );
+
+ // Commit the error log
+ ERRORLOG::errlCommit( i_errl, PRDF_COMP_ID );
}
}
-void addMemMruCallout(errlHndl_t & io_log, void * i_memMru)
-{
- PRDF_DENTER("addMemMruCallout");
+//------------------------------------------------------------------------------
- if ( NULL != i_memMru )
+// If there were analysis errors, will create and commit an error log with 2nd
+// level support callout.
+void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_mba,
+ uint32_t i_signature, bool i_analysisErrors )
+{
+ if ( i_analysisErrors )
{
- MemoryMru *memMru = static_cast<MemoryMru *>(i_memMru);
-
- TargetHandleList partList = memMru->getCalloutList();
- for ( TargetHandleList::iterator it = partList.begin();
- it != partList.end(); it++ )
- {
- PRDF_HW_ADD_CALLOUT(
- *it,
- SRCI_PRIORITY_HIGH,
- HWSV::HWSV_DECONFIG,
- HWSV::HWSV_DECONFIG_GARD,
- io_log,
- false, // don't write src to vpd
- GARD_Predictive,
- ERRL_SEV_PREDICTIVE,
- false); // don't update hcdb
- }
+ errlHndl_t errl = createErrl( i_reasonCode, i_mba, i_signature );
+ errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH );
+ commitErrl( errl, i_mba );
}
}
-void addDimmCallout(errlHndl_t & io_log, void * i_dimm)
-{
- PRDF_DENTER("addDimmCallout");
-
- PRDF_HW_ADD_CALLOUT(
- static_cast<TargetHandle_t>(i_dimm),
- SRCI_PRIORITY_HIGH,
- HWSV::HWSV_DECONFIG,
- HWSV::HWSV_DECONFIG_GARD,
- io_log,
- false, // don't write src to vpd
- GARD_Predictive,
- ERRL_SEV_PREDICTIVE,
- false); // don't update hcdb
-}
+//------------------------------------------------------------------------------
bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask )
{
- PRDF_DENTER("processRepairedRanks: %p, 0x%02x",
- i_mba, i_repairedRankMask);
+ #define PRDF_FUNC "[processRepairedRanks] "
- // check the argument ranks for repairs
- // that violate RAS policy
+ // The bits in i_repairedRankMask represent ranks that have repairs. Query
+ // hardware and compare against RAS policies.
- bool calloutMade = false;
+ bool o_calloutMade = false;
+ bool analysisErrors = false;
- // check each rank for repairs
- // that violate RAS policy
+ errlHndl_t errl = NULL; // Initially NULL, will create if needed.
for ( uint8_t r = 0; r < MAX_RANKS_PER_MBA; ++r )
{
@@ -161,15 +128,21 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask )
CenRank rank ( r );
CenMark mark;
- if ( SUCCESS != PlatServices::mssGetMarkStore(i_mba, rank, mark) )
+ if ( SUCCESS != mssGetMarkStore(i_mba, rank, mark) )
{
+ PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), rank.flatten() );
+ analysisErrors = true;
continue; // skip this rank
}
CenSymbol sp0, sp1, sp;
- if ( SUCCESS != PlatServices::mssGetSteerMux(i_mba, rank, sp0, sp1, sp))
+ if ( SUCCESS != mssGetSteerMux(i_mba, rank, sp0, sp1, sp))
{
+ PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), rank.flatten() );
+ analysisErrors = true;
continue; // skip this rank
}
@@ -179,112 +152,110 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask )
// This rank has both a steer and a chip mark. Call out the DIMM
// with the chip mark.
- MemoryMru memoryMru( i_mba, rank, mark.getCM() );
-
- commitRestoreCallout( &addMemMruCallout, &memoryMru, i_mba );
+ if ( NULL == errl )
+ {
+ errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrRepairsUsed );
+ }
- calloutMade = true;
+ MemoryMru memoryMru( i_mba, rank, mark.getCM() );
+ CalloutUtil::calloutMemoryMru( errl, memoryMru,
+ SRCI_PRIORITY_HIGH,
+ HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_Predictive );
+ o_calloutMade = true;
}
}
- PRDF_DEXIT("processRepairedRanks");
+ // Commit the error log, if needed.
+ commitErrl( errl, i_mba );
- return calloutMade;
-}
+ // Commit an additional error log indicating something failed in the
+ // analysis, if needed.
+ commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
+ PRDFSIG_RdrInternalFail, analysisErrors );
-bool processBadDimms(TargetHandle_t i_mba, uint8_t i_badDimmMask)
-{
- PRDF_DENTER("processBadDimms: %p, 0x%02x", i_mba, i_badDimmMask);
+ return o_calloutMade;
- const struct DimmPortAssoc
- {
- uint8_t port;
- uint8_t dimm;
- uint8_t enc;
-
- } dimmPortAssoc[] = {
-
- {0, 0, 0x8},
- {0, 1, 0x4},
- {1, 0, 0x2},
- {1, 1, 0x1},
- };
-
- uint64_t calloutCount = 0;
+ #undef PRDF_FUNC
+}
- // callout the argument dimms
+//------------------------------------------------------------------------------
- // get all the dimms connected to this MBA
+bool processBadDimms( TargetHandle_t i_mba, uint8_t i_badDimmMask )
+{
+ #define PRDF_FUNC "[processBadDimms] "
- TARGETING::TargetHandleList dimms = PlatServices::getConnected(
- i_mba, TARGETING::TYPE_DIMM);
+ // The bits in i_badDimmMask represent DIMMs that have exceeded the
+ // available repairs. Callout these DIMMs.
- // convert the encoded dimms that had too many repairs to
- // dimm targets
+ bool o_calloutMade = false;
+ bool analysisErrors = false;
- TargetHandleList::iterator dit = dimms.end();
+ errlHndl_t errl = NULL; // Initially NULL, will create if needed.
- while(dit-- != dimms.begin())
+ // Iterate the list of all DIMMs connected to this MBA.
+ TargetHandleList dimms = getConnected( i_mba, TYPE_DIMM );
+ for ( TargetHandleList::iterator i = dimms.begin(); i < dimms.end(); i++ )
{
uint8_t port = 0, dimm = 0;
- if(SUCCESS != PlatServices::getMbaPort(*dit, port))
+ if ( SUCCESS != getMbaPort(*i, port) )
{
- // skip this dimm
- continue;
+ PRDF_ERR( PRDF_FUNC"getMbaPort() failed: DIMM=0x%08x", getHuid(*i));
+ analysisErrors = true;
+ continue; // skip this dimm
}
- if(SUCCESS != PlatServices::getMbaDimm(*dit, dimm))
+ if ( SUCCESS != getMbaDimm(*i, dimm) )
{
- // skip this dimm
- continue;
+ PRDF_ERR( PRDF_FUNC"getMbaDimm() failed: DIMM=0x%08x", getHuid(*i));
+ analysisErrors = true;
+ continue; // skip this dimm
}
- // see if the passed in dimm
- // was flagged as bad by the restore procedure
-
- bool match = false;
+ // The 4 bits of i_badDimmMask are defined as p0d0, p0d1, p1d0, and p1d1.
+ uint8_t mask = 0x8 >> (port * PORT_SLCT_PER_MBA + dimm);
- const DimmPortAssoc * it = dimmPortAssoc
- + sizeof(dimmPortAssoc)/sizeof(*dimmPortAssoc);
-
- while(!match && it-- != dimmPortAssoc)
+ if ( 0 != (i_badDimmMask & mask) )
{
- if(i_badDimmMask & it->enc
- && port == it->port
- && dimm == it->dimm)
+ if ( NULL == errl )
{
- // this dimm is a match
-
- match = true;
+ errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrRepairUnavail );
}
- }
- // call them out
-
- if(match)
- {
- ++calloutCount;
- commitRestoreCallout( &addDimmCallout, *dit, i_mba );
+ o_calloutMade = true;
+ errl->addHwCallout( *i, SRCI_PRIORITY_HIGH, HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_Predictive );
}
}
- PRDF_DEXIT("processBadDimms: bad dimm count: %d", calloutCount);
+ // Commit the error log, if needed.
+ commitErrl( errl, i_mba );
- return 0 != calloutCount;
+ // Commit an additional error log indicating something failed in the
+ // analysis, if needed.
+ commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
+ PRDFSIG_RdrInternalFail, analysisErrors );
+
+ return o_calloutMade;
+
+ #undef PRDF_FUNC
}
-bool processDq(TargetHandle_t i_mba)
+//------------------------------------------------------------------------------
+
+bool screenBadDqs( TargetHandle_t i_mba )
{
- using namespace TARGETING;
- using namespace PlatServices;
+ #define PRDF_FUNC "[screenBadDqs] "
- PRDF_DENTER("processDq: %p", i_mba);
+ // Callout any attached DIMMs that have any bad DQs.
- // callout any dimms on the argument MBA
- // that have any bad dq
+ bool o_calloutMade = false;
+ bool analysisErrors = false;
- uint64_t calloutCount = 0;
+ errlHndl_t errl = NULL; // Initially NULL, will create if needed.
for ( uint32_t r = 0; r < MAX_RANKS_PER_MBA; r++ )
{
@@ -293,6 +264,9 @@ bool processDq(TargetHandle_t i_mba)
if ( SUCCESS != getBadDqBitmap(i_mba, rank, bitmap, true) )
{
+ PRDF_ERR( PRDF_FUNC"getBadDqBitmap() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), rank.flatten() );
+ analysisErrors = true;
continue; // skip this rank
}
@@ -301,175 +275,170 @@ bool processDq(TargetHandle_t i_mba)
bool badDqs = false;
if ( SUCCESS != bitmap.badDqs(p, badDqs) )
{
+ PRDF_ERR( PRDF_FUNC"badDqs() failed: MBA=0x%08x rank=%d "
+ "port=%d", getHuid(i_mba), rank.flatten(), p );
+ analysisErrors = true;
continue; // skip this DIMM
}
if ( !badDqs )
{
- continue; // skip this DIMM
+ continue; // nothing to do, skip this DIMM
}
TargetHandleList list = CalloutUtil::getConnectedDimms( i_mba,
rank, p );
if ( 0 == list.size() )
{
- PRDF_ERR( "[processDq] bad bits present but no connected "
- "DIMM: MBA=0x%08x rank=%d port=%d", getHuid(i_mba),
+ PRDF_ERR( PRDF_FUNC"bad bits present but no connected DIMM: "
+ "MBA=0x%08x rank=%d port=%d", getHuid(i_mba),
rank.flatten(), p );
- continue;
+ analysisErrors = true;
+ continue; // skip this DIMM
}
for ( TargetHandleList::iterator i = list.begin();
i < list.end(); i++ )
{
- ++calloutCount;
- commitRestoreCallout( &addDimmCallout, *i, i_mba );
+ if ( NULL == errl )
+ {
+ errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrScreenBadDqs );
+ }
+
+ o_calloutMade = true;
+ errl->addHwCallout( *i, SRCI_PRIORITY_HIGH,
+ HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_Predictive );
}
}
}
- PRDF_DEXIT("processDq: bad dq dimm count: %d", calloutCount);
+ // Commit the error log, if needed.
+ commitErrl( errl, i_mba );
+
+ // Commit an additional error log indicating something failed in the
+ // analysis, if needed.
+ commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
+ PRDFSIG_RdrInternalFail, analysisErrors );
+
+ return o_calloutMade;
- return 0 != calloutCount;
+ #undef PRDF_FUNC
}
-void deployDramSpares(TargetHandle_t i_mba)
-{
- using namespace fapi;
+//------------------------------------------------------------------------------
- bool x4 = PlatServices::isDramWidthX4(i_mba);
+void deployDramSpares( TargetHandle_t i_mba )
+{
+ bool x4 = isDramWidthX4(i_mba);
for ( uint32_t r = 0; r < MAX_RANKS_PER_MBA; r++ )
{
CenRank rank ( r );
- CenSymbol symbol = CenSymbol::fromSymbol( i_mba, rank, 0 );
- // ignore errors from putSteerMux
+ // Doesn't matter which DRAM is spared as long as they are all spared.
+ // Also, make sure the ECC spare is on a different DRAM than the spare
+ // DRAM.
+ CenSymbol symPort0 = CenSymbol::fromDimmDq( i_mba, rank, 0, 0 );
+ CenSymbol symPort1 = CenSymbol::fromDimmDq( i_mba, rank, 0, 1 );
+ CenSymbol symEccSp = CenSymbol::fromDimmDq( i_mba, rank, 8, 0 );
+
+ int32_t l_rc = SUCCESS;
- static_cast<void>(
- PlatServices::mssSetSteerMux(i_mba, rank, symbol, false) );
+ l_rc = mssSetSteerMux( i_mba, rank, symPort0, false );
+ l_rc |= mssSetSteerMux( i_mba, rank, symPort1, false );
- if( x4 )
+ if ( x4 )
+ l_rc |= mssSetSteerMux( i_mba, rank, symEccSp, true );
+
+ if ( SUCCESS != l_rc )
{
- static_cast<void>(
- PlatServices::mssSetSteerMux(i_mba, rank, symbol, true) );
+ // mssSetSteerMux() will print a trace and commit the error log,
+ // however, we need to handle the return code or we get a compile
+ // warning in Hostboot.
+ continue;
}
}
}
+} // end namespace RDR
+
//------------------------------------------------------------------------------
// External functions - declared in prdfMain.H
//------------------------------------------------------------------------------
int32_t restoreDramRepairs( TargetHandle_t i_mba )
{
- PRDF_ENTER( "restoreDramRepairs(0x%08x)", PlatServices::getHuid(i_mba) );
-
- bool calloutMade = false;
+ #define PRDF_FUNC "PRDF::restoreDramRepairs"
- uint8_t repairedRankMask = 0, badDimmMask = 0;
+ PRDF_ENTER( PRDF_FUNC"(0x%08x)", getHuid(i_mba) );
- do {
+ bool calloutMade = false;
- if(PlatServices::isMemoryPreservingIpl())
+ do
+ {
+ if ( isMemoryPreservingIpl() )
{
- // nothing to do in MPIPL
-
+ // Power is preserved on a Centaur for a MPIPL. So the marks and
+ // spares will not need to be restored.
break;
}
- bool spareDramDeploy = PlatServices::mnfgSpareDramDeploy();
+ bool spareDramDeploy = mnfgSpareDramDeploy();
- if(spareDramDeploy)
+ if ( spareDramDeploy )
{
- deployDramSpares(i_mba);
+ // Deploy all spares for MNFG corner tests.
+ RDR::deployDramSpares(i_mba);
}
- // in mfg mode, check dq and don't restore anything
-
- if(PlatServices::areDramRepairsDisabled())
+ if ( areDramRepairsDisabled() )
{
- if(processDq(i_mba))
- {
- calloutMade = true;
- }
+ // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with
+ // VPD information.
+ if ( RDR::screenBadDqs(i_mba) ) calloutMade = true;
+ // No need to continue because there will not be anything to
+ // restore.
break;
}
- if(spareDramDeploy)
+ if ( spareDramDeploy )
{
- // this is an error...the spare dram
- // deploy bit was set but we weren't
- // in mfg mode...log an error for MFG
-
- errlHndl_t err = NULL;
-
- PRDF_ERR( "[restoreDramRepairs] "
- "The specified combination of mfg policy flags is invalid");
-
- /*@
- * @errortype
- * @reasoncode PRDF_INVALID_CONFIG
- * @subsys EPUB_FIRMWARE_SUBSYS
- * @moduleid PRDF_RESTORE_DRAM_REPAIR
- * @devdesc The specified combination of policy flags is invalid.
- */
- PRDF_CREATE_ERRL(
- err,
- ERRL_SEV_PREDICTIVE,
- ERRL_ETYPE_NOT_APPLICABLE,
- SRCI_MACH_CHECK,
- SRCI_NO_ATTR,
- PRDF_RESTORE_DRAM_REPAIR,
- FSP_DEFAULT_REFCODE,
- PRDF_INVALID_CONFIG,
- 0, 0, 0, 0);
- PRDF_COMMIT_ERRL(err, ERRL_ACTION_REPORT);
-
- // assume mfg mode (no repairs) ...
+ // This is an error. The MNFG spare DRAM deploy bit is set, but DRAM
+ // Repairs have not been disabled.
- break;
- }
-
- if(SUCCESS != PlatServices::mssRestoreDramRepairs(
- i_mba,
- repairedRankMask,
- badDimmMask))
- {
- // can't check anything if
- // this doesn't work
+ PRDF_ERR( "["PRDF_FUNC"] MNFG spare deploy enabled, but DRAM "
+ "repairs are not disabled" );
- PRDF_ERR( "[restoreDramRepairs] "
- "PlatServices::mssRestoreDramRepairs failed" );
+ RDR::commitSoftError( PRDF_INVALID_CONFIG, i_mba,
+ PRDFSIG_RdrInvalidConfig, true );
- break;
+ break; // Assume user meant to disable DRAM repairs.
}
- // callout bad dimms
-
- if(processBadDimms(
- i_mba,
- badDimmMask))
+ uint8_t rankMask = 0, dimmMask = 0;
+ if ( SUCCESS != mssRestoreDramRepairs(i_mba, rankMask, dimmMask) )
{
- calloutMade = true;
+ // Can't check anything if this doesn't work.
+ PRDF_ERR( "["PRDF_FUNC"] mssRestoreDramRepairs() failed" );
+ break;
}
- // check repaired ranks for
- // RAS policy violations
+ // Callout DIMMs with too many bad bits and not enough repairs available
+ if ( RDR::processBadDimms(i_mba, dimmMask) ) calloutMade = true;
- if(processRepairedRanks(
- i_mba,
- repairedRankMask))
- {
- calloutMade = true;
- }
+ // Check repaired ranks for RAS policy violations.
+ if ( RDR::processRepairedRanks(i_mba, rankMask) ) calloutMade = true;
} while(0);
- PRDF_EXIT( "restoreDramRepairs(0x%08x)", PlatServices::getHuid(i_mba) );
+ PRDF_EXIT( PRDF_FUNC"(0x%08x)", getHuid(i_mba) );
return calloutMade ? FAIL : SUCCESS;
+
+ #undef PRDF_FUNC
}
} // end namespace PRDF
OpenPOWER on IntegriCloud