summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C | 298
-rw-r--r-- src/usr/diag/prdf/plat/prdfPlatServices_ipl.C | 4
2 files changed, 129 insertions, 173 deletions
diff --git a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
index 3ad9878bc..c32dd52a7 100644
--- a/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
+++ b/src/usr/diag/prdf/plat/mem/prdfRestoreDramRepairs.C
@@ -282,7 +282,6 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
bool o_calloutMade = false;
- /* TODO RTC 178743
bool analysisErrors = false;
errlHndl_t errl = NULL; // Initially NULL, will create if needed.
@@ -297,19 +296,30 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
continue; // this rank didn't have any repairs
}
- CenRank rank ( r );
- CenMark mark;
+ MemRank rank ( r );
+ MemMark chipMark, symMark;
- if ( SUCCESS != mssGetMarkStore(i_trgt, rank, mark) )
+ ExtensibleChip * chip = (ExtensibleChip *)systemPtr->GetChip( i_trgt );
+
+ if (SUCCESS != MarkStore::readChipMark<TYPE_MBA>(chip, rank, chipMark))
{
- PRDF_ERR( PRDF_FUNC "mssGetMarkStore() failed: MBA=0x%08x "
- "rank=%d", getHuid(i_trgt), rank.getMaster() );
+ PRDF_ERR( PRDF_FUNC "readChipMark() failed: MBA=0x%08x "
+ "rank=0x%02x", getHuid(i_trgt), rank.getKey() );
+ analysisErrors = true;
+ continue; // skip this rank
+ }
+
+ if (SUCCESS != MarkStore::readSymbolMark<TYPE_MBA>(chip, rank, symMark))
+ {
+ PRDF_ERR( PRDF_FUNC "readSymbolMark() failed: MBA=0x%08x "
+ "rank=0x%02x", getHuid(i_trgt), rank.getKey() );
analysisErrors = true;
continue; // skip this rank
}
- CenSymbol sp0, sp1, ecc;
+ MemSymbol sp0, sp1, ecc;
+ /* TODO RTC 189221 DRAM sparing
if ( SUCCESS != mssGetSteerMux(i_trgt, rank, sp0, sp1, ecc) )
{
PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed: MBA=0x%08x "
@@ -317,9 +327,10 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
analysisErrors = true;
continue; // skip this rank
}
+ */
- bool isCm = mark.getCM().isValid(); // chip mark
- bool isSm = mark.getSM().isValid(); // symbol mark
+ bool isCm = chipMark.isValid(); // chip mark
+ bool isSm = symMark.isValid(); // symbol mark
bool isSp = (sp0.isValid() || sp1.isValid()); // either DRAM spare
bool isEcc = ecc.isValid(); // ECC spare
@@ -332,33 +343,41 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
if ( NULL == errl )
{
errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE,
- i_trgt,
- PRDFSIG_RdrRepairsUsed );
+ i_trgt, PRDFSIG_RdrRepairsUsed );
}
- std::vector<CenSymbol> list;
- list.push_back( mark.getCM() );
- list.push_back( mark.getSM() );
- list.push_back( sp0 );
- list.push_back( sp1 );
- list.push_back( ecc );
-
- for ( std::vector<CenSymbol>::iterator it = list.begin();
- it != list.end(); it++ )
+ // Keep a list of DIMMs to callout. Note that we are using a map
+ // with the DIMM target as the key so that we can maintain a
+ // unique list. The map value has no significance.
+ std::map<TargetHandle_t, uint32_t> calloutList;
+ std::vector<MemSymbol> symList;
+ symList.push_back( chipMark.getSymbol() );
+ symList.push_back( symMark.getSymbol() );
+ symList.push_back( sp0 );
+ symList.push_back( sp1 );
+ symList.push_back( ecc );
+
+ for ( auto & sym : symList )
{
- if ( !it->isValid() ) continue;
+ if ( !sym.isValid() ) continue;
+
+ MemoryMru mm( i_trgt, rank, sym );
// Add all parts to the error log.
- TargetHandleList partList = i_memmru.getCalloutList();
- for ( auto &part : partList )
+ for ( auto & dimm : mm.getCalloutList() )
{
- errl->addHwCallout( part, MRU_HIGH,
- HWAS::DELAYED_DECONFIG,
- HWAS::GARD_Predictive );
+ calloutList[dimm] = 1;
}
// Add the MemoryMru to the capture data.
- MemCaptureData::addExtMemMruData( i_memmru, errl );
+ MemCaptureData::addExtMemMruData( mm, errl );
+ }
+
+ // Callout all DIMMs in the map.
+ for ( auto const & dimm : calloutList )
+ {
+ __calloutDimm<TYPE_MBA, DIMMS_PER_RANK::MBA>( errl, i_trgt,
+ dimm.first );
}
o_calloutMade = true;
@@ -366,13 +385,12 @@ bool processRepairedRanks<TYPE_MBA>( TargetHandle_t i_trgt,
}
// Commit the error log, if needed.
- commitErrl( errl, i_trgt );
+ commitErrl<TYPE_MBA>( errl, i_trgt );
// Commit an additional error log indicating something failed in the
// analysis, if needed.
- commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
- PRDFSIG_RdrInternalFail, analysisErrors );
- */
+ commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
+ PRDFSIG_RdrInternalFail, analysisErrors );
return o_calloutMade;
@@ -446,57 +464,41 @@ bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
// available repairs. Callout these DIMMs.
bool o_calloutMade = false;
-
- /* TODO RTC 178743
bool analysisErrors = false;
- errlHndl_t errl = NULL; // Initially NULL, will create if needed.
+ errlHndl_t errl = nullptr; // Initially NULL, will create if needed.
// Iterate the list of all DIMMs be
- TargetHandleList dimms = getConnected( i_mba, TYPE_DIMM );
- for ( TargetHandleList::iterator i = dimms.begin(); i < dimms.end(); i++ )
+ TargetHandleList dimms = getConnected( i_trgt, TYPE_DIMM );
+ for ( auto & dimm : dimms )
{
- uint8_t port = 0, dimm = 0;
-
- if ( SUCCESS != getMbaPort(*i, port) )
- {
- PRDF_ERR( PRDF_FUNC "getMbaPort() failed: DIMM=0x%08x", getHuid(*i));
- analysisErrors = true;
- continue; // skip this dimm
- }
-
- if ( SUCCESS != getMbaDimm(*i, dimm) )
- {
- PRDF_ERR( PRDF_FUNC "getMbaDimm() failed: DIMM=0x%08x", getHuid(*i));
- analysisErrors = true;
- continue; // skip this dimm
- }
+ uint8_t portSlct = getDimmPort<TYPE_MBA>( dimm );
+ uint8_t dimmSlct = getDimmSlct<TYPE_MBA>( dimm );
// The 4 bits of i_badDimmMask is defined as p0d0, p0d1, p1d0, and p1d1.
- uint8_t mask = 0x8 >> (port * MBA_DIMMS_PER_RANK + dimm);
+ uint8_t mask = 0x8 >> (portSlct * MBA_DIMMS_PER_RANK + dimmSlct);
if ( 0 != (i_badDimmMask & mask) )
{
if ( NULL == errl )
{
- errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
- PRDFSIG_RdrRepairUnavail );
+ errl = createErrl<TYPE_MBA>( PRDF_DETECTED_FAIL_HARDWARE,
+ i_trgt, PRDFSIG_RdrRepairUnavail );
}
+ __calloutDimm<TYPE_MBA, DIMMS_PER_RANK::MBA>( errl, i_trgt, dimm );
+
o_calloutMade = true;
- errl->addHwCallout( *i, MRU_HIGH, HWAS::DELAYED_DECONFIG,
- HWAS::GARD_Predictive );
}
}
// Commit the error log, if needed.
- commitErrl( errl, i_mba );
+ commitErrl<TYPE_MBA>( errl, i_trgt );
// Commit an additional error log indicating something failed in the
// analysis, if needed.
- commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
- PRDFSIG_RdrInternalFail, analysisErrors );
- */
+ commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
+ PRDFSIG_RdrInternalFail, analysisErrors );
return o_calloutMade;
@@ -505,7 +507,26 @@ bool processBadDimms<TYPE_MBA>( TargetHandle_t i_trgt, uint8_t i_badDimmMask )
//------------------------------------------------------------------------------
-template<DIMMS_PER_RANK T>
+template<TARGETING::TYPE>
+int32_t __readBadDqBitmap( TargetHandle_t i_trgt, MemRank i_rank );
+
+template<>
+int32_t __readBadDqBitmap<TYPE_MCA>( TargetHandle_t i_trgt, MemRank i_rank )
+{
+ MemDqBitmap<DIMMS_PER_RANK::MCA> bitmap;
+ return getBadDqBitmap<DIMMS_PER_RANK::MCA>( i_trgt, i_rank, bitmap );
+}
+
+template<>
+int32_t __readBadDqBitmap<TYPE_MBA>( TargetHandle_t i_trgt, MemRank i_rank )
+{
+ MemDqBitmap<DIMMS_PER_RANK::MBA> bitmap;
+ return getBadDqBitmap<DIMMS_PER_RANK::MBA>( i_trgt, i_rank, bitmap );
+}
+
+//------------------------------------------------------------------------------
+
+template<TARGETING::TYPE T>
bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks )
{
#define PRDF_FUNC "[screenBadDqs<T>] "
@@ -522,10 +543,9 @@ bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks )
// flag is set. PRD will simply need to iterate through all the ranks
// to ensure all DIMMs are screen and the procedure will do the rest.
- MemDqBitmap<T> bitmap;
- if ( SUCCESS != getBadDqBitmap<T>(i_trgt, rank, bitmap) )
+ if ( SUCCESS != __readBadDqBitmap<T>(i_trgt, rank) )
{
- PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed: TRGT=0x%08x "
+ PRDF_ERR( PRDF_FUNC "__readBadDqBitmap() failed: TRGT=0x%08x "
"rank=0x%02x", getHuid(i_trgt), rank.getKey() );
analysisErrors = true;
continue; // skip this rank
@@ -534,16 +554,8 @@ bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks )
// Commit an additional error log indicating something failed in the
// analysis, if needed.
- if ( DIMMS_PER_RANK::MBA == T )
- {
- commitSoftError<TYPE_MBA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
- PRDFSIG_RdrInternalFail, analysisErrors );
- }
- else
- {
- commitSoftError<TYPE_MCA>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
- PRDFSIG_RdrInternalFail, analysisErrors );
- }
+ commitSoftError<T>( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
+ PRDFSIG_RdrInternalFail, analysisErrors );
return o_calloutMade;
@@ -552,33 +564,41 @@ bool screenBadDqs( TargetHandle_t i_trgt, const std::vector<MemRank> & i_ranks )
//------------------------------------------------------------------------------
-void deployDramSpares( TargetHandle_t i_mba,
- const std::vector<CenRank> & i_ranks )
+template<TARGETING::TYPE>
+void deployDramSpares( TargetHandle_t i_trgt,
+ const std::vector<MemRank> & i_ranks );
+
+template<>
+void deployDramSpares<TYPE_MCA>( TargetHandle_t i_trgt,
+ const std::vector<MemRank> & i_ranks ){}
+template<>
+void deployDramSpares<TYPE_MBA>( TargetHandle_t i_trgt,
+ const std::vector<MemRank> & i_ranks )
{
- /* TODO RTC 178743
- bool x4 = isDramWidthX4(i_mba);
+ PRDF_TRAC( "deployDramSpares: Function not implemented yet" );
+ /* TODO RTC 189221
+ bool x4 = isDramWidthX4( i_trgt );
bool cenDimm = isMembufOnDimm<TYPE_MBA>( i_mba );
- for ( std::vector<CenRank>::const_iterator rank = i_ranks.begin();
- rank != i_ranks.end(); rank++ )
+ for ( auto & rank : i_ranks )
{
// Doesn't matter which DRAM is spared as long as they are all spared.
// Also, make sure the ECC spare is on a different DRAM than the spare
// DRAM.
- CenSymbol symPort0 = CenSymbol::fromSymbol( i_mba, *rank, 71 );
- CenSymbol symPort1 = CenSymbol::fromSymbol( i_mba, *rank, 53 );
- CenSymbol symEccSp = CenSymbol::fromSymbol( i_mba, *rank, 67 );
+ MemSymbol symPort0 = MemSymbol::fromSymbol( i_trgt, rank, 71 );
+ MemSymbol symPort1 = MemSymbol::fromSymbol( i_trgt, rank, 53 );
+ MemSymbol symEccSp = MemSymbol::fromSymbol( i_trgt, rank, 67 );
int32_t l_rc = SUCCESS;
if ( cenDimm )
{
- l_rc |= mssSetSteerMux( i_mba, *rank, symPort0, false );
- l_rc |= mssSetSteerMux( i_mba, *rank, symPort1, false );
+ l_rc |= mssSetSteerMux( i_trgt, rank, symPort0, false );
+ l_rc |= mssSetSteerMux( i_trgt, rank, symPort1, false );
}
if ( x4 )
- l_rc |= mssSetSteerMux( i_mba, *rank, symEccSp, true );
+ l_rc |= mssSetSteerMux( i_trgt, rank, symEccSp, true );
if ( SUCCESS != l_rc )
{
@@ -587,8 +607,7 @@ void deployDramSpares( TargetHandle_t i_mba,
// warning in Hostboot.
continue;
}
- }
- */
+ }*/
}
} // end namespace RDR
@@ -598,12 +617,9 @@ void deployDramSpares( TargetHandle_t i_mba,
//------------------------------------------------------------------------------
template<TARGETING::TYPE T>
-uint32_t restoreDramRepairs( TargetHandle_t i_trgt );
-
-template<>
-uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt )
+uint32_t restoreDramRepairs( TargetHandle_t i_trgt )
{
- #define PRDF_FUNC "PRDF::restoreDramRepairs<TYPE_MCA>"
+ #define PRDF_FUNC "PRDF::restoreDramRepairs<T>"
PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) );
@@ -622,94 +638,28 @@ uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt )
if ( nullptr != errl )
{
PRDF_ERR( PRDF_FUNC "Failed to initialize PRD" );
- RDR::commitErrl<TYPE_MCA>( errl, i_trgt );
+ RDR::commitErrl<T>( errl, i_trgt );
break;
}
}
std::vector<MemRank> ranks;
- getMasterRanks<TYPE_MCA>( i_trgt, ranks );
-
- if ( areDramRepairsDisabled() )
- {
- // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with
- // VPD information.
- if ( RDR::screenBadDqs<DIMMS_PER_RANK::MCA>(i_trgt, ranks) )
- calloutMade = true;
-
- // No need to continue because there will not be anything to
- // restore.
- break;
- }
-
- uint8_t rankMask = 0, dimmMask = 0;
- if ( SUCCESS != mssRestoreDramRepairs<TYPE_MCA>(i_trgt, rankMask,
- dimmMask) )
- {
- // Can't check anything if this doesn't work.
- PRDF_ERR( "[" PRDF_FUNC "] mssRestoreDramRepairs() failed" );
- break;
- }
-
- // Callout DIMMs with too many bad bits and not enough repairs available
- if ( RDR::processBadDimms<TYPE_MCA>(i_trgt, dimmMask) )
- calloutMade = true;
-
- // Check repaired ranks for RAS policy violations.
- if ( RDR::processRepairedRanks<TYPE_MCA>(i_trgt, rankMask) )
- calloutMade = true;
-
- } while(0);
-
- PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) );
-
- return calloutMade ? FAIL : SUCCESS;
-
- #undef PRDF_FUNC
-}
-
-//------------------------------------------------------------------------------
-
-template<>
-uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt )
-{
- #define PRDF_FUNC "PRDF::restoreDramRepairs<TYPE_MBA>"
-
- PRDF_ENTER( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) );
-
- // will unlock when going out of scope
- PRDF_SYSTEM_SCOPELOCK;
-
- bool calloutMade = false;
-
- /* TODO RTC 178743
- do
- {
- std::vector<CenRank> ranks;
- int32_t l_rc = getMasterRanks( i_trgt, ranks );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( "[" PRDF_FUNC "] getMasterRanks() failed" );
-
- RDR::commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_trgt,
- PRDFSIG_RdrInternalFail, true );
-
- break; // Assume user meant to disable DRAM repairs.
- }
+ getMasterRanks<T>( i_trgt, ranks );
bool spareDramDeploy = mnfgSpareDramDeploy();
if ( spareDramDeploy )
{
// Deploy all spares for MNFG corner tests.
- RDR::deployDramSpares( i_trgt, ranks );
+ RDR::deployDramSpares<T>( i_trgt, ranks );
}
if ( areDramRepairsDisabled() )
{
// DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with
// VPD information.
- if ( RDR::screenBadDqs(i_trgt, ranks) ) calloutMade = true;
+ if ( RDR::screenBadDqs<T>(i_trgt, ranks) )
+ calloutMade = true;
// No need to continue because there will not be anything to
// restore.
@@ -724,14 +674,15 @@ uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt )
PRDF_ERR( "[" PRDF_FUNC "] MNFG spare deploy enabled, but DRAM "
"repairs are not disabled" );
- RDR::commitSoftError( PRDF_INVALID_CONFIG, i_trgt,
- PRDFSIG_RdrInvalidConfig, true );
+ RDR::commitSoftError<T>( PRDF_INVALID_CONFIG, i_trgt,
+ PRDFSIG_RdrInvalidConfig, true );
break; // Assume user meant to disable DRAM repairs.
}
uint8_t rankMask = 0, dimmMask = 0;
- if ( SUCCESS != mssRestoreDramRepairs(i_trgt, rankMask, dimmMask) )
+ if ( SUCCESS != mssRestoreDramRepairs<T>( i_trgt, rankMask,
+ dimmMask) )
{
// Can't check anything if this doesn't work.
PRDF_ERR( "[" PRDF_FUNC "] mssRestoreDramRepairs() failed" );
@@ -739,13 +690,14 @@ uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt )
}
// Callout DIMMs with too many bad bits and not enough repairs available
- if ( RDR::processBadDimms(i_trgt, dimmMask) ) calloutMade = true;
+ if ( RDR::processBadDimms<T>(i_trgt, dimmMask) )
+ calloutMade = true;
// Check repaired ranks for RAS policy violations.
- if ( RDR::processRepairedRanks(i_trgt, rankMask) ) calloutMade = true;
+ if ( RDR::processRepairedRanks<T>(i_trgt, rankMask) )
+ calloutMade = true;
} while(0);
- */
PRDF_EXIT( PRDF_FUNC "(0x%08x)", getHuid(i_trgt) );
@@ -754,6 +706,12 @@ uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt )
#undef PRDF_FUNC
}
+template
+uint32_t restoreDramRepairs<TYPE_MCA>( TargetHandle_t i_trgt );
+template
+uint32_t restoreDramRepairs<TYPE_MBA>( TargetHandle_t i_trgt );
+
+//------------------------------------------------------------------------------
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
index 3982e17f5..d3c0729a9 100644
--- a/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
+++ b/src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
@@ -188,11 +188,10 @@ uint32_t mssRestoreDramRepairs<TYPE_MBA>( TargetHandle_t i_target,
{
uint32_t o_rc = SUCCESS;
- /* TODO RTC 178743
errlHndl_t errl = NULL;
FAPI_INVOKE_HWP( errl, mss_restore_DRAM_repairs,
- fapi::Target(fapi::TARGET_TYPE_MCA_CHIPLET, i_target),
+ fapi2::Target<fapi2::TARGET_TYPE_MBA>( i_target ),
o_repairedRankMask, o_badDimmMask );
if ( NULL != errl )
@@ -203,7 +202,6 @@ uint32_t mssRestoreDramRepairs<TYPE_MBA>( TargetHandle_t i_target,
PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
o_rc = FAIL;
}
- */
return o_rc;
}
OpenPOWER on IntegriCloud