diff options
field | value | date |
---|---|---|
author | Caleb Palmer <cnpalmer@us.ibm.com> | 2017-09-08 14:35:19 -0500 |
committer | Zane C. Shelley <zshelle@us.ibm.com> | 2017-09-29 12:09:51 -0400 |
commit | 8f0545457ea9e680e13bcf8a68b5ab993b3883fb (patch) | |
tree | 0de2b22419a51a8ede68f4beadaec2de726536e9 /src/usr | |
parent | 9d06cbbf69bfecbbcbc4e796bf8c236b024a7065 (diff) | |
download | talos-hostboot-8f0545457ea9e680e13bcf8a68b5ab993b3883fb.tar.gz talos-hostboot-8f0545457ea9e680e13bcf8a68b5ab993b3883fb.zip |
PRD: Set Signatures during Runtime TPS
Change-Id: I8f25b42e8e940d07a6c8be8a97e2458bcbfc4943
CQ: SW401831
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/45976
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Brian J. Stegmiller <bjs@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46807
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Diffstat (limited to 'src/usr')
-rw-r--r-- | src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H | 9 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C | 48 |
2 files changed, 55 insertions, 2 deletions
```diff
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H b/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H
index 123688bb1..9ea48f0f2 100644
--- a/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfP9McaExtraSig.H
@@ -49,6 +49,15 @@ PRDR_ERROR_SIGNATURE(VcmFalseAlarm, 0xffff0021, "", "VCM: false alarm");
 PRDR_ERROR_SIGNATURE(VcmFalseAlarmTH, 0xffff0022, "", "VCM: false alarm threshold");
 PRDR_ERROR_SIGNATURE(AllDramRepairs, 0xffff002F, "", "all DRAM repairs used");
 
+PRDR_ERROR_SIGNATURE(TpsFalseAlarm, 0xffff0061, "", "TPS: false alarm");
+PRDR_ERROR_SIGNATURE(TpsFalseAlarmTH, 0xffff0062, "", "TPS: false alarm threshold");
+PRDR_ERROR_SIGNATURE(TpsSymbolMark, 0xffff0063, "", "TPS: symbol mark placed");
+PRDR_ERROR_SIGNATURE(TpsChipMark, 0xffff0064, "", "TPS: chip mark placed");
+PRDR_ERROR_SIGNATURE(TpsSymUeRisk, 0xffff0065, "", "TPS: placing symbol mark risks UE");
+PRDR_ERROR_SIGNATURE(TpsChipUeRisk, 0xffff0066, "", "TPS: placing chip mark risks UE");
+PRDR_ERROR_SIGNATURE(TpsPotentialUe, 0xffff0067, "", "TPS: potential UE");
+PRDR_ERROR_SIGNATURE(TpsDramDisabled, 0xffff0068, "", "TPS: DRAM repairs disabled");
+
 PRDR_ERROR_SIGNATURE(MnfgIplHardCE, 0xffff0051, "", "MNFG IPL hard CE");
 PRDR_ERROR_SIGNATURE(MnfgIplDramCTE, 0xffff0052, "", "MNFG IPL DRAM CTE");
 PRDR_ERROR_SIGNATURE(MnfgIplRankCTE, 0xffff0053, "", "MNFG IPL rank CTE");
diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
index a98cfd961..fa9c0f090 100644
--- a/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
+++ b/src/usr/diag/prdf/plat/mem/prdfMemTps_rt.C
@@ -531,6 +531,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                 break;
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsSymbolMark );
+
             // Update VPD with the symbol mark.
             o_rc = dqBitmap.setSymbol( i_badDqCount.symList[0].symbol );
             if ( SUCCESS != o_rc )
@@ -551,6 +554,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                           "_RANK::MCA>() failed." );
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsSymUeRisk );
+
             // Make the error log predictive.
             io_sc.service_data->setServiceCall();
 
@@ -599,6 +605,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                 break;
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsSymbolMark );
+
             // Update VPD with both symbols.
             for ( auto sym : i_badDqCount.symList )
             {
@@ -623,6 +632,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                           "_RANK::MCA>() failed." );
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsSymUeRisk );
+
             // Make the error log predictive.
             io_sc.service_data->setServiceCall();
         }
@@ -669,11 +681,13 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                                            newChipMark );
             if ( SUCCESS != o_rc )
             {
-                PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
+                PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
                           "failed", iv_chip->getHuid(), getKey() );
                 break;
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsChipMark );
             // Update VPD with the chip mark.
             o_rc = dqBitmap.setDram( i_badChipCount.symList[0].symbol );
             if ( SUCCESS != o_rc )
@@ -694,6 +708,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                           "_RANK::MCA>() failed." );
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsChipUeRisk );
+
             // Make the error log predictive.
             io_sc.service_data->setServiceCall();
 
@@ -738,11 +755,14 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                                            newChipMark );
             if ( SUCCESS != o_rc )
             {
-                PRDF_ERR( PRDF_FUNC "writeSymbolMark(0x%08x,0x%02x) "
+                PRDF_ERR( PRDF_FUNC "writeChipMark(0x%08x,0x%02x) "
                           "failed", iv_chip->getHuid(), getKey() );
                 break;
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsChipMark );
+
             // Update VPD with the chip mark.
             o_rc = dqBitmap.setDram( i_badChipCount.symList[0].symbol );
             if ( SUCCESS != o_rc )
@@ -765,6 +785,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                 PRDF_ERR( PRDF_FUNC "__updateVpdCountAboveOne<DIMMS_PER"
                           "_RANK::MCA>() failed." );
             }
+
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsChipUeRisk );
+
             // Make the error log predictive.
             io_sc.service_data->setServiceCall();
 
@@ -792,6 +816,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                 break;
             }
 
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsSymbolMark );
+
             // Update VPD with the symbol mark.
             o_rc = dqBitmap.setSymbol( i_badDqCount.symList[0].symbol );
             if ( SUCCESS != o_rc )
@@ -814,6 +841,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                 PRDF_ERR( PRDF_FUNC "__updateVpdCountAboveOne<DIMMS_PER"
                           "_RANK::MCA>() failed." );
             }
+
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsSymUeRisk );
+
             // Make the error log predictive.
             io_sc.service_data->setServiceCall();
 
@@ -834,6 +865,10 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
                 PRDF_ERR( PRDF_FUNC "__updateVpdCountAboveOne<DIMMS_PER"
                           "_RANK::MCA>() failed." );
             }
+
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsPotentialUe );
+
             // Make the error log predictive.
             io_sc.service_data->setServiceCall();
 
@@ -844,11 +879,17 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCeSymbolCounts( CeCount i_badDqCount,
         // If analysis resulted in a false alarm.
         if ( tpsFalseAlarm )
         {
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsFalseAlarm );
+
             // Increase false alarm counter.
             // If false alarm counter threshold of 3 per day is reached.
             if ( __getTpsFalseAlarmCounter<TYPE_MCA>(iv_chip)->inc( iv_rank,
                                                                     io_sc) )
             {
+                io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                                  PRDFSIG_TpsFalseAlarmTH );
+
                 // Permanently mask mainline NCEs and TCEs
                 getMcaDataBundle(iv_chip)->iv_maskMainlineNceTce = true;
 
@@ -1019,6 +1060,9 @@ uint32_t TpsEvent<TYPE_MCA>::analyzeCe( STEP_CODE_DATA_STRUCT & io_sc )
         // abort this procedure.
         if ( areDramRepairsDisabled() )
         {
+            io_sc.service_data->setSignature( iv_chip->getHuid(),
+                                              PRDFSIG_TpsDramDisabled );
+
             io_sc.service_data->setServiceCall();
             break;
         }
```