diff options
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/p9_ex_actions.rule | 6 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plat/p9/prdfP9Ex.C | 419 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plugins/plugins.mk | 2 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plugins/prdfLogParse_common.C | 12 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plugins/prdfParserEnums.H | 89 | ||||
-rw-r--r-- | src/usr/diag/prdf/common/plugins/prdfProcLogParse.C | 14 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.C | 152 | ||||
-rw-r--r-- | src/usr/diag/prdf/plat/prdfPlatServices_rt.H | 47 | ||||
-rw-r--r-- | src/usr/diag/prdf/prdf_hb_only.mk | 7 | ||||
-rwxr-xr-x | src/usr/diag/prdf/test/prdf_hb_common_test.mk | 1 |
10 files changed, 629 insertions, 120 deletions
diff --git a/src/usr/diag/prdf/common/plat/p9/p9_ex_actions.rule b/src/usr/diag/prdf/common/plat/p9/p9_ex_actions.rule
index 8a1afc902..c4bd973a5 100644
--- a/src/usr/diag/prdf/common/plat/p9/p9_ex_actions.rule
+++ b/src/usr/diag/prdf/common/plat/p9/p9_ex_actions.rule
@@ -33,7 +33,8 @@ actionclass l3_cache_ce
 actionclass l3_cache_ue
 {
-    TBDDefaultCallout;
+    self_th_1;
+    funccall("L3UE");
 };
 actionclass l3_dir_ce
@@ -59,7 +60,8 @@ actionclass l2_cache_ce
 actionclass l2_cache_ue
 {
-    TBDDefaultCallout;
+    self_th_1;
+    funccall("L2UE");
 };
 actionclass l2_dir_ce
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Ex.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Ex.C
index fbd61cd03..0f57349c1 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfP9Ex.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Ex.C
@@ -32,6 +32,7 @@
 #include <prdfP9ExExtraSig.H>
 #include <prdfMfgThresholdMgr.H>
 #include <prdfMfgThreshold.H>
+#include <UtilHash.H>
 using namespace TARGETING;
@@ -193,14 +194,281 @@ int32_t cacheCeWorkaround( ExtensibleChip * i_chip,
 } PRDF_PLUGIN_DEFINE( p9_ex, cacheCeWorkaround );
 /**
- * @brief L2FIR[0] - CE detected on L3 cache read
+ * @brief Adds L2 Line Delete/Column Repair FFDC to an SDC.
+ * @param i_chip     An ex chip.
+ * @param io_sc      Step code data struct.
+ * @param i_LdCrFfdc L2 LD/CR FFDC to add to the capture data.
+ */
+void addL2LdCrFfdc( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc,
+                    LD_CR_FFDC::L2LdCrFfdc & i_LdCrFfdc )
+{
+    CaptureData & cd = io_sc.service_data->GetCaptureData();
+
+    static const size_t sz_word = sizeof(CPU_WORD);
+
+    // Round the capture data size up to a whole number of CPU_WORDs so the
+    // buffer can be byte-swapped one word at a time below.
+    static const size_t sz_maxData =
+        ((sizeof(LD_CR_FFDC::L2LdCrFfdc) + sz_word-1) / sz_word) * sz_word;
+
+    uint8_t data[sz_maxData];
+    memset( data, 0x00, sz_maxData );
+    // Copy only the bytes the struct owns. sz_maxData may be larger than the
+    // struct, and reading that many bytes from it would run past its end;
+    // any padding in the buffer stays zeroed from the memset above.
+    memcpy( data, &i_LdCrFfdc, sizeof(i_LdCrFfdc) );
+
+    // Fix endianness issues with non PPC machines.
+#if( __BYTE_ORDER == __LITTLE_ENDIAN )
+
+    for ( uint32_t i = 0; i < (sz_maxData/sz_word); i++ )
+        ((CPU_WORD*)data)[i] = htonl(((CPU_WORD*)data)[i]);
+
+#endif
+
+    // Add data to capture data.
+    BitString bs( sz_maxData*8, (CPU_WORD *) &data );
+    cd.Add( i_chip->GetChipHandle(),
+            Util::hashString(LD_CR_FFDC::L2TITLE), bs );
+}
+
+/**
+ * @brief Adds L3 Line Delete/Column Repair FFDC to an SDC.
+ * @param i_chip     An ex chip.
+ * @param io_sc      Step code data struct.
+ * @param i_LdCrFfdc L3 LD/CR FFDC to add to the capture data.
+ */
+void addL3LdCrFfdc( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc,
+                    LD_CR_FFDC::L3LdCrFfdc & i_LdCrFfdc )
+{
+    CaptureData & cd = io_sc.service_data->GetCaptureData();
+
+    static const size_t sz_word = sizeof(CPU_WORD);
+
+    // Round the capture data size up to a whole number of CPU_WORDs so the
+    // buffer can be byte-swapped one word at a time below.
+    static const size_t sz_maxData =
+        ((sizeof(LD_CR_FFDC::L3LdCrFfdc) + sz_word-1) / sz_word) * sz_word;
+
+    uint8_t data[sz_maxData];
+    memset( data, 0x00, sz_maxData );
+    // Copy only the bytes the struct owns. sz_maxData may be larger than the
+    // struct, and reading that many bytes from it would run past its end;
+    // any padding in the buffer stays zeroed from the memset above.
+    memcpy( data, &i_LdCrFfdc, sizeof(i_LdCrFfdc) );
+
+    // Fix endianness issues with non PPC machines.
+#if( __BYTE_ORDER == __LITTLE_ENDIAN )
+
+    for ( uint32_t i = 0; i < (sz_maxData/sz_word); i++ )
+        ((CPU_WORD*)data)[i] = htonl(((CPU_WORD*)data)[i]);
+
+#endif
+
+    // Add data to capture data.
+    BitString bs( sz_maxData*8, (CPU_WORD *) &data );
+    cd.Add( i_chip->GetChipHandle(),
+            Util::hashString(LD_CR_FFDC::L3TITLE), bs );
+}
+
+
+/**
+ * @brief Handle an L2 UE
+ * @param i_chip EX chip.
+ * @param io_sc Step code data struct.
+ * @return SUCCESS always
+ */
+int32_t L2UE( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
+{
+#ifdef __HOSTBOOT_RUNTIME
+    // Ask the trace array for the failing location (i_ce = false).
+    p9_l2err_extract_err_data l_errData =
+        { L2ERR_CE_UE, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    const int32_t l_extractRc =
+        extractL2Err( i_chip->getTrgt(), false, l_errData );
+    if ( SUCCESS != l_extractRc )
+    {
+        // No location to record; this plugin still reports SUCCESS.
+        PRDF_ERR( "[L2UE] HUID: 0x%08x extractL2Err failed",
+                  i_chip->GetId());
+        return SUCCESS;
+    }
+
+    PRDF_TRAC( "[L2UE] HUID: 0x%08x Error data: member=%d dw=%d "
+               "bank=%d macro=%d ow_select=%x bitline=%x is_top_sa=%x "
+               "is_left_sa=%x addr=%x",
+               i_chip->GetId(), l_errData.member, l_errData.dw,
+               l_errData.bank, l_errData.macro, l_errData.ow_select,
+               l_errData.bitline, l_errData.is_top_sa,
+               l_errData.is_left_sa, l_errData.address );
+
+    // Build the L2 FFDC from the bumped counter and the extracted location.
+    // NOTE(review): the line delete counter is incremented here although no
+    // line delete is issued in this function - confirm this is intended.
+    P9ExDataBundle * l_db = getExDataBundle(i_chip);
+
+    LD_CR_FFDC::L2LdCrFfdc l_ffdc;
+    l_ffdc.L2LDcnt       = ++(l_db->iv_L2LDCount);
+    l_ffdc.L2errMember   = l_errData.member;
+    l_ffdc.L2errDW       = l_errData.dw;
+    l_ffdc.L2errMacro    = l_errData.macro;
+    l_ffdc.L2errBank     = l_errData.bank;
+    l_ffdc.L2errOWSelect = l_errData.ow_select;
+    l_ffdc.L2errBitLine  = l_errData.bitline;
+    l_ffdc.L2errIsTopSA  = l_errData.is_top_sa;
+    l_ffdc.L2errIsLeftSA = l_errData.is_left_sa;
+    l_ffdc.L2errAddress  = l_errData.address;
+    addL2LdCrFfdc( i_chip, io_sc, l_ffdc );
+
+#endif
+    return SUCCESS;
+} PRDF_PLUGIN_DEFINE( p9_ex, L2UE );
+
+/**
+ * @brief Handle an L3 UE
+ * @param i_chip EX chip.
+ * @param io_sc Step code data struct.
+ * @return SUCCESS always
+ */
+int32_t L3UE( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
+{
+#ifdef __HOSTBOOT_RUNTIME
+    int32_t l_rc = SUCCESS;
+    p9_l3err_extract_err_data errorAddr = { L3ERR_CE_UE, 0, 0, 0, 0, 0, 0 };
+
+    // Get failing location from trace array
+    l_rc = extractL3Err( i_chip->getTrgt(), errorAddr );
+    if (SUCCESS != l_rc)
+    {
+        // No location to record; this plugin still reports SUCCESS.
+        // Tagged [L3UE] so the message is not attributed to the L3CE plugin.
+        PRDF_ERR( "[L3UE] HUID: 0x%08x extractL3Err failed",
+                  i_chip->GetId());
+        return SUCCESS;
+    }
+
+    // Tagged [L3UE] so UE traces can be told apart from L3CE traces.
+    PRDF_TRAC( "[L3UE] HUID: 0x%08x Error data: member=%d dw=%d "
+               "bank=%d dataout=%d hashed addr=%x cache addr=%x",
+               i_chip->GetId(), errorAddr.member, errorAddr.dw,
+               errorAddr.bank, errorAddr.dataout,
+               errorAddr.hashed_real_address_45_56,
+               errorAddr.cache_read_address );
+
+    // Add L3 FFDC
+    // NOTE(review): the line delete counter is incremented here although no
+    // line delete is issued in this function - confirm this is intended.
+    P9ExDataBundle * l_bundle = getExDataBundle(i_chip);
+    l_bundle->iv_L3LDCount++;
+
+    LD_CR_FFDC::L3LdCrFfdc ldcrffdc;
+    ldcrffdc.L3LDcnt = l_bundle->iv_L3LDCount;
+    ldcrffdc.L3errMember = errorAddr.member;
+    ldcrffdc.L3errDW = errorAddr.dw;
+    ldcrffdc.L3errBank = errorAddr.bank;
+    ldcrffdc.L3errDataOut = errorAddr.dataout;
+    ldcrffdc.L3errHshAddress = errorAddr.hashed_real_address_45_56;
+    ldcrffdc.L3errCacheAddress = errorAddr.cache_read_address;
+    addL3LdCrFfdc( i_chip, io_sc, ldcrffdc );
+
+#endif
+    return SUCCESS;
+} PRDF_PLUGIN_DEFINE( p9_ex, L3UE );
+
+/**
+ * @brief Handle an L2 CE
  * @param i_chip EX chip.
  * @param io_sc Step code data struct.
* @return SUCCESS always */ int32_t L2CE( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) { - // TODO: RTC 152593 add line delete support +#if defined(__HOSTBOOT_RUNTIME) || defined(ESW_SIM_COMPILE) + + do { + P9ExDataBundle * l_bundle = getExDataBundle(i_chip); + uint16_t l_maxLineDelAllowed = 0; + int32_t l_rc = SUCCESS; + +#ifdef __HOSTBOOT_RUNTIME + p9_l2err_extract_err_data errorAddr = + { L2ERR_CE_UE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + // Get failing location from trace array + l_rc = extractL2Err( i_chip->getTrgt(), true, errorAddr ); + if (SUCCESS != l_rc) + { + PRDF_ERR( "[L2CE] HUID: 0x%08x extractL2Err failed", + i_chip->GetId()); + break; + } + + PRDF_TRAC( "[L2CE] HUID: 0x%08x Error data: member=%d dw=%d " + "bank=%d macro=%d ow_select=%x bitline=%x is_top_sa=%x " + "is_left_sa=%x addr=%x", + i_chip->GetId(), errorAddr.member, errorAddr.dw, + errorAddr.bank, errorAddr.macro, errorAddr.ow_select, + errorAddr.bitline, errorAddr.is_top_sa, + errorAddr.is_left_sa, errorAddr.address ); + + LD_CR_FFDC::L2LdCrFfdc ldcrffdc; + ldcrffdc.L2LDcnt = l_bundle->iv_L2LDCount; + ldcrffdc.L2errMember = errorAddr.member; + ldcrffdc.L2errDW = errorAddr.dw; + ldcrffdc.L2errMacro = errorAddr.macro; + ldcrffdc.L2errBank = errorAddr.bank; + ldcrffdc.L2errOWSelect = errorAddr.ow_select; + ldcrffdc.L2errBitLine = errorAddr.bitline; + ldcrffdc.L2errIsTopSA = errorAddr.is_top_sa; + ldcrffdc.L2errIsLeftSA = errorAddr.is_left_sa; + ldcrffdc.L2errAddress = errorAddr.address; + addL2LdCrFfdc( i_chip, io_sc, ldcrffdc ); +#endif + if (mfgMode()) + l_maxLineDelAllowed = + getSystemTarget()->getAttr<ATTR_MNFG_TH_P8EX_L2_LINE_DELETES>(); + else + l_maxLineDelAllowed = + getSystemTarget()->getAttr<ATTR_FIELD_TH_P8EX_L2_LINE_DELETES>(); + + // Ensure we're still allowed to issue repairs + if (l_bundle->iv_L2LDCount >= l_maxLineDelAllowed) + { + PRDF_TRAC( "[L2CE] HUID: 0x%08x No more repairs allowed", + i_chip->GetId()); + + // MFG wants to be able to ignore these errors + // If 
they have LD allowed set to 0, wait for + // predictive threshold + if (!mfgMode() || + l_maxLineDelAllowed != 0 ) + { + io_sc.service_data->SetThresholdMaskId(0); + } + break; + } + + // Add to CE table and Check if we need to issue a repair on this CE + if (l_bundle->iv_L2CETable->addAddress(l_bundle->iv_L2LDCount, + io_sc) == false) + { + // No action required on this CE, we're waiting for additional + // errors before applying a line delete + break; + } + + // Execute the line delete + PRDF_TRAC( "[L2CE] HUID: 0x%08x apply directed line delete", + i_chip->GetId()); +#ifdef __HOSTBOOT_RUNTIME + l_rc = l2LineDelete(i_chip->getTrgt(), errorAddr); +#endif + if (SUCCESS != l_rc) + { + PRDF_ERR( "[L2CE] HUID: 0x%08x l2LineDelete failed", + i_chip->GetId()); + // Set signature to indicate L2 Line Delete failed + io_sc.service_data->SetErrorSig( + PRDFSIG_P9EX_L2CE_LD_FAILURE); + } + else + { + l_bundle->iv_L2LDCount++; + + // Set signature to indicate L2 Line Delete issued + io_sc.service_data->SetErrorSig( + PRDFSIG_P9EX_L2CE_LD_ISSUED); + } + + } while(0); + +#endif return SUCCESS; @@ -209,45 +477,98 @@ int32_t L2CE( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc ) /** * @brief Handle an L3 CE * @param i_chip Ex chip. - * @param i_stepcode Step Code data struct + * @param io_sc Step Code data struct * @return PRD return code */ int32_t L3CE( ExtensibleChip * i_chip, - STEP_CODE_DATA_STRUCT & i_stepcode ) + STEP_CODE_DATA_STRUCT & io_sc ) { #if defined(__HOSTBOOT_RUNTIME) || defined(ESW_SIM_COMPILE) - P9ExDataBundle * l_bundle = getExDataBundle(i_chip); - uint16_t l_maxL3LineDelAllowed = 0; + do { + P9ExDataBundle * l_bundle = getExDataBundle(i_chip); + uint16_t l_maxLineDelAllowed = 0; + int32_t l_rc = SUCCESS; +#ifdef __HOSTBOOT_RUNTIME + p9_l3err_extract_err_data errorAddr = + { L3ERR_CE_UE, 0, 0, 0, 0, 0, 0 }; - l_maxL3LineDelAllowed = - MfgThresholdMgr::getInstance()->getThreshold(PlatServices::mfgMode() ? 
- TARGETING::ATTR_MNFG_TH_P8EX_L3_LINE_DELETES: - TARGETING::ATTR_FIELD_TH_P8EX_L3_LINE_DELETES); + // Get failing location from trace array + l_rc = extractL3Err( i_chip->getTrgt(), errorAddr ); + if (SUCCESS != l_rc) + { + PRDF_ERR( "[L3CE] HUID: 0x%08x extractL3Err failed", + i_chip->GetId()); + break; + } - // MfgThresholdMgr treats 0 as a special value for infinite threshold - // For this threshold, we want 0 to really represent 0 repairs allowed - if (l_maxL3LineDelAllowed == MfgThreshold::INFINITE_LIMIT_THR) - l_maxL3LineDelAllowed = 0; + PRDF_TRAC( "[L3CE] HUID: 0x%08x Error data: member=%d dw=%d " + "bank=%d dataout=%d hashed addr=%x cache addr=%x", + i_chip->GetId(), errorAddr.member, errorAddr.dw, + errorAddr.bank, errorAddr.dataout, + errorAddr.hashed_real_address_45_56, + errorAddr.cache_read_address ); + + LD_CR_FFDC::L3LdCrFfdc ldcrffdc; + ldcrffdc.L3LDcnt = l_bundle->iv_L3LDCount; + ldcrffdc.L3errMember = errorAddr.member; + ldcrffdc.L3errDW = errorAddr.dw; + ldcrffdc.L3errBank = errorAddr.bank; + ldcrffdc.L3errDataOut = errorAddr.dataout; + ldcrffdc.L3errHshAddress = errorAddr.hashed_real_address_45_56; + ldcrffdc.L3errCacheAddress = errorAddr.cache_read_address; + addL3LdCrFfdc( i_chip, io_sc, ldcrffdc ); +#endif - // Ensure we're still allowed to issue repairs - if ((l_bundle->iv_L3LDCount < l_maxL3LineDelAllowed) && - (CHECK_STOP != i_stepcode.service_data->getPrimaryAttnType())) - { - // Add to CE table and Check if we need to issue a repair on this CE - bool l_doDelete = - l_bundle->iv_L3CETable->addAddress(l_bundle->iv_L3LDCount, - i_stepcode); + if (mfgMode()) + l_maxLineDelAllowed = + getSystemTarget()->getAttr<ATTR_MNFG_TH_P8EX_L3_LINE_DELETES>(); + else + l_maxLineDelAllowed = + getSystemTarget()->getAttr<ATTR_FIELD_TH_P8EX_L3_LINE_DELETES>(); - if (l_doDelete) + // Ensure we're still allowed to issue repairs + if (l_bundle->iv_L3LDCount >= l_maxLineDelAllowed) { - l_bundle->iv_L3LDCount++; + PRDF_TRAC( "[L3CE] HUID: 0x%08x No more 
repairs allowed", + i_chip->GetId()); + + // MFG wants to be able to ignore these errors + // If they have LD allowed set to 0, wait for + // predictive threshold + if (!mfgMode() || + l_maxLineDelAllowed != 0 ) + { + io_sc.service_data->SetThresholdMaskId(0); + } + break; + } + + // Add to CE table and Check if we need to issue a repair on this CE + if (l_bundle->iv_L3CETable->addAddress(l_bundle->iv_L3LDCount, + io_sc) == false) + { + // No action required on this CE, we're waiting for additional + // errors before applying a line delete + break; + } - // Do Delete - PRDF_TRAC( "[L3CE] HUID: 0x%08x apply line delete", + // Execute the line delete + if ( MODEL_NIMBUS != getChipModel(i_chip->getTrgt()) || + 0x10 != getChipLevel(i_chip->getTrgt()) ) + { + PRDF_TRAC( "[L3CE] HUID: 0x%08x apply directed line delete", i_chip->GetId()); +#ifdef __HOSTBOOT_RUNTIME + l_rc = l3LineDelete(i_chip->getTrgt(), errorAddr); +#endif + } + else + { + // HW bug affecting directed line delete on NIMBUS 1.0 + // So set delete-on-next-ce instead SCAN_COMM_REGISTER_CLASS * prgReg = i_chip->getRegister("L3_PURGE_REG"); @@ -255,40 +576,32 @@ int32_t L3CE( ExtensibleChip * i_chip, prgReg->clearAllBits(); prgReg->SetBit(5); - if (SUCCESS != prgReg->Write() ) - { - PRDF_ERR( "[L3CE] HUID: 0x%08x l3LineDelete failed", - i_chip->GetId()); - // Set signature to indicate L3 Line Delete failed - i_stepcode.service_data->SetErrorSig( - PRDFSIG_P9EX_L3CE_LD_FAILURE); - } - else - { - // Set signature to indicate L3 Line Delete issued - i_stepcode.service_data->SetErrorSig( - PRDFSIG_P9EX_L3CE_LD_ISSUED); - } + l_rc = prgReg->Write(); } - } - else - { - PRDF_TRAC( "[L3CE] HUID: 0x%08x No more repairs allowed", - i_chip->GetId()); - - // MFG wants to be able to ignore these errors - // If they have LD and array repairs set to 0, wait for - // predictive threshold - if (!PlatServices::mfgMode() || - l_maxL3LineDelAllowed != 0 ) + + if (SUCCESS != l_rc) { - 
i_stepcode.service_data->SetThresholdMaskId(0); + PRDF_ERR( "[L3CE] HUID: 0x%08x l3LineDelete failed", + i_chip->GetId()); + // Set signature to indicate L3 Line Delete failed + io_sc.service_data->SetErrorSig( + PRDFSIG_P9EX_L3CE_LD_FAILURE); } - } + else + { + l_bundle->iv_L3LDCount++; + + // Set signature to indicate L3 Line Delete issued + io_sc.service_data->SetErrorSig( + PRDFSIG_P9EX_L3CE_LD_ISSUED); + } + + } while(0); #endif return SUCCESS; + } PRDF_PLUGIN_DEFINE(p9_ex, L3CE); } diff --git a/src/usr/diag/prdf/common/plugins/plugins.mk b/src/usr/diag/prdf/common/plugins/plugins.mk index 4070476c8..9d7800723 100644 --- a/src/usr/diag/prdf/common/plugins/plugins.mk +++ b/src/usr/diag/prdf/common/plugins/plugins.mk @@ -34,7 +34,7 @@ PRDR_ERRL_LIB = lib${RULE_LIBRARY_NAME} LIBRARY_OFILES += prdfLogParse.o LIBRARY_OFILES += prdfLogParse_common.o LIBRARY_OFILES += prdfMemLogParse.o -#LIBRARY_OFILES += prdfProcLogParse.o TODO RTC 136050 +LIBRARY_OFILES += prdfProcLogParse.o LIBRARY_OFILES += prdrErrlPluginsSupt.o LIBRARY_OFILES += prdfParserUtils.o LIBRARY_OFILES += prdfBitString.o diff --git a/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C b/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C index e31f4f712..6d197b356 100644 --- a/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C +++ b/src/usr/diag/prdf/common/plugins/prdfLogParse_common.C @@ -49,7 +49,7 @@ #include <attributeenums.H> // For TARGETING::TYPE enum #include <prdfMemLogParse.H> -//#include <prdfProcLogParse.H> TODO: RTC 136050 +#include <prdfProcLogParse.H> #include <prdfParserEnums.H> #include <prdfMemoryMruData.H> #include <prdfBitString.H> @@ -461,6 +461,11 @@ bool parseCaptureData( void * i_buffer, uint32_t i_buflen, { parseTodFfdcData( sigData, sigDataSize, i_parser ); } + else if ( Util::hashString("OCC_CS_FFDC") == sigId) + { + parsePnorFirData( sigData, sigDataSize, i_parser ); + } +*/ else if ( Util::hashString(LD_CR_FFDC::L2TITLE) == sigId ) { parseL2LdCrFfdc( sigData, 
sigDataSize, i_parser ); @@ -469,11 +474,6 @@ bool parseCaptureData( void * i_buffer, uint32_t i_buflen, { parseL3LdCrFfdc( sigData, sigDataSize, i_parser ); } - else if ( Util::hashString("OCC_CS_FFDC") == sigId) - { - parsePnorFirData( sigData, sigDataSize, i_parser ); - } -*/ else if ( (0 != sigDataSize) && (sizeof(uint64_t) >= sigDataSize) ) { // Print one reg/line if the data size <= 8 bytes diff --git a/src/usr/diag/prdf/common/plugins/prdfParserEnums.H b/src/usr/diag/prdf/common/plugins/prdfParserEnums.H index a496ec9d9..6e062e46f 100644 --- a/src/usr/diag/prdf/common/plugins/prdfParserEnums.H +++ b/src/usr/diag/prdf/common/plugins/prdfParserEnums.H @@ -223,11 +223,10 @@ namespace LD_CR_FFDC { #if __BYTE_ORDER == __LITTLE_ENDIAN uint32_t L2LDcnt : 4; - uint32_t L2LDallowed : 1; uint32_t L2LDMaxAllowed : 4; uint32_t L2CRMaxAllowed : 4; uint32_t L2CRPresent : 4; - uint32_t L2reserved1 :15; + uint32_t L2reserved1 :16; uint32_t L2errMember : 3; uint32_t L2errDW : 3; @@ -236,26 +235,27 @@ namespace LD_CR_FFDC uint32_t L2errOWSelect : 1; uint32_t L2errBitLine : 4; uint32_t L2errIsTopSA : 1; + uint32_t L2errIsLeftSA : 1; uint32_t L2errAddress :10; - uint32_t L2reserved2 : 7; + uint32_t L2reserved2 : 6; L2LdCrFfdc(): - L2LDcnt(0), L2LDallowed(0), L2LDMaxAllowed(0), + L2LDcnt(0), L2LDMaxAllowed(0), L2CRMaxAllowed(0), L2CRPresent(0), L2reserved1(0), L2errMember(0), L2errDW(0), L2errMacro(0), L2errBank(0), L2errOWSelect(0), L2errBitLine(0), - L2errIsTopSA(0), L2errAddress(0), L2reserved2(0) + L2errIsTopSA(0), L2errIsLeftSA(0), L2errAddress(0), L2reserved2(0) {} #else - uint32_t L2reserved1 :15; + uint32_t L2reserved1 :16; uint32_t L2CRPresent : 4; uint32_t L2CRMaxAllowed : 4; uint32_t L2LDMaxAllowed : 4; - uint32_t L2LDallowed : 1; uint32_t L2LDcnt : 4; - uint32_t L2reserved2 : 7; + uint32_t L2reserved2 : 6; uint32_t L2errAddress :10; + uint32_t L2errIsLeftSA : 1; uint32_t L2errIsTopSA : 1; uint32_t L2errBitLine : 4; uint32_t L2errOWSelect : 1; @@ -266,8 +266,8 @@ 
namespace LD_CR_FFDC L2LdCrFfdc(): L2reserved1(0), L2CRPresent(0), L2CRMaxAllowed(0), - L2LDMaxAllowed(0), L2LDallowed(0), L2LDcnt(0), - L2reserved2(0),L2errAddress(0), L2errIsTopSA(0), + L2LDMaxAllowed(0), L2LDcnt(0), L2reserved2(0), + L2errAddress(0), L2errIsLeftSA(0), L2errIsTopSA(0), L2errBitLine(0), L2errOWSelect(0), L2errBank(0), L2errMacro(0), L2errDW(0), L2errMember(0) {} @@ -278,52 +278,43 @@ namespace LD_CR_FFDC struct L3LdCrFfdc { #if __BYTE_ORDER == __LITTLE_ENDIAN - uint32_t L3LDcnt : 4; - uint32_t L3LDallowed : 1; - uint32_t L3LDMaxAllowed : 4; - uint32_t L3CRMaxAllowed : 4; - uint32_t L3CRPresent : 4; - uint32_t L3reserved1 :15; - - uint32_t L3errMember : 3; - uint32_t L3errDW : 3; - uint32_t L3errBank : 1; - uint32_t L3errDataOut : 1; - uint32_t L3errAddress : 4; - uint32_t L3errIO : 1; - uint32_t L3errRow :10; - uint32_t L3reserved2 : 9; + uint32_t L3LDcnt : 4; + uint32_t L3LDMaxAllowed : 4; + uint32_t L3CRMaxAllowed : 4; + uint32_t L3CRPresent : 4; + uint32_t L3errBank : 3; + uint32_t L3errDataOut : 8; + uint32_t L3reserved1 : 5; + + uint32_t L3errMember : 3; + uint32_t L3errDW : 3; + uint32_t L3errHshAddress :12; + uint32_t L3errCacheAddress :14; L3LdCrFfdc(): - L3LDcnt(0), L3LDallowed(0), L3LDMaxAllowed(0), - L3CRMaxAllowed(0), L3CRPresent(0), + L3LDcnt(0), L3LDMaxAllowed(0), L3CRMaxAllowed(0), + L3CRPresent(0), L3errBank(0), L3errDataOut(0), L3reserved1(0), L3errMember(0), L3errDW(0), - L3errBank(0), L3errDataOut(0), L3errAddress(0), - L3errIO(0), L3errRow(0), L3reserved2(0) + L3errHshAddress(0), L3errCacheAddress(0) {} #else - uint32_t L3reserved1 :15; - uint32_t L3CRPresent : 4; - uint32_t L3CRMaxAllowed : 4; - uint32_t L3LDMaxAllowed : 4; - uint32_t L3LDallowed : 1; - uint32_t L3LDcnt : 4; - - uint32_t L3reserved2 : 9; - uint32_t L3errRow :10; - uint32_t L3errIO : 1; - uint32_t L3errAddress : 4; - uint32_t L3errDataOut : 1; - uint32_t L3errBank : 1; - uint32_t L3errDW : 3; - uint32_t L3errMember : 3; + uint32_t L3reserved1 : 5; + uint32_t 
L3errDataOut : 8; + uint32_t L3errBank : 3; + uint32_t L3CRPresent : 4; + uint32_t L3CRMaxAllowed : 4; + uint32_t L3LDMaxAllowed : 4; + uint32_t L3LDcnt : 4; + + uint32_t L3errCacheAddress :14; + uint32_t L3errHshAddress :12; + uint32_t L3errDW : 3; + uint32_t L3errMember : 3; L3LdCrFfdc(): - L3reserved1(0), L3CRPresent(0), L3CRMaxAllowed(0), - L3LDMaxAllowed(0), L3LDallowed(0), L3LDcnt(0), - L3reserved2(0), L3errRow(0), L3errIO(0), - L3errAddress(0), L3errDataOut(0), L3errBank(0), - L3errDW(0), L3errMember(0) + L3reserved1(0), L3errDataOut(0), L3errBank(0), L3CRPresent(0), + L3CRMaxAllowed(0), L3LDMaxAllowed(0), L3LDcnt(0), + L3errCacheAddress(0), L3errHshAddress(0), L3errDW(0), L3errMember(0) {} #endif diff --git a/src/usr/diag/prdf/common/plugins/prdfProcLogParse.C b/src/usr/diag/prdf/common/plugins/prdfProcLogParse.C index 1422548e1..461704ab9 100644 --- a/src/usr/diag/prdf/common/plugins/prdfProcLogParse.C +++ b/src/usr/diag/prdf/common/plugins/prdfProcLogParse.C @@ -191,8 +191,6 @@ bool parseL2LdCrFfdc( uint8_t * i_buffer, uint32_t i_buflen, memcpy( &ldcrffdc, i_buffer, sizeof(LD_CR_FFDC::L2LdCrFfdc)); i_parser.PrintNumber( " L2 LD Counts", "%d", ldcrffdc.L2LDcnt ); - i_parser.PrintBool( " L2 LD Allowed", - 0 != ldcrffdc.L2LDallowed ); i_parser.PrintNumber( " L2 LD Max Allowed", "%d", ldcrffdc.L2LDMaxAllowed ); i_parser.PrintNumber( " L2 CR Max Allowed", "%d", @@ -213,6 +211,8 @@ bool parseL2LdCrFfdc( uint8_t * i_buffer, uint32_t i_buflen, ldcrffdc.L2errBitLine ); i_parser.PrintBool( " L2 Error Is Top SA", 0 != ldcrffdc.L2errIsTopSA ); + i_parser.PrintBool( " L2 Error Is Left SA", + 0 != ldcrffdc.L2errIsLeftSA ); i_parser.PrintNumber( " L2 Error Address", "%d", ldcrffdc.L2errAddress ); @@ -247,8 +247,6 @@ bool parseL3LdCrFfdc( uint8_t * i_buffer, uint32_t i_buflen, memcpy( &ldcrffdc, i_buffer, sizeof(LD_CR_FFDC::L3LdCrFfdc)); i_parser.PrintNumber( " L3 LD Counts", "%d", ldcrffdc.L3LDcnt ); - i_parser.PrintBool( " L3 LD Allowed", - 0 != ldcrffdc.L3LDallowed ); 
i_parser.PrintNumber( " L3 LD Max Allowed", "%d", ldcrffdc.L3LDMaxAllowed ); i_parser.PrintNumber( " L3 CR Max Allowed", "%d", @@ -262,10 +260,10 @@ bool parseL3LdCrFfdc( uint8_t * i_buffer, uint32_t i_buflen, i_parser.PrintNumber( " L3 Error Bank", "%d", ldcrffdc.L3errBank ); i_parser.PrintNumber( " L3 Error Data Out", "%d", ldcrffdc.L3errDataOut ); - i_parser.PrintNumber( " L3 Error Address", "%d", - ldcrffdc.L3errAddress ); - i_parser.PrintNumber( " L3 Error IO", "%d", ldcrffdc.L3errIO ); - i_parser.PrintNumber( " L3 Error Row", "%d", ldcrffdc.L3errRow ); + i_parser.PrintNumber( " L3 Error Hashed Address", "%d", + ldcrffdc.L3errHshAddress ); + i_parser.PrintNumber( " L3 Error Cache Address", "%d", + ldcrffdc.L3errCacheAddress ); } while (0); diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C index 39ec88657..131d45695 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.C +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.C @@ -41,6 +41,11 @@ // Other includes #include <runtime/interface.h> +#include <p9_l3err_extract.H> +#include <p9_l2err_extract.H> +#include <p9_l3err_linedelete.H> +#include <p9_l2err_linedelete.H> +#include <p9_proc_gettracearray.H> //------------------------------------------------------------------------------ @@ -329,6 +334,153 @@ uint32_t startVcmPhase2<TYPE_MBA>( ExtensibleChip * i_chip, #undef PRDF_FUNC } +//############################################################################## +//## Line Delete Functions +//############################################################################## +int32_t extractL3Err( TargetHandle_t i_exTgt, + p9_l3err_extract_err_data &o_errorAddr) +{ + int32_t o_rc = SUCCESS; + errlHndl_t err = nullptr; + bool errFound = false; + + fapi2::Target<fapi2::TARGET_TYPE_EX> fapiTrgt (i_exTgt); + FAPI_INVOKE_HWP( err, + p9_l3err_extract, + i_exTgt, + o_errorAddr, + errFound ); + + if (nullptr != err) + { + PRDF_ERR( "[PlatServices::extractL3Err] huid: 
0x%08x failed", + getHuid(i_exTgt)); + PRDF_COMMIT_ERRL( err, ERRL_ACTION_REPORT ); + o_rc = FAIL; + } + + if ( !errFound ) + { + PRDF_ERR( "[PlatServices::extractL3Err] huid: 0x%08x No Error Found", + getHuid(i_exTgt)); + o_rc = FAIL; + } + + return o_rc; +} + +int32_t l3LineDelete(TargetHandle_t i_exTgt, + const p9_l3err_extract_err_data& i_l3_err_data) +{ + int32_t o_rc = SUCCESS; + errlHndl_t err = NULL; + const uint64_t retryCount = 100; + + // Apply Line Delete + fapi2::Target<fapi2::TARGET_TYPE_EX> fapiTrgt (i_exTgt); + FAPI_INVOKE_HWP( err, + p9_l3err_linedelete, + fapiTrgt, + i_l3_err_data, + retryCount); + if(NULL != err) + { + PRDF_ERR( "[PlatServices::l3LineDelete] HUID: 0x%08x failed", + getHuid(i_exTgt)); + PRDF_COMMIT_ERRL( err, ERRL_ACTION_REPORT ); + o_rc = FAIL; + } + + // Do HCODE update to preserve line delete + + return o_rc; +} + +int32_t extractL2Err( TargetHandle_t i_exTgt, bool i_ce, + p9_l2err_extract_err_data &o_errorAddr) +{ + errlHndl_t err = nullptr; + bool errFound = false; + fapi2::variable_buffer ta_data( P9_TRACEARRAY_NUM_ROWS * + P9_TRACEARRAY_BITS_PER_ROW); + proc_gettracearray_args args; + + args.trace_bus = PROC_TB_L20; + args.stop_pre_dump = true; + args.ignore_mux_setting = false; + args.collect_dump = true; + args.reset_post_dump = false; + args.restart_post_dump = false; + + fapi2::Target<fapi2::TARGET_TYPE_EX> fapiTrgt (i_exTgt); + + FAPI_INVOKE_HWP( err, + p9_proc_gettracearray, + i_exTgt, + args, + ta_data); + if (nullptr != err) + { + PRDF_ERR( "[PlatServices::extractL2Err] huid: 0x%08x gettracearray " + "failed", getHuid(i_exTgt)); + PRDF_COMMIT_ERRL( err, ERRL_ACTION_REPORT ); + return FAIL; + } + + FAPI_INVOKE_HWP( err, + p9_l2err_extract, + i_exTgt, + ta_data, + i_ce ? 
L2ERR_CE : L2ERR_CE_UE, + o_errorAddr, + errFound ); + + if (nullptr != err) + { + PRDF_ERR( "[PlatServices::extractL2Err] huid: 0x%08x failed", + getHuid(i_exTgt)); + PRDF_COMMIT_ERRL( err, ERRL_ACTION_REPORT ); + return FAIL; + } + + if ( !errFound ) + { + PRDF_ERR( "[PlatServices::extractL2Err] huid: 0x%08x No Error Found", + getHuid(i_exTgt)); + return FAIL; + } + + return SUCCESS; +} + +int32_t l2LineDelete(TargetHandle_t i_exTgt, + const p9_l2err_extract_err_data& i_l2_err_data) +{ + int32_t o_rc = SUCCESS; + errlHndl_t err = NULL; + const uint64_t retryCount = 100; + + // Apply Line Delete + fapi2::Target<fapi2::TARGET_TYPE_EX> fapiTrgt (i_exTgt); + FAPI_INVOKE_HWP( err, + p9_l2err_linedelete, + fapiTrgt, + i_l2_err_data, + retryCount); + if(NULL != err) + { + PRDF_ERR( "[PlatServices::l2LineDelete] HUID: 0x%08x failed", + getHuid(i_exTgt)); + PRDF_COMMIT_ERRL( err, ERRL_ACTION_REPORT ); + o_rc = FAIL; + } + + // Do HCODE update to preserve line delete + + return o_rc; +} + + //------------------------------------------------------------------------------ } // end namespace PlatServices diff --git a/src/usr/diag/prdf/plat/prdfPlatServices_rt.H b/src/usr/diag/prdf/plat/prdfPlatServices_rt.H index 66da25424..48b28e60d 100644 --- a/src/usr/diag/prdf/plat/prdfPlatServices_rt.H +++ b/src/usr/diag/prdf/plat/prdfPlatServices_rt.H @@ -26,6 +26,9 @@ #ifndef __prdfPlatServices_rt_H #define __prdfPlatServices_rt_H +#include <p9_l3err_extract.H> +#include <p9_l2err_extract.H> + namespace PRDF { @@ -89,6 +92,50 @@ uint32_t stopBgScrub( ExtensibleChip * i_chip ); template<TARGETING::TYPE T> uint32_t resumeBgScrub( ExtensibleChip * i_chip ); +//############################################################################## +//## Line delete functions +//############################################################################## + +/** + * @brief Extracts address of an error from L3 Trace arrays + * @param i_exTgt EX target + * @param o_errorAddr struct containing address 
of the CE + * @return non-SUCCESS for failure, SUCCESS otherwise + */ +int32_t extractL3Err( TARGETING::TargetHandle_t i_exTgt, + p9_l3err_extract_err_data &o_errorAddr); + +/** + * @brief Calls HWP to execute a line delete. Saves Line delete command in + HCODE image. + * @param i_exTgt EX target + * @param i_l3_err_data struct containing address of the CE + * @return non-SUCCESS for failure, SUCCESS otherwise + */ +int32_t l3LineDelete(TARGETING::TargetHandle_t i_exTgt, + const p9_l3err_extract_err_data& i_l3_err_data); + +/** + * @brief Extracts address of an error from L2 Trace arrays + * @param i_exTgt EX target + * @param i_ce Type of error we're looking for: true for CE, false for UE + * @param o_errorAddr struct containing address of the error + * @return non-SUCCESS for failure, SUCCESS otherwise + */ +int32_t extractL2Err( TARGETING::TargetHandle_t i_exTgt, bool i_ce, + p9_l2err_extract_err_data &o_errorAddr); + +/** + * @brief Calls HWP to execute a line delete. Saves Line delete command in + HCODE image.
+ * @param i_exTgt EX target + * @param i_l2_err_data struct containing address of the CE + * @return non-SUCCESS for failure, SUCCESS otherwise + */ +int32_t l2LineDelete(TARGETING::TargetHandle_t i_exTgt, + const p9_l2err_extract_err_data& i_l2_err_data); + + } // end namespace PlatServices } // end namespace PRDF diff --git a/src/usr/diag/prdf/prdf_hb_only.mk b/src/usr/diag/prdf/prdf_hb_only.mk index e09fb664d..74439aad9 100644 --- a/src/usr/diag/prdf/prdf_hb_only.mk +++ b/src/usr/diag/prdf/prdf_hb_only.mk @@ -61,6 +61,7 @@ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory/utils/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/io/ +prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/cache/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/perv/ prd_incpath += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs/ prd_incpath += ${ROOTPATH}/src/import/hwpf/fapi2/include @@ -160,12 +161,16 @@ prd_obj_no_sim += p9_sbe_tracearray.o ################################################################################ ifeq (${HOSTBOOT_RUNTIME},1) - +prd_vpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/cache/ # This is really the only file we need, but all of the other files below are # required because of dependencies.
prd_obj_no_sim += memdiags.o prd_obj_no_sim += p9_io_xbus_clear_firs.o prd_obj_no_sim += p9_io_xbus_pdwn_lanes.o +prd_obj_no_sim += p9_l2err_linedelete.o +prd_obj_no_sim += p9_l2err_extract.o +prd_obj_no_sim += p9_l3err_linedelete.o +prd_obj_no_sim += p9_l3err_extract.o prd_vpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory/lib/dimm/ prd_vpath += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory/lib/eff_config/ diff --git a/src/usr/diag/prdf/test/prdf_hb_common_test.mk b/src/usr/diag/prdf/test/prdf_hb_common_test.mk index d7adf3371..2ed7279ec 100755 --- a/src/usr/diag/prdf/test/prdf_hb_common_test.mk +++ b/src/usr/diag/prdf/test/prdf_hb_common_test.mk @@ -63,6 +63,7 @@ EXTRAINCDIR += ${ROOTPATH}/src/include/usr/util EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/common/include/ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/ffdc/ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/memory +EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/procedures/hwp/cache/ EXTRAINCDIR += ${ROOTPATH}/src/import/chips/p9/utils/imageProcs EXTRAINCDIR += ${ROOTPATH}/src/import/hwpf/fapi2/include EXTRAINCDIR += ${ROOTPATH}/src/import/ |