summaryrefslogtreecommitdiffstats
path: root/src/usr/diag/prdf
diff options
context:
space:
mode:
author: Zane Shelley <zshelle@us.ibm.com> 2013-07-02 15:50:48 -0500
committer: A. Patrick Williams III <iawillia@us.ibm.com> 2013-08-08 10:15:32 -0500
commit: 1ba4435e7dba11181fc98876ce558185c3f44499 (patch)
tree: 5580b21df73762356652dff5a6f26fd6cdc5d57d /src/usr/diag/prdf
parent: 011da4162142a73c222e87e2bb1037866f9c1344 (diff)
download: talos-hostboot-1ba4435e7dba11181fc98876ce558185c3f44499.tar.gz
talos-hostboot-1ba4435e7dba11181fc98876ce558185c3f44499.zip
PRD: add better callouts/FFDC for IPL analysis
Change-Id: Iad9349c3a0714915a4d3c5f29b6e49da9d823135 Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/5419 Tested-by: Jenkins Server Reviewed-by: Sachin Gupta <sgupta2m@in.ibm.com> Reviewed-by: Christopher T. Phan <cphan@us.ibm.com> Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com> Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/5725
Diffstat (limited to 'src/usr/diag/prdf')
-rwxr-xr-x src/usr/diag/prdf/common/framework/rule/prdrRegister.H 3
-rwxr-xr-x src/usr/diag/prdf/common/framework/service/prdfPfa5Data.h 4
-rw-r--r-- src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.C 22
-rw-r--r-- src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.H 6
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/prdfCenMba.C 2
-rw-r--r-- src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.C 93
-rw-r--r-- src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.H 38
-rw-r--r-- src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H 11
-rwxr-xr-x src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C 2
-rwxr-xr-x src/usr/diag/prdf/common/prdfEnums.H 4
-rw-r--r-- src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C 467
-rw-r--r-- src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.C 67
-rw-r--r-- src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.H 64
-rw-r--r-- src/usr/diag/prdf/prdfErrlUtil.H 4
-rw-r--r-- src/usr/diag/prdf/prdf_hb_only.mk 3
15 files changed, 512 insertions, 278 deletions
diff --git a/src/usr/diag/prdf/common/framework/rule/prdrRegister.H b/src/usr/diag/prdf/common/framework/rule/prdrRegister.H
index a19e8aa4a..b0060bea7 100755
--- a/src/usr/diag/prdf/common/framework/rule/prdrRegister.H
+++ b/src/usr/diag/prdf/common/framework/rule/prdrRegister.H
@@ -55,6 +55,7 @@
std::cout<<"Failed to write data to file"; \
exit(1); \
}
+
namespace PRDR_COMPILER
{
@@ -141,7 +142,7 @@ struct CaptureReqStruct
str = str.substr(1, str.size() - 2);
}
l_tmp32 = htonl(PRDF::Util::hashString(str.c_str()));
- fwrite(&l_tmp32, sizeof(l_tmp32), 1, l_file);
+ PRDR_FWRITE(&l_tmp32, sizeof(l_tmp32), 1, l_file);
}
};
};
diff --git a/src/usr/diag/prdf/common/framework/service/prdfPfa5Data.h b/src/usr/diag/prdf/common/framework/service/prdfPfa5Data.h
index 9e79aa1c8..94eb66fd0 100755
--- a/src/usr/diag/prdf/common/framework/service/prdfPfa5Data.h
+++ b/src/usr/diag/prdf/common/framework/service/prdfPfa5Data.h
@@ -28,7 +28,7 @@
@brief Version 5 format of the Pfa Data
*/
-#include <prdf_types.h>
+#include <iipconst.h>
#include <utilstream.H>
namespace PRDF
@@ -61,6 +61,8 @@ enum ErrlSubsect
ErrlCapData_2 = 2,
ErrlAVPData_1 = 41,
ErrlAVPData_2 = 42,
+ ErrlMruData_1 = 61, // This will only be used in non-attention code when
+ // we want to add MRU.
ErrlString = 10,
};
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.C
index 2a5490b0e..82d3ef9cd 100644
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.C
@@ -33,6 +33,28 @@ namespace PRDF
using namespace PlatServices;
+bool CenDqBitmap::badDqs() const
+{
+ bool o_badDqs = false;
+
+ for ( uint32_t i = 0; i < PORT_SLCT_PER_MBA; i++ )
+ {
+ for ( uint32_t j = 0; j < DIMM_DQ_RANK_BITMAP_SIZE; j++ )
+ {
+ if ( 0 != iv_data[i][j] )
+ {
+ o_badDqs = true;
+ break;
+ }
+ }
+ if ( o_badDqs ) break;
+ }
+
+ return o_badDqs;
+}
+
+//------------------------------------------------------------------------------
+
int32_t CenDqBitmap::badDqs( uint8_t i_portSlct, bool & o_badDqs ) const
{
#define PRDF_FUNC "[CenDqBitmap::badDqs] "
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.H
index 225ac1a35..340c872bd 100644
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.H
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenDqBitmap.H
@@ -60,6 +60,12 @@ class CenDqBitmap
public: // functions
/**
+ * @brief Queries if there are any bad DQs present on either port.
+ * @return TRUE if any bad DQs present.
+ */
+ bool badDqs() const;
+
+ /**
* @brief Queries the given port to determine if there are any bad DQs
* present.
* @param i_portSlct The target port.
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMba.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMba.C
index f41fcca1b..1ff6265eb 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMba.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMba.C
@@ -103,7 +103,7 @@ int32_t MaintCmdComplete( ExtensibleChip * i_mbaChip,
// successful with no errors because the error log will not be
// committed.
if ( !i_sc.service_data->IsDontCommitErrl() )
- CenMbaCaptureData::addDramRepairsData( mbaTarget, i_sc );
+ CenMbaCaptureData::addMemEccData( mbaTarget, i_sc );
return PRD_NO_CLEAR_FIR_BITS; // FIR bits are cleared by this plugin
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.C
index e58748121..ff1d2a6af 100644
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.C
@@ -27,40 +27,57 @@
*/
#include <prdfCenMbaCaptureData.H>
+
+// Framework includes
#include <utilmem.H>
#include <UtilHash.H>
#include <prdfDramRepairUsrData.H>
#include <iipServiceDataCollector.h>
#include <prdf_ras_services.H>
+// Pegasus includes
#include <prdfCenMarkstore.H>
+#include <prdfCenDqBitmap.H>
+
+using namespace TARGETING;
namespace PRDF
{
+using namespace PlatServices;
+
namespace CenMbaCaptureData
{
-// ----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
-void addDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
- errlHndl_t o_errHdl )
+void addMemEccData( TargetHandle_t i_mba, errlHndl_t io_errl )
{
CaptureData cd;
- captureDramRepairsData( i_mbaTarget, cd);
- ErrDataService::AddCapData( cd, o_errHdl );
+
+ // Add DRAM repairs data from hardware.
+ captureDramRepairsData( i_mba, cd );
+
+ // Add DRAM repairs data from VPD.
+ captureDramRepairsVpd( i_mba, cd );
+
+ ErrDataService::AddCapData( cd, io_errl );
}
-// ----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
-void addDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
- STEP_CODE_DATA_STRUCT & io_sc )
+void addMemEccData( TargetHandle_t i_mba, STEP_CODE_DATA_STRUCT & io_sc )
{
CaptureData & cd = io_sc.service_data->GetCaptureData();
- captureDramRepairsData( i_mbaTarget, cd);
+
+ // Add DRAM repairs data from hardware.
+ captureDramRepairsData( i_mba, cd );
+
+ // Add DRAM repairs data from VPD.
+ captureDramRepairsVpd( i_mba, cd );
}
-// ----------------------------------------------------------------------------
+//------------------------------------------------------------------------------
void captureDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
CaptureData & o_cd )
@@ -141,6 +158,62 @@ void captureDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
}
}
+//------------------------------------------------------------------------------
+
+void captureDramRepairsVpd( TargetHandle_t i_mba, CaptureData & io_cd )
+{
+ #define PRDF_FUNC "[captureDramRepairsVpd] "
+
+ // Sizes (in bytes) of the components that make up the capture data.
+ static const size_t sz_rank = sizeof(uint8_t);
+ static const size_t sz_entry = PORT_SLCT_PER_MBA * DIMM_DQ_RANK_BITMAP_SIZE;
+ static const size_t sz_word = sizeof(CPU_WORD);
+
+ // Get the maximum capture data size.
+ size_t sz_maxData = MAX_RANKS_PER_MBA * (sz_rank + sz_entry);
+
+ // Round up to a word boundary so the data can be byte swapped per word.
+ sz_maxData = ((sz_maxData + sz_word-1) / sz_word) * sz_word;
+
+ // Initialize to 0.
+ uint8_t capData[sz_maxData];
+ memset( capData, 0x00, sz_maxData );
+
+ // Get the data for each rank.
+ uint32_t idx = 0;
+ for ( uint8_t r = 0; r < MAX_RANKS_PER_MBA; r++ )
+ {
+ CenRank rank ( r );
+ CenDqBitmap bitmap;
+
+ if ( SUCCESS != getBadDqBitmap(i_mba, rank, bitmap, true) )
+ {
+ PRDF_ERR( PRDF_FUNC"getBadDqBitmap() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), r );
+ continue; // skip this rank
+ }
+
+ if ( bitmap.badDqs() ) // make sure the data is non-zero
+ {
+ // Add the rank, then the entry data.
+ capData[idx] = r; idx += sz_rank;
+ memcpy(&capData[idx], bitmap.getData(), sz_entry); idx += sz_entry;
+ }
+ }
+
+ // Fix endianness issues on non-PPC machines.
+ size_t sz_capData = idx;
+ sz_capData = ((sz_capData + sz_word-1) / sz_word) * sz_word;
+ for ( uint32_t i = 0; i < (sz_capData/sz_word); i++ )
+ ((CPU_WORD*)capData)[i] = htonl(((CPU_WORD*)capData)[i]);
+
+ // Add data to capture data.
+ BIT_STRING_ADDRESS_CLASS bs ( 0, sz_capData*8, (CPU_WORD *) &capData );
+ io_cd.Add( i_mba, Util::hashString("DRAM_REPAIRS_VPD"), bs );
+
+ #undef PRDF_FUNC
+}
+
+} // end namespace CenMbaCaptureData
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.H
index 07d7fd99c..b264ee31c 100644
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.H
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaCaptureData.H
@@ -42,21 +42,24 @@ namespace CenMbaCaptureData
{
/**
- * @brief Add DRAM repair data to error log.
- * @param i_mbaTarget An MBA target.
- * @param o_errHdl Error log handle.
+ * @brief Adds Memory ECC FFDC to an error log.
+ * @note This is intended to be used in non-normal analysis paths that do not
+ * have an SDC (i.e. Restore DRAM Repairs, IPL MNFG CE Isolation).
+ * @note The data will be limited and include things like the DRAM Repairs data
+ * but will not include stuff like the CE/UE tables, because that
+ * information is not relevant during its intended uses.
+ * @param i_mba Target MBA.
+ * @param io_errl Target error log.
*/
-void addDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
- errlHndl_t o_errHdl );
-
+void addMemEccData( TARGETING::TargetHandle_t i_mba, errlHndl_t io_errl );
/**
- * @brief Add DRAM repair data to SDC.
- * @param i_mbaTarget An MBA target.
- * @param io_sc The step code data struct.
+ * @brief Adds Memory ECC FFDC to an SDC.
+ * @param i_mba Target MBA.
+ * @param io_sc Target step code data struct.
*/
-void addDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
- STEP_CODE_DATA_STRUCT & io_sc );
+void addMemEccData( TARGETING::TargetHandle_t i_mba,
+ STEP_CODE_DATA_STRUCT & io_sc );
/**
* @brief Queries hardware for all DRAM repairs data (chip/symbol marks, DRAM
@@ -69,6 +72,19 @@ void addDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
*/
void captureDramRepairsData( TARGETING::TargetHandle_t i_mbaTarget,
CaptureData & o_cd );
+
+/**
+ * @brief Queries the Bad DQ attributes for the content of the DRAM repairs VPD
+ * and add it to the capture data.
+ * @param i_mba An MBA target.
+ * @param o_cd Capture data struct.
+ * @note This function will be used to capture DRAM repair data into
+ * capture data struct. Other functions can call this function and
+ * update error log
+ */
+void captureDramRepairsVpd( TARGETING::TargetHandle_t i_mba,
+ CaptureData & o_cd );
+
+} // end namespace CenMbaCaptureData
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H
index 3fa49f259..26cc84bdd 100644
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMbaExtraSig.H
@@ -45,4 +45,15 @@ PRDR_ERROR_SIGNATURE(VcmMarksUnavail, 0xffff0023, "",
PRDR_ERROR_SIGNATURE(DsdDramSpared, 0xffff0030, "", "DSD: DRAM spared");
PRDR_ERROR_SIGNATURE(DsdBadSpare, 0xffff0031, "", "DSD: DRAM spare is bad");
+PRDR_ERROR_SIGNATURE(RdrInternalFail, 0xffff0040, "", "RDR: Internal failure");
+PRDR_ERROR_SIGNATURE(RdrInvalidConfig, 0xffff0041, "", "RDR: Invalid config");
+PRDR_ERROR_SIGNATURE(RdrScreenBadDqs, 0xffff0042, "",
+ "RDR: DRAM repairs disabled and VPD found");
+PRDR_ERROR_SIGNATURE(RdrRepairsUsed, 0xffff0043, "",
+ "RDR: Both spare and chip mark used");
+PRDR_ERROR_SIGNATURE(RdrRepairUnavail, 0xffff0044, "",
+ "RDR: Repairs needed but unavailable");
+
+
+
#endif // __prdfCenMbaExtraSig_H
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C
index c2ae26936..02fdc5405 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C
@@ -609,6 +609,8 @@ int32_t calloutPeerBus( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & i_sc,
} while (0);
return SUCCESS;
+
+ #undef PRDF_FUNC
}
#define PLUGIN_CALLOUT_PEER_BUS( BUS, TYPE, POS ) \
diff --git a/src/usr/diag/prdf/common/prdfEnums.H b/src/usr/diag/prdf/common/prdfEnums.H
index 7f6b907b8..ee2caff46 100755
--- a/src/usr/diag/prdf/common/prdfEnums.H
+++ b/src/usr/diag/prdf/common/prdfEnums.H
@@ -29,6 +29,10 @@
#ifdef __HOSTBOOT_MODULE
#ifndef __PRD_RULE_COMPILE
+
+ #include <errl/errlmanager.H> // Must be included before including
+ // hwasCallout.H, otherwise the compile will
+ // fail.
#include <hwas/common/hwasCallout.H>
// FIXME: RTC: 62867 will resolve this
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C b/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
index cfcf67afa..2f2d88ea5 100644
--- a/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
+++ b/src/usr/diag/prdf/plat/pegasus/prdfDramRepairs.C
@@ -34,9 +34,11 @@
#include "common/plat/pegasus/prdfCalloutUtil.H"
#include "common/plat/pegasus/prdfCenDqBitmap.H"
#include "common/plat/pegasus/prdfCenMarkstore.H"
+#include "common/plat/pegasus/prdfCenMbaExtraSig.H"
#include "common/plat/pegasus/prdfCenSymbol.H"
#include "common/plat/pegasus/prdfMemoryMru.H"
#include "framework/service/prdfPlatServices.H"
+#include "plat/pegasus/prdfPlatCalloutUtil.H"
using namespace HWAS;
using namespace std;
@@ -45,111 +47,76 @@ using namespace TARGETING;
namespace PRDF
{
-static const uint8_t INVALID_SYMBOL = 0xff;
+using namespace PlatServices;
-bool validSymbol(uint8_t i_symbol)
+namespace RDR // local utility functions to support PRDF::restoreDramRepairs()
{
- return i_symbol != INVALID_SYMBOL;
-}
-void commitRestoreCallout( void (*i_func)(errlHndl_t &, void *), void * i_data,
- TargetHandle_t i_mba )
+// Creates and returns an error log.
+errlHndl_t createErrl( uint32_t i_reasonCode, TargetHandle_t i_mba,
+ uint32_t i_signature )
{
- PRDF_DENTER("commitRestoreCallout");
-
- errlHndl_t err = NULL;
-
- PRDF_HW_CREATE_ERRL(
- err,
- ERRL_SEV_PREDICTIVE,
- ERRL_ETYPE_NOT_APPLICABLE,
- SRCI_MACH_CHECK,
- SRCI_NO_ATTR,
- PRDF_RESTORE_DRAM_REPAIR,
- FSP_DEFAULT_REFCODE,
- PRDF_DETECTED_FAIL_HARDWARE_PROBABLE,
- 0, 0, 0, 0, // user data
- HWSV_SYS_NO_TERMINATE,
- false); // no pld check
-
- // add the callout
-
- (*i_func)(err, i_data);
-
- bool term = false;
-
- CenMbaCaptureData::addDramRepairsData( i_mba, err );
+ uint64_t userdata12 = PRDF_GET_UINT64_FROM_UINT32( getHuid(i_mba), 0 );
+ uint64_t userdata34 = PRDF_GET_UINT64_FROM_UINT32( i_signature, 0 );
+
+ // Note that the error log tags are not needed because PRD uses its own
+ // signature parser.
+
+ return new ERRORLOG::ErrlEntry(
+ ERRORLOG::ERRL_SEV_PREDICTIVE, // severity
+ PRDF_RESTORE_DRAM_REPAIR, // module ID
+ i_reasonCode, // reason code
+ userdata12, // user data 1 & 2
+ userdata34 ); // user data 3 & 4
+}
- PRDF_HW_COMMIT_ERRL(
- term,
- err,
- HWSV::HWSV_DECONFIG_DEFER,
- ERRL_ACTION_REPORT,
- HWSV_CONTINUE);
+//------------------------------------------------------------------------------
- if(term)
+// If an error log is given, will add DRAM repairs FFDC and traces to error log,
+// then commit the error log.
+void commitErrl( errlHndl_t i_errl, TargetHandle_t i_mba )
+{
+ if ( NULL != i_errl )
{
- // FIXME...this is a little goofy.
- // Should be scrubbed with RTC 51552
+ // Add capture data
+ CenMbaCaptureData::addMemEccData( i_mba, i_errl );
- PRDF_COMMIT_ERRL(err, ERRL_ACTION_REPORT);
+ // Add traces
+ i_errl->collectTrace( PRDF_COMP_NAME, 512 );
+
+ // Commit the error log
+ ERRORLOG::errlCommit( i_errl, PRDF_COMP_ID );
}
}
-void addMemMruCallout(errlHndl_t & io_log, void * i_memMru)
-{
- PRDF_DENTER("addMemMruCallout");
+//------------------------------------------------------------------------------
- if ( NULL != i_memMru )
+// If there were analysis errors, will create and commit an error log with 2nd
+// level support callout.
+void commitSoftError( uint32_t i_reasonCode, TargetHandle_t i_mba,
+ uint32_t i_signature, bool i_analysisErrors )
+{
+ if ( i_analysisErrors )
{
- MemoryMru *memMru = static_cast<MemoryMru *>(i_memMru);
-
- TargetHandleList partList = memMru->getCalloutList();
- for ( TargetHandleList::iterator it = partList.begin();
- it != partList.end(); it++ )
- {
- PRDF_HW_ADD_CALLOUT(
- *it,
- SRCI_PRIORITY_HIGH,
- HWSV::HWSV_DECONFIG,
- HWSV::HWSV_DECONFIG_GARD,
- io_log,
- false, // don't write src to vpd
- GARD_Predictive,
- ERRL_SEV_PREDICTIVE,
- false); // don't update hcdb
- }
+ errlHndl_t errl = createErrl( i_reasonCode, i_mba, i_signature );
+ errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH );
+ commitErrl( errl, i_mba );
}
}
-void addDimmCallout(errlHndl_t & io_log, void * i_dimm)
-{
- PRDF_DENTER("addDimmCallout");
-
- PRDF_HW_ADD_CALLOUT(
- static_cast<TargetHandle_t>(i_dimm),
- SRCI_PRIORITY_HIGH,
- HWSV::HWSV_DECONFIG,
- HWSV::HWSV_DECONFIG_GARD,
- io_log,
- false, // don't write src to vpd
- GARD_Predictive,
- ERRL_SEV_PREDICTIVE,
- false); // don't update hcdb
-}
+//------------------------------------------------------------------------------
bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask )
{
- PRDF_DENTER("processRepairedRanks: %p, 0x%02x",
- i_mba, i_repairedRankMask);
+ #define PRDF_FUNC "[processRepairedRanks] "
- // check the argument ranks for repairs
- // that violate RAS policy
+ // The bits in i_repairedRankMask represent ranks that have repairs. Query
+ // hardware and compare against RAS policies.
- bool calloutMade = false;
+ bool o_calloutMade = false;
+ bool analysisErrors = false;
- // check each rank for repairs
- // that violate RAS policy
+ errlHndl_t errl = NULL; // Initially NULL, will create if needed.
for ( uint8_t r = 0; r < MAX_RANKS_PER_MBA; ++r )
{
@@ -161,15 +128,21 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask )
CenRank rank ( r );
CenMark mark;
- if ( SUCCESS != PlatServices::mssGetMarkStore(i_mba, rank, mark) )
+ if ( SUCCESS != mssGetMarkStore(i_mba, rank, mark) )
{
+ PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), rank.flatten() );
+ analysisErrors = true;
continue; // skip this rank
}
CenSymbol sp0, sp1, sp;
- if ( SUCCESS != PlatServices::mssGetSteerMux(i_mba, rank, sp0, sp1, sp))
+ if ( SUCCESS != mssGetSteerMux(i_mba, rank, sp0, sp1, sp))
{
+ PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), rank.flatten() );
+ analysisErrors = true;
continue; // skip this rank
}
@@ -179,112 +152,110 @@ bool processRepairedRanks( TargetHandle_t i_mba, uint8_t i_repairedRankMask )
// This rank has both a steer and a chip mark. Call out the DIMM
// with the chip mark.
- MemoryMru memoryMru( i_mba, rank, mark.getCM() );
-
- commitRestoreCallout( &addMemMruCallout, &memoryMru, i_mba );
+ if ( NULL == errl )
+ {
+ errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrRepairsUsed );
+ }
- calloutMade = true;
+ MemoryMru memoryMru( i_mba, rank, mark.getCM() );
+ CalloutUtil::calloutMemoryMru( errl, memoryMru,
+ SRCI_PRIORITY_HIGH,
+ HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_Predictive );
+ o_calloutMade = true;
}
}
- PRDF_DEXIT("processRepairedRanks");
+ // Commit the error log, if needed.
+ commitErrl( errl, i_mba );
- return calloutMade;
-}
+ // Commit an additional error log indicating something failed in the
+ // analysis, if needed.
+ commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
+ PRDFSIG_RdrInternalFail, analysisErrors );
-bool processBadDimms(TargetHandle_t i_mba, uint8_t i_badDimmMask)
-{
- PRDF_DENTER("processBadDimms: %p, 0x%02x", i_mba, i_badDimmMask);
+ return o_calloutMade;
- const struct DimmPortAssoc
- {
- uint8_t port;
- uint8_t dimm;
- uint8_t enc;
-
- } dimmPortAssoc[] = {
-
- {0, 0, 0x8},
- {0, 1, 0x4},
- {1, 0, 0x2},
- {1, 1, 0x1},
- };
-
- uint64_t calloutCount = 0;
+ #undef PRDF_FUNC
+}
- // callout the argument dimms
+//------------------------------------------------------------------------------
- // get all the dimms connected to this MBA
+bool processBadDimms( TargetHandle_t i_mba, uint8_t i_badDimmMask )
+{
+ #define PRDF_FUNC "[processBadDimms] "
- TARGETING::TargetHandleList dimms = PlatServices::getConnected(
- i_mba, TARGETING::TYPE_DIMM);
+ // The bits in i_badDimmMask represent DIMMs that have exceeded the
+ // available repairs. Callout these DIMMs.
- // convert the encoded dimms that had too many repairs to
- // dimm targets
+ bool o_calloutMade = false;
+ bool analysisErrors = false;
- TargetHandleList::iterator dit = dimms.end();
+ errlHndl_t errl = NULL; // Initially NULL, will create if needed.
- while(dit-- != dimms.begin())
+ // Iterate the list of all DIMMs connected to this MBA.
+ TargetHandleList dimms = getConnected( i_mba, TYPE_DIMM );
+ for ( TargetHandleList::iterator i = dimms.begin(); i < dimms.end(); i++ )
{
uint8_t port = 0, dimm = 0;
- if(SUCCESS != PlatServices::getMbaPort(*dit, port))
+ if ( SUCCESS != getMbaPort(*i, port) )
{
- // skip this dimm
- continue;
+ PRDF_ERR( PRDF_FUNC"getMbaPort() failed: DIMM=0x%08x", getHuid(*i));
+ analysisErrors = true;
+ continue; // skip this dimm
}
- if(SUCCESS != PlatServices::getMbaDimm(*dit, dimm))
+ if ( SUCCESS != getMbaDimm(*i, dimm) )
{
- // skip this dimm
- continue;
+ PRDF_ERR( PRDF_FUNC"getMbaDimm() failed: DIMM=0x%08x", getHuid(*i));
+ analysisErrors = true;
+ continue; // skip this dimm
}
- // see if the passed in dimm
- // was flagged as bad by the restore procedure
-
- bool match = false;
+ // The 4 bits of i_badDimmMask are defined as p0d0, p0d1, p1d0, and p1d1.
+ uint8_t mask = 0x8 >> (port * PORT_SLCT_PER_MBA + dimm);
- const DimmPortAssoc * it = dimmPortAssoc
- + sizeof(dimmPortAssoc)/sizeof(*dimmPortAssoc);
-
- while(!match && it-- != dimmPortAssoc)
+ if ( 0 != (i_badDimmMask & mask) )
{
- if(i_badDimmMask & it->enc
- && port == it->port
- && dimm == it->dimm)
+ if ( NULL == errl )
{
- // this dimm is a match
-
- match = true;
+ errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrRepairUnavail );
}
- }
- // call them out
-
- if(match)
- {
- ++calloutCount;
- commitRestoreCallout( &addDimmCallout, *dit, i_mba );
+ o_calloutMade = true;
+ errl->addHwCallout( *i, SRCI_PRIORITY_HIGH, HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_Predictive );
}
}
- PRDF_DEXIT("processBadDimms: bad dimm count: %d", calloutCount);
+ // Commit the error log, if needed.
+ commitErrl( errl, i_mba );
- return 0 != calloutCount;
+ // Commit an additional error log indicating something failed in the
+ // analysis, if needed.
+ commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
+ PRDFSIG_RdrInternalFail, analysisErrors );
+
+ return o_calloutMade;
+
+ #undef PRDF_FUNC
}
-bool processDq(TargetHandle_t i_mba)
+//------------------------------------------------------------------------------
+
+bool screenBadDqs( TargetHandle_t i_mba )
{
- using namespace TARGETING;
- using namespace PlatServices;
+ #define PRDF_FUNC "[screenBadDqs] "
- PRDF_DENTER("processDq: %p", i_mba);
+ // Callout any attached DIMMs that have any bad DQs.
- // callout any dimms on the argument MBA
- // that have any bad dq
+ bool o_calloutMade = false;
+ bool analysisErrors = false;
- uint64_t calloutCount = 0;
+ errlHndl_t errl = NULL; // Initially NULL, will create if needed.
for ( uint32_t r = 0; r < MAX_RANKS_PER_MBA; r++ )
{
@@ -293,6 +264,9 @@ bool processDq(TargetHandle_t i_mba)
if ( SUCCESS != getBadDqBitmap(i_mba, rank, bitmap, true) )
{
+ PRDF_ERR( PRDF_FUNC"getBadDqBitmap() failed: MBA=0x%08x rank=%d",
+ getHuid(i_mba), rank.flatten() );
+ analysisErrors = true;
continue; // skip this rank
}
@@ -301,175 +275,170 @@ bool processDq(TargetHandle_t i_mba)
bool badDqs = false;
if ( SUCCESS != bitmap.badDqs(p, badDqs) )
{
+ PRDF_ERR( PRDF_FUNC"badDqs() failed: MBA=0x%08x rank=%d "
+ "port=%d", getHuid(i_mba), rank.flatten(), p );
+ analysisErrors = true;
continue; // skip this DIMM
}
if ( !badDqs )
{
- continue; // skip this DIMM
+ continue; // nothing to do, skip this DIMM
}
TargetHandleList list = CalloutUtil::getConnectedDimms( i_mba,
rank, p );
if ( 0 == list.size() )
{
- PRDF_ERR( "[processDq] bad bits present but no connected "
- "DIMM: MBA=0x%08x rank=%d port=%d", getHuid(i_mba),
+ PRDF_ERR( PRDF_FUNC"bad bits present but no connected DIMM: "
+ "MBA=0x%08x rank=%d port=%d", getHuid(i_mba),
rank.flatten(), p );
- continue;
+ analysisErrors = true;
+ continue; // skip this DIMM
}
for ( TargetHandleList::iterator i = list.begin();
i < list.end(); i++ )
{
- ++calloutCount;
- commitRestoreCallout( &addDimmCallout, *i, i_mba );
+ if ( NULL == errl )
+ {
+ errl = createErrl( PRDF_DETECTED_FAIL_HARDWARE, i_mba,
+ PRDFSIG_RdrScreenBadDqs );
+ }
+
+ o_calloutMade = true;
+ errl->addHwCallout( *i, SRCI_PRIORITY_HIGH,
+ HWAS::DELAYED_DECONFIG,
+ HWAS::GARD_Predictive );
}
}
}
- PRDF_DEXIT("processDq: bad dq dimm count: %d", calloutCount);
+ // Commit the error log, if needed.
+ commitErrl( errl, i_mba );
+
+ // Commit an additional error log indicating something failed in the
+ // analysis, if needed.
+ commitSoftError( PRDF_DETECTED_FAIL_SOFTWARE, i_mba,
+ PRDFSIG_RdrInternalFail, analysisErrors );
+
+ return o_calloutMade;
- return 0 != calloutCount;
+ #undef PRDF_FUNC
}
-void deployDramSpares(TargetHandle_t i_mba)
-{
- using namespace fapi;
+//------------------------------------------------------------------------------
- bool x4 = PlatServices::isDramWidthX4(i_mba);
+void deployDramSpares( TargetHandle_t i_mba )
+{
+ bool x4 = isDramWidthX4(i_mba);
for ( uint32_t r = 0; r < MAX_RANKS_PER_MBA; r++ )
{
CenRank rank ( r );
- CenSymbol symbol = CenSymbol::fromSymbol( i_mba, rank, 0 );
- // ignore errors from putSteerMux
+ // Doesn't matter which DRAM is spared as long as they are all spared.
+ // Also, make sure the ECC spare is on a different DRAM than the spare
+ // DRAM.
+ CenSymbol symPort0 = CenSymbol::fromDimmDq( i_mba, rank, 0, 0 );
+ CenSymbol symPort1 = CenSymbol::fromDimmDq( i_mba, rank, 0, 1 );
+ CenSymbol symEccSp = CenSymbol::fromDimmDq( i_mba, rank, 8, 0 );
+
+ int32_t l_rc = SUCCESS;
- static_cast<void>(
- PlatServices::mssSetSteerMux(i_mba, rank, symbol, false) );
+ l_rc = mssSetSteerMux( i_mba, rank, symPort0, false );
+ l_rc |= mssSetSteerMux( i_mba, rank, symPort1, false );
- if( x4 )
+ if ( x4 )
+ l_rc |= mssSetSteerMux( i_mba, rank, symEccSp, true );
+
+ if ( SUCCESS != l_rc )
{
- static_cast<void>(
- PlatServices::mssSetSteerMux(i_mba, rank, symbol, true) );
+ // mssSetSteerMux() will print a trace and commit the error log,
+ // however, we need to handle the return code or we get a compile
+ // warning in Hostboot.
+ continue;
}
}
}
+} // end namespace RDR
+
//------------------------------------------------------------------------------
// External functions - declared in prdfMain.H
//------------------------------------------------------------------------------
int32_t restoreDramRepairs( TargetHandle_t i_mba )
{
- PRDF_ENTER( "restoreDramRepairs(0x%08x)", PlatServices::getHuid(i_mba) );
-
- bool calloutMade = false;
+ #define PRDF_FUNC "PRDF::restoreDramRepairs"
- uint8_t repairedRankMask = 0, badDimmMask = 0;
+ PRDF_ENTER( PRDF_FUNC"(0x%08x)", getHuid(i_mba) );
- do {
+ bool calloutMade = false;
- if(PlatServices::isMemoryPreservingIpl())
+ do
+ {
+ if ( isMemoryPreservingIpl() )
{
- // nothing to do in MPIPL
-
+ // Power is preserved on a Centaur for a MPIPL. So the marks and
+ // spares will not need to be restored.
break;
}
- bool spareDramDeploy = PlatServices::mnfgSpareDramDeploy();
+ bool spareDramDeploy = mnfgSpareDramDeploy();
- if(spareDramDeploy)
+ if ( spareDramDeploy )
{
- deployDramSpares(i_mba);
+ // Deploy all spares for MNFG corner tests.
+ RDR::deployDramSpares(i_mba);
}
- // in mfg mode, check dq and don't restore anything
-
- if(PlatServices::areDramRepairsDisabled())
+ if ( areDramRepairsDisabled() )
{
- if(processDq(i_mba))
- {
- calloutMade = true;
- }
+ // DRAM Repairs are disabled in MNFG mode, so screen all DIMMs with
+ // VPD information.
+ if ( RDR::screenBadDqs(i_mba) ) calloutMade = true;
+ // No need to continue because there will not be anything to
+ // restore.
break;
}
- if(spareDramDeploy)
+ if ( spareDramDeploy )
{
- // this is an error...the spare dram
- // deploy bit was set but we weren't
- // in mfg mode...log an error for MFG
-
- errlHndl_t err = NULL;
-
- PRDF_ERR( "[restoreDramRepairs] "
- "The specified combination of mfg policy flags is invalid");
-
- /*@
- * @errortype
- * @reasoncode PRDF_INVALID_CONFIG
- * @subsys EPUB_FIRMWARE_SUBSYS
- * @moduleid PRDF_RESTORE_DRAM_REPAIR
- * @devdesc The specified combination of policy flags is invalid.
- */
- PRDF_CREATE_ERRL(
- err,
- ERRL_SEV_PREDICTIVE,
- ERRL_ETYPE_NOT_APPLICABLE,
- SRCI_MACH_CHECK,
- SRCI_NO_ATTR,
- PRDF_RESTORE_DRAM_REPAIR,
- FSP_DEFAULT_REFCODE,
- PRDF_INVALID_CONFIG,
- 0, 0, 0, 0);
- PRDF_COMMIT_ERRL(err, ERRL_ACTION_REPORT);
-
- // assume mfg mode (no repairs) ...
+ // This is an error. The MNFG spare DRAM deploy bit is set, but DRAM
+ // Repairs have not been disabled.
- break;
- }
-
- if(SUCCESS != PlatServices::mssRestoreDramRepairs(
- i_mba,
- repairedRankMask,
- badDimmMask))
- {
- // can't check anything if
- // this doesn't work
+ PRDF_ERR( "["PRDF_FUNC"] MNFG spare deploy enabled, but DRAM "
+ "repairs are not disabled" );
- PRDF_ERR( "[restoreDramRepairs] "
- "PlatServices::mssRestoreDramRepairs failed" );
+ RDR::commitSoftError( PRDF_INVALID_CONFIG, i_mba,
+ PRDFSIG_RdrInvalidConfig, true );
- break;
+ break; // Assume user meant to disable DRAM repairs.
}
- // callout bad dimms
-
- if(processBadDimms(
- i_mba,
- badDimmMask))
+ uint8_t rankMask = 0, dimmMask = 0;
+ if ( SUCCESS != mssRestoreDramRepairs(i_mba, rankMask, dimmMask) )
{
- calloutMade = true;
+ // Can't check anything if this doesn't work.
+ PRDF_ERR( "["PRDF_FUNC"] mssRestoreDramRepairs() failed" );
+ break;
}
- // check repaired ranks for
- // RAS policy violations
+ // Callout DIMMs with too many bad bits and not enough repairs available
+ if ( RDR::processBadDimms(i_mba, dimmMask) ) calloutMade = true;
- if(processRepairedRanks(
- i_mba,
- repairedRankMask))
- {
- calloutMade = true;
- }
+ // Check repaired ranks for RAS policy violations.
+ if ( RDR::processRepairedRanks(i_mba, rankMask) ) calloutMade = true;
} while(0);
- PRDF_EXIT( "restoreDramRepairs(0x%08x)", PlatServices::getHuid(i_mba) );
+ PRDF_EXIT( PRDF_FUNC"(0x%08x)", getHuid(i_mba) );
return calloutMade ? FAIL : SUCCESS;
+
+ #undef PRDF_FUNC
}
} // end namespace PRDF
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.C b/src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.C
new file mode 100644
index 000000000..a7efe46cb
--- /dev/null
+++ b/src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.C
@@ -0,0 +1,67 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.C $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+/** @file prdfPlatCalloutUtil.C */
+
+#include <prdfPlatCalloutUtil.H>
+
+// Framework includes
+#include <prdfErrlUtil.H>
+#include <prdfPfa5Data.h>
+#include <prdfPlatServices.H>
+
+// Pegasus includes
+#include <prdfMemoryMru.H>
+
+using namespace TARGETING;
+
+namespace PRDF
+{
+
+using namespace PlatServices;
+
+namespace CalloutUtil
+{
+
+void calloutMemoryMru( errlHndl_t io_errl, const MemoryMru & i_memmru,
+ const HWAS::callOutPriority i_priority,
+ const HWAS::DeconfigEnum i_deconfigState,
+ const HWAS::GARD_ErrorType i_gardType )
+{
+ // Add a HW callout for each target in the MemoryMru's callout list. Every callout uses the same priority, deconfig state, and GARD type. NOTE: io_errl is dereferenced here without a NULL check.
+ TargetHandleList partList = i_memmru.getCalloutList();
+ for ( TargetHandleList::iterator it = partList.begin();
+ it != partList.end(); it++ )
+ {
+ io_errl->addHwCallout( *it, i_priority, i_deconfigState, i_gardType );
+ }
+
+ // Add the MemoryMru's 32-bit encoding (toUint32) to the error log via PRDF_ADD_FFDC, tagged ErrlVer1 / ErrlMruData_1. A local copy is needed so its address and size can be passed.
+ uint32_t tmpMru = i_memmru.toUint32();
+ PRDF_ADD_FFDC( io_errl, &tmpMru, sizeof(tmpMru), ErrlVer1, ErrlMruData_1 );
+}
+
+} // end namespace CalloutUtil
+
+} // end namespace PRDF
+
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.H b/src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.H
new file mode 100644
index 000000000..17753da51
--- /dev/null
+++ b/src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.H
@@ -0,0 +1,64 @@
+/* IBM_PROLOG_BEGIN_TAG */
+/* This is an automatically generated prolog. */
+/* */
+/* $Source: src/usr/diag/prdf/plat/pegasus/prdfPlatCalloutUtil.H $ */
+/* */
+/* IBM CONFIDENTIAL */
+/* */
+/* COPYRIGHT International Business Machines Corp. 2012,2013 */
+/* */
+/* p1 */
+/* */
+/* Object Code Only (OCO) source materials */
+/* Licensed Internal Code Source Materials */
+/* IBM HostBoot Licensed Internal Code */
+/* */
+/* The source code for this program is not published or otherwise */
+/* divested of its trade secrets, irrespective of what has been */
+/* deposited with the U.S. Copyright Office. */
+/* */
+/* Origin: 30 */
+/* */
+/* IBM_PROLOG_END_TAG */
+
+#ifndef __prdfPlatCalloutUtil_H
+#define __prdfPlatCalloutUtil_H
+
+/** @file prdfPlatCalloutUtil.H */
+
+// Framework includes
+#include <prdfEnums.H>
+
+namespace PRDF
+{
+
+class MemoryMru;
+
+namespace CalloutUtil
+{
+
+/**
+ * @brief Add all parts of a MemoryMru to the callout list of an error log.
+ * Also, adds the MemoryMru info to the capture data.
+ *
+ * This is only intended to be used by non-attention analysis code like Restore
+ * DRAM Repairs or MNFG IPL CE analysis. In these cases, there is no SDC to
+ * collect the callout info or capture data.
+ *
+ * @param io_errl The target error log. Dereferenced without a NULL check.
+ * @param i_memmru The target MemoryMru.
+ * @param i_priority The callout priority, applied to every part.
+ * @param i_deconfigState The deconfiguration state, applied to every part.
+ * @param i_gardType The GARD error type, applied to every part.
+ */
+void calloutMemoryMru( errlHndl_t io_errl, const MemoryMru & i_memmru,
+ const HWAS::callOutPriority i_priority,
+ const HWAS::DeconfigEnum i_deconfigState,
+ const HWAS::GARD_ErrorType i_gardType );
+
+} // end namespace CalloutUtil
+
+} // end namespace PRDF
+
+#endif // __prdfPlatCalloutUtil_H
+
diff --git a/src/usr/diag/prdf/prdfErrlUtil.H b/src/usr/diag/prdf/prdfErrlUtil.H
index 9563758df..8089cabfb 100644
--- a/src/usr/diag/prdf/prdfErrlUtil.H
+++ b/src/usr/diag/prdf/prdfErrlUtil.H
@@ -32,10 +32,6 @@
* related declarations specific to hostboot.
*/
-/*--------------------------------------------------------------------*/
-/* Includes */
-/*--------------------------------------------------------------------*/
-#include <errl/errlmanager.H>
#include <prdfEnums.H>
/**
diff --git a/src/usr/diag/prdf/prdf_hb_only.mk b/src/usr/diag/prdf/prdf_hb_only.mk
index acb774ebe..a6d44d64c 100644
--- a/src/usr/diag/prdf/prdf_hb_only.mk
+++ b/src/usr/diag/prdf/prdf_hb_only.mk
@@ -33,5 +33,6 @@ PRDF_RULE_PLUGINS_PEGASUS_HB = \
################################################################################
prd_pegasus_specific_HB = \
- prdfDramRepairs.o
+ prdfDramRepairs.o \
+ prdfPlatCalloutUtil.o
OpenPOWER on IntegriCloud