summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZane Shelley <zshelle@us.ibm.com>2018-05-09 16:33:01 -0500
committerZane C. Shelley <zshelle@us.ibm.com>2018-06-03 16:38:16 -0400
commit9192da4a49db30aa2cba4f6376e5ea8b6449de67 (patch)
tree50fbc6e68bb9d7214f64b9f979ba5db6958f0c73
parentb14259e6b41abdf7a94b8c96b743d28071900549 (diff)
downloadtalos-hostboot-9192da4a49db30aa2cba4f6376e5ea8b6449de67.tar.gz
talos-hostboot-9192da4a49db30aa2cba4f6376e5ea8b6449de67.zip
PRD: refined handleChnlFail() for Centaur
Change-Id: I698b0320a51cb9452a6b0e661c6f3c24a8f70b78 RTC: 136123 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58607 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com> Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com> Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/59762 Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com> Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
-rw-r--r--src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C27
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/mem/prdfMemUtils.C168
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/mem/prdfMemUtils.H28
-rw-r--r--src/usr/diag/prdf/common/plat/p9/prdfP9Dmi_common.C17
-rw-r--r--src/usr/diag/prdf/common/plat/p9/prdfP9Mc_common.C17
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C79
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C23
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C8
-rw-r--r--src/usr/diag/prdf/common/plat/prdfPlatServices_common.C38
-rwxr-xr-xsrc/usr/diag/prdf/common/plat/prdfPlatServices_common.H11
-rw-r--r--src/usr/diag/prdf/plat/pegasus/prdfPlatCenMba_ipl.C97
-rw-r--r--src/usr/diag/prdf/prdf_hb_only.mk1
12 files changed, 252 insertions, 262 deletions
diff --git a/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C b/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C
index e34b40591..75cefe21e 100644
--- a/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C
+++ b/src/usr/diag/prdf/common/plat/cen/prdfCenMembuf_common.C
@@ -64,6 +64,23 @@ int32_t Initialize( ExtensibleChip * i_chip )
PRDF_PLUGIN_DEFINE( cen_centaur, Initialize );
/**
+ * @brief Analysis code that is called before the main analyze() function.
+ * @param i_chip A MEMBUF chip.
+ * @param io_sc The step code data struct.
+ * @param o_analyzed True if analysis is done on this chip, false otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_analyzed )
+{
+ o_analyzed = false; // Analysis itself is left to analyze().
+ // Check for a channel failure first. If one is found, handleChnlFail()
+ // switches the secondary attention type in io_sc to UNIT_CS.
+ return MemUtils::handleChnlFail<TYPE_MEMBUF>( i_chip, io_sc );
+}
+PRDF_PLUGIN_DEFINE( cen_centaur, PreAnalysis );
+
+/**
* @brief Plugin function called after analysis is complete but before PRD
* exits.
* @param i_chip A MEMBUF chip.
@@ -108,11 +125,15 @@ int32_t analyzeDmiChnlFail( ExtensibleChip * i_mbChip,
do
{
+ // Query the connected DMI for channel fail attentions.
ExtensibleChip * dmiChip = getConnectedParent( i_mbChip, TYPE_DMI );
-
- // TODO: RTC 136123 Need to call new interface that queries if there was
- // a channel fail attention on the other side of the interface.
bool dmiChnlFail = false;
+ if ( SUCCESS != queryChnlFail<TYPE_DMI>(dmiChip, dmiChnlFail) )
+ {
+ PRDF_ERR( PRDF_FUNC "queryChnlFail(0x%08x) failed",
+ dmiChip->getHuid() );
+ break;
+ }
// If there is a channel fail attention on the other side of the bus,
// analyze the DMI target.
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C
index d2dfea2bd..3e3743dd7 100755
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.C
@@ -506,69 +506,153 @@ void cleanupChnlAttns<TYPE_MEMBUF>( ExtensibleChip * i_chip,
//------------------------------------------------------------------------------
-/* TODO RTC 136123
-int32_t checkMcsChannelFail( ExtensibleChip * i_mcsChip,
- STEP_CODE_DATA_STRUCT & io_sc )
+template<TARGETING::TYPE T>
+uint32_t __queryChnlFail( ExtensibleChip * i_chip, bool & o_chnlFail );
+
+template<>
+uint32_t __queryChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip,
+ bool & o_chnlFail )
{
- #define PRDF_FUNC "[MemUtils::checkMcsChannelFail] "
+ #define PRDF_FUNC "[MemUtils::__queryChnlFail] "
- int32_t o_rc = SUCCESS;
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( TYPE_MEMBUF == i_chip->getType() );
- do
- {
- // Skip if already handling unit checkstop.
- if ( io_sc.service_data->IsUnitCS() )
- break;
+ uint32_t o_rc = SUCCESS;
- // Must be an MCS.
- if ( TYPE_MCS != getTargetType(i_mcsChip->GetChipHandle()) )
- {
- PRDF_ERR( PRDF_FUNC "i_mcsChip is not TYPE_MCS" );
- o_rc = FAIL; break;
- }
+ o_chnlFail = false;
- // Check MCIFIR[31] for presence of channel fail.
- SCAN_COMM_REGISTER_CLASS * mcifir = i_mcsChip->getRegister("MCIFIR");
- o_rc = mcifir->Read();
+ do
+ {
+ // Simply check the Centaur CS global reg for active attentions.
+ SCAN_COMM_REGISTER_CLASS * fir = i_chip->getRegister("GLOBAL_CS_FIR");
+ o_rc = fir->Read();
if ( SUCCESS != o_rc )
{
- PRDF_ERR( PRDF_FUNC "Read() failed on MCIFIR" );
+ PRDF_ERR( PRDF_FUNC "Failed to read GLOBAL_CS_FIR on 0x%08x",
+ i_chip->getHuid() );
break;
}
- if ( !mcifir->IsBitSet(31) ) break; // No channel fail, so exit.
+ o_chnlFail = !fir->BitStringIsZero();
- // Set unit checkstop flag and cause attention type.
- io_sc.service_data->setFlag(ServiceDataCollector::UNIT_CS);
- io_sc.service_data->setSecondaryAttnType(UNIT_CS);
- io_sc.service_data->SetThresholdMaskId(0);
+ } while (0);
- // Set it as SUE generation point.
- io_sc.service_data->SetUERE();
+ return o_rc;
- // Indicate that cleanup is required.
- P8McsDataBundle * mcsdb = getMcsDataBundle( i_mcsChip );
- ExtensibleChip * membChip = mcsdb->getMembChip();
- if ( NULL == membChip )
- {
- PRDF_ERR( PRDF_FUNC "getMembChip() returned NULL" );
- o_rc = FAIL; break;
- }
- MembufDataBundle * mbdb = getMembufDataBundle( membChip );
- mbdb->iv_doChnlFailCleanup = true;
+ #undef PRDF_FUNC
+}
+
+template<>
+uint32_t __queryChnlFail<TYPE_DMI>( ExtensibleChip * i_chip, bool & o_chnlFail )
+{
+ // There is a HWP on the processor side that will query the CHIFIR, IOMCFIR,
+ // and associated configuration registers for a valid channel failure
+ // attention.
+ return PlatServices::queryChnlFail<TYPE_DMI>( i_chip, o_chnlFail );
+}
+
+//------------------------------------------------------------------------------
+
+template<TARGETING::TYPE T>
+void __setChnlFailCleanup( ExtensibleChip * i_chip );
+
+template<>
+void __setChnlFailCleanup<TYPE_MEMBUF>( ExtensibleChip * i_chip )
+{
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( TYPE_MEMBUF == i_chip->getType() );
+ // Record on the MEMBUF data bundle that channel fail cleanup is required.
+ getMembufDataBundle(i_chip)->iv_doChnlFailCleanup = true;
+}
+
+template<>
+void __setChnlFailCleanup<TYPE_DMI>( ExtensibleChip * i_chip )
+{
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( TYPE_DMI == i_chip->getType() );
+ // The cleanup flag lives on the MEMBUF, so forward to the connected one.
+ ExtensibleChip * membChip = getConnectedChild( i_chip, TYPE_MEMBUF, 0 );
+ PRDF_ASSERT( nullptr != membChip ); // shouldn't be possible
+
+ __setChnlFailCleanup<TYPE_MEMBUF>( membChip );
+}
+
+//------------------------------------------------------------------------------
+
+template<TARGETING::TYPE T>
+uint32_t handleChnlFail( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( T == i_chip->getType() );
+
+ uint32_t o_rc = SUCCESS;
+
+ do
+ {
+ // Skip if already handling channel failure.
+ if ( io_sc.service_data->IsUnitCS() ) break;
+
+ // Skip if currently analyzing a host attention. This is required for
+ // a rare scenario when a channel failure occurs after PRD is called to
+ // handle the host attention.
+ if ( HOST_ATTN == io_sc.service_data->getPrimaryAttnType() ) break;
+
+ // Look for the channel fail attention; a query failure is returned.
+ bool isChnlFail = false;
+ o_rc = __queryChnlFail<T>( i_chip, isChnlFail );
+ if ( SUCCESS != o_rc ) break;
+
+ if ( ! isChnlFail ) break; // No channel fail, nothing more to do.
+
+ // Change the secondary attention type to UNIT_CS so the rule code will
+ // start looking for UNIT_CS attentions instead of recoverable.
+ io_sc.service_data->setSecondaryAttnType( UNIT_CS );
+
+ // Set the UNIT_CS flag in the SDC to indicate a channel failure has
+ // been detected and there is no need to check again.
+ io_sc.service_data->setFlag( ServiceDataCollector::UNIT_CS );
+
+ // Make the error log predictive and set threshold.
+ io_sc.service_data->setFlag( ServiceDataCollector::SERVICE_CALL );
+ io_sc.service_data->setFlag( ServiceDataCollector::AT_THRESHOLD );
+
+ // Channel failures will always send SUEs.
+ io_sc.service_data->setFlag( ServiceDataCollector::UERE );
+
+ // Indicate cleanup is required on this channel.
+ __setChnlFailCleanup<T>( i_chip );
} while (0);
- if ( SUCCESS != o_rc )
+ return o_rc;
+}
+
+template
+uint32_t handleChnlFail<TYPE_MEMBUF>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc );
+template
+uint32_t handleChnlFail<TYPE_DMI>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc );
+
+template<>
+uint32_t handleChnlFail<TYPE_MC>( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc )
+{
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( TYPE_MC == i_chip->getType() );
+
+ uint32_t o_rc = SUCCESS;
+
+ for ( auto & dmiChip : getConnected(i_chip, TYPE_DMI) )
{
- PRDF_ERR( PRDF_FUNC "Failed: i_mcsChip=0x%08x", i_mcsChip->GetId() );
+ o_rc = handleChnlFail<TYPE_DMI>( dmiChip, io_sc );
+ if ( SUCCESS != o_rc ) break;
}
return o_rc;
-
- #undef PRDF_FUNC
}
-*/
//------------------------------------------------------------------------------
diff --git a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H
index 64e04baac..82a77a95b 100755
--- a/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H
+++ b/src/usr/diag/prdf/common/plat/mem/prdfMemUtils.H
@@ -110,15 +110,6 @@ template<TARGETING::TYPE T>
uint8_t getDramSize( ExtensibleChip * i_chip, uint8_t i_dimmSlct = 0 );
/**
- * @brief Check for channel fail attentions on the MCS side of the DMI bus.
- * @param i_mcsChip An MCS chip.
- * @param io_sc The step code data struct.
- * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
- */
-int32_t checkMcsChannelFail( ExtensibleChip * i_mcsChip,
- STEP_CODE_DATA_STRUCT & io_sc );
-
-/**
* @brief determines the type of Centaur based raw card associated with MBA.
* @param i_mba mba target
* @param o_type raw card type.
@@ -141,6 +132,25 @@ template<TARGETING::TYPE T>
void cleanupChnlAttns( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc );
/**
+ * @brief Checks for channel fail attentions on the target side of the bus.
+ * @note If a channel fail attention is present, this function will set the
+ * secondary attention type to UNIT_CS, among other appropriate flags
+ * in the SDC. It is important that this is called in the PreAnalysis
+ * plugin so that PRD knows to look for UNIT_CS attentions instead of
+ * recoverable attentions.
+ * @note This only checks one side of the bus. It does not check both sides.
+ * @note If the primary attention type is HOST_ATTN, the function exits and
+ * waits for the next attention. This is for the rare scenario where a
+ * channel failure occurs after PRD is called to handle a HOST_ATTN.
+ * @param i_chip MEMBUF, DMI, or MC chip.
+ * @param io_sc The step code data struct.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE T>
+uint32_t handleChnlFail( ExtensibleChip * i_chip,
+ STEP_CODE_DATA_STRUCT & io_sc );
+
+/**
* @brief After analyzing a memory channel failure, we want to mask off all
* possible attentions on that channel to ensure we don't get any errant
* attentions.
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Dmi_common.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Dmi_common.C
index 801b25f89..ffd874ace 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfP9Dmi_common.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Dmi_common.C
@@ -50,6 +50,23 @@ namespace p9_dmi
//##############################################################################
/**
+ * @brief Analysis code that is called before the main analyze() function.
+ * @param i_chip A DMI chip.
+ * @param io_sc The step code data struct.
+ * @param o_analyzed True if analysis is done on this chip, false otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_analyzed )
+{
+ o_analyzed = false; // Analysis itself is left to analyze().
+ // Check for a channel failure first. If one is found, handleChnlFail()
+ // switches the secondary attention type in io_sc to UNIT_CS.
+ return MemUtils::handleChnlFail<TYPE_DMI>( i_chip, io_sc );
+}
+PRDF_PLUGIN_DEFINE( p9_dmi, PreAnalysis );
+
+/**
* @brief Plugin function called after analysis is complete but before PRD
* exits.
* @param i_chip A DMI chip.
diff --git a/src/usr/diag/prdf/common/plat/p9/prdfP9Mc_common.C b/src/usr/diag/prdf/common/plat/p9/prdfP9Mc_common.C
index 50ad49f53..e4fd01979 100644
--- a/src/usr/diag/prdf/common/plat/p9/prdfP9Mc_common.C
+++ b/src/usr/diag/prdf/common/plat/p9/prdfP9Mc_common.C
@@ -50,6 +50,23 @@ namespace p9_mc
//##############################################################################
/**
+ * @brief Analysis code that is called before the main analyze() function.
+ * @param i_chip An MC chip.
+ * @param io_sc The step code data struct.
+ * @param o_analyzed True if analysis is done on this chip, false otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+int32_t PreAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc,
+ bool & o_analyzed )
+{
+ o_analyzed = false; // Analysis itself is left to analyze().
+ // Check each connected DMI for a channel failure first. If one is found,
+ // handleChnlFail() switches the secondary attention type to UNIT_CS.
+ return MemUtils::handleChnlFail<TYPE_MC>( i_chip, io_sc );
+}
+PRDF_PLUGIN_DEFINE( p9_mc, PreAnalysis );
+
+/**
* @brief Plugin function called after analysis is complete but before PRD
* exits.
* @param i_chip An MC chip.
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C
index 95dcc0c22..4ef094ccb 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfCenMembuf.C
@@ -70,20 +70,6 @@ namespace Membuf
//##############################################################################
/**
- * @brief Plugin that initializes the P8 Centaur Membuf data bundle.
- * @param i_mbaChip A Centaur Membuf chip.
- * @return SUCCESS
- */
-int32_t Initialize( ExtensibleChip * i_mbaChip )
-{
- i_mbaChip->getDataBundle() = new CenMembufDataBundle( i_mbaChip );
- return SUCCESS;
-}
-PRDF_PLUGIN_DEFINE( Membuf, Initialize );
-
-//------------------------------------------------------------------------------
-
-/**
* @fn CheckForRecovered
* @brief Used when the chip has a CHECK_STOP attention to check for the
* presence of recovered errors.
@@ -118,71 +104,6 @@ int32_t CheckForRecovered(ExtensibleChip * i_chip,
} PRDF_PLUGIN_DEFINE( Membuf, CheckForRecovered );
-//------------------------------------------------------------------------------
-
-/**
- * @brief Analysis code that is called before the main analyze() function.
- * @param i_mbChip A MEMBUF chip.
- * @param i_sc Step Code Data structure
- * @param o_analyzed TRUE if analysis has been done on this chip
- * @return failure or success
- */
-int32_t PreAnalysis( ExtensibleChip * i_mbChip, STEP_CODE_DATA_STRUCT & i_sc,
- bool & o_analyzed )
-{
- #define PRDF_FUNC "[Membuf::PreAnalysis] "
-
- int32_t o_rc = SUCCESS;
-
- o_analyzed = false;
-
- // Check for a Centaur Checkstop
- do
- {
- // Skip if we're already analyzing a unit checkstop
- if ( i_sc.service_data->IsUnitCS() )
- break;
-
- // Skip if we're analyzing a special attention.
- // This is a required for a rare scenario when Centaur CS bit comes
- // up after attention has called PRD and PRD was still at start of
- // analysis.
- if ( SPECIAL == i_sc.service_data->getPrimaryAttnType() )
- break;
-
- // MCIFIR[31] is not always reliable if the unit CS originated on the
- // Centaur. This is due to packets not getting forwarded to the MCS.
- // Instead, check for non-zero GLOBAL_CS_FIR.
-
- SCAN_COMM_REGISTER_CLASS * fir = i_mbChip->getRegister("GLOBAL_CS_FIR");
- o_rc = fir->Read();
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( PRDF_FUNC "Failed to read GLOBAL_CS_FIR on 0x%08x",
- i_mbChip->GetId() );
- break;
- }
-
- if ( fir->BitStringIsZero() ) break; // No unit checkstop
-
- // Set Unit checkstop flag
- i_sc.service_data->setFlag(ServiceDataCollector::UNIT_CS);
- i_sc.service_data->SetThresholdMaskId(0);
-
- // Set the cause attention type
- i_sc.service_data->setSecondaryAttnType(UNIT_CS);
-
- // Indicate that cleanup is required.
- mbdb->iv_doChnlFailCleanup = true;
-
- } while (0);
-
- return o_rc;
-
- #undef PRDF_FUNC
-}
-PRDF_PLUGIN_DEFINE( Membuf, PreAnalysis );
-
//##############################################################################
//
// DMIFIR
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C
index 7b1dddc4f..7a35d87cb 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Mcs.C
@@ -73,29 +73,6 @@ int32_t Initialize( ExtensibleChip * i_mcsChip )
PRDF_PLUGIN_DEFINE( Mcs, Initialize );
/**
- * @brief Analysis code that is called before the main analyze() function.
- * @param i_mcsChip An MCS chip.
- * @param i_sc Step Code Data structure
- * @param o_analyzed TRUE if analysis has been done on this chip
- * @return failure or success
- */
-int32_t PreAnalysis( ExtensibleChip * i_mcsChip, STEP_CODE_DATA_STRUCT & i_sc,
- bool & o_analyzed )
-{
- o_analyzed = false;
-
- // Check for a Centaur Checkstop
- int32_t o_rc = MemUtils::checkMcsChannelFail( i_mcsChip, i_sc );
- if ( SUCCESS != o_rc )
- {
- PRDF_ERR( "[Mcs::PreAnalysis] MemUtils::checkMcsChannelFail() failed" );
- }
-
- return o_rc;
-}
-PRDF_PLUGIN_DEFINE( Mcs, PreAnalysis );
-
-/**
* @fn ClearMbsSecondaryBits
* @brief Clears MBS secondary Fir bits which may come up because of MCIFIR
* @param i_chip The Mcs chip.
diff --git a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C
index 1a9f5573f..75f7ebf9b 100755
--- a/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C
+++ b/src/usr/diag/prdf/common/plat/pegasus/prdfP8Proc.C
@@ -439,14 +439,6 @@ int32_t maxSparesExceeded_MCS( ExtensibleChip * i_procChip,
l_rc = FAIL; break;
}
- // Check for channel fails on the MCS side of this bus.
- l_rc = MemUtils::checkMcsChannelFail( mcsChip, i_sc );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "checkMcsChannelFail() failed" );
- break;
- }
-
// Do additional bus analysis.
l_rc = handleLaneRepairEvent( i_procChip, TYPE_MCS, i_mcsPos, i_sc,
false );
diff --git a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C
index 044bbf12b..cf2e0462d 100644
--- a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C
+++ b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.C
@@ -39,6 +39,10 @@
#include <prdfTrace.H>
#include <prdfErrlUtil.H>
+#ifdef __HOSTBOOT_MODULE // TODO SW431530
+#include <p9c_query_channel_failure.H>
+#endif
+
#ifdef __HOSTBOOT_MODULE
#include <dimmBadDqBitmapFuncs.H>
#include <p9_io_xbus_read_erepair.H>
@@ -678,6 +682,40 @@ void getDimmDqAttr<TYPE_DIMM>( TargetHandle_t i_target,
} // end function getDimmDqAttr
//------------------------------------------------------------------------------
+
+template<>
+uint32_t queryChnlFail<TYPE_DMI>( ExtensibleChip * i_chip, bool & o_chnlFail )
+{
+ #define PRDF_FUNC "[PlatServices::queryChnlFail] "
+
+ PRDF_ASSERT( nullptr != i_chip );
+ PRDF_ASSERT( TYPE_DMI == i_chip->getType() );
+
+ uint32_t o_rc = SUCCESS;
+ o_chnlFail = false; // Documented default; the paths below may not set it.
+#ifdef __HOSTBOOT_MODULE // TODO SW431530
+ errlHndl_t errl = nullptr;
+
+ fapi2::Target<fapi2::TARGET_TYPE_DMI> fapiTrgt ( i_chip->getTrgt() );
+
+ FAPI_INVOKE_HWP( errl, p9c_query_channel_failure, fapiTrgt, o_chnlFail );
+ if ( nullptr != errl )
+ {
+ PRDF_ERR( PRDF_FUNC "p9c_query_channel_failure(0x%08x) failed",
+ i_chip->getHuid() );
+ PRDF_COMMIT_ERRL( errl, ERRL_ACTION_REPORT );
+ o_rc = FAIL;
+ }
+#else
+ PRDF_ERR( PRDF_FUNC "p9c_query_channel_failure() not supported yet" );
+#endif
+
+ return o_rc;
+
+ #undef PRDF_FUNC
+}
+
+//------------------------------------------------------------------------------
// Constants defined from Serial Presence Detect (SPD) specs
//---------------------------------------------------------------------
const uint8_t SPD_IDX_MODSPEC_COM_REF_BASIC_MEMORY_TYPE = 0x02;
diff --git a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H
index 9db6992aa..c692e7246 100755
--- a/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H
+++ b/src/usr/diag/prdf/common/plat/prdfPlatServices_common.H
@@ -319,7 +319,6 @@ int32_t getDimmSpareConfig( TARGETING::TargetHandle_t i_mba, CenRank i_rank,
int32_t getMemBufRawCardType( TARGETING::TargetHandle_t i_mbaTarget,
CEN_SYMBOL::WiringType & o_wiringType );
-
/**
* @brief get DIMM DQ map from FAPI routines
* @param i_target DIMM target
@@ -330,6 +329,16 @@ template <TARGETING::TYPE>
void getDimmDqAttr( TARGETING::TargetHandle_t i_target,
uint8_t (&io_dqMapPtr)[DQS_PER_DIMM] );
+/**
+ * @brief Queries for a channel fail on the given target.
+ * @param i_chip DMI chip.
+ * @param o_chnlFail True if a channel failure has been detected on this
+ * target. False, otherwise.
+ * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
+ */
+template<TARGETING::TYPE>
+uint32_t queryChnlFail( ExtensibleChip * i_chip, bool & o_chnlFail );
+
//##############################################################################
//## util functions
//##############################################################################
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfPlatCenMba_ipl.C b/src/usr/diag/prdf/plat/pegasus/prdfPlatCenMba_ipl.C
deleted file mode 100644
index a38604aa4..000000000
--- a/src/usr/diag/prdf/plat/pegasus/prdfPlatCenMba_ipl.C
+++ /dev/null
@@ -1,97 +0,0 @@
-/* IBM_PROLOG_BEGIN_TAG */
-/* This is an automatically generated prolog. */
-/* */
-/* $Source: src/usr/diag/prdf/plat/pegasus/prdfPlatCenMba_ipl.C $ */
-/* */
-/* OpenPOWER HostBoot Project */
-/* */
-/* Contributors Listed Below - COPYRIGHT 2014,2015 */
-/* [+] International Business Machines Corp. */
-/* */
-/* */
-/* Licensed under the Apache License, Version 2.0 (the "License"); */
-/* you may not use this file except in compliance with the License. */
-/* You may obtain a copy of the License at */
-/* */
-/* http://www.apache.org/licenses/LICENSE-2.0 */
-/* */
-/* Unless required by applicable law or agreed to in writing, software */
-/* distributed under the License is distributed on an "AS IS" BASIS, */
-/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
-/* implied. See the License for the specific language governing */
-/* permissions and limitations under the License. */
-/* */
-/* IBM_PROLOG_END_TAG */
-
-/** @file prdfCenMba.C
- * @brief Contains all Hostboot specific plugin code for the PRD Centaur MBA.
- */
-
-// Framework includes
-#include <iipServiceDataCollector.h>
-#include <prdfExtensibleChip.H>
-#include <prdfPlatServices.H>
-#include <prdfPluginMap.H>
-
-// Pegasus includes
-#include <prdfCenMbaCaptureData.H>
-#include <prdfCenMbaDataBundle.H>
-
-using namespace TARGETING;
-
-namespace PRDF
-{
-
-using namespace PlatServices;
-
-namespace Mba
-{
-
-//##############################################################################
-//
-// Special plugins
-//
-//##############################################################################
-
-/**
- * @brief Plugin function called after analysis is complete but before PRD
- * exits.
- * @param i_mbaChip A Centaur MBA chip.
- * @param i_sc The step code data struct.
- * @note This is especially useful for any analysis that still needs to be
- * done after the framework clears the FIR bits that were at attention.
- * @return SUCCESS.
- */
-int32_t PostAnalysis( ExtensibleChip * i_mbaChip,
- STEP_CODE_DATA_STRUCT & i_sc )
-{
- #define PRDF_FUNC "[Mba::PostAnalysis] "
-
- // Send command complete to MDIA.
- // This must be done in post analysis after attentions have been cleared.
-
- TargetHandle_t mbaTarget = i_mbaChip->GetChipHandle();
- CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip );
-
- if ( mbadb->iv_sendCmdCompleteMsg )
- {
- mbadb->iv_sendCmdCompleteMsg = false;
-
- int32_t l_rc = mdiaSendEventMsg( mbaTarget,
- mbadb->iv_cmdCompleteMsgData );
- if ( SUCCESS != l_rc )
- {
- PRDF_ERR( PRDF_FUNC "PlatServices::mdiaSendEventMsg() failed" );
- }
- }
-
- return SUCCESS; // Intentionally return SUCCESS for this plugin
-
- #undef PRDF_FUNC
-}
-PRDF_PLUGIN_DEFINE( Mba, PostAnalysis );
-
-} // end namespace Mba
-
-} // end namespace PRDF
-
diff --git a/src/usr/diag/prdf/prdf_hb_only.mk b/src/usr/diag/prdf/prdf_hb_only.mk
index 9c3b9a4b7..5f9aed43c 100644
--- a/src/usr/diag/prdf/prdf_hb_only.mk
+++ b/src/usr/diag/prdf/prdf_hb_only.mk
@@ -172,6 +172,7 @@ prd_obj_no_sim += p9_io_xbus_pdwn_lanes.o
prd_vpath += ${ROOTPATH}/src/import/chips/centaur/procedures/hwp/memory
prd_obj_no_sim += p9c_mss_maint_cmds.o
prd_obj_no_sim += p9c_dimmBadDqBitmapFuncs.o
+prd_obj_no_sim += p9c_query_channel_failure.o
################################################################################
# The following are hardware procedure utilities that we are pulling into the
OpenPOWER on IntegriCloud