diff options
author | Brian Silver <bsilver@us.ibm.com> | 2016-02-25 13:00:11 -0600 |
---|---|---|
committer | Daniel M. Crowell <dcrowell@us.ibm.com> | 2016-04-01 21:29:47 -0400 |
commit | cf867557080122d913017f1c6ae15fb648af2c38 (patch) | |
tree | ae0457c4269fde184af4e70c0e3725a667897a91 /src/import/chips/p9 | |
parent | ba2cdc25cb8894d3899b151b8e2a6c7745ee3e50 (diff) | |
download | talos-hostboot-cf867557080122d913017f1c6ae15fb648af2c38.tar.gz talos-hostboot-cf867557080122d913017f1c6ae15fb648af2c38.zip |
Change draminit_mc MCBIST subtest to perform compares rather than ECC
Add checking for failed subtest
Add checking for UE and compare errors
Improve timeout error handling
Change-Id: Iedcf3757407c0869d7aeda0d80d5fb6f84566f15
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/21326
Reviewed-by: Louis Stermole <stermole@us.ibm.com>
Tested-by: Jenkins Server
Tested-by: Hostboot CI
Reviewed-by: ANDRE A. MARIN <aamarin@us.ibm.com>
Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/21356
Tested-by: FSP CI Jenkins
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Diffstat (limited to 'src/import/chips/p9')
-rw-r--r-- | src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.C | 36 | ||||
-rw-r--r-- | src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.H | 58 |
2 files changed, 83 insertions, 11 deletions
diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.C b/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.C index c06df116a..c9ad1b4ad 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.C +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.C @@ -136,6 +136,7 @@ fapi2::ReturnCode execute( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target, static const uint64_t l_done = fapi2::buffer<uint64_t>().setBit<TT::MCBIST_DONE>(); static const uint64_t l_fail = fapi2::buffer<uint64_t>().setBit<TT::MCBIST_FAIL>(); + static const uint64_t l_in_progress = fapi2::buffer<uint64_t>().setBit<TT::MCBIST_IN_PROGRESS>(); fapi2::buffer<uint64_t> l_status; @@ -170,22 +171,35 @@ fapi2::ReturnCode execute( const fapi2::Target<TARGET_TYPE_MCBIST>& i_target, return l_status.getBit<TT::MCBIST_IN_PROGRESS>() != 1; }); + // Check to see if we're still in progress - meaning we timed out. + FAPI_ASSERT((l_status & l_in_progress) != l_in_progress, + fapi2::MSS_MCBIST_TIMEOUT().set_TARGET_IN_ERROR(i_target), + "MCBIST timed out %s", mss::c_str(i_target)); + // The control register has a bit for done-and-happy and a bit for done-and-unhappy - if ((l_status & l_done) == l_done) + if ( ((l_status & l_done) == l_done) || ((l_status & l_fail) == l_fail) ) { - FAPI_DBG("MCBIST executed successfully."); - return fapi2::current_err; - } + FAPI_INF("MCBIST completed, processing errors"); - if ((l_status & l_fail) == l_fail) - { - FAPI_DBG("MCBIST failed execution."); - return fapi2::FAPI2_RC_FALSE; + // We're done. It doesn't mean that there were no errors. + FAPI_TRY( i_program.process_errors(i_target) ); + + // If we're here there were no errors, but lets report if the fail bit was set anyway. 
+ FAPI_ASSERT( (l_status & l_fail) != l_fail, + fapi2::MSS_MCBIST_UNKNOWN_FAILURE() + .set_TARGET_IN_ERROR(i_target) + .set_STATUS_REGISTER(l_status), + "MCBIST reported a fail, but process_errors didn't find it 0x%016llx", l_status ); + + // And if we're here all is good with the world. + return fapi2::current_err; } - // So something set more than one bit in the control register? - FAPI_DBG("MCBIST executed <shrug>. Something's not good 0x%016llx", l_status); - return fapi2::FAPI2_RC_FALSE; + FAPI_ASSERT(false, + fapi2::MSS_MCBIST_MULTIPLE_FAIL_BITS() + .set_TARGET_IN_ERROR(i_target) + .set_STATUS_REGISTER(l_status), + "MCBIST executed <shrug>. Something's not good 0x%016llx", l_status ); fapi_try_exit: return fapi2::current_err; diff --git a/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.H b/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.H index 55d6f2b08..d93848f9f 100644 --- a/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.H +++ b/src/import/chips/p9/procedures/hwp/memory/lib/mcbist/mcbist.H @@ -76,8 +76,10 @@ class mcbistTraits<fapi2::TARGET_TYPE_MCBIST> static const uint64_t CFGQ_REG = MCBIST_MCBCFGQ; static const uint64_t CNTLQ_REG = MCBIST_MCB_CNTLQ; static const uint64_t STATQ_REG = MCBIST_MCB_CNTLSTATQ; + static const uint64_t MCBSTATQ_REG = MCBIST_MCBSTATQ; static const uint64_t MCBPARMQ_REG = MCBIST_MCBPARMQ; static const uint64_t MCBAGRAQ_REG = MCBIST_MCBAGRAQ; + static const uint64_t SRERR_REG = MCBIST_MBSEC1Q; static const uint64_t MCBAMR0A0Q_REG = MCBIST_MCBAMR0A0Q; static const uint64_t MCBAMR1A0Q_REG = MCBIST_MCBAMR1A0Q; @@ -175,6 +177,11 @@ class mcbistTraits<fapi2::TARGET_TYPE_MCBIST> CFG_ENABLE_SPEC_ATTN = MCBIST_MCBCFGQ_CFG_ENABLE_SPEC_ATTN, CFG_ENABLE_HOST_ATTN = MCBIST_MCBCFGQ_CFG_ENABLE_HOST_ATTN, + LOGGED_ERROR_ON_PORT_INDICATOR = MCBIST_MCBSTATQ_MCBIST_LOGGED_ERROR_ON_PORT_INDICATOR, + LOGGED_ERROR_ON_PORT_INDICATOR_LEN = MCBIST_MCBSTATQ_MCBIST_LOGGED_ERROR_ON_PORT_INDICATOR_LEN, + + UE_COUNT = 
MCBIST_MBSEC1Q_UE_COUNT, + UE_COUNT_LEN = MCBIST_MBSEC1Q_UE_COUNT_LEN, }; }; @@ -760,6 +767,57 @@ class program return; } + /// + /// @brief Process mcbist errors + /// @param[in] i_target fapi2::Target<T> of the MCBIST + /// @return fapi2::ReturnCode FAPI2_RC_SUCCESS iff ok + /// + inline fapi2::ReturnCode process_errors( const fapi2::Target<T> i_target ) const + { + // TK: Check for more detailed errors + + // Until reading the error array is documented, comparison errors 'just' result in + // a flag indicating there was a problem on port. + { + fapi2::buffer<uint64_t> l_data; + uint64_t l_read = 0; + FAPI_TRY( mss::getScom(i_target, TT::MCBSTATQ_REG, l_data) ); + l_data.extractToRight<TT::LOGGED_ERROR_ON_PORT_INDICATOR, TT::LOGGED_ERROR_ON_PORT_INDICATOR_LEN>(l_read); + + FAPI_ASSERT( l_read == 0, + fapi2::MSS_MEMDIAGS_COMPARE_ERROR_IN_LAST_PATTERN() + .set_TARGET(i_target) + .set_PORT(mss::first_bit_set(l_read)), + "MCBIST error on port %d", mss::first_bit_set(l_read) ); + } + + // Check for UE errors + { + fapi2::buffer<uint64_t> l_data; + uint64_t l_read = 0; + FAPI_TRY( mss::getScom(i_target, TT::SRERR_REG, l_data) ); + + // For now, lets catch anything in the scrub/read error reg - not just UE +#ifdef CATCH_UE_ONLY + l_data.extractToRight<TT::UE_COUNT, TT::UE_COUNT_LEN>(l_read); +#else + l_read = l_data; +#endif + + FAPI_ASSERT( l_read == 0, + fapi2::MSS_MEMDIAGS_UE_OR_SUE_IN_LAST_PATTERN() + .set_TARGET(i_target) + .set_STATUS(l_read), + "MCBIST scrub/read error 0x%016lx", l_read ); + } + + FAPI_INF("Execution success - no errors seen from MCBIST program"); + + fapi_try_exit: + return fapi2::current_err; + } + + // Vector of subtests. Note the MCBIST subtests are spread across // 8 registers - 4 subtests fit in one 64b register // (16 bits/test, 4 x 16 == 64, 4x8 = 32 subtests) |