diff options
author | Joachim Fenkes <fenkes@de.ibm.com> | 2017-03-01 01:18:19 +0100 |
---|---|---|
committer | Sachin Gupta <sgupta2m@in.ibm.com> | 2017-03-07 08:38:05 -0500 |
commit | c25169e141cdc2654958a427e10c1e5847b450b3 (patch) | |
tree | cc5542ccc7d3177493ddbd5284ba0c0a2a550b0d | |
parent | b67458b8c4f546adc7aa41090933a571dfeaa080 (diff) | |
download | talos-sbe-c25169e141cdc2654958a427e10c1e5847b450b3.tar.gz talos-sbe-c25169e141cdc2654958a427e10c1e5847b450b3.zip |
p9_fastarray: Fix hang, add timeout
After stopping core clocks, the core vital fence is raised, and therefore
ABIST_DONE cannot be observed via CPLT_STAT0, so the last fastarray update
caused fastarray_cleanup to loop forever. My testing procedure didn't set
that fence, so I didn't catch this mistake.
Lower the vital fence in fastarray_cleanup while ABIST runs to completion
(raising it again afterwards), and also add a timeout to both
fastarray_cleanup and abist_catchup to be safe.
Change-Id: I829943951d31619a6bbe085ef97aff5a7c78a307
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/37229
Dev-Ready: Joseph J. McGill <jmcgill@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: PPE CI <ppe-ci+hostboot@us.ibm.com>
Reviewed-by: Kevin F. Reick <reick@us.ibm.com>
Reviewed-by: Joseph J. McGill <jmcgill@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/37232
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Sachin Gupta <sgupta2m@in.ibm.com>
3 files changed, 35 insertions, 6 deletions
diff --git a/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_abist_catchup.C b/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_abist_catchup.C index c3b83538..4c3dcfbe 100644 --- a/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_abist_catchup.C +++ b/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_abist_catchup.C @@ -57,11 +57,21 @@ fapi2::ReturnCode p9_sbe_fastarray_abist_catchup( /* If we clocked more than a single cycle, do due diligence and wait for OPCG_DONE */ if( i_clockCyclesMinusOne ) { - do + uint32_t l_timeout = 100; + + while (--l_timeout) { FAPI_TRY(fapi2::getScom(i_target_chiplet, PERV_CPLT_STAT0, l_cc_buf), "Failed to read Chiplet Status 0 Register"); + + if (l_cc_buf.getBit<PERV_1_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC>()) + { + break; + } + + fapi2::delay(1000000, 100000); } - while (!l_cc_buf.getBit<PERV_1_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC>()); + + FAPI_ASSERT(l_timeout, fapi2::FASTARRAY_CLOCK_TIMEOUT(), "Clocking ABIST cycles timed out"); } return fapi2::FAPI2_RC_SUCCESS; diff --git a/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_cleanup.C b/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_cleanup.C index c0b18fcd..a2a059ce 100644 --- a/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_cleanup.C +++ b/src/import/chips/p9/procedures/hwp/perv/p9_sbe_fastarray_cleanup.C @@ -53,13 +53,28 @@ fapi2::ReturnCode p9_sbe_fastarray_cleanup( { fapi2::buffer<uint64_t> l_buf; + /* Drop vital fence so that we can see the ABIST_DONE signal */ + FAPI_TRY(fapi2::putScom(i_target_chiplet, PERV_CPLT_CTRL1_CLEAR, 0x1000000000000000), "Failed to drop vitl fence"); + /* Let ABIST engines run to completion */ - do { - FAPI_TRY(p9_sbe_fastarray_abist_catchup(i_target_chiplet, 0xFFF), "Failed to clock ABIST to completion"); - FAPI_TRY(fapi2::getScom(i_target_chiplet, PERV_CPLT_STAT0, l_buf), "Failed to read Chiplet Status 0 Register"); + uint32_t l_timeout = 16; + + do + { + 
FAPI_TRY(p9_sbe_fastarray_abist_catchup(i_target_chiplet, 0xFFF), "Failed to clock ABIST to completion"); + FAPI_TRY(fapi2::getScom(i_target_chiplet, PERV_CPLT_STAT0, l_buf), "Failed to read Chiplet Status 0 Register"); + } + while (--l_timeout && !l_buf.getBit<PERV_1_CPLT_STAT0_SRAM_ABIST_DONE_DC>()); + + if (!l_timeout) + { + FAPI_ERR("Warning: ABIST_DONE not seen! Your dump is probably fine, but this is unexpected."); + } } - while (!l_buf.getBit<PERV_1_CPLT_STAT0_SRAM_ABIST_DONE_DC>()); + + /* Raise the vital fence back up */ + FAPI_TRY(fapi2::putScom(i_target_chiplet, PERV_CPLT_CTRL1_OR, 0x1000000000000000), "Failed to raise vitl fence"); /* Disable ABIST and clock engines so they can cleanly reset */ l_buf = 0; diff --git a/src/import/chips/p9/procedures/xml/error_info/p9_fastarray.xml b/src/import/chips/p9/procedures/xml/error_info/p9_fastarray.xml index f34b098f..85e1b2cd 100644 --- a/src/import/chips/p9/procedures/xml/error_info/p9_fastarray.xml +++ b/src/import/chips/p9/procedures/xml/error_info/p9_fastarray.xml @@ -61,6 +61,10 @@ <rc>RC_FASTARRAY_HEADER_CHECK_FAILED</rc> <description>The header check failed after a scan operation</description> </hwpError> + <hwpError> + <rc>RC_FASTARRAY_CLOCK_TIMEOUT</rc> + <description>An attempt to clock ABIST cycles did not complete within 100ms</description> + </hwpError> <!-- template for copying <hwpError> <rc>RC_FASTARRAY_</rc> |