From ec0caeeb0d414fa307fd0893c6bdb74959129eb6 Mon Sep 17 00:00:00 2001 From: "CHRISTINA L. GRAVES" Date: Thu, 1 Dec 2016 23:01:39 -0600 Subject: p9_pba_coherent_utils -- add PIB abort error handling for Cronus platform Change-Id: I64d748c70bbc4a4b3934b64d5d6202c83b21c2db RTC: 167768 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/33322 Tested-by: Jenkins Server Tested-by: PPE CI Reviewed-by: CHRISTINA L. GRAVES Reviewed-by: Thi N. Tran Reviewed-by: Joseph J. McGill Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/33323 Reviewed-by: Hostboot Team Tested-by: FSP CI Jenkins Reviewed-by: Sachin Gupta --- .../p9/procedures/hwp/nest/p9_pba_coherent_utils.C | 231 ++++++++++++++++++++- .../p9/procedures/hwp/nest/p9_pba_coherent_utils.H | 27 ++- .../p9/procedures/xml/error_info/p9_pba_errors.xml | 24 ++- 3 files changed, 271 insertions(+), 11 deletions(-) diff --git a/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.C b/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.C index 4f1836c1..0f297ac2 100644 --- a/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.C +++ b/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER sbe Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2015,2016 */ +/* Contributors Listed Below - COPYRIGHT 2015,2017 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #include extern "C" @@ -127,6 +130,7 @@ extern "C" //OCB3_ADDRESS field/bit definitions const uint32_t OCB3_ADDRESS_REG_ADDR_SHIFT = 32; + const uint32_t FSI2PIB_RESET_PIB_RESET_BIT = 0; //--------------------------------------------------------------------------------- // Function definitions //--------------------------------------------------------------------------------- @@ -376,13 +380,18 @@ extern "C" } fapi2::buffer data(write_data); - FAPI_TRY(fapi2::putScom(i_target, PU_OCB_PIB_OCBDR3, data), - "Error writing to the PBA via the OCB"); + rc = fapi2::putScom(i_target, PU_OCB_PIB_OCBDR3, data); + + if (rc) + { + FAPI_ERR("Error writing to the PBA via the OCB"); + rc = p9_pba_coherent_error_handling(i_target, rc); + break; + } } - fapi_try_exit: FAPI_DBG("End"); - return fapi2::current_err; + return rc; } fapi2::ReturnCode p9_pba_coherent_pba_read( @@ -390,6 +399,7 @@ extern "C" const uint64_t i_address, uint8_t o_read_data[]) { + fapi2::ReturnCode rc; fapi2::buffer data; FAPI_DBG("Start"); @@ -397,8 +407,14 @@ extern "C" //Perform a 128B read -- need to do 16 8B reads since it's in linear mode which can only do 8B... 
for (int i = 0; i < 16; i++) { - FAPI_TRY(fapi2::getScom(i_target, PU_OCB_PIB_OCBDR3, data), - "Error reading from the PBA via the OCB"); + rc = fapi2::getScom(i_target, PU_OCB_PIB_OCBDR3, data); + + if (rc) + { + FAPI_ERR("Error reading from the PBA via the OCB"); + rc = p9_pba_coherent_error_handling(i_target, rc); + break; + } for (int j = 0; j < 8; j++) { @@ -406,9 +422,8 @@ extern "C" } } - fapi_try_exit: FAPI_DBG("End"); - return fapi2::current_err; + return rc; } fapi2::ReturnCode p9_pba_coherent_cleanup_pba( @@ -488,4 +503,202 @@ extern "C" return fapi2::current_err; } + // TODO RTC 167768: support pib abort condition on PPE platform +#ifndef __PPE__ + fapi2::ReturnCode p9_pba_coherent_check_ocb_status(const fapi2::Target& i_target) + { + fapi2::ReturnCode rc; + fapi2::buffer l_ocb_csr_data; + bool l_expected_state; + + // read OCB3 Status/Control register + FAPI_DBG("proc_pba_coherent_utils_check_ocb_status: Reading OCB3 Status/Control register"); + FAPI_TRY(fapi2::getScom(i_target, PU_OCB_PIB_OCBCSR3_RO, l_ocb_csr_data), + "Error reading from OCB Control/Status Register"); + + // check for any bits set (outside of status/reserved fields) + l_expected_state = + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit() && + !l_ocb_csr_data.getBit(); + + FAPI_ASSERT(!(l_expected_state), fapi2::P9_PBA_COHERENT_UTILS_OCB_STATUS_MISMATCH().set_TARGET(i_target).set_DATA( + l_ocb_csr_data)); + fapi_try_exit: + FAPI_DBG("End"); + return fapi2::current_err; + } + + fapi2::ReturnCode p9_pba_coherent_check_pba_fir(const fapi2::Target& i_target) + { + fapi2::buffer l_pba_fir_data; + bool l_expected_state; + + // read PBA FIR register + FAPI_DBG("proc_pba_coherent_utils_check_pba_fir: Reading PBA FIR register"); + FAPI_TRY(fapi2::getScom(i_target, PU_PBAFIR, l_pba_fir_data), "Error reading PBA Fir register"); + + // check 
for unexpected state + l_expected_state = + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + //l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit() && + !l_pba_fir_data.getBit(); + FAPI_ASSERT(!(l_expected_state), fapi2::P9_PBA_COHERENT_UTILS_PBA_FIR_ERR().set_TARGET(i_target).set_DATA( + l_pba_fir_data), + "Error in PBA FIR"); + fapi_try_exit: + FAPI_DBG("End"); + return fapi2::current_err; + + } + + fapi2::ReturnCode p9_pba_coherent_check_status_for_err_handling(const fapi2::Target& + i_target) + { + FAPI_TRY(p9_pba_coherent_status_check(i_target), "Error calling p9_pba_coherent_status_check"); + FAPI_TRY(p9_pba_coherent_check_ocb_status(i_target), "Error calling p9_pba_coherent_check_ocb_status"); + FAPI_TRY(p9_pba_coherent_check_pba_fir(i_target), "Error calling p9_pba_coherent_check_pba_fir"); + + fapi_try_exit: + FAPI_DBG("End"); + return fapi2::current_err; + } + + + fapi2::ReturnCode p9_pba_utils_unlock_pib(const fapi2::Target& i_target) + { + fapi2::ReturnCode rc; + fapi2::buffer l_cfam_data; + fapi2::buffer l_pba_slv_rst_data; + + FAPI_DBG("Start"); + // unlock PIB in case of HW229314 + FAPI_DBG("Checking FSI2PIB Status Register"); + rc = fapi2::getCfamRegister(i_target, PERV_FSI2PIB_STATUS_FSI, l_cfam_data); + + if (rc != fapi2::FAPI2_RC_SUCCESS) + { + FAPI_ERR("getCfamRegister error"); + return rc; + } + + if (l_cfam_data.getBit(PERV_FSI2PIB_STATUS_PIB_ABORT)) + { + FAPI_DBG("Performing PIB reset"); + + // reset PIB/OCB + l_cfam_data.flush<0>(); + l_cfam_data.setBit(FSI2PIB_RESET_PIB_RESET_BIT); + const fapi2::buffer l_const_cfam_data = l_cfam_data; + rc = fapi2::putCfamRegister(i_target, PERV_FSI2PIB_RESET_FSI, l_const_cfam_data); + + if (rc != 
fapi2::FAPI2_RC_SUCCESS) + { + FAPI_ERR("Error resetting PIB/OCB"); + return rc; + } + + // ensure PBA region is unlocked, discard/ignore return code + (void) fapi2::getScom(i_target, PU_PBASLVRST_PIB, l_pba_slv_rst_data); + + rc = fapi2::putCfamRegister(i_target, PERV_FSI2PIB_RESET_FSI, l_const_cfam_data); + + if (rc != fapi2::FAPI2_RC_SUCCESS) + { + FAPI_ERR("Error ensuring PBA region is unlocked"); + return rc; + } + } + else + { + // ensure PBA region is unlocked, discard/ignore return code + (void) fapi2::getScom(i_target, PU_PBASLVRST_PIB, l_pba_slv_rst_data); + } + + return rc; + } +#endif + + fapi2::ReturnCode p9_pba_coherent_error_handling(const fapi2::Target& i_target, + fapi2::ReturnCode i_rc) + { + fapi2::ReturnCode l_return_rc; + + + FAPI_DBG("Start"); + + // TODO RTC 167768: support pib abort condition on PPE platform +#ifndef __PPE__ + + // analyze failure, attempt to differentiate between SCOM failure due to faulty HW + // versus failure to return read data, which under some conditions can cause SCOM to fail as well + FAPI_ERR("Error from a read or write with the PBA"); + + // ensure that PIB abort condition (which may have occurred as a result of SCOM read + // failure) is cleared, so that analysis of HW state is possible + l_return_rc = p9_pba_utils_unlock_pib(i_target); + + if (l_return_rc != fapi2::FAPI2_RC_SUCCESS) + { + // if the chip is in a functional state, accesses to the cfam region + // should always succeed -- if a fail occurs, return the original return code + // (as this represents the first error encountered, and the analysis + // indicates a HW issue which the PBA operation did not functionally cause) + FAPI_ERR("Error from p9_pba_utils_unlock_pib, returning original SCOM fail rc"); + l_return_rc = i_rc; + FAPI_DBG("End"); + return l_return_rc; + } + + // PIB is unlocked, analyze state of FIRs/state machines to see if the SCOM + // failure was a result of a read failure on the fabric launched by PBA + l_return_rc = 
p9_pba_coherent_check_status_for_err_handling(i_target); + + if (l_return_rc != fapi2::FAPI2_RC_SUCCESS) + { + // check for return codes that this routine can emit which indicate + // a functional failure in PBA that could result in the SCOM failure as a side-effect + if ((l_return_rc == (fapi2::ReturnCode) fapi2::RC_P9_PBA_COHERENT_UTILS_PBA_FIR_ERR) || + (l_return_rc == (fapi2::ReturnCode) fapi2::RC_P9_PBA_COHERENT_UTILS_OCB_STATUS_MISMATCH) || + (l_return_rc == (fapi2::ReturnCode) fapi2::RC_P9_PBA_STATUS_ERR)) + { + FAPI_ERR("Error from p9_pba_coherent_check_status_for_err_handling, returning PBA rc"); + } + // none of these match, return the original return code as it was the first error + else + { + FAPI_ERR("Error from p9_pba_coherent_check_status_for_err_handling, returning original SCOM fail rc"); + l_return_rc = i_rc; + } + + FAPI_DBG("End"); + return l_return_rc; + } + + // no sign of an error in PBA/OCB logic, just return the original return code for the SCOM fail + FAPI_ERR("No PBA error found, returning original SCOM fail rc"); +#endif + l_return_rc = i_rc; + FAPI_DBG("End"); + return l_return_rc; + + } + } //extern "C" diff --git a/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.H b/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.H index 049d19e2..bf6ff046 100644 --- a/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.H +++ b/src/import/chips/p9/procedures/hwp/nest/p9_pba_coherent_utils.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER sbe Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2015,2016 */ +/* Contributors Listed Below - COPYRIGHT 2015,2017 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -280,6 +280,31 @@ extern "C" const fapi2::Target& i_target, const uint64_t i_baseAddress); +///@brief does extra error handling if we hit a problem with a read or write +///@param[in] i_target => P9 chip target +///@param[in] i_rc => The current error that we are seeing from the read/write +///@return the error that we got from the scom or a different error that points to a problem in the PBA + fapi2::ReturnCode p9_pba_coherent_error_handling(const fapi2::Target& i_target, + fapi2::ReturnCode i_rc); + +///@brief does error checking on the OCB side +///@param[in] i_target => P9 chip target +///@return FAPI_RC_SUCCESS if no errors are detected otherwise an error that contains what is in the OCB status registers + fapi2::ReturnCode p9_pba_coherent_check_ocb_status(const fapi2::Target& i_target); + +///@brief does error checking on the PBA Fir +///@param[in] i_target => P9 chip target +///@return FAPI_RC_SUCCESS if no errors are detected otherwise an error that has what error is in the PBA Fir + fapi2::ReturnCode p9_pba_coherent_check_pba_fir(const fapi2::Target& i_target); + +///@brief calls all of the error checking procedures (ocb_status, check_pba_fir, and status_check) +///@param[in] i_target => P9 chip target +///@return FAPI_RC_SUCCESS if no errors are detected on the PBA + fapi2::ReturnCode p9_pba_coherent_check_status_for_err_handling(const fapi2::Target& + i_target); + + + } //extern "C" #endif //_P9_PBA_COHERENT_UTILS_H_ diff --git a/src/import/chips/p9/procedures/xml/error_info/p9_pba_errors.xml b/src/import/chips/p9/procedures/xml/error_info/p9_pba_errors.xml index 337ab26b..4781f12e 100644 --- a/src/import/chips/p9/procedures/xml/error_info/p9_pba_errors.xml +++ b/src/import/chips/p9/procedures/xml/error_info/p9_pba_errors.xml @@ -5,7 +5,7 @@ - + @@ -74,5 +74,27 @@ RUNNING + + + RC_P9_PBA_COHERENT_UTILS_OCB_STATUS_MISMATCH + + Procedure: p9_pba_coherent_utils + Mismatch in expected state for OCB Status register + + TARGET + DATA + + 
+ + + RC_P9_PBA_COHERENT_UTILS_PBA_FIR_ERR + + Procedure: p9_pba_coherent_utils + PBA FIR bit active after transaction. + + TARGET + DATA + + -- cgit v1.2.1