From d7355b73ce6b9b6b2a8634a4f349d3ca2c25104c Mon Sep 17 00:00:00 2001 From: Marty Gloff Date: Mon, 23 Apr 2018 12:01:45 -0500 Subject: Improve resource recovery path to handle memory plugging rules Resource recovery is a RAS feature wherein we do not apply certain gard records if those records would result in a boot fail due to a lack of hardware. Change the logic from applying speculative deconfiguration one by one to applying all predictive gard records and then removing those records if minimal hardware is not available. Also add BLOCK_SPEC_DECONFIG attribute to flag when speculative deconfiguration is not allowed. This flag is set if minimum hardware is not available and is cleared after a hardware change. Change-Id: Ia065de3a44ab29fbf33ad4ce98bc42ea5144463f RTC: 191414 CQ: SW424137 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/58041 Tested-by: Jenkins Server Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW Tested-by: FSP CI Jenkins Reviewed-by: Christian R. Geddes Reviewed-by: Daniel M. 
Crowell --- src/usr/util/runtime/rt_cmds.C | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'src/usr/util') diff --git a/src/usr/util/runtime/rt_cmds.C b/src/usr/util/runtime/rt_cmds.C index af49e71e9..e350e42dc 100644 --- a/src/usr/util/runtime/rt_cmds.C +++ b/src/usr/util/runtime/rt_cmds.C @@ -646,16 +646,21 @@ void cmd_putscom( char*& o_output, * @param[in] i_word2 Userdata 3 & 4 * @param[in] i_callout HUID of target to callout (zero if none) * @param[in] i_ffdcLength Additional ffdc data bytes to add to the error log + * @param[in] i_deconfig Indication if callout target should be deconfigured + * @param[in] i_gard Indication of type of failure for callout */ void cmd_errorlog( char*& o_output, uint64_t i_word1, uint64_t i_word2, uint32_t i_callout, - uint32_t i_ffdcLength ) + uint32_t i_ffdcLength, + HWAS::DeconfigEnum i_deconfig, + HWAS::GARD_ErrorType i_gard ) { - UTIL_FT( "cmd_errorlog> word1=%.8X%.8X, word2=%.8X%.8X, i_callout=%.8X ffdcLength=%ld", + UTIL_FT( "cmd_errorlog> word1=%.8X%.8X, word2=%.8X%.8X, i_callout=%.8X ffdcLength=%ld, deconfig=%.2X, gard=%.2X", (uint32_t)(i_word1>>32), (uint32_t)i_word1, - (uint32_t)(i_word2>>32), (uint32_t)i_word2, i_callout, i_ffdcLength ); + (uint32_t)(i_word2>>32), (uint32_t)i_word2, i_callout, + i_ffdcLength, i_deconfig, i_gard ); o_output = new char[100]; errlHndl_t l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE, @@ -669,8 +674,8 @@ void cmd_errorlog( char*& o_output, { l_err->addHwCallout( l_targ, HWAS::SRCI_PRIORITY_HIGH, - HWAS::NO_DECONFIG, - HWAS::GARD_NULL ); + i_deconfig, + i_gard ); } if (i_ffdcLength > 0) @@ -1158,24 +1163,39 @@ int hbrtCommand( int argc, } else if( !strcmp( argv[0], "errorlog" ) ) { - // errorlog - if( (argc == 3) || (argc == 4) || (argc == 5) ) + // errorlog + if( (argc == 3) || (argc == 4) || (argc == 5) || (argc == 6) || + (argc == 7) ) { uint32_t l_huid = 0; uint32_t l_ffdcLength = 0; - if( argc == 4 ) + 
HWAS::DeconfigEnum l_deconfig = HWAS::NO_DECONFIG; + HWAS::GARD_ErrorType l_gard = HWAS::GARD_NULL; + if( argc >= 4 ) { l_huid = strtou64( argv[3], NULL, 16 ); } - if (argc == 5) + if (argc >= 5) { l_ffdcLength = strtou64( argv[4], NULL, 16 ); } + if( argc >= 6 ) + { + l_deconfig = static_cast<HWAS::DeconfigEnum>( + strtou64( argv[5], NULL, 16 )); + } + if( argc >= 7 ) + { + l_gard = static_cast<HWAS::GARD_ErrorType>( + strtou64( argv[6], NULL, 16 )); + } cmd_errorlog( *l_output, strtou64( argv[1], NULL, 16 ), strtou64( argv[2], NULL, 16 ), l_huid, - l_ffdcLength ); + l_ffdcLength, + l_deconfig, + l_gard ); } else { @@ -1249,7 +1269,7 @@ int hbrtCommand( int argc, strcat( *l_output, l_tmpstr ); sprintf( l_tmpstr, "putscom
\n" ); strcat( *l_output, l_tmpstr ); - sprintf( l_tmpstr, "errorlog <word1> <word2> [<huid>] [size]\n" ); + sprintf( l_tmpstr, "errorlog <word1> <word2> [<huid>] [size] [deconfig] [gard]\n" ); strcat( *l_output, l_tmpstr ); sprintf( l_tmpstr, "sbemsg \n" ); strcat( *l_output, l_tmpstr ); -- cgit v1.2.1