diff options
Diffstat (limited to 'src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C')
-rw-r--r-- | src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C | 1342 |
1 files changed, 1342 insertions, 0 deletions
diff --git a/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C new file mode 100644 index 000000000..40f7b51a4 --- /dev/null +++ b/src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C @@ -0,0 +1,1342 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/diag/prdf/plat/pegasus/prdfCenMbaTdCtlr.C $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2013 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#include <prdfCenMbaTdCtlr.H> + +// Framework includes +#include <iipconst.h> +#include <iipServiceDataCollector.h> +#include <prdfExtensibleChip.H> +#include <prdfGlobal.H> +#include <prdfPlatServices.H> +#include <prdfRegisterCache.H> +#include <prdfTrace.H> + +// Pegasus includes +#include <prdfCalloutUtil.H> +#include <prdfCenAddress.H> +#include <prdfCenConst.H> +#include <prdfCenDqBitmap.H> +#include <prdfCenMbaDataBundle.H> +#include <prdfCenSymbol.H> + +// TODO: RTC 68096 Currently we are only supporting x8 DRAM. Once support for x4 +// DRAM is available, it will have impact on DRAM spare. + +using namespace TARGETING; + +namespace PRDF +{ + +using namespace PlatServices; + +enum EccErrorMask +{ + NO_ERROR = 0, ///< No ECC errors found + UE = 0x80, ///< UE + MPE = 0x40, ///< Chip mark placed + RCE = 0x20, ///< Retry CE + MCE = 0x10, ///< CE on chip mark +}; + +//------------------------------------------------------------------------------ +// Class Variables +//------------------------------------------------------------------------------ + +CenMbaTdCtlr::CMD_COMPLETE_FUNCS CenMbaTdCtlr::cv_cmdCompleteFuncs[] = +{ + &CenMbaTdCtlr::analyzeCmdComplete, // NO_OP + &CenMbaTdCtlr::analyzeVcmPhase1, // VCM_PHASE_1 + &CenMbaTdCtlr::analyzeVcmPhase2, // VCM_PHASE_2 + &CenMbaTdCtlr::analyzeDsdPhase1, // DSD_PHASE_1 + &CenMbaTdCtlr::analyzeDsdPhase2, // DSD_PHASE_2 +}; + +//------------------------------------------------------------------------------ +// Public Functions +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::handleCmdCompleteEvent( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::handleCmdCompleteEvent] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( !isInMdiaMode() ) + { + PRDF_ERR( PRDF_FUNC"A hostboot maintenance command complete " + "attention occurred while MDIA was not running." ); + o_rc = FAIL; + break; + } + + o_rc = initialize(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"initialize() failed" ); + break; + } + + // Immediately inform MDIA that the command has finished. + o_rc = mdiaSendEventMsg( mba, MDIA::RESET_TIMER ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mdiaSendEventMsg(RESET_TIMER) failed" ); + break; + } + + o_rc = (this->*cv_cmdCompleteFuncs[iv_tdState])( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Failed to continue analysis" ); + break; + } + + // Do some cleanup if the TD procedure is complete. + if ( !isInTdMode() ) + { + o_rc = exitTdSequence(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"exitTdSequence() failed" ); + break; + } + } + + } while(0); + + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"iv_mbaChip:0x%08x iv_initialized:%c iv_tdState:%d " + "iv_rank:%d iv_mark:%2d %2d", getHuid(mba), + iv_initialized ? 'T' : 'F', iv_tdState, iv_rank.flatten(), + iv_mark.getCM().getSymbol(), iv_mark.getSM().getSymbol() ); + + int32_t l_rc = cleanupPrevCmd(); // Just in case. + if ( SUCCESS != l_rc ) + PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); + + l_rc = mdiaSendEventMsg( mba, MDIA::SKIP_MBA ); + if ( SUCCESS != l_rc ) + PRDF_ERR( PRDF_FUNC"mdiaSendEventMsg(SKIP_MBA) failed" ); + + io_sc.service_data->SetErrorSig( PRDFSIG_MaintCmdComplete_ERROR ); + io_sc.service_data->SetServiceCall(); + + // There may have been a code bug, callout 2nd level support. + io_sc.service_data->SetCallout( NextLevelSupport_ENUM, MRU_HIGH ); + + // Callout the mark. If nothing was added to the callout list (no valid + // marks), callout the MBA. + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + if ( 1 == io_sc.service_data->GetMruList().size() ) + io_sc.service_data->SetCallout( mba ); + + // Just in case it was a legitimate maintenance command complete (error + // log not committed) but something else failed. + io_sc.service_data->ClearFlag(ServiceDataCollector::DONT_COMMIT_ERRL); + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ +// Private Functions +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::initialize() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::initialize] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( iv_initialized ) break; // nothing to do + + // Check for valid MBA. + if ( TYPE_MBA != getTargetType(mba) ) + { + PRDF_ERR( PRDF_FUNC"iv_mbaChip is not TYPE_MBA" ); + o_rc = FAIL; break; + } + + iv_initialized = true; + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeCmdComplete( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeCmdComplete] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( NO_OP != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + // Get the rank on which maintenance command stopped + CenAddr addr; + o_rc = getCenMaintStartAddr( iv_mbaChip, addr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"cenGetMaintAddr() failed" ); + break; + } + iv_rank = CenRank( addr.getRank() ); + + // Get error condition which caused command to stop + uint8_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( eccErrorMask & UE ) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MPE ) + { + // Get the current marks in hardware. + o_rc = mssGetMarkStore( mba, iv_rank, iv_mark ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed"); + break; + } + + if ( !iv_mark.getCM().isValid() ) + { + PRDF_ERR( PRDF_FUNC"No valid chip mark to verify"); + o_rc = FAIL; break; + } + + io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase1 ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // Start VCM procedure + o_rc = startVcmPhase1(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startVcmPhase1() failed" ); + break; + } + } + else + { + // If maint cmd completed with no error, don't commit error log. + io_sc.service_data->DontCommitErrorLog(); + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeVcmPhase1( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeVcmPhase1] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( VCM_PHASE_1 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + // Get error condition which caused command to stop + uint8_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( (eccErrorMask & UE) || (eccErrorMask & RCE) ) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else + { + io_sc.service_data->SetErrorSig( PRDFSIG_StartVcmPhase2 ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // Start VCM Phase 2 + o_rc = startVcmPhase2(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startVcmPhase2() failed" ); + break; + } + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeVcmPhase2( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeVcmPhase2] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( VCM_PHASE_2 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + // Get error condition which caused command to stop + uint8_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( eccErrorMask & UE ) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MCE ) + { + // Chip mark is verified. + + // Do callouts, VPD updates, and start DRAM sparing, if possible. + o_rc = handleMCE_VCM2( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleMCE_VCM2() failed" ); + break; + } + } + else + { + // Chip mark verification failed. + + iv_tdState = NO_OP; // Abort the TD procedure. + + io_sc.service_data->SetErrorSig( PRDFSIG_VcmFalseAlarm ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // In the field, this error log will be recoverable for now, but we + // may have to add thresholding later if they become a problem. In + // manufacturing, this error log will be predictive. + + if ( areDramRepairsDisabled() ) + io_sc.service_data->SetServiceCall(); + + // Remove chip mark from hardware. + iv_mark.clearCM(); + bool junk; + o_rc = mssSetMarkStore( mba, iv_rank, iv_mark, junk ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssSetMarkStore() failed" ); + break; + } + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeDsdPhase1( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeDsdPhase1] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( DSD_PHASE_1 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + // Get error condition which caused command to stop + uint8_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( ( eccErrorMask & UE) || ( eccErrorMask & RCE ) ) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else + { + io_sc.service_data->SetErrorSig( PRDFSIG_StartDsdPhase2 ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // Start DSD Phase 2 + startDsdPhase2(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startDsdPhase2() failed" ); + break; + } + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::analyzeDsdPhase2( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::analyzeDsdPhase2] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( DSD_PHASE_2 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + // Get error condition which caused command to stop + uint8_t eccErrorMask = NO_ERROR; + o_rc = checkEccErrors( eccErrorMask ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"checkEccErrors() failed" ); + break; + } + + if ( eccErrorMask & UE) + { + // Handle UE. Highest priority + o_rc = handleUE( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleUE() failed" ); + break; + } + } + else if ( eccErrorMask & MCE ) + { + // The spare is bad. + + // Do callouts and VPD updates. + o_rc = handleMCE_DSD2( io_sc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"handleMCE_DSD2() failed" ); + break; + } + } + else + { + // The chip mark has successfully been steered to the spare. + + io_sc.service_data->SetErrorSig( PRDFSIG_DsdDramSpared ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + // Remove chip mark from hardware. + iv_mark.clearCM(); + bool junk; + o_rc = mssSetMarkStore( mba, iv_rank, iv_mark, junk ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssSetMarkStore() failed" ); + break; + } + } + + iv_tdState = NO_OP; // The TD procedure is complete. + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::startVcmPhase1() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startVcmPhase1] " + + int32_t o_rc = SUCCESS; + + iv_tdState = VCM_PHASE_1; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // Start phase 1. + uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | + mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::startVcmPhase2() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startVcmPhase2] " + + int32_t o_rc = SUCCESS; + + iv_tdState = VCM_PHASE_2; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // Start phase 2. + uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | + mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::startDsdPhase1() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startDsdPhase1] " + + int32_t o_rc = SUCCESS; + + iv_tdState = DSD_PHASE_1; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // Set the steer mux + o_rc = mssSetSteerMux( mba, iv_rank, iv_mark.getCM(), false ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssSetSteerMux() failed" ); + break; + } + + // Start phase 1. + uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | + mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::TIMEBASE_STEER_CLEANUP, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::startDsdPhase2() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::startDsdPhase2] " + + int32_t o_rc = SUCCESS; + + iv_tdState = DSD_PHASE_2; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // Start phase 2. + uint32_t stopCond = ( mss_MaintCmd::STOP_END_OF_RANK | + mss_MaintCmd::STOP_ON_END_ADDRESS | + mss_MaintCmd::ENABLE_CMD_COMPLETE_ATTENTION ); + + iv_mssCmd = createMssCmd( mss_MaintCmdWrapper::SUPERFAST_READ, + mba, iv_rank, stopCond ); + if ( NULL == iv_mssCmd ) + { + PRDF_ERR( PRDF_FUNC"createMssCmd() failed"); + o_rc = FAIL; break; + } + + o_rc = iv_mssCmd->setupAndExecuteCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setupAndExecuteCmd() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +bool CenMbaTdCtlr::isInTdMode() +{ + return ( (NO_OP != iv_tdState) && (MAX_TD_STATE > iv_tdState) ); +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::checkEccErrors( uint8_t & o_eccErrorMask ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::checkEccErrors] " + + int32_t o_rc = SUCCESS; + + o_eccErrorMask = NO_ERROR; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + ExtensibleChip * membChip = mbadb->getMembChip(); + if ( NULL == membChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed: MBA=0x%08x", + getHuid(mba) ); + o_rc = FAIL; break; + } + + const char * reg_str = ( 0 == getTargetPosition(mba) ) + ? "MBSECC01FIR" : "MBSECC23FIR"; + SCAN_COMM_REGISTER_CLASS * mbsEccFir = membChip->getRegister( reg_str ); + + o_rc = mbsEccFir->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str ); + break; + } + + if ( mbsEccFir->IsBitSet(20 + iv_rank.flatten()) ) + { + o_eccErrorMask |= MPE; + + // Clean up side-effect FIRs that may be set due to the chip mark. + o_rc = chipMarkCleanup(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"chipMarkCleanup() failed" ); + break; + } + } + + if ( mbsEccFir->IsBitSet(38) ) o_eccErrorMask |= MCE; + if ( mbsEccFir->IsBitSet(41) ) o_eccErrorMask |= UE; + if ( mbsEccFir->IsBitSet(42) ) o_eccErrorMask |= RCE; + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::handleUE] " + + using namespace CalloutUtil; + + int32_t o_rc = SUCCESS; + + iv_tdState = NO_OP; // Abort the TD procedure. + + io_sc.service_data->SetErrorSig( PRDFSIG_MaintUE ); + io_sc.service_data->SetServiceCall(); + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + // Clean up the maintenance command. This is needed just in case the UE + // isolation procedure is modified to use maintenance commands. + o_rc = cleanupPrevCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); + break; + } + + // Look for all failing bits on this rank. + CenDqBitmap bitmap; + o_rc = mssIplUeIsolation( mba, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssIplUeIsolation() failed" ); + break; + } + + // Add UE data to capture data. + bitmap.getCaptureData( io_sc.service_data->GetCaptureData() ); + + // Callout the failing DIMMs. + TargetHandleList callouts; + for ( int32_t ps = 0; ps < PORT_SLCT_PER_MBA; ps++ ) + { + bool badDqs = false; + o_rc = bitmap.badDqs( ps, badDqs ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"badDqs(%d) failed", ps ); + break; + } + + if ( !badDqs ) continue; // nothing to do. + + TargetHandleList dimms = getConnectedDimms( mba, iv_rank, ps ); + if ( 0 == dimms.size() ) + { + PRDF_ERR( PRDF_FUNC"getConnectedDimms(%d) failed", ps ); + o_rc = FAIL; break; + } + + callouts.insert( callouts.end(), dimms.begin(), dimms.end() ); + } + if ( SUCCESS != o_rc ) break; + + if ( 0 == callouts.size() ) + { + // It is possible the scrub counters have rolled over to zero due to + // a known DD1.0 hardware bug. In this case, the best we can do is + // callout both DIMMs, because at minimum we know there was a UE, we + // just don't know where. + // NOTE: If this condition happens because of a DD2.0+ bug, the + // mssIplUeIsolation procedure will callout the Centaur. + callouts = getConnectedDimms( mba, iv_rank ); + if ( 0 == callouts.size() ) + { + PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" ); + o_rc = FAIL; break; + } + } + + // Callout all DIMMs in the list. + for ( TargetHandleList::iterator i = callouts.begin(); + i != callouts.end(); i++ ) + { + io_sc.service_data->SetCallout( *i, MRU_HIGH ); + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::handleMCE_VCM2] " + + int32_t o_rc = SUCCESS; + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( VCM_PHASE_2 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + io_sc.service_data->SetErrorSig( PRDFSIG_VcmVerified ); + + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + if ( areDramRepairsDisabled() ) + { + iv_tdState = NO_OP; // The TD procedure is complete. + + io_sc.service_data->SetServiceCall(); + + break; // nothing else to do. + } + + bool startDsdProcedure = false; + + // Read VPD. + CenDqBitmap bitmap; + o_rc = getBadDqBitmap( mba, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"getBadDqBitmap() failed" ); + break; + } + + // The chip mark is considered verified, so set it in VPD. + // NOTE: If this chip mark was placed on the spare, the original failing + // DRAM will have already been set in VPD so this will be + // redundant but it simplifies the rest of the logic below. + o_rc = bitmap.setDram( iv_mark.getCM().getSymbol() ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setDram() failed" ); + break; + } + + // RAS callout policies can be determined by the DIMM type. We can + // assume IS DIMMs are on low end systems and Centaur DIMMs are on + // mid/high end systems. + bool isCenDimm = false; + o_rc = isMembufOnDimm( mba, isCenDimm ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"isMembufOnDimm() failed" ); + break; + } + + if ( isCenDimm ) // Medium/high end systems + { + uint8_t ps = iv_mark.getCM().getPortSlct(); + + // It is possible that a Centaur DIMM does not have spare DRAMs. + // Check the VPD for available spares. Note that a x4 DIMM may have + // one or two spare DRAMs so check for availability on both. + // TODO: RTC 68096 Add support for x4 DRAMs. + bool dramSparePossible = false; + o_rc = bitmap.isDramSpareAvailable( ps, dramSparePossible ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"isDramSpareAvailable() failed" ); + break; + } + + if ( dramSparePossible ) + { + // Verify the spare is not already used. + CenSymbol sp0, sp1, ecc; + // TODO: RTC 68096 need to support ECC spare. + o_rc = mssGetSteerMux( mba, iv_rank, sp0, sp1, ecc ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed" ); + break; + } + + if ( ((0 == ps) && !sp0.isValid()) || + ((1 == ps) && !sp1.isValid()) ) + { + // A spare DRAM is available. + startDsdProcedure = true; + } + else if ( iv_mark.getCM().getDram() == + (0 == ps ? sp0.getDram() : sp1.getDram()) ) + { + io_sc.service_data->SetErrorSig( PRDFSIG_VcmBadSpare ); + + // The chip mark was on the spare DRAM and it is bad, so + // call it out and set it in VPD. + + MemoryMru memmru ( mba, iv_rank, iv_mark.getCM() ); + memmru.setDramSpared(); + io_sc.service_data->SetCallout( memmru ); + io_sc.service_data->SetServiceCall(); + + o_rc = bitmap.setDramSpare( ps ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setDramSpare() failed" ); + break; + } + } + else + { + // Chip mark and DRAM spare are both used. + io_sc.service_data->SetErrorSig( PRDFSIG_VcmMarksUnavail ); + io_sc.service_data->SetServiceCall(); + } + } + else + { + // Chip mark is in place and sparing is not possible. + io_sc.service_data->SetErrorSig( PRDFSIG_VcmMarksUnavail ); + io_sc.service_data->SetServiceCall(); + } + } + else // Low end systems + { + // Not able to do dram sparing. If there is a symbol mark, there are + // no repairs available so call it out and set the error log to + // predictive. + if ( iv_mark.getSM().isValid() ) + { + io_sc.service_data->SetErrorSig( PRDFSIG_VcmMarksUnavail ); + io_sc.service_data->SetServiceCall(); + } + } + + // Write VPD. + o_rc = setBadDqBitmap( mba, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setBadDqBitmap() failed" ); + break; + } + + // Start DSD Phase 1, if possible. + if ( startDsdProcedure ) + { + io_sc.service_data->SetErrorSig( PRDFSIG_StartDsdPhase1 ); + + o_rc = startDsdPhase1(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"startDsdPhase1() failed" ); + break; + } + } + else + { + iv_tdState = NO_OP; // The TD procedure is complete. + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc ) +{ + #define PRDF_FUNC "[CenMbaTdCtlr::handleMCE_DSD2] " + + int32_t o_rc = SUCCESS; + + io_sc.service_data->SetErrorSig( PRDFSIG_DsdBadSpare ); + io_sc.service_data->SetServiceCall(); + + TargetHandle_t mba = iv_mbaChip->GetChipHandle(); + + do + { + if ( DSD_PHASE_2 != iv_tdState ) + { + PRDF_ERR( PRDF_FUNC"Invalid state machine configuration" ); + o_rc = FAIL; break; + } + + // Callout mark and spare DRAM. + CalloutUtil::calloutMark( mba, iv_rank, iv_mark, io_sc ); + + MemoryMru memmru ( mba, iv_rank, iv_mark.getCM() ); + memmru.setDramSpared(); + io_sc.service_data->SetCallout( memmru ); + + // The spare DRAM is bad, so set it in VPD. At this point, the chip mark + // should have already been set in the VPD because it was recently + // verified. + + CenDqBitmap bitmap; + o_rc = getBadDqBitmap( mba, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"getBadDqBitmap() failed" ); + break; + } + + o_rc = bitmap.setDramSpare( iv_mark.getCM().getPortSlct() ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setDramSpare() failed" ); + break; + } + + o_rc = setBadDqBitmap( mba, iv_rank, bitmap ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"setBadDqBitmap() failed" ); + break; + } + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::exitTdSequence() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::exitTdSequence] " + + int32_t o_rc = SUCCESS; + + do + { + // Clean up the previous command + // PRD is not starting another command but MDIA might be so clear the + // counters and FIRs as well. + o_rc = prepareNextCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"prepareNextCmd() failed" ); + break; + } + + // Inform MDIA about command complete + // Note that we only want to send the command complete message if + // everything above is successful because a bad return code will result + // in a SKIP_MBA message sent. There is no need to send redundant + // messages. + o_rc = signalMdiaCmdComplete(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"signalMdiaCmdComplete() failed" ); + break; + } + + // Clear out the mark, just in case. This is so we don't accidentally + // callout this mark on another rank in an error patch scenario. + iv_mark = CenMark(); + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::prepareNextCmd() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::prepareNextCmd] " + + int32_t o_rc = SUCCESS; + + do + { + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + ExtensibleChip * membChip = mbadb->getMembChip(); + if ( NULL == membChip ) + { + PRDF_ERR( PRDF_FUNC"getMembChip() failed" ); + o_rc = FAIL; break; + } + + uint32_t mbaPos = getTargetPosition( iv_mbaChip->GetChipHandle() ); + + //---------------------------------------------------------------------- + // Clean up previous command + //---------------------------------------------------------------------- + + o_rc = cleanupPrevCmd(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); + break; + } + + //---------------------------------------------------------------------- + // Clear ECC counters + //---------------------------------------------------------------------- + + const char * reg_str = ( 0 == mbaPos ) ? "MBSTR_0" : "MBSTR_1"; + SCAN_COMM_REGISTER_CLASS * mbstr = membChip->getRegister( reg_str ); + o_rc = mbstr->Read(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str ); + break; + } + + mbstr->SetBit(53); // Setting this bit clears all counters. + + o_rc = mbstr->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Write() failed on %s", reg_str ); + break; + } + + // Hardware automatically clears bit 53, so flush this register out of + // the register cache to avoid clearing the counters again with a write + // from the out-of-date cached copy. + RegDataCache & cache = RegDataCache::getCachedRegisters(); + cache.flush( membChip, mbstr ); + + //---------------------------------------------------------------------- + // Clear ECC FIRs + //---------------------------------------------------------------------- + + reg_str = ( 0 == mbaPos ) ? "MBSECC01FIR_AND" : "MBSECC23FIR_AND"; + SCAN_COMM_REGISTER_CLASS * firand = membChip->getRegister( reg_str ); + firand->setAllBits(); + + // Clear MPE bit for this rank. + firand->ClearBit( 20 + iv_rank.flatten() ); + + // Clear NCE, SCE, MCE, RCE, SUE, UE bits (36-41) + firand->SetBitFieldJustified( 36, 6, 0 ); + + o_rc = firand->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"Write() failed on %s", reg_str ); + break; + } + + } while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +int32_t CenMbaTdCtlr::signalMdiaCmdComplete() +{ + #define PRDF_FUNC "[CenMbaTdCtlr::signalMdiaCmdComplete] " + + int32_t o_rc = SUCCESS; + + do + { + // Determine for MDIA whether or not the command finished at the end of + // the last rank or if the command will need to be restarted. + + // Get the last address of the last rank in memory. + CenAddr junk, allEndAddr; + o_rc = getMemAddrRange( iv_mbaChip->GetChipHandle(), junk, allEndAddr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"getMemAddrRange() failed" ); + break; + } + + // Get the address currently in the MBMEA. + CenAddr curEndAddr; + o_rc = getCenMaintEndAddr( iv_mbaChip, curEndAddr ); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC"cenGetMaintAddr() failed" ); + break; + } + + // The actual message will need to be sent in post analysis after the + // FIR bits have been cleared. + CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); + mbadb->iv_sendCmdCompleteMsg = true; + mbadb->iv_cmdCompleteMsgData = + (allEndAddr == curEndAddr) ? MDIA::COMMAND_COMPLETE + : MDIA::COMMAND_STOPPED; + + } while(0); + + return o_rc; + + #undef PRDF_FUNC +} + +} // end namespace PRDF + |