diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/usr/scan/scanif.H | 9 | ||||
-rw-r--r-- | src/usr/scan/scandd.C | 123 | ||||
-rw-r--r-- | src/usr/scom/scom.C | 362 | ||||
-rw-r--r-- | src/usr/scom/scom.H | 71 | ||||
-rw-r--r-- | src/usr/scom/scomtrans.C | 49 | ||||
-rw-r--r-- | src/usr/xscom/piberror.C | 6 | ||||
-rw-r--r-- | src/usr/xscom/runtime/makefile | 4 | ||||
-rw-r--r-- | src/usr/xscom/xscom.C | 29 |
8 files changed, 366 insertions, 287 deletions
diff --git a/src/include/usr/scan/scanif.H b/src/include/usr/scan/scanif.H index e164cf6cb..d2e20beb1 100644 --- a/src/include/usr/scan/scanif.H +++ b/src/include/usr/scan/scanif.H @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2011,2012 */ +/* COPYRIGHT International Business Machines Corp. 2011,2014 */ /* */ /* p1 */ /* */ @@ -40,7 +40,12 @@ namespace SCAN }; - } +// Standard Traces +#define SCANDD_TRACE_BUF "SCANDD" + +// Register Traces +#define SCANDD_RTRACE_BUF "SCANDDR" + #endif diff --git a/src/usr/scan/scandd.C b/src/usr/scan/scandd.C index 4b5d891b8..871a9c017 100644 --- a/src/usr/scan/scandd.C +++ b/src/usr/scan/scandd.C @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2011,2013 */ +/* COPYRIGHT International Business Machines Corp. 2011,2014 */ /* */ /* p1 */ /* */ @@ -41,6 +41,7 @@ #include <scan/scan_reasoncodes.H> #include <scan/scanif.H> #include "scandd.H" +#include <errl/errludtarget.H> // ---------------------------------------------- // Globals @@ -50,10 +51,10 @@ // Trace definitions // ---------------------------------------------- trace_desc_t* g_trac_scandd = NULL; -TRAC_INIT( & g_trac_scandd, "SCANDD", KILOBYTE ); +TRAC_INIT( & g_trac_scandd, SCANDD_TRACE_BUF, KILOBYTE ); trace_desc_t* g_trac_scanddr = NULL; -TRAC_INIT( & g_trac_scanddr, "SCANDDR", KILOBYTE ); +TRAC_INIT( & g_trac_scanddr, SCANDD_RTRACE_BUF, KILOBYTE ); // ---------------------------------------------- @@ -112,9 +113,13 @@ errlHndl_t scanPerformOp( DeviceFW::OperationType i_opType, SCAN::MOD_SCANDD_DDOP, SCAN::RC_INVALID_LENGTH, i_ring, - i_ringlength); + i_ringlength, + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); - l_err->collectTrace("SCANDD",1024); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -135,9 +140,13 @@ errlHndl_t scanPerformOp( DeviceFW::OperationType i_opType, SCAN::MOD_SCANDD_DDOP, SCAN::RC_INVALID_RING_ADDRESS, i_ring, - TARGETING::get_huid(i_target)); + TARGETING::get_huid(i_target), + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); - l_err->collectTrace("SCANDD",1024); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -158,9 +167,13 @@ errlHndl_t scanPerformOp( DeviceFW::OperationType i_opType, SCAN::MOD_SCANDD_DDOP, SCAN::RC_INVALID_BUF_SIZE, io_buflen, - i_ringlength); + i_ringlength, + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); - l_err->collectTrace("SCANDD",1024); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -196,8 +209,12 @@ errlHndl_t scanPerformOp( DeviceFW::OperationType i_opType, SCAN::MOD_SCANDD_DDOP, SCAN::RC_INVALID_OPERATION, i_ring, - TO_UINT64(i_opType)); - l_err->collectTrace("SCANDD",1024); + TO_UINT64(i_opType), + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -288,8 +305,10 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, { TRACFCOMP( g_trac_scandd, ERR_MRK "SCAN::scanDoScan> SCOM Write to scan select register failed. i_ring=%lX, scanTypeData=%lX,scanTypeAddr=%lX, target =%.8X", i_ring, l_scanTypeData,l_scanTypeAddr, TARGETING::get_huid(i_target) ); - - // TODO: Add usrDetails + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -337,8 +356,10 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, if(l_err) { TRACFCOMP( g_trac_scandd, ERR_MRK"SCAN::scanDoScan> ERROR i_ring=%.8X, target=%.8X , scanTypeData=%.8X, l_HeaderDataAddr=%.8X", i_ring, TARGETING::get_huid(i_target), l_buffer[0], l_headerDataAddr); - - // TODO: Add usrDetails + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } } @@ -360,8 +381,10 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, if(l_err) { TRACFCOMP( g_trac_scandd, ERR_MRK"SCAN::scanDoScan> ERROR i_ring=%.8X, target=%.8X , scanTypeData=%.8X, l_HeaderDataAddr=%.8X", i_ring, TARGETING::get_huid(i_target), l_buffer[0], l_headerDataAddr); - - // TODO: Add usrDetails + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -453,8 +476,10 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, if(l_err) { TRACFCOMP( g_trac_scandd,ERR_MRK "SCAN::scanDoScan: Device OP error> i_ring=%.8X, target=%.8X , scanTypeData=%.8X, i_flag=%.8X,", i_ring, TARGETING::get_huid(i_target), l_scanDataAddr, i_flag ); - - // TODO: Add user details + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -534,8 +559,10 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, if(l_err) { TRACFCOMP( g_trac_scandd, ERR_MRK "SCAN::scanDoScan: OP and shift of < 32bits i_ring=%.8X, scanTypeDataAddr=%.8X, l_lastDataBits=%.8X, target=%.8X", i_ring, l_scanDataAddr, l_lastDataBits, TARGETING::get_huid(i_target) ); - - // TODO: Add user details + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -615,8 +642,7 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, if(l_err) { TRACFCOMP( g_trac_scandd,ERR_MRK "SCAN::scanDoScan> ERROR i_ring=%.8X, HeaderDataAddr=%.8X, i_flag=%.8X, target=%.8X", i_ring, l_headerDataAddr, i_flag, TARGETING::get_huid(i_target) ); - - // TODO: Add user details + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } @@ -626,9 +652,8 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, // If the header data did not match.. if ((l_buffer[0] != HEADER_CHECK_DATA)) { - TRACDCOMP( g_trac_scandd,"SCAN::scanDoScan> Header Check Failed expect deadbeef.. i_ring=%.8X, i_opType=%.8X , ring data=%.8X, i_flag=%.8X,", i_ring, i_opType, l_buffer[0], i_flag ); - - TRACFCOMP( g_trac_scandd,"SCAN: HEADER DATA FAILED!! %.8x = %.8x %.8x",l_headerDataAddr , l_buffer[0], l_buffer[1]); + TRACFCOMP( g_trac_scandd, "SCAN::scanDoScan> Header Check Failed on %.8X: i_ring=%.8X, i_opType=%.8X, i_flag=%.8X,", TARGETING::get_huid(i_target), i_ring, i_opType, i_flag ); + TRACFCOMP( g_trac_scandd, "%.8X = %.8X_%.8X (expected 0xDEADBEEF)", l_headerDataAddr , l_buffer[0], l_buffer[1] ); /*@ * @errortype @@ -636,14 +661,46 @@ errlHndl_t scanDoScan( DeviceFW::OperationType i_opType, * @reasoncode SCAN::RC_HEADER_DATA_MISMATCH * @userdata1 SCAN Ring Address * @userdata2 Operation Type (i_opType) - * @devdesc ScanDD::scanDoScan> Got a data mismatch when reading back the header + * @devdesc ScanDD::scanDoScan> Got a data mismatch + * when reading back the header */ - l_err = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE, - SCAN::MOD_SCANDD_DOSCAN, - SCAN::RC_HEADER_DATA_MISMATCH, - i_ring, - TO_UINT64(i_opType)); - l_err->collectTrace("SCANDD",1024); + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + SCAN::MOD_SCANDD_DOSCAN, + SCAN::RC_HEADER_DATA_MISMATCH, + i_ring, + TO_UINT64(i_opType) ); + //Most like cause (based on experience) is some kind + // of a clock issue + TARGETING::TYPE type = + i_target->getAttr<TARGETING::ATTR_TYPE>(); + if( type == TARGETING::TYPE_PROC) + { + l_err->addClockCallout(i_target, + HWAS::OSCREFCLK_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + } + else if( type == TARGETING::TYPE_MEMBUF ) + { + l_err->addClockCallout(i_target, + HWAS::MEMCLK_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + } + else // for anything else, just blame the refclock + { + l_err->addClockCallout(i_target, + HWAS::OSCREFCLK_TYPE, + HWAS::SRCI_PRIORITY_HIGH); + } + //Could also be a busted chip + l_err->addHwCallout( i_target, + HWAS::SRCI_PRIORITY_LOW, + HWAS::DECONFIG, //allows us to continue + HWAS::GARD_NULL ); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"Scan Target") + .addToLog(l_err); + l_err->collectTrace(SCANDD_TRACE_BUF,1024); break; } diff --git a/src/usr/scom/scom.C b/src/usr/scom/scom.C index 8030d52b5..ae83f7769 100644 --- a/src/usr/scom/scom.C +++ b/src/usr/scom/scom.C @@ -38,6 +38,8 @@ #include <scom/scomreasoncodes.H> #include <ibscom/ibscomreasoncodes.H> #include <sys/time.h> +#include <xscom/piberror.H> +#include <errl/errludtarget.H> // Trace definition @@ -101,28 +103,29 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, enum { MAX_INDSCOM_TIMEOUT_NS = 100000 }; //=.1ms - // In HOSTBOOT_RUNTIME we always defer indirect scoms to Sapphire. + mutex_t* l_mutex = NULL; + bool need_unlock = false; + + do { + // In HOSTBOOT_RUNTIME we always defer indirect scoms to Sapphire. #ifndef __HOSTBOOT_RUNTIME - // If the indirect scom bit is 0, then doing a regular scom - if( (i_addr & 0x8000000000000000) == 0) - { + // If the indirect scom bit is 0, then doing a regular scom + if( (i_addr & 0x8000000000000000) == 0) + { #endif // __HOSTBOOT_RUNTIME - l_err = doScomOp(i_opType, - i_target, - io_buffer, - io_buflen, - i_accessType, - i_addr); + l_err = doScomOp(i_opType, + i_target, + io_buffer, + io_buflen, + i_accessType, + i_addr); + //all done + break; #ifndef __HOSTBOOT_RUNTIME - } - // We are performing an indirect scom. - else - { - mutex_t* l_mutex = NULL; - uint64_t elapsed_indScom_time_ns = 0; - bool l_indScomError = false; - uint64_t temp_io_buffer = 0; + } + // We are performing an indirect scom. + uint64_t elapsed_indScom_time_ns = 0; uint64_t l_io_buffer = 0; uint64_t temp_scomAddr = 0; @@ -155,6 +158,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, i_target->getHbMutexAttr<TARGETING::ATTR_SCOM_IND_MUTEX>(); mutex_lock(l_mutex); + need_unlock = true; // turn the read bit on. l_io_buffer = l_io_buffer | 0x8000000000000000; @@ -170,20 +174,20 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, if (l_err != NULL) { - mutex_unlock(l_mutex); - return l_err; + break; } - // Need to check loop on read until either - // bit (32) = 1 or we have exceeded our max - // retries. + // Need to check loop on read until we see done, error, + // or we timeout + IndirectScom_t scomout; + scomout.data64 = 0; do { // Now perform the op requested using the passed in // IO_Buffer to pass the read data back to caller. l_err = doScomOp(i_opType, i_target, - io_buffer, + &(scomout.data64), io_buflen, i_accessType, i_addr); @@ -194,27 +198,9 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, } // if bit 32 is on indicating a complete bit - if ((*((uint64_t *)io_buffer) & SCOM_IND_COMPLETE_MASK) - == SCOM_IND_COMPLETE_MASK) + // or we saw an error, then we're done + if (scomout.done || scomout.piberr) { - // check for bits 33-35 to be 0 - // indicating the read is valid - if ((*((uint64_t *)io_buffer) & SCOM_IND_ERROR_MASK) - == 0) - - { - // Clear out the other bits in the io_buffer - // register to only return the read data to caller - *((uint64_t *)io_buffer) &= 0x00000000000FFFF; - - } - else - { - // indicate that we do have a indirect scom failure - l_indScomError = true; - } - - // break out because we got the complete bit.. break; } @@ -224,64 +210,80 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, }while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS); mutex_unlock(l_mutex); + need_unlock = false; - if (l_err == NULL) - { - if (l_indScomError == true) - { - // got an indirect read error - // the data buffer is in tempIoData - TRACFCOMP(g_trac_scom, - "INDIRECT SCOM READ= ERROR valid bits are not on.. scomreg=0x%.16X", - *((uint64_t *)io_buffer)); - - /*@ - * @errortype - * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM - * @reasoncode SCOM::SCOM_INDIRECT_READ_FAIL - * @userdata1 Address - * @userdata2 Scom data read from Address - * @devdesc Indirect SCOM Read error - */ - l_err = new ERRORLOG::ErrlEntry( - ERRORLOG::ERRL_SEV_UNRECOVERABLE, - SCOM_CHECK_INDIRECT_AND_DO_SCOM, - SCOM_INDIRECT_READ_FAIL, - i_addr, - *((uint64_t *)io_buffer)); - - //@TODO - add usr details to the errorlog when we have one to - // give better info regarding the fail.. - - } - // if we got a timeout, create an errorlog. - else if( elapsed_indScom_time_ns > MAX_INDSCOM_TIMEOUT_NS ) - { - // got an indirect read timeout - TRACFCOMP(g_trac_scom, - "INDIRECT SCOM READ=indirect read timout .. scomreg=0x%.16X", - *((uint64_t *)io_buffer)); - - - /*@ - * @errortype - * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM - * @reasoncode SCOM::SCOM_INDIRECT_READ_TIMEOUT - * @userdata1 Address - * @userdata2 Scom data read from Address - * @devdesc Indirect SCOM complete bit did not come on - */ - l_err = new ERRORLOG::ErrlEntry( - ERRORLOG::ERRL_SEV_UNRECOVERABLE, - SCOM_CHECK_INDIRECT_AND_DO_SCOM, - SCOM_INDIRECT_READ_TIMEOUT, - i_addr, - *((uint64_t *)io_buffer)); - - //@TODO - add usr details to the errorlog when we have - // one to give better info regarding the fail.. + if (l_err) { break; } - } + // Check for a PCB/PIB Error + if( scomout.piberr != 0 ) + { + // got an indirect read error + // the data buffer is in tempIoData + TRACFCOMP(g_trac_scom, + "INDIRECT SCOM READ: PIB Error=%d (reg=0x%.16X)", + scomout.piberr, scomout.data64); + + /*@ + * @errortype + * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM + * @reasoncode SCOM::SCOM_INDIRECT_READ_FAIL + * @userdata1 Address + * @userdata2 Indirect Scom Status Register + * @devdesc Indirect SCOM Read error + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + SCOM_CHECK_INDIRECT_AND_DO_SCOM, + SCOM_INDIRECT_READ_FAIL, + i_addr, + scomout.data64); + + //Add the callouts for the specific PCB/PIB error + PIB::addFruCallouts( i_target, + scomout.piberr, + l_err ); + + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") + .addToLog(l_err); + } + // if we got a timeout, create an errorlog. + else if( scomout.done == 0 ) + { + // got an indirect read timeout + TRACFCOMP(g_trac_scom, + "INDIRECT SCOM READ: Timeout, reg=0x%.16X", + scomout.data64); + + /*@ + * @errortype + * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM + * @reasoncode SCOM::SCOM_INDIRECT_READ_TIMEOUT + * @userdata1 Address + * @userdata2 Indirect Scom Status Register + * @devdesc Indirect SCOM complete bit did not come on + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + SCOM_CHECK_INDIRECT_AND_DO_SCOM, + SCOM_INDIRECT_READ_TIMEOUT, + i_addr, + scomout.data64); + + //Best guess is the chip + l_err->addHwCallout( i_target, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Predictive ); + + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") + .addToLog(l_err); + } + else // It worked + { + uint64_t tmp = static_cast<uint64_t>(scomout.data); + memcpy( io_buffer, &tmp, sizeof(uint64_t) ); } } else //write @@ -290,7 +292,7 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, l_io_buffer = l_io_buffer & 0x7FFFFFFFFFFFFFFF; // Now perform the op requested using the - // locai io_buffer with the indirect addr imbedded. + // local io_buffer with the indirect addr imbedded. l_err = doScomOp(i_opType, i_target, & l_io_buffer, @@ -298,46 +300,30 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, i_accessType, i_addr); - // Need to check loop on read until either - // bit (32) = 1 or we have exceeded our max - // retries. + // Need to check loop on read until we see done, error, + // or we timeout + IndirectScom_t scomout; + scomout.data64 = 0; do { - - memcpy(&temp_io_buffer, io_buffer, 8); - - // Now perform the op requested using the passed in - // IO_Buffer to pass the read data back to caller. + // Now look for status l_err = doScomOp(DeviceFW::READ, i_target, - & temp_io_buffer, + &(scomout.data64), io_buflen, i_accessType, i_addr); - if (l_err != NULL) { break; } // if bit 32 is on indicating a complete bit - if ((temp_io_buffer & SCOM_IND_COMPLETE_MASK) - == SCOM_IND_COMPLETE_MASK) + // or we saw an error, then we're done + if (scomout.done || scomout.piberr) { - // The write is valid when bits 33-35 are 0.. - // if not on return error - if ((temp_io_buffer & SCOM_IND_ERROR_MASK) - != 0) - - { - // bits did not get turned on.. set error to true. - l_indScomError = true; - } - - // break out because we got the complete bit on break; - } nanosleep( 0, 10000 ); //sleep for 10,000 ns @@ -345,66 +331,80 @@ errlHndl_t checkIndirectAndDoScom(DeviceFW::OperationType i_opType, }while ( elapsed_indScom_time_ns <= MAX_INDSCOM_TIMEOUT_NS); - if (l_err == NULL) - { - // If the indirect scom has an error. - if (l_indScomError == true) - { - // got an indirect write error - TRACFCOMP(g_trac_scom, "INDIRECT SCOM WRITE= ERROR valid bits are not on.. scomreg=0x%.16X", temp_io_buffer); - - /*@ - * @errortype - * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM - * @reasoncode SCOM::SCOM_INDIRECT_WRITE_FAIL - * @userdata1 Address - * @userdata2 Scom data read from Address - * @devdesc Indirect SCOM Write failed for this address - */ - l_err = new ERRORLOG::ErrlEntry( - ERRORLOG::ERRL_SEV_UNRECOVERABLE, - SCOM_CHECK_INDIRECT_AND_DO_SCOM, - SCOM_INDIRECT_WRITE_FAIL, - i_addr, - temp_io_buffer); - - //@TODO - add usr details to the errorlog when we have - // one to give better info regarding the fail.. + if (l_err) { break; } - } - // if we got a timeout, create an errorlog. - else if( elapsed_indScom_time_ns > MAX_INDSCOM_TIMEOUT_NS ) - { - // got an indirect write timeout - TRACFCOMP(g_trac_scom, - "INDIRECT SCOM WRITE=indirect write timeout .. scomreg=0x%.16X", - temp_io_buffer); - - - /*@ - * @errortype - * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM - * @reasoncode SCOM::SCOM_INDIRECT_WRITE_TIMEOUT - * @userdata1 Address - * @userdata2 Scom data read from Address - * @devdesc Indirect SCOM write timeout, complete - * bit did not come one - */ - l_err = new ERRORLOG::ErrlEntry( - ERRORLOG::ERRL_SEV_UNRECOVERABLE, - SCOM_CHECK_INDIRECT_AND_DO_SCOM, - SCOM_INDIRECT_WRITE_TIMEOUT, - i_addr, - temp_io_buffer); - - //@TODO - add usr details to the errorlog when we have - // one to give better info regarding the fail.. - - } + // Check for a PCB/PIB Error + if( scomout.piberr != 0 ) + { + // got an indirect write error + TRACFCOMP(g_trac_scom, "INDIRECT SCOM PIB Error=%d (reg=0x%.16X)", scomout.piberr, scomout.data64); + + /*@ + * @errortype + * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM + * @reasoncode SCOM::SCOM_INDIRECT_WRITE_FAIL + * @userdata1 Address + * @userdata2 Indirect Scom Status Register + * @devdesc Indirect SCOM Write failed for this address + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + SCOM_CHECK_INDIRECT_AND_DO_SCOM, + SCOM_INDIRECT_WRITE_FAIL, + i_addr, + scomout.data64); + + //Add the callouts for the specific PCB/PIB error + PIB::addFruCallouts( i_target, + scomout.piberr, + l_err ); + + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") + .addToLog(l_err); + } + // if we got a timeout, create an errorlog. + else if( scomout.done == 0 ) + { + // got an indirect read timeout + TRACFCOMP(g_trac_scom, + "INDIRECT SCOM WRITE: Timeout, reg=0x%.16X", + scomout.data64); + + /*@ + * @errortype + * @moduleid SCOM::SCOM_CHECK_INDIRECT_AND_DO_SCOM + * @reasoncode SCOM::SCOM_INDIRECT_WRITE_TIMEOUT + * @userdata1 Address + * @userdata2 Indirect Scom Status Register + * @devdesc Indirect SCOM complete bit did not come on + */ + l_err = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + SCOM_CHECK_INDIRECT_AND_DO_SCOM, + SCOM_INDIRECT_WRITE_TIMEOUT, + i_addr, + scomout.data64); + + //Best guess is the chip + l_err->addHwCallout( i_target, + HWAS::SRCI_PRIORITY_HIGH, + HWAS::DECONFIG, + HWAS::GARD_Predictive ); + + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"IndSCOM Target") + .addToLog(l_err); } } // end of write - } #endif // __HOSTBOOT_RUNTIME + } while(0); + + if( need_unlock ) + { + mutex_unlock(l_mutex); + } + return l_err; } diff --git a/src/usr/scom/scom.H b/src/usr/scom/scom.H index bcb9f2fe2..06416640b 100644 --- a/src/usr/scom/scom.H +++ b/src/usr/scom/scom.H @@ -1,25 +1,25 @@ -// IBM_PROLOG_BEGIN_TAG -// This is an automatically generated prolog. -// -// $Source: src/usr/scom/scom.H $ -// -// IBM CONFIDENTIAL -// -// COPYRIGHT International Business Machines Corp. 2011 -// -// p1 -// -// Object Code Only (OCO) source materials -// Licensed Internal Code Source Materials -// IBM HostBoot Licensed Internal Code -// -// The source code for this program is not published or other- -// wise divested of its trade secrets, irrespective of what has -// been deposited with the U.S. Copyright Office. -// -// Origin: 30 -// -// IBM_PROLOG_END +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/scom/scom.H $ */ +/* */ +/* IBM CONFIDENTIAL */ +/* */ +/* COPYRIGHT International Business Machines Corp. 2011,2014 */ +/* */ +/* p1 */ +/* */ +/* Object Code Only (OCO) source materials */ +/* Licensed Internal Code Source Materials */ +/* IBM HostBoot Licensed Internal Code */ +/* */ +/* The source code for this program is not published or otherwise */ +/* divested of its trade secrets, irrespective of what has been */ +/* deposited with the U.S. Copyright Office. */ +/* */ +/* Origin: 30 */ +/* */ +/* IBM_PROLOG_END_TAG */ #ifndef __SCOM_H #define __SCOM_H @@ -31,13 +31,30 @@ namespace SCOM { - enum ScomErrorMask +/** + * @brief Indirect SCOM Status + */ +union IndirectScom_t +{ + uint64_t data64; + struct { - // checkf for complete bit .. bit 32 - SCOM_IND_COMPLETE_MASK = 0x0000000080000000, - //check for bits 33-35 - SCOM_IND_ERROR_MASK = 0x0000000070000000 + uint64_t :12; //0:11 + uint64_t addr:20; //12:31 + uint64_t done:1; //32 + uint64_t piberr:3; //33:35 + uint64_t userstat:4; //36:39 + uint64_t :8; //40:47 + uint64_t data:16; //48:63 }; +}; +enum ScomErrorMask +{ + // checkf for complete bit .. bit 32 + SCOM_IND_COMPLETE_MASK = 0x0000000080000000, + //check for bits 33-35 + SCOM_IND_ERROR_MASK = 0x0000000070000000 +}; diff --git a/src/usr/scom/scomtrans.C b/src/usr/scom/scomtrans.C index 024b2cd54..2f533ea5c 100644 --- a/src/usr/scom/scomtrans.C +++ b/src/usr/scom/scomtrans.C @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2011,2013 */ +/* COPYRIGHT International Business Machines Corp. 2011,2014 */ /* */ /* p1 */ /* */ @@ -40,6 +40,7 @@ #include "scom.H" #include "scomtrans.H" #include <scom/scomreasoncodes.H> +#include <errl/errludtarget.H> // Trace definition extern trace_desc_t* g_trac_scom; @@ -512,9 +513,11 @@ errlHndl_t scomTranslate(DeviceFW::OperationType i_opType, SCOM_TRANSLATE, SCOM_TRANS_INVALID_TYPE, i_addr, - l_type); - - + l_type, + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"SCOM Target") + .addToLog(l_err); } } @@ -536,8 +539,12 @@ errlHndl_t scomTranslate(DeviceFW::OperationType i_opType, SCOM_TRANSLATE, SCOM_INVALID_ADDR, i_addr, - l_type); - l_err->collectTrace("SCOM",1024); + l_type, + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(i_target,"SCOM Target") + .addToLog(l_err); + l_err->collectTrace(SCOM_COMP_NAME,1024); } @@ -587,6 +594,8 @@ errlHndl_t scomPerformTranslate(TARGETING::EntityPath i_epath, } else { + TRACFCOMP(g_trac_scom,"SCOMPERFORMTRANSLATE Invalid Address.i_addr =0x%X for mask = 0x%X", i_addr, i_mask); + /*@ * @errortype * @moduleid SCOM::SCOM_PERFORM_TRANSLATE @@ -596,14 +605,16 @@ errlHndl_t scomPerformTranslate(TARGETING::EntityPath i_epath, * @devdesc Invalid Address for the mask passed in. */ l_err = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE, - SCOM_PERFORM_TRANSLATE, - SCOM_INVALID_ADDR, - i_addr, - o_target->getAttr<TARGETING::ATTR_TYPE>()); + SCOM_PERFORM_TRANSLATE, + SCOM_INVALID_ADDR, + i_addr, + o_target->getAttr<TARGETING::ATTR_TYPE>(), + true/*SW Error*/); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(o_target,"SCOM Target") + .addToLog(l_err); - l_err->collectTrace("SCOM",1024); - - TRACFCOMP(g_trac_scom,"SCOMPERFORMTRANSLATE Invalid Address.i_addr =0x%X for mask = 0x%X", i_addr, i_mask); + l_err->collectTrace(SCOM_COMP_NAME,1024); return (l_err); } @@ -657,7 +668,7 @@ errlHndl_t scomfindParentTarget( TARGETING::EntityPath i_epath, if (!foundParent) { - // got and error.. bad address.. write an errorlog.. + TRACFCOMP(g_trac_scom, "TRANSLATE..Did not find parent type=0x%X ", i_ptype); /*@ * @errortype * @moduleid SCOM::SCOM_PERFORM_TRANSLATE @@ -670,12 +681,14 @@ errlHndl_t scomfindParentTarget( TARGETING::EntityPath i_epath, SCOM_FIND_PARENT_TARGET, SCOM_NO_MATCHING_PARENT, i_ptype, - o_target->getAttr<TARGETING::ATTR_TYPE>()); + o_target->getAttr<TARGETING::ATTR_TYPE>(), + true/*SW Error*/); - l_err->collectTrace("SCOM",1024); + //Add this target to the FFDC + ERRORLOG::ErrlUserDetailsTarget(o_target,"SCOM Target") + .addToLog(l_err); - // Need to write and errorlog and return.. - TRACFCOMP(g_trac_scom, "TRANSLATE..Did not find parent type=0x%X ", i_ptype); + l_err->collectTrace(SCOM_COMP_NAME,1024); } return l_err; diff --git a/src/usr/xscom/piberror.C b/src/usr/xscom/piberror.C index 0ffe42a2b..498d36991 100644 --- a/src/usr/xscom/piberror.C +++ b/src/usr/xscom/piberror.C @@ -1,4 +1,3 @@ - /* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ @@ -6,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2013 */ +/* COPYRIGHT International Business Machines Corp. 2013,2014 */ /* */ /* p1 */ /* */ @@ -21,6 +20,7 @@ /* Origin: 30 */ /* */ /* IBM_PROLOG_END_TAG */ + /** * @file piberror.C * @@ -59,7 +59,7 @@ void addFruCallouts(TARGETING::Target* i_target, HWAS::SRCI_PRIORITY_HIGH); io_errl->addHwCallout( i_target, HWAS::SRCI_PRIORITY_LOW, - HWAS::NO_DECONFIG, + HWAS::DECONFIG, HWAS::GARD_NULL ); break; case PIB::PIB_PARTIAL_GOOD: diff --git a/src/usr/xscom/runtime/makefile b/src/usr/xscom/runtime/makefile index b49e707b5..a534fc13b 100644 --- a/src/usr/xscom/runtime/makefile +++ b/src/usr/xscom/runtime/makefile @@ -5,7 +5,7 @@ # # IBM CONFIDENTIAL # -# COPYRIGHT International Business Machines Corp. 2013 +# COPYRIGHT International Business Machines Corp. 2013,2014 # # p1 # @@ -24,7 +24,7 @@ HOSTBOOT_RUNTIME = 1 ROOTPATH = ../../../.. MODULE = xscom_rt -OBJS = rt_xscom.o +OBJS = rt_xscom.o piberror.o SUBDIRS = test.d diff --git a/src/usr/xscom/xscom.C b/src/usr/xscom/xscom.C index 671702775..9b62d6f78 100644 --- a/src/usr/xscom/xscom.C +++ b/src/usr/xscom/xscom.C @@ -108,26 +108,6 @@ HMER waitForHMERStatus() /** - * @brief Internal routine that checks to see if retry is - * possible on an XSCOM error - * - * @return true if retry is possible; false otherwise. - */ -bool XSComRetry(const HMER i_hmer) -{ - bool l_retry = false; - switch (i_hmer.mXSComStatus) - { - case PIB::PIB_RESOURCE_OCCUPIED: - l_retry = true; - break; - default: - break; - } - return l_retry; -} - -/** * @brief Internal routine that verifies the validity of input parameters * for an XSCOM access. * @@ -494,8 +474,15 @@ errlHndl_t xScomDoOp(DeviceFW::OperationType i_opType, { // print a trace message.. for debug purposes // incase we are stuck in a retry loop. - TRACFCOMP(g_trac_xscom,"xscomPerformOp - RESOUCE OCCUPIED LOOP Cntr = %d: OpType 0x%.16llX, Address 0x%llX, MMIO Address 0x%llX", l_retryCtr, static_cast<uint64_t>(i_opType),i_xscomAddr,static_cast<uint64_t>(l_mmioAddr)); + TRACFCOMP(g_trac_xscom,"xscomPerformOp - RESOURCE OCCUPIED LOOP Cntr = %d: OpType 0x%.16llX, Address 0x%llX, MMIO Address 0x%llX, HMER=%.16X", l_retryCtr, static_cast<uint64_t>(i_opType), i_xscomAddr, static_cast<uint64_t>(l_mmioAddr), io_hmer.mRegister ); + // we don't want to hang forever so break out after + // an obscene amount of time + if( l_retryCtr > 500000 ) + { + TRACFCOMP( g_trac_xscom, "Giving up, we're still locked..." ); + break; + } } } while (io_hmer.mXSComStatus == PIB::PIB_RESOURCE_OCCUPIED); |