diff options
Diffstat (limited to 'src/usr')
| -rw-r--r-- | src/usr/intr/intrrp.C | 160 | ||||
| -rw-r--r-- | src/usr/intr/intrrp.H | 28 | ||||
| -rw-r--r-- | src/usr/mbox/mailboxsp.C | 96 | ||||
| -rw-r--r-- | src/usr/mbox/mailboxsp.H | 18 | ||||
| -rw-r--r-- | src/usr/mbox/mboxdd.C | 84 | ||||
| -rw-r--r-- | src/usr/mbox/mboxdd.H | 16 |
6 files changed, 391 insertions, 11 deletions
diff --git a/src/usr/intr/intrrp.C b/src/usr/intr/intrrp.C index 9e3f5930b..bb6aabff2 100644 --- a/src/usr/intr/intrrp.C +++ b/src/usr/intr/intrrp.C @@ -30,6 +30,7 @@ #include "intrrp.H" #include <trace/interface.H> #include <errno.h> +#include <string.h> #include <initservice/taskargs.H> #include <initservice/initserviceif.H> #include <util/singleton.H> @@ -1331,6 +1332,16 @@ void IntrRp::msgHandler() msg_respond(iv_msgQ,msg); } break; + case MSG_INTR_DUMP: + { + // Run the functions that dump out + // interrupt info to slow buffer + printEsbStates(); + printLSIInfo(); + printPSIHBInfo(); + msg_free(msg); // async message + } + break; default: msg->data[1] = -EINVAL; @@ -3392,3 +3403,152 @@ errlHndl_t INTR::IntrRp::enableSlaveProcInterrupts(TARGETING::Target * i_target) return l_err; } +void INTR::esbStateToString(uint64_t i_esbState, const char** o_esbStateString) +{ + switch(i_esbState) + { + case ESB_STATE_RESET: + *o_esbStateString = "RESET"; + break; + case ESB_STATE_OFF: + *o_esbStateString = "OFF"; + break; + case ESB_STATE_PENDING: + *o_esbStateString = "PENDING"; + break; + case ESB_STATE_QUEUED: + *o_esbStateString = "QUEUED"; + break; + default: + *o_esbStateString = "INVALID"; + break; + } +} + +errlHndl_t INTR::printInterruptInfo() +{ + errlHndl_t err = NULL; + msg_q_t intr_msgQ = msg_q_resolve(VFS_ROOT_MSG_INTR); + if(intr_msgQ) + { + msg_t * msg = msg_allocate(); + msg->type = MSG_INTR_DUMP; + int send_rc = msg_send(intr_msgQ, msg); + if (send_rc != 0) + { + TRACFCOMP(g_trac_intr, ERR_MRK"IntrRp::printInterruptInfo error " + "sending print intr info message"); + /*@ errorlog tag + * @errortype ERRL_SEV_UNRECOVERABLE + * @moduleid INTR::MOD_INTR_DUMP + * @reasoncode INTR::RC_MESSAGE_SEND_ERROR + * @userdata1 RC from msg_send command + * @devdesc Error encountered sending print intr info + * message to INTRP + * @custdesc Error encountered gathering diagnostic info + */ + err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity + INTR::MOD_INTR_DUMP, // moduleid + INTR::RC_MESSAGE_SEND_ERROR, // reason code + send_rc, + 0 + ); + } + } + else + { + /*@ errorlog tag + * @errortype ERRL_SEV_INFORMATIONAL + * @moduleid INTR::MOD_INTR_DUMP + * @reasoncode INTR::RC_RP_NOT_INITIALIZED + * @userdata1 MSG_INTR_DUMP + * @userdata2 0 + * @devdesc Interrupt resource provider not initialized yet. + * @custdesc Error encountered gathering diagnostic info + */ + err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_INFORMATIONAL, // severity + INTR::MOD_INTR_DUMP, // moduleid + INTR::RC_RP_NOT_INITIALIZED, // reason code + static_cast<uint64_t>(MSG_INTR_DUMP), + 0 + ); + } + return err; +} + +void INTR::IntrRp::printLSIInfo() const +{ + TRACFCOMP(g_trac_intr, "---LSI Sources---"); + + //Read LSI Interrupt Status register from each enabled + // proc chip to see which caused the interrupt + for(auto targ_itr = iv_chipList.begin(); + targ_itr != iv_chipList.end(); ++targ_itr) + { + uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintstatus; + uint32_t l_huid = get_huid((*targ_itr)->proc); + TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid); + TRACFCOMP(g_trac_intr, " lsiIntStatus : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintstatus , l_mmioRead); + l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintlevel; + TRACFCOMP(g_trac_intr, " lsiIntLevel : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintlevel, l_mmioRead); + } +} + +void INTR::IntrRp::printPSIHBInfo() const +{ + TRACFCOMP(g_trac_intr, "---PSIHB Info---"); + //Read LSI Interrupt Status register from each enabled + // proc chip to see which caused the interrupt + for(auto targ_itr = iv_chipList.begin(); + targ_itr != iv_chipList.end(); ++targ_itr) + { + uint32_t l_huid = get_huid((*targ_itr)->proc); + uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->psihbcr; + + TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid); + + TRACFCOMP(g_trac_intr, " PSIHB Ctrl/Status Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->psihbcr, l_mmioRead); + + l_mmioRead = (*targ_itr)->psiHbBaseAddr->psisemr; + TRACFCOMP(g_trac_intr, " PSIHB Error/Status Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->psisemr, l_mmioRead); + + l_mmioRead = (*targ_itr)->psiHbBaseAddr->phbdsr; + TRACFCOMP(g_trac_intr, " PSIHB Dbg Setting Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->phbdsr, l_mmioRead); + + l_mmioRead = (*targ_itr)->psiHbBaseAddr->icr; + TRACFCOMP(g_trac_intr, " PSIHB Interrupt Control Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->icr, l_mmioRead); + } +} + +void INTR::IntrRp::printEsbStates() const +{ + TRACFCOMP(g_trac_intr, "---ESB States---"); + for(auto targ_itr = iv_chipList.begin(); + targ_itr != iv_chipList.end(); ++targ_itr) + { + TRACFCOMP(g_trac_intr, "Processor 0x%lx", get_huid((*targ_itr)->proc)); + for (uint8_t i = 0; i < LSI_LAST_SOURCE; i++) + { + // Ready from the ESB_QUERY_OFFSET to ensure the read doesn't + // affect the state + uint64_t * l_psiHbEsbptr = (*targ_itr)->psiHbEsbBaseAddr + + (((i*PAGE_SIZE)+ESB_QUERY_OFFSET) /sizeof(uint64_t)); + + volatile uint64_t l_esbState = *l_psiHbEsbptr; + const char* l_esbStateString = nullptr; + + // Use toString method to look up human readable string + esbStateToString(l_esbState, &l_esbStateString); + + TRACFCOMP(g_trac_intr, " SRC: %02d State: %s", i , l_esbStateString ); + } + } +} diff --git a/src/usr/intr/intrrp.H b/src/usr/intr/intrrp.H index b0245a868..6b4b57197 100644 --- a/src/usr/intr/intrrp.H +++ b/src/usr/intr/intrrp.H @@ -231,10 +231,6 @@ namespace INTR ESB_QUERY_OFFSET = 0x800, ESB_OFF_OFFSET = 0xD00, ESB_RESET_OFFSET = 0XC00, - ESB_STATE_RESET = 0, - ESB_STATE_OFF = 1, - ESB_STATE_PENDING = 2, - ESB_STATE_QUEUED = 3, }; enum INTR_ROUTING_t @@ -889,6 +885,30 @@ namespace INTR */ errlHndl_t resetIntpForMpipl(void); + /** + * Print out the ESB state for every source on all processors + * that the Interrp is aware of + * + * @return void + */ + void printEsbStates() const; + + /** + * Print out the PSIHB info for all processors + * that the Interrp is aware of + * + * @return void + */ + void printPSIHBInfo() const; + + /** + * Print out the LSI info for all processors + * that the Interrp is aware of + * + * @return void + */ + void printLSIInfo() const; + }; }; // INTR namespace diff --git a/src/usr/mbox/mailboxsp.C b/src/usr/mbox/mailboxsp.C index 7bd1ccde5..926dcdc2d 100644 --- a/src/usr/mbox/mailboxsp.C +++ b/src/usr/mbox/mailboxsp.C @@ -44,6 +44,7 @@ #include <arch/ppc.H> #include <errl/errlmanager.H> #include <sys/misc.h> +#include <util/misc.H> #include <errl/errludprintk.H> #include <errno.h> #include <kernel/console.H> @@ -82,6 +83,7 @@ MailboxSp::MailboxSp() iv_sendq(), iv_respondq(), iv_dmaBuffer(), + iv_dmaRequestWatchdog(0), iv_trgt(NULL), iv_shutdown_msg(NULL), iv_rts(true), @@ -832,6 +834,17 @@ void MailboxSp::send_msg(mbox_msg_t * i_msg) &iv_msg_to_send, mbox_msg_len, DeviceFW::MAILBOX); + + // Create a watchdog task that will run for 60 seconds + // if there is no response in 60 seconds then dbg info will + // be printed in the slow trace buffer + if(iv_msg_to_send.msg_payload.type == MSG_REQUEST_DMA_BUFFERS + && !Util::isSimicsRunning() + && !iv_dmaRequestWatchdog) + { + iv_dmaRequestWatchdog = task_create(&watchdogTimeoutTask, this); + assert (iv_dmaRequestWatchdog > 0 ); + } } if(err) @@ -1448,6 +1461,89 @@ void MailboxSp::sendReclaimDmaBfrsMsg( mbox_msg_t & i_mbox_msg ) return; } +void * MailboxSp::watchdogTimeoutTask(void * i_mailboxSp) +{ + // We don't want this to be a zombie because parent keeps going + task_detach(); + + // create a task which we can wait, this way we can print + // an error message if the taskWorker crashes + tid_t l_tid = task_create( &watchdogTimeoutTaskWorker, i_mailboxSp); + assert (l_tid > 0 ); + + int l_status = 0; + void* l_rc = nullptr; + + tid_t l_tidRc = task_wait_tid(l_tid, &l_status, &l_rc); + + if(l_status == TASK_STATUS_CRASHED) + { + TRACFCOMP(g_trac_mbox, + ERR_MRK + "MailboxSp::watchdogTimeoutTask - " + "Watchdog timeout crashed!! %lx", l_tidRc); + } + + return nullptr; +} + +void * MailboxSp::watchdogTimeoutTaskWorker(void * i_mailboxSp) +{ + + uint64_t MAX_TIMEOUT = 200000000000; // nanoseconds + uint64_t POLL_RATE = 1000000; // nanoseconds + uint64_t cur_timeout = 0; // nanoseconds + errlHndl_t err = nullptr; + + assert(i_mailboxSp != nullptr, "nullptr was passed to watchdogTimeoutTaskWorker"); + + MailboxSp & mboxSp = *static_cast<MailboxSp *>(i_mailboxSp); + + while(cur_timeout < MAX_TIMEOUT) + { + if( !mboxSp.iv_dma_pend ) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Breaking out of watchdog because FSP responded to DMA request"); + break; + } + // sleep for 1 ms + nanosleep(0, POLL_RATE); + cur_timeout += POLL_RATE; + } + + if(cur_timeout >= MAX_TIMEOUT) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Hang during DMA request detected, dumping state information"); + err = dumpMboxRegs(); + if(err) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Error occured while dumping MBOX information"); + err->collectTrace(MBOX_COMP_NAME); + errlCommit(err,MBOX_COMP_ID); + } + err = INTR::printInterruptInfo(); + if(err) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Error occured while dumping INTR information"); + err->collectTrace(INTR_COMP_NAME); + errlCommit(err,MBOX_COMP_ID); + } + } + + //Zero out the TID so another watchdog task can be created if needed + mboxSp.iv_dmaRequestWatchdog = 0; + return nullptr; + +} + errlHndl_t MailboxSp::msgq_register(queue_id_t i_queue_id, msg_q_t i_msgQ) { diff --git a/src/usr/mbox/mailboxsp.H b/src/usr/mbox/mailboxsp.H index 22fdf45e8..d1db0a88a 100644 --- a/src/usr/mbox/mailboxsp.H +++ b/src/usr/mbox/mailboxsp.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -310,6 +310,21 @@ namespace MBOX void sendReclaimDmaBfrsMsg( void ); /** + * Start the watchdogTimeoutTaskWorker and print + * out an error if it crashes + */ + static void * watchdogTimeoutTask(void * i_mailboxSp); + + /** + * Used to start a timer , if the timer expires then + * Hostboot will print out a bunch of MBOX and INTR + * error information to the SLOW buffer. This is used + * to collect debug information in the case where we + * are hanging, waiting for a response to an mailbox msg + */ + static void * watchdogTimeoutTaskWorker(void * i_mailboxSp); + + /** * Determine if a Reclaim Bfr message is outstanding * @return [true - Msg active | false - no msg active] */ @@ -367,6 +382,7 @@ namespace MBOX registry_t iv_registry; //!< Registered queue DmaBuffer iv_dmaBuffer; //!< DMA buffer manager send_q_t iv_pendingq; //!< Pending for queue registration + tid_t iv_dmaRequestWatchdog; //!< TID of dma buffer request watchdog TARGETING::Target * iv_trgt;//!< mailbox device driver target msg_t * iv_shutdown_msg;//!< Message to shutdown mbox diff --git a/src/usr/mbox/mboxdd.C b/src/usr/mbox/mboxdd.C index c5c19df99..f0e74f328 100644 --- a/src/usr/mbox/mboxdd.C +++ b/src/usr/mbox/mboxdd.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2015 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -29,11 +29,12 @@ #include <trace/interface.H> #include <errl/errlentry.H> #include <targeting/common/targetservice.H> +#include <targeting/common/utilFilter.H> #include <intr/interrupt.H> trace_desc_t* g_trac_mbox = NULL; -TRAC_INIT(&g_trac_mbox, "MBOX", KILOBYTE, TRACE::BUFFER_SLOW); //4K +TRAC_INIT(&g_trac_mbox, "MBOX", 16*KILOBYTE, TRACE::BUFFER_SLOW); //16K namespace MBOX @@ -697,6 +698,85 @@ errlHndl_t mboxddShutDown(TARGETING::Target* i_target) return err; } +errlHndl_t dumpMboxRegs() +{ + errlHndl_t l_err = nullptr; + TARGETING::TargetHandleList l_procList; + TARGETING::getAllChips( l_procList, TARGETING::TYPE_PROC); + assert(l_procList.size(), "No functional processors found"); + + TRACFCOMP(g_trac_mbox, "---Dumping Mbox registers---"); + + for( const auto l_procChip : l_procList) + { + uint32_t l_64bitBuf[2] = {0}; + size_t l_64bitSize = sizeof(uint64_t); + uint32_t l_huid = TARGETING::get_huid(l_procChip); + TRACFCOMP(g_trac_mbox, "Processor 0x%lx",l_huid); + + // Read the MBOX_DB_INT_REG_PIB + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS(MBOX_DB_INT_REG_PIB)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read PIB Interrupt Register"); + break; + } + else + { + TRACFCOMP(g_trac_mbox, " PIB Interrupt Register (0x%08X) = 0x%08X", + MBOX_DB_INT_REG_PIB, l_64bitBuf[0]); + } + + // Read the MBOX_DB_STAT_CNTRL_1 + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS(MBOX_DB_STAT_CNTRL_1)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read Doorbell Status/Control Register"); + break; + } + else + { + TRACFCOMP(g_trac_mbox, " Doorbell Status/Control Register (0x%08X) = 0x%08X", + MBOX_DB_STAT_CNTRL_1, l_64bitBuf[0]); + } + + // Read the MBOX_DB_ERR_STAT_PIB + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS( MBOX_DB_ERR_STAT_LBUS)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read Doorbell Error/Status Register"); + break; + } + else + { + TRACFCOMP(g_trac_mbox, " Doorbell Error/Status Register (0x%08X) = 0x%08lx", + MBOX_DB_ERR_STAT_LBUS, l_64bitBuf[0]); + } + + for(uint8_t i = 0x0; i <= (MBOX_DATA_LBUS_END - MBOX_DATA_LBUS_START) ; i++) + { + // Read the MBOX_DATA_LBUS_START + i + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS(MBOX_DATA_LBUS_START + i)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read MBOX_DATA_LBUS_START + %d Register", i); + break; + } + TRACFCOMP(g_trac_mbox, " MBOX_DATA_LBUS_START + %02d (0x%08X) = 0x%08lx", + i, MBOX_DATA_LBUS_START + i , l_64bitBuf[0]); + } + } + return l_err; +} + #if defined(__DESTRUCTIVE_MBOX_TEST__) void forceErrorOnNextOperation() { diff --git a/src/usr/mbox/mboxdd.H b/src/usr/mbox/mboxdd.H index 86fb1ee80..f79459615 100644 --- a/src/usr/mbox/mboxdd.H +++ b/src/usr/mbox/mboxdd.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -37,7 +37,7 @@ namespace MBOX /* * Mbox device driver public constants */ - enum + enum { MBOX_MAX_DATA_BYTES = 64, //16 32-bit Data Registers }; @@ -45,7 +45,7 @@ namespace MBOX /* * Mbox device driver status values */ - enum MboxReadStatus + enum MboxReadStatus { MBOX_DOORBELL_ERROR = 0x00000004, /* Error Set In Error Register */ MBOX_HW_ACK = 0x00000002, /* LBUS Data Acknowledgment */ @@ -58,7 +58,7 @@ namespace MBOX /** * @brief Initialize device driver hardware - * + * * @param[in] i_target, Chip target of the MBOX operation * @return errlHndl_t If scom error | NULL (success) */ @@ -112,6 +112,14 @@ namespace MBOX void* i_buffer, size_t& i_buflen); + /** + * @brief Print all the mailbox state information to slow trace + * buffer to aid in debug. + * + * @return errlHndl_t nullptr on success + */ + errlHndl_t dumpMboxRegs(); + /** * @brief Reads the mailbox PIB error status register * |

