diff options
Diffstat (limited to 'src/usr/mbox')
| -rw-r--r-- | src/usr/mbox/mailboxsp.C | 96 | ||||
| -rw-r--r-- | src/usr/mbox/mailboxsp.H | 18 | ||||
| -rw-r--r-- | src/usr/mbox/mboxdd.C | 84 | ||||
| -rw-r--r-- | src/usr/mbox/mboxdd.H | 16 |
4 files changed, 207 insertions, 7 deletions
diff --git a/src/usr/mbox/mailboxsp.C b/src/usr/mbox/mailboxsp.C index 7bd1ccde5..926dcdc2d 100644 --- a/src/usr/mbox/mailboxsp.C +++ b/src/usr/mbox/mailboxsp.C @@ -44,6 +44,7 @@ #include <arch/ppc.H> #include <errl/errlmanager.H> #include <sys/misc.h> +#include <util/misc.H> #include <errl/errludprintk.H> #include <errno.h> #include <kernel/console.H> @@ -82,6 +83,7 @@ MailboxSp::MailboxSp() iv_sendq(), iv_respondq(), iv_dmaBuffer(), + iv_dmaRequestWatchdog(0), iv_trgt(NULL), iv_shutdown_msg(NULL), iv_rts(true), @@ -832,6 +834,17 @@ void MailboxSp::send_msg(mbox_msg_t * i_msg) &iv_msg_to_send, mbox_msg_len, DeviceFW::MAILBOX); + + // Create a watchdog task that will run for 60 seconds + // if there is no response in 60 seconds then dbg info will + // be printed in the slow trace buffer + if(iv_msg_to_send.msg_payload.type == MSG_REQUEST_DMA_BUFFERS + && !Util::isSimicsRunning() + && !iv_dmaRequestWatchdog) + { + iv_dmaRequestWatchdog = task_create(&watchdogTimeoutTask, this); + assert (iv_dmaRequestWatchdog > 0 ); + } } if(err) @@ -1448,6 +1461,89 @@ void MailboxSp::sendReclaimDmaBfrsMsg( mbox_msg_t & i_mbox_msg ) return; } +void * MailboxSp::watchdogTimeoutTask(void * i_mailboxSp) +{ + // We don't want this to be a zombie because parent keeps going + task_detach(); + + // create a task which we can wait, this way we can print + // an error message if the taskWorker crashes + tid_t l_tid = task_create( &watchdogTimeoutTaskWorker, i_mailboxSp); + assert (l_tid > 0 ); + + int l_status = 0; + void* l_rc = nullptr; + + tid_t l_tidRc = task_wait_tid(l_tid, &l_status, &l_rc); + + if(l_status == TASK_STATUS_CRASHED) + { + TRACFCOMP(g_trac_mbox, + ERR_MRK + "MailboxSp::watchdogTimeoutTask - " + "Watchdog timeout crashed!! %lx", l_tidRc); + } + + return nullptr; +} + +void * MailboxSp::watchdogTimeoutTaskWorker(void * i_mailboxSp) +{ + + uint64_t MAX_TIMEOUT = 200000000000; // nanoseconds + uint64_t POLL_RATE = 1000000; // nanoseconds + uint64_t cur_timeout = 0; // nanoseconds + errlHndl_t err = nullptr; + + assert(i_mailboxSp != nullptr, "nullptr was passed to watchdogTimeoutTaskWorker"); + + MailboxSp & mboxSp = *static_cast<MailboxSp *>(i_mailboxSp); + + while(cur_timeout < MAX_TIMEOUT) + { + if( !mboxSp.iv_dma_pend ) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Breaking out of watchdog because FSP responded to DMA request"); + break; + } + // sleep for 1 ms + nanosleep(0, POLL_RATE); + cur_timeout += POLL_RATE; + } + + if(cur_timeout >= MAX_TIMEOUT) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Hang during DMA request detected, dumping state information"); + err = dumpMboxRegs(); + if(err) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Error occured while dumping MBOX information"); + err->collectTrace(MBOX_COMP_NAME); + errlCommit(err,MBOX_COMP_ID); + } + err = INTR::printInterruptInfo(); + if(err) + { + TRACFCOMP(g_trac_mbox, + INFO_MRK + "Error occured while dumping INTR information"); + err->collectTrace(INTR_COMP_NAME); + errlCommit(err,MBOX_COMP_ID); + } + } + + //Zero out the TID so another watchdog task can be created if needed + mboxSp.iv_dmaRequestWatchdog = 0; + return nullptr; + +} + errlHndl_t MailboxSp::msgq_register(queue_id_t i_queue_id, msg_q_t i_msgQ) { diff --git a/src/usr/mbox/mailboxsp.H b/src/usr/mbox/mailboxsp.H index 22fdf45e8..d1db0a88a 100644 --- a/src/usr/mbox/mailboxsp.H +++ b/src/usr/mbox/mailboxsp.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2017 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -310,6 +310,21 @@ namespace MBOX void sendReclaimDmaBfrsMsg( void ); /** + * Start the watchdogTimeoutTaskWorker and print + * out an error if it crashes + */ + static void * watchdogTimeoutTask(void * i_mailboxSp); + + /** + * Used to start a timer , if the timer expires then + * Hostboot will print out a bunch of MBOX and INTR + * error information to the SLOW buffer. This is used + * to collect debug information in the case where we + * are hanging, waiting for a response to an mailbox msg + */ + static void * watchdogTimeoutTaskWorker(void * i_mailboxSp); + + /** * Determine if a Reclaim Bfr message is outstanding * @return [true - Msg active | false - no msg active] */ @@ -367,6 +382,7 @@ namespace MBOX registry_t iv_registry; //!< Registered queue DmaBuffer iv_dmaBuffer; //!< DMA buffer manager send_q_t iv_pendingq; //!< Pending for queue registration + tid_t iv_dmaRequestWatchdog; //!< TID of dma buffer request watchdog TARGETING::Target * iv_trgt;//!< mailbox device driver target msg_t * iv_shutdown_msg;//!< Message to shutdown mbox diff --git a/src/usr/mbox/mboxdd.C b/src/usr/mbox/mboxdd.C index c5c19df99..f0e74f328 100644 --- a/src/usr/mbox/mboxdd.C +++ b/src/usr/mbox/mboxdd.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2015 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -29,11 +29,12 @@ #include <trace/interface.H> #include <errl/errlentry.H> #include <targeting/common/targetservice.H> +#include <targeting/common/utilFilter.H> #include <intr/interrupt.H> trace_desc_t* g_trac_mbox = NULL; -TRAC_INIT(&g_trac_mbox, "MBOX", KILOBYTE, TRACE::BUFFER_SLOW); //4K +TRAC_INIT(&g_trac_mbox, "MBOX", 16*KILOBYTE, TRACE::BUFFER_SLOW); //16K namespace MBOX @@ -697,6 +698,85 @@ errlHndl_t mboxddShutDown(TARGETING::Target* i_target) return err; } +errlHndl_t dumpMboxRegs() +{ + errlHndl_t l_err = nullptr; + TARGETING::TargetHandleList l_procList; + TARGETING::getAllChips( l_procList, TARGETING::TYPE_PROC); + assert(l_procList.size(), "No functional processors found"); + + TRACFCOMP(g_trac_mbox, "---Dumping Mbox registers---"); + + for( const auto l_procChip : l_procList) + { + uint32_t l_64bitBuf[2] = {0}; + size_t l_64bitSize = sizeof(uint64_t); + uint32_t l_huid = TARGETING::get_huid(l_procChip); + TRACFCOMP(g_trac_mbox, "Processor 0x%lx",l_huid); + + // Read the MBOX_DB_INT_REG_PIB + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS(MBOX_DB_INT_REG_PIB)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read PIB Interrupt Register"); + break; + } + else + { + TRACFCOMP(g_trac_mbox, " PIB Interrupt Register (0x%08X) = 0x%08X", + MBOX_DB_INT_REG_PIB, l_64bitBuf[0]); + } + + // Read the MBOX_DB_STAT_CNTRL_1 + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS(MBOX_DB_STAT_CNTRL_1)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read Doorbell Status/Control Register"); + break; + } + else + { + TRACFCOMP(g_trac_mbox, " Doorbell Status/Control Register (0x%08X) = 0x%08X", + MBOX_DB_STAT_CNTRL_1, l_64bitBuf[0]); + } + + // Read the MBOX_DB_ERR_STAT_PIB + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS( MBOX_DB_ERR_STAT_LBUS)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read Doorbell Error/Status Register"); + break; + } + else + { + TRACFCOMP(g_trac_mbox, " Doorbell Error/Status Register (0x%08X) = 0x%08lx", + MBOX_DB_ERR_STAT_LBUS, l_64bitBuf[0]); + } + + for(uint8_t i = 0x0; i <= (MBOX_DATA_LBUS_END - MBOX_DATA_LBUS_START) ; i++) + { + // Read the MBOX_DATA_LBUS_START + i + l_err = deviceOp(DeviceFW::READ,l_procChip, + l_64bitBuf,l_64bitSize, + DEVICE_XSCOM_ADDRESS(MBOX_DATA_LBUS_START + i)); + if (l_err) + { + TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read MBOX_DATA_LBUS_START + %d Register", i); + break; + } + TRACFCOMP(g_trac_mbox, " MBOX_DATA_LBUS_START + %02d (0x%08X) = 0x%08lx", + i, MBOX_DATA_LBUS_START + i , l_64bitBuf[0]); + } + } + return l_err; +} + #if defined(__DESTRUCTIVE_MBOX_TEST__) void forceErrorOnNextOperation() { diff --git a/src/usr/mbox/mboxdd.H b/src/usr/mbox/mboxdd.H index 86fb1ee80..f79459615 100644 --- a/src/usr/mbox/mboxdd.H +++ b/src/usr/mbox/mboxdd.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2012,2014 */ +/* Contributors Listed Below - COPYRIGHT 2012,2018 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -37,7 +37,7 @@ namespace MBOX /* * Mbox device driver public constants */ - enum + enum { MBOX_MAX_DATA_BYTES = 64, //16 32-bit Data Registers }; @@ -45,7 +45,7 @@ namespace MBOX /* * Mbox device driver status values */ - enum MboxReadStatus + enum MboxReadStatus { MBOX_DOORBELL_ERROR = 0x00000004, /* Error Set In Error Register */ MBOX_HW_ACK = 0x00000002, /* LBUS Data Acknowledgment */ @@ -58,7 +58,7 @@ namespace MBOX /** * @brief Initialize device driver hardware - * + * * @param[in] i_target, Chip target of the MBOX operation * @return errlHndl_t If scom error | NULL (success) */ @@ -112,6 +112,14 @@ namespace MBOX void* i_buffer, size_t& i_buflen); + /** + * @brief Print all the mailbox state information to slow trace + * buffer to aid in debug. + * + * @return errlHndl_t nullptr on success + */ + errlHndl_t dumpMboxRegs(); + /** * @brief Reads the mailbox PIB error status register * |

