summaryrefslogtreecommitdiffstats
path: root/src/usr
diff options
context:
space:
mode:
Diffstat (limited to 'src/usr')
-rw-r--r--src/usr/intr/intrrp.C160
-rw-r--r--src/usr/intr/intrrp.H28
-rw-r--r--src/usr/mbox/mailboxsp.C96
-rw-r--r--src/usr/mbox/mailboxsp.H18
-rw-r--r--src/usr/mbox/mboxdd.C84
-rw-r--r--src/usr/mbox/mboxdd.H16
6 files changed, 391 insertions, 11 deletions
diff --git a/src/usr/intr/intrrp.C b/src/usr/intr/intrrp.C
index 9e3f5930b..bb6aabff2 100644
--- a/src/usr/intr/intrrp.C
+++ b/src/usr/intr/intrrp.C
@@ -30,6 +30,7 @@
#include "intrrp.H"
#include <trace/interface.H>
#include <errno.h>
+#include <string.h>
#include <initservice/taskargs.H>
#include <initservice/initserviceif.H>
#include <util/singleton.H>
@@ -1331,6 +1332,16 @@ void IntrRp::msgHandler()
msg_respond(iv_msgQ,msg);
}
break;
+ case MSG_INTR_DUMP:
+ {
+ // Run the functions that dump out
+ // interrupt info to slow buffer
+ printEsbStates();
+ printLSIInfo();
+ printPSIHBInfo();
+ msg_free(msg); // async message
+ }
+ break;
default:
msg->data[1] = -EINVAL;
@@ -3392,3 +3403,152 @@ errlHndl_t INTR::IntrRp::enableSlaveProcInterrupts(TARGETING::Target * i_target)
return l_err;
}
+void INTR::esbStateToString(uint64_t i_esbState, const char** o_esbStateString)
+{
+ switch(i_esbState)
+ {
+ case ESB_STATE_RESET:
+ *o_esbStateString = "RESET";
+ break;
+ case ESB_STATE_OFF:
+ *o_esbStateString = "OFF";
+ break;
+ case ESB_STATE_PENDING:
+ *o_esbStateString = "PENDING";
+ break;
+ case ESB_STATE_QUEUED:
+ *o_esbStateString = "QUEUED";
+ break;
+ default:
+ *o_esbStateString = "INVALID";
+ break;
+ }
+}
+
+errlHndl_t INTR::printInterruptInfo()
+{
+ errlHndl_t err = NULL;
+ msg_q_t intr_msgQ = msg_q_resolve(VFS_ROOT_MSG_INTR);
+ if(intr_msgQ)
+ {
+ msg_t * msg = msg_allocate();
+ msg->type = MSG_INTR_DUMP;
+ int send_rc = msg_send(intr_msgQ, msg);
+ if (send_rc != 0)
+ {
+ TRACFCOMP(g_trac_intr, ERR_MRK"IntrRp::printInterruptInfo error "
+ "sending print intr info message");
+ /*@ errorlog tag
+ * @errortype ERRL_SEV_UNRECOVERABLE
+ * @moduleid INTR::MOD_INTR_DUMP
+ * @reasoncode INTR::RC_MESSAGE_SEND_ERROR
+ * @userdata1 RC from msg_send command
+ * @devdesc Error encountered sending print intr info
+ * message to INTRP
+ * @custdesc Error encountered gathering diagnostic info
+ */
+ err = new ERRORLOG::ErrlEntry
+ (
+ ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity
+ INTR::MOD_INTR_DUMP, // moduleid
+ INTR::RC_MESSAGE_SEND_ERROR, // reason code
+ send_rc,
+ 0
+ );
+ }
+ }
+ else
+ {
+ /*@ errorlog tag
+ * @errortype ERRL_SEV_INFORMATIONAL
+ * @moduleid INTR::MOD_INTR_DUMP
+ * @reasoncode INTR::RC_RP_NOT_INITIALIZED
+ * @userdata1 MSG_INTR_DUMP
+ * @userdata2 0
+ * @devdesc Interrupt resource provider not initialized yet.
+ * @custdesc Error encountered gathering diagnostic info
+ */
+ err = new ERRORLOG::ErrlEntry
+ (
+ ERRORLOG::ERRL_SEV_INFORMATIONAL, // severity
+ INTR::MOD_INTR_DUMP, // moduleid
+ INTR::RC_RP_NOT_INITIALIZED, // reason code
+ static_cast<uint64_t>(MSG_INTR_DUMP),
+ 0
+ );
+ }
+ return err;
+}
+
+void INTR::IntrRp::printLSIInfo() const
+{
+ TRACFCOMP(g_trac_intr, "---LSI Sources---");
+
+ //Read LSI Interrupt Status register from each enabled
+ // proc chip to see which caused the interrupt
+ for(auto targ_itr = iv_chipList.begin();
+ targ_itr != iv_chipList.end(); ++targ_itr)
+ {
+ uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintstatus;
+ uint32_t l_huid = get_huid((*targ_itr)->proc);
+ TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid);
+ TRACFCOMP(g_trac_intr, " lsiIntStatus : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintstatus , l_mmioRead);
+ l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintlevel;
+ TRACFCOMP(g_trac_intr, " lsiIntLevel : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintlevel, l_mmioRead);
+ }
+}
+
+void INTR::IntrRp::printPSIHBInfo() const
+{
+ TRACFCOMP(g_trac_intr, "---PSIHB Info---");
+ //Read LSI Interrupt Status register from each enabled
+ // proc chip to see which caused the interrupt
+ for(auto targ_itr = iv_chipList.begin();
+ targ_itr != iv_chipList.end(); ++targ_itr)
+ {
+ uint32_t l_huid = get_huid((*targ_itr)->proc);
+ uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->psihbcr;
+
+ TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid);
+
+ TRACFCOMP(g_trac_intr, " PSIHB Ctrl/Status Reg : vAddr=0x%016lx Value=0x%016lx",
+ &(*targ_itr)->psiHbBaseAddr->psihbcr, l_mmioRead);
+
+ l_mmioRead = (*targ_itr)->psiHbBaseAddr->psisemr;
+ TRACFCOMP(g_trac_intr, " PSIHB Error/Status Reg : vAddr=0x%016lx Value=0x%016lx",
+ &(*targ_itr)->psiHbBaseAddr->psisemr, l_mmioRead);
+
+ l_mmioRead = (*targ_itr)->psiHbBaseAddr->phbdsr;
+ TRACFCOMP(g_trac_intr, " PSIHB Dbg Setting Reg : vAddr=0x%016lx Value=0x%016lx",
+ &(*targ_itr)->psiHbBaseAddr->phbdsr, l_mmioRead);
+
+ l_mmioRead = (*targ_itr)->psiHbBaseAddr->icr;
+ TRACFCOMP(g_trac_intr, " PSIHB Interrupt Control Reg : vAddr=0x%016lx Value=0x%016lx",
+ &(*targ_itr)->psiHbBaseAddr->icr, l_mmioRead);
+ }
+}
+
+void INTR::IntrRp::printEsbStates() const
+{
+ TRACFCOMP(g_trac_intr, "---ESB States---");
+ for(auto targ_itr = iv_chipList.begin();
+ targ_itr != iv_chipList.end(); ++targ_itr)
+ {
+ TRACFCOMP(g_trac_intr, "Processor 0x%lx", get_huid((*targ_itr)->proc));
+ for (uint8_t i = 0; i < LSI_LAST_SOURCE; i++)
+ {
+ // Ready from the ESB_QUERY_OFFSET to ensure the read doesn't
+ // affect the state
+ uint64_t * l_psiHbEsbptr = (*targ_itr)->psiHbEsbBaseAddr +
+ (((i*PAGE_SIZE)+ESB_QUERY_OFFSET) /sizeof(uint64_t));
+
+ volatile uint64_t l_esbState = *l_psiHbEsbptr;
+ const char* l_esbStateString = nullptr;
+
+ // Use toString method to look up human readable string
+ esbStateToString(l_esbState, &l_esbStateString);
+
+ TRACFCOMP(g_trac_intr, " SRC: %02d State: %s", i , l_esbStateString );
+ }
+ }
+}
diff --git a/src/usr/intr/intrrp.H b/src/usr/intr/intrrp.H
index b0245a868..6b4b57197 100644
--- a/src/usr/intr/intrrp.H
+++ b/src/usr/intr/intrrp.H
@@ -231,10 +231,6 @@ namespace INTR
ESB_QUERY_OFFSET = 0x800,
ESB_OFF_OFFSET = 0xD00,
ESB_RESET_OFFSET = 0XC00,
- ESB_STATE_RESET = 0,
- ESB_STATE_OFF = 1,
- ESB_STATE_PENDING = 2,
- ESB_STATE_QUEUED = 3,
};
enum INTR_ROUTING_t
@@ -889,6 +885,30 @@ namespace INTR
*/
errlHndl_t resetIntpForMpipl(void);
+ /**
+ * Print out the ESB state for every source on all processors
+ * that the Interrp is aware of
+ *
+ * @return void
+ */
+ void printEsbStates() const;
+
+ /**
+ * Print out the PSIHB info for all processors
+ * that the Interrp is aware of
+ *
+ * @return void
+ */
+ void printPSIHBInfo() const;
+
+ /**
+ * Print out the LSI info for all processors
+ * that the Interrp is aware of
+ *
+ * @return void
+ */
+ void printLSIInfo() const;
+
};
}; // INTR namespace
diff --git a/src/usr/mbox/mailboxsp.C b/src/usr/mbox/mailboxsp.C
index 7bd1ccde5..926dcdc2d 100644
--- a/src/usr/mbox/mailboxsp.C
+++ b/src/usr/mbox/mailboxsp.C
@@ -44,6 +44,7 @@
#include <arch/ppc.H>
#include <errl/errlmanager.H>
#include <sys/misc.h>
+#include <util/misc.H>
#include <errl/errludprintk.H>
#include <errno.h>
#include <kernel/console.H>
@@ -82,6 +83,7 @@ MailboxSp::MailboxSp()
iv_sendq(),
iv_respondq(),
iv_dmaBuffer(),
+ iv_dmaRequestWatchdog(0),
iv_trgt(NULL),
iv_shutdown_msg(NULL),
iv_rts(true),
@@ -832,6 +834,17 @@ void MailboxSp::send_msg(mbox_msg_t * i_msg)
&iv_msg_to_send,
mbox_msg_len,
DeviceFW::MAILBOX);
+
+ // Create a watchdog task that will run for 60 seconds
+ // if there is no response in 60 seconds then dbg info will
+ // be printed in the slow trace buffer
+ if(iv_msg_to_send.msg_payload.type == MSG_REQUEST_DMA_BUFFERS
+ && !Util::isSimicsRunning()
+ && !iv_dmaRequestWatchdog)
+ {
+ iv_dmaRequestWatchdog = task_create(&watchdogTimeoutTask, this);
+ assert (iv_dmaRequestWatchdog > 0 );
+ }
}
if(err)
@@ -1448,6 +1461,89 @@ void MailboxSp::sendReclaimDmaBfrsMsg( mbox_msg_t & i_mbox_msg )
return;
}
+void * MailboxSp::watchdogTimeoutTask(void * i_mailboxSp)
+{
+ // We don't want this to be a zombie because parent keeps going
+ task_detach();
+
+ // create a task which we can wait, this way we can print
+ // an error message if the taskWorker crashes
+ tid_t l_tid = task_create( &watchdogTimeoutTaskWorker, i_mailboxSp);
+ assert (l_tid > 0 );
+
+ int l_status = 0;
+ void* l_rc = nullptr;
+
+ tid_t l_tidRc = task_wait_tid(l_tid, &l_status, &l_rc);
+
+ if(l_status == TASK_STATUS_CRASHED)
+ {
+ TRACFCOMP(g_trac_mbox,
+ ERR_MRK
+ "MailboxSp::watchdogTimeoutTask - "
+ "Watchdog timeout crashed!! %lx", l_tidRc);
+ }
+
+ return nullptr;
+}
+
+void * MailboxSp::watchdogTimeoutTaskWorker(void * i_mailboxSp)
+{
+
+ uint64_t MAX_TIMEOUT = 200000000000; // nanoseconds
+ uint64_t POLL_RATE = 1000000; // nanoseconds
+ uint64_t cur_timeout = 0; // nanoseconds
+ errlHndl_t err = nullptr;
+
+ assert(i_mailboxSp != nullptr, "nullptr was passed to watchdogTimeoutTaskWorker");
+
+ MailboxSp & mboxSp = *static_cast<MailboxSp *>(i_mailboxSp);
+
+ while(cur_timeout < MAX_TIMEOUT)
+ {
+ if( !mboxSp.iv_dma_pend )
+ {
+ TRACFCOMP(g_trac_mbox,
+ INFO_MRK
+ "Breaking out of watchdog because FSP responded to DMA request");
+ break;
+ }
+ // sleep for 1 ms
+ nanosleep(0, POLL_RATE);
+ cur_timeout += POLL_RATE;
+ }
+
+ if(cur_timeout >= MAX_TIMEOUT)
+ {
+ TRACFCOMP(g_trac_mbox,
+ INFO_MRK
+ "Hang during DMA request detected, dumping state information");
+ err = dumpMboxRegs();
+ if(err)
+ {
+ TRACFCOMP(g_trac_mbox,
+ INFO_MRK
+ "Error occured while dumping MBOX information");
+ err->collectTrace(MBOX_COMP_NAME);
+ errlCommit(err,MBOX_COMP_ID);
+ }
+ err = INTR::printInterruptInfo();
+ if(err)
+ {
+ TRACFCOMP(g_trac_mbox,
+ INFO_MRK
+ "Error occured while dumping INTR information");
+ err->collectTrace(INTR_COMP_NAME);
+ errlCommit(err,MBOX_COMP_ID);
+ }
+ }
+
+ //Zero out the TID so another watchdog task can be created if needed
+ mboxSp.iv_dmaRequestWatchdog = 0;
+ return nullptr;
+
+}
+
errlHndl_t MailboxSp::msgq_register(queue_id_t i_queue_id, msg_q_t i_msgQ)
{
diff --git a/src/usr/mbox/mailboxsp.H b/src/usr/mbox/mailboxsp.H
index 22fdf45e8..d1db0a88a 100644
--- a/src/usr/mbox/mailboxsp.H
+++ b/src/usr/mbox/mailboxsp.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2017 */
+/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -310,6 +310,21 @@ namespace MBOX
void sendReclaimDmaBfrsMsg( void );
/**
+ * Start the watchdogTimeoutTaskWorker and print
+ * out an error if it crashes
+ */
+ static void * watchdogTimeoutTask(void * i_mailboxSp);
+
+ /**
+ * Used to start a timer , if the timer expires then
+ * Hostboot will print out a bunch of MBOX and INTR
+ * error information to the SLOW buffer. This is used
+ * to collect debug information in the case where we
+ * are hanging, waiting for a response to an mailbox msg
+ */
+ static void * watchdogTimeoutTaskWorker(void * i_mailboxSp);
+
+ /**
* Determine if a Reclaim Bfr message is outstanding
* @return [true - Msg active | false - no msg active]
*/
@@ -367,6 +382,7 @@ namespace MBOX
registry_t iv_registry; //!< Registered queue
DmaBuffer iv_dmaBuffer; //!< DMA buffer manager
send_q_t iv_pendingq; //!< Pending for queue registration
+ tid_t iv_dmaRequestWatchdog; //!< TID of dma buffer request watchdog
TARGETING::Target * iv_trgt;//!< mailbox device driver target
msg_t * iv_shutdown_msg;//!< Message to shutdown mbox
diff --git a/src/usr/mbox/mboxdd.C b/src/usr/mbox/mboxdd.C
index c5c19df99..f0e74f328 100644
--- a/src/usr/mbox/mboxdd.C
+++ b/src/usr/mbox/mboxdd.C
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2015 */
+/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -29,11 +29,12 @@
#include <trace/interface.H>
#include <errl/errlentry.H>
#include <targeting/common/targetservice.H>
+#include <targeting/common/utilFilter.H>
#include <intr/interrupt.H>
trace_desc_t* g_trac_mbox = NULL;
-TRAC_INIT(&g_trac_mbox, "MBOX", KILOBYTE, TRACE::BUFFER_SLOW); //4K
+TRAC_INIT(&g_trac_mbox, "MBOX", 16*KILOBYTE, TRACE::BUFFER_SLOW); //16K
namespace MBOX
@@ -697,6 +698,85 @@ errlHndl_t mboxddShutDown(TARGETING::Target* i_target)
return err;
}
+errlHndl_t dumpMboxRegs()
+{
+ errlHndl_t l_err = nullptr;
+ TARGETING::TargetHandleList l_procList;
+ TARGETING::getAllChips( l_procList, TARGETING::TYPE_PROC);
+ assert(l_procList.size(), "No functional processors found");
+
+ TRACFCOMP(g_trac_mbox, "---Dumping Mbox registers---");
+
+ for( const auto l_procChip : l_procList)
+ {
+ uint32_t l_64bitBuf[2] = {0};
+ size_t l_64bitSize = sizeof(uint64_t);
+ uint32_t l_huid = TARGETING::get_huid(l_procChip);
+ TRACFCOMP(g_trac_mbox, "Processor 0x%lx",l_huid);
+
+ // Read the MBOX_DB_INT_REG_PIB
+ l_err = deviceOp(DeviceFW::READ,l_procChip,
+ l_64bitBuf,l_64bitSize,
+ DEVICE_XSCOM_ADDRESS(MBOX_DB_INT_REG_PIB));
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read PIB Interrupt Register");
+ break;
+ }
+ else
+ {
+ TRACFCOMP(g_trac_mbox, " PIB Interrupt Register (0x%08X) = 0x%08X",
+ MBOX_DB_INT_REG_PIB, l_64bitBuf[0]);
+ }
+
+ // Read the MBOX_DB_STAT_CNTRL_1
+ l_err = deviceOp(DeviceFW::READ,l_procChip,
+ l_64bitBuf,l_64bitSize,
+ DEVICE_XSCOM_ADDRESS(MBOX_DB_STAT_CNTRL_1));
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read Doorbell Status/Control Register");
+ break;
+ }
+ else
+ {
+ TRACFCOMP(g_trac_mbox, " Doorbell Status/Control Register (0x%08X) = 0x%08X",
+ MBOX_DB_STAT_CNTRL_1, l_64bitBuf[0]);
+ }
+
+ // Read the MBOX_DB_ERR_STAT_PIB
+ l_err = deviceOp(DeviceFW::READ,l_procChip,
+ l_64bitBuf,l_64bitSize,
+ DEVICE_XSCOM_ADDRESS( MBOX_DB_ERR_STAT_LBUS));
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read Doorbell Error/Status Register");
+ break;
+ }
+ else
+ {
+ TRACFCOMP(g_trac_mbox, " Doorbell Error/Status Register (0x%08X) = 0x%08lx",
+ MBOX_DB_ERR_STAT_LBUS, l_64bitBuf[0]);
+ }
+
+ for(uint8_t i = 0x0; i <= (MBOX_DATA_LBUS_END - MBOX_DATA_LBUS_START) ; i++)
+ {
+ // Read the MBOX_DATA_LBUS_START + i
+ l_err = deviceOp(DeviceFW::READ,l_procChip,
+ l_64bitBuf,l_64bitSize,
+ DEVICE_XSCOM_ADDRESS(MBOX_DATA_LBUS_START + i));
+ if (l_err)
+ {
+ TRACFCOMP(g_trac_mbox, ERR_MRK "dumpMboxRegs> Unable to read MBOX_DATA_LBUS_START + %d Register", i);
+ break;
+ }
+ TRACFCOMP(g_trac_mbox, " MBOX_DATA_LBUS_START + %02d (0x%08X) = 0x%08lx",
+ i, MBOX_DATA_LBUS_START + i , l_64bitBuf[0]);
+ }
+ }
+ return l_err;
+}
+
#if defined(__DESTRUCTIVE_MBOX_TEST__)
void forceErrorOnNextOperation()
{
diff --git a/src/usr/mbox/mboxdd.H b/src/usr/mbox/mboxdd.H
index 86fb1ee80..f79459615 100644
--- a/src/usr/mbox/mboxdd.H
+++ b/src/usr/mbox/mboxdd.H
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
-/* Contributors Listed Below - COPYRIGHT 2012,2014 */
+/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -37,7 +37,7 @@ namespace MBOX
/*
* Mbox device driver public constants
*/
- enum
+ enum
{
MBOX_MAX_DATA_BYTES = 64, //16 32-bit Data Registers
};
@@ -45,7 +45,7 @@ namespace MBOX
/*
* Mbox device driver status values
*/
- enum MboxReadStatus
+ enum MboxReadStatus
{
MBOX_DOORBELL_ERROR = 0x00000004, /* Error Set In Error Register */
MBOX_HW_ACK = 0x00000002, /* LBUS Data Acknowledgment */
@@ -58,7 +58,7 @@ namespace MBOX
/**
* @brief Initialize device driver hardware
- *
+ *
* @param[in] i_target, Chip target of the MBOX operation
* @return errlHndl_t If scom error | NULL (success)
*/
@@ -112,6 +112,14 @@ namespace MBOX
void* i_buffer,
size_t& i_buflen);
+ /**
+ * @brief Print all the mailbox state information to slow trace
+ * buffer to aid in debug.
+ *
+ * @return errlHndl_t nullptr on success
+ */
+ errlHndl_t dumpMboxRegs();
+
/**
* @brief Reads the mailbox PIB error status register
*
OpenPOWER on IntegriCloud