diff options
| author | Christian Geddes <crgeddes@us.ibm.com> | 2018-06-20 11:06:19 -0500 |
|---|---|---|
| committer | William G. Hoffa <wghoffa@us.ibm.com> | 2018-07-05 09:50:43 -0400 |
| commit | 50e72792adbdea613e4a2aeea25b60ba1043a2b8 (patch) | |
| tree | 678dc780c974563cb60035eb4bc187b1df333aeb /src/usr/intr | |
| parent | 1759af757bd8f9a13386c4fb4624bd93394af67b (diff) | |
| download | talos-hostboot-50e72792adbdea613e4a2aeea25b60ba1043a2b8.tar.gz talos-hostboot-50e72792adbdea613e4a2aeea25b60ba1043a2b8.zip | |
Print out MBOX/INTR state info on DMA request hang
We have been stuck on a hang that occurs during memdiags on
our multi-node p9 systems. It appears that Hostboot never
receives the response to its request to reclaim DMA buffers
from the FSP. From debugging, we know the FSP thinks it has sent
the message over the FSI mbox, but Hostboot isn't seeing it. With
this change in the code, the next time the hang occurs we should
be able to get a better idea of what is happening.
Change-Id: I6b702e4094da3576ba454b5cdf0660841961baff
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/60977
Reviewed-by: Richard Ward <rward15@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Roland Veloz <rveloz@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Diffstat (limited to 'src/usr/intr')
| -rw-r--r-- | src/usr/intr/intrrp.C | 160 | ||||
| -rw-r--r-- | src/usr/intr/intrrp.H | 28 |
2 files changed, 184 insertions, 4 deletions
diff --git a/src/usr/intr/intrrp.C b/src/usr/intr/intrrp.C index 9e3f5930b..bb6aabff2 100644 --- a/src/usr/intr/intrrp.C +++ b/src/usr/intr/intrrp.C @@ -30,6 +30,7 @@ #include "intrrp.H" #include <trace/interface.H> #include <errno.h> +#include <string.h> #include <initservice/taskargs.H> #include <initservice/initserviceif.H> #include <util/singleton.H> @@ -1331,6 +1332,16 @@ void IntrRp::msgHandler() msg_respond(iv_msgQ,msg); } break; + case MSG_INTR_DUMP: + { + // Run the functions that dump out + // interrupt info to slow buffer + printEsbStates(); + printLSIInfo(); + printPSIHBInfo(); + msg_free(msg); // async message + } + break; default: msg->data[1] = -EINVAL; @@ -3392,3 +3403,152 @@ errlHndl_t INTR::IntrRp::enableSlaveProcInterrupts(TARGETING::Target * i_target) return l_err; } +void INTR::esbStateToString(uint64_t i_esbState, const char** o_esbStateString) +{ + switch(i_esbState) + { + case ESB_STATE_RESET: + *o_esbStateString = "RESET"; + break; + case ESB_STATE_OFF: + *o_esbStateString = "OFF"; + break; + case ESB_STATE_PENDING: + *o_esbStateString = "PENDING"; + break; + case ESB_STATE_QUEUED: + *o_esbStateString = "QUEUED"; + break; + default: + *o_esbStateString = "INVALID"; + break; + } +} + +errlHndl_t INTR::printInterruptInfo() +{ + errlHndl_t err = NULL; + msg_q_t intr_msgQ = msg_q_resolve(VFS_ROOT_MSG_INTR); + if(intr_msgQ) + { + msg_t * msg = msg_allocate(); + msg->type = MSG_INTR_DUMP; + int send_rc = msg_send(intr_msgQ, msg); + if (send_rc != 0) + { + TRACFCOMP(g_trac_intr, ERR_MRK"IntrRp::printInterruptInfo error " + "sending print intr info message"); + /*@ errorlog tag + * @errortype ERRL_SEV_UNRECOVERABLE + * @moduleid INTR::MOD_INTR_DUMP + * @reasoncode INTR::RC_MESSAGE_SEND_ERROR + * @userdata1 RC from msg_send command + * @devdesc Error encountered sending print intr info + * message to INTRP + * @custdesc Error encountered gathering diagnostic info + */ + err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, // 
severity + INTR::MOD_INTR_DUMP, // moduleid + INTR::RC_MESSAGE_SEND_ERROR, // reason code + send_rc, + 0 + ); + } + } + else + { + /*@ errorlog tag + * @errortype ERRL_SEV_INFORMATIONAL + * @moduleid INTR::MOD_INTR_DUMP + * @reasoncode INTR::RC_RP_NOT_INITIALIZED + * @userdata1 MSG_INTR_DUMP + * @userdata2 0 + * @devdesc Interrupt resource provider not initialized yet. + * @custdesc Error encountered gathering diagnostic info + */ + err = new ERRORLOG::ErrlEntry + ( + ERRORLOG::ERRL_SEV_INFORMATIONAL, // severity + INTR::MOD_INTR_DUMP, // moduleid + INTR::RC_RP_NOT_INITIALIZED, // reason code + static_cast<uint64_t>(MSG_INTR_DUMP), + 0 + ); + } + return err; +} + +void INTR::IntrRp::printLSIInfo() const +{ + TRACFCOMP(g_trac_intr, "---LSI Sources---"); + + //Read LSI Interrupt Status register from each enabled + // proc chip to see which caused the interrupt + for(auto targ_itr = iv_chipList.begin(); + targ_itr != iv_chipList.end(); ++targ_itr) + { + uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintstatus; + uint32_t l_huid = get_huid((*targ_itr)->proc); + TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid); + TRACFCOMP(g_trac_intr, " lsiIntStatus : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintstatus , l_mmioRead); + l_mmioRead = (*targ_itr)->psiHbBaseAddr->lsiintlevel; + TRACFCOMP(g_trac_intr, " lsiIntLevel : vAddr=0x%016lx Value=0x%016lx", &(*targ_itr)->psiHbBaseAddr->lsiintlevel, l_mmioRead); + } +} + +void INTR::IntrRp::printPSIHBInfo() const +{ + TRACFCOMP(g_trac_intr, "---PSIHB Info---"); + //Read LSI Interrupt Status register from each enabled + // proc chip to see which caused the interrupt + for(auto targ_itr = iv_chipList.begin(); + targ_itr != iv_chipList.end(); ++targ_itr) + { + uint32_t l_huid = get_huid((*targ_itr)->proc); + uint64_t l_mmioRead = (*targ_itr)->psiHbBaseAddr->psihbcr; + + TRACFCOMP(g_trac_intr, "Processor 0x%lx", l_huid); + + TRACFCOMP(g_trac_intr, " PSIHB Ctrl/Status Reg : vAddr=0x%016lx Value=0x%016lx", + 
&(*targ_itr)->psiHbBaseAddr->psihbcr, l_mmioRead); + + l_mmioRead = (*targ_itr)->psiHbBaseAddr->psisemr; + TRACFCOMP(g_trac_intr, " PSIHB Error/Status Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->psisemr, l_mmioRead); + + l_mmioRead = (*targ_itr)->psiHbBaseAddr->phbdsr; + TRACFCOMP(g_trac_intr, " PSIHB Dbg Setting Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->phbdsr, l_mmioRead); + + l_mmioRead = (*targ_itr)->psiHbBaseAddr->icr; + TRACFCOMP(g_trac_intr, " PSIHB Interrupt Control Reg : vAddr=0x%016lx Value=0x%016lx", + &(*targ_itr)->psiHbBaseAddr->icr, l_mmioRead); + } +} + +void INTR::IntrRp::printEsbStates() const +{ + TRACFCOMP(g_trac_intr, "---ESB States---"); + for(auto targ_itr = iv_chipList.begin(); + targ_itr != iv_chipList.end(); ++targ_itr) + { + TRACFCOMP(g_trac_intr, "Processor 0x%lx", get_huid((*targ_itr)->proc)); + for (uint8_t i = 0; i < LSI_LAST_SOURCE; i++) + { + // Ready from the ESB_QUERY_OFFSET to ensure the read doesn't + // affect the state + uint64_t * l_psiHbEsbptr = (*targ_itr)->psiHbEsbBaseAddr + + (((i*PAGE_SIZE)+ESB_QUERY_OFFSET) /sizeof(uint64_t)); + + volatile uint64_t l_esbState = *l_psiHbEsbptr; + const char* l_esbStateString = nullptr; + + // Use toString method to look up human readable string + esbStateToString(l_esbState, &l_esbStateString); + + TRACFCOMP(g_trac_intr, " SRC: %02d State: %s", i , l_esbStateString ); + } + } +} diff --git a/src/usr/intr/intrrp.H b/src/usr/intr/intrrp.H index b0245a868..6b4b57197 100644 --- a/src/usr/intr/intrrp.H +++ b/src/usr/intr/intrrp.H @@ -231,10 +231,6 @@ namespace INTR ESB_QUERY_OFFSET = 0x800, ESB_OFF_OFFSET = 0xD00, ESB_RESET_OFFSET = 0XC00, - ESB_STATE_RESET = 0, - ESB_STATE_OFF = 1, - ESB_STATE_PENDING = 2, - ESB_STATE_QUEUED = 3, }; enum INTR_ROUTING_t @@ -889,6 +885,30 @@ namespace INTR */ errlHndl_t resetIntpForMpipl(void); + /** + * Print out the ESB state for every source on all processors + * that the Interrp is aware of + * + * @return 
void + */ + void printEsbStates() const; + + /** + * Print out the PSIHB info for all processors + * that the Interrp is aware of + * + * @return void + */ + void printPSIHBInfo() const; + + /** + * Print out the LSI info for all processors + * that the Interrp is aware of + * + * @return void + */ + void printLSIInfo() const; + }; }; // INTR namespace |

