diff options
author | Dan Crowell <dcrowell@us.ibm.com> | 2017-07-19 13:49:58 -0500 |
---|---|---|
committer | William G. Hoffa <wghoffa@us.ibm.com> | 2017-07-31 12:24:33 -0400 |
commit | ad20498a1a7b857517759cbb173fe9d936107d63 (patch) | |
tree | cba05569f1cc0c6f69ef6d46cb630a08aed5627c /src/usr/intr | |
parent | fadc1f7542d63ef55f383cf922db86d4f5e48ffe (diff) | |
download | talos-hostboot-ad20498a1a7b857517759cbb173fe9d936107d63.tar.gz talos-hostboot-ad20498a1a7b857517759cbb173fe9d936107d63.zip |
Fix race condition between INTR and SBEIO
Fixed a race condition in clearing out the PSU interrupt
register that existed between the INTR and SBEIO code.
We can sometimes lose interrupts for SBE PSU operations,
which leads to a timeout.
Also added code to look for SBE errors if a PSU
operation times out.
Change-Id: I8cdcdcc08956b038bcc65ad7e00a34719bf14c61
CQ: SW396057
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/43339
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Elizabeth K. Liner <eliner@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martin Gloff <mgloff@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
Diffstat (limited to 'src/usr/intr')
-rw-r--r-- | src/usr/intr/intrrp.C | 83 | ||||
-rw-r--r-- | src/usr/intr/intrrp.H | 1 |
2 files changed, 23 insertions, 61 deletions
diff --git a/src/usr/intr/intrrp.C b/src/usr/intr/intrrp.C index 8448ae158..87e2b1374 100644 --- a/src/usr/intr/intrrp.C +++ b/src/usr/intr/intrrp.C @@ -366,6 +366,7 @@ errlHndl_t IntrRp::_init() uint64_t l_en_threads = get_enabled_threads(); TRACFCOMP(g_trac_intr, "IntrRp::_init() Threads enabled:" " %lx", l_en_threads); + } while(0); return l_err; @@ -1312,7 +1313,6 @@ void IntrRp::routeInterrupt(intr_hdlr_t* i_proc, } else if (i_type == LSI_PSU) { - TRACFCOMP(g_trac_intr, "PSU Interrupt Detected"); handlePsuInterrupt(i_type, i_proc, i_pir); } else // no queue registered for this interrupt type @@ -1623,84 +1623,44 @@ errlHndl_t IntrRp::handlePsuInterrupt(ext_intr_t i_type, // Long term will leverage mask register to avoid // polling loop below errlHndl_t l_err = NULL; - uint32_t l_addr = PSI_BRIDGE_PSU_DOORBELL_REG; - size_t scom_len = sizeof(uint64_t); - uint64_t reg = 0x0; - uint64_t l_elapsed_time_ns = 0; TARGETING::Target* procTarget = i_proc->proc; - do - { + do { + size_t scom_len = 8; + uint64_t l_reg = 0x0; l_err = deviceRead(procTarget, - &reg, + &l_reg, scom_len, - DEVICE_SCOM_ADDRESS(l_addr)); - + DEVICE_SCOM_ADDRESS(PSI_BRIDGE_PSU_DOORBELL_REG)); if (l_err) { - TRACFCOMP(g_trac_intr, "Error Reading PSU SCOM address: %lx", - l_addr); break; } + TRACDCOMP( g_trac_intr, "%.8X = %.16llX", + PSI_BRIDGE_PSU_DOORBELL_REG, l_reg ); - //If the PSU Host Doorbell bit is on, wait for the - // PSU DD to handle - if (reg & PSI_BRIDGE_PSU_HOST_DOORBELL) + //If the interrupt is driven by the doorbell, yield + // to give the driver a chance to take care of it + if( l_reg & PSI_BRIDGE_PSU_HOST_DOORBELL ) { - TRACDCOMP(g_trac_intr, "Host/SBE Mailbox " - "response. 
Wait for Polling to handle" - " response"); nanosleep(0,10000); - l_elapsed_time_ns += 10000; - } - else - { - //Polling Complete - break; - } - if (l_elapsed_time_ns > MAX_PSU_LONG_TIMEOUT_NS) - { - TRACFCOMP(g_trac_intr, "PSU Timeout hit"); - /*@ errorlog tag - * @errortype ERRL_SEV_UNRECOVERABLE - * @moduleid INTR::MOD_INTRRP_HNDLPSUINTERRUPT - * @reasoncode INTR::RC_PSU_DOORBELL_TIMEOUT - * @userdata1 Scom Address with interrupt condition - * @userdata2 Register Value - * @devdesc PSU Doorbell Timeout hit waiting for doorbell - * interrupt condition to be cleared - */ - l_err = new ERRORLOG::ErrlEntry - ( - ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity - INTR::MOD_INTRRP_HNDLPSUINTERRUPT, // moduleid - INTR::RC_PSU_DOORBELL_TIMEOUT, // reason code - l_addr, - reg - ); - break; - } - - } while(1); - - do { - - if (l_err) - { - break; + task_yield(); } //Clear the PSU Scom Reg Interrupt Status register - uint64_t l_barValue = 0; - uint64_t size = sizeof(l_barValue); + // but ignore the bit that the PSU driver uses + // to avoid a race condition + uint64_t l_andVal = PSI_BRIDGE_PSU_HOST_DOORBELL; + uint64_t size = sizeof(l_andVal); l_err = deviceWrite(procTarget, - &l_barValue, - size, - DEVICE_SCOM_ADDRESS(l_addr)); + &l_andVal, + size, + DEVICE_SCOM_ADDRESS(PSI_BRIDGE_PSU_DOORBELL_ANDREG)); if (l_err) { - TRACFCOMP(g_trac_intr, "Error clearing scom - %x", l_addr); + TRACFCOMP(g_trac_intr, "Error clearing scom - %x", + PSI_BRIDGE_PSU_DOORBELL_ANDREG); break; } @@ -3714,3 +3674,4 @@ errlHndl_t INTR::IntrRp::enableSlaveProcInterrupts(TARGETING::Target * i_target) TRACFCOMP(g_trac_intr, INFO_MRK"Slave Proc Interrupt Routing setup complete\n"); return l_err; } + diff --git a/src/usr/intr/intrrp.H b/src/usr/intr/intrrp.H index db6669989..ecebf7e67 100644 --- a/src/usr/intr/intrrp.H +++ b/src/usr/intr/intrrp.H @@ -185,6 +185,7 @@ namespace INTR PSI_BRIDGE_ESB_OFF_OFFSET = 0xD00, PSI_BRIDGE_ESB_RESET_OFFSET = 0XC00, PSI_BRIDGE_PSU_DOORBELL_REG = 0x000D0063, + 
PSI_BRIDGE_PSU_DOORBELL_ANDREG = 0x000D0064, PSI_BRIDGE_PSU_HOST_DOORBELL = 0x8000000000000000, //XIVE Interrupt Controller Constants |