From 2d6ab3cf820134bd3086139adea76f51bbdceb64 Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 22 Oct 2013 15:10:18 -0500 Subject: Handle winkle-wakeup times in slave cores. Change-Id: I6978d66ecfdef57da9754e6251d2ac1d3d078210 RTC: 73559 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/6851 Tested-by: Jenkins Server Reviewed-by: A. Patrick Williams III --- src/include/errno.h | 5 +- src/include/kernel/cpumgr.H | 2 +- src/include/usr/intr/interrupt.H | 8 +- src/kernel/cpumgr.C | 7 +- src/kernel/intmsghandler.C | 7 +- src/kernel/syscall.C | 9 ++- src/usr/hwpf/hwp/core_activate/core_activate.C | 33 +++++--- src/usr/intr/intrrp.C | 102 +++++++++++++++++++++++-- src/usr/intr/intrrp.H | 15 ++++ 9 files changed, 157 insertions(+), 31 deletions(-) diff --git a/src/include/errno.h b/src/include/errno.h index 5e7833985..e50e2f390 100644 --- a/src/include/errno.h +++ b/src/include/errno.h @@ -5,7 +5,7 @@ /* */ /* IBM CONFIDENTIAL */ /* */ -/* COPYRIGHT International Business Machines Corp. 2011,2012 */ +/* COPYRIGHT International Business Machines Corp. 2011,2013 */ /* */ /* p1 */ /* */ @@ -33,7 +33,8 @@ #define EINVAL 22 // Invalid argument #define ENFILE 23 // Too many open files in system #define EDEADLK 35 // Operation would cause deadlock. -#define EALREADY 114 // Operation already in progress +#define ETIME 62 // Time expired. +#define EALREADY 114 // Operation already in progress #define EWOULDBLOCK EAGAIN // operation would block diff --git a/src/include/kernel/cpumgr.H b/src/include/kernel/cpumgr.H index 23207bf11..51c05f438 100644 --- a/src/include/kernel/cpumgr.H +++ b/src/include/kernel/cpumgr.H @@ -137,7 +137,7 @@ class CpuManager * @param[in] pir - PIR value of first thread in core. * @param[in] i_threads - Bitstring of threads to enable (left-justified). */ - static int startCore(uint64_t pir,uint64_t i_threads); + static void startCore(uint64_t pir,uint64_t i_threads); /** @fn forceMemoryPeriodic() diff --git a/src/include/usr/intr/interrupt.H b/src/include/usr/intr/interrupt.H index 473fa7cd3..f8dd202ac 100644 --- a/src/include/usr/intr/interrupt.H +++ b/src/include/usr/intr/interrupt.H @@ -53,6 +53,11 @@ namespace INTR }; PIR_t(uint32_t i_word = 0) : word(i_word) {} + PIR_t(uint32_t i_nodeId, uint32_t i_chipId, + uint32_t i_coreId, uint32_t i_thread = 0) : + nodeId(i_nodeId), chipId(i_chipId), + coreId(i_coreId), threadId(i_thread) {} + PIR_t operator= (uint32_t i_word) { word = i_word; @@ -71,7 +76,7 @@ namespace INTR * @note The XISR is 24 bits: * XISR[ 0: 4] NOT DEFINED * XISR[ 5: 7] Node id - * XISR[ 8:10] chipId within Node + * XISR[ 8:10] chipId within Node * XISR[11:12] Unit selection [GX='00', PHB0='01', PHB1='10', PHB2='11'] * XISR[13:19] BUID * XISR[20:23] level @@ -139,6 +144,7 @@ namespace INTR MSG_INTR_SHUTDOWN, //!< Call to shutdown interrupt presenter MSG_INTR_ENABLE_PSI_INTR, //!< Enable PSI interrupts MSG_INTR_MPIPL_CLEANUP, //!< Clean up interrupts on MPIPL + MSG_INTR_ADD_CPU_TIMEOUT, //!< Check for a timeout waiting for a core. }; /** diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C index 759bcf18c..624751b5c 100644 --- a/src/kernel/cpumgr.C +++ b/src/kernel/cpumgr.C @@ -381,7 +381,7 @@ void CpuManager::executePeriodics(cpu_t * i_cpu) } -int CpuManager::startCore(uint64_t pir,uint64_t i_threads) +void CpuManager::startCore(uint64_t pir,uint64_t i_threads) { size_t threads = getThreadCount(); pir = pir & ~(threads-1); @@ -389,7 +389,8 @@ int CpuManager::startCore(uint64_t pir,uint64_t i_threads) if (pir >= (KERNEL_MAX_SUPPORTED_NODES * KERNEL_MAX_SUPPORTED_CPUS_PER_NODE)) { - return -ENXIO; + TASK_SETRTN(TaskManager::getCurrentTask(), -ENXIO); + return; } for(size_t i = 0; i < threads; i++) @@ -404,7 +405,7 @@ int CpuManager::startCore(uint64_t pir,uint64_t i_threads) InterruptMsgHdlr::addCpuCore(pir); - return 0; + return; }; size_t CpuManager::getThreadCount() diff --git a/src/kernel/intmsghandler.C b/src/kernel/intmsghandler.C index 03b8996d3..e8fd5e66c 100644 --- a/src/kernel/intmsghandler.C +++ b/src/kernel/intmsghandler.C @@ -117,7 +117,7 @@ void InterruptMsgHdlr::handleInterrupt() // from the ICP BAR SCOM register, however, since this value will // never change unless PHYP changes its memory map, it is deemed // sufficient to hard code the value. If this is not an MPIPL then - // there is a serious problem elsewhere. + // there is a serious problem elsewhere. cv_ipc_base_address = (uint64_t)(INTP_BAR_VALUE) << 32; // val in BAR cv_ipc_base_address >>= 14; // convert to base address @@ -196,6 +196,11 @@ MessageHandler::HandleResult InterruptMsgHdlr::handleResponse int i_rc ) { + if (MSG_INTR_ADD_CPU == i_type) + { + TASK_SETRTN(i_task, i_rc); + return SUCCESS; + } return UNHANDLED_RC; } diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C index be2b51d59..38178499c 100644 --- a/src/kernel/syscall.C +++ b/src/kernel/syscall.C @@ -409,7 +409,7 @@ namespace Systemcalls mq->messages.insert(mp); if (!m->__reserved__pseudosync) { - // Choose next thread to execute, this one is delayed. + // Choose next task to execute, this one is delayed. t->cpu->scheduler->setNextRunnable(); } // For pseudo-sync, just keep running the current task. } @@ -670,9 +670,10 @@ namespace Systemcalls /** Prep core for activation. */ void CpuStartCore(task_t *t) { - TASK_SETRTN(t, - CpuManager::startCore(static_cast(TASK_GETARG0(t)), - static_cast(TASK_GETARG1(t)))); + // This will cause another task to be scheduled in while the + // core is started. + CpuManager::startCore(static_cast(TASK_GETARG0(t)), + static_cast(TASK_GETARG1(t))); }; /** Read SPR values. */ diff --git a/src/usr/hwpf/hwp/core_activate/core_activate.C b/src/usr/hwpf/hwp/core_activate/core_activate.C index d3433f3f4..36ab41cf5 100644 --- a/src/usr/hwpf/hwp/core_activate/core_activate.C +++ b/src/usr/hwpf/hwp/core_activate/core_activate.C @@ -45,6 +45,8 @@ #include #include +#include + // targeting support #include #include @@ -103,7 +105,7 @@ void* call_host_activate_master( void *io_pArgs ) const TARGETING::Target* l_masterCore = getMasterCore( ); assert( l_masterCore != NULL ); - + TARGETING::Target* l_cpu_target = const_cast ( getParentChip( l_masterCore ) ); @@ -114,7 +116,7 @@ void* call_host_activate_master( void *io_pArgs ) // Pass in Master EX target const TARGETING::Target* l_masterEx = getExChiplet(l_masterCore); assert(l_masterEx != NULL ); - + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, "call_host_activate_master: call proc_prep_master_winkle. " "Target HUID %.8X", @@ -280,9 +282,7 @@ void* call_host_activate_slave_cores( void *io_pArgs ) assert( sys != NULL ); uint64_t en_threads = sys->getAttr(); - uint64_t pir = l_coreId << 3; - pir |= l_chipId << 7; - pir |= l_logicalNodeId << 10; + uint64_t pir = INTR::PIR_t(l_logicalNodeId, l_chipId, l_coreId).word; if (pir != l_masterCoreID) { @@ -292,11 +292,7 @@ void* call_host_activate_slave_cores( void *io_pArgs ) int rc = cpu_start_core(pir,en_threads); - // We purposefully only create one error log here. The only - // failure from the kernel is a bad PIR, which means we have - // a pervasive attribute problem of some sort. Just log the - // first failing PIR. - if ((0 != rc) && (NULL == l_errl)) + if (0 != rc) { TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, "call_host_activate_slave_cores: " @@ -313,12 +309,27 @@ void* call_host_activate_slave_cores( void *io_pArgs ) * * @devdesc Kernel returned error when trying to activate core. */ - l_errl = + errlHndl_t l_tmperrl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE, ISTEP_HOST_ACTIVATE_SLAVE_CORES, ISTEP_BAD_RC, pir, rc ); + + // Callout core that failed to wake up. + l_tmperrl->addHwCallout(*l_core, + HWAS::SRCI_PRIORITY_MED, + HWAS::DECONFIG, + HWAS::GARD_Predictive); + + if (NULL == l_errl) + { + l_errl = l_tmperrl; + } + else + { + errlCommit( l_tmperrl, HWPF_COMP_ID ); + } } } } diff --git a/src/usr/intr/intrrp.C b/src/usr/intr/intrrp.C index 37ca19995..675d08fae 100644 --- a/src/usr/intr/intrrp.C +++ b/src/usr/intr/intrrp.C @@ -204,7 +204,7 @@ errlHndl_t IntrRp::_init() assert(sys != NULL); uint64_t hrmor_base = sys->getAttr(); - + KernelIpc::ipc_data_area.pir = iv_masterCpu.word; KernelIpc::ipc_data_area.hrmor_base = hrmor_base; KernelIpc::ipc_data_area.msg_queue_id = 0; @@ -242,7 +242,7 @@ errlHndl_t IntrRp::_init() // found. cleanCheck(); } - + // Set up the interrupt provider registers // NOTE: It's only possible to set up the master core at this point. @@ -263,7 +263,7 @@ errlHndl_t IntrRp::_init() "IntrRp::_init: Skipping thread %d : en_threads=%X", thread,en_threads); continue; - } + } pir.threadId = thread; initInterruptPresenter(pir); } @@ -544,11 +544,63 @@ void IntrRp::msgHandler() { TRACDCOMP(g_trac_intr,"MSG_INTR_ADD_CPU: Skipping thread %d",thread); continue; - } + } pir.threadId = thread; initInterruptPresenter(pir); sendIPI(pir); } + + pir.threadId = 0; + task_create(handleCpuTimeout, + reinterpret_cast(pir.word)); + } + break; + + case MSG_INTR_ADD_CPU_TIMEOUT: + { + PIR_t pir = msg->data[0]; + size_t count = msg->data[1]; + + if(iv_ipisPending.count(pir)) + { + if (count < CPU_WAKEUP_INTERVAL_COUNT) + { + TRACDCOMP(g_trac_intr, + INFO_MRK "Cpu wakeup pending on %x", + pir.word); + + // Tell child thread to retry. + msg->data[1] = EAGAIN; + } + else // Timed out. + { + TRACFCOMP(g_trac_intr, + ERR_MRK "Cpu wakeup timeout on %x", + pir.word); + + // Tell child thread to exit. + msg->data[1] = 0; + + // Get saved thread info. + IPI_Info_t& ipiInfo = iv_ipisPending[pir]; + msg_t* ipiMsg = ipiInfo.second; + iv_ipisPending.erase(pir); + + // Respond to waiting thread with ETIME. + ipiMsg->data[1] = -ETIME; + msg_respond(iv_msgQ, ipiMsg); + } + } + else // Ended successfully. + { + TRACDCOMP(g_trac_intr, + INFO_MRK "Cpu wakeup completed on %x", + pir.word); + // Tell child thread to exit. + msg->data[1] = 0; + } + + msg_respond(iv_msgQ, msg); } break; @@ -1298,7 +1350,7 @@ void IntrRp::shutDown() { TRACDCOMP(g_trac_intr,"IntrRp::shutDown: Skipping thread %d",thread); continue; - } + } pir.threadId = thread; disableInterruptPresenter(pir); } @@ -1966,7 +2018,7 @@ errlHndl_t IntrRp::hw_disableIntrMpIpl() // Set interrupt presenter to allow all interrupts TRACFCOMP(g_trac_intr,"Allow interrupts"); - for(TARGETING::TargetHandleList::iterator + for(TARGETING::TargetHandleList::iterator core = procCores.begin(); core != procCores.end(); ++core) @@ -2040,7 +2092,7 @@ void IntrRp::cleanCheck() TARGETING::TargetHandleList procCores; getAllChiplets(procCores, TYPE_CORE); - for(TARGETING::TargetHandleList::iterator + for(TARGETING::TargetHandleList::iterator core = procCores.begin(); core != procCores.end(); ++core) @@ -2049,7 +2101,7 @@ void IntrRp::cleanCheck() FABRIC_CHIP_ID_ATTR chip = proc->getAttr(); FABRIC_NODE_ID_ATTR node = proc->getAttr(); - CHIP_UNIT_ATTR coreId = + CHIP_UNIT_ATTR coreId = (*core)->getAttr(); PIR_t pir(0); @@ -2309,3 +2361,37 @@ uint64_t INTR::getIntpAddr(const TARGETING::Target * i_ex, uint8_t i_thread) pir.word & (InterruptMsgHdlr::P8_PIR_THREADID_MSK | InterruptMsgHdlr::P8_PIR_COREID_MSK))); } + +void* INTR::IntrRp::handleCpuTimeout(void* _pir) +{ + uint64_t pir = reinterpret_cast(_pir); + task_detach(); + + int count = 0; + int rc = 0; + + // Allocate a message to send to the RP thread. + msg_t* msg = msg_allocate(); + msg->type = MSG_INTR_ADD_CPU_TIMEOUT; + msg->data[0] = pir; + msg_q_t intr_msgQ = msg_q_resolve(VFS_ROOT_MSG_INTR); + + do + { + // Sleep for the right amount. + nanosleep(0, CPU_WAKEUP_INTERVAL_NS); + + // Check the status with the RP thread. + msg->data[1] = count; + msg_sendrecv(intr_msgQ, msg); + + // Get the status from the response message. + rc = msg->data[1]; + count++; + + } while(rc == EAGAIN); + + msg_free(msg); + + return NULL; +} diff --git a/src/usr/intr/intrrp.H b/src/usr/intr/intrrp.H index 488f27706..e2495fb27 100644 --- a/src/usr/intr/intrrp.H +++ b/src/usr/intr/intrrp.H @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -293,6 +294,14 @@ namespace INTR }; + enum + { + CPU_WAKEUP_SECONDS = 1, + CPU_WAKEUP_INTERVAL_COUNT = 10, + CPU_WAKEUP_INTERVAL_NS = (NS_PER_SEC * CPU_WAKEUP_SECONDS) / + CPU_WAKEUP_INTERVAL_COUNT, + }; + typedef std::map Registry_t; typedef std::vector CpuList_t; typedef std::vector ChipList_t; @@ -545,6 +554,12 @@ namespace INTR */ static errlHndl_t checkAddress(uint64_t i_addr); + /** + * Background thread to handle if a core doesn't wake up. + * @param[in] _pir - The PIR value (as void*) to check for. + */ + static void* handleCpuTimeout(void* _pir); + }; }; // INTR namespace -- cgit v1.2.1