summaryrefslogtreecommitdiffstats
path: root/hw/fsp/fsp.c
diff options
context:
space:
mode:
authorAnanth N Mavinakayanahalli <ananth@in.ibm.com>2014-07-24 11:46:58 +0530
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-07-25 14:10:51 +1000
commitcad0d16e236bcbf457406023d5df4da8d55b778f (patch)
treeb63b5f3e36fba1a0bc4be4f8daf308b7b113b2c8 /hw/fsp/fsp.c
parent99608f2074d4c8877c8445e20a1275dc1257079c (diff)
downloadblackbird-skiboot-cad0d16e236bcbf457406023d5df4da8d55b778f.tar.gz
blackbird-skiboot-cad0d16e236bcbf457406023d5df4da8d55b778f.zip
FSP: Rework the R/R state machine
a. Do not trigger PSI link down on DISR's RR bit set. b. Do trigger HIR if DISR's Unit Check bit is set. c. On fsp_mbx_err, trigger a HIR (very rare occurrence). d. Use fsp_start_rr() when the DISR's RR bit is seen so all mbox activity is stopped when the FSP indicates an RR start. We do not bring the link down voluntarily on DISR's RR begin, pending a PSI interrupt, which actually triggers the link down. Per Dean Sanner, this is the right protocol to follow. The assumption is that a DISR RR bit set would cause a PSI interrupt 'soon'. One TODO is to figure out what to do if this interrupt never arrives. The PSI interrupt does come through albeit after a short while: SURV: [ 1dc662ef7f] Sending the hearbeat command to FSP SURV: Received heartbeat acknowledge from FSP FSP #0: DISR stat change = 0x000000a1 FSP #0: FSP in Reset. Waiting for PSI interrupt FSPCON: Closed consoles on account of FSP reset/reload SURV: Disabling surveillance FSP: Closing NVRAM on account of FSP Reset FSP #0: HDES stat change = 0xffffffff PSI[0x000]: PSI mgmnt interrupt CR=0xfcf0d100c0000000 PSI: PSI Reported Error PSI: PSI Link Inactive Transition PSI: SEMR set to fff0fff00000 PSI[0x000]: Disabling link! PSI: PSIHB_CR (error bits) set to 68f0510040000000 PSI: starting link polling PSI: Spurious interrupt, attempting clear PSI[0x001]: Poll CR=0x00f0100000000000 PSI[0x000]: Poll CR=0x68f0100040000000 PSI[0x001]: Poll CR=0x00f0100000000000 PSI[0x000]: Poll CR=0x68f0100040000000 ... And we recover: PSI[0x000]: Poll CR=0x68f0100040000000 PSI[0x001]: Poll CR=0xccf0300000000000 PSI[0x001]: Found active link! 
PSI: stopping link polling FSP: Connected to FSP-A FSP #0: DISR stat change = 0x000000a9 FSP #0: DISR stat change = 0x00000281 FSP #0: Detected R&R complete, acking FSP #0: HDES stat change = 0x00000000 FSP #0: DISR stat change = 0x00000081 FSP: FSP assuming new role FSP: SP says Reset/Reload complete Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'hw/fsp/fsp.c')
-rw-r--r--hw/fsp/fsp.c55
1 files changed, 42 insertions, 13 deletions
diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
index f5dcbf34..ffa408b9 100644
--- a/hw/fsp/fsp.c
+++ b/hw/fsp/fsp.c
@@ -372,9 +372,9 @@ static bool fsp_in_hir(struct fsp *fsp)
static bool fsp_in_reset(struct fsp *fsp)
{
switch (fsp->state) {
- case fsp_mbx_hir_seq_done: /* Will be reset soon */
+ case fsp_mbx_hir_seq_done: /* Link pulled down */
case fsp_mbx_err: /* Will be reset soon */
- case fsp_mbx_rr: /* Already in reset */
+ case fsp_mbx_rr: /* Mbx activity stopped pending reset */
return true;
default:
return false;
@@ -506,6 +506,10 @@ void fsp_trigger_reset(void)
unlock(&fsp_lock);
}
+/*
+ * Called when we trigger a HIR or when the FSP tells us via the DISR's
+ * RR bit that one is impending. We should therefore stop all mbox activity.
+ */
static void fsp_start_rr(struct fsp *fsp)
{
struct fsp_iopath *iop;
@@ -533,6 +537,12 @@ static void fsp_start_rr(struct fsp *fsp)
unlock(&fsp_lock);
fsp_notify_rr_state(FSP_RESET_START);
lock(&fsp_lock);
+
+ /*
+ * Unlike earlier, we don't trigger the PSI link polling
+ * from this point. We wait for the PSI interrupt to tell
+ * us the FSP is really down and then start the polling there.
+ */
}
static void fsp_trace_event(struct fsp *fsp, u32 evt,
@@ -604,12 +614,25 @@ static void fsp_handle_errors(struct fsp *fsp)
disr_last_print = disr;
}
+ /* On a deferred mbox error, trigger a HIR
+ * Note: We may never get here since the link inactive case is handled
+ * above and the other case is when the iop->psi is NULL, which is
+ * quite rare.
+ */
+ if (fsp->state == fsp_mbx_err) {
+ prerror("FSP #%d: Triggering HIR on mbx_err\n",
+ fsp->index);
+ fsp_trigger_reset();
+ return;
+ }
+
/*
- * We detect FSP_IN_RR in DSISR or we have a deferred mbox
- * error, we trigger an R&R after a bit of housekeeping to
- * limit the chance of a stray interrupt
+ * If we get here as part of normal flow, the FSP is telling
+ * us that there will be an impending R&R, so we stop all mbox
+ * activity. The actual link down trigger is via a PSI
+ * interrupt that may arrive in due course.
*/
- if ((disr & FSP_DISR_FSP_IN_RR) || (fsp->state == fsp_mbx_err)) {
+ if (disr & FSP_DISR_FSP_IN_RR) {
/*
* If we get here with DEBUG_IN_PROGRESS also set, the
* FSP is in debug and we should *not* reset it now
@@ -624,9 +647,19 @@ static void fsp_handle_errors(struct fsp *fsp)
if (fsp->state == fsp_mbx_rr)
return;
+ printf("FSP #%d: FSP in Reset. Waiting for PSI interrupt\n",
+ fsp->index);
+ fsp_start_rr(fsp);
+ }
+
+ /*
+ * However, if the Unit Check is also set, the FSP is asking us
+ * to trigger a HIR so it can try to recover via the DRCR route.
+ */
+ if (disr & FSP_DISR_FSP_UNIT_CHECK) {
fsp_trace_event(fsp, TRACE_FSP_EVT_SOFT_RR, disr, 0, 0, 0);
- printf("FSP #%d: FSP in reset or delayed error, starting R&R\n",
+ printf("FSP #%d: DISR's unit check set, starting HIR\n",
fsp->index);
/* Clear all interrupt conditions */
@@ -635,11 +668,7 @@ static void fsp_handle_errors(struct fsp *fsp)
/* Make sure this happened */
fsp_rreg(fsp, FSP_HDIR_REG);
- /* Bring the PSI link down */
- psi_disable_link(psi);
-
- /* Start R&R process */
- fsp_start_rr(fsp);
+ fsp_trigger_reset();
return;
}
@@ -1396,7 +1425,7 @@ static void __fsp_poll(bool interrupt)
again:
if (fsp->active_iopath < 0) {
/* That should never happen */
- if (interrupt)
+ if (interrupt && (fsp->state != fsp_mbx_rr))
prerror("FSP: Interrupt with no working IO path\n");
return;
}
OpenPOWER on IntegriCloud