summaryrefslogtreecommitdiffstats
path: root/hw
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2018-04-16 23:03:31 +0530
committerStewart Smith <stewart@linux.ibm.com>2018-04-17 03:52:10 -0500
commit674f7696f7c1e51ab159d81a05a18c445f0c896d (patch)
treee55261a42ca2c5ebd2955ac8f0aa79f7dd2afee4 /hw
parent099801d775ee273a9b500d921f4d47f96499c766 (diff)
downloadblackbird-skiboot-674f7696f7c1e51ab159d81a05a18c445f0c896d.tar.gz
blackbird-skiboot-674f7696f7c1e51ab159d81a05a18c445f0c896d.zip
opal/hmi: Rework HMI handling of TFAC errors
This patch reworks the HMI handling for TFAC errors by introducing 4 rendez-vous points improve the thread synchronization while handling timebase errors that requires all thread to clear dirty data from TB/HDEC register before clearing the errors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/chiptod.c118
1 files changed, 43 insertions, 75 deletions
diff --git a/hw/chiptod.c b/hw/chiptod.c
index cacc2734..a160e5a1 100644
--- a/hw/chiptod.c
+++ b/hw/chiptod.c
@@ -1370,17 +1370,10 @@ static bool tfmr_recover_tb_errors(uint64_t tfmr)
return true;
}
-static bool tfmr_recover_non_tb_errors(uint64_t tfmr)
+bool tfmr_recover_local_errors(uint64_t tfmr)
{
uint64_t tfmr_reset_errors = 0;
- /*
- * write 1 to bit 26 to clear TFMR HDEC parity error.
- * HDEC register has already been reset to zero as part pre-recovery.
- */
- if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR)
- tfmr_reset_errors |= SPR_TFMR_HDEC_PARITY_ERROR;
-
if (tfmr & SPR_TFMR_DEC_PARITY_ERR) {
/* Set DEC with all ones */
mtspr(SPR_DEC, ~0);
@@ -1390,11 +1383,11 @@ static bool tfmr_recover_non_tb_errors(uint64_t tfmr)
}
/*
- * Reset PURR/SPURR to recover. We also need help from KVM
- * layer to handle this change in PURR/SPURR. That needs
- * to be handled in kernel KVM layer. For now, to recover just
- * reset it.
- */
+ * Reset PURR/SPURR to recover. We also need help from KVM
+ * layer to handle this change in PURR/SPURR. That needs
+ * to be handled in kernel KVM layer. For now, to recover just
+ * reset it.
+ */
if (tfmr & SPR_TFMR_PURR_PARITY_ERR) {
/* set PURR register with sane value or reset it. */
mtspr(SPR_PURR, 0);
@@ -1432,7 +1425,7 @@ static bool tfmr_recover_non_tb_errors(uint64_t tfmr)
* MT(TFMR) bits 11 and 60 are b’1’
* MT(HMER) all bits 1 except for bits 4,5
*/
-static bool chiptod_recover_tfmr_error(void)
+bool recover_corrupt_tfmr(void)
{
uint64_t tfmr;
@@ -1468,6 +1461,37 @@ static bool chiptod_recover_tfmr_error(void)
return true;
}
+void tfmr_cleanup_core_errors(uint64_t tfmr)
+{
+ /* If HDEC is bad, clean it on all threads before we clear the
+ * error condition.
+ */
+ if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR)
+ mtspr(SPR_HDEC, 0);
+
+ /* If TB is invalid, clean it on all threads as well, it will be
+ * restored after the next rendez-vous
+ */
+ if (!(tfmr & SPR_TFMR_TB_VALID)) {
+ mtspr(SPR_TBWU, 0);
+ mtspr(SPR_TBWU, 0);
+ }
+}
+
+bool tfmr_clear_core_errors(uint64_t tfmr)
+{
+ uint64_t tfmr_reset_errors = 0;
+
+ if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR)
+ tfmr_reset_errors |= SPR_TFMR_HDEC_PARITY_ERROR;
+
+ /* Write TFMR twice to clear the error */
+ mtspr(SPR_TFMR, base_tfmr | tfmr_reset_errors);
+ mtspr(SPR_TFMR, base_tfmr | tfmr_reset_errors);
+
+ return true;
+}
+
/*
* Recover from TB and TOD errors.
* Timebase register is per core and first thread that gets chance to
@@ -1481,46 +1505,17 @@ static bool chiptod_recover_tfmr_error(void)
* 1 <= Successfully recovered from errors
* -1 <= No errors found. Errors are already been fixed.
*/
-int chiptod_recover_tb_errors(void)
+int chiptod_recover_tb_errors(uint64_t tfmr, bool *out_resynced)
{
- uint64_t tfmr;
int rc = -1;
- int thread_id;
+
+ *out_resynced = false;
if (chiptod_primary < 0)
return 0;
lock(&chiptod_lock);
- /* Get fresh copy of TFMR */
- tfmr = mfspr(SPR_TFMR);
-
- /*
- * Check for TFMR parity error and recover from it.
- * We can not trust any other bits in TFMR If it is corrupt. Fix this
- * before we do anything.
- */
- if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
- if (!chiptod_recover_tfmr_error()) {
- rc = 0;
- goto error_out;
- }
- }
-
- /* Get fresh copy of TFMR */
- tfmr = mfspr(SPR_TFMR);
-
- /*
- * Workaround for HW logic bug in Power9
- * Even after clearing TB residue error by one thread it does not
- * get reflected to other threads on same core.
- * Check if TB is already valid and skip the checking of TB errors.
- */
-
- if ((proc_gen == proc_gen_p9) && (tfmr & SPR_TFMR_TB_RESIDUE_ERR)
- && (tfmr & SPR_TFMR_TB_VALID))
- goto skip_tb_error_clear;
-
/*
* Check for TB errors.
* On Sync check error, bit 44 of TFMR is set. Check for it and
@@ -1544,7 +1539,6 @@ int chiptod_recover_tb_errors(void)
}
}
-skip_tb_error_clear:
/*
* Check for TOD sync check error.
* On TOD errors, bit 51 of TFMR is set. If this bit is on then we
@@ -1574,35 +1568,9 @@ skip_tb_error_clear:
if (!chiptod_to_tb())
goto error_out;
- /* We have successfully able to get TB running. */
- rc = 1;
- }
+ *out_resynced = true;
- /*
- * Workaround for HW logic bug in power9.
- * In idea case (without the HW bug) only one thread from the core
- * would have fallen through tfmr_recover_non_tb_errors() to clear
- * HDEC parity error on TFMR.
- *
- * Hence to achieve same behavior, allow only thread 0 to clear the
- * HDEC parity error. And for rest of the threads just reset the bit
- * to avoid other threads to fall through tfmr_recover_non_tb_errors().
- */
- thread_id = cpu_get_thread_index(this_cpu());
- if ((proc_gen == proc_gen_p9) && thread_id)
- tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR;
-
- /*
- * Now that TB is running, check for TFMR non-TB errors.
- */
- if ((tfmr & SPR_TFMR_HDEC_PARITY_ERROR) ||
- (tfmr & SPR_TFMR_PURR_PARITY_ERR) ||
- (tfmr & SPR_TFMR_SPURR_PARITY_ERR) ||
- (tfmr & SPR_TFMR_DEC_PARITY_ERR)) {
- if (!tfmr_recover_non_tb_errors(tfmr)) {
- rc = 0;
- goto error_out;
- }
+ /* We have successfully able to get TB running. */
rc = 1;
}
OpenPOWER on IntegriCloud