diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2018-04-16 23:03:31 +0530 |
---|---|---|
committer | Stewart Smith <stewart@linux.ibm.com> | 2018-04-17 03:52:10 -0500 |
commit | 674f7696f7c1e51ab159d81a05a18c445f0c896d (patch) | |
tree | e55261a42ca2c5ebd2955ac8f0aa79f7dd2afee4 /hw | |
parent | 099801d775ee273a9b500d921f4d47f96499c766 (diff) | |
download | blackbird-skiboot-674f7696f7c1e51ab159d81a05a18c445f0c896d.tar.gz blackbird-skiboot-674f7696f7c1e51ab159d81a05a18c445f0c896d.zip |
opal/hmi: Rework HMI handling of TFAC errors
This patch reworks the HMI handling for TFAC errors by introducing
4 rendez-vous points to improve thread synchronization while handling
timebase errors, which require all threads to clear dirty data from the
TB/HDEC registers before the errors are cleared.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/chiptod.c | 118 |
1 files changed, 43 insertions, 75 deletions
diff --git a/hw/chiptod.c b/hw/chiptod.c index cacc2734..a160e5a1 100644 --- a/hw/chiptod.c +++ b/hw/chiptod.c @@ -1370,17 +1370,10 @@ static bool tfmr_recover_tb_errors(uint64_t tfmr) return true; } -static bool tfmr_recover_non_tb_errors(uint64_t tfmr) +bool tfmr_recover_local_errors(uint64_t tfmr) { uint64_t tfmr_reset_errors = 0; - /* - * write 1 to bit 26 to clear TFMR HDEC parity error. - * HDEC register has already been reset to zero as part pre-recovery. - */ - if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR) - tfmr_reset_errors |= SPR_TFMR_HDEC_PARITY_ERROR; - if (tfmr & SPR_TFMR_DEC_PARITY_ERR) { /* Set DEC with all ones */ mtspr(SPR_DEC, ~0); @@ -1390,11 +1383,11 @@ static bool tfmr_recover_non_tb_errors(uint64_t tfmr) } /* - * Reset PURR/SPURR to recover. We also need help from KVM - * layer to handle this change in PURR/SPURR. That needs - * to be handled in kernel KVM layer. For now, to recover just - * reset it. - */ + * Reset PURR/SPURR to recover. We also need help from KVM + * layer to handle this change in PURR/SPURR. That needs + * to be handled in kernel KVM layer. For now, to recover just + * reset it. + */ if (tfmr & SPR_TFMR_PURR_PARITY_ERR) { /* set PURR register with sane value or reset it. */ mtspr(SPR_PURR, 0); @@ -1432,7 +1425,7 @@ static bool tfmr_recover_non_tb_errors(uint64_t tfmr) * MT(TFMR) bits 11 and 60 are b’1’ * MT(HMER) all bits 1 except for bits 4,5 */ -static bool chiptod_recover_tfmr_error(void) +bool recover_corrupt_tfmr(void) { uint64_t tfmr; @@ -1468,6 +1461,37 @@ static bool chiptod_recover_tfmr_error(void) return true; } +void tfmr_cleanup_core_errors(uint64_t tfmr) +{ + /* If HDEC is bad, clean it on all threads before we clear the + * error condition. 
+ */ + if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR) + mtspr(SPR_HDEC, 0); + + /* If TB is invalid, clean it on all threads as well, it will be + * restored after the next rendez-vous + */ + if (!(tfmr & SPR_TFMR_TB_VALID)) { + mtspr(SPR_TBWU, 0); + mtspr(SPR_TBWU, 0); + } +} + +bool tfmr_clear_core_errors(uint64_t tfmr) +{ + uint64_t tfmr_reset_errors = 0; + + if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR) + tfmr_reset_errors |= SPR_TFMR_HDEC_PARITY_ERROR; + + /* Write TFMR twice to clear the error */ + mtspr(SPR_TFMR, base_tfmr | tfmr_reset_errors); + mtspr(SPR_TFMR, base_tfmr | tfmr_reset_errors); + + return true; +} + /* * Recover from TB and TOD errors. * Timebase register is per core and first thread that gets chance to @@ -1481,46 +1505,17 @@ static bool chiptod_recover_tfmr_error(void) * 1 <= Successfully recovered from errors * -1 <= No errors found. Errors are already been fixed. */ -int chiptod_recover_tb_errors(void) +int chiptod_recover_tb_errors(uint64_t tfmr, bool *out_resynced) { - uint64_t tfmr; int rc = -1; - int thread_id; + + *out_resynced = false; if (chiptod_primary < 0) return 0; lock(&chiptod_lock); - /* Get fresh copy of TFMR */ - tfmr = mfspr(SPR_TFMR); - - /* - * Check for TFMR parity error and recover from it. - * We can not trust any other bits in TFMR If it is corrupt. Fix this - * before we do anything. - */ - if (tfmr & SPR_TFMR_TFMR_CORRUPT) { - if (!chiptod_recover_tfmr_error()) { - rc = 0; - goto error_out; - } - } - - /* Get fresh copy of TFMR */ - tfmr = mfspr(SPR_TFMR); - - /* - * Workaround for HW logic bug in Power9 - * Even after clearing TB residue error by one thread it does not - * get reflected to other threads on same core. - * Check if TB is already valid and skip the checking of TB errors. - */ - - if ((proc_gen == proc_gen_p9) && (tfmr & SPR_TFMR_TB_RESIDUE_ERR) - && (tfmr & SPR_TFMR_TB_VALID)) - goto skip_tb_error_clear; - /* * Check for TB errors. * On Sync check error, bit 44 of TFMR is set. 
Check for it and @@ -1544,7 +1539,6 @@ int chiptod_recover_tb_errors(void) } } -skip_tb_error_clear: /* * Check for TOD sync check error. * On TOD errors, bit 51 of TFMR is set. If this bit is on then we @@ -1574,35 +1568,9 @@ skip_tb_error_clear: if (!chiptod_to_tb()) goto error_out; - /* We have successfully able to get TB running. */ - rc = 1; - } + *out_resynced = true; - /* - * Workaround for HW logic bug in power9. - * In idea case (without the HW bug) only one thread from the core - * would have fallen through tfmr_recover_non_tb_errors() to clear - * HDEC parity error on TFMR. - * - * Hence to achieve same behavior, allow only thread 0 to clear the - * HDEC parity error. And for rest of the threads just reset the bit - * to avoid other threads to fall through tfmr_recover_non_tb_errors(). - */ - thread_id = cpu_get_thread_index(this_cpu()); - if ((proc_gen == proc_gen_p9) && thread_id) - tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR; - - /* - * Now that TB is running, check for TFMR non-TB errors. - */ - if ((tfmr & SPR_TFMR_HDEC_PARITY_ERROR) || - (tfmr & SPR_TFMR_PURR_PARITY_ERR) || - (tfmr & SPR_TFMR_SPURR_PARITY_ERR) || - (tfmr & SPR_TFMR_DEC_PARITY_ERR)) { - if (!tfmr_recover_non_tb_errors(tfmr)) { - rc = 0; - goto error_out; - } + /* We have successfully able to get TB running. */ rc = 1; } |