/* Copyright 2013-2014 IBM Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* Handle ChipTOD chip & configure core and CAPP timebases */ #define pr_fmt(fmt) "CHIPTOD: " fmt #include #include #include #include #include #include #include #include #include /* TOD chip XSCOM addresses */ #define TOD_MASTER_PATH_CTRL 0x00040000 /* Master Path ctrl reg */ #define TOD_PRI_PORT0_CTRL 0x00040001 /* Primary port0 ctrl reg */ #define TOD_PRI_PORT1_CTRL 0x00040002 /* Primary port1 ctrl reg */ #define TOD_SEC_PORT0_CTRL 0x00040003 /* Secondary p0 ctrl reg */ #define TOD_SEC_PORT1_CTRL 0x00040004 /* Secondary p1 ctrl reg */ #define TOD_SLAVE_PATH_CTRL 0x00040005 /* Slave Path ctrl reg */ #define TOD_INTERNAL_PATH_CTRL 0x00040006 /* Internal Path ctrl reg */ /* -- TOD primary/secondary master/slave control register -- */ #define TOD_PSMS_CTRL 0x00040007 #define TOD_PSMSC_PM_TOD_SELECT PPC_BIT(1) /* Primary Master TOD */ #define TOD_PSMSC_PM_DRAW_SELECT PPC_BIT(2) /* Primary Master Drawer */ #define TOD_PSMSC_SM_TOD_SELECT PPC_BIT(9) /* Secondary Master TOD */ #define TOD_PSMSC_SM_DRAW_SELECT PPC_BIT(10) /* Secondary Master Draw */ /* -- TOD primary/secondary master/slave status register -- */ #define TOD_STATUS 0x00040008 #define TOD_ST_TOPOLOGY_SELECT PPC_BITMASK(0, 2) #define TOD_ST_MPATH0_STEP_VALID PPC_BIT(6) /* MasterPath0 step valid */ #define TOD_ST_MPATH1_STEP_VALID PPC_BIT(7) /* MasterPath1 step valid */ #define TOD_ST_SPATH0_STEP_VALID PPC_BIT(8) /* SlavePath0 step valid */ 
#define TOD_ST_SPATH1_STEP_VALID PPC_BIT(10) /* SlavePath1 step valid */ /* Primary master/slave path select (0 = PATH_0, 1 = PATH_1) */ #define TOD_ST_PRI_MPATH_SELECT PPC_BIT(12) /* Primary MPath Select */ #define TOD_ST_PRI_SPATH_SELECT PPC_BIT(15) /* Primary SPath Select */ /* Secondary master/slave path select (0 = PATH_0, 1 = PATH_1) */ #define TOD_ST_SEC_MPATH_SELECT PPC_BIT(16) /* Secondary MPath Select */ #define TOD_ST_SEC_SPATH_SELECT PPC_BIT(19) /* Secondary SPath Select */ #define TOD_ST_ACTIVE_MASTER PPC_BIT(23) #define TOD_ST_BACKUP_MASTER PPC_BIT(24) /* TOD chip XSCOM addresses */ #define TOD_CHIP_CTRL 0x00040010 /* Chip control register */ #define TOD_TTYPE_0 0x00040011 #define TOD_TTYPE_1 0x00040012 /* PSS switch */ #define TOD_TTYPE_2 0x00040013 /* Enable step checkers */ #define TOD_TTYPE_3 0x00040014 /* Request TOD */ #define TOD_TTYPE_4 0x00040015 /* Send TOD */ #define TOD_TTYPE_5 0x00040016 /* Invalidate TOD */ #define TOD_CHIPTOD_TO_TB 0x00040017 #define TOD_LOAD_TOD_MOD 0x00040018 #define TOD_CHIPTOD_VALUE 0x00040020 #define TOD_CHIPTOD_LOAD_TB 0x00040021 #define TOD_CHIPTOD_FSM 0x00040024 /* -- TOD PIB Master reg -- */ #define TOD_PIB_MASTER 0x00040027 #define TOD_PIBM_ADDR_CFG_MCAST PPC_BIT(25) #define TOD_PIBM_ADDR_CFG_SLADDR PPC_BITMASK(26, 31) #define TOD_PIBM_TTYPE4_SEND_MODE PPC_BIT(32) #define TOD_PIBM_TTYPE4_SEND_ENBL PPC_BIT(33) /* -- TOD Error interrupt register -- */ #define TOD_ERROR 0x00040030 /* SYNC errors */ #define TOD_ERR_CRMO_PARITY PPC_BIT(0) #define TOD_ERR_OSC0_PARITY PPC_BIT(1) #define TOD_ERR_OSC1_PARITY PPC_BIT(2) #define TOD_ERR_PPORT0_CREG_PARITY PPC_BIT(3) #define TOD_ERR_PPORT1_CREG_PARITY PPC_BIT(4) #define TOD_ERR_SPORT0_CREG_PARITY PPC_BIT(5) #define TOD_ERR_SPORT1_CREG_PARITY PPC_BIT(6) #define TOD_ERR_SPATH_CREG_PARITY PPC_BIT(7) #define TOD_ERR_IPATH_CREG_PARITY PPC_BIT(8) #define TOD_ERR_PSMS_CREG_PARITY PPC_BIT(9) #define TOD_ERR_CRITC_PARITY PPC_BIT(13) #define TOD_ERR_MP0_STEP_CHECK PPC_BIT(14) 
#define   TOD_ERR_MP1_STEP_CHECK	PPC_BIT(15)
#define   TOD_ERR_PSS_HAMMING_DISTANCE	PPC_BIT(18)
#define   TOD_ERR_DELAY_COMPL_PARITY	PPC_BIT(22)
/* CNTR errors */
#define   TOD_ERR_CTCR_PARITY		PPC_BIT(32)
#define   TOD_ERR_TOD_SYNC_CHECK	PPC_BIT(33)
#define   TOD_ERR_TOD_FSM_PARITY	PPC_BIT(34)
#define   TOD_ERR_TOD_REGISTER_PARITY	PPC_BIT(35)
#define   TOD_ERR_OVERFLOW_YR2042	PPC_BIT(36)
#define   TOD_ERR_TOD_WOF_LSTEP_PARITY	PPC_BIT(37)
#define   TOD_ERR_TTYPE0_RECVD		PPC_BIT(38)
#define   TOD_ERR_TTYPE1_RECVD		PPC_BIT(39)
#define   TOD_ERR_TTYPE2_RECVD		PPC_BIT(40)
#define   TOD_ERR_TTYPE3_RECVD		PPC_BIT(41)
#define   TOD_ERR_TTYPE4_RECVD		PPC_BIT(42)
#define   TOD_ERR_TTYPE5_RECVD		PPC_BIT(43)

/*
 * -- TOD Error inject register --
 * Written with TOD_ERR_* bits to inject the corresponding error; this file
 * uses it to stop slave TODs by injecting a sync check error.
 */
#define TOD_ERROR_INJECT		0x00040031

/* Local FIR EH.TPCHIP.TPC.LOCAL_FIR */
#define LOCAL_CORE_FIR		0x0104000C
#define LFIR_SWITCH_COMPLETE	PPC_BIT(18)

/* Number of iterations for the various timeouts */
#define TIMEOUT_LOOPS		20000000

/* TOD active Primary/secondary configuration (TOD_STATUS bits 0-2) */
#define TOD_PRI_CONF_IN_USE	0	/* Tod using primary topology*/
#define TOD_SEC_CONF_IN_USE	7	/* Tod using secondary topo */

/* Timebase State Machine error state */
#define TBST_STATE_ERROR	9

/* Processor generation; selects how the PIB master address is computed */
static enum chiptod_type {
	chiptod_unknown,
	chiptod_p7,
	chiptod_p8,
	chiptod_p9
} chiptod_type;

/*
 * Role of a chip TOD within a topology. UNKNOWN is -1 so that "role + 1"
 * can be used as an index into a name table.
 */
enum chiptod_chip_role {
	chiptod_chip_role_UNKNOWN = -1,
	chiptod_chip_role_MDMT = 0,	/* Master Drawer Master TOD */
	chiptod_chip_role_MDST,		/* Master Drawer Slave TOD */
	chiptod_chip_role_SDMT,		/* Slave Drawer Master TOD */
	chiptod_chip_role_SDST,		/* Slave Drawer Slave TOD */
};

/*
 * Status of the master chip TOD of a topology. The value -1 is also stored
 * in variables of this type to mean "unknown".
 */
enum chiptod_chip_status {
	chiptod_active_master = 0,	/* Chip TOD is Active master */
	chiptod_backup_master = 1,	/* Chip TOD is backup master */
	chiptod_backup_disabled,	/* Chip TOD is backup but disabled */
};

/* Per-topology description of the chip driving that topology */
struct chiptod_chip_config_info {
	int32_t id;				/* chip id */
	enum chiptod_chip_role role;		/* Chip role */
	enum chiptod_chip_status status;	/* active/backup/disabled */
};

/*
 * Both start out as -1; chiptod_wakeup_resync() does nothing while
 * chiptod_primary is negative.
 * NOTE(review): presumably the chip ids of the primary/secondary chip TOD;
 * they are assigned outside this part of the file.
 */
static int32_t chiptod_primary = -1;
static int32_t
chiptod_secondary = -1;

/* Topology currently in use; a negative value means "unknown" */
static enum chiptod_topology current_topology = chiptod_topo_unknown;

/*
 * chiptod_topology_info holds primary/secondary chip configuration info.
 * This info is initialized during chiptod_init(). This is an array of two:
 *	[0] = [chiptod_topo_primary] = Primary topology config info
 *	[1] = [chiptod_topo_secondary] = Secondary topology config info
 */
static struct chiptod_chip_config_info chiptod_topology_info[2];

/*
 * Array of TOD control registers that holds last known valid values.
 *
 * Cache chiptod control register values at following instances:
 *	1. Chiptod initialization
 *	2. After topology switch is complete.
 *	3. Upon receiving enable/disable topology request from FSP.
 *
 * Cache following chip TOD control registers:
 *  - Master Path control register (0x00040000)
 *  - Primary Port-0 control register (0x00040001)
 *  - Primary Port-1 control register (0x00040002)
 *  - Secondary Port-0 control register (0x00040003)
 *  - Secondary Port-1 control register (0x00040004)
 *  - Slave Path control register (0x00040005)
 *  - Internal Path control register (0x00040006)
 *  - Primary/secondary master/slave control register (0x00040007)
 *  - Chip control register (0x00040010)
 *
 * This data is used for restoring respective TOD registers to sane values
 * whenever parity errors are reported on these registers (through HMI).
 * The error_bit maps to corresponding bit from TOD error register that
 * reports parity error on respective TOD registers.
 */
static struct chiptod_tod_regs {
	/* error bit from TOD Error reg */
	const uint64_t	error_bit;

	/* xscom address of TOD register to be restored. */
	const uint64_t	xscom_addr;
	/* per chip cached value of TOD control registers to be restored.
*/ struct { uint64_t data; bool valid; } val[MAX_CHIPS]; } chiptod_tod_regs[] = { { TOD_ERR_CRMO_PARITY, TOD_MASTER_PATH_CTRL, { } }, { TOD_ERR_PPORT0_CREG_PARITY, TOD_PRI_PORT0_CTRL, { } }, { TOD_ERR_PPORT1_CREG_PARITY, TOD_PRI_PORT1_CTRL, { } }, { TOD_ERR_SPORT0_CREG_PARITY, TOD_SEC_PORT0_CTRL, { } }, { TOD_ERR_SPORT1_CREG_PARITY, TOD_SEC_PORT1_CTRL, { } }, { TOD_ERR_SPATH_CREG_PARITY, TOD_SLAVE_PATH_CTRL, { } }, { TOD_ERR_IPATH_CREG_PARITY, TOD_INTERNAL_PATH_CTRL, { } }, { TOD_ERR_PSMS_CREG_PARITY, TOD_PSMS_CTRL, { } }, { TOD_ERR_CTCR_PARITY, TOD_CHIP_CTRL, { } }, }; /* The base TFMR value is the same for the whole machine * for now as far as I can tell */ static uint64_t base_tfmr; /* * For now, we use a global lock for runtime chiptod operations, * eventually make this a per-core lock for wakeup rsync and * take all of them for RAS cases. */ static struct lock chiptod_lock = LOCK_UNLOCKED; static void _chiptod_cache_tod_regs(int32_t chip_id) { int i; for (i = 0; i < ARRAY_SIZE(chiptod_tod_regs); i++) { if (xscom_read(chip_id, chiptod_tod_regs[i].xscom_addr, &(chiptod_tod_regs[i].val[chip_id].data))) { prerror("XSCOM error reading 0x%08llx reg.\n", chiptod_tod_regs[i].xscom_addr); /* Invalidate this record and continue */ chiptod_tod_regs[i].val[chip_id].valid = 0; continue; } chiptod_tod_regs[i].val[chip_id].valid = 1; } } static void chiptod_cache_tod_registers(void) { struct proc_chip *chip; for_each_chip(chip) _chiptod_cache_tod_regs(chip->id); } static void print_topo_info(enum chiptod_topology topo) { const char *role[] = { "Unknown", "MDMT", "MDST", "SDMT", "SDST" }; const char *status[] = { "Unknown", "Active Master", "Backup Master", "Backup Master Disabled" }; prlog(PR_DEBUG, " Chip id: %d, Role: %s, Status: %s\n", chiptod_topology_info[topo].id, role[chiptod_topology_info[topo].role + 1], status[chiptod_topology_info[topo].status + 1]); } static void print_topology_info(void) { const char *topo[] = { "Unknown", "Primary", "Secondary" }; if 
(current_topology < 0) return; prlog(PR_DEBUG, "TOD Topology in Use: %s\n", topo[current_topology+1]); prlog(PR_DEBUG, " Primary configuration:\n"); print_topo_info(chiptod_topo_primary); prlog(PR_DEBUG, " Secondary configuration:\n"); print_topo_info(chiptod_topo_secondary); } static enum chiptod_topology query_current_topology(void) { uint64_t tod_status; if (xscom_readme(TOD_STATUS, &tod_status)) { prerror("XSCOM error reading TOD_STATUS reg\n"); return chiptod_topo_unknown; } /* * Tod status register bit [0-2] tells configuration in use. * 000 <= primary configuration in use * 111 <= secondary configuration in use */ if ((tod_status & TOD_ST_TOPOLOGY_SELECT) == TOD_PRI_CONF_IN_USE) return chiptod_topo_primary; else return chiptod_topo_secondary; } static enum chiptod_chip_role chiptod_get_chip_role(enum chiptod_topology topology, int32_t chip_id) { uint64_t tod_ctrl; enum chiptod_chip_role role = chiptod_chip_role_UNKNOWN; if (chip_id < 0) return role; if (xscom_read(chip_id, TOD_PSMS_CTRL, &tod_ctrl)) { prerror("XSCOM error reading TOD_PSMS_CTRL\n"); return chiptod_chip_role_UNKNOWN; } switch (topology) { case chiptod_topo_primary: if (tod_ctrl & TOD_PSMSC_PM_DRAW_SELECT) { if (tod_ctrl & TOD_PSMSC_PM_TOD_SELECT) role = chiptod_chip_role_MDMT; else role = chiptod_chip_role_MDST; } else { if (tod_ctrl & TOD_PSMSC_PM_TOD_SELECT) role = chiptod_chip_role_SDMT; else role = chiptod_chip_role_SDST; } break; case chiptod_topo_secondary: if (tod_ctrl & TOD_PSMSC_SM_DRAW_SELECT) { if (tod_ctrl & TOD_PSMSC_SM_TOD_SELECT) role = chiptod_chip_role_MDMT; else role = chiptod_chip_role_MDST; } else { if (tod_ctrl & TOD_PSMSC_SM_TOD_SELECT) role = chiptod_chip_role_SDMT; else role = chiptod_chip_role_SDST; } break; case chiptod_topo_unknown: default: break; } return role; } /* * Check and return the status of sync step network for a given * topology configuration. 
* Return values: * true: Sync Step network is running * false: Sync Step network is not running */ static bool chiptod_sync_step_check_running(enum chiptod_topology topology) { uint64_t tod_status; enum chiptod_chip_role role; bool running = false; int32_t chip_id = chiptod_topology_info[topology].id; /* Sanity check */ if (chip_id < 0) return false; if (xscom_read(chip_id, TOD_STATUS, &tod_status)) { prerror("XSCOM error reading TOD_STATUS reg\n"); return false; } switch (topology) { case chiptod_topo_primary: /* Primary configuration */ role = chiptod_topology_info[topology].role; if (role == chiptod_chip_role_MDMT) { /* * Chip is using Master path. * Check if it is using path_0/path_1 and then * validity of that path. * * TOD_STATUS[12]: 0 = PATH_0, 1 = PATH_1 */ if (tod_status & TOD_ST_PRI_MPATH_SELECT) { if (tod_status & TOD_ST_MPATH1_STEP_VALID) running = true; } else { if (tod_status & TOD_ST_MPATH0_STEP_VALID) running = true; } } else { /* * Chip is using Slave path. * * TOD_STATUS[15]: 0 = PATH_0, 1 = PATH_1 */ if (tod_status & TOD_ST_PRI_SPATH_SELECT) { if (tod_status & TOD_ST_SPATH1_STEP_VALID) running = true; } else { if (tod_status & TOD_ST_SPATH0_STEP_VALID) running = true; } } break; case chiptod_topo_secondary: /* Secondary configuration */ role = chiptod_topology_info[topology].role; if (role == chiptod_chip_role_MDMT) { /* * Chip is using Master path. * Check if it is using path_0/path_1 and then * validity of that path. * * TOD_STATUS[12]: 0 = PATH_0, 1 = PATH_1 */ if (tod_status & TOD_ST_SEC_MPATH_SELECT) { if (tod_status & TOD_ST_MPATH1_STEP_VALID) running = true; } else { if (tod_status & TOD_ST_MPATH0_STEP_VALID) running = true; } } else { /* * Chip is using Slave path. 
* * TOD_STATUS[15]: 0 = PATH_0, 1 = PATH_1 */ if (tod_status & TOD_ST_SEC_SPATH_SELECT) { if (tod_status & TOD_ST_SPATH1_STEP_VALID) running = true; } else { if (tod_status & TOD_ST_SPATH0_STEP_VALID) running = true; } } break; default: break; } return running; } static enum chiptod_chip_status _chiptod_get_chip_status(int32_t chip_id) { uint64_t tod_status; enum chiptod_chip_status status = -1; if (chip_id < 0) return chiptod_backup_disabled; if (xscom_read(chip_id, TOD_STATUS, &tod_status)) { prerror("XSCOM error reading TOD_STATUS reg\n"); return status; } if (tod_status & TOD_ST_ACTIVE_MASTER) status = chiptod_active_master; else if (tod_status & TOD_ST_BACKUP_MASTER) status = chiptod_backup_master; return status; } static enum chiptod_chip_status chiptod_get_chip_status(enum chiptod_topology topology) { return _chiptod_get_chip_status(chiptod_topology_info[topology].id); } static void chiptod_update_topology(enum chiptod_topology topo) { int32_t chip_id = chiptod_topology_info[topo].id; if (chip_id < 0) return; chiptod_topology_info[topo].role = chiptod_get_chip_role(topo, chip_id); chiptod_topology_info[topo].status = chiptod_get_chip_status(topo); /* * If chip TOD on this topology is a backup master then check if * sync/step network is running on this topology. If not, * then mark status as backup not valid. 
*/ if ((chiptod_topology_info[topo].status == chiptod_backup_master) && !chiptod_sync_step_check_running(topo)) chiptod_topology_info[topo].status = chiptod_backup_disabled; } static void chiptod_setup_base_tfmr(void) { struct dt_node *cpu = this_cpu()->node; uint64_t core_freq, tod_freq; uint64_t mcbs; base_tfmr = SPR_TFMR_TB_ECLIPZ; /* Get CPU and TOD freqs in Hz */ if (dt_has_node_property(cpu, "ibm,extended-clock-frequency", NULL)) core_freq = dt_prop_get_u64(cpu, "ibm,extended-clock-frequency"); else core_freq = dt_prop_get_u32(cpu, "clock-frequency"); tod_freq = 32000000; /* Calculate the "Max Cycles Between Steps" value according * to the magic formula: * * mcbs = (core_freq * max_jitter_factor) / (4 * tod_freq) / 100; * * The max jitter factor is set to 240 based on what pHyp uses. */ mcbs = (core_freq * 240) / (4 * tod_freq) / 100; prlog(PR_INFO, "Calculated MCBS is 0x%llx" " (Cfreq=%lld Tfreq=%lld)\n", mcbs, core_freq, tod_freq); /* Bake that all into TFMR */ base_tfmr = SETFIELD(SPR_TFMR_MAX_CYC_BET_STEPS, base_tfmr, mcbs); base_tfmr = SETFIELD(SPR_TFMR_N_CLKS_PER_STEP, base_tfmr, 0); base_tfmr = SETFIELD(SPR_TFMR_SYNC_BIT_SEL, base_tfmr, 4); } static bool chiptod_mod_tb(void) { uint64_t tfmr = base_tfmr; uint64_t timeout = 0; /* Switch timebase to "Not Set" state */ mtspr(SPR_TFMR, tfmr | SPR_TFMR_LOAD_TOD_MOD); do { if (++timeout >= (TIMEOUT_LOOPS*2)) { prerror("TB \"Not Set\" timeout\n"); return false; } tfmr = mfspr(SPR_TFMR); if (tfmr & SPR_TFMR_TFMR_CORRUPT) { prerror("TB \"Not Set\" TFMR corrupt\n"); return false; } if (GETFIELD(SPR_TFMR_TBST_ENCODED, tfmr) == 9) { prerror("TB \"Not Set\" TOD in error state\n"); return false; } } while (tfmr & SPR_TFMR_LOAD_TOD_MOD); return true; } static bool chiptod_interrupt_check(void) { uint64_t tfmr; uint64_t timeout = 0; do { if (++timeout >= TIMEOUT_LOOPS) { prerror("Interrupt check fail\n"); return false; } tfmr = mfspr(SPR_TFMR); if (tfmr & SPR_TFMR_TFMR_CORRUPT) { prerror("Interrupt check TFMR corrupt 
!\n"); return false; } } while (tfmr & SPR_TFMR_CHIP_TOD_INTERRUPT); return true; } static bool chiptod_running_check(uint32_t chip_id) { uint64_t tval; if (xscom_read(chip_id, TOD_CHIPTOD_FSM, &tval)) { prerror("XSCOM error polling run\n"); return false; } if (tval & 0x0800000000000000UL) return true; else return false; } static bool chiptod_poll_running(void) { uint64_t timeout = 0; uint64_t tval; /* Chip TOD running check */ do { if (++timeout >= TIMEOUT_LOOPS) { prerror("Running check fail timeout\n"); return false; } if (xscom_readme(TOD_CHIPTOD_FSM, &tval)) { prerror("XSCOM error polling run\n"); return false; } } while (!(tval & 0x0800000000000000UL)); return true; } static bool chiptod_to_tb(void) { uint64_t tval, tfmr, tvbits; uint64_t timeout = 0; /* Tell the ChipTOD about our fabric address * * The pib_master value is calculated from the CPU core ID, given in * the PIR. Because we have different core/thread arrangements in the * PIR between p7 and p8, we need to do the calculation differently. 
* * p7: 0b00001 || 3-bit core id * p8: 0b0001 || 4-bit core id */ if (xscom_readme(TOD_PIB_MASTER, &tval)) { prerror("XSCOM error reading PIB_MASTER\n"); return false; } if (chiptod_type == chiptod_p9) { tvbits = (this_cpu()->pir >> 2) & 0x1f; tvbits |= 0x20; } else if (chiptod_type == chiptod_p8) { tvbits = (this_cpu()->pir >> 3) & 0xf; tvbits |= 0x10; } else { tvbits = (this_cpu()->pir >> 2) & 0x7; tvbits |= 0x08; } tval &= ~TOD_PIBM_ADDR_CFG_MCAST; tval = SETFIELD(TOD_PIBM_ADDR_CFG_SLADDR, tval, tvbits); if (xscom_writeme(TOD_PIB_MASTER, tval)) { prerror("XSCOM error writing PIB_MASTER\n"); return false; } /* Make us ready to get the TB from the chipTOD */ mtspr(SPR_TFMR, base_tfmr | SPR_TFMR_MOVE_CHIP_TOD_TO_TB); /* Tell the ChipTOD to send it */ if (xscom_writeme(TOD_CHIPTOD_TO_TB, PPC_BIT(0))) { prerror("XSCOM error writing CHIPTOD_TO_TB\n"); return false; } /* Wait for it to complete */ timeout = 0; do { if (++timeout >= TIMEOUT_LOOPS) { prerror("Chip to TB timeout\n"); return false; } tfmr = mfspr(SPR_TFMR); if (tfmr & SPR_TFMR_TFMR_CORRUPT) { prerror("MoveToTB: corrupt TFMR !\n"); return false; } } while (tfmr & SPR_TFMR_MOVE_CHIP_TOD_TO_TB); return true; } static bool chiptod_check_tb_running(void) { /* We used to wait for two SYNC pulses in TFMR but that * doesn't seem to occur in sim, so instead we use a * method similar to what pHyp does which is to check for * TFMR SPR_TFMR_TB_VALID and not SPR_TFMR_TFMR_CORRUPT */ #if 0 uint64_t tfmr, timeout; unsigned int i; for (i = 0; i < 2; i++) { tfmr = mfspr(SPR_TFMR); tfmr &= ~SPR_TFMR_TB_SYNC_OCCURED; mtspr(SPR_TFMR, tfmr); timeout = 0; do { if (++timeout >= TIMEOUT_LOOPS) { prerror("CHIPTOD: No sync pulses\n"); return false; } tfmr = mfspr(SPR_TFMR); } while (!(tfmr & SPR_TFMR_TB_SYNC_OCCURED)); } #else uint64_t tfmr = mfspr(SPR_TFMR); return (tfmr & SPR_TFMR_TB_VALID) && !(tfmr & SPR_TFMR_TFMR_CORRUPT); #endif return true; } static bool chiptod_reset_tb_errors(void) { uint64_t tfmr; unsigned long timeout = 
0; /* Ask for automatic clear of errors */ tfmr = base_tfmr | SPR_TFMR_CLEAR_TB_ERRORS; /* Additionally pHyp sets these (write-1-to-clear ?) */ tfmr |= SPR_TFMR_TB_MISSING_SYNC; tfmr |= SPR_TFMR_TB_MISSING_STEP; tfmr |= SPR_TFMR_TB_RESIDUE_ERR; mtspr(SPR_TFMR, tfmr); /* We have to write "Clear TB Errors" again */ tfmr = base_tfmr | SPR_TFMR_CLEAR_TB_ERRORS; mtspr(SPR_TFMR, tfmr); do { if (++timeout >= TIMEOUT_LOOPS) { /* Don't actually do anything on error for * now ... not much we can do, panic maybe ? */ prerror("TB error reset timeout !\n"); return false; } tfmr = mfspr(SPR_TFMR); if (tfmr & SPR_TFMR_TFMR_CORRUPT) { prerror("TB error reset: corrupt TFMR !\n"); return false; } } while (tfmr & SPR_TFMR_CLEAR_TB_ERRORS); return true; } static void chiptod_cleanup_thread_tfmr(void) { uint64_t tfmr = base_tfmr; tfmr |= SPR_TFMR_PURR_PARITY_ERR; tfmr |= SPR_TFMR_SPURR_PARITY_ERR; tfmr |= SPR_TFMR_DEC_PARITY_ERR; tfmr |= SPR_TFMR_TFMR_CORRUPT; tfmr |= SPR_TFMR_PURR_OVERFLOW; tfmr |= SPR_TFMR_SPURR_OVERFLOW; mtspr(SPR_TFMR, tfmr); } static void chiptod_reset_tod_errors(void) { uint64_t terr; /* * At boot, we clear the errors that the firmware is * supposed to handle. List provided by the pHyp folks. */ terr = TOD_ERR_CRITC_PARITY; terr |= TOD_ERR_PSS_HAMMING_DISTANCE; terr |= TOD_ERR_DELAY_COMPL_PARITY; terr |= TOD_ERR_CTCR_PARITY; terr |= TOD_ERR_TOD_SYNC_CHECK; terr |= TOD_ERR_TOD_FSM_PARITY; terr |= TOD_ERR_TOD_REGISTER_PARITY; if (xscom_writeme(TOD_ERROR, terr)) { prerror("XSCOM error writing TOD_ERROR !\n"); /* Not much we can do here ... abort ? 
*/ } } static void chiptod_sync_master(void *data) { uint64_t initial_tb_value; bool *result = data; prlog(PR_DEBUG, "Master sync on CPU PIR 0x%04x...\n", this_cpu()->pir); /* Apply base tfmr */ mtspr(SPR_TFMR, base_tfmr); /* From recipe provided by pHyp folks, reset various errors * before attempting the sync */ chiptod_reset_tb_errors(); /* Cleanup thread tfmr bits */ chiptod_cleanup_thread_tfmr(); /* Reset errors in the chiptod itself */ chiptod_reset_tod_errors(); /* Switch timebase to "Not Set" state */ if (!chiptod_mod_tb()) goto error; prlog(PR_INSANE, "SYNC MASTER Step 2 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Chip TOD step checkers enable */ if (xscom_writeme(TOD_TTYPE_2, PPC_BIT(0))) { prerror("XSCOM error enabling steppers\n"); goto error; } prlog(PR_INSANE, "SYNC MASTER Step 3 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Chip TOD interrupt check */ if (!chiptod_interrupt_check()) goto error; prlog(PR_INSANE, "SYNC MASTER Step 4 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Switch local chiptod to "Not Set" state */ if (xscom_writeme(TOD_LOAD_TOD_MOD, PPC_BIT(0))) { prerror("XSCOM error sending LOAD_TOD_MOD\n"); goto error; } /* Switch all remote chiptod to "Not Set" state */ if (xscom_writeme(TOD_TTYPE_5, PPC_BIT(0))) { prerror("XSCOM error sending TTYPE_5\n"); goto error; } /* * Load the master's current timebase value into the Chip TOD * network. This is so we have sane timestamps across the whole * IPL process. The Chip TOD documentation says that the loaded * value needs to be one STEP before a SYNC. In other words, * set the low bits to 0x1ff0. 
*/ initial_tb_value = (mftb() & ~0x1fff) | 0x1ff0; /* Chip TOD load initial value */ if (xscom_writeme(TOD_CHIPTOD_LOAD_TB, initial_tb_value)) { prerror("XSCOM error setting init TB\n"); goto error; } prlog(PR_INSANE, "SYNC MASTER Step 5 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); if (!chiptod_poll_running()) goto error; prlog(PR_INSANE, "SYNC MASTER Step 6 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Move chiptod value to core TB */ if (!chiptod_to_tb()) goto error; prlog(PR_INSANE, "SYNC MASTER Step 7 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Send local chip TOD to all chips TOD */ if (xscom_writeme(TOD_TTYPE_4, PPC_BIT(0))) { prerror("XSCOM error sending TTYPE_4\n"); goto error; } /* Check if TB is running */ if (!chiptod_check_tb_running()) goto error; prlog(PR_INSANE, "Master sync completed, TB=%lx\n", mfspr(SPR_TBRL)); /* * A little delay to make sure the remote chips get up to * speed before we start syncing them. * * We have to do it here because we know our TB is running * while the boot thread TB might not yet. */ time_wait_ms(1); *result = true; return; error: prerror("Master sync failed! 
TFMR=0x%016lx\n", mfspr(SPR_TFMR)); *result = false; } static void chiptod_sync_slave(void *data) { bool *result = data; /* Only get primaries, not threads */ if (this_cpu()->is_secondary) { /* On secondaries we just cleanup the TFMR */ chiptod_cleanup_thread_tfmr(); *result = true; return; } prlog(PR_DEBUG, "Slave sync on CPU PIR 0x%04x...\n", this_cpu()->pir); /* Apply base tfmr */ mtspr(SPR_TFMR, base_tfmr); /* From recipe provided by pHyp folks, reset various errors * before attempting the sync */ chiptod_reset_tb_errors(); /* Cleanup thread tfmr bits */ chiptod_cleanup_thread_tfmr(); /* Switch timebase to "Not Set" state */ if (!chiptod_mod_tb()) goto error; prlog(PR_INSANE, "SYNC SLAVE Step 2 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Chip TOD running check */ if (!chiptod_poll_running()) goto error; prlog(PR_INSANE, "SYNC SLAVE Step 3 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Chip TOD interrupt check */ if (!chiptod_interrupt_check()) goto error; prlog(PR_INSANE, "SYNC SLAVE Step 4 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Move chiptod value to core TB */ if (!chiptod_to_tb()) goto error; prlog(PR_INSANE, "SYNC SLAVE Step 5 TFMR=0x%016lx\n", mfspr(SPR_TFMR)); /* Check if TB is running */ if (!chiptod_check_tb_running()) goto error; prlog(PR_INSANE, "Slave sync completed, TB=%lx\n", mfspr(SPR_TBRL)); *result = true; return; error: prerror("Slave sync failed ! TFMR=0x%016lx\n", mfspr(SPR_TFMR)); *result = false; } bool chiptod_wakeup_resync(void) { if (chiptod_primary < 0) return 0; lock(&chiptod_lock); /* Apply base tfmr */ mtspr(SPR_TFMR, base_tfmr); /* From recipe provided by pHyp folks, reset various errors * before attempting the sync */ chiptod_reset_tb_errors(); /* Cleanup thread tfmr bits */ chiptod_cleanup_thread_tfmr(); /* Switch timebase to "Not Set" state */ if (!chiptod_mod_tb()) goto error; /* Move chiptod value to core TB */ if (!chiptod_to_tb()) goto error; unlock(&chiptod_lock); return true; error: prerror("Resync failed ! 
TFMR=0x%16lx\n", mfspr(SPR_TFMR)); unlock(&chiptod_lock); return false; } static int chiptod_recover_tod_errors(void) { uint64_t terr; uint64_t treset = 0; int i; int32_t chip_id = this_cpu()->chip_id; /* Read TOD error register */ if (xscom_readme(TOD_ERROR, &terr)) { prerror("XSCOM error reading TOD_ERROR reg\n"); return 0; } /* Check for sync check error and recover */ if ((terr & TOD_ERR_TOD_SYNC_CHECK) || (terr & TOD_ERR_TOD_FSM_PARITY) || (terr & TOD_ERR_CTCR_PARITY) || (terr & TOD_ERR_PSS_HAMMING_DISTANCE) || (terr & TOD_ERR_DELAY_COMPL_PARITY) || (terr & TOD_ERR_TOD_REGISTER_PARITY)) { chiptod_reset_tod_errors(); } /* * Check for TOD control register parity errors and restore those * registers with last saved valid values. */ for (i = 0; i < ARRAY_SIZE(chiptod_tod_regs); i++) { if (!(terr & chiptod_tod_regs[i].error_bit)) continue; /* Check if we have valid last saved register value. */ if (!chiptod_tod_regs[i].val[chip_id].valid) { prerror("Failed to restore TOD register: %08llx", chiptod_tod_regs[i].xscom_addr); return 0; } prlog(PR_DEBUG, "Parity error, Restoring TOD register: " "%08llx\n", chiptod_tod_regs[i].xscom_addr); if (xscom_writeme(chiptod_tod_regs[i].xscom_addr, chiptod_tod_regs[i].val[chip_id].data)) { prerror("XSCOM error writing 0x%08llx reg.\n", chiptod_tod_regs[i].xscom_addr); return 0; } treset |= chiptod_tod_regs[i].error_bit; } if (treset && (xscom_writeme(TOD_ERROR, treset))) { prerror("XSCOM error writing TOD_ERROR !\n"); return 0; } /* We have handled all the TOD errors routed to hypervisor */ return 1; } static int32_t chiptod_get_active_master(void) { if (current_topology < 0) return -1; if (chiptod_topology_info[current_topology].status == chiptod_active_master) return chiptod_topology_info[current_topology].id; return -1; } /* Return true if Active master TOD is running. 
*/ static bool chiptod_master_running(void) { int32_t active_master_chip; active_master_chip = chiptod_get_active_master(); if (active_master_chip != -1) { if (chiptod_running_check(active_master_chip)) return true; } return false; } static bool chiptod_set_ttype4_mode(struct proc_chip *chip, bool enable) { uint64_t tval; /* Sanity check */ if (!chip) return false; if (xscom_read(chip->id, TOD_PIB_MASTER, &tval)) { prerror("XSCOM error reading PIB_MASTER\n"); return false; } if (enable) { /* * Enable TTYPE4 send mode. This allows TOD to respond to * TTYPE3 request. */ tval |= TOD_PIBM_TTYPE4_SEND_MODE; tval |= TOD_PIBM_TTYPE4_SEND_ENBL; } else { /* Disable TTYPE4 send mode. */ tval &= ~TOD_PIBM_TTYPE4_SEND_MODE; tval &= ~TOD_PIBM_TTYPE4_SEND_ENBL; } if (xscom_write(chip->id, TOD_PIB_MASTER, tval)) { prerror("XSCOM error writing PIB_MASTER\n"); return false; } return true; } /* Stop TODs on slave chips in backup topology. */ static void chiptod_stop_slave_tods(void) { struct proc_chip *chip = NULL; enum chiptod_topology backup_topo; uint64_t terr = 0; /* Inject TOD sync check error on salve TODs to stop them. */ terr |= TOD_ERR_TOD_SYNC_CHECK; if (current_topology == chiptod_topo_primary) backup_topo = chiptod_topo_secondary; else backup_topo = chiptod_topo_primary; for_each_chip(chip) { enum chiptod_chip_role role; /* Current chip TOD is already in stooped state */ if (chip->id == this_cpu()->chip_id) continue; role = chiptod_get_chip_role(backup_topo, chip->id); /* Skip backup master chip TOD. */ if (role == chiptod_chip_role_MDMT) continue; if (xscom_write(chip->id, TOD_ERROR_INJECT, terr)) prerror("XSCOM error writing TOD_ERROR_INJ\n"); if (chiptod_running_check(chip->id)) { prlog(PR_DEBUG, "Failed to stop TOD on slave CHIP [%d]\n", chip->id); } } } static bool is_topology_switch_required(void) { int32_t active_master_chip; uint64_t tod_error; active_master_chip = chiptod_get_active_master(); /* Check if TOD is running on Active master. 
*/ if (chiptod_master_running()) return false; /* * Check if sync/step network is running. * * If sync/step network is not running on current active topology * then we need switch topology to recover from TOD error. */ if (!chiptod_sync_step_check_running(current_topology)) { prlog(PR_DEBUG, "Sync/Step network not running\n"); return true; } /* * Check if there is a step check error reported on * Active master. */ if (xscom_read(active_master_chip, TOD_ERROR, &tod_error)) { prerror("XSCOM error reading TOD_ERROR reg\n"); /* * Can't do anything here. But we already found that * sync/step network is running. Hence return false. */ return false; } if (tod_error & TOD_ERR_MP0_STEP_CHECK) { prlog(PR_DEBUG, "TOD step check error\n"); return true; } return false; } static bool chiptod_backup_valid(void) { enum chiptod_topology backup_topo; if (current_topology < 0) return false; if (current_topology == chiptod_topo_primary) backup_topo = chiptod_topo_secondary; else backup_topo = chiptod_topo_primary; if (chiptod_topology_info[backup_topo].status == chiptod_backup_master) return chiptod_sync_step_check_running(backup_topo); return false; } static void chiptod_topology_switch_complete(void) { /* * After the topology switch, we may have a non-functional backup * topology, and we won't be able to recover from future TOD errors * that requires topology switch. Someone needs to either fix it OR * configure new functional backup topology. * * Bit 18 of the Pervasive FIR is used to signal that TOD error * analysis needs to be performed. This allows FSP/PRD to * investigate and re-configure new backup topology if required. * Once new backup topology is configured and ready, FSP sends a * mailbox command xE6, s/c 0x06, mod 0, to enable the backup * topology. * * This isn't documented anywhere. This info is provided by FSP * folks. 
*/ if (xscom_writeme(LOCAL_CORE_FIR, LFIR_SWITCH_COMPLETE)) { prerror("XSCOM error writing LOCAL_CORE_FIR\n"); return; } /* Save TOD control registers values. */ chiptod_cache_tod_registers(); prlog(PR_DEBUG, "Topology switch complete\n"); print_topology_info(); } /* * Sync up TOD with other chips and get TOD in running state. * Check if current topology is active and running. If not, then * trigger a topology switch. */ static int chiptod_start_tod(void) { struct proc_chip *chip = NULL; int rc = 1; /* Do a topology switch if required. */ if (is_topology_switch_required()) { int32_t mchip = chiptod_get_active_master(); prlog(PR_DEBUG, "Need topology switch to recover\n"); /* * There is a failure in StepSync network in current * active topology. TOD is not running on active master chip. * We need to sync with backup master chip TOD. * But before we do that we need to switch topology to make * backup master as the new active master. Once we switch the * topology we can then request TOD value from new active * master. But make sure we move local chiptod to Not Set * before requesting TOD value. * * Before triggering a topology switch, check if backup * is valid and stop all slave TODs in backup topology. */ if (!chiptod_backup_valid()) { prerror("Backup master is not enabled. " "Can not do a topology switch.\n"); return 0; } chiptod_stop_slave_tods(); if (xscom_write(mchip, TOD_TTYPE_1, PPC_BIT(0))) { prerror("XSCOM error switching primary/secondary\n"); return 0; } /* Update topology info. */ current_topology = query_current_topology(); chiptod_update_topology(chiptod_topo_primary); chiptod_update_topology(chiptod_topo_secondary); /* * We just switched topologies to recover. * Check if new master TOD is running. */ if (!chiptod_master_running()) { prerror("TOD is not running on new master.\n"); return 0; } /* * Enable step checkers on all Chip TODs * * During topology switch, step checkers are disabled * on all Chip TODs by default. Enable them. 
*/ if (xscom_writeme(TOD_TTYPE_2, PPC_BIT(0))) { prerror("XSCOM error enabling steppers\n"); return 0; } chiptod_topology_switch_complete(); } if (!chiptod_master_running()) { /* * Active Master TOD is not running, which means it won't * respond to TTYPE_3 request. * * Find a chip that has TOD in running state and configure * it to respond to TTYPE_3 request. */ for_each_chip(chip) { if (chiptod_running_check(chip->id)) { if (chiptod_set_ttype4_mode(chip, true)) break; } } } /* Switch local chiptod to "Not Set" state */ if (xscom_writeme(TOD_LOAD_TOD_MOD, PPC_BIT(0))) { prerror("XSCOM error sending LOAD_TOD_MOD\n"); return 0; } /* * Request the current TOD value from another chip. * This will move TOD in running state */ if (xscom_writeme(TOD_TTYPE_3, PPC_BIT(0))) { prerror("XSCOM error sending TTYPE_3\n"); return 0; } /* Check if chip TOD is running. */ if (!chiptod_poll_running()) rc = 0; /* Restore the ttype4_mode. */ chiptod_set_ttype4_mode(chip, false); return rc; } static bool tfmr_recover_tb_errors(uint64_t tfmr) { uint64_t tfmr_reset_error; unsigned long timeout = 0; /* Ask for automatic clear of errors */ tfmr_reset_error = base_tfmr | SPR_TFMR_CLEAR_TB_ERRORS; /* Additionally pHyp sets these (write-1-to-clear ?) */ if (tfmr & SPR_TFMR_TB_MISSING_SYNC) tfmr_reset_error |= SPR_TFMR_TB_MISSING_SYNC; if (tfmr & SPR_TFMR_TB_MISSING_STEP) tfmr_reset_error |= SPR_TFMR_TB_MISSING_STEP; /* * write 1 to bit 45 to clear TB residue the error. * TB register has already been reset to zero as part pre-recovery. 
*/
	if (tfmr & SPR_TFMR_TB_RESIDUE_ERR)
		tfmr_reset_error |= SPR_TFMR_TB_RESIDUE_ERR;

	if (tfmr & SPR_TFMR_FW_CONTROL_ERR)
		tfmr_reset_error |= SPR_TFMR_FW_CONTROL_ERR;

	if (tfmr & SPR_TFMR_TBST_CORRUPT)
		tfmr_reset_error |= SPR_TFMR_TBST_CORRUPT;

	mtspr(SPR_TFMR, tfmr_reset_error);

	/* We have to write "Clear TB Errors" again */
	tfmr_reset_error = base_tfmr | SPR_TFMR_CLEAR_TB_ERRORS;
	mtspr(SPR_TFMR, tfmr_reset_error);

	/*
	 * Poll TFMR until the "Clear TB Errors" bit reads back as zero.
	 * Give up after TIMEOUT_LOOPS iterations or as soon as TFMR
	 * reports itself corrupt.
	 */
	do {
		if (++timeout >= TIMEOUT_LOOPS) {
			prerror("TB error reset timeout !\n");
			return false;
		}
		tfmr = mfspr(SPR_TFMR);
		if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
			prerror("TB error reset: corrupt TFMR !\n");
			return false;
		}
	} while (tfmr & SPR_TFMR_CLEAR_TB_ERRORS);
	return true;
}

/*
 * Clear the non-timebase parity errors (HDEC, DEC, PURR, SPURR) that are
 * flagged in @tfmr.
 *
 * For DEC, PURR and SPURR the affected register is rewritten before its
 * error bit is queued for clearing. The accumulated bits are then written
 * to TFMR twice on top of base_tfmr.
 *
 * Returns true if none of the bits that were written are still set when
 * TFMR is read back, false otherwise.
 */
static bool tfmr_recover_non_tb_errors(uint64_t tfmr)
{
	uint64_t tfmr_reset_errors = 0;

	/*
	 * write 1 to bit 26 to clear TFMR HDEC parity error.
	 * HDEC register has already been reset to zero as part pre-recovery.
	 */
	if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR)
		tfmr_reset_errors |= SPR_TFMR_HDEC_PARITY_ERROR;

	if (tfmr & SPR_TFMR_DEC_PARITY_ERR) {
		/* Set DEC with all ones */
		mtspr(SPR_DEC, ~0);

		/* set bit 59 to clear TFMR DEC parity error. */
		tfmr_reset_errors |= SPR_TFMR_DEC_PARITY_ERR;
	}

	/*
	 * Reset PURR/SPURR to recover. We also need help from KVM
	 * layer to handle this change in PURR/SPURR. That needs
	 * to be handled in kernel KVM layer. For now, to recover just
	 * reset it.
	 */
	if (tfmr & SPR_TFMR_PURR_PARITY_ERR) {
		/* set PURR register with sane value or reset it. */
		mtspr(SPR_PURR, 0);

		/* set bit 57 to clear TFMR PURR parity error. */
		tfmr_reset_errors |= SPR_TFMR_PURR_PARITY_ERR;
	}

	if (tfmr & SPR_TFMR_SPURR_PARITY_ERR) {
		/* set SPURR register with sane value or reset it. */
		mtspr(SPR_SPURR, 0);

		/* set bit 58 to clear TFMR SPURR parity error. */
		tfmr_reset_errors |= SPR_TFMR_SPURR_PARITY_ERR;
	}

	/* Write TFMR twice to clear the error */
	mtspr(SPR_TFMR, base_tfmr | tfmr_reset_errors);
	mtspr(SPR_TFMR, base_tfmr | tfmr_reset_errors);

	/* Get fresh copy of TFMR */
	tfmr = mfspr(SPR_TFMR);

	/* Check if TFMR non-TB errors still present. */
	if (tfmr & tfmr_reset_errors) {
		/*
		 * NOTE(review): the value logged is a second read of TFMR,
		 * not the copy that was just tested - confirm intended.
		 */
		prerror("TFMR non-TB error recovery failed! "
			"TFMR=0x%016lx\n", mfspr(SPR_TFMR));
		return false;
	}
	return true;
}

/*
 * TFMR parity error recovery as per pc_workbook:
 *	MT(TFMR) bits 11 and 60 are b'1'
 *	MT(HMER) all bits 1 except for bits 4,5
 *
 * Returns false if TFMR still reports itself corrupt after the clear,
 * or if the timebase state machine is in the error state and
 * chiptod_reset_tb_errors() fails; true otherwise.
 */
static bool chiptod_recover_tfmr_error(void)
{
	uint64_t tfmr;

	/* Get the base TFMR */
	tfmr = base_tfmr;

	/* Set bit 60 to clear TFMR parity error. */
	tfmr |= SPR_TFMR_TFMR_CORRUPT;
	mtspr(SPR_TFMR, tfmr);

	/* Write twice to clear the error */
	mtspr(SPR_TFMR, tfmr);

	/* Get fresh copy of TFMR */
	tfmr = mfspr(SPR_TFMR);

	/* Check if TFMR parity error still present. */
	if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
		prerror("TFMR error recovery: corrupt TFMR !\n");
		return false;
	}

	/*
	 * Now that we have sane value in TFMR, check if Timebase machine
	 * state is in ERROR state. If yes, clear TB errors so that
	 * Timebase machine state changes to RESET state. Once in RESET state
	 * then we can then load TB with TOD value.
	 */
	if (GETFIELD(SPR_TFMR_TBST_ENCODED, tfmr) == TBST_STATE_ERROR) {
		if (!chiptod_reset_tb_errors())
			return false;
	}
	return true;
}

/*
 * Recover from TB and TOD errors.
 * Timebase register is per core and first thread that gets chance to
 * handle interrupt would fix actual TFAC errors and rest of the threads
 * from same core would see no errors. Return -1 if no errors have been
 * found. The caller (handle_hmi_exception) of this function would not
 * send an HMI event to host if return value is -1.
 *
 * Return values:
 *	0	<= Failed to recover from errors
 *	1	<= Successfully recovered from errors
 *	-1	<= No errors found. Errors are already been fixed.
*/
int chiptod_recover_tb_errors(void)
{
	uint64_t tfmr;
	/* -1 means "no errors found"; each recovery step below overrides it. */
	int rc = -1;
	int thread_id;

	/* Bail out before taking the lock when chiptod_primary is negative. */
	if (chiptod_primary < 0)
		return 0;

	lock(&chiptod_lock);

	/* Get fresh copy of TFMR */
	tfmr = mfspr(SPR_TFMR);

	/*
	 * Check for TFMR parity error and recover from it.
	 * We can not trust any other bits in TFMR If it is corrupt. Fix this
	 * before we do anything.
	 */
	if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
		if (!chiptod_recover_tfmr_error()) {
			rc = 0;
			goto error_out;
		}
	}

	/* Get fresh copy of TFMR */
	tfmr = mfspr(SPR_TFMR);

	/*
	 * Workaround for HW logic bug in Power9
	 * Even after clearing TB residue error by one thread it does not
	 * get reflected to other threads on same core.
	 * Check if TB is already valid and skip the checking of TB errors.
	 */
	if ((proc_gen == proc_gen_p9) && (tfmr & SPR_TFMR_TB_RESIDUE_ERR) &&
			(tfmr & SPR_TFMR_TB_VALID))
		goto skip_tb_error_clear;

	/*
	 * Check for TB errors.
	 * On Sync check error, bit 44 of TFMR is set. Check for it and
	 * clear it.
	 *
	 * In some rare situations we may have all TB errors already cleared,
	 * but TB stuck in waiting for new value from TOD with TFMR bit 18
	 * set to '1'. This uncertain state of TB would fail the process
	 * of getting TB back into running state. Get TB in clean initial
	 * state by clearing TB errors if TFMR[18] is set.
	 */
	if ((tfmr & SPR_TFMR_TB_MISSING_STEP) ||
		(tfmr & SPR_TFMR_TB_RESIDUE_ERR) ||
		(tfmr & SPR_TFMR_FW_CONTROL_ERR) ||
		(tfmr & SPR_TFMR_TBST_CORRUPT) ||
		(tfmr & SPR_TFMR_MOVE_CHIP_TOD_TO_TB) ||
		(tfmr & SPR_TFMR_TB_MISSING_SYNC)) {
		/*
		 * NOTE(review): the local tfmr is not re-read after this
		 * call; the checks below use the value sampled above -
		 * confirm intended.
		 */
		if (!tfmr_recover_tb_errors(tfmr)) {
			rc = 0;
			goto error_out;
		}
	}

skip_tb_error_clear:
	/*
	 * Check for TOD sync check error.
	 * On TOD errors, bit 51 of TFMR is set. If this bit is on then we
	 * need to fetch TOD error register and recover from TOD errors.
	 * Bit 33 of TOD error register indicates sync check error.
	 */
	if (tfmr & SPR_TFMR_CHIP_TOD_INTERRUPT)
		rc = chiptod_recover_tod_errors();

	/* Check if TB is running. If not then we need to get it running. */
	if (!(tfmr & SPR_TFMR_TB_VALID)) {
		/* Assume failure; only the final step below sets success. */
		rc = 0;

		/* Place TB in Notset state. */
		if (!chiptod_mod_tb())
			goto error_out;

		/*
		 * Before we move TOD to core TB check if TOD is running.
		 * If not, then get TOD in running state.
		 */
		if (!chiptod_running_check(this_cpu()->chip_id))
			if (!chiptod_start_tod())
				goto error_out;

		/* Move chiptod value to core TB */
		if (!chiptod_to_tb())
			goto error_out;

		/* We have successfully able to get TB running. */
		rc = 1;
	}

	/*
	 * Workaround for HW logic bug in power9.
	 * In idea case (without the HW bug) only one thread from the core
	 * would have fallen through tfmr_recover_non_tb_errors() to clear
	 * HDEC parity error on TFMR.
	 *
	 * Hence to achieve same behavior, allow only thread 0 to clear the
	 * HDEC parity error. And for rest of the threads just reset the bit
	 * to avoid other threads to fall through tfmr_recover_non_tb_errors().
	 */
	thread_id = cpu_get_thread_index(this_cpu());
	if ((proc_gen == proc_gen_p9) && thread_id)
		tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR;

	/*
	 * Now that TB is running, check for TFMR non-TB errors.
	 */
	if ((tfmr & SPR_TFMR_HDEC_PARITY_ERROR) ||
		(tfmr & SPR_TFMR_PURR_PARITY_ERR) ||
		(tfmr & SPR_TFMR_SPURR_PARITY_ERR) ||
		(tfmr & SPR_TFMR_DEC_PARITY_ERR)) {
		if (!tfmr_recover_non_tb_errors(tfmr)) {
			rc = 0;
			goto error_out;
		}
		rc = 1;
	}

error_out:
	/* Single exit point: every path that took the lock releases it here. */
	unlock(&chiptod_lock);
	return rc;
}

/*
 * Handler registered for OPAL_RESYNC_TIMEBASE.
 *
 * Returns OPAL_SUCCESS when chiptod_wakeup_resync() succeeds; otherwise
 * logs the calling CPU's PIR and returns OPAL_HARDWARE.
 */
static int64_t opal_resync_timebase(void)
{
	if (!chiptod_wakeup_resync()) {
		prerror("OPAL: Resync timebase failed on CPU 0x%04x\n",
			this_cpu()->pir);
		return OPAL_HARDWARE;
	}
	return OPAL_SUCCESS;
}
opal_call(OPAL_RESYNC_TIMEBASE, opal_resync_timebase, 0);

/*
 * CPU job callback: log the PIR of the CPU it runs on together with the
 * value read from SPR_TBRL. The data argument is unused.
 */
static void chiptod_print_tb(void *data __unused)
{
	prlog(PR_DEBUG, "PIR 0x%04x TB=%lx\n",
	      this_cpu()->pir, mfspr(SPR_TBRL));
}

/*
 * Walk the device-tree nodes compatible with "ibm,power-chiptod" and
 * record which chip carries the "primary" and "secondary" properties.
 * chiptod_type is taken from the compatible string of the primary node.
 *
 * Returns false if chiptod_type is still chiptod_unknown after the walk.
 */
static bool chiptod_probe(void)
{
	struct dt_node *np;

	dt_for_each_compatible(dt_root, np, "ibm,power-chiptod") {
		uint32_t chip;

		/* Old DT has chip-id in chiptod node, newer only in the
		 * parent xscom bridge
		 */
		chip = dt_get_chip_id(np);

		if (dt_has_node_property(np, "primary", NULL)) {
			chiptod_primary = chip;
			/*
			 * Independent checks, not an else-if chain: if a
			 * node matches more than one, the last match wins.
			 */
			if (dt_node_is_compatible(np, "ibm,power7-chiptod"))
				chiptod_type = chiptod_p7;
			if (dt_node_is_compatible(np, "ibm,power8-chiptod"))
				chiptod_type = chiptod_p8;
			if (dt_node_is_compatible(np, "ibm,power9-chiptod"))
				chiptod_type = chiptod_p9;
		}

		if (dt_has_node_property(np, "secondary", NULL))
			chiptod_secondary = chip;
	}

	if (chiptod_type == chiptod_unknown) {
		prerror("Unknown TOD type !\n");
		return false;
	}

	return true;
}

static void chiptod_discover_new_backup(enum chiptod_topology topo)
{
	struct proc_chip *chip = NULL;

	/* Scan through available chips to find new backup master chip */
	for_each_chip(chip) {
		if (_chiptod_get_chip_status(chip->id) == chiptod_backup_master)
			break;
	}

	/* Found new backup master chip.
Update the topology info */ if (chip) { prlog(PR_DEBUG, "New backup master: CHIP [%d]\n", chip->id); if (topo == chiptod_topo_primary) chiptod_primary = chip->id; else chiptod_secondary = chip->id; chiptod_topology_info[topo].id = chip->id; chiptod_update_topology(topo); prlog(PR_DEBUG, "Backup topology configuration changed.\n"); print_topology_info(); } /* * Topology configuration has changed. Save TOD control registers * values. */ chiptod_cache_tod_registers(); } /* * Enable/disable backup topology. * If request is to enable topology, then discover new backup master * chip and update the topology configuration info. If the request is * to disable topology, then mark the current backup topology as disabled. * Return error (-1) if the action is requested on currenlty active * topology. * * Return values: * true <= Success * false <= Topology is active and in use. */ bool chiptod_adjust_topology(enum chiptod_topology topo, bool enable) { uint8_t rc = true; /* * The FSP can only request that the currently inactive topology * be disabled or enabled. If the requested topology is currently * the active topology, then fail this request with a -1 (TOD * topology in use) status as return code. */ lock(&chiptod_lock); if (topo == current_topology) { rc = false; goto out; } if (enable) chiptod_discover_new_backup(topo); else chiptod_topology_info[topo].status = chiptod_backup_disabled; out: unlock(&chiptod_lock); return rc; } static void chiptod_init_topology_info(void) { /* Find and update current topology in use. */ current_topology = query_current_topology(); /* Initialized primary topology chip config info */ chiptod_topology_info[chiptod_topo_primary].id = chiptod_primary; chiptod_update_topology(chiptod_topo_primary); /* Initialized secondary topology chip config info */ chiptod_topology_info[chiptod_topo_secondary].id = chiptod_secondary; chiptod_update_topology(chiptod_topo_secondary); /* Cache TOD control registers values. 
*/
	chiptod_cache_tod_registers();

	print_topology_info();
}

/*
 * Boot-time ChipTOD bring-up.
 *
 * Does nothing when the QUIRK_NO_CHIPTOD quirk is set. Otherwise:
 * probes the device tree, computes the base TFMR, runs the master sync
 * job on a CPU of the primary chip, runs the slave sync job on every
 * other available CPU, logs each primary thread's timebase and finally
 * records the topology info.
 *
 * A failed probe or failed master sync is fatal (abort()). A failed
 * slave sync only disables all threads of that CPU.
 */
void chiptod_init(void)
{
	struct cpu_thread *cpu0, *cpu;
	/* Job result flag; presumably written by the sync job via its argument. */
	bool sres;

	/* Mambo and qemu doesn't simulate the chiptod */
	if (chip_quirk(QUIRK_NO_CHIPTOD))
		return;

	op_display(OP_LOG, OP_MOD_CHIPTOD, 0);

	if (!chiptod_probe()) {
		prerror("Failed ChipTOD detection !\n");
		op_display(OP_FATAL, OP_MOD_CHIPTOD, 0);
		abort();
	}

	op_display(OP_LOG, OP_MOD_CHIPTOD, 1);

	/*
	 * Pick somebody on the primary
	 * NOTE(review): cpu0 is not checked for NULL before it is handed
	 * to cpu_queue_job() - confirm the lookup cannot fail here.
	 */
	cpu0 = find_cpu_by_chip_id(chiptod_primary);

	/* Calculate the base TFMR value used for everybody */
	chiptod_setup_base_tfmr();

	prlog(PR_DEBUG, "Base TFMR=0x%016llx\n", base_tfmr);

	/* Schedule master sync */
	sres = false;
	cpu_wait_job(cpu_queue_job(cpu0, "chiptod_sync_master",
				   chiptod_sync_master, &sres), true);
	if (!sres) {
		op_display(OP_FATAL, OP_MOD_CHIPTOD, 2);
		abort();
	}

	op_display(OP_LOG, OP_MOD_CHIPTOD, 2);

	/* Schedule slave sync */
	for_each_available_cpu(cpu) {
		/* Skip master */
		if (cpu == cpu0)
			continue;

		/* Queue job */
		sres = false;
		cpu_wait_job(cpu_queue_job(cpu, "chiptod_sync_slave",
					   chiptod_sync_slave, &sres),
			     true);
		if (!sres) {
			op_display(OP_WARN, OP_MOD_CHIPTOD, 3|(cpu->pir << 8));

			/* Disable threads */
			cpu_disable_all_threads(cpu);
		}
		/* Progress code is emitted whether or not the sync succeeded. */
		op_display(OP_LOG, OP_MOD_CHIPTOD, 3|(cpu->pir << 8));
	}

	/* Display TBs */
	for_each_available_cpu(cpu) {
		/* Only do primaries, not threads */
		if (cpu->is_secondary)
			continue;
		cpu_wait_job(cpu_queue_job(cpu, "chiptod_print_tb",
					   chiptod_print_tb, NULL), true);
	}

	chiptod_init_topology_info();
	op_display(OP_LOG, OP_MOD_CHIPTOD, 4);
}

/* CAPP timebase sync */

static bool chiptod_capp_reset_tb_errors(uint32_t chip_id,
					 uint32_t tfmr_addr,
					 uint32_t offset)
{
	uint64_t tfmr;
	unsigned long timeout = 0;

	/* Ask for automatic clear of errors */
	tfmr = base_tfmr | SPR_TFMR_CLEAR_TB_ERRORS;

	/* Additionally pHyp sets these (write-1-to-clear ?)
*/
	tfmr |= SPR_TFMR_TB_MISSING_SYNC;
	tfmr |= SPR_TFMR_TB_MISSING_STEP;
	tfmr |= SPR_TFMR_TB_RESIDUE_ERR;
	tfmr |= SPR_TFMR_TBST_CORRUPT;
	tfmr |= SPR_TFMR_TFMR_CORRUPT;

	/* Write CAPP TFMR */
	xscom_write(chip_id, tfmr_addr + offset, tfmr);

	/* We have to write "Clear TB Errors" again */
	tfmr = base_tfmr | SPR_TFMR_CLEAR_TB_ERRORS;
	/* Write CAPP TFMR */
	xscom_write(chip_id, tfmr_addr + offset, tfmr);

	/*
	 * Poll the CAPP TFMR until the "Clear TB Errors" bit drops.
	 * NOTE(review): the xscom_read()/xscom_write() results are ignored
	 * in this function - confirm a failed access cannot leave tfmr
	 * holding a misleading value.
	 */
	do {
		if (++timeout >= TIMEOUT_LOOPS) {
			prerror("CAPP: TB error reset timeout !\n");
			return false;
		}
		/* Read CAPP TFMR */
		xscom_read(chip_id, tfmr_addr + offset, &tfmr);
		if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
			prerror("CAPP: TB error reset: corrupt TFMR!\n");
			return false;
		}
	} while (tfmr & SPR_TFMR_CLEAR_TB_ERRORS);
	return true;
}

/*
 * Put the CAPP timebase into the "Not Set" state.
 *
 * Writes base_tfmr with SPR_TFMR_LOAD_TOD_MOD set to the CAPP TFMR at
 * tfmr_addr + offset on @chip_id, then polls that register until the
 * bit reads back as zero.
 *
 * Returns false after TIMEOUT_LOOPS*2 polls, if the register reports
 * TFMR corruption, or if the encoded TB state field equals 9; true once
 * the bit has cleared.
 */
static bool chiptod_capp_mod_tb(uint32_t chip_id, uint32_t tfmr_addr,
				uint32_t offset)
{
	uint64_t timeout = 0;
	uint64_t tfmr;

	/* Switch CAPP timebase to "Not Set" state */
	tfmr = base_tfmr | SPR_TFMR_LOAD_TOD_MOD;
	xscom_write(chip_id, tfmr_addr + offset, tfmr);
	do {
		if (++timeout >= (TIMEOUT_LOOPS*2)) {
			prerror("CAPP: TB \"Not Set\" timeout\n");
			return false;
		}
		/* NOTE(review): read result is not checked - confirm. */
		xscom_read(chip_id, tfmr_addr + offset, &tfmr);
		if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
			prerror("CAPP: TB \"Not Set\" TFMR corrupt\n");
			return false;
		}
		/*
		 * NOTE(review): 9 presumably is the same state that
		 * chiptod_recover_tfmr_error() names TBST_STATE_ERROR -
		 * confirm and use the named constant.
		 */
		if (GETFIELD(SPR_TFMR_TBST_ENCODED, tfmr) == 9) {
			prerror("CAPP: TB \"Not Set\" TOD in error state\n");
			return false;
		}
	} while (tfmr & SPR_TFMR_LOAD_TOD_MOD);

	return true;
}

/*
 * Wait for a timebase sync to be seen by the calling core.
 *
 * Clears SPR_TFMR_TB_SYNC_OCCURED in the core TFMR, then polls the
 * register until the bit is set again. Returns false if that does not
 * happen within TIMEOUT_LOOPS polls.
 */
static bool chiptod_wait_for_chip_sync(void)
{
	uint64_t tfmr;
	uint64_t timeout = 0;

	/* Read core TFMR, mask bit 42, write core TFMR back */
	tfmr = mfspr(SPR_TFMR);
	tfmr &= ~SPR_TFMR_TB_SYNC_OCCURED;
	mtspr(SPR_TFMR, tfmr);

	/* Read core TFMR until the TB sync occurred */
	do {
		if (++timeout >= TIMEOUT_LOOPS) {
			prerror("No sync pulses\n");
			return false;
		}
		tfmr = mfspr(SPR_TFMR);
	} while (!(tfmr & SPR_TFMR_TB_SYNC_OCCURED));
	return true;
}

static bool chiptod_capp_check_tb_running(uint32_t chip_id,
					  uint32_t tfmr_addr,
					  uint32_t offset)
{
	uint64_t tfmr;
	uint64_t timeout = 0;

	/* Read
CAPP TFMR until TB becomes valid */ do { if (++timeout >= (TIMEOUT_LOOPS*2)) { prerror("CAPP: TB Invalid!\n"); return false; } xscom_read(chip_id, tfmr_addr + offset, &tfmr); if (tfmr & SPR_TFMR_TFMR_CORRUPT) { prerror("CAPP: TFMR corrupt!\n"); return false; } } while (!(tfmr & SPR_TFMR_TB_VALID)); return true; } bool chiptod_capp_timebase_sync(unsigned int chip_id, uint32_t tfmr_addr, uint32_t tb_addr, uint32_t offset) { uint64_t tfmr; uint64_t capp_tb; int64_t delta; unsigned int retry = 0; /* Set CAPP TFMR to base tfmr value */ xscom_write(chip_id, tfmr_addr + offset, base_tfmr); /* Reset CAPP TB errors before attempting the sync */ if (!chiptod_capp_reset_tb_errors(chip_id, tfmr_addr, offset)) return false; /* Switch CAPP TB to "Not Set" state */ if (!chiptod_capp_mod_tb(chip_id, tfmr_addr, offset)) return false; /* Sync CAPP TB with core TB, retry while difference > 16usecs */ do { if (retry++ > 5) { prerror("CAPP: TB sync: giving up!\n"); return false; } /* Make CAPP ready to get the TB, wait for chip sync */ tfmr = base_tfmr | SPR_TFMR_MOVE_CHIP_TOD_TO_TB; xscom_write(chip_id, tfmr_addr + offset, tfmr); if (!chiptod_wait_for_chip_sync()) return false; /* Set CAPP TB from core TB */ xscom_write(chip_id, tb_addr + offset, mftb()); /* Wait for CAPP TFMR tb_valid bit */ if (!chiptod_capp_check_tb_running(chip_id, tfmr_addr, offset)) return false; /* Read CAPP TB, read core TB, compare */ xscom_read(chip_id, tb_addr + offset, &capp_tb); delta = mftb() - capp_tb; if (delta < 0) delta = -delta; } while (tb_to_usecs(delta) > 16); return true; }