diff options
field | value | date |
---|---|---|
author | Yue Du <daviddu@us.ibm.com> | 2017-06-12 14:11:23 -0500 |
committer | Joshua Hunsberger <jahunsbe@us.ibm.com> | 2017-10-23 18:46:42 -0500 |
commit | 6ff776224fe82b0222a2836829f82d4c04e86c1d (patch) | |
tree | 15bffed1ec9174a3cb81ff20d371ee04161dba33 /import/chips/p9/procedures/ppe_closed | |
parent | 69fdbab199071ebd4a4a1605fa60afe430351961 (diff) | |
download | talos-hcode-6ff776224fe82b0222a2836829f82d4c04e86c1d.tar.gz talos-hcode-6ff776224fe82b0222a2836829f82d4c04e86c1d.zip |
STOP: Core Xstop Injection
Change-Id: Ia39449ebf5a013abd74bd5c3c0d0ea7113e2a490
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/41747
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Michael S. Floyd <mfloyd@us.ibm.com>
Reviewed-by: BRIAN D. VICTOR <brian.d.victor1@ibm.com>
Reviewed-by: Gregory S. Still <stillgs@us.ibm.com>
Diffstat (limited to 'import/chips/p9/procedures/ppe_closed')
17 files changed, 247 insertions, 115 deletions
diff --git a/import/chips/p9/procedures/ppe_closed/cme/cme_panic_codes.h b/import/chips/p9/procedures/ppe_closed/cme/cme_panic_codes.h index 42acbf47..e1870251 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/cme_panic_codes.h +++ b/import/chips/p9/procedures/ppe_closed/cme/cme_panic_codes.h @@ -68,7 +68,7 @@ CME_STOP_EXIT_SCOM_RES_XSTOP_ERROR = 0x1d05, //_UNUSED_1d06 = 0x1d06, //_UNUSED_1d07 = 0x1d07, CME_STOP_ENTRY_STOPCLK_FAILED = 0x1d08, -CME_STOP_ENTRY_XSTOP_ERROR = 0x1d09, +CME_STOP_ENTRY_XSTOP_ERROR = 0x1d09, // NDD1 //_UNUSED_1d0a = 0x1d0a, //_UNUSED_1d0d = 0x1d0d, //_UNUSED_1d1c = 0x1d1c, diff --git a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c index fceb5b47..59a837f1 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c +++ b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c @@ -38,8 +38,7 @@ CmePstateRecord G_cme_pstate_record; // CME Stop Header and Structure #include "p9_cme_stop.h" -CmeStopRecord G_cme_stop_record __attribute__((section (".dump_ptrs"))) = {{0}, {0}, 0}; - +CmeStopRecord G_cme_stop_record __attribute__((section (".dump_ptrs"))) = {{0}, {0}, 0, 0, 0, 0, 0, 0, 0, {0}}; void fit_handler() { diff --git a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_main.c b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_main.c index a7f4a0f5..d2eaecb5 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_main.c +++ b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_main.c @@ -38,7 +38,7 @@ CmePstateRecord G_cme_pstate_record; // CME Stop Header and Structure #include "p9_cme_stop.h" -CmeStopRecord G_cme_stop_record __attribute__((section (".dump_ptrs"))) = {{0}, {0}, 0}; +CmeStopRecord G_cme_stop_record __attribute__((section (".dump_ptrs"))) = {{0}, {0}, 0, 0, 0, 0, 0, 0, 0, {0}, {{0}}}; #if TEST_ONLY_BCE_IRR #include "p9_cme_copy_scan_ring.h" diff --git a/import/chips/p9/procedures/ppe_closed/cme/pk_app_cfg.h 
b/import/chips/p9/procedures/ppe_closed/cme/pk_app_cfg.h index d29a8de2..434af8a9 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/pk_app_cfg.h +++ b/import/chips/p9/procedures/ppe_closed/cme/pk_app_cfg.h @@ -34,13 +34,11 @@ /// \brief Application specific overrides go here. /// -// Debug only enablement +// Debug Switches -#define TEST_ONLY_BCE_IRR 0 - -// Function disablement - -#define DISABLE_CME_DUAL_CAST 0 +#define TEST_ONLY_BCE_IRR 0 +#define DISABLE_CME_DUAL_CAST 0 +#define DISABLE_CORE_XSTOP_INJECTION 0 // -------------------- diff --git a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop.h b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop.h index 2a4b96b4..53ebc373 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop.h +++ b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop.h @@ -112,6 +112,7 @@ #define C_CLOCK_STAT_ARY 0x2003000a #define C_BIST 0x2003000B #define C_XFIR 0x20040000 +#define C_LFIR_OR 0x2004000C #define C_THERM_MODE_REG 0x2005000F #define C_SLAVE_CONFIG_REG 0x200F001E @@ -142,6 +143,18 @@ #define PERV_OPCG_CAPT2 0x20030012 #define PERV_CPLT_STAT0 0x20000100 +#define CME_STOP_CORE_ERROR_HANDLER(core, core_error, panic_code) \ + core &= ~core_error; \ + G_cme_stop_record.core_running |= core_error; \ + G_cme_stop_record.core_errored |= core_error; \ + G_cme_stop_record.error_code[core_error & 1] = panic_code; \ + /*set the WKUP_FAIL_STATUS breadcrumbs*/ \ + out32(CME_LCL_SICR_OR, core_error << SHIFT32(15)); \ + /*this pulses the FIR trigger using CME Local Debug register \ + to optionally set a recoverable or xstop on error*/ \ + out32(CME_LCL_DBG_OR, BIT32(16)); \ + out32(CME_LCL_DBG_CLR, BIT32(16)); \ + //PK_PANIC(panic_code); // enable if desire halt on error enum CME_IRQ_VECTORS @@ -236,6 +249,11 @@ typedef struct uint32_t core_blockey; // core in special wakeup, can be used as core select in scom address or data uint32_t core_in_spwu; + // core in error state, prevent 
it being further processed + uint32_t core_errored; + // store panic code indicating where and what that certain core encountered error + // mostly from various xstop detection or failed clock operation through stages of code + uint32_t error_code[2]; #if !defined(__IOTA__) PkSemaphore sem[2]; #endif diff --git a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_entry.c b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_entry.c index 735d6f7d..8362fcbc 100755 --- a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_entry.c +++ b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_entry.c @@ -210,6 +210,8 @@ void p9_cme_pcbmux_savior_epilogue(uint32_t core) #endif + + void p9_cme_stop_entry() { @@ -730,28 +732,25 @@ p9_cme_stop_entry() #if NIMBUS_DD_LEVEL == 10 // NDD1: Core Global Xstop FIR - if (core & CME_MASK_C0) + for (core_mask = 2; core_mask > 0; core_mask--) { - CME_GETSCOM(0x20040000, CME_MASK_C0, scom_data.value); - - if (scom_data.value) + if (core & core_mask) { - PK_TRACE_ERR("ERROR: Core[%d] GLOBAL XSTOP[%x] DETECTED. HALT CME!", - core, scom_data.words.upper); - PK_PANIC(CME_STOP_ENTRY_XSTOP_ERROR); - } - } + CME_GETSCOM(0x20040000, core_mask, scom_data.value); - if (core & CME_MASK_C1) - { - CME_GETSCOM(0x20040000, CME_MASK_C1, scom_data.value); + if (scom_data.value) + { + PK_TRACE_ERR("ERROR: Core[%d] GLOBAL XSTOP[%x] DETECTED. Gard Core!", + core_mask, scom_data.words.upper); + CME_STOP_CORE_ERROR_HANDLER(core, core_mask, CME_STOP_ENTRY_XSTOP_ERROR) + } - if (scom_data.value) - { - PK_TRACE_ERR("ERROR: Core[%d] GLOBAL XSTOP[%x] DETECTED. 
HALT CME!", - core, scom_data.words.upper); - PK_PANIC(CME_STOP_ENTRY_XSTOP_ERROR); + if (!core) + { + return; + } } + } #endif @@ -769,12 +768,24 @@ p9_cme_stop_entry() while(!(scom_data.words.upper & BIT32(8))); PK_TRACE("Check core clock is stopped via CLOCK_STAT_SL[4-13]"); - CME_GETSCOM_AND(C_CLOCK_STAT_SL, core, scom_data.value); - if (((~scom_data.value) & CLK_REGION_ALL_BUT_PLL) != 0) + for (core_mask = 2; core_mask > 0; core_mask--) { - PK_TRACE_ERR("ERROR: Core Clock Stop Failed. HALT CME!"); - PK_PANIC(CME_STOP_ENTRY_STOPCLK_FAILED); + if (core & core_mask) + { + CME_GETSCOM(C_CLOCK_STAT_SL, core_mask, scom_data.value); + + if (((~scom_data.value) & CLK_REGION_ALL_BUT_PLL) != 0) + { + PK_TRACE_ERR("ERROR: Core[%d] Clock Stop Failed. Gard Core!", core_mask); + CME_STOP_CORE_ERROR_HANDLER(core, core_mask, CME_STOP_ENTRY_STOPCLK_FAILED); + + if (!core) + { + return; + } + } + } } PK_TRACE_INF("SE.2C: Core Clock Stopped"); diff --git a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_exit.c b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_exit.c index 17dbb0d9..da6a8cba 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_exit.c +++ b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_exit.c @@ -53,6 +53,8 @@ uint8_t G_dsl[MAX_CORES_PER_CME][MAX_THREADS_PER_CORE] = {{0, 0, 0, 0}, {0, 0, 0 #endif + + static void p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) { @@ -69,11 +71,9 @@ p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) #endif //-------------------------------------------------------------------------- - PK_TRACE("+++++ +++++ END OF STOP EXIT +++++ +++++"); PK_TRACE_PERF("+++++ +++++ END OF STOP EXIT +++++ +++++"); //-------------------------------------------------------------------------- - PK_TRACE_DBG("Restore PSSCR.PLS+SRR1 back to actual level"); PK_TRACE_PERF("Restore PSSCR.PLS+SRR1 back to actual level"); #if HW386841_NDD1_DSL_STOP1_FIX @@ -231,11 +231,9 @@ 
p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) // make sure all direct control scom are completed before wake core up sync(); - PK_TRACE_INF("SX.0A: Core[%d] Waking up(pm_exit=1) via SICR[4/5]", core); PK_TRACE_PERF("SX.0A: Core Waking up(pm_exit=1) via SICR[4/5]"); out32(CME_LCL_SICR_OR, core << SHIFT32(5)); - PK_TRACE_DBG("Polling for Core Waking up(pm_active=0) via EINR[20/21]"); PK_TRACE_PERF("Polling for Core Waking up(pm_active=0) via EINR[20/21]"); while((in32(CME_LCL_EINR)) & (core << SHIFT32(21))); @@ -247,7 +245,6 @@ p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) #endif - PK_TRACE("Release PCB Mux back on Core via SICR[10/11]"); PK_TRACE_PERF("Release PCB Mux back on Core via SICR[10/11]"); out32(CME_LCL_SICR_CLR, core << SHIFT32(11)); @@ -281,7 +278,6 @@ p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) PK_TRACE_DBG("Drop auto spwu disable, enable auto spwu via LMCR[12/13]"); out32(CME_LCL_LMCR_CLR, core << SHIFT32(13)); - PK_TRACE_INF("SX.0B: Core Drop PM_EXIT via SICR[4/5]"); PK_TRACE_PERF("SX.0B: Core Drop PM_EXIT via SICR[4/5]"); out32(CME_LCL_SICR_CLR, core << SHIFT32(5)); @@ -301,7 +297,6 @@ p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) if ((core = (core & (~spwu_stop)))) { - PK_TRACE_INF("SX.0C: Core[%d] isnt SPWUed, Drop PM_EXIT via SICR[4/5]", core); PK_TRACE_PERF("SX.0C: Core isnt SPWUed, Drop PM_EXIT via SICR[4/5]"); out32(CME_LCL_SICR_CLR, core << SHIFT32(5)); } @@ -311,11 +306,11 @@ p9_cme_stop_exit_end(uint32_t core, uint32_t spwu_stop) } + static void p9_cme_stop_exit_lv2(uint32_t core) { //-------------------------------------------------------------------------- - PK_TRACE("+++++ +++++ STOP LEVEL 2 EXIT +++++ +++++"); PK_TRACE_PERF("+++++ +++++ STOP LEVEL 2 EXIT +++++ +++++"); //-------------------------------------------------------------------------- @@ -353,10 +348,16 @@ p9_cme_stop_exit_lv2(uint32_t core) #endif - PK_TRACE_INF("SX.2A: Core[%d] Start Clock", core); PK_TRACE_PERF("SX.2A: Core Start 
Clock"); p9_hcd_core_startclocks(core); + core &= ~G_cme_stop_record.core_errored; + + if (!core) + { + return; + } + PK_TRACE("Clear CPPM PECE shadow via PECES"); CME_PUTSCOM(CPPM_PECES, core, 0); } @@ -365,14 +366,14 @@ p9_cme_stop_exit_lv2(uint32_t core) #if !SKIP_EXIT_CATCHUP -static int +static uint32_t p9_cme_stop_exit_catchup(uint32_t* core, uint32_t* deeper_core, uint32_t* spwu_stop, uint8_t* target_level, int* d2u4_flag) { - int rc = 0; + uint32_t rc = 0; uint8_t catchup_level = 0; uint32_t core_mask = 0; uint32_t core_catchup = 0; @@ -436,6 +437,8 @@ p9_cme_stop_exit_catchup(uint32_t* core, #endif + + void p9_cme_stop_exit() { @@ -448,6 +451,8 @@ p9_cme_stop_exit() uint32_t wakeup = 0; uint32_t core = 0; uint32_t core_mask = 0; + uint32_t core_spattn = 0; + uint64_t spattn_mask = 0; data64_t scom_data = {0}; #if !SPWU_AUTO uint32_t spwu_stop = 0; @@ -459,7 +464,6 @@ p9_cme_stop_exit() cmeHeader_t* pCmeImgHdr = (cmeHeader_t*)(CME_SRAM_HEADER_ADDR); //-------------------------------------------------------------------------- - PK_TRACE("+++++ +++++ BEGIN OF STOP EXIT +++++ +++++"); PK_TRACE_PERF("+++++ +++++ BEGIN OF STOP EXIT +++++ +++++"); //-------------------------------------------------------------------------- @@ -627,6 +631,15 @@ p9_cme_stop_exit() p9_cme_stop_exit_lv2(core); + // remove errored cores from the list to be porcessed + // only continue if a core is left that does not have an error + core &= ~G_cme_stop_record.core_errored; + + if (!core) + { + return; + } + PK_TRACE("Drop chiplet fence via NC0INDIR[18]"); CME_PUTSCOM(CPPM_NC0INDIR_CLR, core, BIT64(18)); @@ -649,7 +662,6 @@ p9_cme_stop_exit() { //-------------------------------------------------------------------------- - PK_TRACE("+++++ +++++ STOP LEVEL 4 EXIT +++++ +++++"); PK_TRACE_PERF("+++++ +++++ STOP LEVEL 4 EXIT +++++ +++++"); //-------------------------------------------------------------------------- @@ -692,7 +704,6 @@ p9_cme_stop_exit() // Poll Infinitely for PCB Mux Grant 
while((core & (in32(CME_LCL_SISR) >> SHIFT32(11))) != core); - PK_TRACE("SX.40: PCB Mux Granted on Core[%d]", core); PK_TRACE_PERF("SX.40: PCB Mux Granted on Core"); // Note: in this case, no need to call p9_cme_pcbmux_savior_epilogue @@ -708,7 +719,6 @@ p9_cme_stop_exit() MARK_TAG(SX_POWERON, core) //======================== - PK_TRACE_INF("SX.4A: Core[%d] Poweron", core); PK_TRACE_PERF("SX.4A: Core Poweron"); p9_hcd_core_poweron(core); @@ -716,7 +726,6 @@ p9_cme_stop_exit() MARK_TRAP(SX_CHIPLET_RESET) //========================= - PK_TRACE_INF("SX.4B: Core Chiplet Reset"); PK_TRACE_PERF("SX.4B: Core Chiplet Reset"); p9_hcd_core_chiplet_reset(core); @@ -753,19 +762,16 @@ p9_cme_stop_exit() #if !SKIP_INITF - PK_TRACE_INF("SX.4C: Core[%d] Gptr/Time Initf after catchup A", core); PK_TRACE_PERF("SX.4C: Core Gptr/Time Initf after catchup A"); p9_hcd_core_gptr_time_initf(core); #endif - PK_TRACE_DBG("Core Chiplet Inits"); PK_TRACE_PERF("Core Chiplet Inits"); p9_hcd_core_chiplet_init(core); #if !SKIP_INITF - PK_TRACE_INF("SX.4D: Core Repair Initf"); PK_TRACE_PERF("SX.4D: Core Repair Initf"); p9_hcd_core_repair_initf(core); @@ -802,7 +808,6 @@ p9_cme_stop_exit() #if !SKIP_ARRAYINIT - PK_TRACE_INF("SX.4E: Core[%d] Array Init after catchup B", core); PK_TRACE_PERF("SX.4E: Core Array Init after catchup B"); p9_hcd_core_arrayinit(core); @@ -814,7 +819,6 @@ p9_cme_stop_exit() #if !SKIP_INITF - PK_TRACE_INF("SX.4F: Core Func Scan"); PK_TRACE_PERF("SX.4F: Core Func Scan"); p9_hcd_core_initf(core); @@ -848,6 +852,13 @@ p9_cme_stop_exit() p9_cme_stop_exit_lv2(core); + core &= ~G_cme_stop_record.core_errored; + + if (!core) + { + return; + } + if (target_level < STOP_LEVEL_4) { if (deeper_core && deeper_level == STOP_LEVEL_4) @@ -867,7 +878,6 @@ p9_cme_stop_exit() { //-------------------------------------------------------------------------- - PK_TRACE("+++++ +++++ STOP LEVEL 4 EXIT CONTINUE +++++ +++++"); PK_TRACE_PERF("+++++ +++++ STOP LEVEL 4 EXIT CONTINUE +++++ +++++"); 
//-------------------------------------------------------------------------- @@ -901,20 +911,24 @@ p9_cme_stop_exit() #endif - PK_TRACE("Core XIP Customized Scoms"); PK_TRACE_PERF("Core XIP Customized Scoms"); p9_hcd_core_scomcust(core); + core &= ~G_cme_stop_record.core_errored; + + if (!core) + { + return; + } + //============================== MARK_TAG(SX_RUNTIME_INITS, core) //============================== - PK_TRACE("RAS Runtime Scom on Core", core); - PK_TRACE_PERF("RAS Runtime Scom on Core", core); + PK_TRACE_PERF("RAS Runtime Scom on Core"); p9_hcd_core_ras_runtime_scom(core); - PK_TRACE("OCC Runtime Scom on Core", core); - PK_TRACE_PERF("OCC Runtime Scom on Core", core); + PK_TRACE_PERF("OCC Runtime Scom on Core"); p9_hcd_core_occ_runtime_scom(core); #endif @@ -928,7 +942,6 @@ p9_cme_stop_exit() PK_TRACE("Assert block interrupt to PC via SICR[2/3]"); out32(CME_LCL_SICR_OR, core << SHIFT32(3)); - PK_TRACE_INF("SF.RS: Self Restore Prepare, Core Waking up(pm_exit=1) via SICR[4/5]"); PK_TRACE_PERF("SF.RS: Self Restore Prepare, Core Waking up(pm_exit=1) via SICR[4/5]"); out32(CME_LCL_SICR_OR, core << SHIFT32(5)); @@ -1008,7 +1021,6 @@ p9_cme_stop_exit() #else - PK_TRACE_DBG("Core Wakes Up, Write HRMOR with HOMER address %x", scom_data.value); PK_TRACE_PERF("Core Wakes Up, Write HRMOR with HOMER address"); CME_PUTSCOM(HRMOR, core, scom_data.value); @@ -1017,10 +1029,29 @@ p9_cme_stop_exit() #endif PK_TRACE("Save off and mask SPATTN before self-restore"); - CME_GETSCOM(SPATTN_MASK, core, scom_data.value); + CME_GETSCOM(SPATTN_MASK, core, spattn_mask); CME_PUTSCOM(SPATTN_MASK, core, BITS64(0, 64)); - PK_TRACE_INF("SF.RS: Self Restore Kickoff, S-Reset All Core Threads"); +#if !DISABLE_CORE_XSTOP_INJECTION + + PK_TRACE("Read WKUP_ERR_INJECT_MODE via CPMMR[8]"); + + for (core_mask = 2; core_mask; core_mask--) + { + if (core & core_mask) + { + CME_GETSCOM(CPPM_CPMMR, core_mask, scom_data.value); + + if (scom_data.words.upper & BIT32(8)) + { + PK_TRACE_INF("Injecting 
a core[%d] xstop via C_LFIR[11]", core); + CME_PUTSCOM(C_LFIR_OR, core_mask, BIT64(11)); + } + } + } + +#endif + PK_TRACE_PERF("SF.RS: Self Restore Kickoff, S-Reset All Core Threads"); // Disable interrupts around the sreset to polling check to not miss the self-restore @@ -1046,18 +1077,24 @@ p9_cme_stop_exit() while((~(in32(CME_LCL_EINR))) & (core << SHIFT32(21))) { - if (in32_sh(CME_LCL_SISR) & (core << SHIFT64SH(33))) + core_spattn = (in32_sh(CME_LCL_SISR) >> SHIFT64SH(33)) & CME_MASK_BC; + + if (core_spattn) + { + PK_TRACE_ERR("ERROR: Core[%d] Special Attention Detected. Gard Core!", core_spattn); + CME_STOP_CORE_ERROR_HANDLER(core, core_spattn, CME_STOP_EXIT_SELF_RES_SPATTN); + } + + if (!core) { - PK_TRACE_ERR("ERROR: Core Special Attention Detected. HALT CME!"); - PK_PANIC(CME_STOP_EXIT_SELF_RES_SPATTN); + return; } } - PK_TRACE_INF("SF.RS: Self Restore Completed, Core Stopped Again(pm_exit=0/pm_active=1)"); PK_TRACE_PERF("SF.RS: Self Restore Completed, Core Stopped Again(pm_exit=0/pm_active=1)"); PK_TRACE("Restore SPATTN after self-restore"); - CME_PUTSCOM(SPATTN_MASK, core, scom_data.value); + CME_PUTSCOM(SPATTN_MASK, core, spattn_mask); PK_TRACE("Always Unfreeze IMA (by clearing bit 34) in case the CHTM is enabled to sample it"); CME_GETSCOM(IMA_EVENT_MASK, core, scom_data.value); diff --git a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_threads.c b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_threads.c index 530d1593..d2bb33ef 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_threads.c +++ b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_cme_stop_threads.c @@ -39,18 +39,23 @@ p9_cme_stop_eval_eimr_override() mask_irqs.words.lower = 0; mask_irqs.words.upper = - ((((~G_cme_record.core_enabled) | - G_cme_stop_record.core_running | - G_cme_stop_record.core_blockpc | + ((((~G_cme_record.core_enabled) | + G_cme_stop_record.core_running | + G_cme_stop_record.core_errored | + 
G_cme_stop_record.core_blockpc | G_cme_stop_record.core_blockwu) & CME_MASK_BC) << SHIFT32(13)) | #if SPWU_AUTO - ((((~G_cme_record.core_enabled) | - G_cme_stop_record.core_running) & CME_MASK_BC) << SHIFT32(15)) | + ((((~G_cme_record.core_enabled) | + G_cme_stop_record.core_running | + G_cme_stop_record.core_errored) & CME_MASK_BC) << SHIFT32(15)) | #endif - ((((~G_cme_record.core_enabled) | - G_cme_stop_record.core_running) & CME_MASK_BC) << SHIFT32(17)) | - (((~(G_cme_record.core_enabled & - G_cme_stop_record.core_running) & + ((((~G_cme_record.core_enabled) | + G_cme_stop_record.core_running | + G_cme_stop_record.core_errored | + G_cme_stop_record.core_blockwu) & CME_MASK_BC) << SHIFT32(17)) | + ((((~G_cme_record.core_enabled) | + (~G_cme_stop_record.core_running) | + G_cme_stop_record.core_errored | G_cme_stop_record.core_blockey) & CME_MASK_BC) << SHIFT32(21)); g_eimr_override |= mask_irqs.value; diff --git a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_scomcust.c b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_scomcust.c index 3f0b92ee..66ba9f6f 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_scomcust.c +++ b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_scomcust.c @@ -25,6 +25,8 @@ #include "p9_cme_stop_exit_marks.h" +extern CmeStopRecord G_cme_stop_record; + inline __attribute__((always_inline)) void p9_hcd_core_scomcust(uint32_t core) @@ -75,7 +77,7 @@ p9_hcd_core_scomcust(uint32_t core) { PK_TRACE_ERR("Core[%d] Chiplet Global Xstop FIR[%x] Detected After Scom Restore. 
HALT CME!", core_mask, scom_data.words.upper); - PK_PANIC(CME_STOP_EXIT_SCOM_RES_XSTOP_ERROR); + CME_STOP_CORE_ERROR_HANDLER(core, core_mask, CME_STOP_EXIT_SCOM_RES_XSTOP_ERROR); } } } diff --git a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_startclocks.c b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_startclocks.c index ed14c782..ad408c72 100644 --- a/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_startclocks.c +++ b/import/chips/p9/procedures/ppe_closed/cme/stop_cme/p9_hcd_core_startclocks.c @@ -25,6 +25,8 @@ #include "p9_cme_stop_exit_marks.h" +extern CmeStopRecord G_cme_stop_record; + inline __attribute__((always_inline)) void p9_hcd_core_startclocks(uint32_t core) @@ -152,12 +154,19 @@ p9_hcd_core_startclocks(uint32_t core) while((~(scom_data.words.upper)) & BIT32(8)); PK_TRACE("Check core clock is running via CLOCK_STAT_SL[4-13]"); - CME_GETSCOM_OR(C_CLOCK_STAT_SL, core, scom_data.value); - if(scom_data.value & CLK_REGION_ALL_BUT_PLL) + for (core_mask = 2; core_mask > 0; core_mask--) { - PK_TRACE_ERR("ERROR: Core Clock Start Failed. HALT CME!"); - PK_PANIC(CME_STOP_EXIT_STARTCLK_FAILED); + if (core & core_mask) + { + CME_GETSCOM(C_CLOCK_STAT_SL, core_mask, scom_data.value); + + if(scom_data.value & CLK_REGION_ALL_BUT_PLL) + { + PK_TRACE_ERR("ERROR: Core[%d] Clock Start Failed. Gard Core!", core_mask); + CME_STOP_CORE_ERROR_HANDLER(core, core_mask, CME_STOP_EXIT_STARTCLK_FAILED); + } + } } PK_TRACE("Core clock is now running"); @@ -171,18 +180,24 @@ p9_hcd_core_startclocks(uint32_t core) #if !EPM_P9_TUNING - for(core_mask = 2; core_mask; core_mask--) + PK_TRACE("Check Global Xstop FIR of Core Chiplet"); + + for (core_mask = 2; core_mask > 0; core_mask--) { if (core & core_mask) { - PK_TRACE("Check Global Xstop FIR of Core Chiplet"); CME_GETSCOM(C_XFIR, core_mask, scom_data.value); if (scom_data.words.upper & BITS32(0, 27)) { - PK_TRACE_ERR("Core[%d] Chiplet Global Xstop FIR[%x] Detected. 
HALT CME!", + PK_TRACE_ERR("Core[%d] Chiplet Global Xstop FIR[%x] Detected. Gard Core!", core_mask, scom_data.words.upper); - PK_PANIC(CME_STOP_EXIT_STARTCLK_XSTOP_ERROR); + CME_STOP_CORE_ERROR_HANDLER(core, core_mask, CME_STOP_EXIT_STARTCLK_XSTOP_ERROR); + + if (!core) + { + return; + } } } } diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_dpll_setup.c b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_dpll_setup.c index 8427a9ed..095e7a9a 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_dpll_setup.c +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_dpll_setup.c @@ -71,7 +71,8 @@ p9_hcd_cache_dpll_setup(uint32_t quad) if (scom_data & BIT64(14)) { PK_TRACE_ERR("ERROR: Start DPLL Clock Failed. HALT SGPE!"); - PK_PANIC(SGPE_STOP_EXIT_DPLL_STARTCLK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(quad, SGPE_STOP_EXIT_DPLL_STARTCLK_FAILED); + return; } PK_TRACE("DPLL clock is now running"); diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_l2_startclocks.c b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_l2_startclocks.c index dad1fc37..ddaafd31 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_l2_startclocks.c +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_l2_startclocks.c @@ -107,7 +107,8 @@ p9_hcd_cache_l2_startclocks(uint32_t quad, uint32_t ex) if (scom_data & ((uint64_t)ex << SHIFT64(9))) { PK_TRACE_ERR("ERROR: L2 Clock Start Failed. 
HALT SGPE!"); - PK_PANIC(SGPE_STOP_EXIT_L2_STARTCLK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(quad, SGPE_STOP_EXIT_L2_STARTCLK_FAILED); + return; } PK_TRACE("L2 clock is now running"); diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_scomcust.c b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_scomcust.c index 9a2d42dc..6c756d1a 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_scomcust.c +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_scomcust.c @@ -143,7 +143,8 @@ p9_hcd_cache_scomcust(uint32_t quad, uint32_t m_ex, int is_stop8) { PK_TRACE_ERR("Cache[%d] Chiplet Global Xstop FIR[%x] Detected After Scom Restore. HALT SGPE!", quad, scom_data.words.upper); - PK_PANIC(SGPE_STOP_EXIT_SCOM_RES_XSTOP_ERROR); + SGPE_STOP_QUAD_ERROR_HANDLER(quad, SGPE_STOP_EXIT_SCOM_RES_XSTOP_ERROR); + return; } #endif diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_startclocks.c b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_startclocks.c index 818a8961..4b2342bb 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_startclocks.c +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_hcd_cache_startclocks.c @@ -164,7 +164,8 @@ p9_hcd_cache_startclocks(uint32_t quad) ((uint64_t)G_sgpe_stop_record.group.expg[quad] << SHIFT64(13)))) { PK_TRACE_ERR("ERROR: Cache Clock Start Failed. HALT SGPE!"); - PK_PANIC(SGPE_STOP_EXIT_EQ_STARTCLK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(quad, SGPE_STOP_EXIT_EQ_STARTCLK_FAILED); + return; } PK_TRACE("Cache clocks running now"); @@ -182,7 +183,8 @@ p9_hcd_cache_startclocks(uint32_t quad) { PK_TRACE_ERR("Cache[%d] Chiplet Global Xstop FIR[%x] Detected After Start Clock. 
HALT SGPE!", quad, scom_data.words.upper); - PK_PANIC(SGPE_STOP_EXIT_STARTCLK_XSTOP_ERROR); + SGPE_STOP_QUAD_ERROR_HANDLER(quad, SGPE_STOP_EXIT_STARTCLK_XSTOP_ERROR); + return; } #endif diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop.h b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop.h index 46e30f56..d9edf4a6 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop.h +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop.h @@ -175,6 +175,12 @@ extern "C" { #define PERV_NET_CTRL1_WAND 0x000F0045 +#define SGPE_STOP_QUAD_ERROR_HANDLER(quad_error, panic_code) \ + G_sgpe_stop_record.group.quad[VECTOR_ERROR] |= BIT32(quad_error); \ + G_sgpe_stop_record.group.quad[VECTOR_CONFIG] &= ~BIT32(quad_error); \ + G_sgpe_stop_record.group.quad[VECTOR_ACTIVE] &= ~BIT32(quad_error); \ + G_sgpe_stop_record.state[quad_error].error_code = panic_code; \ + //PK_PANIC(panic_code); //enable if desire halt on error enum SGPE_STOP_STATE_HISTORY_VECTORS { @@ -228,25 +234,27 @@ enum SGPE_STOP_VECTOR_INDEX VECTOR_EXIT = 3, //(core, quad_ipc qswu) VECTOR_ACTIVE = 4, //(core_ipc, quad_ipc, qswu_active) VECTOR_CONFIG = 5, //(core, quad) - VECTOR_RCLKE = 6, //(core_blocke, quad) - VECTOR_RCLKX = 7, //(core_blockx, quad) - VECTOR_PIGE = 8, //(core) - VECTOR_PIGX = 9, //(core) - VECTOR_PCWU = 10,//(core) + VECTOR_ERROR = 6, //( quad) + VECTOR_RCLKE = 7, //(core_blocke, quad) + VECTOR_RCLKX = 8, //(core_blockx, quad) + VECTOR_PIGE = 9, //(core) + VECTOR_PIGX = 10,//(core) + VECTOR_PCWU = 11 //(core) }; typedef struct { // requested stop state calculated from core stop levels - uint8_t req_state_x0; - uint8_t req_state_x1; - uint8_t req_state_q; + uint8_t req_state_x0; + uint8_t req_state_x1; + uint8_t req_state_q; // actual stop state - uint8_t act_state_x0; - uint8_t act_state_x1; - uint8_t act_state_q; + uint8_t act_state_x0; + uint8_t act_state_x1; + uint8_t act_state_q; // both cme_flags: first(0:3) | enable(4:7) 
- uint8_t cme_flags; + uint8_t cme_flags; + uint32_t error_code; } sgpe_state_t; typedef struct @@ -256,8 +264,8 @@ typedef struct uint32_t qex0[2]; // 6 bits uint32_t qex1[2]; // 6 bits uint32_t qswu[5]; // 6 bits - uint32_t quad[8]; // 6 bits - uint32_t core[11];// 24 bits + uint32_t quad[9]; // 6 bits + uint32_t core[12];// 24 bits } sgpe_group_t; typedef struct @@ -282,7 +290,7 @@ typedef struct sgpe_group_t group; sgpe_wof_t wof; PkSemaphore sem[2]; -} SgpeStopRecord; +} SgpeStopRecord __attribute__ ((aligned (8))); typedef struct { diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_entry.c b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_entry.c index e59abffd..e1a2f4a1 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_entry.c +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_entry.c @@ -318,7 +318,8 @@ p9_sgpe_stop_entry() { PK_TRACE_ERR("ERROR: Failed to Obtain Cache %d Clk Ctrl Atomic Lock. Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_ENTRY_GET_CLK_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_ENTRY_GET_CLK_LOCK_FAILED); + continue; } PK_TRACE("Update QSSR: stop_entry_ongoing"); @@ -389,7 +390,8 @@ p9_sgpe_stop_entry() if (((~(scom_data.words.upper)) & (ex << SHIFT32(9))) != 0) { PK_TRACE_ERR("ERROR: L2 clock stop failed. HALT SGPE!"); - PK_PANIC(SGPE_STOP_ENTRY_L2_STOPCLK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_ENTRY_L2_STOPCLK_FAILED); + continue; } // MF: verify compiler generate single rlwmni @@ -482,7 +484,8 @@ p9_sgpe_stop_entry() { PK_TRACE_ERR("ERROR: Failed to Release Cache %d Clk Ctrl Atomic Lock. 
Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_ENTRY_DROP_CLK_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_ENTRY_DROP_CLK_LOCK_FAILED); + continue; } //================================================== @@ -518,7 +521,8 @@ p9_sgpe_stop_entry() { PK_TRACE_ERR("ERROR: Failed to Obtain Cache %d PCB Slave Atomic Lock. Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_ENTRY_GET_SLV_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_ENTRY_GET_SLV_LOCK_FAILED); + continue; } PK_TRACE("Update QSSR: stop_entry_ongoing"); @@ -986,7 +990,8 @@ p9_sgpe_stop_entry() if (((~scom_data.value) & CLK_REGION_ALL) != 0) { PK_TRACE_ERR("ERROR: Cache clock stop failed. HALT SGPE!"); - PK_PANIC(SGPE_STOP_ENTRY_EQ_STOPCLK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_ENTRY_EQ_STOPCLK_FAILED); + continue; } PK_TRACE("Assert vital fence via CPLT_CTRL1[3]"); @@ -1174,7 +1179,8 @@ p9_sgpe_stop_entry() { PK_TRACE_ERR("ERROR: Failed to Release Cache %d PCB Slave Atomic Lock. Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_ENTRY_DROP_SLV_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_ENTRY_DROP_SLV_LOCK_FAILED); + continue; } for(cloop = 0; cloop < CORES_PER_QUAD; cloop++) diff --git a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_exit.c b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_exit.c index b43f5b00..8219492a 100644 --- a/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_exit.c +++ b/import/chips/p9/procedures/ppe_closed/sgpe/stop_gpe/p9_sgpe_stop_exit.c @@ -148,7 +148,8 @@ p9_sgpe_stop_exit_lv8(uint32_t qloop) { PK_TRACE_ERR("ERROR: Failed to Obtain Cache %d Clk Ctrl Atomic Lock. 
Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_EXIT_GET_CLK_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_EXIT_GET_CLK_LOCK_FAILED); + return; } // do this again here for stop8 in addition to dpll_setup @@ -187,7 +188,8 @@ p9_sgpe_stop_exit_lv8(uint32_t qloop) { PK_TRACE_ERR("ERROR: Failed to Release Cache %d Clk Ctrl Atomic Lock. Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_EXIT_DROP_CLK_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_EXIT_DROP_CLK_LOCK_FAILED); + return; } PK_TRACE("Update QSSR: drop l2_stopped"); @@ -453,6 +455,11 @@ p9_sgpe_stop_exit() if (G_sgpe_stop_record.group.ex01[qloop]) { p9_sgpe_stop_exit_lv8(qloop); + + if (G_sgpe_stop_record.group.quad[VECTOR_ERROR] & BIT32(qloop)) + { + continue; + } } p9_sgpe_stop_exit_end(qloop); @@ -481,7 +488,8 @@ p9_sgpe_stop_exit() { PK_TRACE_ERR("ERROR: Failed to Obtain Cache %d PCB Slave Atomic Lock. Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_EXIT_GET_SLV_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_EXIT_GET_SLV_LOCK_FAILED); + continue; } PK_TRACE("Update STOP history on quad[%d]: in transition of exit", @@ -564,6 +572,11 @@ p9_sgpe_stop_exit() PK_TRACE_INF("SX.11D: Cache Dpll Setup"); p9_hcd_cache_dpll_setup(qloop); + if (G_sgpe_stop_record.group.quad[VECTOR_ERROR] & BIT32(qloop)) + { + continue; + } + #if !SKIP_INITF PK_TRACE_DBG("Cache DCC Skewadjust Setup"); @@ -732,9 +745,18 @@ p9_sgpe_stop_exit() PK_TRACE_INF("SX.11H: Cache Startclocks"); p9_hcd_cache_startclocks(qloop); + if (G_sgpe_stop_record.group.quad[VECTOR_ERROR] & BIT32(qloop)) + { + continue; + } p9_sgpe_stop_exit_lv8(qloop); + if (G_sgpe_stop_record.group.quad[VECTOR_ERROR] & BIT32(qloop)) + { + continue; + } + //-------------------------------------------------------------------------- PK_TRACE("+++++ +++++ QUAD STOP EXIT CONTINUE +++++ +++++"); 
//-------------------------------------------------------------------------- @@ -776,6 +798,11 @@ p9_sgpe_stop_exit() PK_TRACE_DBG("Cache Scom Cust"); p9_hcd_cache_scomcust(qloop, G_sgpe_stop_record.group.expg[qloop], 0); + if (G_sgpe_stop_record.group.quad[VECTOR_ERROR] & BIT32(qloop)) + { + continue; + } + //================================== MARK_TAG(SX_CME_BOOT, (32 >> qloop)) //================================== @@ -937,7 +964,8 @@ p9_sgpe_stop_exit() { PK_TRACE_ERR("ERROR: Failed to Release Cache %d PCB Slave Atomic Lock. Register Content: %x", qloop, scom_data.words.upper); - PK_PANIC(SGPE_STOP_EXIT_DROP_SLV_LOCK_FAILED); + SGPE_STOP_QUAD_ERROR_HANDLER(qloop, SGPE_STOP_EXIT_DROP_SLV_LOCK_FAILED); + continue; } PK_TRACE("Update STOP history on quad[%d]: \ |