From ad9bb2d1e41c1f0222c2d23474238bfa3b925c6c Mon Sep 17 00:00:00 2001 From: Chris Cain Date: Tue, 19 Mar 2019 16:04:30 -0500 Subject: Disable 24x7 when EPOW detected to prevent GPE halt OCC Updates: Master OCC will begin processing EPOW (vs waiting to process as a slave) 1. Stop 24x7 when EPOW detected 2. Run NVDIMM procedure Delay system checkstop processing 4 ticks to ensure NVDIMM procedure completes NVDIMM procedure updates: - Disable rcd recovery - Poll to wait for DRAM to reach STR Change-Id: I6834308d13866b49b6b2ad1a661f1f56fb05e939 CQ: SW460185 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/74690 Tested-by: FSP CI Jenkins Reviewed-by: William A. Bryan Reviewed-by: Douglas R. Gilbert Reviewed-by: Martha Broyles --- src/occ_405/amec/amec_sensors_power.c | 120 +++++++++++++++++++++------------- src/occ_405/amec/amec_sensors_power.h | 5 +- src/occ_405/common.c | 29 +++++++- src/occ_405/dcom/dcomMasterTx.c | 45 ++++++++----- src/occ_405/dcom/dcomSlaveRx.c | 11 ++-- src/occ_405/occbuildname.c | 2 +- src/occ_405/proc/proc_data.c | 119 ++++++++++++++++++--------------- 7 files changed, 200 insertions(+), 131 deletions(-) (limited to 'src/occ_405') diff --git a/src/occ_405/amec/amec_sensors_power.c b/src/occ_405/amec/amec_sensors_power.c index 7f1d038..5c0ec54 100755 --- a/src/occ_405/amec/amec_sensors_power.c +++ b/src/occ_405/amec/amec_sensors_power.c @@ -61,6 +61,7 @@ bool G_gpu_config_done = FALSE; GpeRequest G_epow_gpio_detected_req; GPE_BUFFER(epow_gpio_args_t G_epow_gpio_parms); bool G_epow_gpio_scheduled = FALSE; +bool G_epow_triggered = FALSE; // Bitmap of GPUs present @@ -228,7 +229,7 @@ bool amec_update_apss_sensors(void) uint8_t l_idx = 0; // Check GPIO_EPOW. 
Skip everything if asserted - if (epow_gpio_asserted()) + if (epow_gpio_asserted(FALSE)) { l_sensors_updated = FALSE; break; @@ -1085,11 +1086,12 @@ void amec_update_gpu_configuration(void) // Thread: RealTime Loop // // End Function Specification -bool epow_gpio_asserted() +bool epow_gpio_asserted(const bool i_from_slave_inbox) { bool l_epow_valid = FALSE; uint8_t l_epow_value = 1; - bool l_epow_asserted = FALSE; + static bool L_epow_asserted = FALSE; + static bool L_epow_scheduled = FALSE; // Get the value of GPIO_EPOW and make sure it is valid l_epow_valid = apss_gpio_get(G_sysConfigData.apss_gpio_map.nvdimm_epow, @@ -1105,61 +1107,87 @@ bool epow_gpio_asserted() L_trace = false; } - // Signal is active-low - if (l_epow_valid && !l_epow_value) + if (L_epow_asserted) { - TRAC_IMP("epow_gpio_asserted: GPIO EPOW Detected! Notifying GPE1"); - l_epow_asserted = TRUE; - // GPIO_EPOW was asserted create GpeRequest object to notify GPE1 - int l_rc = gpe_request_create(&G_epow_gpio_detected_req, // Task Request - &G_async_gpe_queue1, // GPE1 queue - IPC_ST_EPOW_GPIO_ASSERT_FUNCID, // Function ID - &G_epow_gpio_parms, // Task Parameters - SSX_WAIT_FOREVER, // No timeout - NULL, // No callback - NULL, // No callback parms - ASYNC_CALLBACK_IMMEDIATE); // Options - if (0 == l_rc) + // EPOW was asserted during the last call + + if (! L_epow_scheduled) { - // Need to send the configured MBA's bit field to GPE1 - G_epow_gpio_parms.configured_mbas = G_configured_mbas; - l_rc = gpe_request_schedule(&G_epow_gpio_detected_req); - if (0 == l_rc) + if ((OCC_MASTER != G_occ_role) || i_from_slave_inbox) { - G_epow_gpio_scheduled = TRUE; + TRAC_IMP("epow_gpio_asserted: GPIO EPOW Detected! 
Notifying GPE1 (tick=%d)", CURRENT_TICK); + + // GPIO_EPOW was asserted create GpeRequest object to notify GPE1 + int l_rc = gpe_request_create(&G_epow_gpio_detected_req, // Task Request + &G_async_gpe_queue1, // GPE1 queue + IPC_ST_EPOW_GPIO_ASSERT_FUNCID, // Function ID + &G_epow_gpio_parms, // Task Parameters + SSX_WAIT_FOREVER, // No timeout + NULL, // No callback + NULL, // No callback parms + ASYNC_CALLBACK_IMMEDIATE); // Options + if (0 == l_rc) + { + // Need to send the configured MBA's bit field to GPE1 + G_epow_gpio_parms.configured_mbas = G_configured_mbas; + l_rc = gpe_request_schedule(&G_epow_gpio_detected_req); + if (0 == l_rc) + { + G_epow_gpio_scheduled = TRUE; + } + else + { + TRAC_ERR("epow_gpio_asserted: schedule failed w/rc=0x%08X", l_rc); + } + } + else + { + TRAC_ERR("epow_gpio_asserted: Failed to create epow_gpio_detected IPC task (rc=%d)", l_rc); + } + + // Create informational error and request safe mode since system is powering off + /* + * @errortype + * @moduleid AMEC_UPDATE_APSS_SENSORS + * @reasoncode EPOW_ASSERTED + * @userdata1 GPE IPC RC + * @userdata2 Configured MBAs + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc GPIO_EPOW was asserted + */ + errlHndl_t l_err = createErrl(AMEC_UPDATE_APSS_SENSORS, + EPOW_ASSERTED, + OCC_NO_EXTENDED_RC, + ERRL_SEV_INFORMATIONAL, + NULL, + DEFAULT_TRACE_SIZE, + l_rc, + G_configured_mbas); + REQUEST_SAFE_MODE( l_err ); + + L_epow_scheduled = TRUE; } else { - TRAC_ERR("epow_gpio_asserted: schedule failed w/rc=0x%08X", l_rc); + TRAC_IMP("epow_gpio_asserted: GPIO EPOW Detected! 
skipping GPE1 notification (role=0x%02X, tick=%d)", + G_occ_role, CURRENT_TICK); } } - else + } + else if (l_epow_valid && !l_epow_value) // Signal is active-low + { + // EPOW has been detected + L_epow_asserted = TRUE; + + // Disable 24x7 to prevent a GPE halt due to 24x7 main memory access after the EPOW procedure runs + if ((G_internal_flags & INT_FLAG_DISABLE_24X7) == 0) { - TRAC_ERR("epow_gpio_asserted: Failed to create epow_gpio_detected IPC task (rc=%d)", l_rc); + TRAC_IMP("epow_gpio_asserted: GPIO EPOW Detected! Disabling 24x7"); + G_internal_flags |= INT_FLAG_DISABLE_24X7; } - - // Create informational error and request safe mode since system is powering off - /* - * @errortype - * @moduleid AMEC_UPDATE_APSS_SENSORS - * @reasoncode EPOW_ASSERTED - * @userdata1 GPE IPC RC - * @userdata2 Configured MBAs - * @userdata4 OCC_NO_EXTENDED_RC - * @devdesc GPIO_EPOW was asserted - */ - errlHndl_t l_err = createErrl(AMEC_UPDATE_APSS_SENSORS, - EPOW_ASSERTED, - OCC_NO_EXTENDED_RC, - ERRL_SEV_INFORMATIONAL, - NULL, - DEFAULT_TRACE_SIZE, - l_rc, - G_configured_mbas); - REQUEST_SAFE_MODE( l_err ); } - return l_epow_asserted; + return L_epow_asserted; } /*----------------------------------------------------------------------------*/ /* End */ diff --git a/src/occ_405/amec/amec_sensors_power.h b/src/occ_405/amec/amec_sensors_power.h index 1b52d33..4987175 100755 --- a/src/occ_405/amec/amec_sensors_power.h +++ b/src/occ_405/amec/amec_sensors_power.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2018 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -62,6 +62,7 @@ void amec_update_gpu_configuration(void); // Helper function called when updating the AMEC sensors for GPIO to detect // GPIO_EPOW. If it has been asserted, we send an IPC command to GPE1 to // perform some SCOMs allowing NVDIMMs to back up their data. 
+// Set i_from_slave_inbox to TRUE when called as a slave // Returns TRUE if EPOW was asserted. -bool epow_gpio_asserted(void); +bool epow_gpio_asserted(const bool i_from_slave_inbox); #endif // _AMEC_SENSORS_POWER_H diff --git a/src/occ_405/common.c b/src/occ_405/common.c index 833dba0..213f732 100755 --- a/src/occ_405/common.c +++ b/src/occ_405/common.c @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2018 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -33,6 +33,8 @@ uint8_t G_host_notifications_pending = 0; extern bool G_ipl_time; extern uint16_t G_allow_trace_flags; +extern uint8_t G_occ_interrupt_type; +#define TICKS_TO_DELAY_CHECKSTOP_PROCESSING 4 // Function Specification // @@ -56,7 +58,7 @@ void task_misc_405_checks(task_t *i_self) static bool L_checkstop_traced = false; uint8_t l_reason_code = 0; bool l_create_errl = false; - + static unsigned int L_delay_cstop = TICKS_TO_DELAY_CHECKSTOP_PROCESSING; do { @@ -87,7 +89,28 @@ void task_misc_405_checks(task_t *i_self) l_oisr0_status.fields.gpe0_error || // GPE0 Halt l_oisr0_status.fields.gpe1_error) // GPE1 Halt { - l_create_errl = true; + if(l_oisr0_status.fields.check_stop_ppc405) + { + // For FSP systems, delay the system checkstop processing to allow NVDIMM procedure to run + if ((G_occ_interrupt_type == FSP_SUPPORTED_OCC) && (L_delay_cstop > 0)) + { + if (L_delay_cstop == TICKS_TO_DELAY_CHECKSTOP_PROCESSING) + { + TRAC_IMP("task_misc_405_checks: System checkstop detected by RTL: OISR0[0x%08x] - delaying halt (tick=%d)", + l_oisr0_status.value, CURRENT_TICK); + } + --L_delay_cstop; + } + else + { + l_create_errl = true; + } + } + else + { + // GPE0/GPE1 Halt + l_create_errl = true; + } } } diff --git a/src/occ_405/dcom/dcomMasterTx.c b/src/occ_405/dcom/dcomMasterTx.c index 318ac25..01f5ff8 100644 --- a/src/occ_405/dcom/dcomMasterTx.c +++ 
b/src/occ_405/dcom/dcomMasterTx.c @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2017 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -38,6 +38,9 @@ #include #include +extern bool G_epow_triggered; +bool epow_gpio_asserted(const bool i_from_slave_inbox); + extern UINT8 g_amec_tb_record; // From amec_amester.c for syncronized traces extern PWR_READING_TYPE G_pwr_reading_type; @@ -114,11 +117,11 @@ uint32_t dcom_build_slv_inbox(void) G_apss_pwr_meas.adc, sizeof( G_dcom_slv_inbox_tx[l_slv_idx].adc)); - memcpy( G_dcom_slv_inbox_tx[l_slv_idx].gpio, + memcpy( G_dcom_slv_inbox_tx[l_slv_idx].gpio, G_apss_pwr_meas.gpio, sizeof( G_dcom_slv_inbox_tx[l_slv_idx].gpio)); - memcpy( G_dcom_slv_inbox_tx[l_slv_idx].tod, + memcpy( G_dcom_slv_inbox_tx[l_slv_idx].tod, &G_apss_pwr_meas.tod, sizeof( G_dcom_slv_inbox_tx[l_slv_idx].tod)); @@ -192,6 +195,15 @@ uint32_t dcom_build_slv_inbox(void) G_dcom_slv_inbox_doorbell_tx.magic_counter++; G_dcom_slv_inbox_doorbell_tx.magic2 = PBAX_MAGIC_NUMBER_32B; + if (IS_OCC_STATE_ACTIVE() && (!isSafeStateRequested())) + { + // If the EPOW is asserted, master should start handling it right away (vs waiting to handle as a slave) + memcpy(G_dcom_slv_inbox_rx.gpio, + G_apss_pwr_meas.gpio, + sizeof( G_dcom_slv_inbox_rx.gpio)); + epow_gpio_asserted(TRUE); + } + return l_addr_of_slv_inbox_in_main_mem; } @@ -232,7 +244,7 @@ uint32_t dcom_which_buffer(void) void task_dcom_tx_slv_inbox( task_t *i_self) { static bool L_error = FALSE; - static uint8_t L_bce_not_ready_count = 0; + static unsigned int L_bce_not_ready_count = 0; uint32_t l_orc = OCC_SUCCESS_REASON_CODE; uint32_t l_orc_ext = OCC_NO_EXTENDED_RC; uint64_t l_start = ssx_timebase_get(); @@ -240,6 +252,7 @@ void task_dcom_tx_slv_inbox( task_t *i_self) bool l_pwr_meas_complete_invalid = FALSE; bool l_request_reset = FALSE; bool l_ssx_failure = FALSE; + // Use a 
static local bool to track whether the BCE request used // here has ever been successfully created at least once static bool L_bce_slv_inbox_tx_request_created_once = FALSE; @@ -254,7 +267,7 @@ void task_dcom_tx_slv_inbox( task_t *i_self) (G_pwr_reading_type != PWR_READING_TYPE_APSS) || G_apss_recovery_requested ) { - G_ApssPwrMeasCompleted = TRUE; + G_ApssPwrMeasCompleted = TRUE; } l_pwr_meas = G_ApssPwrMeasCompleted; @@ -296,7 +309,7 @@ void task_dcom_tx_slv_inbox( task_t *i_self) else if (l_req_idle && l_req_complete) { // Most likely case first. The request was created - // and scheduled and has completed without error. Proceed. + // and scheduled and has completed without error. // Proceed with request create and schedule. l_proceed_with_request_and_schedule = TRUE; } @@ -334,20 +347,16 @@ void task_dcom_tx_slv_inbox( task_t *i_self) if(L_bce_not_ready_count == DCOM_TRACE_NOT_IDLE_AFTER_CONSEC_TIMES) { - // Trace important information from the request - TRAC_INFO("BCE slv inbox tx request not idle and not complete: callback_rc[%d] options[0x%x] state[0x%x] abort_state[0x%x] completion_state[0x%x]", - G_slv_inbox_tx_pba_request.request.callback_rc, - G_slv_inbox_tx_pba_request.request.options, - G_slv_inbox_tx_pba_request.request.state, - G_slv_inbox_tx_pba_request.request.abort_state, - G_slv_inbox_tx_pba_request.request.completion_state); - TRAC_INFO("NOT proceeding with BCE slv inbox tx request and schedule"); + // Trace important information from the request + TRAC_INFO("BCE slv inbox tx request not idle and not complete: callback_rc[%d] options[0x%x] state[0x%x] abort_state[0x%x] completion_state[0x%x]", + G_slv_inbox_tx_pba_request.request.callback_rc, + G_slv_inbox_tx_pba_request.request.options, + G_slv_inbox_tx_pba_request.request.state, + G_slv_inbox_tx_pba_request.request.abort_state, + G_slv_inbox_tx_pba_request.request.completion_state); + TRAC_INFO("NOT proceeding with BCE slv inbox tx request and schedule"); } } - else - { - // This case is not 
possible. Ignore it. - } // Only proceed if the BCE request state checked out if (l_proceed_with_request_and_schedule) diff --git a/src/occ_405/dcom/dcomSlaveRx.c b/src/occ_405/dcom/dcomSlaveRx.c index 5af0056..833af47 100644 --- a/src/occ_405/dcom/dcomSlaveRx.c +++ b/src/occ_405/dcom/dcomSlaveRx.c @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2017 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -481,7 +481,7 @@ void task_dcom_wait_for_master( task_t *i_self) static bool L_Pmax_error_logged = FALSE; static uint32_t L_pobid_retries_left = POBID_RETRIES; static uint16_t L_no_master_doorbell_cnt = 0; - static uint16_t L_trace_every_count = 1; + DCOM_DBG("0. Wait for Master\n"); @@ -507,17 +507,15 @@ void task_dcom_wait_for_master( task_t *i_self) // counter L_no_master_doorbell_cnt++; - if (L_no_master_doorbell_cnt % L_trace_every_count == 0) + if ((L_no_master_doorbell_cnt <= 10) || (L_no_master_doorbell_cnt % 10000 == 0)) { + // Trace first 10 occurrences and then every 10,000 TRAC_INFO("task_dcom_wait_for_master: experiencing data collection problems! 
fail_count=%i", L_no_master_doorbell_cnt); } if (L_no_master_doorbell_cnt == APSS_DATA_FAIL_PMAX_RAIL) { - // Now only trace every 1000th occurrence - L_trace_every_count = 1000; - // Inform AMEC that Pmax_rail needs to change G_apss_lower_pmax_rail = TRUE; @@ -728,7 +726,6 @@ void task_dcom_wait_for_master( task_t *i_self) // the no_master_doorbell counter G_apss_lower_pmax_rail = FALSE; L_no_master_doorbell_cnt = 0; - L_trace_every_count = 1; } // Got a multicast doorbell diff --git a/src/occ_405/occbuildname.c b/src/occ_405/occbuildname.c index 08cc1a0..7e7c6d8 100755 --- a/src/occ_405/occbuildname.c +++ b/src/occ_405/occbuildname.c @@ -34,6 +34,6 @@ volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = #else -volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /**/ "op_occ_190228a\0" /**/ ; +volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /**/ "op_occ_190322a\0" /**/ ; #endif diff --git a/src/occ_405/proc/proc_data.c b/src/occ_405/proc/proc_data.c index c2eb17f..88c179f 100755 --- a/src/occ_405/proc/proc_data.c +++ b/src/occ_405/proc/proc_data.c @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2017 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -715,67 +715,78 @@ void task_24x7(task_t * i_task) { static uint8_t L_numTicks = 0x00; // never called since OCC started static bool L_idle_trace = FALSE; + static bool L_logged_disable = FALSE; // Schedule 24x7 task if it hasn't been disabled if( (!G_24x7_disabled) && !(G_internal_flags & INT_FLAG_DISABLE_24X7) ) { - // Schedule 24x7 task if idle - if (!async_request_is_idle(&G_24x7_request.request)) - { - if(!L_idle_trace) - { - INTR_TRAC_ERR("task_24x7: request not idle"); - L_idle_trace = TRUE; - } - L_numTicks++; - } - else - { - if(L_idle_trace) - { - INTR_TRAC_INFO("task_24x7: previously was not idle and is now idle after %d ticks", L_numTicks); - L_idle_trace = FALSE; - } - // Clear errors and init parameters for GPE task - G_24x7_parms.error.error = 0; - G_24x7_parms.numTicksPassed = L_numTicks; - - int l_rc = gpe_request_schedule(&G_24x7_request); - if (0 == l_rc) - { - L_numTicks = 1; // next time called will be 1 tick later - } - else - { - errlHndl_t l_err = NULL; - INTR_TRAC_ERR("task_24x7: schedule failed w/rc=0x%08X (%d us)", - l_rc, (int) ((ssx_timebase_get())/(SSX_TIMEBASE_FREQUENCY_HZ/1000000))); - /* - * @errortype - * @moduleid PROC_24X7_MOD - * @reasoncode SSX_GENERIC_FAILURE - * @userdata1 gpe_request_schedule return code - * @userdata4 ERC_24X7_GPE_SCHEDULE_FAILURE - * @devdesc Failure to schedule 24x7 GpeRequest - */ - l_err = createErrl( - PROC_24X7_MOD, //ModId - SSX_GENERIC_FAILURE, //Reasoncode - ERC_24X7_GPE_SCHEDULE_FAILURE, //Extended reason code - ERRL_SEV_PREDICTIVE, //Severity - NULL, //Trace Buf - DEFAULT_TRACE_SIZE, //Trace Size - l_rc, //Userdata1 - 0 //Userdata2 - ); + // Schedule 24x7 task if idle + if (!async_request_is_idle(&G_24x7_request.request)) + { + if(!L_idle_trace) + { + INTR_TRAC_ERR("task_24x7: request not idle"); + L_idle_trace = TRUE; + } + L_numTicks++; + } + else + { + if(L_idle_trace) + { + INTR_TRAC_INFO("task_24x7: previously was not idle and is now idle after %d ticks", L_numTicks); + 
L_idle_trace = FALSE; + } + // Clear errors and init parameters for GPE task + G_24x7_parms.error.error = 0; + G_24x7_parms.numTicksPassed = L_numTicks; + if (L_logged_disable) + { + INTR_TRAC_INFO("task_24x7: schedule re-enabled"); + L_logged_disable = FALSE; + } - // Request reset since this should never happen. - REQUEST_RESET(l_err); - } - } + int l_rc = gpe_request_schedule(&G_24x7_request); + if (0 == l_rc) + { + L_numTicks = 1; // next time called will be 1 tick later + } + else + { + errlHndl_t l_err = NULL; + INTR_TRAC_ERR("task_24x7: schedule failed w/rc=0x%08X (%d us)", + l_rc, (int) ((ssx_timebase_get())/(SSX_TIMEBASE_FREQUENCY_HZ/1000000))); + /* + * @errortype + * @moduleid PROC_24X7_MOD + * @reasoncode SSX_GENERIC_FAILURE + * @userdata1 gpe_request_schedule return code + * @userdata4 ERC_24X7_GPE_SCHEDULE_FAILURE + * @devdesc Failure to schedule 24x7 GpeRequest + */ + l_err = createErrl( + PROC_24X7_MOD, //ModId + SSX_GENERIC_FAILURE, //Reasoncode + ERC_24X7_GPE_SCHEDULE_FAILURE, //Extended reason code + ERRL_SEV_PREDICTIVE, //Severity + NULL, //Trace Buf + DEFAULT_TRACE_SIZE, //Trace Size + l_rc, //Userdata1 + 0 //Userdata2 + ); + + // Request reset since this should never happen. + REQUEST_RESET(l_err); + } + } } // !G_24x7_disabled else { + if (! L_logged_disable) + { + INTR_TRAC_INFO("task_24x7: not scheduled due to disable"); + L_logged_disable = TRUE; + } // 24x7 is disabled INC number ticks so 24x7 knows how many ticks it was disabled for L_numTicks++; } -- cgit v1.2.1