summaryrefslogtreecommitdiffstats
path: root/import
diff options
context:
space:
mode:
authorRahul Batra <rbatra@us.ibm.com>2019-07-29 13:13:39 -0400
committerhostboot <hostboot@us.ibm.com>2019-09-05 04:01:28 -0500
commit1096b337782a74a81b1ed1660a8731da01ebfa68 (patch)
tree8903eb612505156a93fa9a21386e8c4675f2a213 /import
parent8df77fd26804457df401de87dada9729309893d5 (diff)
downloadtalos-hcode-1096b337782a74a81b1ed1660a8731da01ebfa68.tar.gz
talos-hcode-1096b337782a74a81b1ed1660a8731da01ebfa68.zip
PM: Fix DB0 Hang
Key_Cronus_Test=PM_REGRESS Change-Id: I706ec7b87e777b736153d5765ced0a3f6cea5d96 CQ: SW470688 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/81266 Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com> Tested-by: PPE CI <ppe-ci+hostboot@us.ibm.com> Tested-by: Cronus HW CI <cronushw-ci+hostboot@us.ibm.com> Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com> Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com> Tested-by: HWSV CI <hwsv-ci+hostboot@us.ibm.com> Reviewed-by: YUE DU <daviddu@us.ibm.com> Reviewed-by: RANGANATHPRASAD G. BRAHMASAMUDRA <prasadbgr@in.ibm.com> Reviewed-by: Jennifer A Stofer <stofer@us.ibm.com>
Diffstat (limited to 'import')
-rw-r--r--import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h1
-rw-r--r--import/chips/p9/procedures/ppe/iota/iota_uih.c81
-rw-r--r--import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c6
-rw-r--r--import/chips/p9/procedures/ppe_closed/cme/p9_cme_irq.h15
-rw-r--r--import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_intercme.c8
-rw-r--r--import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_thread_db.c75
-rw-r--r--import/chips/p9/procedures/ppe_closed/pgpe/pstate_gpe/p9_pgpe_pstate.c2
7 files changed, 175 insertions, 13 deletions
diff --git a/import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h b/import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h
index fcc74dbd..1b940919 100644
--- a/import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h
+++ b/import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h
@@ -129,6 +129,7 @@ enum PM_CME_FLAGS_DEFS
CME_FLAGS_DROOP_SUSPEND_ENTRY = 14,
CME_FLAGS_SAFE_MODE = 16,
CME_FLAGS_PSTATES_SUSPENDED = 17,
+ CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED = 18,
CME_FLAGS_SPWU_CHECK_ENABLE = 22,
CME_FLAGS_BLOCK_ENTRY_STOP11 = 23,
CME_FLAGS_PSTATES_ENABLED = 24,
diff --git a/import/chips/p9/procedures/ppe/iota/iota_uih.c b/import/chips/p9/procedures/ppe/iota/iota_uih.c
index 546def67..28272661 100644
--- a/import/chips/p9/procedures/ppe/iota/iota_uih.c
+++ b/import/chips/p9/procedures/ppe/iota/iota_uih.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
-/* COPYRIGHT 2017 */
+/* COPYRIGHT 2017,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -32,6 +32,9 @@ int g_eimr_stack_ctr = -1;
uint64_t g_eimr_override_stack[IOTA_NUM_EXT_IRQ_PRIORITIES];
uint64_t g_eimr_override = 0x0000000000000000;
uint64_t g_ext_irq_vector = 0;
+uint32_t g_db0_pending_fit_tick_count = 0;
+uint32_t g_comm_recv_pending_fit_tick_count = 0;
+uint32_t g_intercme_in0_pending_tick_count = 0;
// Unified IRQ priority and masking handler.
// - Locates the highest priority IRQ task vector that has at least one of its
@@ -51,13 +54,50 @@ uint32_t iota_uih(void)
do
{
- if(ext_irq_vectors_cme[iPrtyLvl][IDX_PRTY_VEC] & g_ext_irq_vector)
+ //Note: Special handling of DB0/COMM_RECV/INTERCME_IN0 to handle the
+ //interrupt starvation case.
+ //
+ //Reason: DB0(Quad Manager CME) and COMM_RECV(Sibling CME) are lower priority
+ //than the STOP related interrupts,
+ //and can stay pending for a very long time(~ms scale) on systems with
+ //a high frequency of STOP requests. This can then prevent PGPE from
+ //completing OCC directed IPC operations within the expected
+ //time bounds(< 8ms)
+ //
+ //Mechanism:
+ //1)In FIT: Every FIT tick, we check if DB0(on Quad manager) or
+ //COMM_RECV/INTERCME_IN0(on Sibling CME) is pending. If one is seen pending for more than
+ //DB0_FIT_TICK_THRESHOLD/COMM_RECV_FIT_TICK_THRESHOLD/INTERCME_IN0_FIT_TICK_THRESHOLD
+ //FIT ticks, then we take action in UIH
+ //
+ //2)In UIH: We set priority level to IDX_PRTY_LVL_DB0/IDX_PRTY_LVL_COMM_RECVD/
+ //IDX_PRTY_LVL_INTERCME_IN0, and mask everything except Priority 0(xstop, exceptions,
+ //etc). This then allows the pending interrupt to complete
+ if(g_db0_pending_fit_tick_count > DB0_FIT_TICK_THRESHOLD)
+ {
+ bFound = 1;
+ iPrtyLvl = IDX_PRTY_LVL_DB0;
+ break;
+ }
+ else if(g_comm_recv_pending_fit_tick_count > COMM_RECV_FIT_TICK_THRESHOLD)
+ {
+ bFound = 1;
+ iPrtyLvl = IDX_PRTY_LVL_COMM_RECVD;
+ break;
+ }
+ else if(g_intercme_in0_pending_tick_count > INTERCME_IN0_FIT_TICK_THRESHOLD)
+ {
+ bFound = 1;
+ iPrtyLvl = IDX_PRTY_LVL_INTERCME_IN0;
+ break;
+ }
+ else if(ext_irq_vectors_cme[iPrtyLvl][IDX_PRTY_VEC] & g_ext_irq_vector)
{
bFound = 1;
break;
}
}
- while(++iPrtyLvl < (IOTA_NUM_EXT_IRQ_PRIORITIES - 1)); //No need to check DISABLED.
+ while(++iPrtyLvl < (IOTA_NUM_EXT_IRQ_PRIORITIES - 1)); //No need to check DISABLED.
// Only manipulate EIMR masks for task level prty levels.
// Let shared non-task IRQs (iPrtyLvl=0) be processed by
@@ -82,9 +122,38 @@ uint32_t iota_uih(void)
}
// 3. Write the new mask for this priority level.
- out64(CME_LCL_EIMR, ext_irq_vectors_cme[iPrtyLvl][IDX_MASK_VEC] |
- g_eimr_override);
-
+ //Note: Special handling of DB0/COMM_RECV/INTERCME_IN0 to handle the
+ //interrupt starvation case.
+ //
+ //Reason: DB0(Quad Manager CME) and COMM_RECV(Sibling CME) are lower priority
+ //than the STOP related interrupts,
+ //and can stay pending for a very long time(~ms scale) on systems with
+ //a high frequency of STOP requests. This can then prevent PGPE from
+ //completing OCC directed IPC operations within the expected
+ //time bounds(< 8ms)
+ //
+ //Mechanism:
+ //1)In FIT: Every FIT tick, we check if DB0(on Quad manager) or
+ //COMM_RECV/INTERCME_IN0(on Sibling CME) is pending. If one is seen pending for more than
+ //DB0_FIT_TICK_THRESHOLD/COMM_RECV_FIT_TICK_THRESHOLD/INTERCME_IN0_FIT_TICK_THRESHOLD
+ //FIT ticks, then we take action in UIH
+ //
+ //2)In UIH: We set priority level to IDX_PRTY_LVL_DB0/IDX_PRTY_LVL_COMM_RECVD/
+ //IDX_PRTY_LVL_INTERCME_IN0, and mask everything except Priority 0(xstop, exceptions,
+ //etc). This then allows the pending interrupt to complete
+ if ((g_db0_pending_fit_tick_count > DB0_FIT_TICK_THRESHOLD) ||
+ (g_comm_recv_pending_fit_tick_count > COMM_RECV_FIT_TICK_THRESHOLD) ||
+ (g_intercme_in0_pending_tick_count > INTERCME_IN0_FIT_TICK_THRESHOLD))
+ {
+ PK_TRACE_INF("UIH: Starvation Detected. Overriding Mask!");
+ out64(CME_LCL_EIMR, (ext_irq_vectors_cme[0][IDX_MASK_VEC] |
+ g_eimr_override));
+ }
+ else
+ {
+ out64(CME_LCL_EIMR, ext_irq_vectors_cme[iPrtyLvl][IDX_MASK_VEC] |
+ g_eimr_override);
+ }
}
else
{
diff --git a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c
index f9632f45..0818517e 100644
--- a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c
+++ b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
-/* COPYRIGHT 2017,2018 */
+/* COPYRIGHT 2017,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -46,6 +46,8 @@ CmeFitRecord G_cme_fit_record = {0, 0, 0, 0, 0xFFFFFFFF, 0};
#endif
+void p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter();
+
uint32_t G_CME_LCL_EINR = CME_LCL_EINR;
uint32_t G_CME_LCL_EISR = CME_LCL_EISR;
uint32_t G_CME_LCL_EISR_CLR = CME_LCL_EISR_CLR;
@@ -102,6 +104,8 @@ void fit_handler()
p9_cme_core_livelock_buster();
#endif
+ //Handle DB0/Comm_Recv/InterCME_IN0 starvation case
+ p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter();
}
#endif //fit handler
diff --git a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_irq.h b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_irq.h
index c5a29801..170eb88f 100644
--- a/import/chips/p9/procedures/ppe_closed/cme/p9_cme_irq.h
+++ b/import/chips/p9/procedures/ppe_closed/cme/p9_cme_irq.h
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
-/* COPYRIGHT 2015,2018 */
+/* COPYRIGHT 2015,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -48,6 +48,19 @@
#include <stdint.h>
+//CME_TSEL is set to 8 which means FIT has period of 1.04ms when
+//Nest Freq is 2000MHz. Ideally, should calculate period of FIT based
+//on nest frequency, but nest frequency is NOT plumbed to CME and we
+//don't need to be highly accurate here.
+//Note, from PGPE perspective, the latency of the DB0 operation depends
+//on the amount of time DB0 is pending on Quad Manager plus COMM_RECV is pending
+//on sibling. This is because COMM_RECV interrupt is triggered by the DB0
+//handler on the quad manager. Therefore, we must set the COMM_RECV_FIT_TICK_THRESHOLD
+//to be smaller. NOTE(review): both thresholds are currently 1 -- confirm intent.
+#define DB0_FIT_TICK_THRESHOLD 1 //Threshold for DB0 pending count(2ms)
+#define COMM_RECV_FIT_TICK_THRESHOLD 1 //Threshold for COMM_RECV pending count(2ms)
+#define INTERCME_IN0_FIT_TICK_THRESHOLD 1 //Threshold for INTERCME_IN0 pending count(2ms)
+
// Priority Levels
#define IDX_PRTY_LVL_HIPRTY 0
#define IDX_PRTY_LVL_DB3 1
diff --git a/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_intercme.c b/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_intercme.c
index e802448e..9497442c 100644
--- a/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_intercme.c
+++ b/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_intercme.c
@@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
-/* COPYRIGHT 2016,2018 */
+/* COPYRIGHT 2016,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
@@ -43,17 +43,21 @@
//
extern CmePstateRecord G_cme_pstate_record;
extern CmeRecord G_cme_record;
+extern uint32_t g_comm_recv_pending_fit_tick_count;
+extern uint32_t g_intercme_in0_pending_tick_count;
//
//InterCME_IN0 handler
//
void p9_cme_pstate_intercme_in0_irq_handler(void)
{
+ g_intercme_in0_pending_tick_count = 0; //InterCME_IN0 is being serviced; restart its starvation count
p9_cme_pstate_process_db0_sibling();
}
void p9_cme_pstate_intercme_msg_handler(void)
{
+ g_comm_recv_pending_fit_tick_count = 0; //COMM_RECV is being serviced; restart its starvation count
p9_cme_pstate_sibling_lock_and_intercme_protocol(INTERCME_MSG_LOCK_WAIT_ON_RECV);
}
@@ -119,6 +123,8 @@ void p9_cme_pstate_process_db0_sibling()
//Unmask EIMR[OCC_HEARTBEAT_LOST/4]
g_eimr_override &= ~BIT64(4);
+ out32(G_CME_LCL_FLAGS_OR, BIT32(CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED));//Set Starvation Count enabled
+
//Clear Core GPMMR RESET_STATE_INDICATOR bit to show pstates have started
CME_PUTSCOM(PPM_GPMMR_CLR, G_cme_record.core_enabled, BIT64(15));
}
diff --git a/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_thread_db.c b/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_thread_db.c
index 0c3f8cde..cba9f9e8 100644
--- a/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_thread_db.c
+++ b/import/chips/p9/procedures/ppe_closed/cme/pstate_cme/p9_cme_thread_db.c
@@ -56,6 +56,9 @@ extern CmePstateRecord G_cme_pstate_record;
extern cmeHeader_t* G_cmeHeader;
extern LocalPstateParmBlock* G_lppb;
extern uint8_t G_vdm_threshold_table[];
+extern uint32_t g_db0_pending_fit_tick_count;
+extern uint32_t g_comm_recv_pending_fit_tick_count;
+extern uint32_t g_intercme_in0_pending_tick_count;
cppm_cmedb0_t G_dbData;
@@ -75,6 +78,7 @@ void p9_cme_pstate_db0_start();
void p9_cme_pstate_db0_glb_bcast();
void p9_cme_pstate_db0_clip_bcast();
void p9_cme_pstate_update();
+void p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter();
//
//Doorbell0 interrupt handler
@@ -167,6 +171,67 @@ void p9_cme_pstate_db0_handler(void)
}
//
+//Doorbell0/Comm Recv/InterCME_IN0 pending counter(called every FIT tick)
+//
+void p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter()
+{
+ //Note: Special handling of DB0/COMM_RECV/INTERCME_IN0 to handle the
+ //interrupt starvation case.
+ //
+ //Reason: DB0(Quad Manager CME) and COMM_RECV(Sibling CME) are lower priority
+ //than the STOP related interrupts,
+ //and can stay pending for a very long time(~ms scale) on systems with
+ //a high frequency of STOP requests. This can then prevent PGPE from
+ //completing OCC directed IPC operations within the expected
+ //time bounds(< 8ms)
+ //
+ //Mechanism:
+ //1)In FIT: Every FIT tick, this function checks EISR for a pending DB0(on Quad
+ //manager) or a pending COMM_RECV/INTERCME_IN0(on Sibling CME) and increments the
+ //matching g_*_pending_*tick_count. Once a count exceeds its *_FIT_TICK_THRESHOLD,
+ //we take action in UIH
+ //
+ //2)In UIH: We set priority level to that of the starved interrupt, and mask
+ //everything except Priority 0(xstop, exceptions, etc). This then allows the
+ //pending interrupt to complete
+ uint32_t cme_flags = in32(G_CME_LCL_FLAGS);
+
+ if (cme_flags & BIT32(CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED)) //Counting is gated on this CME flag
+ {
+ if(G_cme_pstate_record.qmFlag) //Quad Manager CME: track pending DB0
+ {
+
+ if (cme_flags & BIT32(CME_FLAGS_CORE0_GOOD)) //Core0 good: test EISR bit 36, otherwise bit 37
+ {
+ if (in32_sh(CME_LCL_EISR) & BIT64SH(36)) //presumably DB0 from core0 -- TODO confirm bit assignment
+ {
+ g_db0_pending_fit_tick_count++;
+ }
+ }
+ else
+ {
+ if (in32_sh(CME_LCL_EISR) & BIT64SH(37)) //presumably DB0 from core1 -- TODO confirm bit assignment
+ {
+ g_db0_pending_fit_tick_count++;
+ }
+ }
+ }
+ else //Sibling CME: track pending COMM_RECV and INTERCME_IN0
+ {
+ if (in32(CME_LCL_EISR) & BIT32(29)) //presumably COMM_RECV -- TODO confirm bit assignment
+ {
+ g_comm_recv_pending_fit_tick_count++;
+ }
+
+ if(in32(CME_LCL_EISR) & BIT32(7)) //presumably INTERCME_IN0 -- TODO confirm bit assignment
+ {
+ g_intercme_in0_pending_tick_count++;
+ }
+ }
+ }
+}
+
+//
//Doorbell3 interrupt handler
//
//Note: This enabled on both QuadManagerCME and SiblingCME
@@ -660,6 +725,9 @@ void p9_cme_pstate_process_db0()
G_cme_pstate_record.updateAnalogError = 0;
uint64_t scom_data;
+ //Clear out db0_pending_tick_count
+ g_db0_pending_fit_tick_count = 0;
+
PK_TRACE_INF("PSTATE: Process DB0 Enter");
//Clear EISR and read DB0 register
@@ -856,7 +924,7 @@ inline void p9_cme_pstate_register()
}
}
- PK_TRACE_INF("PSTATE: Sib Register MsgCnt=%d", msgCnt);
+ PK_TRACE_DBG("PSTATE: Sib Register MsgCnt=%d", msgCnt);
}
}
}
@@ -894,6 +962,7 @@ void p9_cme_pstate_db0_start()
ack = MSGID_PCB_TYPE4_ACK_PSTATE_PROTO_ACK;
out32(G_CME_LCL_FLAGS_OR, BIT32(24));//Set Pstates Enabled
+ out32(G_CME_LCL_FLAGS_OR, BIT32(CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED));//Set Starvation Count enabled
//Enable PMCR Interrupts (for good cores) when this task is done
g_eimr_override &= ~(uint64_t)(G_cme_record.core_enabled << SHIFT64(35));
@@ -1035,7 +1104,7 @@ inline void p9_cme_pstate_db0_pmsr_updt()
//Set Core GPMMR RESET_STATE_INDICATOR bit to show pstates have stopped
CME_PUTSCOM(PPM_GPMMR_OR, G_cme_record.core_enabled, BIT64(15));
- PK_TRACE_INF("PSTATE: DB0 Safe Mode Exit");
+ PK_TRACE_INF("PSTATE: DB0 PMSR Updt Exit");
}
void p9_cme_pstate_notify_sib(INTERCME_DIRECT_INTF intf)
@@ -1058,7 +1127,7 @@ inline void p9_cme_pstate_freq_update(uint32_t cme_flags)
else
{
PK_TRACE_INF("PSTATE: Freq Updt Enter");
- PK_TRACE_INF("PSTATE: Dpll0=0x%x", G_lppb->dpll_pstate0_value);
+ PK_TRACE_DBG("PSTATE: Dpll0=0x%x", G_lppb->dpll_pstate0_value);
//Adjust DPLL
qppm_dpll_freq_t dpllFreq;
diff --git a/import/chips/p9/procedures/ppe_closed/pgpe/pstate_gpe/p9_pgpe_pstate.c b/import/chips/p9/procedures/ppe_closed/pgpe/pstate_gpe/p9_pgpe_pstate.c
index 2ed5aa58..4d6348d9 100644
--- a/import/chips/p9/procedures/ppe_closed/pgpe/pstate_gpe/p9_pgpe_pstate.c
+++ b/import/chips/p9/procedures/ppe_closed/pgpe/pstate_gpe/p9_pgpe_pstate.c
@@ -2542,7 +2542,7 @@ void p9_pgpe_pstate_wov_init()
G_pgpe_pstate_record.wov.avg_freq_gt_target_freq = 0;
G_pgpe_pstate_record.wov.freq_loss_tenths_gt_max_droop_tenths = 0;
G_pgpe_pstate_record.wov.status = WOV_DISABLED;
- G_pgpe_pstate_record.wov.info = 0xdeadbeef;
+ G_pgpe_pstate_record.wov.info = 0xdeadde04;
}
//
OpenPOWER on IntegriCloud