From d1f3e4faf9d99d76bc413503afea87c8486af8b1 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 5 Apr 2019 16:33:03 +0200 Subject: hw/npu2: Dump (more) npu2 registers on link error and HMIs We were already logging some NPU registers during an HMI. This patch cleans up a bit how it is done and separates what is global from what is specific to nvlink or opencapi. Since we can now receive an error interrupt when an opencapi link goes down unexpectedly, we also dump the NPU state but we limit it to the registers of the brick which hit the error. The list of registers to dump was worked out with the hw team to allow for proper debugging. For each register, we print the name as found in the NPU workbook, the scom address and the register value. Signed-off-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Stewart Smith --- core/hmi.c | 58 +--------------------------------------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) (limited to 'core') diff --git a/core/hmi.c b/core/hmi.c index fbb182c3..26277fa6 100644 --- a/core/hmi.c +++ b/core/hmi.c @@ -594,60 +594,6 @@ static void find_nx_checkstop_reason(int flat_chip_id, queue_hmi_event(hmi_evt, 0, out_flags); } -/* - * If the year is 2018 and you still see all these hardcoded, you - * should really replace this with the neat macros that's in the - * NPU2 code rather than this horrible listing of every single - * NPU2 register hardcoded for a specific chip. - * - * I feel dirty having even written it. - */ -static uint32_t npu2_scom_dump[] = { - 0x5011017, 0x5011047, 0x5011077, 0x50110A7, - 0x5011217, 0x5011247, 0x5011277, 0x50112A7, - 0x5011417, 0x5011447, 0x5011477, 0x50114A7, - 0x50110DA, 0x50112DA, 0x50114DA, - 0x50110DB, 0x50112DB, 0x50114DB, - 0x5011011, 0x5011041, 0x5011071, 0x50110A1, - 0x5011211, 0x5011241, 0x5011271, 0x50112A1, - 0x5011411, 0x5011441, 0x5011471, 0x50114A1, - 0x5011018, 0x5011048, 0x5011078, 0x50110A8, - 0x5011218, 0x5011248, 0x5011278, 0x50112A8, - 0x5011418, 0x5011448, 0x5011478, 0x50114A8, - 0x5011640, - 0x5011114, 0x5011134, 0x5011314, 0x5011334, - 0x5011514, 0x5011534, 0x5011118, 0x5011138, - 0x5011318, 0x5011338, 0x5011518, 0x5011538, - 0x50110D8, 0x50112D8, 0x50114D8, - 0x50110D9, 0x50112D9, 0x50114D9, - 0x5011019, 0x5011049, 0x5011079, 0x50110A9, - 0x5011219, 0x5011249, 0x5011279, 0x50112A9, - 0x5011419, 0x5011449, 0x5011479, 0x50114A9, - 0x50110F4, 0x50112F4, 0x50114F4, - 0x50110F5, 0x50112F5, 0x50114F5, - 0x50110F6, 0x50112F6, 0x50114F6, - 0x50110FD, 0x50112FD, 0x50114FD, - 0x50110FE, 0x50112FE, 0x50114FE, - 0x00 -}; - -static void dump_scoms(int flat_chip_id, const char *unit, uint32_t *scoms, - const char *loc) -{ - uint64_t value; - int r; - - while (*scoms != 0) { - value = 0; - r = _xscom_read(flat_chip_id, *scoms, &value, false); - if (r != OPAL_SUCCESS) - continue; - prlog(PR_ERR, "%s: [Loc: %s] P:%d 0x%08x=0x%016llx\n", - unit, loc, flat_chip_id, *scoms, value); - scoms++; - } -} - static bool phb_is_npu2(struct dt_node *dn) { return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") || @@ -731,9 +677,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id, npu2_hmi_verbose = true; if (npu2_hmi_verbose) { - _xscom_lock(); - dump_scoms(flat_chip_id, "NPU", npu2_scom_dump, loc); - _xscom_unlock(); + npu2_dump_scoms(flat_chip_id); prlog(PR_ERR, " _________________________ \n"); prlog(PR_ERR, "< It's Driver Debug time! >\n"); prlog(PR_ERR, " ------------------------- \n"); -- cgit v1.2.1