summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Barrat <fbarrat@linux.ibm.com>2019-04-05 16:33:03 +0200
committerStewart Smith <stewart@linux.ibm.com>2019-04-09 10:50:55 +1000
commitd1f3e4faf9d99d76bc413503afea87c8486af8b1 (patch)
tree5ae00584124d92e2193762e100abe289c26c23e6
parent7320a21e7261d5ed87971a7985fecdd7588a72ec (diff)
downloadblackbird-skiboot-d1f3e4faf9d99d76bc413503afea87c8486af8b1.tar.gz
blackbird-skiboot-d1f3e4faf9d99d76bc413503afea87c8486af8b1.zip
hw/npu2: Dump (more) npu2 registers on link error and HMIs
We were already logging some NPU registers during an HMI. This patch cleans up a bit how it is done and separates what is global from what is specific to nvlink or opencapi. Since we can now receive an error interrupt when an opencapi link goes down unexpectedly, we also dump the NPU state but we limit it to the registers of the brick which hit the error. The list of registers to dump was worked out with the hw team to allow for proper debugging. For each register, we print the name as found in the NPU workbook, the scom address and the register value. Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com> Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
-rw-r--r--core/hmi.c58
-rw-r--r--hw/npu2-common.c234
-rw-r--r--include/npu2-regs.h10
-rw-r--r--include/npu2.h1
4 files changed, 246 insertions, 57 deletions
diff --git a/core/hmi.c b/core/hmi.c
index fbb182c3..26277fa6 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -594,60 +594,6 @@ static void find_nx_checkstop_reason(int flat_chip_id,
queue_hmi_event(hmi_evt, 0, out_flags);
}
-/*
- * If the year is 2018 and you still see all these hardcoded, you
- * should really replace this with the neat macros that's in the
- * NPU2 code rather than this horrible listing of every single
- * NPU2 register hardcoded for a specific chip.
- *
- * I feel dirty having even written it.
- */
-static uint32_t npu2_scom_dump[] = {
- 0x5011017, 0x5011047, 0x5011077, 0x50110A7,
- 0x5011217, 0x5011247, 0x5011277, 0x50112A7,
- 0x5011417, 0x5011447, 0x5011477, 0x50114A7,
- 0x50110DA, 0x50112DA, 0x50114DA,
- 0x50110DB, 0x50112DB, 0x50114DB,
- 0x5011011, 0x5011041, 0x5011071, 0x50110A1,
- 0x5011211, 0x5011241, 0x5011271, 0x50112A1,
- 0x5011411, 0x5011441, 0x5011471, 0x50114A1,
- 0x5011018, 0x5011048, 0x5011078, 0x50110A8,
- 0x5011218, 0x5011248, 0x5011278, 0x50112A8,
- 0x5011418, 0x5011448, 0x5011478, 0x50114A8,
- 0x5011640,
- 0x5011114, 0x5011134, 0x5011314, 0x5011334,
- 0x5011514, 0x5011534, 0x5011118, 0x5011138,
- 0x5011318, 0x5011338, 0x5011518, 0x5011538,
- 0x50110D8, 0x50112D8, 0x50114D8,
- 0x50110D9, 0x50112D9, 0x50114D9,
- 0x5011019, 0x5011049, 0x5011079, 0x50110A9,
- 0x5011219, 0x5011249, 0x5011279, 0x50112A9,
- 0x5011419, 0x5011449, 0x5011479, 0x50114A9,
- 0x50110F4, 0x50112F4, 0x50114F4,
- 0x50110F5, 0x50112F5, 0x50114F5,
- 0x50110F6, 0x50112F6, 0x50114F6,
- 0x50110FD, 0x50112FD, 0x50114FD,
- 0x50110FE, 0x50112FE, 0x50114FE,
- 0x00
-};
-
-static void dump_scoms(int flat_chip_id, const char *unit, uint32_t *scoms,
- const char *loc)
-{
- uint64_t value;
- int r;
-
- while (*scoms != 0) {
- value = 0;
- r = _xscom_read(flat_chip_id, *scoms, &value, false);
- if (r != OPAL_SUCCESS)
- continue;
- prlog(PR_ERR, "%s: [Loc: %s] P:%d 0x%08x=0x%016llx\n",
- unit, loc, flat_chip_id, *scoms, value);
- scoms++;
- }
-}
-
static bool phb_is_npu2(struct dt_node *dn)
{
return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
@@ -731,9 +677,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
npu2_hmi_verbose = true;
if (npu2_hmi_verbose) {
- _xscom_lock();
- dump_scoms(flat_chip_id, "NPU", npu2_scom_dump, loc);
- _xscom_unlock();
+ npu2_dump_scoms(flat_chip_id);
prlog(PR_ERR, " _________________________ \n");
prlog(PR_ERR, "< It's Driver Debug time! >\n");
prlog(PR_ERR, " ------------------------- \n");
diff --git a/hw/npu2-common.c b/hw/npu2-common.c
index ccbbbbca..d4c0f851 100644
--- a/hw/npu2-common.c
+++ b/hw/npu2-common.c
@@ -103,6 +103,239 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
(uint64_t)new_val << 32);
}
+typedef struct {
+ const char *name;
+ uint32_t block;
+ uint32_t offset;
+} npu2_scom_dump_t;
+
+static npu2_scom_dump_t npu2_scom_dump_global[] = {
+ /* CQ State Machine */
+ { "CS.SM0.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG0 },
+ { "CS.SM1.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG0 },
+ { "CS.SM2.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG0 },
+ { "CS.SM3.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG0 },
+
+ { "CS.SM0.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG1 },
+ { "CS.SM1.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG1 },
+ { "CS.SM2.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG1 },
+ { "CS.SM3.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG1 },
+
+ { "CS.SM0.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG2 },
+ { "CS.SM1.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG2 },
+ { "CS.SM2.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG2 },
+ { "CS.SM3.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG2 },
+
+ { "CS.SM0.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG3 },
+ { "CS.SM1.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG3 },
+ { "CS.SM2.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG3 },
+ { "CS.SM3.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG3 },
+
+ { "CS.SM0.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG4 },
+ { "CS.SM1.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG4 },
+ { "CS.SM2.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG4 },
+ { "CS.SM3.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG4 },
+
+ { "CS.SM0.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG5 },
+ { "CS.SM1.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG5 },
+ { "CS.SM2.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG5 },
+ { "CS.SM3.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG5 },
+
+ { "CS.SM0.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG6 },
+ { "CS.SM1.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG6 },
+ { "CS.SM2.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG6 },
+ { "CS.SM3.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG6 },
+
+ { "CS.SM0.MISC.CERR_FIRST0", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_FIRST0 },
+ { "CS.SM1.MISC.CERR_FIRST0", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_FIRST0 },
+ { "CS.SM2.MISC.CERR_FIRST0", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_FIRST0 },
+ { "CS.SM3.MISC.CERR_FIRST0", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_FIRST0 },
+
+ { "CS.SM0.MISC.CERR_FIRST1", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_FIRST1 },
+ { "CS.SM1.MISC.CERR_FIRST1", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_FIRST1 },
+ { "CS.SM2.MISC.CERR_FIRST1", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_FIRST1 },
+ { "CS.SM3.MISC.CERR_FIRST1", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_FIRST1 },
+
+ { "CS.SM0.MISC.CERR_FIRST2", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_FIRST2 },
+ { "CS.SM1.MISC.CERR_FIRST2", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_FIRST2 },
+ { "CS.SM2.MISC.CERR_FIRST2", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_FIRST2 },
+ { "CS.SM3.MISC.CERR_FIRST2", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_FIRST2 },
+
+ /* CQ Control */
+ { "CS.CTL.MISC.CERR_MESSAGE0", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_MSG0 },
+ { "CS.CTL.MISC.CERR_MESSAGE1", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_MSG1 },
+ { "CS.CTL.MISC.CERR_FIRST0", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_FIRST0 },
+ { "CS.CTL.MISC.CERR_FIRST1", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_FIRST1 },
+
+ /* CQ Data */
+ { "DAT.MISC.CERR_ECC_HOLD", NPU2_BLOCK_DAT, NPU2_CQ_DAT_ECC_STATUS },
+ { "DAT.MISC.CERR_ECC_MASK", NPU2_BLOCK_DAT, NPU2_CQ_DAT_ECC_MASK },
+ { "DAT.MISC.CERR_ECC_FIRST", NPU2_BLOCK_DAT, NPU2_CQ_DAT_ECC_FIRST },
+ { "DAT.MISC.REM0", NPU2_BLOCK_DAT, NPU2_CQ_DAT_RAS_MSG0 },
+ { "DAT.MISC.REM1", NPU2_BLOCK_DAT, NPU2_CQ_DAT_RAS_MSG1 },
+};
+
+static npu2_scom_dump_t npu2_scom_dump_nvlink[] = {
+ { "NTL0.REGS.CERR_FIRST1", NPU2_BLOCK_NTL0, NPU2_NTL_ERR_FIRST1_OFF },
+ { "NTL1.REGS.CERR_FIRST1", NPU2_BLOCK_NTL1, NPU2_NTL_ERR_FIRST1_OFF },
+ { "NTL0.REGS.CERR_FIRST2", NPU2_BLOCK_NTL0, NPU2_NTL_ERR_FIRST2_OFF },
+ { "NTL1.REGS.CERR_FIRST2", NPU2_BLOCK_NTL1, NPU2_NTL_ERR_FIRST2_OFF },
+};
+
+static npu2_scom_dump_t npu2_scom_dump_ocapi[] = {
+ { "OTL0.MISC.C_ERR_RPT_HOLD0", NPU2_BLOCK_OTL0, NPU2_OTL_ERR_RPT_HOLD0 },
+ { "OTL1.MISC.C_ERR_RPT_HOLD0", NPU2_BLOCK_OTL1, NPU2_OTL_ERR_RPT_HOLD0 },
+ { "OTL0.MISC.OTL_REM0", NPU2_BLOCK_OTL0, NPU2_OTL_RAS_ERR_MSG0 },
+ { "OTL1.MISC.OTL_REM0", NPU2_BLOCK_OTL1, NPU2_OTL_RAS_ERR_MSG0 },
+ { "OTL0.MISC.ERROR_SIG_RXI", NPU2_BLOCK_OTL0, NPU2_OTL_RXI_ERR_SIG },
+ { "OTL1.MISC.ERROR_SIG_RXI", NPU2_BLOCK_OTL1, NPU2_OTL_RXI_ERR_SIG },
+ { "OTL0.MISC.ERROR_SIG_RXO", NPU2_BLOCK_OTL0, NPU2_OTL_RXO_ERR_SIG },
+ { "OTL1.MISC.ERROR_SIG_RXO", NPU2_BLOCK_OTL1, NPU2_OTL_RXO_ERR_SIG },
+ { "OTL0.MISC.C_ERR_RPT_HOLD1", NPU2_BLOCK_OTL0, NPU2_OTL_ERR_RPT_HOLD1 },
+ { "OTL1.MISC.C_ERR_RPT_HOLD1", NPU2_BLOCK_OTL1, NPU2_OTL_ERR_RPT_HOLD1 },
+};
+
+static void print_one_npu_reg(struct npu2 *npu, npu2_scom_dump_t *scom, int stack)
+{
+ uint64_t reg, val;
+
+ reg = NPU2_REG_OFFSET(stack, scom->block, scom->offset);
+ val = npu2_scom_read(npu->chip_id, npu->xscom_base,
+ reg, NPU2_MISC_DA_LEN_8B);
+
+ prlog(PR_ERR, "NPU[%d] STCK%d.%s 0x%llx = 0x%016llx\n",
+ npu->chip_id, stack - 4, scom->name, reg, val);
+}
+
+/* same as above, but for direct access registers */
+static void print_one_reg(int chip_id, int brick_index,
+ uint64_t reg_addr, const char *reg_name)
+{
+ uint64_t val;
+
+ xscom_read(chip_id, reg_addr, &val);
+ prlog(PR_ERR, "NPU[%d] %s brick %d 0x%llx = 0x%016llx\n",
+ chip_id, reg_name, brick_index, reg_addr, val);
+}
+
+static void show_nvlink_regs(struct npu2 *npu, int brick_index)
+{
+ uint32_t stack, ntl;
+ int i;
+
+ stack = NPU2_STACK_STCK_0 + brick_index / 2;
+ ntl = NPU2_BLOCK_NTL0 + (brick_index % 2) * 2;
+
+ for (i = 0; i < ARRAY_SIZE(npu2_scom_dump_nvlink); i++) {
+ if (npu2_scom_dump_nvlink[i].block == ntl)
+ print_one_npu_reg(npu, &npu2_scom_dump_nvlink[i], stack);
+ }
+}
+
+static void show_opencapi_regs(struct npu2 *npu, int brick_index)
+{
+ uint32_t stack, otl;
+ int i;
+
+ stack = NPU2_STACK_STCK_0 + brick_index / 2;
+ otl = NPU2_BLOCK_OTL0 + (brick_index % 2);
+
+ /* NPU registers */
+ for (i = 0; i < ARRAY_SIZE(npu2_scom_dump_ocapi); i++) {
+ if (npu2_scom_dump_ocapi[i].block == otl)
+ print_one_npu_reg(npu, &npu2_scom_dump_ocapi[i], stack);
+ }
+
+ /* Fabric registers */
+ print_one_reg(npu->chip_id, brick_index,
+ OB_ODL_STATUS(brick_index), "ODL status");
+ print_one_reg(npu->chip_id, brick_index,
+ OB_ODL_TRAINING_STATUS(brick_index), "ODL training status");
+ print_one_reg(npu->chip_id, brick_index,
+ OB_ODL_ENDPOINT_INFO(brick_index), "ODL endpoint info");
+}
+
+static void show_all_regs(struct npu2 *npu, int brick_index)
+{
+ int i, stack, stack_min, stack_max;
+ uint64_t fir_val, mask_val, fir_addr, mask_addr;
+ struct npu2_dev *dev;
+ npu2_scom_dump_t scom_reg;
+
+ if (brick_index != -1) {
+ stack_min = stack_max = NPU2_STACK_STCK_0 + brick_index / 2;
+ } else {
+ stack_min = NPU2_STACK_STCK_0;
+ stack_max = NPU2_STACK_STCK_2;
+ /* Avoid dumping unused stacks for opencapi on Lagrange */
+ if (npu->total_devices == 2)
+ stack_min = stack_max = NPU2_STACK_STCK_1;
+ }
+
+ /* NPU FIRs */
+ for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
+ fir_addr = NPU2_FIR_REGISTER_0 + i * NPU2_FIR_OFFSET;
+ mask_addr = fir_addr + NPU2_FIR_MASK_OFFSET;
+ xscom_read(npu->chip_id, fir_addr, &fir_val);
+ xscom_read(npu->chip_id, mask_addr, &mask_val);
+ prlog(PR_ERR, "NPU[%d] FIR%d = 0x%016llx (mask 0x%016llx => 0x%016llx)\n",
+ npu->chip_id, i, fir_val, mask_val, fir_val & ~mask_val);
+ }
+
+ /* NPU global, per-stack registers */
+ for (i = 0; i < ARRAY_SIZE(npu2_scom_dump_global); i++) {
+ for (stack = stack_min; stack <= stack_max; stack++)
+ print_one_npu_reg(npu, &npu2_scom_dump_global[i], stack);
+ }
+
+ /*
+ * NPU global registers, stack independent
+ * We have only one for now, so dump it directly
+ */
+ scom_reg.name = "XTS.REG.ERR_HOLD";
+ scom_reg.block = NPU2_BLOCK_XTS;
+ scom_reg.offset = 0;
+ print_one_npu_reg(npu, &scom_reg, NPU2_STACK_MISC);
+
+ /* nvlink- or opencapi-specific registers */
+ for (i = 0; i < npu->total_devices; i++) {
+ dev = &npu->devices[i];
+ if (brick_index == -1 || dev->brick_index == brick_index) {
+ if (dev->type == NPU2_DEV_TYPE_NVLINK)
+ show_nvlink_regs(npu, dev->brick_index);
+ else if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
+ show_opencapi_regs(npu, dev->brick_index);
+ }
+ }
+}
+
+void npu2_dump_scoms(int chip_id)
+{
+ struct npu2 *npu;
+ struct phb *phb;
+ struct npu2_dev *dev;
+
+ /*
+ * Look for the npu2 structure for that chip ID. We can access it
+ * through the array of phbs, looking for a nvlink or opencapi
+ * phb. We can have several entries, but they all point
+ * to the same npu2 structure
+ */
+ for_each_phb(phb) {
+ npu = NULL;
+ if (phb->phb_type == phb_type_npu_v2) {
+ npu = phb_to_npu2_nvlink(phb);
+ } else if (phb->phb_type == phb_type_npu_v2_opencapi) {
+ dev = phb_to_npu2_dev_ocapi(phb);
+ npu = dev->npu;
+ }
+ if (npu && npu->chip_id == chip_id) {
+ show_all_regs(npu, -1 /* all bricks */);
+ break;
+ }
+ }
+}
+
static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
{
struct npu2 *p = is->data;
@@ -182,6 +415,7 @@ static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
brick = 2 + ((idx - 27) % 4);
prlog(PR_ERR, "NPU[%d] error interrupt for brick %d\n",
p->chip_id, brick);
+ show_all_regs(p, brick);
opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
OPAL_EVENT_PCI_ERROR);
break;
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 939a23f5..ba10b8ea 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -203,6 +203,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_PERF_MASK 0x110
#define NPU2_DBG0_CFG 0x118
#define NPU2_DBG1_CFG 0x120
+#define NPU2_C_ERR_RPT_MSG5 0x128
+#define NPU2_C_ERR_RPT_MSG6 0x130
/* CTL block registers */
#define NPU2_CQ_CTL_MISC_CFG 0x000
@@ -295,10 +297,12 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_NTL_MISC_CFG3(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x008)
#define NPU2_NTL_ERR_HOLD1(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x010)
#define NPU2_NTL_ERR_MASK1(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x018)
+#define NPU2_NTL_ERR_FIRST1_OFF 0x020
#define NPU2_NTL_ERR_FIRST1(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x020)
#define NPU2_NTL_ERR_FIRST1_MASK(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x028)
#define NPU2_NTL_ERR_HOLD2(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x030)
#define NPU2_NTL_ERR_MASK2(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x038)
+#define NPU2_NTL_ERR_FIRST2_OFF 0x040
#define NPU2_NTL_ERR_FIRST2(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x040)
#define NPU2_NTL_ERR_FIRST2_MASK(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x048)
#define NPU2_NTL_SCRATCH2(ndev) NPU2_NTL_REG_OFFSET(ndev, 0x050)
@@ -402,6 +406,12 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_OTL_OSL_DAR(stack, block) NPU2_REG_OFFSET(stack, block, 0x008)
#define NPU2_OTL_OSL_TFC(stack, block) NPU2_REG_OFFSET(stack, block, 0x010)
#define NPU2_OTL_OSL_PEHANDLE(stack, block) NPU2_REG_OFFSET(stack, block, 0x018)
+#define NPU2_OTL_ERR_RPT_HOLD0 0x30
+#define NPU2_OTL_RAS_ERR_MSG0 0x68
+#define NPU2_OTL_RXI_ERR_SIG 0x70
+#define NPU2_OTL_RXO_ERR_SIG 0x78
+#define NPU2_OTL_ERR_RPT_HOLD1 0xB0
+
/* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
* there is only a single instance of each of these in the NPU so we
diff --git a/include/npu2.h b/include/npu2.h
index ef4e7aff..d58aab47 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -248,4 +248,5 @@ int64_t npu2_freeze_status(struct phb *phb __unused,
uint8_t *freeze_state,
uint16_t *pci_error_type __unused,
uint16_t *severity __unused);
+void npu2_dump_scoms(int chip_id);
#endif /* __NPU2_H */
OpenPOWER on IntegriCloud