summaryrefslogtreecommitdiffstats
path: root/include/errorlog.h
blob: 285c185d0b86b0a6c95eae82e0e35de78f659905 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/* Copyright 2013-2014 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __ERRORLOG_H
#define __ERRORLOG_H

#include <compiler.h>
#include <opal.h>
#include <stdint.h>
#include <ccan/list/list.h>

/*
 * Classification of the error/event types reported by OPAL.
 * NOTE(review): presumably the values carried in
 * errorlog.error_event_type and opal_err_info.err_type -- confirm
 * against the callers.
 */
/* PLATFORM: platform events/errors; reports Machine Check Interrupts */
#define OPAL_PLATFORM_ERR_EVT		0x01
/* INPUT_OUTPUT: reports all I/O related events/errors */
#define OPAL_INPUT_OUTPUT_ERR_EVT	0x02
/* RESOURCE_DEALLOC: hotplug events and errors */
#define OPAL_RESOURCE_DEALLOC_ERR_EVT	0x03
/* MISC: miscellaneous errors */
#define OPAL_MISC_ERR_EVT		0x04

/*
 * OPAL subsystem IDs used when reporting events/errors.
 * NOTE(review): presumably the values carried in errorlog.subsystem_id
 * and opal_err_info.subsystem -- confirm against the callers.
 */
#define OPAL_PROCESSOR_SUBSYSTEM	0x10
#define OPAL_MEMORY_SUBSYSTEM		0x20
#define OPAL_IO_SUBSYSTEM		0x30
#define OPAL_IO_DEVICES			0x40
#define OPAL_CEC_HARDWARE		0x50
#define OPAL_POWER_COOLING		0x60
#define OPAL_MISC_SUBSYSTEM		0x70
#define OPAL_SURVEILLANCE_ERR		0x7A
#define OPAL_PLATFORM_FIRMWARE		0x80
#define OPAL_SOFTWARE			0x90
#define OPAL_EXTERNAL_ENV		0xA0

/*
 * Severity: when an event/error is reported, one of the following
 * values states how serious the logged event/error is.
 */
#define OPAL_INFO						0x00
#define OPAL_RECOVERED_ERR_GENERAL				0x10

/* The 0x2X series denotes the set of Predictive Errors */
/* 0x20 Generic predictive error */
#define OPAL_PREDICTIVE_ERR_GENERAL				0x20
/* 0x21 Predictive error, degraded performance */
#define OPAL_PREDICTIVE_ERR_DEGRADED_PERF			0x21
/* 0x22 Predictive error, fault may be corrected after reboot */
#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT		0x22
/*
 * 0x23 Predictive error, fault may be corrected after reboot,
 * degraded performance
 */
#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_BOOT_DEGRADE_PERF	0x23
/* 0x24 Predictive error, loss of redundancy */
#define OPAL_PREDICTIVE_ERR_LOSS_OF_REDUNDANCY			0x24

/* The 0x4X series denotes Unrecoverable Errors */
/* 0x40 Generic unrecoverable error */
#define OPAL_UNRECOVERABLE_ERR_GENERAL				0x40
/* 0x41 Unrecoverable error bypassed with degraded performance */
#define OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF			0x41
/* 0x44 Unrecoverable error bypassed with loss of redundancy */
#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY			0x44
/* 0x45 Unrecoverable error bypassed with loss of redundancy and performance */
#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY_PERF		0x45
/* 0x48 Unrecoverable error bypassed with loss of function */
#define OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION			0x48
/* 0x50 Used in case of PANIC (note: this value lies outside the 0x4X series) */
#define OPAL_ERROR_PANIC					0x50

/*
 * OPAL event sub-type.
 * This field provides additional information about an event that is
 * not an error.
 * NOTE(review): presumably the values carried in errorlog.event_subtype
 * and opal_err_info.event_subtype -- confirm against the callers.
 */
#define OPAL_NA						0x00
#define OPAL_MISCELLANEOUS_INFO_ONLY			0x01
#define OPAL_PREV_REPORTED_ERR_RECTIFIED		0x10
#define OPAL_SYS_RESOURCES_DECONFIG_BY_USER		0x20
#define OPAL_SYS_RESOURCE_DECONFIG_PRIOR_ERR		0x21
#define OPAL_RESOURCE_DEALLOC_EVENT_NOTIFY		0x22
#define OPAL_CONCURRENT_MAINTENANCE_EVENT		0x40
#define OPAL_CAPACITY_UPGRADE_EVENT			0x60
#define OPAL_RESOURCE_SPARING_EVENT			0x70
#define OPAL_DYNAMIC_RECONFIG_EVENT			0x80
#define OPAL_NORMAL_SYS_PLATFORM_SHUTDOWN		0xD0
#define OPAL_ABNORMAL_POWER_OFF				0xE0

/*
 * Maximum user dump size in bytes: 14K (14 * 1024).
 * It sizes the user_data_dump buffer embedded in struct errorlog.
 */
#define OPAL_LOG_MAX_DUMP	14336

/* Origin of the error; values for the elog_origin field of struct errorlog */
#define ORG_SAPPHIRE	1
#define ORG_POWERNV	2

/*
 * One user data section; an error log can hold multiple of these.
 * The structure is packed, so its fields are laid out back to back
 * with no padding.
 */
struct elog_user_data_section {
	uint32_t tag;		/* tag of this section */
	uint16_t size;		/* size of this section */
	uint16_t component_id;	/* component ID recorded for this section */
	/*
	 * Start of the section data.
	 * NOTE(review): presumably the data extends past this one-byte
	 * array (trailing-array idiom) -- confirm. The [1] is kept as is
	 * because it is part of sizeof(struct elog_user_data_section).
	 */
	char data_dump[1];
} __attribute__((__packed__));

/*
 * Everything known about one error/event that is to be reported.
 * This structure must be populated through the pre-defined interfaces
 * only, never by writing the fields directly.
 * The structure is packed, so its fields are laid out back to back
 * with no padding.
 */
struct errorlog {
	/* Which component reports it, and how the event is classified */
	uint16_t component_id;
	uint8_t error_event_type;
	uint8_t subsystem_id;

	/* Severity and sub-type, user section count and origin (ORG_*) */
	uint8_t event_severity;
	uint8_t event_subtype;
	uint8_t user_section_count;
	uint8_t elog_origin;

	/* User section size, reason code and additional information words */
	uint32_t user_section_size;
	uint32_t reason_code;
	uint32_t additional_info[4];

	/* Log bookkeeping: PLID, log size and timeout */
	uint32_t plid;
	uint32_t log_size;
	uint64_t elog_timeout;

	/* Buffer for the user data, at most OPAL_LOG_MAX_DUMP bytes */
	char user_data_dump[OPAL_LOG_MAX_DUMP];
	/* List linkage (ccan list node) */
	struct list_node link;
} __attribute__((__packed__));

/*
 * Description of one reportable error. DEFINE_LOG_ENTRY() defines a
 * static instance of this structure per reason code, and e_info()
 * names that instance.
 */
struct opal_err_info {
	uint32_t reason_code;	/* reason code of the error */
	uint8_t err_type;	/* error/event type */
	uint16_t cmp_id;	/* ID of the reporting component */
	uint8_t subsystem;	/* subsystem ID */
	uint8_t sev;		/* severity */
	uint8_t event_subtype;	/* event sub-type */
};

/* Component IDs */
/*
 * In the PEL error log format the Creator ID is the hypervisor, but
 * various component IDs are used to distinguish which component within
 * the hypervisor is reporting the error.
 * This is 2 bytes long:
 *	the first byte corresponds to the Component ID,
 *	the second byte is reserved for the Reason code.
 * The Component ID is mapped to a readable 4-digit ASCII character
 * name in the FSP and displayed.
 *
 * NOTE(review): each value below is the pair of ASCII characters shown
 * in its trailing comment (e.g. 0x4355 is 'C' 'U'). The byte split
 * described above looks like it matches the OPAL_SRC_COMPONENT_* values
 * rather than these -- confirm.
 */
/* SAPPHIRE components */
#define OPAL_CODEUPDATE				0x4355  /* CU */
#define OPAL_CONSOLE				0x434E  /* CN */
#define OPAL_CEC				0x4345  /* CE */
#define OPAL_CHIP				0x4348  /* CH */
#define OPAL_ELOG				0x454C  /* EL */
#define OPAL_NVRAM				0x4E56  /* NV */
#define OPAL_RTC				0x5254  /* RT */
#define OPAL_SURVEILLANCE			0x5355  /* SU */
#define OPAL_SYSPARAM				0x5350  /* SP */
#define OPAL_LPC				0x4C50  /* LP */
#define OPAL_UART				0x5541  /* UA */
#define OPAL_OCC				0x4F43  /* OC */
#define OPAL_OP_PANEL				0x4F50  /* OP */
#define OPAL_PHB3				0x5048  /* PH */
#define OPAL_PSI				0x5053  /* PS */
#define OPAL_VPD				0x5650  /* VP */
#define OPAL_XSCOM				0x5853  /* XS */
#define OPAL_PCI				0x5043  /* PC */
#define OPAL_MISC				0x4D49  /* MI */
#define OPAL_ATTN				0x4154  /* AT */
#define OPAL_MEM_ERR				0x4D45  /* ME */
#define OPAL_CENTAUR				0x4354  /* CT */
#define OPAL_MFSI				0x4D46  /* MF */
#define OPAL_DUMP				0x4455  /* DU */
#define OPAL_LED				0x4C45  /* LE */
#define OPAL_SENSOR				0x5345  /* SE */
#define OPAL_SLW				0x534C  /* SL */
#define OPAL_FSP				0x4650  /* FP */
#define OPAL_I2C				0x4943  /* IC */
#define OPAL_IPMI				0x4950  /* IP */

/*
 * SAPPHIRE SRC component IDs.
 * Every value leaves its low byte clear; enum opal_reasoncode below
 * ORs a per-component code into that byte to form a reason code.
 */
#define OPAL_SRC_COMPONENT_CODE_UPDATE		0x1000
#define OPAL_SRC_COMPONENT_XSCOM		0x1100
#define OPAL_SRC_COMPONENT_PCI			0x1200
#define OPAL_SRC_COMPONENT_MISC			0x1300
#define OPAL_SRC_COMPONENT_ATTN			0x1400
#define OPAL_SRC_COMPONENT_MEM_ERR		0x1500
#define OPAL_SRC_COMPONENT_CENTAUR		0x1600
#define OPAL_SRC_COMPONENT_MFSI			0x1700
#define OPAL_SRC_COMPONENT_DUMP			0x1800
#define OPAL_SRC_COMPONENT_LED			0x1900
#define OPAL_SRC_COMPONENT_VPD			0x1a00
#define OPAL_SRC_COMPONENT_CONSOLE		0x1b00
#define OPAL_SRC_COMPONENT_SENSOR		0x2000
#define OPAL_SRC_COMPONENT_SLW			0x2100
#define OPAL_SRC_COMPONENT_FSP			0x2200
#define OPAL_SRC_COMPONENT_I2C			0x2300
#define OPAL_SRC_COMPONENT_IPMI			0x2400
#define OPAL_SRC_COMPONENT_CEC			0x3000
#define OPAL_SRC_COMPONENT_CHIP			0x4000
#define OPAL_SRC_COMPONENT_ELOG			0x5000
#define OPAL_SRC_COMPONENT_NVRAM		0x6000
#define OPAL_SRC_COMPONENT_RTC			0x7000
#define OPAL_SRC_COMPONENT_SURVEILLANCE		0x8000
#define OPAL_SRC_COMPONENT_SYSPARAM		0x9000
#define OPAL_SRC_COMPONENT_LPC			0xa000
#define OPAL_SRC_COMPONENT_UART			0xb000
#define OPAL_SRC_COMPONENT_OCC			0xc000
#define OPAL_SRC_COMPONENT_OP_PANEL		0xd000
#define OPAL_SRC_COMPONENT_PHB3			0xe000
#define OPAL_SRC_COMPONENT_PSI			0xf000

/*
 * Reason codes.
 * Each one is an OPAL_SRC_COMPONENT_* ID OR'ed with a code that starts
 * at 0x10 and counts up within that component. Codes must stay unique
 * within a component, otherwise two different failures produce the same
 * reason code and cannot be told apart.
 */
enum opal_reasoncode {
/* code update */
	OPAL_RC_CU_FLASH	    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x10,
	OPAL_RC_CU_INIT		    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x11,
	OPAL_RC_CU_SG_LIST	    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x12,
	OPAL_RC_CU_COMMIT	    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x13,
	OPAL_RC_CU_MSG		    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x14,
	OPAL_RC_CU_NOTIFY	    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x15,
	OPAL_RC_CU_MARKER_LID	    = OPAL_SRC_COMPONENT_CODE_UPDATE | 0x16,
/* NVRAM */
	OPAL_RC_NVRAM_INIT	    = OPAL_SRC_COMPONENT_NVRAM | 0x10,
	OPAL_RC_NVRAM_OPEN	    = OPAL_SRC_COMPONENT_NVRAM | 0x11,
	OPAL_RC_NVRAM_SIZE	    = OPAL_SRC_COMPONENT_NVRAM | 0x12,
	OPAL_RC_NVRAM_WRITE	    = OPAL_SRC_COMPONENT_NVRAM | 0x13,
	OPAL_RC_NVRAM_READ	    = OPAL_SRC_COMPONENT_NVRAM | 0x14,
/* CENTAUR */
	OPAL_RC_CENTAUR_INIT	    = OPAL_SRC_COMPONENT_CENTAUR | 0x10,
	OPAL_RC_CENTAUR_RW_ERR	    = OPAL_SRC_COMPONENT_CENTAUR | 0x11,
/* MFSI */
	OPAL_RC_MFSI_RW_ERR	    = OPAL_SRC_COMPONENT_MFSI | 0x10,
/* UART */
	OPAL_RC_UART_INIT	    = OPAL_SRC_COMPONENT_UART | 0x10,
/* OCC */
	OPAL_RC_OCC_RESET	    = OPAL_SRC_COMPONENT_OCC | 0x10,
	OPAL_RC_OCC_LOAD	    = OPAL_SRC_COMPONENT_OCC | 0x11,
	OPAL_RC_OCC_PSTATE_INIT	    = OPAL_SRC_COMPONENT_OCC | 0x12,
	OPAL_RC_OCC_TIMEOUT	    = OPAL_SRC_COMPONENT_OCC | 0x13,
/* RTC */
	OPAL_RC_RTC_READ	    = OPAL_SRC_COMPONENT_RTC | 0x10,
	OPAL_RC_RTC_TOD		    = OPAL_SRC_COMPONENT_RTC | 0x11,
/* SURVEILLANCE */
	OPAL_RC_SURVE_INIT	    = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x10,
	OPAL_RC_SURVE_STATUS	    = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x11,
	OPAL_RC_SURVE_ACK	    = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x12,
	OPAL_INJECTED_HIR	    = OPAL_SRC_COMPONENT_SURVEILLANCE | 0x13,
/* SYSPARAM */
	OPAL_RC_SYSPARM_INIT	    = OPAL_SRC_COMPONENT_SYSPARAM | 0x10,
	OPAL_RC_SYSPARM_MSG	    = OPAL_SRC_COMPONENT_SYSPARAM | 0x11,
/* LPC */
	OPAL_RC_LPC_READ	    = OPAL_SRC_COMPONENT_LPC | 0x10,
	OPAL_RC_LPC_WRITE	    = OPAL_SRC_COMPONENT_LPC | 0x11,
	OPAL_RC_LPC_SYNC	    = OPAL_SRC_COMPONENT_LPC | 0x12,
	OPAL_RC_LPC_SYNC_PERF	    = OPAL_SRC_COMPONENT_LPC | 0x13,
/* OP_PANEL */
	OPAL_RC_PANEL_WRITE	    = OPAL_SRC_COMPONENT_OP_PANEL | 0x10,
/* PSI */
	OPAL_RC_PSI_INIT	    = OPAL_SRC_COMPONENT_PSI | 0x10,
	OPAL_RC_PSI_IRQ_RESET	    = OPAL_SRC_COMPONENT_PSI | 0x11,
	OPAL_RC_PSI_TIMEOUT	    = OPAL_SRC_COMPONENT_PSI | 0x12,
/* XSCOM */
	OPAL_RC_XSCOM_RW	    = OPAL_SRC_COMPONENT_XSCOM | 0x10,
	OPAL_RC_XSCOM_INDIRECT_RW   = OPAL_SRC_COMPONENT_XSCOM | 0x11,
	OPAL_RC_XSCOM_RESET	    = OPAL_SRC_COMPONENT_XSCOM | 0x12,
	OPAL_RC_XSCOM_BUSY	    = OPAL_SRC_COMPONENT_XSCOM | 0x13,
/* PCI */
	OPAL_RC_PCI_INIT_SLOT	    = OPAL_SRC_COMPONENT_PCI | 0x10,
	OPAL_RC_PCI_ADD_SLOT	    = OPAL_SRC_COMPONENT_PCI | 0x11,
	OPAL_RC_PCI_SCAN	    = OPAL_SRC_COMPONENT_PCI | 0x12,
	/*
	 * Was 0x10, which made a PHB reset failure report the same
	 * reason code as OPAL_RC_PCI_INIT_SLOT; 0x13 is the next free
	 * PCI code.
	 */
	OPAL_RC_PCI_RESET_PHB	    = OPAL_SRC_COMPONENT_PCI | 0x13,
/* ATTN */
	OPAL_RC_ATTN		    = OPAL_SRC_COMPONENT_ATTN | 0x10,
/* MEM_ERR */
	OPAL_RC_MEM_ERR_RES	    = OPAL_SRC_COMPONENT_MEM_ERR | 0x10,
	OPAL_RC_MEM_ERR_DEALLOC	    = OPAL_SRC_COMPONENT_MEM_ERR | 0x11,
/* DUMP */
	OPAL_RC_DUMP_INIT	    = OPAL_SRC_COMPONENT_DUMP | 0x10,
	OPAL_RC_DUMP_LIST	    = OPAL_SRC_COMPONENT_DUMP | 0x11,
	OPAL_RC_DUMP_ACK	    = OPAL_SRC_COMPONENT_DUMP | 0x12,
	OPAL_RC_DUMP_MDST_INIT	    = OPAL_SRC_COMPONENT_DUMP | 0x13,
	OPAL_RC_DUMP_MDST_UPDATE    = OPAL_SRC_COMPONENT_DUMP | 0x14,
	OPAL_RC_DUMP_MDST_ADD	    = OPAL_SRC_COMPONENT_DUMP | 0x15,
	OPAL_RC_DUMP_MDST_REMOVE    = OPAL_SRC_COMPONENT_DUMP | 0x16,
/* LED */
	OPAL_RC_LED_SPCN	    = OPAL_SRC_COMPONENT_LED | 0x10,
	OPAL_RC_LED_BUFF	    = OPAL_SRC_COMPONENT_LED | 0x11,
	OPAL_RC_LED_LC		    = OPAL_SRC_COMPONENT_LED | 0x12,
	OPAL_RC_LED_STATE	    = OPAL_SRC_COMPONENT_LED | 0x13,
	OPAL_RC_LED_SUPPORT	    = OPAL_SRC_COMPONENT_LED | 0x14,
/* SENSOR */
	OPAL_RC_SENSOR_INIT	    = OPAL_SRC_COMPONENT_SENSOR | 0x10,
	OPAL_RC_SENSOR_READ	    = OPAL_SRC_COMPONENT_SENSOR | 0x11,
	OPAL_RC_SENSOR_ASYNC_COMPLETE
				    = OPAL_SRC_COMPONENT_SENSOR | 0x12,
/* SLW */
	OPAL_RC_SLW_INIT	    = OPAL_SRC_COMPONENT_SLW | 0x10,
	OPAL_RC_SLW_SET		    = OPAL_SRC_COMPONENT_SLW | 0x11,
	OPAL_RC_SLW_GET		    = OPAL_SRC_COMPONENT_SLW | 0x12,
	OPAL_RC_SLW_REG		    = OPAL_SRC_COMPONENT_SLW | 0x13,
/* FSP */
	OPAL_RC_FSP_POLL_TIMEOUT    = OPAL_SRC_COMPONENT_FSP | 0x10,
	OPAL_RC_FSP_MBOX_ERR	    = OPAL_SRC_COMPONENT_FSP | 0x11,
	OPAL_RC_FSP_DISR_HIR_MASK   = OPAL_SRC_COMPONENT_FSP | 0x12,
/* I2C */
	OPAL_RC_I2C_INIT	    = OPAL_SRC_COMPONENT_I2C | 0x10,
	OPAL_RC_I2C_START_REQ	    = OPAL_SRC_COMPONENT_I2C | 0x11,
	OPAL_RC_I2C_TIMEOUT	    = OPAL_SRC_COMPONENT_I2C | 0x12,
	OPAL_RC_I2C_TRANSFER	    = OPAL_SRC_COMPONENT_I2C | 0x13,
	OPAL_RC_I2C_RESET	    = OPAL_SRC_COMPONENT_I2C | 0x14,

/* IPMI */
	OPAL_RC_IPMI_REQ	    = OPAL_SRC_COMPONENT_IPMI | 0x10,
	OPAL_RC_IPMI_RESP	    = OPAL_SRC_COMPONENT_IPMI | 0x11,
	OPAL_RC_IPMI_DMA_ERROR_RESP
				    = OPAL_SRC_COMPONENT_IPMI | 0x12,

/* Platform error */
	OPAL_RC_ABNORMAL_REBOOT	    = OPAL_SRC_COMPONENT_CEC | 0x10,

/* FSP console */
	OPAL_RC_CONSOLE_HANG	    = OPAL_SRC_COMPONENT_CONSOLE | 0x10,
};

/*
 * Define a static struct opal_err_info named err_<reason> and fill it
 * in from the macro arguments. The terminating ';' is supplied at the
 * point of use. The object can then be named with e_info(<reason>).
 *
 * The parameter names are deliberately different from the structure's
 * field names, so the designators below are not macro-substituted.
 */
#define DEFINE_LOG_ENTRY(reason, type, id, subsys, severity, subtype)	\
	static struct opal_err_info err_##reason = {			\
		.reason_code	= reason,				\
		.err_type	= type,					\
		.cmp_id		= id,					\
		.subsystem	= subsys,				\
		.sev		= severity,				\
		.event_subtype	= subtype,				\
	}

/*
 * Wrapper around the error log functions which creates an error log
 * and commits it to the FSP. Used for simple error logging.
 *
 * @e_info: descriptor of the error being reported
 * @fmt:    printf-style format string followed by its arguments; the
 *          format attribute lets the compiler check them
 *
 * Returns a log ID. If an error involves a service processor needing
 * to be kicked, this log ID can be sent to the service processor to
 * explain *why* we kicked it. The log ID is -1 on error; as the return
 * type is uint32_t, callers see that value as 0xFFFFFFFF.
 */
uint32_t log_simple_error(struct opal_err_info *e_info,
		const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));

/*
 * Expands to the identifier err_<reason>, i.e. the name DEFINE_LOG_ENTRY()
 * gives to the descriptor object for that reason code.
 */
#define e_info(reason)	err_##reason

/*
 * Interfaces for building an error log step by step.
 * NOTE(review): the per-function notes below are inferred from the
 * names and signatures only; the definitions are not part of this
 * header -- confirm against the implementation.
 */

/*
 * Create an error log for the error described by @e_info, with @tag.
 * The result must not be ignored (__warn_unused_result).
 */
struct errorlog *opal_elog_create(struct opal_err_info *e_info,
				  uint32_t tag) __warn_unused_result;
/* Add a section identified by @tag to the log @buf. */
void log_add_section(struct errorlog *buf, uint32_t tag);
/* Append @size bytes from @data to the log @buf. */
void log_append_data(struct errorlog *buf, unsigned char *data, uint16_t size);
/*
 * Append a printf-style formatted message to the log @buf; the format
 * attribute lets the compiler check @fmt against its arguments.
 */
void log_append_msg(struct errorlog *buf,
		const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
/* Commit the log @elog. */
void log_commit(struct errorlog *elog);

/*
 * Called by the backend once it has finished logging an error.
 *
 * @elog:    the error log the backend was asked to log
 * @success: false if the error could not be logged successfully
 */
void opal_elog_complete(struct errorlog *elog, bool success);

/*
 * Initialise error logging.
 * NOTE(review): the meaning of the return value is not visible in this
 * header -- confirm against the definition.
 */
int elog_init(void);

#endif /* __ERRORLOG_H */
OpenPOWER on IntegriCloud