diff options
Diffstat (limited to 'drivers/pci/pcie')
| -rw-r--r-- | drivers/pci/pcie/Kconfig | 41 | ||||
| -rw-r--r-- | drivers/pci/pcie/Makefile | 5 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer.c | 1377 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/Kconfig | 29 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/Kconfig.debug | 19 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/Makefile | 13 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/aerdrv.c | 374 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/aerdrv.h | 125 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/aerdrv_acpi.c | 141 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/aerdrv_core.c | 809 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/aerdrv_errprint.c | 254 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer/ecrc.c | 117 | ||||
| -rw-r--r-- | drivers/pci/pcie/aer_inject.c (renamed from drivers/pci/pcie/aer/aer_inject.c) | 3 | ||||
| -rw-r--r-- | drivers/pci/pcie/aspm.c | 9 | ||||
| -rw-r--r-- | drivers/pci/pcie/dpc.c | 75 | ||||
| -rw-r--r-- | drivers/pci/pcie/err.c | 390 | ||||
| -rw-r--r-- | drivers/pci/pcie/portdrv.h | 20 | ||||
| -rw-r--r-- | drivers/pci/pcie/portdrv_acpi.c | 57 | ||||
| -rw-r--r-- | drivers/pci/pcie/portdrv_core.c | 71 |
19 files changed, 1947 insertions, 1982 deletions
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index b12e28b3d8f9..0a1e9d379bc5 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -23,7 +23,42 @@ config HOTPLUG_PCI_PCIE When in doubt, say N. -source "drivers/pci/pcie/aer/Kconfig" +config PCIEAER + bool "PCI Express Advanced Error Reporting support" + depends on PCIEPORTBUS + select RAS + default y + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) driver support. Error reporting messages sent to Root + Port will be handled by PCI Express AER driver. + +config PCIEAER_INJECT + tristate "PCI Express error injection support" + depends on PCIEAER + default n + help + This enables PCI Express Root Port Advanced Error Reporting + (AER) software error injector. + + Debugging AER code is quite difficult because it is hard + to trigger various real hardware errors. Software-based + error injection can fake almost all kinds of errors with the + help of a user space helper tool aer-inject, which can be + gotten from: + http://www.kernel.org/pub/linux/utils/pci/aer-inject/ + +# +# PCI Express ECRC +# +config PCIE_ECRC + bool "PCI Express ECRC settings control" + depends on PCIEAER + help + Used to override firmware/bios settings for PCI Express ECRC + (transaction layer end-to-end CRC checking). + + When in doubt, say N. # # PCI Express ASPM @@ -92,7 +127,7 @@ config PCIE_PME depends on PCIEPORTBUS && PM config PCIE_DPC - bool "PCIe Downstream Port Containment support" + bool "PCI Express Downstream Port Containment support" depends on PCIEPORTBUS && PCIEAER default n help @@ -103,7 +138,7 @@ config PCIE_DPC it is safe to answer N. config PCIE_PTM - bool "PCIe Precision Time Measurement support" + bool "PCI Express Precision Time Measurement support" default n depends on PCIEPORTBUS help diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 800e1d404a45..ab514083d5d4 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,12 +2,13 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o obj-$(CONFIG_PCIEASPM) += aspm.o -obj-$(CONFIG_PCIEAER) += aer/ +obj-$(CONFIG_PCIEAER) += aer.o +obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o obj-$(CONFIG_PCIE_PTM) += ptm.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c new file mode 100644 index 000000000000..a2e88386af28 --- /dev/null +++ b/drivers/pci/pcie/aer.c @@ -0,0 +1,1377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implement the AER root port service driver. The driver registers an IRQ + * handler. When a root port triggers an AER interrupt, the IRQ handler + * collects root port status and schedules work. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + * + * (C) Copyright 2009 Hewlett-Packard Development Company, L.P. + * Andrew Patterson <andrew.patterson@hp.com> + */ + +#include <linux/cper.h> +#include <linux/pci.h> +#include <linux/pci-acpi.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/pm.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/kfifo.h> +#include <linux/slab.h> +#include <acpi/apei.h> +#include <ras/ras_event.h> + +#include "../pci.h" +#include "portdrv.h" + +#define AER_ERROR_SOURCES_MAX 100 +#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ + +struct aer_err_info { + struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; + int error_dev_num; + + unsigned int id:16; + + unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ + unsigned int __pad1:5; + unsigned int multi_error_valid:1; + + unsigned int first_error:5; + unsigned int __pad2:2; + unsigned int tlp_header_valid:1; + + unsigned int status; /* COR/UNCOR Error Status */ + unsigned int mask; /* COR/UNCOR Error Mask */ + struct aer_header_log_regs tlp; /* TLP Header */ +}; + +struct aer_err_source { + unsigned int status; + unsigned int id; +}; + +struct aer_rpc { + struct pci_dev *rpd; /* Root Port device */ + struct work_struct dpc_handler; + struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; + struct aer_err_info e_info; + unsigned short prod_idx; /* Error Producer Index */ + unsigned short cons_idx; /* Error Consumer Index */ + int isr; + spinlock_t e_lock; /* + * Lock access to Error Status/ID Regs + * and error producer/consumer index + */ + struct mutex rpc_mutex; /* + * only one thread could do + * recovery on the same + * root port hierarchy + */ +}; + +#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ + PCI_ERR_UNC_ECRC| \ + PCI_ERR_UNC_UNSUP| \ + PCI_ERR_UNC_COMP_ABORT| \ + PCI_ERR_UNC_UNX_COMP| \ + PCI_ERR_UNC_MALF_TLP) + +#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ + PCI_EXP_RTCTL_SENFEE| \ + PCI_EXP_RTCTL_SEFEE) +#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ + PCI_ERR_ROOT_CMD_NONFATAL_EN| \ + PCI_ERR_ROOT_CMD_FATAL_EN) +#define ERR_COR_ID(d) (d & 0xffff) +#define ERR_UNCOR_ID(d) (d >> 16) + +static int pcie_aer_disable; + +void pci_no_aer(void) +{ + pcie_aer_disable = 1; +} + +bool pci_aer_available(void) +{ + return !pcie_aer_disable && pci_msi_enabled(); +} + +#ifdef CONFIG_PCIE_ECRC + +#define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */ +#define ECRC_POLICY_OFF 1 /* ECRC off for performance */ +#define ECRC_POLICY_ON 2 /* ECRC on for data integrity */ + +static int ecrc_policy = ECRC_POLICY_DEFAULT; + +static const char *ecrc_policy_str[] = { + [ECRC_POLICY_DEFAULT] = "bios", + [ECRC_POLICY_OFF] = "off", + [ECRC_POLICY_ON] = "on" +}; + +/** + * enable_ercr_checking - enable PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int enable_ecrc_checking(struct pci_dev *dev) +{ + int pos; + u32 reg32; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = dev->aer_cap; + if (!pos) + return -ENODEV; + + pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + if (reg32 & PCI_ERR_CAP_ECRC_GENC) + reg32 |= PCI_ERR_CAP_ECRC_GENE; + if (reg32 & PCI_ERR_CAP_ECRC_CHKC) + reg32 |= PCI_ERR_CAP_ECRC_CHKE; + pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * disable_ercr_checking - disables PCIe ECRC checking for a device + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +static int disable_ecrc_checking(struct pci_dev *dev) +{ + int pos; + u32 reg32; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = dev->aer_cap; + if (!pos) + return -ENODEV; + + pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); + reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); + + return 0; +} + +/** + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * @dev: the PCI device + */ +void pcie_set_ecrc_checking(struct pci_dev *dev) +{ + switch (ecrc_policy) { + case ECRC_POLICY_DEFAULT: + return; + case ECRC_POLICY_OFF: + disable_ecrc_checking(dev); + break; + case ECRC_POLICY_ON: + enable_ecrc_checking(dev); + break; + default: + return; + } +} + +/** + * pcie_ecrc_get_policy - parse kernel command-line ecrc option + */ +void pcie_ecrc_get_policy(char *str) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++) + if (!strncmp(str, ecrc_policy_str[i], + strlen(ecrc_policy_str[i]))) + break; + if (i >= ARRAY_SIZE(ecrc_policy_str)) + return; + + ecrc_policy = i; +} +#endif /* CONFIG_PCIE_ECRC */ + +#ifdef CONFIG_ACPI_APEI +static inline int hest_match_pci(struct acpi_hest_aer_common *p, + struct pci_dev *pci) +{ + return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) && + ACPI_HEST_BUS(p->bus) == pci->bus->number && + p->device == PCI_SLOT(pci->devfn) && + p->function == PCI_FUNC(pci->devfn); +} + +static inline bool hest_match_type(struct acpi_hest_header *hest_hdr, + struct pci_dev *dev) +{ + u16 hest_type = hest_hdr->type; + u8 pcie_type = pci_pcie_type(dev); + + if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT && + pcie_type == PCI_EXP_TYPE_ROOT_PORT) || + (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT && + pcie_type == PCI_EXP_TYPE_ENDPOINT) || + (hest_type == ACPI_HEST_TYPE_AER_BRIDGE && + (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)) + return true; + return false; +} + +struct aer_hest_parse_info { + struct pci_dev *pci_dev; + int firmware_first; +}; + +static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr) +{ + if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT || + hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT || + hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE) + return 1; + return 0; +} + +static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data) +{ + struct aer_hest_parse_info *info = data; + struct acpi_hest_aer_common *p; + int ff; + + if (!hest_source_is_pcie_aer(hest_hdr)) + return 0; + + p = (struct acpi_hest_aer_common *)(hest_hdr + 1); + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + + /* + * If no specific device is supplied, determine whether + * FIRMWARE_FIRST is set for *any* PCIe device. + */ + if (!info->pci_dev) { + info->firmware_first |= ff; + return 0; + } + + /* Otherwise, check the specific device */ + if (p->flags & ACPI_HEST_GLOBAL) { + if (hest_match_type(hest_hdr, info->pci_dev)) + info->firmware_first = ff; + } else + if (hest_match_pci(p, info->pci_dev)) + info->firmware_first = ff; + + return 0; +} + +static void aer_set_firmware_first(struct pci_dev *pci_dev) +{ + int rc; + struct aer_hest_parse_info info = { + .pci_dev = pci_dev, + .firmware_first = 0, + }; + + rc = apei_hest_parse(aer_hest_parse, &info); + + if (rc) + pci_dev->__aer_firmware_first = 0; + else + pci_dev->__aer_firmware_first = info.firmware_first; + pci_dev->__aer_firmware_first_valid = 1; +} + +int pcie_aer_get_firmware_first(struct pci_dev *dev) +{ + if (!pci_is_pcie(dev)) + return 0; + + if (!dev->__aer_firmware_first_valid) + aer_set_firmware_first(dev); + return dev->__aer_firmware_first; +} +#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) + +static bool aer_firmware_first; + +/** + * aer_acpi_firmware_first - Check if APEI should control AER. + */ +bool aer_acpi_firmware_first(void) +{ + static bool parsed = false; + struct aer_hest_parse_info info = { + .pci_dev = NULL, /* Check all PCIe devices */ + .firmware_first = 0, + }; + + if (!parsed) { + apei_hest_parse(aer_hest_parse, &info); + aer_firmware_first = info.firmware_first; + parsed = true; + } + return aer_firmware_first; +} +#endif + +#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) + +int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + if (!dev->aer_cap) + return -EIO; + + return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); +} +EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); + +int pci_disable_pcie_error_reporting(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_AER_FLAGS); +} +EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); + +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +{ + int pos; + u32 status; + + pos = dev->aer_cap; + if (!pos) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + if (status) + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); + +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + int pos; + u32 status; + int port_type; + + if (!pci_is_pcie(dev)) + return -ENODEV; + + pos = dev->aer_cap; + if (!pos) + return -EIO; + + port_type = pci_pcie_type(dev); + if (port_type == PCI_EXP_TYPE_ROOT_PORT) { + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); + } + + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); + + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); + + return 0; +} + +int pci_aer_init(struct pci_dev *dev) +{ + dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + return pci_cleanup_aer_error_status_regs(dev); +} + +#define AER_AGENT_RECEIVER 0 +#define AER_AGENT_REQUESTER 1 +#define AER_AGENT_COMPLETER 2 +#define AER_AGENT_TRANSMITTER 3 + +#define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \ + 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP)) +#define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \ + 0 : PCI_ERR_UNC_COMP_ABORT) +#define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \ + (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0) + +#define AER_GET_AGENT(t, e) \ + ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \ + (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \ + (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \ + AER_AGENT_RECEIVER) + +#define AER_PHYSICAL_LAYER_ERROR 0 +#define AER_DATA_LINK_LAYER_ERROR 1 +#define AER_TRANSACTION_LAYER_ERROR 2 + +#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ + PCI_ERR_COR_RCVR : 0) +#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ + (PCI_ERR_COR_BAD_TLP| \ + PCI_ERR_COR_BAD_DLLP| \ + PCI_ERR_COR_REP_ROLL| \ + PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP) + +#define AER_GET_LAYER_ERROR(t, e) \ + ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \ + (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ + AER_TRANSACTION_LAYER_ERROR) + +/* + * AER error strings + */ +static const char *aer_error_severity_string[] = { + "Uncorrected (Non-Fatal)", + "Uncorrected (Fatal)", + "Corrected" +}; + +static const char *aer_error_layer[] = { + "Physical Layer", + "Data Link Layer", + "Transaction Layer" +}; + +static const char *aer_correctable_error_string[] = { + "Receiver Error", /* Bit Position 0 */ + NULL, + NULL, + NULL, + NULL, + NULL, + "Bad TLP", /* Bit Position 6 */ + "Bad DLLP", /* Bit Position 7 */ + "RELAY_NUM Rollover", /* Bit Position 8 */ + NULL, + NULL, + NULL, + "Replay Timer Timeout", /* Bit Position 12 */ + "Advisory Non-Fatal", /* Bit Position 13 */ + "Corrected Internal Error", /* Bit Position 14 */ + "Header Log Overflow", /* Bit Position 15 */ +}; + +static const char *aer_uncorrectable_error_string[] = { + "Undefined", /* Bit Position 0 */ + NULL, + NULL, + NULL, + "Data Link Protocol", /* Bit Position 4 */ + "Surprise Down Error", /* Bit Position 5 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Poisoned TLP", /* Bit Position 12 */ + "Flow Control Protocol", /* Bit Position 13 */ + "Completion Timeout", /* Bit Position 14 */ + "Completer Abort", /* Bit Position 15 */ + "Unexpected Completion", /* Bit Position 16 */ + "Receiver Overflow", /* Bit Position 17 */ + "Malformed TLP", /* Bit Position 18 */ + "ECRC", /* Bit Position 19 */ + "Unsupported Request", /* Bit Position 20 */ + "ACS Violation", /* Bit Position 21 */ + "Uncorrectable Internal Error", /* Bit Position 22 */ + "MC Blocked TLP", /* Bit Position 23 */ + "AtomicOp Egress Blocked", /* Bit Position 24 */ + "TLP Prefix Blocked Error", /* Bit Position 25 */ +}; + +static const char *aer_agent_string[] = { + "Receiver ID", + "Requester ID", + "Completer ID", + "Transmitter ID" +}; + +static void __print_tlp_header(struct pci_dev *dev, + struct aer_header_log_regs *t) +{ + pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", + t->dw0, t->dw1, t->dw2, t->dw3); +} + +static void __aer_print_error(struct pci_dev *dev, + struct aer_err_info *info) +{ + int i, status; + const char *errmsg = NULL; + status = (info->status & ~info->mask); + + for (i = 0; i < 32; i++) { + if (!(status & (1 << i))) + continue; + + if (info->severity == AER_CORRECTABLE) + errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ? + aer_correctable_error_string[i] : NULL; + else + errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ? + aer_uncorrectable_error_string[i] : NULL; + + if (errmsg) + pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, + info->first_error == i ? " (First)" : ""); + else + pci_err(dev, " [%2d] Unknown Error Bit%s\n", + i, info->first_error == i ? " (First)" : ""); + } +} + +static void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +{ + int layer, agent; + int id = ((dev->bus->number << 8) | dev->devfn); + + if (!info->status) { + pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + aer_error_severity_string[info->severity]); + goto out; + } + + layer = AER_GET_LAYER_ERROR(info->severity, info->status); + agent = AER_GET_AGENT(info->severity, info->status); + + pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_error_severity_string[info->severity], + aer_error_layer[layer], aer_agent_string[agent]); + + pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, + info->status, info->mask); + + __aer_print_error(dev, info); + + if (info->tlp_header_valid) + __print_tlp_header(dev, &info->tlp); + +out: + if (info->id && info->error_dev_num > 1 && info->id == id) + pci_err(dev, " Error of this Agent is reported first\n"); + + trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), + info->severity, info->tlp_header_valid, &info->tlp); +} + +static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) +{ + u8 bus = info->id >> 8; + u8 devfn = info->id & 0xff; + + pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n", + info->multi_error_valid ? "Multiple " : "", + aer_error_severity_string[info->severity], + pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); +} + +#ifdef CONFIG_ACPI_APEI_PCIEAER +int cper_severity_to_aer(int cper_severity) +{ + switch (cper_severity) { + case CPER_SEV_RECOVERABLE: + return AER_NONFATAL; + case CPER_SEV_FATAL: + return AER_FATAL; + default: + return AER_CORRECTABLE; + } +} +EXPORT_SYMBOL_GPL(cper_severity_to_aer); + +void cper_print_aer(struct pci_dev *dev, int aer_severity, + struct aer_capability_regs *aer) +{ + int layer, agent, tlp_header_valid = 0; + u32 status, mask; + struct aer_err_info info; + + if (aer_severity == AER_CORRECTABLE) { + status = aer->cor_status; + mask = aer->cor_mask; + } else { + status = aer->uncor_status; + mask = aer->uncor_mask; + tlp_header_valid = status & AER_LOG_TLP_MASKS; + } + + layer = AER_GET_LAYER_ERROR(aer_severity, status); + agent = AER_GET_AGENT(aer_severity, status); + + memset(&info, 0, sizeof(info)); + info.severity = aer_severity; + info.status = status; + info.mask = mask; + info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); + + pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); + __aer_print_error(dev, &info); + pci_err(dev, "aer_layer=%s, aer_agent=%s\n", + aer_error_layer[layer], aer_agent_string[agent]); + + if (aer_severity != AER_CORRECTABLE) + pci_err(dev, "aer_uncor_severity: 0x%08x\n", + aer->uncor_severity); + + if (tlp_header_valid) + __print_tlp_header(dev, &aer->header_log); + + trace_aer_event(dev_name(&dev->dev), (status & ~mask), + aer_severity, tlp_header_valid, &aer->header_log); +} +#endif + +/** + * add_error_device - list device to be handled + * @e_info: pointer to error info + * @dev: pointer to pci_dev to be added + */ +static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) +{ + if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { + e_info->dev[e_info->error_dev_num] = dev; + e_info->error_dev_num++; + return 0; + } + return -ENOSPC; +} + +/** + * is_error_source - check whether the device is source of reported error + * @dev: pointer to pci_dev to be checked + * @e_info: pointer to reported error info + */ +static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) +{ + int pos; + u32 status, mask; + u16 reg16; + + /* + * When bus id is equal to 0, it might be a bad id + * reported by root port. + */ + if ((PCI_BUS_NUM(e_info->id) != 0) && + !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { + /* Device ID match? */ + if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) + return true; + + /* Continue id comparing if there is no multiple error */ + if (!e_info->multi_error_valid) + return false; + } + + /* + * When either + * 1) bus id is equal to 0. Some ports might lose the bus + * id of error source id; + * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set + * 3) There are multiple errors and prior ID comparing fails; + * We check AER status registers to find possible reporter. + */ + if (atomic_read(&dev->enable_cnt) == 0) + return false; + + /* Check if AER is enabled */ + pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16); + if (!(reg16 & PCI_EXP_AER_FLAGS)) + return false; + + pos = dev->aer_cap; + if (!pos) + return false; + + /* Check if error is recorded */ + if (e_info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); + } else { + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); + } + if (status & ~mask) + return true; + + return false; +} + +static int find_device_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *e_info = (struct aer_err_info *)data; + + if (is_error_source(dev, e_info)) { + /* List this device */ + if (add_error_device(e_info, dev)) { + /* We cannot handle more... Stop iteration */ + /* TODO: Should print error message here? */ + return 1; + } + + /* If there is only a single error, stop iteration */ + if (!e_info->multi_error_valid) + return 1; + } + return 0; +} + +/** + * find_source_device - search through device hierarchy for source device + * @parent: pointer to Root Port pci_dev data structure + * @e_info: including detailed error information such like id + * + * Return true if found. + * + * Invoked by DPC when error is detected at the Root Port. + * Caller of this function must set id, severity, and multi_error_valid of + * struct aer_err_info pointed by @e_info properly. This function must fill + * e_info->error_dev_num and e_info->dev[], based on the given information. + */ +static bool find_source_device(struct pci_dev *parent, + struct aer_err_info *e_info) +{ + struct pci_dev *dev = parent; + int result; + + /* Must reset in this function */ + e_info->error_dev_num = 0; + + /* Is Root Port an agent that sends error message? */ + result = find_device_iter(dev, e_info); + if (result) + return true; + + pci_walk_bus(parent->subordinate, find_device_iter, e_info); + + if (!e_info->error_dev_num) { + pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n", + e_info->id); + return false; + } + return true; +} + +/** + * handle_error_source - handle logging error into an event log + * @dev: pointer to pci_dev data structure of error source device + * @info: comprehensive error information + * + * Invoked when an error being detected by Root Port. + */ +static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +{ + int pos; + + if (info->severity == AER_CORRECTABLE) { + /* + * Correctable error does not need software intervention. + * No need to go through error recovery process. + */ + pos = dev->aer_cap; + if (pos) + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, + info->status); + } else if (info->severity == AER_NONFATAL) + pcie_do_nonfatal_recovery(dev); + else if (info->severity == AER_FATAL) + pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER); +} + +#ifdef CONFIG_ACPI_APEI_PCIEAER + +#define AER_RECOVER_RING_ORDER 4 +#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER) + +struct aer_recover_entry { + u8 bus; + u8 devfn; + u16 domain; + int severity; + struct aer_capability_regs *regs; +}; + +static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, + AER_RECOVER_RING_SIZE); + +static void aer_recover_work_func(struct work_struct *work) +{ + struct aer_recover_entry entry; + struct pci_dev *pdev; + + while (kfifo_get(&aer_recover_ring, &entry)) { + pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, + entry.devfn); + if (!pdev) { + pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", + entry.domain, entry.bus, + PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); + continue; + } + cper_print_aer(pdev, entry.severity, entry.regs); + if (entry.severity == AER_NONFATAL) + pcie_do_nonfatal_recovery(pdev); + else if (entry.severity == AER_FATAL) + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER); + pci_dev_put(pdev); + } +} + +/* + * Mutual exclusion for writers of aer_recover_ring, reader side don't + * need lock, because there is only one reader and lock is not needed + * between reader and writer. + */ +static DEFINE_SPINLOCK(aer_recover_ring_lock); +static DECLARE_WORK(aer_recover_work, aer_recover_work_func); + +void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, + int severity, struct aer_capability_regs *aer_regs) +{ + unsigned long flags; + struct aer_recover_entry entry = { + .bus = bus, + .devfn = devfn, + .domain = domain, + .severity = severity, + .regs = aer_regs, + }; + + spin_lock_irqsave(&aer_recover_ring_lock, flags); + if (kfifo_put(&aer_recover_ring, entry)) + schedule_work(&aer_recover_work); + else + pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", + domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + spin_unlock_irqrestore(&aer_recover_ring_lock, flags); +} +EXPORT_SYMBOL_GPL(aer_recover_queue); +#endif + +/** + * get_device_error_info - read error status from dev and store it to info + * @dev: pointer to the device expected to have a error record + * @info: pointer to structure to store the error record + * + * Return 1 on success, 0 on error. + * + * Note that @info is reused among all error devices. Clear fields properly. + */ +static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) +{ + int pos, temp; + + /* Must reset in this function */ + info->status = 0; + info->tlp_header_valid = 0; + + pos = dev->aer_cap; + + /* The device might not support AER */ + if (!pos) + return 0; + + if (info->severity == AER_CORRECTABLE) { + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, + &info->status); + pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + } else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + info->severity == AER_NONFATAL) { + + /* Link is still healthy for IO reads */ + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, + &info->status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, + &info->mask); + if (!(info->status & ~info->mask)) + return 0; + + /* Get First Error Pointer */ + pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); + info->first_error = PCI_ERR_CAP_FEP(temp); + + if (info->status & AER_LOG_TLP_MASKS) { + info->tlp_header_valid = 1; + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); + pci_read_config_dword(dev, + pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + } + } + + return 1; +} + +static inline void aer_process_err_devices(struct aer_err_info *e_info) +{ + int i; + + /* Report all before handle them, not to lost records by reset etc. */ + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (get_device_error_info(e_info->dev[i], e_info)) + aer_print_error(e_info->dev[i], e_info); + } + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (get_device_error_info(e_info->dev[i], e_info)) + handle_error_source(e_info->dev[i], e_info); + } +} + +/** + * aer_isr_one_error - consume an error detected by root port + * @rpc: pointer to the root port which holds an error + * @e_src: pointer to an error source + */ +static void aer_isr_one_error(struct aer_rpc *rpc, + struct aer_err_source *e_src) +{ + struct pci_dev *pdev = rpc->rpd; + struct aer_err_info *e_info = &rpc->e_info; + + /* + * There is a possibility that both correctable error and + * uncorrectable error being logged. Report correctable error first. + */ + if (e_src->status & PCI_ERR_ROOT_COR_RCV) { + e_info->id = ERR_COR_ID(e_src->id); + e_info->severity = AER_CORRECTABLE; + + if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) + e_info->multi_error_valid = 1; + else + e_info->multi_error_valid = 0; + aer_print_port_info(pdev, e_info); + + if (find_source_device(pdev, e_info)) + aer_process_err_devices(e_info); + } + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + e_info->id = ERR_UNCOR_ID(e_src->id); + + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + e_info->severity = AER_FATAL; + else + e_info->severity = AER_NONFATAL; + + if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) + e_info->multi_error_valid = 1; + else + e_info->multi_error_valid = 0; + + aer_print_port_info(pdev, e_info); + + if (find_source_device(pdev, e_info)) + aer_process_err_devices(e_info); + } +} + +/** + * get_e_source - retrieve an error source + * @rpc: pointer to the root port which holds an error + * @e_src: pointer to store retrieved error source + * + * Return 1 if an error source is retrieved, otherwise 0. + * + * Invoked by DPC handler to consume an error. + */ +static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) +{ + unsigned long flags; + + /* Lock access to Root error producer/consumer index */ + spin_lock_irqsave(&rpc->e_lock, flags); + if (rpc->prod_idx == rpc->cons_idx) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return 0; + } + + *e_src = rpc->e_sources[rpc->cons_idx]; + rpc->cons_idx++; + if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) + rpc->cons_idx = 0; + spin_unlock_irqrestore(&rpc->e_lock, flags); + + return 1; +} + +/** + * aer_isr - consume errors detected by root port + * @work: definition of this work item + * + * Invoked, as DPC, when root port records new detected error + */ +static void aer_isr(struct work_struct *work) +{ + struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); + struct aer_err_source uninitialized_var(e_src); + + mutex_lock(&rpc->rpc_mutex); + while (get_e_source(rpc, &e_src)) + aer_isr_one_error(rpc, &e_src); + mutex_unlock(&rpc->rpc_mutex); +} + +/** + * aer_irq - Root Port's ISR + * @irq: IRQ assigned to Root Port + * @context: pointer to Root Port data structure + * + * Invoked when Root Port detects AER messages. + */ +irqreturn_t aer_irq(int irq, void *context) +{ + unsigned int status, id; + struct pcie_device *pdev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(pdev); + int next_prod_idx; + unsigned long flags; + int pos; + + pos = pdev->port->aer_cap; + /* + * Must lock access to Root Error Status Reg, Root Error ID Reg, + * and Root error producer/consumer index + */ + spin_lock_irqsave(&rpc->e_lock, flags); + + /* Read error status */ + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); + if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return IRQ_NONE; + } + + /* Read error source and clear error status */ + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id); + pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); + + /* Store error source for later DPC handler */ + next_prod_idx = rpc->prod_idx + 1; + if (next_prod_idx == AER_ERROR_SOURCES_MAX) + next_prod_idx = 0; + if (next_prod_idx == rpc->cons_idx) { + /* + * Error Storm Condition - possibly the same error occurred. + * Drop the error. + */ + spin_unlock_irqrestore(&rpc->e_lock, flags); + return IRQ_HANDLED; + } + rpc->e_sources[rpc->prod_idx].status = status; + rpc->e_sources[rpc->prod_idx].id = id; + rpc->prod_idx = next_prod_idx; + spin_unlock_irqrestore(&rpc->e_lock, flags); + + /* Invoke DPC handler */ + schedule_work(&rpc->dpc_handler); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(aer_irq); + +static int set_device_error_reporting(struct pci_dev *dev, void *data) +{ + bool enable = *((bool *)data); + int type = pci_pcie_type(dev); + + if ((type == PCI_EXP_TYPE_ROOT_PORT) || + (type == PCI_EXP_TYPE_UPSTREAM) || + (type == PCI_EXP_TYPE_DOWNSTREAM)) { + if (enable) + pci_enable_pcie_error_reporting(dev); + else + pci_disable_pcie_error_reporting(dev); + } + + if (enable) + pcie_set_ecrc_checking(dev); + + return 0; +} + +/** + * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports. + * @dev: pointer to root port's pci_dev data structure + * @enable: true = enable error reporting, false = disable error reporting. + */ +static void set_downstream_devices_error_reporting(struct pci_dev *dev, + bool enable) +{ + set_device_error_reporting(dev, &enable); + + if (!dev->subordinate) + return; + pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); +} + +/** + * aer_enable_rootport - enable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus loads AER service driver. + */ +static void aer_enable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd; + int aer_pos; + u16 reg16; + u32 reg32; + + /* Clear PCIe Capability's Device Status */ + pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16); + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16); + + /* Disable system error generation in response to error messages */ + pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, + SYSTEM_ERROR_INTR_ON_MESG_MASK); + + aer_pos = pdev->aer_cap; + /* Clear error status */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); + + /* + * Enable error reporting for the root port device and downstream port + * devices. + */ + set_downstream_devices_error_reporting(pdev, true); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); +} + +/** + * aer_disable_rootport - disable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus unloads AER service driver. + */ +static void aer_disable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd; + u32 reg32; + int pos; + + /* + * Disable error reporting for the root port device and downstream port + * devices. + */ + set_downstream_devices_error_reporting(pdev, false); + + pos = pdev->aer_cap; + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + /* Clear Root's error status reg */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); +} + +/** + * aer_alloc_rpc - allocate Root Port data structure + * @dev: pointer to the pcie_dev data structure + * + * Invoked when Root Port's AER service is loaded. + */ +static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) +{ + struct aer_rpc *rpc; + + rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL); + if (!rpc) + return NULL; + + /* Initialize Root lock access, e_lock, to Root Error Status Reg */ + spin_lock_init(&rpc->e_lock); + + rpc->rpd = dev->port; + INIT_WORK(&rpc->dpc_handler, aer_isr); + mutex_init(&rpc->rpc_mutex); + + /* Use PCIe bus function to store rpc into PCIe device */ + set_service_data(dev, rpc); + + return rpc; +} + +/** + * aer_remove - clean up resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus unloads or AER probe fails. + */ +static void aer_remove(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + if (rpc) { + /* If register interrupt service, it must be free. */ + if (rpc->isr) + free_irq(dev->irq, dev); + + flush_work(&rpc->dpc_handler); + aer_disable_rootport(rpc); + kfree(rpc); + set_service_data(dev, NULL); + } +} + +/** + * aer_probe - initialize resources + * @dev: pointer to the pcie_dev data structure + * + * Invoked when PCI Express bus loads AER service driver. + */ +static int aer_probe(struct pcie_device *dev) +{ + int status; + struct aer_rpc *rpc; + struct device *device = &dev->port->dev; + + /* Alloc rpc data structure */ + rpc = aer_alloc_rpc(dev); + if (!rpc) { + dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n"); + aer_remove(dev); + return -ENOMEM; + } + + /* Request IRQ ISR */ + status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev); + if (status) { + dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n", + dev->irq); + aer_remove(dev); + return status; + } + + rpc->isr = 1; + + aer_enable_rootport(rpc); + dev_info(device, "AER enabled with IRQ %d\n", dev->irq); + return 0; +} + +/** + * aer_root_reset - reset link on Root Port + * @dev: pointer to Root Port's pci_dev data structure + * + * Invoked by Port Bus driver when performing link reset at Root Port. + */ +static pci_ers_result_t aer_root_reset(struct pci_dev *dev) +{ + u32 reg32; + int pos; + + pos = dev->aer_cap; + + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + pci_reset_bridge_secondary_bus(dev); + pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n"); + + /* Clear Root Error Status */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + return PCI_ERS_RESULT_RECOVERED; +} + +/** + * aer_error_resume - clean up corresponding error status bits + * @dev: pointer to Root Port's pci_dev data structure + * + * Invoked by Port Bus driver during nonfatal recovery. + */ +static void aer_error_resume(struct pci_dev *dev) +{ + int pos; + u32 status, mask; + u16 reg16; + + /* Clean up Root device status */ + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, ®16); + pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16); + + /* Clean AER Root Error Status */ + pos = dev->aer_cap; + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); + status &= ~mask; /* Clear corresponding nonfatal bits */ + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); +} + +static struct pcie_port_service_driver aerdriver = { + .name = "aer", + .port_type = PCI_EXP_TYPE_ROOT_PORT, + .service = PCIE_PORT_SERVICE_AER, + + .probe = aer_probe, + .remove = aer_remove, + .error_resume = aer_error_resume, + .reset_link = aer_root_reset, +}; + +/** + * aer_service_init - register AER root service driver + * + * Invoked when AER root service driver is loaded. + */ +static int __init aer_service_init(void) +{ + if (!pci_aer_available() || aer_acpi_firmware_first()) + return -ENXIO; + return pcie_port_service_register(&aerdriver); +} +device_initcall(aer_service_init); diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig deleted file mode 100644 index 5a64eb3d6c7a..000000000000 --- a/drivers/pci/pcie/aer/Kconfig +++ /dev/null @@ -1,29 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# PCI Express Root Port Device AER Configuration -# - -config PCIEAER - bool "Root Port Advanced Error Reporting support" - depends on PCIEPORTBUS - select RAS - default y - help - This enables PCI Express Root Port Advanced Error Reporting - (AER) driver support. Error reporting messages sent to Root - Port will be handled by PCI Express AER driver. - - -# -# PCI Express ECRC -# -config PCIE_ECRC - bool "PCI Express ECRC settings control" - depends on PCIEAER - help - Used to override firmware/bios settings for PCI Express ECRC - (transaction layer end-to-end CRC checking). - - When in doubt, say N. - -source "drivers/pci/pcie/aer/Kconfig.debug" diff --git a/drivers/pci/pcie/aer/Kconfig.debug b/drivers/pci/pcie/aer/Kconfig.debug deleted file mode 100644 index 67e02174b65b..000000000000 --- a/drivers/pci/pcie/aer/Kconfig.debug +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# PCI Express Root Port Device AER Debug Configuration -# - -config PCIEAER_INJECT - tristate "PCIe AER error injector support" - depends on PCIEAER - default n - help - This enables PCI Express Root Port Advanced Error Reporting - (AER) software error injector. - - Debugging PCIe AER code is quite difficult because it is hard - to trigger various real hardware errors. Software based - error injection can fake almost all kinds of errors with the - help of a user space helper tool aer-inject, which can be - gotten from: - http://www.kernel.org/pub/linux/utils/pci/aer-inject/ diff --git a/drivers/pci/pcie/aer/Makefile b/drivers/pci/pcie/aer/Makefile deleted file mode 100644 index 09bd890875a3..000000000000 --- a/drivers/pci/pcie/aer/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for PCI-Express Root Port Advanced Error Reporting Driver -# - -obj-$(CONFIG_PCIEAER) += aerdriver.o - -obj-$(CONFIG_PCIE_ECRC) += ecrc.o - -aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o -aerdriver-$(CONFIG_ACPI) += aerdrv_acpi.o - -obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c deleted file mode 100644 index 779b3879b1b5..000000000000 --- a/drivers/pci/pcie/aer/aerdrv.c +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Implement the AER root port service driver. The driver registers an IRQ - * handler. When a root port triggers an AER interrupt, the IRQ handler - * collects root port status and schedules work. - * - * Copyright (C) 2006 Intel Corp. - * Tom Long Nguyen (tom.l.nguyen@intel.com) - * Zhang Yanmin (yanmin.zhang@intel.com) - */ - -#include <linux/pci.h> -#include <linux/pci-acpi.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/pm.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/slab.h> - -#include "aerdrv.h" -#include "../../pci.h" - -static int aer_probe(struct pcie_device *dev); -static void aer_remove(struct pcie_device *dev); -static void aer_error_resume(struct pci_dev *dev); -static pci_ers_result_t aer_root_reset(struct pci_dev *dev); - -static struct pcie_port_service_driver aerdriver = { - .name = "aer", - .port_type = PCI_EXP_TYPE_ROOT_PORT, - .service = PCIE_PORT_SERVICE_AER, - - .probe = aer_probe, - .remove = aer_remove, - .error_resume = aer_error_resume, - .reset_link = aer_root_reset, -}; - -static int pcie_aer_disable; - -void pci_no_aer(void) -{ - pcie_aer_disable = 1; -} - -bool pci_aer_available(void) -{ - return !pcie_aer_disable && pci_msi_enabled(); -} - -static int set_device_error_reporting(struct pci_dev *dev, void *data) -{ - bool enable = *((bool *)data); - int type = pci_pcie_type(dev); - - if ((type == PCI_EXP_TYPE_ROOT_PORT) || - (type == PCI_EXP_TYPE_UPSTREAM) || - (type == PCI_EXP_TYPE_DOWNSTREAM)) { - if (enable) - pci_enable_pcie_error_reporting(dev); - else - pci_disable_pcie_error_reporting(dev); - } - - if (enable) - pcie_set_ecrc_checking(dev); - - return 0; -} - -/** - * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports. - * @dev: pointer to root port's pci_dev data structure - * @enable: true = enable error reporting, false = disable error reporting. - */ -static void set_downstream_devices_error_reporting(struct pci_dev *dev, - bool enable) -{ - set_device_error_reporting(dev, &enable); - - if (!dev->subordinate) - return; - pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); -} - -/** - * aer_enable_rootport - enable Root Port's interrupts when receiving messages - * @rpc: pointer to a Root Port data structure - * - * Invoked when PCIe bus loads AER service driver. - */ -static void aer_enable_rootport(struct aer_rpc *rpc) -{ - struct pci_dev *pdev = rpc->rpd->port; - int aer_pos; - u16 reg16; - u32 reg32; - - /* Clear PCIe Capability's Device Status */ - pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16); - pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16); - - /* Disable system error generation in response to error messages */ - pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, - SYSTEM_ERROR_INTR_ON_MESG_MASK); - - aer_pos = pdev->aer_cap; - /* Clear error status */ - pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); - pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); - pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); - - /* - * Enable error reporting for the root port device and downstream port - * devices. - */ - set_downstream_devices_error_reporting(pdev, true); - - /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); -} - -/** - * aer_disable_rootport - disable Root Port's interrupts when receiving messages - * @rpc: pointer to a Root Port data structure - * - * Invoked when PCIe bus unloads AER service driver. - */ -static void aer_disable_rootport(struct aer_rpc *rpc) -{ - struct pci_dev *pdev = rpc->rpd->port; - u32 reg32; - int pos; - - /* - * Disable error reporting for the root port device and downstream port - * devices. - */ - set_downstream_devices_error_reporting(pdev, false); - - pos = pdev->aer_cap; - /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); - - /* Clear Root's error status reg */ - pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); -} - -/** - * aer_irq - Root Port's ISR - * @irq: IRQ assigned to Root Port - * @context: pointer to Root Port data structure - * - * Invoked when Root Port detects AER messages. - */ -irqreturn_t aer_irq(int irq, void *context) -{ - unsigned int status, id; - struct pcie_device *pdev = (struct pcie_device *)context; - struct aer_rpc *rpc = get_service_data(pdev); - int next_prod_idx; - unsigned long flags; - int pos; - - pos = pdev->port->aer_cap; - /* - * Must lock access to Root Error Status Reg, Root Error ID Reg, - * and Root error producer/consumer index - */ - spin_lock_irqsave(&rpc->e_lock, flags); - - /* Read error status */ - pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); - if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) { - spin_unlock_irqrestore(&rpc->e_lock, flags); - return IRQ_NONE; - } - - /* Read error source and clear error status */ - pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id); - pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); - - /* Store error source for later DPC handler */ - next_prod_idx = rpc->prod_idx + 1; - if (next_prod_idx == AER_ERROR_SOURCES_MAX) - next_prod_idx = 0; - if (next_prod_idx == rpc->cons_idx) { - /* - * Error Storm Condition - possibly the same error occurred. - * Drop the error. - */ - spin_unlock_irqrestore(&rpc->e_lock, flags); - return IRQ_HANDLED; - } - rpc->e_sources[rpc->prod_idx].status = status; - rpc->e_sources[rpc->prod_idx].id = id; - rpc->prod_idx = next_prod_idx; - spin_unlock_irqrestore(&rpc->e_lock, flags); - - /* Invoke DPC handler */ - schedule_work(&rpc->dpc_handler); - - return IRQ_HANDLED; -} -EXPORT_SYMBOL_GPL(aer_irq); - -/** - * aer_alloc_rpc - allocate Root Port data structure - * @dev: pointer to the pcie_dev data structure - * - * Invoked when Root Port's AER service is loaded. - */ -static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) -{ - struct aer_rpc *rpc; - - rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL); - if (!rpc) - return NULL; - - /* Initialize Root lock access, e_lock, to Root Error Status Reg */ - spin_lock_init(&rpc->e_lock); - - rpc->rpd = dev; - INIT_WORK(&rpc->dpc_handler, aer_isr); - mutex_init(&rpc->rpc_mutex); - - /* Use PCIe bus function to store rpc into PCIe device */ - set_service_data(dev, rpc); - - return rpc; -} - -/** - * aer_remove - clean up resources - * @dev: pointer to the pcie_dev data structure - * - * Invoked when PCI Express bus unloads or AER probe fails. - */ -static void aer_remove(struct pcie_device *dev) -{ - struct aer_rpc *rpc = get_service_data(dev); - - if (rpc) { - /* If register interrupt service, it must be free. */ - if (rpc->isr) - free_irq(dev->irq, dev); - - flush_work(&rpc->dpc_handler); - aer_disable_rootport(rpc); - kfree(rpc); - set_service_data(dev, NULL); - } -} - -/** - * aer_probe - initialize resources - * @dev: pointer to the pcie_dev data structure - * - * Invoked when PCI Express bus loads AER service driver. - */ -static int aer_probe(struct pcie_device *dev) -{ - int status; - struct aer_rpc *rpc; - struct device *device = &dev->port->dev; - - /* Alloc rpc data structure */ - rpc = aer_alloc_rpc(dev); - if (!rpc) { - dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n"); - aer_remove(dev); - return -ENOMEM; - } - - /* Request IRQ ISR */ - status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev); - if (status) { - dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n", - dev->irq); - aer_remove(dev); - return status; - } - - rpc->isr = 1; - - aer_enable_rootport(rpc); - dev_info(device, "AER enabled with IRQ %d\n", dev->irq); - return 0; -} - -/** - * aer_root_reset - reset link on Root Port - * @dev: pointer to Root Port's pci_dev data structure - * - * Invoked by Port Bus driver when performing link reset at Root Port. - */ -static pci_ers_result_t aer_root_reset(struct pci_dev *dev) -{ - u32 reg32; - int pos; - - pos = dev->aer_cap; - - /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); - - pci_reset_bridge_secondary_bus(dev); - pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n"); - - /* Clear Root Error Status */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32); - - /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); - - return PCI_ERS_RESULT_RECOVERED; -} - -/** - * aer_error_resume - clean up corresponding error status bits - * @dev: pointer to Root Port's pci_dev data structure - * - * Invoked by Port Bus driver during nonfatal recovery. - */ -static void aer_error_resume(struct pci_dev *dev) -{ - int pos; - u32 status, mask; - u16 reg16; - - /* Clean up Root device status */ - pcie_capability_read_word(dev, PCI_EXP_DEVSTA, ®16); - pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16); - - /* Clean AER Root Error Status */ - pos = dev->aer_cap; - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); - if (dev->error_state == pci_channel_io_normal) - status &= ~mask; /* Clear corresponding nonfatal bits */ - else - status &= mask; /* Clear corresponding fatal bits */ - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); -} - -/** - * aer_service_init - register AER root service driver - * - * Invoked when AER root service driver is loaded. - */ -static int __init aer_service_init(void) -{ - if (!pci_aer_available() || aer_acpi_firmware_first()) - return -ENXIO; - return pcie_port_service_register(&aerdriver); -} -device_initcall(aer_service_init); diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h deleted file mode 100644 index 08b4584f62fe..000000000000 --- a/drivers/pci/pcie/aer/aerdrv.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2006 Intel Corp. - * Tom Long Nguyen (tom.l.nguyen@intel.com) - * Zhang Yanmin (yanmin.zhang@intel.com) - */ - -#ifndef _AERDRV_H_ -#define _AERDRV_H_ - -#include <linux/workqueue.h> -#include <linux/aer.h> -#include <linux/interrupt.h> - -#include "../portdrv.h" - -#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ - PCI_EXP_RTCTL_SENFEE| \ - PCI_EXP_RTCTL_SEFEE) -#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ - PCI_ERR_ROOT_CMD_NONFATAL_EN| \ - PCI_ERR_ROOT_CMD_FATAL_EN) -#define ERR_COR_ID(d) (d & 0xffff) -#define ERR_UNCOR_ID(d) (d >> 16) - -#define AER_ERROR_SOURCES_MAX 100 - -#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ - PCI_ERR_UNC_ECRC| \ - PCI_ERR_UNC_UNSUP| \ - PCI_ERR_UNC_COMP_ABORT| \ - PCI_ERR_UNC_UNX_COMP| \ - PCI_ERR_UNC_MALF_TLP) - -#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ -struct aer_err_info { - struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; - int error_dev_num; - - unsigned int id:16; - - unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ - unsigned int __pad1:5; - unsigned int multi_error_valid:1; - - unsigned int first_error:5; - unsigned int __pad2:2; - unsigned int tlp_header_valid:1; - - unsigned int status; /* COR/UNCOR Error Status */ - unsigned int mask; /* COR/UNCOR Error Mask */ - struct aer_header_log_regs tlp; /* TLP Header */ -}; - -struct aer_err_source { - unsigned int status; - unsigned int id; -}; - -struct aer_rpc { - struct pcie_device *rpd; /* Root Port device */ - struct work_struct dpc_handler; - struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; - struct aer_err_info e_info; - unsigned short prod_idx; /* Error Producer Index */ - unsigned short cons_idx; /* Error Consumer Index */ - int isr; - spinlock_t e_lock; /* - * Lock access to Error Status/ID Regs - * and error producer/consumer index - */ - struct mutex rpc_mutex; /* - * only one thread could do - * recovery on the same - * root port hierarchy - */ -}; - -struct aer_broadcast_data { - enum pci_channel_state state; - enum pci_ers_result result; -}; - -static inline pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result new) -{ - if (new == PCI_ERS_RESULT_NO_AER_DRIVER) - return PCI_ERS_RESULT_NO_AER_DRIVER; - - if (new == PCI_ERS_RESULT_NONE) - return orig; - - switch (orig) { - case PCI_ERS_RESULT_CAN_RECOVER: - case PCI_ERS_RESULT_RECOVERED: - orig = new; - break; - case PCI_ERS_RESULT_DISCONNECT: - if (new == PCI_ERS_RESULT_NEED_RESET) - orig = PCI_ERS_RESULT_NEED_RESET; - break; - default: - break; - } - - return orig; -} - -extern struct bus_type pcie_port_bus_type; -void aer_isr(struct work_struct *work); -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); -void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info); -irqreturn_t aer_irq(int irq, void *context); - -#ifdef CONFIG_ACPI_APEI -int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); -#else -static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) -{ - if (pci_dev->__aer_firmware_first_valid) - return pci_dev->__aer_firmware_first; - return 0; -} -#endif -#endif /* _AERDRV_H_ */ diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c deleted file mode 100644 index 08c87de13cb8..000000000000 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Access ACPI _OSC method - * - * Copyright (C) 2006 Intel Corp. - * Tom Long Nguyen (tom.l.nguyen@intel.com) - * Zhang Yanmin (yanmin.zhang@intel.com) - */ - -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/pm.h> -#include <linux/suspend.h> -#include <linux/acpi.h> -#include <linux/pci-acpi.h> -#include <linux/delay.h> -#include <acpi/apei.h> -#include "aerdrv.h" - -#ifdef CONFIG_ACPI_APEI -static inline int hest_match_pci(struct acpi_hest_aer_common *p, - struct pci_dev *pci) -{ - return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) && - ACPI_HEST_BUS(p->bus) == pci->bus->number && - p->device == PCI_SLOT(pci->devfn) && - p->function == PCI_FUNC(pci->devfn); -} - -static inline bool hest_match_type(struct acpi_hest_header *hest_hdr, - struct pci_dev *dev) -{ - u16 hest_type = hest_hdr->type; - u8 pcie_type = pci_pcie_type(dev); - - if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT && - pcie_type == PCI_EXP_TYPE_ROOT_PORT) || - (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT && - pcie_type == PCI_EXP_TYPE_ENDPOINT) || - (hest_type == ACPI_HEST_TYPE_AER_BRIDGE && - (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)) - return true; - return false; -} - -struct aer_hest_parse_info { - struct pci_dev *pci_dev; - int firmware_first; -}; - -static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr) -{ - if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT || - hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT || - hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE) - return 1; - return 0; -} - -static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data) -{ - struct aer_hest_parse_info *info = data; - struct acpi_hest_aer_common *p; - int ff; - - if (!hest_source_is_pcie_aer(hest_hdr)) - return 0; - - p = (struct acpi_hest_aer_common *)(hest_hdr + 1); - ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); - - /* - * If no specific device is supplied, determine whether - * FIRMWARE_FIRST is set for *any* PCIe device. - */ - if (!info->pci_dev) { - info->firmware_first |= ff; - return 0; - } - - /* Otherwise, check the specific device */ - if (p->flags & ACPI_HEST_GLOBAL) { - if (hest_match_type(hest_hdr, info->pci_dev)) - info->firmware_first = ff; - } else - if (hest_match_pci(p, info->pci_dev)) - info->firmware_first = ff; - - return 0; -} - -static void aer_set_firmware_first(struct pci_dev *pci_dev) -{ - int rc; - struct aer_hest_parse_info info = { - .pci_dev = pci_dev, - .firmware_first = 0, - }; - - rc = apei_hest_parse(aer_hest_parse, &info); - - if (rc) - pci_dev->__aer_firmware_first = 0; - else - pci_dev->__aer_firmware_first = info.firmware_first; - pci_dev->__aer_firmware_first_valid = 1; -} - -int pcie_aer_get_firmware_first(struct pci_dev *dev) -{ - if (!pci_is_pcie(dev)) - return 0; - - if (!dev->__aer_firmware_first_valid) - aer_set_firmware_first(dev); - return dev->__aer_firmware_first; -} - -static bool aer_firmware_first; - -/** - * aer_acpi_firmware_first - Check if APEI should control AER. - */ -bool aer_acpi_firmware_first(void) -{ - static bool parsed = false; - struct aer_hest_parse_info info = { - .pci_dev = NULL, /* Check all PCIe devices */ - .firmware_first = 0, - }; - - if (!parsed) { - apei_hest_parse(aer_hest_parse, &info); - aer_firmware_first = info.firmware_first; - parsed = true; - } - return aer_firmware_first; -} -#endif diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c deleted file mode 100644 index 0ea5acc40323..000000000000 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ /dev/null @@ -1,809 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Implement the core part of PCIe AER. When a PCIe error is delivered, an - * error message will be collected and printed to console, then an error - * recovery procedure will be executed by following the PCI error recovery - * rules. - * - * Copyright (C) 2006 Intel Corp. - * Tom Long Nguyen (tom.l.nguyen@intel.com) - * Zhang Yanmin (yanmin.zhang@intel.com) - */ - -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/pm.h> -#include <linux/suspend.h> -#include <linux/delay.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include "aerdrv.h" - -#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ - PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) - -int pci_enable_pcie_error_reporting(struct pci_dev *dev) -{ - if (pcie_aer_get_firmware_first(dev)) - return -EIO; - - if (!dev->aer_cap) - return -EIO; - - return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); -} -EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); - -int pci_disable_pcie_error_reporting(struct pci_dev *dev) -{ - if (pcie_aer_get_firmware_first(dev)) - return -EIO; - - return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, - PCI_EXP_AER_FLAGS); -} -EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); - -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) -{ - int pos; - u32 status; - - pos = dev->aer_cap; - if (!pos) - return -EIO; - - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - if (status) - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); - - return 0; -} -EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); - -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) -{ - int pos; - u32 status; - int port_type; - - if (!pci_is_pcie(dev)) - return -ENODEV; - - pos = dev->aer_cap; - if (!pos) - return -EIO; - - port_type = pci_pcie_type(dev); - if (port_type == PCI_EXP_TYPE_ROOT_PORT) { - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); - } - - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status); - - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); - - return 0; -} - -int pci_aer_init(struct pci_dev *dev) -{ - dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - return pci_cleanup_aer_error_status_regs(dev); -} - -/** - * add_error_device - list device to be handled - * @e_info: pointer to error info - * @dev: pointer to pci_dev to be added - */ -static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) -{ - if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { - e_info->dev[e_info->error_dev_num] = dev; - e_info->error_dev_num++; - return 0; - } - return -ENOSPC; -} - -/** - * is_error_source - check whether the device is source of reported error - * @dev: pointer to pci_dev to be checked - * @e_info: pointer to reported error info - */ -static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) -{ - int pos; - u32 status, mask; - u16 reg16; - - /* - * When bus id is equal to 0, it might be a bad id - * reported by root port. - */ - if ((PCI_BUS_NUM(e_info->id) != 0) && - !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { - /* Device ID match? */ - if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) - return true; - - /* Continue id comparing if there is no multiple error */ - if (!e_info->multi_error_valid) - return false; - } - - /* - * When either - * 1) bus id is equal to 0. Some ports might lose the bus - * id of error source id; - * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set - * 3) There are multiple errors and prior ID comparing fails; - * We check AER status registers to find possible reporter. - */ - if (atomic_read(&dev->enable_cnt) == 0) - return false; - - /* Check if AER is enabled */ - pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16); - if (!(reg16 & PCI_EXP_AER_FLAGS)) - return false; - - pos = dev->aer_cap; - if (!pos) - return false; - - /* Check if error is recorded */ - if (e_info->severity == AER_CORRECTABLE) { - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); - } else { - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); - } - if (status & ~mask) - return true; - - return false; -} - -static int find_device_iter(struct pci_dev *dev, void *data) -{ - struct aer_err_info *e_info = (struct aer_err_info *)data; - - if (is_error_source(dev, e_info)) { - /* List this device */ - if (add_error_device(e_info, dev)) { - /* We cannot handle more... Stop iteration */ - /* TODO: Should print error message here? */ - return 1; - } - - /* If there is only a single error, stop iteration */ - if (!e_info->multi_error_valid) - return 1; - } - return 0; -} - -/** - * find_source_device - search through device hierarchy for source device - * @parent: pointer to Root Port pci_dev data structure - * @e_info: including detailed error information such like id - * - * Return true if found. - * - * Invoked by DPC when error is detected at the Root Port. - * Caller of this function must set id, severity, and multi_error_valid of - * struct aer_err_info pointed by @e_info properly. This function must fill - * e_info->error_dev_num and e_info->dev[], based on the given information. - */ -static bool find_source_device(struct pci_dev *parent, - struct aer_err_info *e_info) -{ - struct pci_dev *dev = parent; - int result; - - /* Must reset in this function */ - e_info->error_dev_num = 0; - - /* Is Root Port an agent that sends error message? */ - result = find_device_iter(dev, e_info); - if (result) - return true; - - pci_walk_bus(parent->subordinate, find_device_iter, e_info); - - if (!e_info->error_dev_num) { - pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n", - e_info->id); - return false; - } - return true; -} - -static int report_error_detected(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - dev->error_state = result_data->state; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->error_detected) { - if (result_data->state == pci_channel_io_frozen && - dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { - /* - * In case of fatal recovery, if one of down- - * stream device has no driver. We might be - * unable to recover because a later insmod - * of a driver for this device is unaware of - * its hw state. - */ - pci_printk(KERN_DEBUG, dev, "device has %s\n", - dev->driver ? - "no AER-aware driver" : "no driver"); - } - - /* - * If there's any device in the subtree that does not - * have an error_detected callback, returning - * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of - * the subsequent mmio_enabled/slot_reset/resume - * callbacks of "any" device in the subtree. All the - * devices in the subtree are left in the error state - * without recovery. - */ - - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - vote = PCI_ERS_RESULT_NO_AER_DRIVER; - else - vote = PCI_ERS_RESULT_NONE; - } else { - err_handler = dev->driver->err_handler; - vote = err_handler->error_detected(dev, result_data->state); - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); - } - - result_data->result = merge_result(result_data->result, vote); - device_unlock(&dev->dev); - return 0; -} - -static int report_mmio_enabled(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->mmio_enabled) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->mmio_enabled(dev); - result_data->result = merge_result(result_data->result, vote); -out: - device_unlock(&dev->dev); - return 0; -} - -static int report_slot_reset(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->slot_reset) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->slot_reset(dev); - result_data->result = merge_result(result_data->result, vote); -out: - device_unlock(&dev->dev); - return 0; -} - -static int report_resume(struct pci_dev *dev, void *data) -{ - const struct pci_error_handlers *err_handler; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->resume) - goto out; - - err_handler = dev->driver->err_handler; - err_handler->resume(dev); - pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); -out: - device_unlock(&dev->dev); - return 0; -} - -/** - * broadcast_error_message - handle message broadcast to downstream drivers - * @dev: pointer to from where in a hierarchy message is broadcasted down - * @state: error state - * @error_mesg: message to print - * @cb: callback to be broadcasted - * - * Invoked during error recovery process. Once being invoked, the content - * of error severity will be broadcasted to all downstream drivers in a - * hierarchy in question. - */ -static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, - enum pci_channel_state state, - char *error_mesg, - int (*cb)(struct pci_dev *, void *)) -{ - struct aer_broadcast_data result_data; - - pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); - result_data.state = state; - if (cb == report_error_detected) - result_data.result = PCI_ERS_RESULT_CAN_RECOVER; - else - result_data.result = PCI_ERS_RESULT_RECOVERED; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* - * If the error is reported by a bridge, we think this error - * is related to the downstream link of the bridge, so we - * do error recovery on all subordinates of the bridge instead - * of the bridge and clear the error status of the bridge. - */ - if (cb == report_error_detected) - dev->error_state = state; - pci_walk_bus(dev->subordinate, cb, &result_data); - if (cb == report_resume) { - pci_cleanup_aer_uncorrect_error_status(dev); - dev->error_state = pci_channel_io_normal; - } - } else { - /* - * If the error is reported by an end point, we think this - * error is related to the upstream link of the end point. - */ - if (state == pci_channel_io_normal) - /* - * the error is non fatal so the bus is ok, just invoke - * the callback for the function that logged the error. - */ - cb(dev, &result_data); - else - pci_walk_bus(dev->bus, cb, &result_data); - } - - return result_data.result; -} - -/** - * default_reset_link - default reset function - * @dev: pointer to pci_dev data structure - * - * Invoked when performing link reset on a Downstream Port or a - * Root Port with no aer driver. - */ -static pci_ers_result_t default_reset_link(struct pci_dev *dev) -{ - pci_reset_bridge_secondary_bus(dev); - pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); - return PCI_ERS_RESULT_RECOVERED; -} - -static int find_aer_service_iter(struct device *device, void *data) -{ - struct pcie_port_service_driver *service_driver, **drv; - - drv = (struct pcie_port_service_driver **) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - service_driver = to_service_driver(device->driver); - if (service_driver->service == PCIE_PORT_SERVICE_AER) { - *drv = service_driver; - return 1; - } - } - - return 0; -} - -static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) -{ - struct pcie_port_service_driver *drv = NULL; - - device_for_each_child(&dev->dev, &drv, find_aer_service_iter); - - return drv; -} - -static pci_ers_result_t reset_link(struct pci_dev *dev) -{ - struct pci_dev *udev; - pci_ers_result_t status; - struct pcie_port_service_driver *driver; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* Reset this port for all subordinates */ - udev = dev; - } else { - /* Reset the upstream component (likely downstream port) */ - udev = dev->bus->self; - } - - /* Use the aer driver of the component firstly */ - driver = find_aer_service(udev); - - if (driver && driver->reset_link) { - status = driver->reset_link(udev); - } else if (udev->has_secondary_link) { - status = default_reset_link(udev); - } else { - pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", - pci_name(udev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (status != PCI_ERS_RESULT_RECOVERED) { - pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", - pci_name(udev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - return status; -} - -/** - * do_recovery - handle nonfatal/fatal error recovery process - * @dev: pointer to a pci_dev data structure of agent detecting an error - * @severity: error severity type - * - * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast - * error detected message to all downstream drivers within a hierarchy in - * question and return the returned code. - */ -static void do_recovery(struct pci_dev *dev, int severity) -{ - pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; - enum pci_channel_state state; - - if (severity == AER_FATAL) - state = pci_channel_io_frozen; - else - state = pci_channel_io_normal; - - status = broadcast_error_message(dev, - state, - "error_detected", - report_error_detected); - - if (severity == AER_FATAL) { - result = reset_link(dev); - if (result != PCI_ERS_RESULT_RECOVERED) - goto failed; - } - - if (status == PCI_ERS_RESULT_CAN_RECOVER) - status = broadcast_error_message(dev, - state, - "mmio_enabled", - report_mmio_enabled); - - if (status == PCI_ERS_RESULT_NEED_RESET) { - /* - * TODO: Should call platform-specific - * functions to reset slot before calling - * drivers' slot_reset callbacks? - */ - status = broadcast_error_message(dev, - state, - "slot_reset", - report_slot_reset); - } - - if (status != PCI_ERS_RESULT_RECOVERED) - goto failed; - - broadcast_error_message(dev, - state, - "resume", - report_resume); - - pci_info(dev, "AER: Device recovery successful\n"); - return; - -failed: - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); - /* TODO: Should kernel panic here? */ - pci_info(dev, "AER: Device recovery failed\n"); -} - -/** - * handle_error_source - handle logging error into an event log - * @aerdev: pointer to pcie_device data structure of the root port - * @dev: pointer to pci_dev data structure of error source device - * @info: comprehensive error information - * - * Invoked when an error being detected by Root Port. - */ -static void handle_error_source(struct pcie_device *aerdev, - struct pci_dev *dev, - struct aer_err_info *info) -{ - int pos; - - if (info->severity == AER_CORRECTABLE) { - /* - * Correctable error does not need software intervention. - * No need to go through error recovery process. - */ - pos = dev->aer_cap; - if (pos) - pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, - info->status); - } else - do_recovery(dev, info->severity); -} - -#ifdef CONFIG_ACPI_APEI_PCIEAER -static void aer_recover_work_func(struct work_struct *work); - -#define AER_RECOVER_RING_ORDER 4 -#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER) - -struct aer_recover_entry { - u8 bus; - u8 devfn; - u16 domain; - int severity; - struct aer_capability_regs *regs; -}; - -static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, - AER_RECOVER_RING_SIZE); -/* - * Mutual exclusion for writers of aer_recover_ring, reader side don't - * need lock, because there is only one reader and lock is not needed - * between reader and writer. - */ -static DEFINE_SPINLOCK(aer_recover_ring_lock); -static DECLARE_WORK(aer_recover_work, aer_recover_work_func); - -void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity, struct aer_capability_regs *aer_regs) -{ - unsigned long flags; - struct aer_recover_entry entry = { - .bus = bus, - .devfn = devfn, - .domain = domain, - .severity = severity, - .regs = aer_regs, - }; - - spin_lock_irqsave(&aer_recover_ring_lock, flags); - if (kfifo_put(&aer_recover_ring, entry)) - schedule_work(&aer_recover_work); - else - pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", - domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - spin_unlock_irqrestore(&aer_recover_ring_lock, flags); -} -EXPORT_SYMBOL_GPL(aer_recover_queue); - -static void aer_recover_work_func(struct work_struct *work) -{ - struct aer_recover_entry entry; - struct pci_dev *pdev; - - while (kfifo_get(&aer_recover_ring, &entry)) { - pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, - entry.devfn); - if (!pdev) { - pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", - entry.domain, entry.bus, - PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); - continue; - } - cper_print_aer(pdev, entry.severity, entry.regs); - if (entry.severity != AER_CORRECTABLE) - do_recovery(pdev, entry.severity); - pci_dev_put(pdev); - } -} -#endif - -/** - * get_device_error_info - read error status from dev and store it to info - * @dev: pointer to the device expected to have a error record - * @info: pointer to structure to store the error record - * - * Return 1 on success, 0 on error. - * - * Note that @info is reused among all error devices. Clear fields properly. - */ -static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) -{ - int pos, temp; - - /* Must reset in this function */ - info->status = 0; - info->tlp_header_valid = 0; - - pos = dev->aer_cap; - - /* The device might not support AER */ - if (!pos) - return 0; - - if (info->severity == AER_CORRECTABLE) { - pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, - &info->status); - pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, - &info->mask); - if (!(info->status & ~info->mask)) - return 0; - } else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - info->severity == AER_NONFATAL) { - - /* Link is still healthy for IO reads */ - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, - &info->status); - pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, - &info->mask); - if (!(info->status & ~info->mask)) - return 0; - - /* Get First Error Pointer */ - pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); - info->first_error = PCI_ERR_CAP_FEP(temp); - - if (info->status & AER_LOG_TLP_MASKS) { - info->tlp_header_valid = 1; - pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); - pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); - pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); - pci_read_config_dword(dev, - pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); - } - } - - return 1; -} - -static inline void aer_process_err_devices(struct pcie_device *p_device, - struct aer_err_info *e_info) -{ - int i; - - /* Report all before handle them, not to lost records by reset etc. */ - for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (get_device_error_info(e_info->dev[i], e_info)) - aer_print_error(e_info->dev[i], e_info); - } - for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (get_device_error_info(e_info->dev[i], e_info)) - handle_error_source(p_device, e_info->dev[i], e_info); - } -} - -/** - * aer_isr_one_error - consume an error detected by root port - * @p_device: pointer to error root port service device - * @e_src: pointer to an error source - */ -static void aer_isr_one_error(struct pcie_device *p_device, - struct aer_err_source *e_src) -{ - struct aer_rpc *rpc = get_service_data(p_device); - struct aer_err_info *e_info = &rpc->e_info; - - /* - * There is a possibility that both correctable error and - * uncorrectable error being logged. Report correctable error first. - */ - if (e_src->status & PCI_ERR_ROOT_COR_RCV) { - e_info->id = ERR_COR_ID(e_src->id); - e_info->severity = AER_CORRECTABLE; - - if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) - e_info->multi_error_valid = 1; - else - e_info->multi_error_valid = 0; - - aer_print_port_info(p_device->port, e_info); - - if (find_source_device(p_device->port, e_info)) - aer_process_err_devices(p_device, e_info); - } - - if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { - e_info->id = ERR_UNCOR_ID(e_src->id); - - if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) - e_info->severity = AER_FATAL; - else - e_info->severity = AER_NONFATAL; - - if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) - e_info->multi_error_valid = 1; - else - e_info->multi_error_valid = 0; - - aer_print_port_info(p_device->port, e_info); - - if (find_source_device(p_device->port, e_info)) - aer_process_err_devices(p_device, e_info); - } -} - -/** - * get_e_source - retrieve an error source - * @rpc: pointer to the root port which holds an error - * @e_src: pointer to store retrieved error source - * - * Return 1 if an error source is retrieved, otherwise 0. - * - * Invoked by DPC handler to consume an error. - */ -static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) -{ - unsigned long flags; - - /* Lock access to Root error producer/consumer index */ - spin_lock_irqsave(&rpc->e_lock, flags); - if (rpc->prod_idx == rpc->cons_idx) { - spin_unlock_irqrestore(&rpc->e_lock, flags); - return 0; - } - - *e_src = rpc->e_sources[rpc->cons_idx]; - rpc->cons_idx++; - if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) - rpc->cons_idx = 0; - spin_unlock_irqrestore(&rpc->e_lock, flags); - - return 1; -} - -/** - * aer_isr - consume errors detected by root port - * @work: definition of this work item - * - * Invoked, as DPC, when root port records new detected error - */ -void aer_isr(struct work_struct *work) -{ - struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); - struct pcie_device *p_device = rpc->rpd; - struct aer_err_source uninitialized_var(e_src); - - mutex_lock(&rpc->rpc_mutex); - while (get_e_source(rpc, &e_src)) - aer_isr_one_error(p_device, &e_src); - mutex_unlock(&rpc->rpc_mutex); -} diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c deleted file mode 100644 index cfc89dd57831..000000000000 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Format error messages and print them to console. - * - * Copyright (C) 2006 Intel Corp. - * Tom Long Nguyen (tom.l.nguyen@intel.com) - * Zhang Yanmin (yanmin.zhang@intel.com) - */ - -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/pm.h> -#include <linux/suspend.h> -#include <linux/cper.h> - -#include "aerdrv.h" -#include <ras/ras_event.h> - -#define AER_AGENT_RECEIVER 0 -#define AER_AGENT_REQUESTER 1 -#define AER_AGENT_COMPLETER 2 -#define AER_AGENT_TRANSMITTER 3 - -#define AER_AGENT_REQUESTER_MASK(t) ((t == AER_CORRECTABLE) ? \ - 0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP)) -#define AER_AGENT_COMPLETER_MASK(t) ((t == AER_CORRECTABLE) ? \ - 0 : PCI_ERR_UNC_COMP_ABORT) -#define AER_AGENT_TRANSMITTER_MASK(t) ((t == AER_CORRECTABLE) ? \ - (PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0) - -#define AER_GET_AGENT(t, e) \ - ((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \ - (e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \ - (e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \ - AER_AGENT_RECEIVER) - -#define AER_PHYSICAL_LAYER_ERROR 0 -#define AER_DATA_LINK_LAYER_ERROR 1 -#define AER_TRANSACTION_LAYER_ERROR 2 - -#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ - PCI_ERR_COR_RCVR : 0) -#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ? \ - (PCI_ERR_COR_BAD_TLP| \ - PCI_ERR_COR_BAD_DLLP| \ - PCI_ERR_COR_REP_ROLL| \ - PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP) - -#define AER_GET_LAYER_ERROR(t, e) \ - ((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \ - (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ - AER_TRANSACTION_LAYER_ERROR) - -/* - * AER error strings - */ -static const char *aer_error_severity_string[] = { - "Uncorrected (Non-Fatal)", - "Uncorrected (Fatal)", - "Corrected" -}; - -static const char *aer_error_layer[] = { - "Physical Layer", - "Data Link Layer", - "Transaction Layer" -}; - -static const char *aer_correctable_error_string[] = { - "Receiver Error", /* Bit Position 0 */ - NULL, - NULL, - NULL, - NULL, - NULL, - "Bad TLP", /* Bit Position 6 */ - "Bad DLLP", /* Bit Position 7 */ - "RELAY_NUM Rollover", /* Bit Position 8 */ - NULL, - NULL, - NULL, - "Replay Timer Timeout", /* Bit Position 12 */ - "Advisory Non-Fatal", /* Bit Position 13 */ - "Corrected Internal Error", /* Bit Position 14 */ - "Header Log Overflow", /* Bit Position 15 */ -}; - -static const char *aer_uncorrectable_error_string[] = { - "Undefined", /* Bit Position 0 */ - NULL, - NULL, - NULL, - "Data Link Protocol", /* Bit Position 4 */ - "Surprise Down Error", /* Bit Position 5 */ - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - "Poisoned TLP", /* Bit Position 12 */ - "Flow Control Protocol", /* Bit Position 13 */ - "Completion Timeout", /* Bit Position 14 */ - "Completer Abort", /* Bit Position 15 */ - "Unexpected Completion", /* Bit Position 16 */ - "Receiver Overflow", /* Bit Position 17 */ - "Malformed TLP", /* Bit Position 18 */ - "ECRC", /* Bit Position 19 */ - "Unsupported Request", /* Bit Position 20 */ - "ACS Violation", /* Bit Position 21 */ - "Uncorrectable Internal Error", /* Bit Position 22 */ - "MC Blocked TLP", /* Bit Position 23 */ - "AtomicOp Egress Blocked", /* Bit Position 24 */ - "TLP Prefix Blocked Error", /* Bit Position 25 */ -}; - -static const char *aer_agent_string[] = { - "Receiver ID", - "Requester ID", - "Completer ID", - "Transmitter ID" -}; - -static void __print_tlp_header(struct pci_dev *dev, - struct aer_header_log_regs *t) -{ - pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", - t->dw0, t->dw1, t->dw2, t->dw3); -} - -static void __aer_print_error(struct pci_dev *dev, - struct aer_err_info *info) -{ - int i, status; - const char *errmsg = NULL; - status = (info->status & ~info->mask); - - for (i = 0; i < 32; i++) { - if (!(status & (1 << i))) - continue; - - if (info->severity == AER_CORRECTABLE) - errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ? - aer_correctable_error_string[i] : NULL; - else - errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ? - aer_uncorrectable_error_string[i] : NULL; - - if (errmsg) - pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, - info->first_error == i ? " (First)" : ""); - else - pci_err(dev, " [%2d] Unknown Error Bit%s\n", - i, info->first_error == i ? " (First)" : ""); - } -} - -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) -{ - int layer, agent; - int id = ((dev->bus->number << 8) | dev->devfn); - - if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Unaccessible, id=%04x(Unregistered Agent ID)\n", - aer_error_severity_string[info->severity], id); - goto out; - } - - layer = AER_GET_LAYER_ERROR(info->severity, info->status); - agent = AER_GET_AGENT(info->severity, info->status); - - pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", - aer_error_severity_string[info->severity], - aer_error_layer[layer], id, aer_agent_string[agent]); - - pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", - dev->vendor, dev->device, - info->status, info->mask); - - __aer_print_error(dev, info); - - if (info->tlp_header_valid) - __print_tlp_header(dev, &info->tlp); - -out: - if (info->id && info->error_dev_num > 1 && info->id == id) - pci_err(dev, " Error of this Agent(%04x) is reported first\n", id); - - trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), - info->severity); -} - -void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) -{ - pci_info(dev, "AER: %s%s error received: id=%04x\n", - info->multi_error_valid ? "Multiple " : "", - aer_error_severity_string[info->severity], info->id); -} - -#ifdef CONFIG_ACPI_APEI_PCIEAER -int cper_severity_to_aer(int cper_severity) -{ - switch (cper_severity) { - case CPER_SEV_RECOVERABLE: - return AER_NONFATAL; - case CPER_SEV_FATAL: - return AER_FATAL; - default: - return AER_CORRECTABLE; - } -} -EXPORT_SYMBOL_GPL(cper_severity_to_aer); - -void cper_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer) -{ - int layer, agent, status_strs_size, tlp_header_valid = 0; - u32 status, mask; - const char **status_strs; - - if (aer_severity == AER_CORRECTABLE) { - status = aer->cor_status; - mask = aer->cor_mask; - status_strs = aer_correctable_error_string; - status_strs_size = ARRAY_SIZE(aer_correctable_error_string); - } else { - status = aer->uncor_status; - mask = aer->uncor_mask; - status_strs = aer_uncorrectable_error_string; - status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string); - tlp_header_valid = status & AER_LOG_TLP_MASKS; - } - - layer = AER_GET_LAYER_ERROR(aer_severity, status); - agent = AER_GET_AGENT(aer_severity, status); - - pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); - cper_print_bits("", status, status_strs, status_strs_size); - pci_err(dev, "aer_layer=%s, aer_agent=%s\n", - aer_error_layer[layer], aer_agent_string[agent]); - - if (aer_severity != AER_CORRECTABLE) - pci_err(dev, "aer_uncor_severity: 0x%08x\n", - aer->uncor_severity); - - if (tlp_header_valid) - __print_tlp_header(dev, &aer->header_log); - - trace_aer_event(dev_name(&dev->dev), (status & ~mask), - aer_severity); -} -#endif diff --git a/drivers/pci/pcie/aer/ecrc.c b/drivers/pci/pcie/aer/ecrc.c deleted file mode 100644 index 039efb606e31..000000000000 --- a/drivers/pci/pcie/aer/ecrc.c +++ /dev/null @@ -1,117 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Enable/disable PCIe ECRC checking - * - * (C) Copyright 2009 Hewlett-Packard Development Company, L.P. - * Andrew Patterson <andrew.patterson@hp.com> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/pci.h> -#include <linux/pci_regs.h> -#include <linux/errno.h> -#include "../../pci.h" - -#define ECRC_POLICY_DEFAULT 0 /* ECRC set by BIOS */ -#define ECRC_POLICY_OFF 1 /* ECRC off for performance */ -#define ECRC_POLICY_ON 2 /* ECRC on for data integrity */ - -static int ecrc_policy = ECRC_POLICY_DEFAULT; - -static const char *ecrc_policy_str[] = { - [ECRC_POLICY_DEFAULT] = "bios", - [ECRC_POLICY_OFF] = "off", - [ECRC_POLICY_ON] = "on" -}; - -/** - * enable_ercr_checking - enable PCIe ECRC checking for a device - * @dev: the PCI device - * - * Returns 0 on success, or negative on failure. - */ -static int enable_ecrc_checking(struct pci_dev *dev) -{ - int pos; - u32 reg32; - - if (!pci_is_pcie(dev)) - return -ENODEV; - - pos = dev->aer_cap; - if (!pos) - return -ENODEV; - - pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); - if (reg32 & PCI_ERR_CAP_ECRC_GENC) - reg32 |= PCI_ERR_CAP_ECRC_GENE; - if (reg32 & PCI_ERR_CAP_ECRC_CHKC) - reg32 |= PCI_ERR_CAP_ECRC_CHKE; - pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); - - return 0; -} - -/** - * disable_ercr_checking - disables PCIe ECRC checking for a device - * @dev: the PCI device - * - * Returns 0 on success, or negative on failure. - */ -static int disable_ecrc_checking(struct pci_dev *dev) -{ - int pos; - u32 reg32; - - if (!pci_is_pcie(dev)) - return -ENODEV; - - pos = dev->aer_cap; - if (!pos) - return -ENODEV; - - pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32); - reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32); - - return 0; -} - -/** - * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy - * @dev: the PCI device - */ -void pcie_set_ecrc_checking(struct pci_dev *dev) -{ - switch (ecrc_policy) { - case ECRC_POLICY_DEFAULT: - return; - case ECRC_POLICY_OFF: - disable_ecrc_checking(dev); - break; - case ECRC_POLICY_ON: - enable_ecrc_checking(dev); - break; - default: - return; - } -} - -/** - * pcie_ecrc_get_policy - parse kernel command-line ecrc option - */ -void pcie_ecrc_get_policy(char *str) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++) - if (!strncmp(str, ecrc_policy_str[i], - strlen(ecrc_policy_str[i]))) - break; - if (i >= ARRAY_SIZE(ecrc_policy_str)) - return; - - ecrc_policy = i; -} diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer_inject.c index a49090935303..0eb24346cad3 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer_inject.c @@ -21,7 +21,8 @@ #include <linux/uaccess.h> #include <linux/stddef.h> #include <linux/device.h> -#include "aerdrv.h" + +#include "portdrv.h" /* Override the existing corrected and uncorrected error masks */ static bool aer_mask_override; diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index f76eb7704f64..c687c817b47d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -400,6 +400,15 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev, info->l1ss_cap = 0; return; } + + /* + * If we don't have LTR for the entire path from the Root Complex + * to this device, we can't use ASPM L1.2 because it relies on the + * LTR_L1.2_THRESHOLD. See PCIe r4.0, secs 5.5.4, 6.18. + */ + if (!pdev->ltr_path) + info->l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2; + pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL1, &info->l1ss_ctl1); pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL2, diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 8c57d607e603..921ed979109d 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -13,7 +13,6 @@ #include "portdrv.h" #include "../pci.h" -#include "aer/aerdrv.h" struct dpc_dev { struct pcie_device *dev; @@ -68,44 +67,35 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) static void dpc_wait_link_inactive(struct dpc_dev *dpc) { - unsigned long timeout = jiffies + HZ; struct pci_dev *pdev = dpc->dev->port; - struct device *dev = &dpc->dev->device; - u16 lnk_status; - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); - while (lnk_status & PCI_EXP_LNKSTA_DLLLA && - !time_after(jiffies, timeout)) { - msleep(10); - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); - } - if (lnk_status & PCI_EXP_LNKSTA_DLLLA) - dev_warn(dev, "Link state not disabled for DPC event\n"); + pcie_wait_for_link(pdev, false); } -static void dpc_work(struct work_struct *work) +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { - struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); - struct pci_dev *dev, *temp, *pdev = dpc->dev->port; - struct pci_bus *parent = pdev->subordinate; - u16 cap = dpc->cap_pos, ctl; - - pci_lock_rescan_remove(); - list_for_each_entry_safe_reverse(dev, temp, &parent->devices, - bus_list) { - pci_dev_get(dev); - pci_dev_set_disconnected(dev, NULL); - if (pci_has_subordinate(dev)) - pci_walk_bus(dev->subordinate, - pci_dev_set_disconnected, NULL); - pci_stop_and_remove_bus_device(dev); - pci_dev_put(dev); - } - pci_unlock_rescan_remove(); - + struct dpc_dev *dpc; + struct pcie_device *pciedev; + struct device *devdpc; + u16 cap, ctl; + + /* + * DPC disables the Link automatically in hardware, so it has + * already been reset by the time we get here. + */ + devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC); + pciedev = to_pcie_device(devdpc); + dpc = get_service_data(pciedev); + cap = dpc->cap_pos; + + /* + * Wait until the Link is inactive, then clear DPC Trigger Status + * to allow the Port to leave DPC. + */ dpc_wait_link_inactive(dpc); + if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) - return; + return PCI_ERS_RESULT_DISCONNECT; if (dpc->rp_extensions && dpc->rp_pio_status) { pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, dpc->rp_pio_status); @@ -113,11 +103,22 @@ static void dpc_work(struct work_struct *work) } pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, - PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT); + PCI_EXP_DPC_STATUS_TRIGGER); pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl); pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL, ctl | PCI_EXP_DPC_CTL_INT_EN); + + return PCI_ERS_RESULT_RECOVERED; +} + +static void dpc_work(struct work_struct *work) +{ + struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); + struct pci_dev *pdev = dpc->dev->port; + + /* We configure DPC so it only triggers on ERR_FATAL */ + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC); } static void dpc_process_rp_pio_error(struct dpc_dev *dpc) @@ -223,6 +224,9 @@ static irqreturn_t dpc_irq(int irq, void *context) if (dpc->rp_extensions && reason == 3 && ext_reason == 0) dpc_process_rp_pio_error(dpc); + pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_INTERRUPT); + schedule_work(&dpc->work); return IRQ_HANDLED; @@ -270,7 +274,7 @@ static int dpc_probe(struct pcie_device *dev) } } - ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN; + ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", @@ -288,7 +292,7 @@ static void dpc_remove(struct pcie_device *dev) u16 ctl; pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); - ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); } @@ -298,6 +302,7 @@ static struct pcie_port_service_driver dpcdriver = { .service = PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, .remove = dpc_remove, + .reset_link = dpc_reset_link, }; static int __init dpc_service_init(void) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c new file mode 100644 index 000000000000..f02e334beb45 --- /dev/null +++ b/drivers/pci/pcie/err.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file implements the error recovery as a core part of PCIe error + * reporting. When a PCIe error is delivered, an error message will be + * collected and printed to console, then, an error recovery procedure + * will be executed by following the PCI error recovery rules. + * + * Copyright (C) 2006 Intel Corp. + * Tom Long Nguyen (tom.l.nguyen@intel.com) + * Zhang Yanmin (yanmin.zhang@intel.com) + */ + +#include <linux/pci.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/aer.h> +#include "portdrv.h" +#include "../pci.h" + +struct aer_broadcast_data { + enum pci_channel_state state; + enum pci_ers_result result; +}; + +static pci_ers_result_t merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + if (new == PCI_ERS_RESULT_NO_AER_DRIVER) + return PCI_ERS_RESULT_NO_AER_DRIVER; + + if (new == PCI_ERS_RESULT_NONE) + return orig; + + switch (orig) { + case PCI_ERS_RESULT_CAN_RECOVER: + case PCI_ERS_RESULT_RECOVERED: + orig = new; + break; + case PCI_ERS_RESULT_DISCONNECT: + if (new == PCI_ERS_RESULT_NEED_RESET) + orig = PCI_ERS_RESULT_NEED_RESET; + break; + default: + break; + } + + return orig; +} + +static int report_error_detected(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + dev->error_state = result_data->state; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->error_detected) { + if (result_data->state == pci_channel_io_frozen && + dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { + /* + * In case of fatal recovery, if one of down- + * stream device has no driver. We might be + * unable to recover because a later insmod + * of a driver for this device is unaware of + * its hw state. + */ + pci_printk(KERN_DEBUG, dev, "device has %s\n", + dev->driver ? + "no AER-aware driver" : "no driver"); + } + + /* + * If there's any device in the subtree that does not + * have an error_detected callback, returning + * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of + * the subsequent mmio_enabled/slot_reset/resume + * callbacks of "any" device in the subtree. All the + * devices in the subtree are left in the error state + * without recovery. + */ + + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) + vote = PCI_ERS_RESULT_NO_AER_DRIVER; + else + vote = PCI_ERS_RESULT_NONE; + } else { + err_handler = dev->driver->err_handler; + vote = err_handler->error_detected(dev, result_data->state); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); + } + + result_data->result = merge_result(result_data->result, vote); + device_unlock(&dev->dev); + return 0; +} + +static int report_mmio_enabled(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->mmio_enabled) + goto out; + + err_handler = dev->driver->err_handler; + vote = err_handler->mmio_enabled(dev); + result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_slot_reset(struct pci_dev *dev, void *data) +{ + pci_ers_result_t vote; + const struct pci_error_handlers *err_handler; + struct aer_broadcast_data *result_data; + + result_data = (struct aer_broadcast_data *) data; + + device_lock(&dev->dev); + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->slot_reset) + goto out; + + err_handler = dev->driver->err_handler; + vote = err_handler->slot_reset(dev); + result_data->result = merge_result(result_data->result, vote); +out: + device_unlock(&dev->dev); + return 0; +} + +static int report_resume(struct pci_dev *dev, void *data) +{ + const struct pci_error_handlers *err_handler; + + device_lock(&dev->dev); + dev->error_state = pci_channel_io_normal; + + if (!dev->driver || + !dev->driver->err_handler || + !dev->driver->err_handler->resume) + goto out; + + err_handler = dev->driver->err_handler; + err_handler->resume(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +out: + device_unlock(&dev->dev); + return 0; +} + +/** + * default_reset_link - default reset function + * @dev: pointer to pci_dev data structure + * + * Invoked when performing link reset on a Downstream Port or a + * Root Port with no aer driver. + */ +static pci_ers_result_t default_reset_link(struct pci_dev *dev) +{ + pci_reset_bridge_secondary_bus(dev); + pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) +{ + struct pci_dev *udev; + pci_ers_result_t status; + struct pcie_port_service_driver *driver = NULL; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + /* Reset this port for all subordinates */ + udev = dev; + } else { + /* Reset the upstream component (likely downstream port) */ + udev = dev->bus->self; + } + + /* Use the aer driver of the component firstly */ + driver = pcie_port_find_service(udev, service); + + if (driver && driver->reset_link) { + status = driver->reset_link(udev); + } else if (udev->has_secondary_link) { + status = default_reset_link(udev); + } else { + pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", + pci_name(udev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (status != PCI_ERS_RESULT_RECOVERED) { + pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", + pci_name(udev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + return status; +} + +/** + * broadcast_error_message - handle message broadcast to downstream drivers + * @dev: pointer to from where in a hierarchy message is broadcasted down + * @state: error state + * @error_mesg: message to print + * @cb: callback to be broadcasted + * + * Invoked during error recovery process. Once being invoked, the content + * of error severity will be broadcasted to all downstream drivers in a + * hierarchy in question. + */ +static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, + enum pci_channel_state state, + char *error_mesg, + int (*cb)(struct pci_dev *, void *)) +{ + struct aer_broadcast_data result_data; + + pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); + result_data.state = state; + if (cb == report_error_detected) + result_data.result = PCI_ERS_RESULT_CAN_RECOVER; + else + result_data.result = PCI_ERS_RESULT_RECOVERED; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + if (cb == report_error_detected) + dev->error_state = state; + pci_walk_bus(dev->subordinate, cb, &result_data); + if (cb == report_resume) { + pci_cleanup_aer_uncorrect_error_status(dev); + dev->error_state = pci_channel_io_normal; + } + } else { + /* + * If the error is reported by an end point, we think this + * error is related to the upstream link of the end point. + */ + if (state == pci_channel_io_normal) + /* + * the error is non fatal so the bus is ok, just invoke + * the callback for the function that logged the error. + */ + cb(dev, &result_data); + else + pci_walk_bus(dev->bus, cb, &result_data); + } + + return result_data.result; +} + +/** + * pcie_do_fatal_recovery - handle fatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is fatal. Once being invoked, removes the devices + * beneath this AER agent, followed by reset link e.g. secondary bus reset + * followed by re-enumeration of devices. + */ +void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service) +{ + struct pci_dev *udev; + struct pci_bus *parent; + struct pci_dev *pdev, *temp; + pci_ers_result_t result; + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev = dev->bus->self; + + parent = udev->subordinate; + pci_lock_rescan_remove(); + pci_dev_get(dev); + list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, + bus_list) { + pci_dev_get(pdev); + pci_dev_set_disconnected(pdev, NULL); + if (pci_has_subordinate(pdev)) + pci_walk_bus(pdev->subordinate, + pci_dev_set_disconnected, NULL); + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + } + + result = reset_link(udev, service); + + if ((service == PCIE_PORT_SERVICE_AER) && + (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) { + /* + * If the error is reported by a bridge, we think this error + * is related to the downstream link of the bridge, so we + * do error recovery on all subordinates of the bridge instead + * of the bridge and clear the error status of the bridge. + */ + pci_cleanup_aer_uncorrect_error_status(dev); + } + + if (result == PCI_ERS_RESULT_RECOVERED) { + if (pcie_wait_for_link(udev, true)) + pci_rescan_bus(udev->bus); + pci_info(dev, "Device recovery from fatal error successful\n"); + } else { + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + pci_info(dev, "Device recovery from fatal error failed\n"); + } + + pci_dev_put(dev); + pci_unlock_rescan_remove(); +} + +/** + * pcie_do_nonfatal_recovery - handle nonfatal error recovery process + * @dev: pointer to a pci_dev data structure of agent detecting an error + * + * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast + * error detected message to all downstream drivers within a hierarchy in + * question and return the returned code. + */ +void pcie_do_nonfatal_recovery(struct pci_dev *dev) +{ + pci_ers_result_t status; + enum pci_channel_state state; + + state = pci_channel_io_normal; + + status = broadcast_error_message(dev, + state, + "error_detected", + report_error_detected); + + if (status == PCI_ERS_RESULT_CAN_RECOVER) + status = broadcast_error_message(dev, + state, + "mmio_enabled", + report_mmio_enabled); + + if (status == PCI_ERS_RESULT_NEED_RESET) { + /* + * TODO: Should call platform-specific + * functions to reset slot before calling + * drivers' slot_reset callbacks? + */ + status = broadcast_error_message(dev, + state, + "slot_reset", + report_slot_reset); + } + + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + + broadcast_error_message(dev, + state, + "resume", + report_resume); + + pci_info(dev, "AER: Device recovery successful\n"); + return; + +failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + + /* TODO: Should kernel panic here? */ + pci_info(dev, "AER: Device recovery failed\n"); +} diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index d0c6783dbfe3..6ffc797a0dc1 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -11,8 +11,6 @@ #include <linux/compiler.h> -extern bool pcie_ports_native; - /* Service Type */ #define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ #define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) @@ -112,4 +110,22 @@ static inline bool pcie_pme_no_msi(void) { return false; } static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ +#ifdef CONFIG_ACPI_APEI +int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); +#else +static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) +{ + if (pci_dev->__aer_firmware_first_valid) + return pci_dev->__aer_firmware_first; + return 0; +} +#endif + +#ifdef CONFIG_PCIEAER +irqreturn_t aer_irq(int irq, void *context); +#endif + +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service); +struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_acpi.c b/drivers/pci/pcie/portdrv_acpi.c deleted file mode 100644 index 8ab5d434b9c6..000000000000 --- a/drivers/pci/pcie/portdrv_acpi.c +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PCIe Port Native Services Support, ACPI-Related Part - * - * Copyright (C) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. - */ - -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/acpi.h> -#include <linux/pci-acpi.h> - -#include "aer/aerdrv.h" -#include "../pci.h" -#include "portdrv.h" - -/** - * pcie_port_acpi_setup - Request the BIOS to release control of PCIe services. - * @port: PCIe Port service for a root port or event collector. - * @srv_mask: Bit mask of services that can be enabled for @port. - * - * Invoked when @port is identified as a PCIe port device. To avoid conflicts - * with the BIOS PCIe port native services support requires the BIOS to yield - * control of these services to the kernel. The mask of services that the BIOS - * allows to be enabled for @port is written to @srv_mask. - * - * NOTE: It turns out that we cannot do that for individual port services - * separately, because that would make some systems work incorrectly. - */ -void pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask) -{ - struct acpi_pci_root *root; - acpi_handle handle; - u32 flags; - - if (acpi_pci_disabled) - return; - - handle = acpi_find_root_bridge_handle(port); - if (!handle) - return; - - root = acpi_pci_find_root(handle); - if (!root) - return; - - flags = root->osc_control_set; - - *srv_mask = 0; - if (flags & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL) - *srv_mask |= PCIE_PORT_SERVICE_HP; - if (flags & OSC_PCI_EXPRESS_PME_CONTROL) - *srv_mask |= PCIE_PORT_SERVICE_PME; - if (flags & OSC_PCI_EXPRESS_AER_CONTROL) - *srv_mask |= PCIE_PORT_SERVICE_AER | PCIE_PORT_SERVICE_DPC; -} diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index c9c0663db282..e0261ad4bcdd 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -19,6 +19,12 @@ #include "../pci.h" #include "portdrv.h" +struct portdrv_service_data { + struct pcie_port_service_driver *drv; + struct device *dev; + u32 service; +}; + /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -199,7 +205,7 @@ static int get_port_device_capability(struct pci_dev *dev) int services = 0; if (dev->is_hotplug_bridge && - (pcie_ports_native || host->native_hotplug)) { + (pcie_ports_native || host->native_pcie_hotplug)) { services |= PCIE_PORT_SERVICE_HP; /* @@ -398,6 +404,69 @@ static int remove_iter(struct device *dev, void *data) return 0; } +static int find_service_iter(struct device *device, void *data) +{ + struct pcie_port_service_driver *service_driver; + struct portdrv_service_data *pdrvs; + u32 service; + + pdrvs = (struct portdrv_service_data *) data; + service = pdrvs->service; + + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == service) { + pdrvs->drv = service_driver; + pdrvs->dev = device; + return 1; + } + } + + return 0; +} + +/** + * pcie_port_find_service - find the service driver + * @dev: PCI Express port the service is associated with + * @service: Service to find + * + * Find PCI Express port service driver associated with given service + */ +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service) +{ + struct pcie_port_service_driver *drv; + struct portdrv_service_data pdrvs; + + pdrvs.drv = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + drv = pdrvs.drv; + return drv; +} + +/** + * pcie_port_find_device - find the struct device + * @dev: PCI Express port the service is associated with + * @service: For the service to find + * + * Find the struct device associated with given service on a pci_dev + */ +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service) +{ + struct device *device; + struct portdrv_service_data pdrvs; + + pdrvs.dev = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + device = pdrvs.dev; + return device; +} + /** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with |

