diff options
Diffstat (limited to 'drivers/pci/pcie/aer')
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.c | 11 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.h | 32 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_core.c | 397 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_errprint.c | 38 |
4 files changed, 69 insertions, 409 deletions
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 779b3879b1b5..9735c19bf39c 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -94,7 +94,7 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev, */ static void aer_enable_rootport(struct aer_rpc *rpc) { - struct pci_dev *pdev = rpc->rpd->port; + struct pci_dev *pdev = rpc->rpd; int aer_pos; u16 reg16; u32 reg32; @@ -136,7 +136,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc) */ static void aer_disable_rootport(struct aer_rpc *rpc) { - struct pci_dev *pdev = rpc->rpd->port; + struct pci_dev *pdev = rpc->rpd; u32 reg32; int pos; @@ -232,7 +232,7 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) /* Initialize Root lock access, e_lock, to Root Error Status Reg */ spin_lock_init(&rpc->e_lock); - rpc->rpd = dev; + rpc->rpd = dev->port; INIT_WORK(&rpc->dpc_handler, aer_isr); mutex_init(&rpc->rpc_mutex); @@ -353,10 +353,7 @@ static void aer_error_resume(struct pci_dev *dev) pos = dev->aer_cap; pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); - if (dev->error_state == pci_channel_io_normal) - status &= ~mask; /* Clear corresponding nonfatal bits */ - else - status &= mask; /* Clear corresponding fatal bits */ + status &= ~mask; /* Clear corresponding nonfatal bits */ pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); } diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 08b4584f62fe..6e0ad9a68fd9 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -58,7 +58,7 @@ struct aer_err_source { }; struct aer_rpc { - struct pcie_device *rpd; /* Root Port device */ + struct pci_dev *rpd; /* Root Port device */ struct work_struct dpc_handler; struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; struct aer_err_info e_info; @@ -76,36 +76,6 @@ struct aer_rpc { */ }; -struct aer_broadcast_data { - enum pci_channel_state state; - enum pci_ers_result result; -}; - -static inline pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result new) -{ - if (new == PCI_ERS_RESULT_NO_AER_DRIVER) - return PCI_ERS_RESULT_NO_AER_DRIVER; - - if (new == PCI_ERS_RESULT_NONE) - return orig; - - switch (orig) { - case PCI_ERS_RESULT_CAN_RECOVER: - case PCI_ERS_RESULT_RECOVERED: - orig = new; - break; - case PCI_ERS_RESULT_DISCONNECT: - if (new == PCI_ERS_RESULT_NEED_RESET) - orig = PCI_ERS_RESULT_NEED_RESET; - break; - default: - break; - } - - return orig; -} - extern struct bus_type pcie_port_bus_type; void aer_isr(struct work_struct *work); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 0ea5acc40323..42d4f3f32282 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -20,6 +20,7 @@ #include <linux/slab.h> #include <linux/kfifo.h> #include "aerdrv.h" +#include "../../pci.h" #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -227,329 +228,14 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int report_error_detected(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - dev->error_state = result_data->state; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->error_detected) { - if (result_data->state == pci_channel_io_frozen && - dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { - /* - * In case of fatal recovery, if one of down- - * stream device has no driver. We might be - * unable to recover because a later insmod - * of a driver for this device is unaware of - * its hw state. - */ - pci_printk(KERN_DEBUG, dev, "device has %s\n", - dev->driver ? - "no AER-aware driver" : "no driver"); - } - - /* - * If there's any device in the subtree that does not - * have an error_detected callback, returning - * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of - * the subsequent mmio_enabled/slot_reset/resume - * callbacks of "any" device in the subtree. All the - * devices in the subtree are left in the error state - * without recovery. - */ - - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - vote = PCI_ERS_RESULT_NO_AER_DRIVER; - else - vote = PCI_ERS_RESULT_NONE; - } else { - err_handler = dev->driver->err_handler; - vote = err_handler->error_detected(dev, result_data->state); - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); - } - - result_data->result = merge_result(result_data->result, vote); - device_unlock(&dev->dev); - return 0; -} - -static int report_mmio_enabled(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->mmio_enabled) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->mmio_enabled(dev); - result_data->result = merge_result(result_data->result, vote); -out: - device_unlock(&dev->dev); - return 0; -} - -static int report_slot_reset(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->slot_reset) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->slot_reset(dev); - result_data->result = merge_result(result_data->result, vote); -out: - device_unlock(&dev->dev); - return 0; -} - -static int report_resume(struct pci_dev *dev, void *data) -{ - const struct pci_error_handlers *err_handler; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->resume) - goto out; - - err_handler = dev->driver->err_handler; - err_handler->resume(dev); - pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); -out: - device_unlock(&dev->dev); - return 0; -} - -/** - * broadcast_error_message - handle message broadcast to downstream drivers - * @dev: pointer to from where in a hierarchy message is broadcasted down - * @state: error state - * @error_mesg: message to print - * @cb: callback to be broadcasted - * - * Invoked during error recovery process. Once being invoked, the content - * of error severity will be broadcasted to all downstream drivers in a - * hierarchy in question. - */ -static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, - enum pci_channel_state state, - char *error_mesg, - int (*cb)(struct pci_dev *, void *)) -{ - struct aer_broadcast_data result_data; - - pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); - result_data.state = state; - if (cb == report_error_detected) - result_data.result = PCI_ERS_RESULT_CAN_RECOVER; - else - result_data.result = PCI_ERS_RESULT_RECOVERED; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* - * If the error is reported by a bridge, we think this error - * is related to the downstream link of the bridge, so we - * do error recovery on all subordinates of the bridge instead - * of the bridge and clear the error status of the bridge. - */ - if (cb == report_error_detected) - dev->error_state = state; - pci_walk_bus(dev->subordinate, cb, &result_data); - if (cb == report_resume) { - pci_cleanup_aer_uncorrect_error_status(dev); - dev->error_state = pci_channel_io_normal; - } - } else { - /* - * If the error is reported by an end point, we think this - * error is related to the upstream link of the end point. - */ - if (state == pci_channel_io_normal) - /* - * the error is non fatal so the bus is ok, just invoke - * the callback for the function that logged the error. - */ - cb(dev, &result_data); - else - pci_walk_bus(dev->bus, cb, &result_data); - } - - return result_data.result; -} - -/** - * default_reset_link - default reset function - * @dev: pointer to pci_dev data structure - * - * Invoked when performing link reset on a Downstream Port or a - * Root Port with no aer driver. - */ -static pci_ers_result_t default_reset_link(struct pci_dev *dev) -{ - pci_reset_bridge_secondary_bus(dev); - pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); - return PCI_ERS_RESULT_RECOVERED; -} - -static int find_aer_service_iter(struct device *device, void *data) -{ - struct pcie_port_service_driver *service_driver, **drv; - - drv = (struct pcie_port_service_driver **) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - service_driver = to_service_driver(device->driver); - if (service_driver->service == PCIE_PORT_SERVICE_AER) { - *drv = service_driver; - return 1; - } - } - - return 0; -} - -static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) -{ - struct pcie_port_service_driver *drv = NULL; - - device_for_each_child(&dev->dev, &drv, find_aer_service_iter); - - return drv; -} - -static pci_ers_result_t reset_link(struct pci_dev *dev) -{ - struct pci_dev *udev; - pci_ers_result_t status; - struct pcie_port_service_driver *driver; - - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - /* Reset this port for all subordinates */ - udev = dev; - } else { - /* Reset the upstream component (likely downstream port) */ - udev = dev->bus->self; - } - - /* Use the aer driver of the component firstly */ - driver = find_aer_service(udev); - - if (driver && driver->reset_link) { - status = driver->reset_link(udev); - } else if (udev->has_secondary_link) { - status = default_reset_link(udev); - } else { - pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", - pci_name(udev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (status != PCI_ERS_RESULT_RECOVERED) { - pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", - pci_name(udev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - return status; -} - -/** - * do_recovery - handle nonfatal/fatal error recovery process - * @dev: pointer to a pci_dev data structure of agent detecting an error - * @severity: error severity type - * - * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast - * error detected message to all downstream drivers within a hierarchy in - * question and return the returned code. - */ -static void do_recovery(struct pci_dev *dev, int severity) -{ - pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; - enum pci_channel_state state; - - if (severity == AER_FATAL) - state = pci_channel_io_frozen; - else - state = pci_channel_io_normal; - - status = broadcast_error_message(dev, - state, - "error_detected", - report_error_detected); - - if (severity == AER_FATAL) { - result = reset_link(dev); - if (result != PCI_ERS_RESULT_RECOVERED) - goto failed; - } - - if (status == PCI_ERS_RESULT_CAN_RECOVER) - status = broadcast_error_message(dev, - state, - "mmio_enabled", - report_mmio_enabled); - - if (status == PCI_ERS_RESULT_NEED_RESET) { - /* - * TODO: Should call platform-specific - * functions to reset slot before calling - * drivers' slot_reset callbacks? - */ - status = broadcast_error_message(dev, - state, - "slot_reset", - report_slot_reset); - } - - if (status != PCI_ERS_RESULT_RECOVERED) - goto failed; - - broadcast_error_message(dev, - state, - "resume", - report_resume); - - pci_info(dev, "AER: Device recovery successful\n"); - return; - -failed: - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); - /* TODO: Should kernel panic here? */ - pci_info(dev, "AER: Device recovery failed\n"); -} - /** * handle_error_source - handle logging error into an event log - * @aerdev: pointer to pcie_device data structure of the root port * @dev: pointer to pci_dev data structure of error source device * @info: comprehensive error information * * Invoked when an error being detected by Root Port. */ -static void handle_error_source(struct pcie_device *aerdev, - struct pci_dev *dev, - struct aer_err_info *info) +static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) { int pos; @@ -562,12 +248,13 @@ static void handle_error_source(struct pcie_device *aerdev, if (pos) pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); - } else - do_recovery(dev, info->severity); + } else if (info->severity == AER_NONFATAL) + pcie_do_nonfatal_recovery(dev); + else if (info->severity == AER_FATAL) + pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER); } #ifdef CONFIG_ACPI_APEI_PCIEAER -static void aer_recover_work_func(struct work_struct *work); #define AER_RECOVER_RING_ORDER 4 #define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER) @@ -582,6 +269,30 @@ struct aer_recover_entry { static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry, AER_RECOVER_RING_SIZE); + +static void aer_recover_work_func(struct work_struct *work) +{ + struct aer_recover_entry entry; + struct pci_dev *pdev; + + while (kfifo_get(&aer_recover_ring, &entry)) { + pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, + entry.devfn); + if (!pdev) { + pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", + entry.domain, entry.bus, + PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); + continue; + } + cper_print_aer(pdev, entry.severity, entry.regs); + if (entry.severity == AER_NONFATAL) + pcie_do_nonfatal_recovery(pdev); + else if (entry.severity == AER_FATAL) + pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER); + pci_dev_put(pdev); + } +} + /* * Mutual exclusion for writers of aer_recover_ring, reader side don't * need lock, because there is only one reader and lock is not needed @@ -611,27 +322,6 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, spin_unlock_irqrestore(&aer_recover_ring_lock, flags); } EXPORT_SYMBOL_GPL(aer_recover_queue); - -static void aer_recover_work_func(struct work_struct *work) -{ - struct aer_recover_entry entry; - struct pci_dev *pdev; - - while (kfifo_get(&aer_recover_ring, &entry)) { - pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, - entry.devfn); - if (!pdev) { - pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", - entry.domain, entry.bus, - PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); - continue; - } - cper_print_aer(pdev, entry.severity, entry.regs); - if (entry.severity != AER_CORRECTABLE) - do_recovery(pdev, entry.severity); - pci_dev_put(pdev); - } -} #endif /** @@ -695,8 +385,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) return 1; } -static inline void aer_process_err_devices(struct pcie_device *p_device, - struct aer_err_info *e_info) +static inline void aer_process_err_devices(struct aer_err_info *e_info) { int i; @@ -707,19 +396,19 @@ static inline void aer_process_err_devices(struct pcie_device *p_device, } for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { if (get_device_error_info(e_info->dev[i], e_info)) - handle_error_source(p_device, e_info->dev[i], e_info); + handle_error_source(e_info->dev[i], e_info); } } /** * aer_isr_one_error - consume an error detected by root port - * @p_device: pointer to error root port service device + * @rpc: pointer to the root port which holds an error * @e_src: pointer to an error source */ -static void aer_isr_one_error(struct pcie_device *p_device, +static void aer_isr_one_error(struct aer_rpc *rpc, struct aer_err_source *e_src) { - struct aer_rpc *rpc = get_service_data(p_device); + struct pci_dev *pdev = rpc->rpd; struct aer_err_info *e_info = &rpc->e_info; /* @@ -734,11 +423,10 @@ static void aer_isr_one_error(struct pcie_device *p_device, e_info->multi_error_valid = 1; else e_info->multi_error_valid = 0; + aer_print_port_info(pdev, e_info); - aer_print_port_info(p_device->port, e_info); - - if (find_source_device(p_device->port, e_info)) - aer_process_err_devices(p_device, e_info); + if (find_source_device(pdev, e_info)) + aer_process_err_devices(e_info); } if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { @@ -754,10 +442,10 @@ static void aer_isr_one_error(struct pcie_device *p_device, else e_info->multi_error_valid = 0; - aer_print_port_info(p_device->port, e_info); + aer_print_port_info(pdev, e_info); - if (find_source_device(p_device->port, e_info)) - aer_process_err_devices(p_device, e_info); + if (find_source_device(pdev, e_info)) + aer_process_err_devices(e_info); } } @@ -799,11 +487,10 @@ static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) void aer_isr(struct work_struct *work) { struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); - struct pcie_device *p_device = rpc->rpd; struct aer_err_source uninitialized_var(e_src); mutex_lock(&rpc->rpc_mutex); while (get_e_source(rpc, &e_src)) - aer_isr_one_error(p_device, &e_src); + aer_isr_one_error(rpc, &e_src); mutex_unlock(&rpc->rpc_mutex); } diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index cfc89dd57831..4985bdf64c2e 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -163,17 +163,17 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) int id = ((dev->bus->number << 8) | dev->devfn); if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Unaccessible, id=%04x(Unregistered Agent ID)\n", - aer_error_severity_string[info->severity], id); + pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + aer_error_severity_string[info->severity]); goto out; } layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", + pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", aer_error_severity_string[info->severity], - aer_error_layer[layer], id, aer_agent_string[agent]); + aer_error_layer[layer], aer_agent_string[agent]); pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", dev->vendor, dev->device, @@ -186,17 +186,21 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) out: if (info->id && info->error_dev_num > 1 && info->id == id) - pci_err(dev, " Error of this Agent(%04x) is reported first\n", id); + pci_err(dev, " Error of this Agent is reported first\n"); trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), - info->severity); + info->severity, info->tlp_header_valid, &info->tlp); } void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) { - pci_info(dev, "AER: %s%s error received: id=%04x\n", + u8 bus = info->id >> 8; + u8 devfn = info->id & 0xff; + + pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n", info->multi_error_valid ? "Multiple " : "", - aer_error_severity_string[info->severity], info->id); + aer_error_severity_string[info->severity], + pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -216,28 +220,30 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { - int layer, agent, status_strs_size, tlp_header_valid = 0; + int layer, agent, tlp_header_valid = 0; u32 status, mask; - const char **status_strs; + struct aer_err_info info; if (aer_severity == AER_CORRECTABLE) { status = aer->cor_status; mask = aer->cor_mask; - status_strs = aer_correctable_error_string; - status_strs_size = ARRAY_SIZE(aer_correctable_error_string); } else { status = aer->uncor_status; mask = aer->uncor_mask; - status_strs = aer_uncorrectable_error_string; - status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string); tlp_header_valid = status & AER_LOG_TLP_MASKS; } layer = AER_GET_LAYER_ERROR(aer_severity, status); agent = AER_GET_AGENT(aer_severity, status); + memset(&info, 0, sizeof(info)); + info.severity = aer_severity; + info.status = status; + info.mask = mask; + info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); + pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); - cper_print_bits("", status, status_strs, status_strs_size); + __aer_print_error(dev, &info); pci_err(dev, "aer_layer=%s, aer_agent=%s\n", aer_error_layer[layer], aer_agent_string[agent]); @@ -249,6 +255,6 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, __print_tlp_header(dev, &aer->header_log); trace_aer_event(dev_name(&dev->dev), (status & ~mask), - aer_severity); + aer_severity, tlp_header_valid, &aer->header_log); } #endif |