From 10874f5a00266343a06e95da680e8a5a383d9a80 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 14 Apr 2014 16:11:40 -0600 Subject: PCI: Remove unnecessary __ref annotations Some PCI functions used to be marked __devinit. When CONFIG_HOTPLUG was not set, these functions were discarded after boot. A few callers of these __devinit functions were marked __ref to indicate that they could safely call the __devinit functions even though the callers were not __devinit. But CONFIG_HOTPLUG and __devinit are now gone, and the need for the __ref annotations is also gone, so remove them. Relevant historical commits: 54b956b90360 Remove __dev* markings from init.h a8e4b9c101ae PCI: add generic pci_hp_add_bridge() 0ab2b57f8db8 PCI: fix section mismatch warning in pci_scan_child_bus 451124a7cc6c PCI: fix 4x section mismatch warnings Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ef09f5f2fe6c..fe89a982a3da 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -719,7 +719,7 @@ add_dev: return child; } -struct pci_bus *__ref pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr) +struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr) { struct pci_bus *child; @@ -1369,7 +1369,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) WARN_ON(ret < 0); } -struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn) +struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; @@ -1958,7 +1958,7 @@ EXPORT_SYMBOL(pci_scan_bus); * * Returns the max number of subordinate bus discovered. */ -unsigned int __ref pci_rescan_bus_bridge_resize(struct pci_dev *bridge) +unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge) { unsigned int max; struct pci_bus *bus = bridge->subordinate; @@ -1981,7 +1981,7 @@ unsigned int __ref pci_rescan_bus_bridge_resize(struct pci_dev *bridge) * * Returns the max number of subordinate bus discovered. */ -unsigned int __ref pci_rescan_bus(struct pci_bus *bus) +unsigned int pci_rescan_bus(struct pci_bus *bus) { unsigned int max; -- cgit v1.2.3 From 1e358f94c00570f88a590cabe718daf835440cc9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 29 Apr 2014 12:51:55 -0600 Subject: PCI: Fix use of uninitialized MPS value If "pcie_bus_config == PCIE_BUS_PERFORMANCE", we don't initialize "smpss", so we pass a pointer to garbage into pcie_bus_configure_set(), where we compute "mps" based on the garbage. We then pass the garbage "mps" to pcie_write_mps(), which ignores it in the PCIE_BUS_PERFORMANCE case. Coverity isn't smart enough to deduce that we ignore the garbage (it's a lot to expect from a human, too), so initialize "smpss" to a safe value in all cases. Found by Coverity (CID 146454). Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index fe89a982a3da..490031fd2108 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1617,7 +1617,7 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) */ void pcie_bus_configure_settings(struct pci_bus *bus) { - u8 smpss; + u8 smpss = 0; if (!bus->self) return; -- cgit v1.2.3 From 23b13bc76f359e99140baf083dc44314f4eb1b87 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 14 Apr 2014 15:25:54 -0600 Subject: PCI: Fail safely if we can't handle BARs larger than 4GB We can only handle BARs larger than 4GB if both dma_addr_t and resource_size_t are 64 bits wide. If dma_addr_t is 32 bits, we can't represent all the bus addresses, and if resource_size_t is 32 bits, we can't represent all the CPU addresses. Previously we cleared res->flags (at "fail:") for resources that were too large. That means we think the BAR doesn't exist at all, which in turn means that we could enable the device even though we can't keep track of where the BAR is and we can't make sure it doesn't overlap something else. This preserves the type flags (MEM/IO) so we can keep from enabling the device. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ef09f5f2fe6c..c7f8b717c2e7 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -171,6 +171,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int pos) { u32 l, sz, mask; + u64 l64, sz64, mask64; u16 orig_cmd; struct pci_bus_region region, inverted_region; bool bar_too_big = false, bar_disabled = false; @@ -226,9 +227,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } if (res->flags & IORESOURCE_MEM_64) { - u64 l64 = l; - u64 sz64 = sz; - u64 mask64 = mask | (u64)~0 << 32; + l64 = l; + sz64 = sz; + mask64 = mask | (u64)~0 << 32; pci_read_config_dword(dev, pos + 4, &l); pci_write_config_dword(dev, pos + 4, ~0); @@ -243,9 +244,13 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, if (!sz64) goto fail; - if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) { + if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && + sz64 > 0x100000000ULL) { + res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; + res->start = 0; + res->end = 0; bar_too_big = true; - goto fail; + goto out; } if ((sizeof(resource_size_t) < 8) && l) { @@ -303,7 +308,8 @@ out: pci_write_config_word(dev, PCI_COMMAND, orig_cmd); if (bar_too_big) - dev_err(&dev->dev, "reg 0x%x: can't handle 64-bit BAR\n", pos); + dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n", + pos, (unsigned long long) sz64); if (res->flags && !bar_disabled) dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res); -- cgit v1.2.3 From d1a313e4b6ccbb61c746ee10ac198970516e9afc Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 29 Apr 2014 18:33:09 -0600 Subject: PCI: Reject BAR above 4GB if dma_addr_t is too small We can only handle BARs above 4GB if dma_addr_t (not resource_size_t) is 64 bits wide. If we have a 64-bit resource_size_t and a 32-bit dma_addr_t, we can't deal with BARs above 4GB. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c7f8b717c2e7..afae3bf405fa 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -253,7 +253,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, goto out; } - if ((sizeof(resource_size_t) < 8) && l) { + if ((sizeof(dma_addr_t) < 8) && l) { /* Address above 32-bit boundary; disable the BAR */ pci_write_config_dword(dev, pos, 0); pci_write_config_dword(dev, pos + 4, 0); -- cgit v1.2.3 From 72dc5601fe5fec37cc1bd0efb19d99948fe7e54c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 29 Apr 2014 18:42:49 -0600 Subject: PCI: Don't convert BAR address to resource if dma_addr_t is too small If dma_addr_t is too small to represent the BAR value, pcibios_bus_to_resource() will fail, so just remember the BAR size directly in the resource. The resource is already marked UNSET, so we know the address isn't valid anyway. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index afae3bf405fa..82cd75f6118a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -258,9 +258,10 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, pci_write_config_dword(dev, pos, 0); pci_write_config_dword(dev, pos + 4, 0); res->flags |= IORESOURCE_UNSET; - region.start = 0; - region.end = sz64; + res->start = 0; + res->end = sz64; bar_disabled = true; + goto out; } else { region.start = l64; region.end = l64 + sz64; -- cgit v1.2.3 From 31e9dd2565a6e27a3e698d7e3adf929db8d6c767 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 29 Apr 2014 18:37:47 -0600 Subject: PCI: Don't set BAR to zero if dma_addr_t is too small If a BAR is above 4GB and our dma_addr_t is too small, don't clear the BAR to zero: that doesn't disable the BAR, and it makes it more likely that the BAR will conflict with things if we turn on the memory enable bit (as we will at "out:" if the device was already enabled at the handoff). We should also print the BAR info and its original size so we can follow the process when we try to assign space to it. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 82cd75f6118a..dd710b12d34c 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -174,7 +174,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, u64 l64, sz64, mask64; u16 orig_cmd; struct pci_bus_region region, inverted_region; - bool bar_too_big = false, bar_disabled = false; + bool bar_too_big = false, bar_too_high = false; mask = type ? PCI_ROM_ADDRESS_MASK : ~0; @@ -254,13 +254,11 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } if ((sizeof(dma_addr_t) < 8) && l) { - /* Address above 32-bit boundary; disable the BAR */ - pci_write_config_dword(dev, pos, 0); - pci_write_config_dword(dev, pos + 4, 0); + /* Above 32-bit boundary; try to reallocate */ res->flags |= IORESOURCE_UNSET; res->start = 0; res->end = sz64; - bar_disabled = true; + bar_too_high = true; goto out; } else { region.start = l64; @@ -311,7 +309,10 @@ out: if (bar_too_big) dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n", pos, (unsigned long long) sz64); - if (res->flags && !bar_disabled) + if (bar_too_high) + dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4G (bus address %#010llx)\n", + pos, (unsigned long long) l64); + if (res->flags) dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res); return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; -- cgit v1.2.3 From 26370fc6647b63eefb85a675382d661d0fed30a1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 14 Apr 2014 15:26:50 -0600 Subject: PCI: Don't print anything while decoding is disabled If the console is a PCI device, and we try to print to it while its decoding is disabled, the system will hang. This particular printk hasn't caused a problem yet, but it could, so this fixes it. See also 0ff9514b579b ("PCI: Don't print anything while decoding is disabled"). Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index dd710b12d34c..3bc149b848a8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -174,7 +174,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, u64 l64, sz64, mask64; u16 orig_cmd; struct pci_bus_region region, inverted_region; - bool bar_too_big = false, bar_too_high = false; + bool bar_too_big = false, bar_too_high = false, bar_invalid = false; mask = type ? PCI_ROM_ADDRESS_MASK : ~0; @@ -289,11 +289,10 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, * be claimed by the device. */ if (inverted_region.start != region.start) { - dev_info(&dev->dev, "reg 0x%x: initial BAR value %pa invalid; forcing reassignment\n", - pos, ®ion.start); res->flags |= IORESOURCE_UNSET; - res->end -= res->start; res->start = 0; + res->end = region.end - region.start; + bar_invalid = true; } goto out; @@ -312,6 +311,9 @@ out: if (bar_too_high) dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4G (bus address %#010llx)\n", pos, (unsigned long long) l64); + if (bar_invalid) + dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n", + pos, (unsigned long long) region.start); if (res->flags) dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res); -- cgit v1.2.3 From d739a099d0248c78d374b1b610cdb679c7bc052d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 14 Apr 2014 16:10:54 -0600 Subject: PCI: Don't add disabled subtractive decode bus resources For a subtractive decode bridge, we previously added and printed all resources of the primary bus, even if they were not valid. In the example below, the bridge 00:1c.3 has no windows enabled, so there are no valid resources on bus 02. But since 02:00.0 is subtractive decode bridge, we add and print all those invalid resources, which don't really make sense: pci 0000:00:1c.3: PCI bridge to [bus 02-03] pci 0000:02:00.0: PCI bridge to [bus 03] (subtractive decode) pci 0000:02:00.0: bridge window [??? 0x00000000 flags 0x0] (subtractive decode) Add and print the subtractively-decoded resources only if they are valid. There's an example in the dmesg log attached to the bugzilla below (but this patch doesn't fix the bug reported there). Link: https://bugzilla.kernel.org/show_bug.cgi?id=73141 Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3bc149b848a8..966514010974 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -475,7 +475,7 @@ void pci_read_bridge_bases(struct pci_bus *child) if (dev->transparent) { pci_bus_for_each_resource(child->parent, res, i) { - if (res) { + if (res && res->flags) { pci_bus_add_resource(child, res, PCI_SUBTRACTIVE_DECODE); dev_printk(KERN_DEBUG, &dev->dev, -- cgit v1.2.3 From 6788a51fe3391817c8ded5f43dd4c57f3d212c17 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Sun, 4 May 2014 12:23:38 +0800 Subject: PCI: Use pci_is_bridge() to simplify code Use pci_is_bridge() to simplify code. No functional change. Requires: 326c1cdae741 PCI: Rename pci_is_bridge() to pci_has_subordinate() Requires: 1c86438c9423 PCI: Add new pci_is_bridge() interface Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-acpi.c | 8 +------- drivers/pci/probe.c | 3 +-- drivers/pci/setup-bus.c | 4 +--- 3 files changed, 3 insertions(+), 12 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index f49abef88485..ca4927ba8433 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -309,13 +309,7 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev) bool check_children; u64 addr; - /* - * pci_is_bridge() is not suitable here, because pci_dev->subordinate - * is set only after acpi_pci_find_device() has been called for the - * given device. - */ - check_children = pci_dev->hdr_type == PCI_HEADER_TYPE_BRIDGE - || pci_dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; + check_children = pci_is_bridge(pci_dev); /* Please ref to ACPI spec for the syntax of _ADR */ addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn); return acpi_find_child_device(ACPI_COMPANION(dev->parent), addr, diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ef09f5f2fe6c..f831dd80fa2d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1670,8 +1670,7 @@ unsigned int pci_scan_child_bus(struct pci_bus *bus) for (pass=0; pass < 2; pass++) list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (pci_is_bridge(dev)) max = pci_scan_bridge(bus, dev, max, pass); } diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 138bdd6393be..e399d00ed5fb 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1629,9 +1629,7 @@ void pci_assign_unassigned_bus_resources(struct pci_bus *bus) down_read(&pci_bus_sem); list_for_each_entry(dev, &bus->devices, bus_list) - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) - if (dev->subordinate) + if (pci_is_bridge(dev) && pci_has_subordinate(dev)) __pci_bus_size_bridges(dev->subordinate, &add_list); up_read(&pci_bus_sem); -- cgit v1.2.3 From 78916b00f0096059c872f537306b1a464c84fb30 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 5 May 2014 14:20:51 -0600 Subject: PCI: Test for std config alias when testing extended config space When a PCI-to-PCIe bridge is stacked on a PCIe-to-PCI bridge, we can have PCIe endpoints masked by a conventional PCI bus. This makes the extended config space of the PCIe endpoint inaccessible. The PCIe-to-PCI bridge is supposed to handle any type 1 configuration transactions where the extended config offset bits are non-zero as an Unsupported Request rather than forward it to the secondary interface. As noted here, there are a couple known offenders to this rule. These bridges drop the extended offset bits, resulting in the conventional config space being aliased many times across the extended config space. For Intel NICs, this alias often seems to expose a bogus SR-IOV cap. Stacking bridges may seem like an uncommon scenario, but note that any conventional PCI slot in a modern PC is already the secondary interface of an onboard PCIe-to-PCI bridge. The user need only add a PCI-to-PCIe adapter and PCIe device to encounter this problem. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 490031fd2108..b47c2dd5b9e1 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -983,6 +983,43 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev) } +/** + * pci_ext_cfg_is_aliased - is ext config space just an alias of std config? + * @dev: PCI device + * + * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says that + * when forwarding a type1 configuration request the bridge must check that + * the extended register address field is zero. The bridge is not permitted + * to forward the transactions and must handle it as an Unsupported Request. + * Some bridges do not follow this rule and simply drop the extended register + * bits, resulting in the standard config space being aliased, every 256 + * bytes across the entire configuration space. Test for this condition by + * comparing the first dword of each potential alias to the vendor/device ID. + * Known offenders: + * ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03) + * AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40) + */ +static bool pci_ext_cfg_is_aliased(struct pci_dev *dev) +{ +#ifdef CONFIG_PCI_QUIRKS + int pos; + u32 header, tmp; + + pci_read_config_dword(dev, PCI_VENDOR_ID, &header); + + for (pos = PCI_CFG_SPACE_SIZE; + pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE) { + if (pci_read_config_dword(dev, pos, &tmp) != PCIBIOS_SUCCESSFUL + || header != tmp) + return false; + } + + return true; +#else + return false; +#endif +} + /** * pci_cfg_space_size - get the configuration space size of the PCI device. * @dev: PCI device @@ -1001,7 +1038,7 @@ static int pci_cfg_space_size_ext(struct pci_dev *dev) if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL) goto fail; - if (status == 0xffffffff) + if (status == 0xffffffff || pci_ext_cfg_is_aliased(dev)) goto fail; return PCI_CFG_SPACE_EXP_SIZE; -- cgit v1.2.3 From 782a985d7af26db39e86070d28f987cad21313c0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 20 May 2014 08:53:21 -0600 Subject: PCI: Introduce new device binding path using pci_dev.driver_override The driver_override field allows us to specify the driver for a device rather than relying on the driver to provide a positive match of the device. This shortcuts the existing process of looking up the vendor and device ID, adding them to the driver new_id, binding the device, then removing the ID, but it also provides a couple advantages. First, the above existing process allows the driver to bind to any device matching the new_id for the window where it's enabled. This is often not desired, such as the case of trying to bind a single device to a meta driver like pci-stub or vfio-pci. Using driver_override we can do this deterministically using: echo pci-stub > /sys/bus/pci/devices/0000:03:00.0/driver_override echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind echo 0000:03:00.0 > /sys/bus/pci/drivers_probe Previously we could not invoke drivers_probe after adding a device to new_id for a driver as we get non-deterministic behavior whether the driver we intend or the standard driver will claim the device. Now it becomes a deterministic process, only the driver matching driver_override will probe the device. To return the device to the standard driver, we simply clear the driver_override and reprobe the device: echo > /sys/bus/pci/devices/0000:03:00.0/driver_override echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind echo 0000:03:00.0 > /sys/bus/pci/drivers_probe Another advantage to this approach is that we can specify a driver override to force a specific binding or prevent any binding. For instance when an IOMMU group is exposed to userspace through VFIO we require that all devices within that group are owned by VFIO. However, devices can be hot-added into an IOMMU group, in which case we want to prevent the device from binding to any driver (override driver = "none") or perhaps have it automatically bind to vfio-pci. With driver_override it's a simple matter for this field to be set internally when the device is first discovered to prevent driver matches. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Alexander Graf Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-pci | 21 +++++++++++++++++ drivers/pci/pci-driver.c | 25 ++++++++++++++++++--- drivers/pci/pci-sysfs.c | 40 +++++++++++++++++++++++++++++++++ drivers/pci/probe.c | 1 + include/linux/pci.h | 1 + 5 files changed, 85 insertions(+), 3 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index a3c5a6685036..898ddc4440e6 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -250,3 +250,24 @@ Description: valid. For example, writing a 2 to this file when sriov_numvfs is not 0 and not 2 already will return an error. Writing a 10 when the value of sriov_totalvfs is 8 will return an error. + +What: /sys/bus/pci/devices/.../driver_override +Date: April 2014 +Contact: Alex Williamson +Description: + This file allows the driver for a device to be specified which + will override standard static and dynamic ID matching. When + specified, only a driver with a name matching the value written + to driver_override will have an opportunity to bind to the + device. The override is specified by writing a string to the + driver_override file (echo pci-stub > driver_override) and + may be cleared with an empty string (echo > driver_override). + This returns the device to standard matching rules binding. + Writing to driver_override does not automatically unbind the + device from its current driver or make any attempt to + automatically load the specified driver. If no driver with a + matching name is currently loaded in the kernel, the device + will not bind to any driver. This also allows devices to + opt-out of driver binding using a driver_override name such as + "none". Only a single driver may be specified in the override, + there is no support for parsing delimiters. diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d911e0c1f359..4393c12e9135 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -216,6 +216,13 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, return NULL; } +static const struct pci_device_id pci_device_id_any = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, +}; + /** * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure * @drv: the PCI driver to match against @@ -229,18 +236,30 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev) { struct pci_dynid *dynid; + const struct pci_device_id *found_id = NULL; + + /* When driver_override is set, only bind to the matching driver */ + if (dev->driver_override && strcmp(dev->driver_override, drv->name)) + return NULL; /* Look at the dynamic ids first, before the static ones */ spin_lock(&drv->dynids.lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (pci_match_one_device(&dynid->id, dev)) { - spin_unlock(&drv->dynids.lock); - return &dynid->id; + found_id = &dynid->id; + break; } } spin_unlock(&drv->dynids.lock); - return pci_match_id(drv->id_table, dev); + if (!found_id) + found_id = pci_match_id(drv->id_table, dev); + + /* driver_override will always match, send a dummy id */ + if (!found_id && dev->driver_override) + found_id = &pci_device_id_any; + + return found_id; } struct drv_dev_and_id { diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 4e0acefb7565..faa4ab554d68 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -499,6 +499,45 @@ static struct device_attribute sriov_numvfs_attr = sriov_numvfs_show, sriov_numvfs_store); #endif /* CONFIG_PCI_IOV */ +static ssize_t driver_override_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + char *driver_override, *old = pdev->driver_override, *cp; + + if (count > PATH_MAX) + return -EINVAL; + + driver_override = kstrndup(buf, count, GFP_KERNEL); + if (!driver_override) + return -ENOMEM; + + cp = strchr(driver_override, '\n'); + if (cp) + *cp = '\0'; + + if (strlen(driver_override)) { + pdev->driver_override = driver_override; + } else { + kfree(driver_override); + pdev->driver_override = NULL; + } + + kfree(old); + + return count; +} + +static ssize_t driver_override_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%s\n", pdev->driver_override); +} +static DEVICE_ATTR_RW(driver_override); + static struct attribute *pci_dev_attrs[] = { &dev_attr_resource.attr, &dev_attr_vendor.attr, @@ -521,6 +560,7 @@ static struct attribute *pci_dev_attrs[] = { #if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI) &dev_attr_d3cold_allowed.attr, #endif + &dev_attr_driver_override.attr, NULL, }; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ef09f5f2fe6c..54268de45f59 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1215,6 +1215,7 @@ static void pci_release_dev(struct device *dev) pci_release_of_node(pci_dev); pcibios_release_device(pci_dev); pci_bus_put(pci_dev->bus); + kfree(pci_dev->driver_override); kfree(pci_dev); } diff --git a/include/linux/pci.h b/include/linux/pci.h index aab57b4abe7f..b72af276f591 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -365,6 +365,7 @@ struct pci_dev { #endif phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */ size_t romlen; /* Length of ROM if it's not from the BAR */ + char *driver_override; /* Driver name to force a match */ }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) -- cgit v1.2.3