From 33959f88fce9b8d3346d8000b3425814cbc6d6c0 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 18 Jul 2013 11:31:51 +1000 Subject: powerpc: Add second POWER8 PVR entry POWER8 comes with two different PVRs. This patch enables the additional PVR in the cputable. The existing entry (PVR=0x4b) is renamed to POWER8E and the new entry (PVR=0x4d) is given POWER8. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cputable.c | 20 +++++++++++++++++++- arch/powerpc/kernel/prom_init.c | 5 +++-- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2a45d0f04385..22973a74df73 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_power7, .platform = "power7+", }, - { /* Power8 */ + { /* Power8E */ .pvr_mask = 0xffff0000, .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .platform = "power8", + }, + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, .cpu_name = "POWER8 (raw)", .cpu_features = CPU_FTRS_POWER8, .cpu_user_features = COMMON_USER_POWER8, diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5eccda9fd33f..607902424e73 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -644,7 +644,8 @@ unsigned char ibm_architecture_vec[] = { W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ W(0xffff0000), W(0x003e0000), /* POWER6 */ W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8 */ + W(0xffff0000), W(0x004b0000), /* POWER8E */ + W(0xffff0000), W(0x004d0000), /* POWER8 */ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ @@ -706,7 +707,7 @@ unsigned char ibm_architecture_vec[] = { * must match by the macro below. Update the definition if * the structure layout changes. */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 117 +#define IBM_ARCH_VEC_NRCORES_OFFSET 125 W(NR_CPUS), /* number of cores supported */ 0, 0, -- cgit v1.2.1 From 0e0ed6406e61434d3f38fb58aa8464ec4722b77e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 15 Jul 2013 14:04:50 +1000 Subject: powerpc/modules: Module CRC relocation fix causes perf issues Module CRCs are implemented as absolute symbols that get resolved by a linker script. We build an intermediate .o that contains an unresolved symbol for each CRC. genksysms parses this .o, calculates the CRCs and writes a linker script that "resolves" the symbols to the calculated CRC. Unfortunately the ppc64 relocatable kernel sees these CRCs as symbols that need relocating and relocates them at boot. Commit d4703aef (module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y) added a hook to reverse the bogus relocations. Part of this patch created a symbol at 0x0: # head -2 /proc/kallsyms 0000000000000000 T reloc_start c000000000000000 T .__start This reloc_start symbol is causing lots of confusion to perf. It thinks reloc_start is a massive function that stretches from 0x0 to 0xc000000000000000 and we get various cryptic errors out of perf, including: problem incrementing symbol count, skipping event This patch removes the reloc_start linker script label and instead defines it as PHYSICAL_START. We also need to wrap it with CONFIG_PPC64 because the ppc32 kernel can set a non zero PHYSICAL_START at compile time and we wouldn't want to subtract it from the CRCs in that case. Signed-off-by: Anton Blanchard Cc: Acked-by: Rusty Russell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vmlinux.lds.S | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 654e479802f2..f096e72262f4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - . = 0; - reloc_start = .; - . = KERNELBASE; /* -- cgit v1.2.1 From 0ba178888b05a4efdaca7da528c170bd09f9687b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:51 +0800 Subject: powerpc/eeh: Remove reference to PCI device We will rely on pcibios_release_device() to remove the EEH cache and unbind EEH device for the specific PCI device. So we shouldn't hold the reference to the PCI device from EEH cache and EEH device. Otherwise, pcibios_release_device() won't be called as we expected. The patch removes the reference to the PCI device in EEH core. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 4 ---- arch/powerpc/kernel/eeh_cache.c | 18 +++++------------- 2 files changed, 5 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 39954fe941b8..b5c425ea2974 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -499,8 +499,6 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon } eeh_dev_check_failure(edev); - - pci_dev_put(eeh_dev_to_pci_dev(edev)); return val; } @@ -904,7 +902,6 @@ static void eeh_add_device_late(struct pci_dev *dev) } WARN_ON(edev->pdev); - pci_dev_get(dev); edev->pdev = dev; dev->dev.archdata.edev = edev; @@ -992,7 +989,6 @@ static void eeh_remove_device(struct pci_dev *dev, int purge_pe) } edev->pdev = NULL; dev->dev.archdata.edev = NULL; - pci_dev_put(dev); eeh_rmv_from_parent_pe(edev, purge_pe); eeh_addr_cache_rmv_dev(dev); diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index f9ac1232a746..e8c9fd546a5c 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -68,16 +68,12 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - if (addr < piar->addr_lo) { + if (addr < piar->addr_lo) n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->edev; - } - } + else if (addr > piar->addr_hi) + n = n->rb_right; + else + return piar->edev; } return NULL; @@ -156,7 +152,6 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, if (!piar) return NULL; - pci_dev_get(dev); piar->addr_lo = alo; piar->addr_hi = ahi; piar->edev = pci_dev_to_eeh_dev(dev); @@ -250,7 +245,6 @@ restart: if (piar->pcidev == dev) { rb_erase(n, &pci_io_addr_cache_root.rb_root); - pci_dev_put(piar->pcidev); kfree(piar); goto restart; } @@ -302,12 +296,10 @@ void eeh_addr_cache_build(void) if (!edev) continue; - pci_dev_get(dev); /* matching put is in eeh_remove_device() */ dev->dev.archdata.edev = edev; edev->pdev = dev; eeh_addr_cache_insert_dev(dev); - eeh_sysfs_add_device(dev); } -- cgit v1.2.1 From f2856491d24044de08da9e53cf7068841a8b4e1c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:52 +0800 Subject: powerpc/eeh: Export functions for hotplug Make some functions public in order to support hotplug on either specific PCI bus or PCI device in future. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index b5c425ea2974..582ad1ef46a8 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -836,7 +836,7 @@ core_initcall_sync(eeh_init); * on the CEC architecture, type of the device, on earlier boot * command-line arguments & etc. */ -static void eeh_add_device_early(struct device_node *dn) +void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; @@ -884,7 +884,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -static void eeh_add_device_late(struct pci_dev *dev) +void eeh_add_device_late(struct pci_dev *dev) { struct device_node *dn; struct eeh_dev *edev; @@ -972,7 +972,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. */ -static void eeh_remove_device(struct pci_dev *dev, int purge_pe) +void eeh_remove_device(struct pci_dev *dev, int purge_pe) { struct eeh_dev *edev; -- cgit v1.2.1 From 008a4938ea07db071f03adffaa3a78c2fbbcde6b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:53 +0800 Subject: powerpc/pci: Override pcibios_release_device() The patch overrides pcibios_release_device() to release EEH resources (EEH cache, unbinding EEH device) for the indicated PCI device. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-hotplug.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 3f608800c06b..3dab2f2801b9 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -21,6 +21,17 @@ #include #include +/** + * pcibios_release_device - release PCI device + * @dev: PCI device + * + * The function is called before releasing the indicated PCI device. + */ +void pcibios_release_device(struct pci_dev *dev) +{ + eeh_remove_device(dev, 1); +} + /** * __pcibios_remove_pci_devices - remove all devices under this bus * @bus: the indicated PCI bus -- cgit v1.2.1 From 807a827d4e7455a40e8f56ec2a67c57a91cab9f7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:55 +0800 Subject: powerpc/eeh: Keep PE during hotplug When we do normal hotplug, the PE (shadow EEH structure) shouldn't be kept around. However, we need to keep it if the hotplug an artifial one caused by EEH errors recovery. Since we remove EEH device through the PCI hook pcibios_release_device(), the flag "purge_pe" passed to various functions is meaningless. So the patch removes the meaningless flag and introduce new flag "EEH_PE_KEEP" to save the PE while doing hotplug during EEH error recovery. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 28 ++-------------------------- arch/powerpc/kernel/eeh_driver.c | 7 +++++-- arch/powerpc/kernel/eeh_pe.c | 7 +++---- arch/powerpc/kernel/pci-hotplug.c | 26 +++++--------------------- 4 files changed, 15 insertions(+), 53 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 582ad1ef46a8..ce81477316be 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -964,7 +964,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed - * @purge_pe: remove the PE or not * * This routine should be called when a device is removed from * a running system (e.g. by hotplug or dlpar). It unregisters @@ -972,7 +971,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. */ -void eeh_remove_device(struct pci_dev *dev, int purge_pe) +void eeh_remove_device(struct pci_dev *dev) { struct eeh_dev *edev; @@ -990,34 +989,11 @@ void eeh_remove_device(struct pci_dev *dev, int purge_pe) edev->pdev = NULL; dev->dev.archdata.edev = NULL; - eeh_rmv_from_parent_pe(edev, purge_pe); + eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } -/** - * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device - * @dev: PCI device - * @purge_pe: remove the corresponding PE or not - * - * This routine must be called when a device is removed from the - * running system through hotplug or dlpar. The corresponding - * PCI address cache will be removed. - */ -void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) -{ - struct pci_bus *bus = dev->subordinate; - struct pci_dev *child, *tmp; - - eeh_remove_device(dev, purge_pe); - - if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child, purge_pe); - } -} -EXPORT_SYMBOL_GPL(eeh_remove_bus_device); - static int proc_eeh_show(struct seq_file *m, void *v) { if (0 == eeh_subsystem_enabled) { diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2b1ce17cae50..9ef3bbb8580a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -362,8 +362,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * devices are expected to be attached soon when calling * into pcibios_add_pci_devices(). */ - if (bus) - __pcibios_remove_pci_devices(bus, 0); + if (bus) { + eeh_pe_state_mark(pe, EEH_PE_KEEP); + pcibios_remove_pci_devices(bus); + } /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system @@ -386,6 +388,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) if (bus) { ssleep(5); pcibios_add_pci_devices(bus); + eeh_pe_state_clear(pe, EEH_PE_KEEP); } pe->tstamp = tstamp; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 016588a6f5ed..32ef40940bad 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) while (parent) { if (!(parent->type & EEH_PE_INVALID)) break; - parent->type &= ~EEH_PE_INVALID; + parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); parent = parent->parent; } pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", @@ -397,14 +397,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /** * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE * @edev: EEH device - * @purge_pe: remove PE or not * * The PE hierarchy tree might be changed when doing PCI hotplug. * Also, the PCI devices or buses could be removed from the system * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) +int eeh_rmv_from_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; int cnt; @@ -431,7 +430,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) if (pe->type & EEH_PE_PHB) break; - if (purge_pe) { + if (!(pe->state & EEH_PE_KEEP)) { if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) { list_del(&pe->child); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 3dab2f2801b9..fc0831d4971f 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -29,49 +29,33 @@ */ void pcibios_release_device(struct pci_dev *dev) { - eeh_remove_device(dev, 1); + eeh_remove_device(dev); } /** - * __pcibios_remove_pci_devices - remove all devices under this bus + * pcibios_remove_pci_devices - remove all devices under this bus * @bus: the indicated PCI bus - * @purge_pe: destroy the PE on removal of PCI devices * * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. - * By default, the corresponding PE will be destroied during the - * normal PCI hotplug path. For PCI hotplug during EEH recovery, - * the corresponding PE won't be destroied and deallocated. */ -void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) +void pcibios_remove_pci_devices(struct pci_bus *bus) { struct pci_dev *dev, *tmp; struct pci_bus *child_bus; /* First go down child busses */ list_for_each_entry(child_bus, &bus->children, node) - __pcibios_remove_pci_devices(child_bus, purge_pe); + pcibios_remove_pci_devices(child_bus); pr_debug("PCI: Removing devices on bus %04x:%02x\n", pci_domain_nr(bus), bus->number); list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev, purge_pe); + pr_debug(" Removing %s...\n", pci_name(dev)); pci_stop_and_remove_bus_device(dev); } } -/** - * pcibios_remove_pci_devices - remove all devices under this bus - * @bus: the indicated PCI bus - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - */ -void pcibios_remove_pci_devices(struct pci_bus *bus) -{ - __pcibios_remove_pci_devices(bus, 1); -} EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); /** -- cgit v1.2.1 From 9feed42e93d2625db86423cedf8b4b2bed00779e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:56 +0800 Subject: powerpc/eeh: Use safe list traversal when walking EEH devices Currently, we're trasversing the EEH devices list using list_for_each_entry(). That's not safe enough because the EEH devices might be removed from its parent PE while doing iteration. The patch replaces that with list_for_each_entry_safe(). Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 4 ++-- arch/powerpc/kernel/eeh_pe.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ce81477316be..56bd4584f61f 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -231,7 +231,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev; + struct eeh_dev *edev, *tmp; bool valid_cfg_log = true; /* @@ -251,7 +251,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev) { + eeh_pe_for_each_dev(pe, edev, tmp) { loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, EEH_PCI_REGS_LOG_LEN - loglen); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 32ef40940bad..c8b815e45c8f 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -176,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag) { struct eeh_pe *pe; - struct eeh_dev *edev; + struct eeh_dev *edev, *tmp; void *ret; if (!root) { @@ -186,7 +186,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, /* Traverse root PE */ for (pe = root; pe; pe = eeh_pe_next(pe, root)) { - eeh_pe_for_each_dev(pe, edev) { + eeh_pe_for_each_dev(pe, edev, tmp) { ret = fn(edev, flag); if (ret) return ret; @@ -501,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); - struct eeh_dev *tmp; + struct eeh_dev *edev, *tmp; struct pci_dev *pdev; /* @@ -511,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag) * the PCI device driver. */ pe->state |= state; - eeh_pe_for_each_dev(pe, tmp) { - pdev = eeh_dev_to_pci_dev(tmp); + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); if (pdev) pdev->error_state = pci_channel_io_frozen; } -- cgit v1.2.1 From ab444ec97e8bd65fff9d489b9a409fc03979268b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:57 +0800 Subject: powerpc/pci: Partial tree hotplug support When EEH error happens to one specific PE, the device drivers of its attached EEH devices (PCI devices) are checked to see the further action: reset with complete hotplug, or reset without hotplug. However, that's not enough for those PCI devices whose drivers can't support EEH, or those PCI devices without driver. So we need do so-called "partial hotplug" on basis of PCI devices. In the situation, part of PCI devices of the specific PE are unplugged and plugged again after PE reset. The patch changes pcibios_add_pci_devices() so that it can support full hotplug and so-called "partial" hotplug based on device-tree or real hardware. It's notable that pci_of_scan.c has been changed for a bit in order to support the "partial" hotplug based on dev-tree. Most of the generic code already supports that, we just need to plumb it properly on our side. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 2 ++ arch/powerpc/kernel/pci-hotplug.c | 14 ++++++---- arch/powerpc/kernel/pci_of_scan.c | 56 ++++++++++++++++++++++++++++----------- 3 files changed, 51 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f46914a0f33e..7d22a675fe1a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1462,6 +1462,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) + pci_assign_unassigned_bus_resources(bus); /* Fixup EEH */ eeh_add_device_tree_late(bus); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index fc0831d4971f..c1e17ae68a08 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); */ void pcibios_add_pci_devices(struct pci_bus * bus) { - int slotno, num, mode, pass, max; + int slotno, mode, pass, max; struct pci_dev *dev; struct device_node *dn = pci_bus_to_OF_node(bus); @@ -85,11 +85,15 @@ void pcibios_add_pci_devices(struct pci_bus * bus) /* use ofdt-based probe */ of_rescan_bus(dn, bus); } else if (mode == PCI_PROBE_NORMAL) { - /* use legacy probe */ + /* + * Use legacy probe. In the partial hotplug case, we + * probably have grandchildren devices unplugged. So + * we don't check the return value from pci_scan_slot() in + * order for fully rescan all the way down to pick them up. + * They can have been removed during partial hotplug. + */ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (!num) - return; + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); pcibios_setup_bus_devices(bus); max = bus->busn_res.start; for (pass = 0; pass < 2; pass++) { diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 6b0ba5854d99..15d9105323bf 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -230,11 +230,14 @@ void of_scan_pci_bridge(struct pci_dev *dev) return; } - bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]); if (!bus) { - printk(KERN_ERR "Failed to create pci bus for %s\n", - node->full_name); - return; + bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } } bus->primary = dev->bus->number; @@ -292,6 +295,38 @@ void of_scan_pci_bridge(struct pci_dev *dev) } EXPORT_SYMBOL(of_scan_pci_bridge); +static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, + struct device_node *dn) +{ + struct pci_dev *dev = NULL; + const u32 *reg; + int reglen, devfn; + + pr_debug(" * %s\n", dn->full_name); + if (!of_device_is_available(dn)) + return NULL; + + reg = of_get_property(dn, "reg", ®len); + if (reg == NULL || reglen < 20) + return NULL; + devfn = (reg[0] >> 8) & 0xff; + + /* Check if the PCI device is already there */ + dev = pci_get_slot(bus, devfn); + if (dev) { + pci_dev_put(dev); + return dev; + } + + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(dn, bus, devfn); + if (!dev) + return NULL; + + pr_debug(" dev header type: %x\n", dev->hdr_type); + return dev; +} + /** * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices * @node: device tree node for the PCI bus @@ -302,8 +337,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, int rescan_existing) { struct device_node *child; - const u32 *reg; - int reglen, devfn; struct pci_dev *dev; pr_debug("of_scan_bus(%s) bus no %d...\n", @@ -311,16 +344,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, /* Scan direct children */ for_each_child_of_node(node, child) { - pr_debug(" * %s\n", child->full_name); - if (!of_device_is_available(child)) - continue; - reg = of_get_property(child, "reg", ®len); - if (reg == NULL || reglen < 20) - continue; - devfn = (reg[0] >> 8) & 0xff; - - /* create a new pci_dev for this device */ - dev = of_create_pci_dev(child, bus, devfn); + dev = of_scan_pci_dev(bus, child); if (!dev) continue; pr_debug(" dev header type: %x\n", dev->hdr_type); -- cgit v1.2.1 From f5c57710dd62dd06f176934a8b4b8accbf00f9f8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:58 +0800 Subject: powerpc/eeh: Use partial hotplug for EEH unaware drivers When EEH error happens to one specific PE, some devices with drivers supporting EEH won't except hotplug on the device. However, there might have other deivces without driver, or with driver without EEH support. For the case, we need do partial hotplug in order to make sure that the PE becomes absolutely quite during reset. Otherise, the PE reset might fail and leads to failure of error recovery. The current code doesn't handle that 'mixed' case properly, it either uses the error callbacks to the drivers, or tries hotplug, but doesn't handle a PE (EEH domain) composed of a combination of the two. The patch intends to support so-called "partial" hotplug for EEH: Before we do reset, we stop and remove those PCI devices without EEH sensitive driver. The corresponding EEH devices are not detached from its PE, but with special flag. After the reset is done, those EEH devices with the special flag will be scanned one by one. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 30 ++++++++++++++-- arch/powerpc/kernel/eeh_driver.c | 74 +++++++++++++++++++++++++++++++++++++--- arch/powerpc/kernel/eeh_pe.c | 20 ++++------- arch/powerpc/kernel/eeh_sysfs.c | 7 ++++ 4 files changed, 110 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 56bd4584f61f..a5783f1a7a96 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -900,7 +900,21 @@ void eeh_add_device_late(struct pci_dev *dev) pr_debug("EEH: Already referenced !\n"); return; } - WARN_ON(edev->pdev); + + /* + * The EEH cache might not be removed correctly because of + * unbalanced kref to the device during unplug time, which + * relies on pcibios_release_device(). So we have to remove + * that here explicitly. + */ + if (edev->pdev) { + eeh_rmv_from_parent_pe(edev); + eeh_addr_cache_rmv_dev(edev->pdev); + eeh_sysfs_remove_device(edev->pdev); + + edev->pdev = NULL; + dev->dev.archdata.edev = NULL; + } edev->pdev = dev; dev->dev.archdata.edev = edev; @@ -982,14 +996,24 @@ void eeh_remove_device(struct pci_dev *dev) /* Unregister the device with the EEH/PCI address search system */ pr_debug("EEH: Removing device %s\n", pci_name(dev)); - if (!edev || !edev->pdev) { + if (!edev || !edev->pdev || !edev->pe) { pr_debug("EEH: Not referenced !\n"); return; } + + /* + * During the hotplug for EEH error recovery, we need the EEH + * device attached to the parent PE in order for BAR restore + * a bit later. So we keep it for BAR restore and remove it + * from the parent PE during the BAR resotre. + */ edev->pdev = NULL; dev->dev.archdata.edev = NULL; + if (!(edev->pe->state & EEH_PE_KEEP)) + eeh_rmv_from_parent_pe(edev); + else + edev->mode |= EEH_DEV_DISCONNECTED; - eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 9ef3bbb8580a..9fda75d1f5aa 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -338,6 +338,54 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_rmv_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + int *removed = (int *)userdata; + + /* + * Actually, we should remove the PCI bridges as well. + * However, that's lots of complexity to do that, + * particularly some of devices under the bridge might + * support EEH. So we just care about PCI devices for + * simplicity here. + */ + if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) + return NULL; + driver = eeh_pcid_get(dev); + if (driver && driver->err_handler) + return NULL; + + /* Remove it from PCI subsystem */ + pr_debug("EEH: Removing %s without EEH sensitive driver\n", + pci_name(dev)); + edev->bus = dev->bus; + edev->mode |= EEH_DEV_DISCONNECTED; + (*removed)++; + + pci_stop_and_remove_bus_device(dev); + + return NULL; +} + +static void *eeh_pe_detach_dev(void *data, void *userdata) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev, *tmp; + + eeh_pe_for_each_dev(pe, edev, tmp) { + if (!(edev->mode & EEH_DEV_DISCONNECTED)) + continue; + + edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); + eeh_rmv_from_parent_pe(edev); + } + + return NULL; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -349,8 +397,9 @@ static void *eeh_report_failure(void *data, void *userdata) */ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) { + struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc; + int cnt, rc, removed = 0; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -362,10 +411,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * devices are expected to be attached soon when calling * into pcibios_add_pci_devices(). */ - if (bus) { - eeh_pe_state_mark(pe, EEH_PE_KEEP); + eeh_pe_state_mark(pe, EEH_PE_KEEP); + if (bus) pcibios_remove_pci_devices(bus); - } + else if (frozen_bus) + eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system @@ -386,10 +436,24 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * potentially weird things happen. */ if (bus) { + pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); ssleep(5); + + /* + * The EEH device is still connected with its parent + * PE. We should disconnect it so the binding can be + * rebuilt when adding PCI devices. + */ + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); pcibios_add_pci_devices(bus); - eeh_pe_state_clear(pe, EEH_PE_KEEP); + } else if (frozen_bus && removed) { + pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); + ssleep(5); + + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); + pcibios_add_pci_devices(frozen_bus); } + eeh_pe_state_clear(pe, EEH_PE_KEEP); pe->tstamp = tstamp; pe->freeze_count = cnt; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index c8b815e45c8f..2aa955ae01a1 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -149,8 +149,8 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, * callback returns something other than NULL, or no more PEs * to be traversed. */ -static void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) +void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) { struct eeh_pe *pe; void *ret; @@ -409,8 +409,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) int cnt; if (!edev->pe) { - pr_warning("%s: No PE found for EEH device %s\n", - __func__, edev->dn->full_name); + pr_debug("%s: No PE found for EEH device %s\n", + __func__, edev->dn->full_name); return -EEXIST; } @@ -728,18 +728,12 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, */ static void *eeh_restore_one_device_bars(void *data, void *flag) { - struct pci_dev *pdev = NULL; struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct device_node *dn = eeh_dev_to_of_node(edev); - /* Trace the PCI bridge */ - if (eeh_probe_mode_dev()) { - pdev = eeh_dev_to_pci_dev(edev); - if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - pdev = NULL; - } - - if (pdev) + /* Do special restore for bridges */ + if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) eeh_restore_bridge_bars(pdev, edev, dn); else eeh_restore_device_bars(edev, dn); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e7ae3484918c..61e2a1452131 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -68,6 +68,13 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) void eeh_sysfs_remove_device(struct pci_dev *pdev) { + /* + * The parent directory might have been removed. We needn't + * continue for that case. + */ + if (!pdev->dev.kobj.sd) + return; + device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); -- cgit v1.2.1 From 4b83bd452f17582c8d1c916568fd28a237a2eb46 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:24:59 +0800 Subject: powerpc/eeh: Don't use pci_dev during BAR restore While restoring BARs for one specific PCI device, the pci_dev instance should have been released. So it's not reliable to use the pci_dev instance on restoring BARs. However, we still need some information (e.g. PCIe capability position, header type) from the pci_dev instance. So we have to store those information to EEH device in advance. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh_pe.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 2aa955ae01a1..f9450537e335 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -578,7 +578,7 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) * blocked on normal path during the stage. So we need utilize * eeh operations, which is always permitted. */ -static void eeh_bridge_check_link(struct pci_dev *pdev, +static void eeh_bridge_check_link(struct eeh_dev *edev, struct device_node *dn) { int cap; @@ -589,16 +589,17 @@ static void eeh_bridge_check_link(struct pci_dev *pdev, * We only check root port and downstream ports of * PCIe switches */ - if (!pci_is_pcie(pdev) || - (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && - pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) + if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT))) return; - pr_debug("%s: Check PCIe link for %s ...\n", - __func__, pci_name(pdev)); + pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", + __func__, edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF)); /* Check slot status */ - cap = pdev->pcie_cap; + cap = edev->pcie_cap; eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { pr_debug(" No card in the slot (0x%04x) !\n", val); @@ -652,8 +653,7 @@ static void eeh_bridge_check_link(struct pci_dev *pdev, #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) -static void eeh_restore_bridge_bars(struct pci_dev *pdev, - struct eeh_dev *edev, +static void eeh_restore_bridge_bars(struct eeh_dev *edev, struct device_node *dn) { int i; @@ -679,7 +679,7 @@ static void eeh_restore_bridge_bars(struct pci_dev *pdev, eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); /* Check the PCIe link is ready */ - eeh_bridge_check_link(pdev, dn); + eeh_bridge_check_link(edev, dn); } static void eeh_restore_device_bars(struct eeh_dev *edev, @@ -729,12 +729,11 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, static void *eeh_restore_one_device_bars(void *data, void *flag) { struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct device_node *dn = eeh_dev_to_of_node(edev); /* Do special restore for bridges */ - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - eeh_restore_bridge_bars(pdev, edev, dn); + if (edev->mode & EEH_DEV_BRIDGE) + eeh_restore_bridge_bars(edev, dn); else eeh_restore_device_bars(edev, dn); -- cgit v1.2.1 From 91150af3adf67463c4ca7d72d4fe1a84da37792c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:25:00 +0800 Subject: powerpc/eeh: Fix unbalanced enable for IRQ The patch fixes following issue: Unbalanced enable for IRQ 23 ------------[ cut here ]------------ WARNING: at kernel/irq/manage.c:437 : NIP [c00000000016de8c] .__enable_irq+0x11c/0x140 LR [c00000000016de88] .__enable_irq+0x118/0x140 Call Trace: [c000003ea1f23880] [c00000000016de88] .__enable_irq+0x118/0x140 (unreliable) [c000003ea1f23910] [c00000000016df08] .enable_irq+0x58/0xa0 [c000003ea1f239a0] [c0000000000388b4] .eeh_enable_irq+0xc4/0xe0 [c000003ea1f23a30] [c000000000038a28] .eeh_report_reset+0x78/0x130 [c000003ea1f23ac0] [c000000000037508] .eeh_pe_dev_traverse+0x98/0x170 [c000003ea1f23b60] [c0000000000391ac] .eeh_handle_normal_event+0x2fc/0x3d0 [c000003ea1f23bf0] [c000000000039538] .eeh_handle_event+0x2b8/0x2c0 [c000003ea1f23c90] [c000000000039600] .eeh_event_handler+0xc0/0x170 [c000003ea1f23d30] [c0000000000da9a0] .kthread+0xf0/0x100 [c000003ea1f23e30] [c00000000000a1dc] .ret_from_kernel_thread+0x5c/0x80 Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh_driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 9fda75d1f5aa..36bed5a12750 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -143,10 +143,14 @@ static void eeh_disable_irq(struct pci_dev *dev) static void eeh_enable_irq(struct pci_dev *dev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + struct irq_desc *desc; if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { edev->mode &= ~EEH_DEV_IRQ_DISABLED; - enable_irq(dev->irq); + + desc = irq_to_desc(dev->irq); + if (desc && desc->depth > 0) + enable_irq(dev->irq); } } -- cgit v1.2.1 From ab55d2187da27414f78056810713c92f9a4350c2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 24 Jul 2013 10:25:01 +0800 Subject: powerpc/eeh: Introdce flag to protect sysfs The patch introduces flag EEH_DEV_SYSFS to keep track that the sysfs entries for the corresponding EEH device (then PCI device) has been added or removed, in order to avoid race condition. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 2 ++ arch/powerpc/kernel/eeh_sysfs.c | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index a5783f1a7a96..ea9414c8088d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -911,6 +911,7 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); + edev->mode &= ~EEH_DEV_SYSFS; edev->pdev = NULL; dev->dev.archdata.edev = NULL; @@ -1016,6 +1017,7 @@ void eeh_remove_device(struct pci_dev *dev) eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); + edev->mode &= ~EEH_DEV_SYSFS; } static int proc_eeh_show(struct seq_file *m, void *v) diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 61e2a1452131..5d753d4f2c75 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -56,26 +56,40 @@ EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); void eeh_sysfs_add_device(struct pci_dev *pdev) { + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); int rc=0; + if (edev && (edev->mode & EEH_DEV_SYSFS)) + return; + rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); if (rc) printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + else if (edev) + edev->mode |= EEH_DEV_SYSFS; } void eeh_sysfs_remove_device(struct pci_dev *pdev) { + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + /* * The parent directory might have been removed. We needn't * continue for that case. */ - if (!pdev->dev.kobj.sd) + if (!pdev->dev.kobj.sd) { + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; return; + } device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; } -- cgit v1.2.1 From e8e813ed268d90c1377f53460527c419eb52c67a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 4 Jun 2013 14:21:17 +1000 Subject: powerpc: Rename PMU interrupts from CNT to PMI Back in commit 89713ed "Add timer, performance monitor and machine check counts to /proc/interrupts" we added a count of PMU interrupts to the output of /proc/interrupts. At the time we named them "CNT" to match x86. However in commit 89ccf46 "Rename 'performance counter interrupt'", the x86 guys renamed theirs from "CNT" to "PMI". Arguably changing the name could break someone's script, but I think the chance of that is minimal, and it's preferable to have a name that 1) is somewhat meaningful, and 2) matches x86. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2e51cde616d2..c69440cef7af 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -362,7 +362,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); + seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); seq_printf(p, " Performance monitoring interrupts\n"); -- cgit v1.2.1 From 144136dd7a25a0ca4d86685f872168502f91f337 Mon Sep 17 00:00:00 2001 From: Mike Qiu Date: Tue, 6 Aug 2013 22:25:14 -0400 Subject: powerpc/eeh: Add missing procfs entry for PowerNV The procfs entry for global statistics has been missed on PowerNV platform and the patch is going to add that. Signed-off-by: Mike Qiu Acked-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ea9414c8088d..55593ee2d5aa 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1061,7 +1061,7 @@ static const struct file_operations proc_eeh_operations = { static int __init eeh_init_proc(void) { - if (machine_is(pseries)) + if (machine_is(pseries) || machine_is(powernv)) proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); return 0; } -- cgit v1.2.1 From 88f094120bd2f012ff494ae50a8d4e0d8af8f69e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:27 +1000 Subject: powerpc: Fix hypervisor facility unavaliable vector number Currently if we take hypervisor facility unavaliable (from 0xf80/0x4f80) we mark it as an OS facility unavaliable (0xf60) as the two share the same code path. The becomes a problem in facility_unavailable_exception() as we aren't able to see the hypervisor facility unavailable exceptions. Below fixes this by duplication the required macros. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4e00d223b2e3..902ca3c6b4b6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -848,7 +848,7 @@ hv_facility_unavailable_relon_trampoline: . = 0x4f80 SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_relon_hv + b hv_facility_unavailable_relon_hv STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION @@ -1175,6 +1175,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b .ret_from_except STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception) .align 7 .globl __end_handlers @@ -1188,7 +1189,7 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable) + STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* -- cgit v1.2.1 From 2517617e0de65f8f7cfe75cae745d06b1fa98586 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:29 +1000 Subject: powerpc: Fix context switch DSCR on POWER8 POWER8 allows the DSCR to be accessed directly from userspace via a new SPR number 0x3 (Rather than 0x11. DSCR SPR number 0x11 is still used on POWER8 but like POWER7, is only accessible in HV and OS modes). Currently, we allow this by setting H/FSCR DSCR bit on boot. Unfortunately this doesn't work, as the kernel needs to see the DSCR change so that it knows to no longer restore the system wide version of DSCR on context switch (ie. to set thread.dscr_inherit). This clears the H/FSCR DSCR bit initially. If a process then accesses the DSCR (via SPR 0x3), it'll trap into the kernel where we set thread.dscr_inherit in facility_unavailable_exception(). We also change _switch() so that we set or clear the H/FSCR DSCR bit based on the thread.dscr_inherit. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_64.S | 27 +++++++++++++++++++- arch/powerpc/kernel/traps.c | 58 +++++++++++++++++++++++++----------------- 2 files changed, 60 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index ab15b8d057ad..4674fe647ad7 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -584,9 +584,34 @@ BEGIN_FTR_SECTION ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) cmpwi r6,0 + li r8, FSCR_DSCR bne 1f ld r0,0(r7) -1: cmpd r0,r25 + b 3f +1: + BEGIN_FTR_SECTION_NESTED(70) + mfspr r6, SPRN_FSCR + or r6, r6, r8 + mtspr SPRN_FSCR, r6 + BEGIN_FTR_SECTION_NESTED(69) + mfspr r6, SPRN_HFSCR + or r6, r6, r8 + mtspr SPRN_HFSCR, r6 + END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69) + b 4f + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) +3: + BEGIN_FTR_SECTION_NESTED(70) + mfspr r6, SPRN_FSCR + andc r6, r6, r8 + mtspr SPRN_FSCR, r6 + BEGIN_FTR_SECTION_NESTED(69) + mfspr r6, SPRN_HFSCR + andc r6, r6, r8 + mtspr SPRN_HFSCR, r6 + END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69) + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) +4: cmpd r0,r25 beq 2f mtspr SPRN_DSCR,r0 2: diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index bf33c22e38a4..e435bc089ea3 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -44,9 +44,7 @@ #include #include #include -#ifdef CONFIG_PPC32 #include -#endif #ifdef CONFIG_PMAC_BACKLIGHT #include #endif @@ -1296,43 +1294,54 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } +#ifdef CONFIG_PPC64 void facility_unavailable_exception(struct pt_regs *regs) { static char *facility_strings[] = { - "FPU", - "VMX/VSX", - "DSCR", - "PMU SPRs", - "BHRB", - "TM", - "AT", - "EBB", - "TAR", + [FSCR_FP_LG] = "FPU", + [FSCR_VECVSX_LG] = "VMX/VSX", + [FSCR_DSCR_LG] = "DSCR", + [FSCR_PM_LG] = "PMU SPRs", + [FSCR_BHRB_LG] = "BHRB", + [FSCR_TM_LG] = "TM", + [FSCR_EBB_LG] = "EBB", + [FSCR_TAR_LG] = "TAR", }; - char *facility, *prefix; + char *facility = "unknown"; u64 value; + u8 status; + bool hv; - if (regs->trap == 0xf60) { - value = mfspr(SPRN_FSCR); - prefix = ""; - } else { + hv = (regs->trap == 0xf80); + if (hv) value = mfspr(SPRN_HFSCR); - prefix = "Hypervisor "; + else + value = mfspr(SPRN_FSCR); + + status = value >> 56; + if (status == FSCR_DSCR_LG) { + /* User is acessing the DSCR. Set the inherit bit and allow + * the user to set it directly in future by setting via the + * H/FSCR DSCR bit. + */ + current->thread.dscr_inherit = 1; + if (hv) + mtspr(SPRN_HFSCR, value | HFSCR_DSCR); + else + mtspr(SPRN_FSCR, value | FSCR_DSCR); + return; } - value = value >> 56; + if ((status < ARRAY_SIZE(facility_strings)) && + facility_strings[status]) + facility = facility_strings[status]; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (value < ARRAY_SIZE(facility_strings)) - facility = facility_strings[value]; - else - facility = "unknown"; - pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - prefix, facility, regs->nip, regs->msr); + hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); @@ -1341,6 +1350,7 @@ void facility_unavailable_exception(struct pt_regs *regs) die("Unexpected facility unavailable exception", regs, SIGABRT); } +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -- cgit v1.2.1 From c2d52644e2da8a07ecab5ca62dd0bc563089e8dc Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:30 +1000 Subject: powerpc: Save the TAR register earlier This moves us to save the Target Address Register (TAR) a earlier in __switch_to. It introduces a new function save_tar() to do this. We need to save the TAR earlier as we will overwrite it in the transactional memory reclaim/recheckpoint path. We are going to do this in a subsequent patch which will fix saving the TAR register when it's modified inside a transaction. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_64.S | 9 --------- arch/powerpc/kernel/process.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4674fe647ad7..2bd0b885b0fe 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -449,15 +449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #ifdef CONFIG_PPC_BOOK3S_64 BEGIN_FTR_SECTION - /* - * Back up the TAR across context switches. Note that the TAR is not - * available for use in the kernel. (To provide this, the TAR should - * be backed up/restored on exception entry/exit instead, and be in - * pt_regs. FIXME, this should be in pt_regs anyway (for debug).) - */ - mfspr r0,SPRN_TAR - std r0,THREAD_TAR(r3) - /* Event based branch registers */ mfspr r0, SPRN_BESCR std r0, THREAD_BESCR(r3) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c517dbe705fd..8083be20fe5e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -600,6 +600,16 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + /* Back up the TAR across context switches. + * Note that the TAR is not available for use in the kernel. (To + * provide this, the TAR should be backed up/restored on exception + * entry/exit instead, and be in pt_regs. FIXME, this should be in + * pt_regs anyway (for debug).) + * Save the TAR here before we do treclaim/trecheckpoint as these + * will change the TAR. + */ + save_tar(&prev->thread); + __switch_to_tm(prev); #ifdef CONFIG_SMP -- cgit v1.2.1 From 28e61cc466d8daace4b0f04ba2b83e0bd68f5832 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 9 Aug 2013 17:29:31 +1000 Subject: powerpc/tm: Fix context switching TAR, PPR and DSCR SPRs If a transaction is rolled back, the Target Address Register (TAR), Processor Priority Register (PPR) and Data Stream Control Register (DSCR) should be restored to the checkpointed values before the transaction began. Any changes to these SPRs inside the transaction should not be visible in the abort handler. Currently Linux doesn't save or restore the checkpointed TAR, PPR or DSCR. If we preempt a processes inside a transaction which has modified any of these, on process restore, that same transaction may be aborted we but we won't see the checkpointed versions of these SPRs. This adds checkpointed versions of these SPRs to the thread_struct and adds the save/restore of these three SPRs to the treclaim/trechkpt code. Without this if any of these SPRs are modified during a transaction, users may incorrectly see a speculated SPR value even if the transaction is aborted. Signed-off-by: Michael Neuling Cc: [v3.10] Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 3 +++ arch/powerpc/kernel/tm.S | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c7e8afc2ead0..8207459efe56 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -138,6 +138,9 @@ int main(void) DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); + DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar)); + DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); + DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, transact_vr[0])); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 51be8fb24803..0554d1f6d70d 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -233,6 +233,16 @@ dont_backup_fp: std r5, _CCR(r7) std r6, _XER(r7) + + /* ******************** TAR, PPR, DSCR ********** */ + mfspr r3, SPRN_TAR + mfspr r4, SPRN_PPR + mfspr r5, SPRN_DSCR + + std r3, THREAD_TM_TAR(r12) + std r4, THREAD_TM_PPR(r12) + std r5, THREAD_TM_DSCR(r12) + /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. */ @@ -347,6 +357,16 @@ dont_restore_fp: mtmsr r6 /* FP/Vec off again! */ restore_gprs: + + /* ******************** TAR, PPR, DSCR ********** */ + ld r4, THREAD_TM_TAR(r3) + ld r5, THREAD_TM_PPR(r3) + ld r6, THREAD_TM_DSCR(r3) + + mtspr SPRN_TAR, r4 + mtspr SPRN_PPR, r5 + mtspr SPRN_DSCR, r6 + /* ******************** CR,LR,CCR,MSR ********** */ ld r3, _CTR(r7) ld r4, _LINK(r7) -- cgit v1.2.1