diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r-- | arch/powerpc/platforms/pseries/Kconfig | 23 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/dlpar.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/dtl.c | 20 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh.c | 84 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_driver.c | 22 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-cpu.c | 7 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/io_event_irq.c | 231 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/iommu.c | 123 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/kexec.c | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 48 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/nvram.c | 24 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/offline_states.h | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/plpar_wrappers.h | 27 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 50 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/smp.c | 45 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/xics.c | 948 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/xics.h | 23 |
19 files changed, 565 insertions, 1127 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 5b3da4b4ea79..71af4c5d6c05 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -3,7 +3,10 @@ config PPC_PSERIES bool "IBM pSeries & new (POWER5-based) iSeries" select MPIC select PCI_MSI - select XICS + select PPC_XICS + select PPC_ICP_NATIVE + select PPC_ICP_HV + select PPC_ICS_RTAS select PPC_I8259 select PPC_RTAS select PPC_RTAS_DAEMON @@ -47,6 +50,24 @@ config SCANLOG tristate "Scanlog dump interface" depends on RTAS_PROC && PPC_PSERIES +config IO_EVENT_IRQ + bool "IO Event Interrupt support" + depends on PPC_PSERIES + default y + help + Select this option, if you want to enable support for IO Event + interrupts. IO event interrupt is a mechanism provided by RTAS + to return information about hardware error and non-error events + which may need OS attention. RTAS returns events for multiple + event types and scopes. Device drivers can register their handlers + to receive events. + + This option will only enable the IO event platform code. You + will still need to enable or compile the actual drivers + that use this infrastruture to handle IO event interrupts. + + Say Y if you are unsure. + config LPARCFG bool "LPAR Configuration Data" depends on PPC_PSERIES || PPC_ISERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index fc5237810ece..3556e402cbf5 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -5,7 +5,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_KEXEC) += kexec.o @@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o +obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index b74a9230edc9..57ceb92b2288 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -74,7 +74,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa) return NULL; /* The configure connector reported name does not contain a - * preceeding '/', so we allocate a buffer large enough to + * preceding '/', so we allocate a buffer large enough to * prepend this to the full_name. */ name = (char *)ccwa + ccwa->name_offset; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index c371bc06434b..e9190073bb97 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7; /* - * Size of per-cpu log buffers. Default is just under 16 pages worth. + * Size of per-cpu log buffers. Firmware requires that the buffer does + * not cross a 4k boundary. */ -static int dtl_buf_entries = (16 * 85); - +static int dtl_buf_entries = N_DISPATCH_LOG; #ifdef CONFIG_VIRT_CPU_ACCOUNTING struct dtl_ring { @@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl) /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ - ((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry); + ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; hwcpu = get_hard_smp_processor_id(dtl->cpu); addr = __pa(dtl->buf); @@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl) long int rc; struct dtl_entry *buf = NULL; + if (!dtl_cache) + return -ENOMEM; + /* only allow one reader */ if (dtl->buf) return -EBUSY; n_entries = dtl_buf_entries; - buf = kmalloc_node(n_entries * sizeof(struct dtl_entry), - GFP_KERNEL, cpu_to_node(dtl->cpu)); + buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); @@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl) spin_unlock(&dtl->lock); if (rc) - kfree(buf); + kmem_cache_free(dtl_cache, buf); return rc; } @@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl) { spin_lock(&dtl->lock); dtl_stop(dtl); - kfree(dtl->buf); + kmem_cache_free(dtl_cache, dtl->buf); dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); @@ -365,7 +367,7 @@ static int dtl_init(void) event_mask_file = debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); - buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600, + buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); if (!event_mask_file || !buf_entries_file) { diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 3cc4d102b1f1..46b55cf563e3 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -65,7 +65,7 @@ * with EEH. * * Ideally, a PCI device driver, when suspecting that an isolation - * event has occured (e.g. by reading 0xff's), will then ask EEH + * event has occurred (e.g. by reading 0xff's), will then ask EEH * whether this is the case, and then take appropriate steps to * reset the PCI slot, the PCI device, and then resume operations. * However, until that day, the checking is done here, with the @@ -93,6 +93,7 @@ static int ibm_slot_error_detail; static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_bridge; +static int ibm_configure_pe; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); @@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity) pci_regs_buf[0] = 0; rtas_pci_enable(pdn, EEH_THAW_MMIO); + rtas_configure_bridge(pdn); + eeh_restore_bars(pdn); loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); @@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } +void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) +{ + struct device_node *dn; + + for_each_child_of_node(parent, dn) { + if (PCI_DN(dn)) { + + struct pci_dev *dev = PCI_DN(dn)->pcidev; + + if (dev && dev->driver) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); + } + } +} + +void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +{ + struct pci_dev *dev; + dn = find_device_pe(dn); + + /* Back up one, since config addrs might be shared */ + if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) + dn = dn->parent; + + dev = PCI_DN(dn)->pcidev; + if (dev) + *freset |= dev->needs_freset; + + __eeh_set_pe_freset(dn, freset); +} + /** * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze * @dn device node @@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) if (pdn->eeh_pe_config_addr) config_addr = pdn->eeh_pe_config_addr; - rc = rtas_call(ibm_set_slot_reset,4,1, NULL, + rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), state); - if (rc) - printk (KERN_WARNING "EEH: Unable to reset the failed slot," - " (%d) #RST=%d dn=%s\n", - rc, state, pdn->node->full_name); + + /* Fundamental-reset not supported on this PE, try hot-reset */ + if (rc == -8 && state == 3) { + rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), 1); + if (rc) + printk(KERN_WARNING + "EEH: Unable to reset the failed slot," + " #RST=%d dn=%s\n", + rc, pdn->node->full_name); + } } /** @@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat /** * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second * @pdn: pci device node to be reset. - * - * Return 0 if success, else a non-zero value. */ static void __rtas_set_slot_reset(struct pci_dn *pdn) { - struct pci_dev *dev = pdn->pcidev; + unsigned int freset = 0; - /* Determine type of EEH reset required by device, - * default hot reset or fundamental reset - */ - if (dev && dev->needs_freset) + /* Determine type of EEH reset required for + * Partitionable Endpoint, a hot-reset (1) + * or a fundamental reset (3). + * A fundamental reset required by any device under + * Partitionable Endpoint trumps hot-reset. + */ + eeh_set_pe_freset(pdn->node, &freset); + + if (freset) rtas_pci_slot_reset(pdn, 3); else rtas_pci_slot_reset(pdn, 1); @@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn) { int config_addr; int rc; + int token; /* Use PE configuration address, if present */ config_addr = pdn->eeh_config_addr; if (pdn->eeh_pe_config_addr) config_addr = pdn->eeh_pe_config_addr; - rc = rtas_call(ibm_configure_bridge,3,1, NULL, + /* Use new configure-pe function, if supported */ + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) + token = ibm_configure_pe; + else + token = ibm_configure_bridge; + + rc = rtas_call(token, 3, 1, NULL, config_addr, BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); @@ -1077,6 +1132,7 @@ void __init eeh_init(void) ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); + ibm_configure_pe = rtas_token("ibm,configure-pe"); if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) return; diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index b8d70f5d9aa9..1b6cb10589e0 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *location, *pci_str, *drv_str; + const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; frozen_dn = find_device_pe(event->dn); if (!frozen_dn) { @@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; - if (frozen_pdn->pcidev) { - pci_str = pci_name (frozen_pdn->pcidev); - drv_str = pcid_name (frozen_pdn->pcidev); - } else { - pci_str = eeh_pci_name(event->dev); - drv_str = pcid_name (event->dev); - } + pci_str = eeh_pci_name(event->dev); + drv_str = pcid_name(event->dev); if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; @@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) printk(KERN_WARNING "EEH: This PCI device has failed %d times in the last hour:\n", frozen_pdn->eeh_freeze_count); + + if (frozen_pdn->pcidev) { + bus_pci_str = pci_name(frozen_pdn->pcidev); + bus_drv_str = pcid_name(frozen_pdn->pcidev); + printk(KERN_WARNING + "EEH: Bus location=%s driver=%s pci addr=%s\n", + location, bus_drv_str, bus_pci_str); + } + printk(KERN_WARNING - "EEH: location=%s driver=%s pci addr=%s\n", + "EEH: Device location=%s driver=%s pci addr=%s\n", location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index fd50ccd4bac1..46f13a3c5d09 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -19,6 +19,7 @@ */ #include <linux/kernel.h> +#include <linux/interrupt.h> #include <linux/delay.h> #include <linux/cpu.h> #include <asm/system.h> @@ -28,7 +29,7 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/pSeries_reconfig.h> -#include "xics.h" +#include <asm/xics.h> #include "plpar_wrappers.h" #include "offline_states.h" @@ -216,7 +217,7 @@ static void pseries_cpu_die(unsigned int cpu) cpu, pcpu, cpu_status); } - /* Isolation and deallocation are definatly done by + /* Isolation and deallocation are definitely done by * drslot_chrp_cpu. If they were not they would be * done here. Change isolate state to Isolate and * change allocation-state to Unusable. @@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np) } for_each_cpu(cpu, tmp) { - BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask)); + BUG_ON(cpu_present(cpu)); set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, *intserv++); } diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c new file mode 100644 index 000000000000..c829e6067d54 --- /dev/null +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -0,0 +1,231 @@ +/* + * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/list.h> +#include <linux/notifier.h> + +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/irq.h> +#include <asm/io_event_irq.h> + +#include "pseries.h" + +/* + * IO event interrupt is a mechanism provided by RTAS to return + * information about hardware error and non-error events. Device + * drivers can register their event handlers to receive events. + * Device drivers are expected to use atomic_notifier_chain_register() + * and atomic_notifier_chain_unregister() to register and unregister + * their event handlers. Since multiple IO event types and scopes + * share an IO event interrupt, the event handlers are called one + * by one until the IO event is claimed by one of the handlers. + * The event handlers are expected to return NOTIFY_OK if the + * event is handled by the event handler or NOTIFY_DONE if the + * event does not belong to the handler. + * + * Usage: + * + * Notifier function: + * #include <asm/io_event_irq.h> + * int event_handler(struct notifier_block *nb, unsigned long val, void *data) { + * p = (struct pseries_io_event_sect_data *) data; + * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE; + * : + * : + * return NOTIFY_OK; + * } + * struct notifier_block event_nb = { + * .notifier_call = event_handler, + * } + * + * Registration: + * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb); + * + * Unregistration: + * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb); + */ + +ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list); +EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list); + +static int ioei_check_exception_token; + +/* pSeries event log format */ + +/* Two bytes ASCII section IDs */ +#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S') +#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T') +#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S') +#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W') +#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R') +#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M') +#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E') +#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I') +#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') +#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') + +/* Vendor specific Platform Event Log Format, Version 6, section header */ +struct pseries_elog_section { + uint16_t id; /* 0x00 2-byte ASCII section ID */ + uint16_t length; /* 0x02 Section length in bytes */ + uint8_t version; /* 0x04 Section version */ + uint8_t subtype; /* 0x05 Section subtype */ + uint16_t creator_component; /* 0x06 Creator component ID */ + uint8_t data[]; /* 0x08 Start of section data */ +}; + +static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; + +/** + * Find data portion of a specific section in RTAS extended event log. + * @elog: RTAS error/event log. + * @sect_id: secsion ID. + * + * Return: + * pointer to the section data of the specified section + * NULL if not found + */ +static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog, + uint16_t sect_id) +{ + struct rtas_ext_event_log_v6 *xelog = + (struct rtas_ext_event_log_v6 *) elog->buffer; + struct pseries_elog_section *sect; + unsigned char *p, *log_end; + + /* Check that we understand the format */ + if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || + xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || + xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM) + return NULL; + + log_end = elog->buffer + elog->extended_log_length; + p = xelog->vendor_log; + while (p < log_end) { + sect = (struct pseries_elog_section *)p; + if (sect->id == sect_id) + return sect; + p += sect->length; + } + return NULL; +} + +/** + * Find the data portion of an IO Event section from event log. + * @elog: RTAS error/event log. + * + * Return: + * pointer to a valid IO event section data. NULL if not found. + */ +static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) +{ + struct pseries_elog_section *sect; + + /* We should only ever get called for io-event interrupts, but if + * we do get called for another type then something went wrong so + * make some noise about it. + * RTAS_TYPE_IO only exists in extended event log version 6 or later. + * No need to check event log version. + */ + if (unlikely(elog->type != RTAS_TYPE_IO)) { + printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d", + elog->type); + return NULL; + } + + sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT); + if (unlikely(!sect)) { + printk_once(KERN_WARNING "io_event_irq: RTAS extended event " + "log does not contain an IO Event section. " + "Could be a bug in system firmware!\n"); + return NULL; + } + return (struct pseries_io_event *) §->data; +} + +/* + * PAPR: + * - check-exception returns the first found error or event and clear that + * error or event so it is reported once. + * - Each interrupt returns one event. If a plateform chooses to report + * multiple events through a single interrupt, it must ensure that the + * interrupt remains asserted until check-exception has been used to + * process all out-standing events for that interrupt. + * + * Implementation notes: + * - Events must be processed in the order they are returned. Hence, + * sequential in nature. + * - The owner of an event is determined by combinations of scope, + * event type, and sub-type. There is no easy way to pre-sort clients + * by scope or event type alone. For example, Torrent ISR route change + * event is reported with scope 0x00 (Not Applicatable) rather than + * 0x3B (Torrent-hub). It is better to let the clients to identify + * who owns the the event. + */ + +static irqreturn_t ioei_interrupt(int irq, void *dev_id) +{ + struct pseries_io_event *event; + int rtas_rc; + + for (;;) { + rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_IO_EVENTS, 1 /* Time Critical */, + __pa(ioei_rtas_buf), + RTAS_DATA_BUF_SIZE); + if (rtas_rc != 0) + break; + + event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf); + if (!event) + continue; + + atomic_notifier_call_chain(&pseries_ioei_notifier_list, + 0, event); + } + return IRQ_HANDLED; +} + +static int __init ioei_init(void) +{ + struct device_node *np; + + ioei_check_exception_token = rtas_token("check-exception"); + if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) { + pr_warning("IO Event IRQ not supported on this system !\n"); + return -ENODEV; + } + np = of_find_node_by_path("/event-sources/ibm,io-events"); + if (np) { + request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT"); + of_node_put(np); + } else { + pr_err("io_event_irq: No ibm,io-events on system! " + "IO Event interrupt disabled.\n"); + return -ENODEV; + } + return 0; +} +machine_subsys_initcall(pseries, ioei_init); + diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 154c464cdca5..01faab9456ca 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -272,7 +272,7 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) return tce_ret; } -/* this is compatable with cells for the device tree property */ +/* this is compatible with cells for the device tree property */ struct dynamic_dma_window_prop { __be32 liobn; /* tce table number */ __be64 dma_base; /* address hi,lo */ @@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddr_avail; + const u32 *ddw_avail; u64 liobn; int len, ret; - ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len); + ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win64 || !ddr_avail || len < 3 * sizeof(u32)) + if (!win64) return; + if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + goto delprop; + dwp = win64->value; liobn = (u64)be32_to_cpu(dwp->liobn); @@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np) pr_debug("%s successfully cleared tces in window.\n", np->full_name); - ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn); + ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); if (ret) pr_warning("%s: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddr_avail[2], liobn); + np->full_name, ret, ddw_avail[2], liobn); else pr_debug("%s: successfully removed direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", - np->full_name, ret, ddr_avail[2], liobn); -} + np->full_name, ret, ddw_avail[2], liobn); +delprop: + ret = prom_remove_property(np, win64); + if (ret) + pr_warning("%s: failed to remove direct window property: %d\n", + np->full_name, ret); +} -static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn) +static u64 find_existing_ddw(struct device_node *pdn) { - struct device_node *dn; - struct pci_dn *pcidn; struct direct_window *window; const struct dynamic_dma_window_prop *direct64; u64 dma_addr = 0; - dn = pci_device_to_OF_node(dev); - pcidn = PCI_DN(dn); spin_lock(&direct_window_list_lock); /* check if we already created a window and dupe that config if so */ list_for_each_entry(window, &direct_window_list, list) { @@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node * return dma_addr; } -static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn) +static int find_existing_ddw_windows(void) { - struct device_node *dn; - struct pci_dn *pcidn; int len; + struct device_node *pdn; struct direct_window *window; const struct dynamic_dma_window_prop *direct64; - u64 dma_addr = 0; - dn = pci_device_to_OF_node(dev); - pcidn = PCI_DN(dn); - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (direct64) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + for_each_node_with_property(pdn, DIRECT64_PROPNAME) { + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); + if (!direct64) + continue; + window = kzalloc(sizeof(*window), GFP_KERNEL); - if (!window) { + if (!window || len < sizeof(struct dynamic_dma_window_prop)) { + kfree(window); remove_ddw(pdn); - } else { - window->device = pdn; - window->prop = direct64; - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); - dma_addr = direct64->dma_base; + continue; } + + window->device = pdn; + window->prop = direct64; + spin_lock(&direct_window_list_lock); + list_add(&window->list, &direct_window_list); + spin_unlock(&direct_window_list_lock); } - return dma_addr; + return 0; } +machine_arch_initcall(pseries, find_existing_ddw_windows); -static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, +static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_query_response *query) { struct device_node *dn; @@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, if (pcidn->eeh_pe_config_addr) cfg_addr = pcidn->eeh_pe_config_addr; buid = pcidn->phb->buid; - ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query, + ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" - " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid), + " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), BUID_LO(buid), ret); return ret; } -static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, +static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_create_response *create, int page_shift, int window_shift) { @@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr, + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " - "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1], + "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); @@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddr_avail); + const u32 *uninitialized_var(ddw_avail); struct direct_window *window; - struct property *uninitialized_var(win64); + struct property *win64; struct dynamic_dma_window_prop *ddwprop; mutex_lock(&direct_window_init_mutex); - dma_addr = dupe_ddw_if_already_created(dev, pdn); - if (dma_addr != 0) - goto out_unlock; - - dma_addr = dupe_ddw_if_kexec(dev, pdn); + dma_addr = find_existing_ddw(pdn); if (dma_addr != 0) goto out_unlock; @@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddr_avail || len < 3 * sizeof(u32)) + ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); + if (!ddw_avail || len < 3 * sizeof(u32)) goto out_unlock; /* @@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * of page sizes: supported and supported for migrate-dma. */ dn = pci_device_to_OF_node(dev); - ret = query_ddw(dev, ddr_avail, &query); + ret = query_ddw(dev, ddw_avail, &query); if (ret != 0) goto out_unlock; @@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) } win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); + win64->length = sizeof(*ddwprop); if (!win64->name || !win64->value) { dev_info(&dev->dev, "couldn't allocate property name and value\n"); goto out_free_prop; } - ret = create_ddw(dev, ddr_avail, &create, page_shift, len); + ret = create_ddw(dev, ddw_avail, &create, page_shift, len); if (ret != 0) goto out_free_prop; @@ -976,7 +981,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); /* dev setup for LPAR is a little tricky, since the device tree might - * contain the dma-window properties per-device and not neccesarily + * contain the dma-window properties per-device and not necessarily * for the bus. So we need to search upwards in the tree until we * either hit a dma-window property, OR find a parent with a table * already allocated. @@ -1021,19 +1026,22 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) const void *dma_window = NULL; u64 dma_offset; - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + if (!dev->dma_mask) return -EIO; + if (!dev_is_pci(dev)) + goto check_mask; + + pdev = to_pci_dev(dev); + /* only attempt to use a new window if 64-bit DMA is requested */ if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) { - pdev = to_pci_dev(dev); - dn = pci_device_to_OF_node(pdev); dev_dbg(dev, "node is %s\n", dn->full_name); /* * the device tree might contain the dma-window properties - * per-device and not neccesarily for the bus. So we need to + * per-device and not necessarily for the bus. So we need to * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ @@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) } } - /* fall-through to iommu ops */ - if (!ddw_enabled) { - dev_info(dev, "Using 32-bit DMA via iommu\n"); + /* fall back on iommu ops, restore table pointer with ops */ + if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) { + dev_info(dev, "Restoring 32-bit DMA via iommu\n"); set_dma_ops(dev, &dma_iommu_ops); + pci_dma_dev_setup_pSeriesLP(pdev); } +check_mask: + if (!dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; return 0; } diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 77d38a5e2ff9..54cf3a4aa16b 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -7,15 +7,18 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/kernel.h> +#include <linux/interrupt.h> + #include <asm/machdep.h> #include <asm/page.h> #include <asm/firmware.h> #include <asm/kexec.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/smp.h> #include "pseries.h" -#include "xics.h" #include "plpar_wrappers.h" static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index ca5d5898d320..39e6e0a7b2fa 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~_PAGE_COHERENT; + if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) + flags |= H_COALESCE_CAND; lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { @@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long i, pix, rc; unsigned long flags = 0; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long va; unsigned long hash, index, shift, hidx, slot; @@ -771,3 +773,47 @@ out: local_irq_restore(flags); } #endif + +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; +} +EXPORT_SYMBOL(h_get_mpp); + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + + rc = plpar_hcall9(H_GET_MPP_X, retbuf); + + mpp_x_data->coalesced_bytes = retbuf[0]; + mpp_x_data->pool_coalesced_bytes = retbuf[1]; + mpp_x_data->pool_purr_cycles = retbuf[2]; + mpp_x_data->pool_spurr_cycles = retbuf[3]; + + return rc; +} diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 419707b07248..00cc3a094885 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -480,8 +480,32 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, const char *new_msgs, unsigned long new_len) { static unsigned int oops_count = 0; + static bool panicking = false; size_t text_len; + switch (reason) { + case KMSG_DUMP_RESTART: + case KMSG_DUMP_HALT: + case KMSG_DUMP_POWEROFF: + /* These are almost always orderly shutdowns. */ + return; + case KMSG_DUMP_OOPS: + case KMSG_DUMP_KEXEC: + break; + case KMSG_DUMP_PANIC: + panicking = true; + break; + case KMSG_DUMP_EMERG: + if (panicking) + /* Panic report already captured. */ + return; + break; + default: + pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", + __FUNCTION__, (int) reason); + return; + } + if (clobbering_unread_rtas_event()) return; diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h index 75a6f480d931..08672d9136ab 100644 --- a/arch/powerpc/platforms/pseries/offline_states.h +++ b/arch/powerpc/platforms/pseries/offline_states.h @@ -34,6 +34,4 @@ static inline void set_default_offline_state(int cpu) #endif extern enum cpu_state_vals get_preferred_offline_state(int cpu); -extern int start_secondary(void); -extern void start_secondary_resume(void); #endif diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index d9801117124b..4bf21207d7d3 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len, lbuf[1]); } -static inline long plpar_eoi(unsigned long xirr) -{ - return plpar_hcall_norets(H_EOI, xirr); -} - -static inline long plpar_cppr(unsigned long cppr) -{ - return plpar_hcall_norets(H_CPPR, cppr); -} - -static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) -{ - return plpar_hcall_norets(H_IPI, servernum, mfrr); -} - -static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_XIRR, retbuf, cppr); - - *xirr_ret = retbuf[0]; - - return rc; -} - #endif /* _PSERIES_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c55d7ad9c648..086d2ae4e06a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, - irq_map[irq].hwirq, + virq_to_hw(irq), RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, critical, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, - irq_map[irq].hwirq, + virq_to_hw(irq), RTAS_INTERNAL_ERROR, 1 /*Time Critical */, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) struct rtas_error_log *h, *errhdr = NULL; if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3\n"); + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c319d04aa799..593acceeff96 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -53,9 +53,9 @@ #include <asm/irq.h> #include <asm/time.h> #include <asm/nvram.h> -#include "xics.h" #include <asm/pmc.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/ppc-pci.h> #include <asm/i8259.h> #include <asm/udbg.h> @@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void) mpic_assign_isu(mpic, n, isuaddr); } + /* Setup top-level get_irq */ + ppc_md.get_irq = mpic_get_irq; + /* All ISUs are setup, complete initialization */ mpic_init(mpic); @@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void) static void __init pseries_xics_init_IRQ(void) { - xics_init_IRQ(); + xics_init(); pseries_setup_i8259_cascade(); } @@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void) if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); ppc_md.init_IRQ = pseries_mpic_init_IRQ; - ppc_md.get_irq = mpic_get_irq; setup_kexec_cpu_down_mpic(); smp_init_pseries_mpic(); return; @@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +struct kmem_cache *dtl_cache; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Allocate space for the dispatch trace log for all possible cpus @@ -291,10 +295,12 @@ static int alloc_dispatch_logs(void) if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; + if (!dtl_cache) + return 0; + for_each_possible_cpu(cpu) { pp = &paca[cpu]; - dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, - cpu_to_node(cpu)); + dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); if (!dtl) { pr_warn("Failed to allocate dispatch trace log for cpu %d\n", cpu); @@ -324,10 +330,27 @@ static int alloc_dispatch_logs(void) return 0; } - -early_initcall(alloc_dispatch_logs); +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +static inline int alloc_dispatch_logs(void) +{ + return 0; +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +static int alloc_dispatch_log_kmem_cache(void) +{ + dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, NULL); + if (!dtl_cache) { + pr_warn("Failed to create dispatch trace log buffer cache\n"); + pr_warn("Stolen time statistics will be unreliable\n"); + return 0; + } + + return alloc_dispatch_logs(); +} +early_initcall(alloc_dispatch_log_kmem_cache); + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ @@ -378,7 +401,7 @@ static int __init pSeries_init_panel(void) return 0; } -arch_initcall(pSeries_init_panel); +machine_arch_initcall(pseries, pSeries_init_panel); static int pseries_set_dabr(unsigned long dabr) { @@ -395,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr) #define CMO_CHARACTERISTICS_TOKEN 44 #define CMO_MAXLENGTH 1026 +void pSeries_coalesce_init(void) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data)) + powerpc_firmware_features |= FW_FEATURE_XCMO; + else + powerpc_firmware_features &= ~FW_FEATURE_XCMO; +} + /** * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) @@ -464,6 +497,7 @@ void pSeries_cmo_feature_init(void) pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); powerpc_firmware_features |= FW_FEATURE_CMO; + pSeries_coalesce_init(); } else pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 0317cce877c6..fbffd7e47ab8 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -44,10 +44,11 @@ #include <asm/mpic.h> #include <asm/vdso_datapage.h> #include <asm/cputhreads.h> +#include <asm/mpic.h> +#include <asm/xics.h> #include "plpar_wrappers.h" #include "pseries.h" -#include "xics.h" #include "offline_states.h" @@ -64,8 +65,8 @@ int smp_query_cpu_stopped(unsigned int pcpu) int qcss_tok = rtas_token("query-cpu-stopped-state"); if (qcss_tok == RTAS_UNKNOWN_SERVICE) { - printk(KERN_INFO "Firmware doesn't support " - "query-cpu-stopped-state\n"); + printk_once(KERN_INFO + "Firmware doesn't support query-cpu-stopped-state\n"); return QCSS_HARDWARE_ERROR; } @@ -112,10 +113,10 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) /* Fixup atomic count: it exited inside IRQ handler. */ task_thread_info(paca[lcpu].__current)->preempt_count = 0; - +#ifdef CONFIG_HOTPLUG_CPU if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) goto out; - +#endif /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. @@ -130,11 +131,12 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 0; } +#ifdef CONFIG_HOTPLUG_CPU out: +#endif return 1; } -#ifdef CONFIG_XICS static void __devinit smp_xics_setup_cpu(int cpu) { if (cpu != boot_cpuid) @@ -144,20 +146,18 @@ static void __devinit smp_xics_setup_cpu(int cpu) vpa_init(cpu); cpumask_clear_cpu(cpu, of_spin_mask); +#ifdef CONFIG_HOTPLUG_CPU set_cpu_current_state(cpu, CPU_STATE_ONLINE); set_default_offline_state(cpu); - +#endif } -#endif /* CONFIG_XICS */ -static void __devinit smp_pSeries_kick_cpu(int nr) +static int __devinit smp_pSeries_kick_cpu(int nr) { - long rc; - unsigned long hcpuid; BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) - return; + return -ENOENT; /* * The processor is currently spinning, waiting for the @@ -165,16 +165,22 @@ static void __devinit smp_pSeries_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca[nr].cpu_start = 1; - +#ifdef CONFIG_HOTPLUG_CPU set_preferred_offline_state(nr, CPU_STATE_ONLINE); if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { + long rc; + unsigned long hcpuid; + hcpuid = get_hard_smp_processor_id(nr); rc = plpar_hcall_norets(H_PROD, hcpuid); if (rc != H_SUCCESS) printk(KERN_ERR "Error: Prod to wake up processor %d " "Ret= %ld\n", nr, rc); } +#endif + + return 0; } static int smp_pSeries_cpu_bootable(unsigned int nr) @@ -192,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) return 1; } -#ifdef CONFIG_MPIC + static struct smp_ops_t pSeries_mpic_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_mpic_setup_cpu, }; -#endif -#ifdef CONFIG_XICS + static struct smp_ops_t pSeries_xics_smp_ops = { - .message_pass = smp_xics_message_pass, - .probe = smp_xics_probe, + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ + .probe = xics_smp_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_xics_setup_cpu, .cpu_bootable = smp_pSeries_cpu_bootable, }; -#endif /* This is called very early */ static void __init smp_init_pseries(void) @@ -240,14 +245,12 @@ static void __init smp_init_pseries(void) pr_debug(" <- smp_init_pSeries()\n"); } -#ifdef CONFIG_MPIC void __init smp_init_pseries_mpic(void) { smp_ops = &pSeries_mpic_smp_ops; smp_init_pseries(); } -#endif void __init smp_init_pseries_xics(void) { diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c deleted file mode 100644 index 6c1e638f0ce9..000000000000 --- a/arch/powerpc/platforms/pseries/xics.c +++ /dev/null @@ -1,948 +0,0 @@ -/* - * arch/powerpc/platforms/pseries/xics.c - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/types.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/radix-tree.h> -#include <linux/cpu.h> -#include <linux/msi.h> -#include <linux/of.h> -#include <linux/percpu.h> - -#include <asm/firmware.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/smp.h> -#include <asm/rtas.h> -#include <asm/hvcall.h> -#include <asm/machdep.h> - -#include "xics.h" -#include "plpar_wrappers.h" - -static struct irq_host *xics_host; - -#define XICS_IPI 2 -#define XICS_IRQ_SPURIOUS 0 - -/* Want a priority other than 0. Various HW issues require this. */ -#define DEFAULT_PRIORITY 5 - -/* - * Mark IPIs as higher priority so we can take them inside interrupts that - * arent marked IRQF_DISABLED - */ -#define IPI_PRIORITY 4 - -/* The least favored priority */ -#define LOWEST_PRIORITY 0xFF - -/* The number of priorities defined above */ -#define MAX_NUM_PRIORITIES 3 - -static unsigned int default_server = 0xFF; -static unsigned int default_distrib_server = 0; -static unsigned int interrupt_server_size = 8; - -/* RTAS service tokens */ -static int ibm_get_xive; -static int ibm_set_xive; -static int ibm_int_on; -static int ibm_int_off; - -struct xics_cppr { - unsigned char stack[MAX_NUM_PRIORITIES]; - int index; -}; - -static DEFINE_PER_CPU(struct xics_cppr, xics_cppr); - -/* Direct hardware low level accessors */ - -/* The part of the interrupt presentation layer that we care about */ -struct xics_ipl { - union { - u32 word; - u8 bytes[4]; - } xirr_poll; - union { - u32 word; - u8 bytes[4]; - } xirr; - u32 dummy; - union { - u32 word; - u8 bytes[4]; - } qirr; -}; - -static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; - -static inline unsigned int direct_xirr_info_get(void) -{ - int cpu = smp_processor_id(); - - return in_be32(&xics_per_cpu[cpu]->xirr.word); -} - -static inline void direct_xirr_info_set(unsigned int value) -{ - int cpu = smp_processor_id(); - - out_be32(&xics_per_cpu[cpu]->xirr.word, value); -} - -static inline void direct_cppr_info(u8 value) -{ - int cpu = smp_processor_id(); - - out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value); -} - -static inline void direct_qirr_info(int n_cpu, u8 value) -{ - out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); -} - - -/* LPAR low level accessors */ - -static inline unsigned int lpar_xirr_info_get(unsigned char cppr) -{ - unsigned long lpar_rc; - unsigned long return_value; - - lpar_rc = plpar_xirr(&return_value, cppr); - if (lpar_rc != H_SUCCESS) - panic(" bad return code xirr - rc = %lx\n", lpar_rc); - return (unsigned int)return_value; -} - -static inline void lpar_xirr_info_set(unsigned int value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_eoi(value); - if (lpar_rc != H_SUCCESS) - panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc, - value); -} - -static inline void lpar_cppr_info(u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_cppr(value); - if (lpar_rc != H_SUCCESS) - panic("bad return code cppr - rc = %lx\n", lpar_rc); -} - -static inline void lpar_qirr_info(int n_cpu , u8 value) -{ - unsigned long lpar_rc; - - lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value); - if (lpar_rc != H_SUCCESS) - panic("bad return code qirr - rc = %lx\n", lpar_rc); -} - - -/* Interface to generic irq subsystem */ - -#ifdef CONFIG_SMP -/* - * For the moment we only implement delivery to all cpus or one cpu. - * - * If the requested affinity is cpu_all_mask, we set global affinity. - * If not we set it to the first cpu in the mask, even if multiple cpus - * are set. This is so things like irqbalance (which set core and package - * wide affinities) do the right thing. - */ -static int get_irq_server(unsigned int virq, const struct cpumask *cpumask, - unsigned int strict_check) -{ - - if (!distribute_irqs) - return default_server; - - if (!cpumask_subset(cpu_possible_mask, cpumask)) { - int server = cpumask_first_and(cpu_online_mask, cpumask); - - if (server < nr_cpu_ids) - return get_hard_smp_processor_id(server); - - if (strict_check) - return -1; - } - - /* - * Workaround issue with some versions of JS20 firmware that - * deliver interrupts to cpus which haven't been started. This - * happens when using the maxcpus= boot option. - */ - if (cpumask_equal(cpu_online_mask, cpu_present_mask)) - return default_distrib_server; - - return default_server; -} -#else -#define get_irq_server(virq, cpumask, strict_check) (default_server) -#endif - -static void xics_unmask_irq(struct irq_data *d) -{ - unsigned int irq; - int call_status; - int server; - - pr_devel("xics: unmask virq %d\n", d->irq); - - irq = (unsigned int)irq_map[d->irq].hwirq; - pr_devel(" -> map to hwirq 0x%x\n", irq); - if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return; - - server = get_irq_server(d->irq, d->affinity, 0); - - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, - DEFAULT_PRIORITY); - if (call_status != 0) { - printk(KERN_ERR - "%s: ibm_set_xive irq %u server %x returned %d\n", - __func__, irq, server, call_status); - return; - } - - /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", - __func__, irq, call_status); - return; - } -} - -static unsigned int xics_startup(struct irq_data *d) -{ - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (d->msi_desc) - unmask_msi_irq(d); - - /* unmask it */ - xics_unmask_irq(d); - return 0; -} - -static void xics_mask_real_irq(struct irq_data *d) -{ - int call_status; - - if (d->irq == XICS_IPI) - return; - - call_status = rtas_call(ibm_int_off, 1, 1, NULL, d->irq); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", - __func__, d->irq, call_status); - return; - } - - /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, d->irq, - default_server, 0xff); - if (call_status != 0) { - printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", - __func__, d->irq, call_status); - return; - } -} - -static void xics_mask_irq(struct irq_data *d) -{ - unsigned int irq; - - pr_devel("xics: mask virq %d\n", d->irq); - - irq = (unsigned int)irq_map[d->irq].hwirq; - if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return; - xics_mask_real_irq(d); -} - -static void xics_mask_unknown_vec(unsigned int vec) -{ - printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec); - xics_mask_real_irq(irq_get_irq_data(vec)); -} - -static inline unsigned int xics_xirr_vector(unsigned int xirr) -{ - /* - * The top byte is the old cppr, to be restored on EOI. - * The remaining 24 bits are the vector. - */ - return xirr & 0x00ffffff; -} - -static void push_cppr(unsigned int vec) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) - return; - - if (vec == XICS_IPI) - os_cppr->stack[++os_cppr->index] = IPI_PRIORITY; - else - os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY; -} - -static unsigned int xics_get_irq_direct(void) -{ - unsigned int xirr = direct_xirr_info_get(); - unsigned int vec = xics_xirr_vector(xirr); - unsigned int irq; - - if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; - - irq = irq_radix_revmap_lookup(xics_host, vec); - if (likely(irq != NO_IRQ)) { - push_cppr(vec); - return irq; - } - - /* We don't have a linux mapping, so have rtas mask it. */ - xics_mask_unknown_vec(vec); - - /* We might learn about it later, so EOI it */ - direct_xirr_info_set(xirr); - return NO_IRQ; -} - -static unsigned int xics_get_irq_lpar(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]); - unsigned int vec = xics_xirr_vector(xirr); - unsigned int irq; - - if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; - - irq = irq_radix_revmap_lookup(xics_host, vec); - if (likely(irq != NO_IRQ)) { - push_cppr(vec); - return irq; - } - - /* We don't have a linux mapping, so have RTAS mask it. */ - xics_mask_unknown_vec(vec); - - /* We might learn about it later, so EOI it */ - lpar_xirr_info_set(xirr); - return NO_IRQ; -} - -static unsigned char pop_cppr(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - if (WARN_ON(os_cppr->index < 1)) - return LOWEST_PRIORITY; - - return os_cppr->stack[--os_cppr->index]; -} - -static void xics_eoi_direct(struct irq_data *d) -{ - unsigned int irq = (unsigned int)irq_map[d->irq].hwirq; - - iosync(); - direct_xirr_info_set((pop_cppr() << 24) | irq); -} - -static void xics_eoi_lpar(struct irq_data *d) -{ - unsigned int irq = (unsigned int)irq_map[d->irq].hwirq; - - iosync(); - lpar_xirr_info_set((pop_cppr() << 24) | irq); -} - -static int -xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) -{ - unsigned int irq; - int status; - int xics_status[2]; - int irq_server; - - irq = (unsigned int)irq_map[d->irq].hwirq; - if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return -1; - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); - - if (status) { - printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", - __func__, irq, status); - return -1; - } - - irq_server = get_irq_server(d->irq, cpumask, 1); - if (irq_server == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - printk(KERN_WARNING - "%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); - return -1; - } - - status = rtas_call(ibm_set_xive, 3, 1, NULL, - irq, irq_server, xics_status[1]); - - if (status) { - printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", - __func__, irq, status); - return -1; - } - - return 0; -} - -static struct irq_chip xics_pic_direct = { - .name = "XICS", - .irq_startup = xics_startup, - .irq_mask = xics_mask_irq, - .irq_unmask = xics_unmask_irq, - .irq_eoi = xics_eoi_direct, - .irq_set_affinity = xics_set_affinity -}; - -static struct irq_chip xics_pic_lpar = { - .name = "XICS", - .irq_startup = xics_startup, - .irq_mask = xics_mask_irq, - .irq_unmask = xics_unmask_irq, - .irq_eoi = xics_eoi_lpar, - .irq_set_affinity = xics_set_affinity -}; - - -/* Interface to arch irq controller subsystem layer */ - -/* Points to the irq_chip we're actually using */ -static struct irq_chip *xics_irq_chip; - -static int xics_host_match(struct irq_host *h, struct device_node *node) -{ - /* IBM machines have interrupt parents of various funky types for things - * like vdevices, events, etc... The trick we use here is to match - * everything here except the legacy 8259 which is compatible "chrp,iic" - */ - return !of_device_is_compatible(node, "chrp,iic"); -} - -static int xics_host_map(struct irq_host *h, unsigned int virq, - irq_hw_number_t hw) -{ - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); - - /* Insert the interrupt mapping into the radix tree for fast lookup */ - irq_radix_revmap_insert(xics_host, virq, hw); - - irq_set_status_flags(virq, IRQ_LEVEL); - irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq); - return 0; -} - -static int xics_host_xlate(struct irq_host *h, struct device_node *ct, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_flags) - -{ - /* Current xics implementation translates everything - * to level. It is not technically right for MSIs but this - * is irrelevant at this point. We might get smarter in the future - */ - *out_hwirq = intspec[0]; - *out_flags = IRQ_TYPE_LEVEL_LOW; - - return 0; -} - -static struct irq_host_ops xics_host_ops = { - .match = xics_host_match, - .map = xics_host_map, - .xlate = xics_host_xlate, -}; - -static void __init xics_init_host(void) -{ - if (firmware_has_feature(FW_FEATURE_LPAR)) - xics_irq_chip = &xics_pic_lpar; - else - xics_irq_chip = &xics_pic_direct; - - xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops, - XICS_IRQ_SPURIOUS); - BUG_ON(xics_host == NULL); - irq_set_default_host(xics_host); -} - - -/* Inter-processor interrupt support */ - -#ifdef CONFIG_SMP -/* - * XICS only has a single IPI, so encode the messages per CPU - */ -static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message); - -static inline void smp_xics_do_message(int cpu, int msg) -{ - unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); - - set_bit(msg, tgt); - mb(); - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_qirr_info(cpu, IPI_PRIORITY); - else - direct_qirr_info(cpu, IPI_PRIORITY); -} - -void smp_xics_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - smp_xics_do_message(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - smp_xics_do_message(i, msg); - } - } -} - -static irqreturn_t xics_ipi_dispatch(int cpu) -{ - unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); - - mb(); /* order mmio clearing qirr */ - while (*tgt) { - if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) { - smp_message_recv(PPC_MSG_CALL_FUNCTION); - } - if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) { - smp_message_recv(PPC_MSG_RESCHEDULE); - } - if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) { - smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE); - } -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) - if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); - } -#endif - } - return IRQ_HANDLED; -} - -static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id) -{ - int cpu = smp_processor_id(); - - direct_qirr_info(cpu, 0xff); - - return xics_ipi_dispatch(cpu); -} - -static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id) -{ - int cpu = smp_processor_id(); - - lpar_qirr_info(cpu, 0xff); - - return xics_ipi_dispatch(cpu); -} - -static void xics_request_ipi(void) -{ - unsigned int ipi; - int rc; - - ipi = irq_create_mapping(xics_host, XICS_IPI); - BUG_ON(ipi == NO_IRQ); - - /* - * IPIs are marked IRQF_DISABLED as they must run with irqs - * disabled - */ - irq_set_handler(ipi, handle_percpu_irq); - if (firmware_has_feature(FW_FEATURE_LPAR)) - rc = request_irq(ipi, xics_ipi_action_lpar, - IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); - else - rc = request_irq(ipi, xics_ipi_action_direct, - IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL); - BUG_ON(rc); -} - -int __init smp_xics_probe(void) -{ - xics_request_ipi(); - - return cpumask_weight(cpu_possible_mask); -} - -#endif /* CONFIG_SMP */ - - -/* Initialization */ - -static void xics_update_irq_servers(void) -{ - int i, j; - struct device_node *np; - u32 ilen; - const u32 *ireg; - u32 hcpuid; - - /* Find the server numbers for the boot cpu. */ - np = of_get_cpu_node(boot_cpuid, NULL); - BUG_ON(!np); - - ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); - if (!ireg) { - of_node_put(np); - return; - } - - i = ilen / sizeof(int); - hcpuid = get_hard_smp_processor_id(boot_cpuid); - - /* Global interrupt distribution server is specified in the last - * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last - * entry fom this property for current boot cpu id and use it as - * default distribution server - */ - for (j = 0; j < i; j += 2) { - if (ireg[j] == hcpuid) { - default_server = hcpuid; - default_distrib_server = ireg[j+1]; - } - } - - of_node_put(np); -} - -static void __init xics_map_one_cpu(int hw_id, unsigned long addr, - unsigned long size) -{ - int i; - - /* This may look gross but it's good enough for now, we don't quite - * have a hard -> linux processor id matching. - */ - for_each_possible_cpu(i) { - if (!cpu_present(i)) - continue; - if (hw_id == get_hard_smp_processor_id(i)) { - xics_per_cpu[i] = ioremap(addr, size); - return; - } - } -} - -static void __init xics_init_one_node(struct device_node *np, - unsigned int *indx) -{ - unsigned int ilen; - const u32 *ireg; - - /* This code does the theorically broken assumption that the interrupt - * server numbers are the same as the hard CPU numbers. - * This happens to be the case so far but we are playing with fire... - * should be fixed one of these days. -BenH. - */ - ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL); - - /* Do that ever happen ? we'll know soon enough... but even good'old - * f80 does have that property .. - */ - WARN_ON(ireg == NULL); - if (ireg) { - /* - * set node starting index for this node - */ - *indx = *ireg; - } - ireg = of_get_property(np, "reg", &ilen); - if (!ireg) - panic("xics_init_IRQ: can't find interrupt reg property"); - - while (ilen >= (4 * sizeof(u32))) { - unsigned long addr, size; - - /* XXX Use proper OF parsing code here !!! */ - addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(u32); - addr |= *ireg++; - ilen -= sizeof(u32); - size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(u32); - size |= *ireg++; - ilen -= sizeof(u32); - xics_map_one_cpu(*indx, addr, size); - (*indx)++; - } -} - -void __init xics_init_IRQ(void) -{ - struct device_node *np; - u32 indx = 0; - int found = 0; - const u32 *isize; - - ppc64_boot_msg(0x20, "XICS Init"); - - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); - - for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") { - found = 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { - of_node_put(np); - break; - } - xics_init_one_node(np, &indx); - } - if (found == 0) - return; - - /* get the bit size of server numbers */ - found = 0; - - for_each_compatible_node(np, NULL, "ibm,ppc-xics") { - isize = of_get_property(np, "ibm,interrupt-server#-size", NULL); - - if (!isize) - continue; - - if (!found) { - interrupt_server_size = *isize; - found = 1; - } else if (*isize != interrupt_server_size) { - printk(KERN_WARNING "XICS: " - "mismatched ibm,interrupt-server#-size\n"); - interrupt_server_size = max(*isize, - interrupt_server_size); - } - } - - xics_update_irq_servers(); - xics_init_host(); - - if (firmware_has_feature(FW_FEATURE_LPAR)) - ppc_md.get_irq = xics_get_irq_lpar; - else - ppc_md.get_irq = xics_get_irq_direct; - - xics_setup_cpu(); - - ppc64_boot_msg(0x21, "XICS Done"); -} - -/* Cpu startup, shutdown, and hotplug */ - -static void xics_set_cpu_priority(unsigned char cppr) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - - /* - * we only really want to set the priority when there's - * just one cppr value on the stack - */ - WARN_ON(os_cppr->index != 0); - - os_cppr->stack[0] = cppr; - - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_cppr_info(cppr); - else - direct_cppr_info(cppr); - iosync(); -} - -/* Have the calling processor join or leave the specified global queue */ -static void xics_set_cpu_giq(unsigned int gserver, unsigned int join) -{ - int index; - int status; - - if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL)) - return; - - index = (1UL << interrupt_server_size) - 1 - gserver; - - status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join); - - WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n", - GLOBAL_INTERRUPT_QUEUE, index, join, status); -} - -void xics_setup_cpu(void) -{ - xics_set_cpu_priority(LOWEST_PRIORITY); - - xics_set_cpu_giq(default_distrib_server, 1); -} - -void xics_teardown_cpu(void) -{ - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - int cpu = smp_processor_id(); - - /* - * we have to reset the cppr index to 0 because we're - * not going to return from the IPI - */ - os_cppr->index = 0; - xics_set_cpu_priority(0); - - /* Clear any pending IPI request */ - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_qirr_info(cpu, 0xff); - else - direct_qirr_info(cpu, 0xff); -} - -void xics_kexec_teardown_cpu(int secondary) -{ - xics_teardown_cpu(); - - /* - * we take the ipi irq but and never return so we - * need to EOI the IPI, but want to leave our priority 0 - * - * should we check all the other interrupts too? - * should we be flagging idle loop instead? - * or creating some task to be scheduled? - */ - - if (firmware_has_feature(FW_FEATURE_LPAR)) - lpar_xirr_info_set((0x00 << 24) | XICS_IPI); - else - direct_xirr_info_set((0x00 << 24) | XICS_IPI); - - /* - * Some machines need to have at least one cpu in the GIQ, - * so leave the master cpu in the group. - */ - if (secondary) - xics_set_cpu_giq(default_distrib_server, 0); -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Interrupts are disabled. */ -void xics_migrate_irqs_away(void) -{ - int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id(); - unsigned int irq, virq; - - /* If we used to be the default server, move to the new "boot_cpuid" */ - if (hw_cpu == default_server) - xics_update_irq_servers(); - - /* Reject any interrupt that was queued to us... */ - xics_set_cpu_priority(0); - - /* Remove ourselves from the global interrupt queue */ - xics_set_cpu_giq(default_distrib_server, 0); - - /* Allow IPIs again... */ - xics_set_cpu_priority(DEFAULT_PRIORITY); - - for_each_irq(virq) { - struct irq_desc *desc; - struct irq_chip *chip; - int xics_status[2]; - int status; - unsigned long flags; - - /* We cant set affinity on ISA interrupts */ - if (virq < NUM_ISA_INTERRUPTS) - continue; - if (irq_map[virq].host != xics_host) - continue; - irq = (unsigned int)irq_map[virq].hwirq; - /* We need to get IPIs still. */ - if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - continue; - - desc = irq_to_desc(virq); - - /* We only need to migrate enabled IRQS */ - if (desc == NULL || desc->action == NULL) - continue; - - chip = irq_desc_get_chip(desc); - if (chip == NULL || chip->irq_set_affinity == NULL) - continue; - - raw_spin_lock_irqsave(&desc->lock, flags); - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); - if (status) { - printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", - __func__, irq, status); - goto unlock; - } - - /* - * We only support delivery to all cpus or to one cpu. - * The irq has to be migrated only in the single cpu - * case. - */ - if (xics_status[0] != hw_cpu) - goto unlock; - - /* This is expected during cpu offline. */ - if (cpu_online(cpu)) - printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n", - virq, cpu); - - /* Reset affinity to all cpus */ - cpumask_setall(desc->irq_data.affinity); - chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true); -unlock: - raw_spin_unlock_irqrestore(&desc->lock, flags); - } -} -#endif diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h deleted file mode 100644 index d1d5a83039ae..000000000000 --- a/arch/powerpc/platforms/pseries/xics.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * arch/powerpc/platforms/pseries/xics.h - * - * Copyright 2000 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _POWERPC_KERNEL_XICS_H -#define _POWERPC_KERNEL_XICS_H - -extern void xics_init_IRQ(void); -extern void xics_setup_cpu(void); -extern void xics_teardown_cpu(void); -extern void xics_kexec_teardown_cpu(int secondary); -extern void xics_migrate_irqs_away(void); -extern int smp_xics_probe(void); -extern void smp_xics_message_pass(int target, int msg); - -#endif /* _POWERPC_KERNEL_XICS_H */ |