diff options
Diffstat (limited to 'kernel')
32 files changed, 1314 insertions, 337 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c3c9a0e1b3c9..8d4e85eae42c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css) { lockdep_assert_held(&cgroup_mutex); + if (css->flags & CSS_DYING) + return; + + css->flags |= CSS_DYING; + /* * This must happen before css is disassociated with its cgroup. * See seq_css() for details. diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f6501f4f6040..ae643412948a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -176,9 +176,9 @@ typedef enum { } cpuset_flagbits_t; /* convenient tests for these bits */ -static inline bool is_cpuset_online(const struct cpuset *cs) +static inline bool is_cpuset_online(struct cpuset *cs) { - return test_bit(CS_ONLINE, &cs->flags); + return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) diff --git a/kernel/cpu.c b/kernel/cpu.c index 9ae6fbe5b5cf..b86b32ebb3b2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup.single = smpboot_unpark_threads, .teardown.single = NULL, }, + [CPUHP_AP_IRQ_AFFINITY_ONLINE] = { + .name = "irq/affinity:online", + .startup.single = irq_affinity_online_cpu, + .teardown.single = NULL, + }, [CPUHP_AP_PERF_ONLINE] = { .name = "perf:online", .startup.single = perf_event_init_cpu, @@ -1658,13 +1663,13 @@ static ssize_t write_cpuhp_target(struct device *dev, ret = !sp->name || sp->cant_stop ? -EINVAL : 0; mutex_unlock(&cpuhp_state_mutex); if (ret) - return ret; + goto out; if (st->state < target) ret = do_cpu_up(dev->id, target); else ret = do_cpu_down(dev->id, target); - +out: unlock_device_hotplug(); return ret ? ret : count; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 6e75a5c9412d..6c4e523dc1e2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7316,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + * To avoid leaking information to userspace, we must always + * reject kernel samples when exclude_kernel is set. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling. */ @@ -7337,6 +7352,12 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); /* + * For security, drop the skid kernel samples if necessary. + */ + if (!sample_is_allowed(event, regs)) + return ret; + + /* * XXX event_limit might not quite work as expected on inherited * events */ diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 3bbfd6a9c475..fcbb1d6d51cb 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW config GENERIC_IRQ_SHOW_LEVEL bool +# Supports effective affinity mask +config GENERIC_IRQ_EFFECTIVE_AFF_MASK + bool + # Facility to allocate a hardware interrupt. This is legacy support # and should not be used in new code. Use irq domains instead. config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ @@ -108,4 +112,15 @@ config SPARSE_IRQ If you don't know what to do here, say N. +config GENERIC_IRQ_DEBUGFS + bool "Expose irq internals in debugfs" + depends on DEBUG_FS + default n + ---help--- + + Exposes internal state information through debugfs. Mostly for + developers and debugging of hard to diagnose interrupt problems. + + If you don't know what to do here, say N. + endmenu diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 1d3ee3169202..c61fc9c2d1f7 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_PM_SLEEP) += pm.o obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o obj-$(CONFIG_SMP) += affinity.o +obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index e2d356dd7581..d2747f9c5707 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -1,4 +1,7 @@ - +/* + * Copyright (C) 2016 Thomas Gleixner. + * Copyright (C) 2016-2017 Christoph Hellwig. + */ #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/slab.h> @@ -35,13 +38,54 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, } } -static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) +static cpumask_var_t *alloc_node_to_present_cpumask(void) +{ + cpumask_var_t *masks; + int node; + + masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); + if (!masks) + return NULL; + + for (node = 0; node < nr_node_ids; node++) { + if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) + goto out_unwind; + } + + return masks; + +out_unwind: + while (--node >= 0) + free_cpumask_var(masks[node]); + kfree(masks); + return NULL; +} + +static void free_node_to_present_cpumask(cpumask_var_t *masks) +{ + int node; + + for (node = 0; node < nr_node_ids; node++) + free_cpumask_var(masks[node]); + kfree(masks); +} + +static void build_node_to_present_cpumask(cpumask_var_t *masks) +{ + int cpu; + + for_each_present_cpu(cpu) + cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); +} + +static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask, + const struct cpumask *mask, nodemask_t *nodemsk) { int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ - for_each_online_node(n) { - if (cpumask_intersects(mask, cpumask_of_node(n))) { + for_each_node(n) { + if (cpumask_intersects(mask, node_to_present_cpumask[n])) { node_set(n, *nodemsk); nodes++; } @@ -64,7 +108,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int last_affv = affv + affd->pre_vectors; nodemask_t nodemsk = NODE_MASK_NONE; struct cpumask *masks; - cpumask_var_t nmsk; + cpumask_var_t nmsk, *node_to_present_cpumask; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; @@ -73,13 +117,19 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (!masks) goto out; + node_to_present_cpumask = alloc_node_to_present_cpumask(); + if (!node_to_present_cpumask) + goto out; + /* Fill out vectors at the beginning that don't need affinity */ for (curvec = 0; curvec < affd->pre_vectors; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); /* Stabilize the cpumasks */ get_online_cpus(); - nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk); + build_node_to_present_cpumask(node_to_present_cpumask); + nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask, + &nodemsk); /* * If the number of nodes in the mask is greater than or equal the @@ -87,7 +137,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) */ if (affv <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_copy(masks + curvec, cpumask_of_node(n)); + cpumask_copy(masks + curvec, + node_to_present_cpumask[n]); if (++curvec == last_affv) break; } @@ -101,7 +152,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes; /* Get the cpus on this node which are in the mask */ - cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n)); + cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]); /* Calculate the number of cpus per vector */ ncpus = cpumask_weight(nmsk); @@ -133,6 +184,7 @@ done: /* Fill out vectors at the end that don't need affinity */ for (; curvec < nvecs; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); + free_node_to_present_cpumask(node_to_present_cpumask); out: free_cpumask_var(nmsk); return masks; @@ -147,12 +199,10 @@ int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd) { int resv = affd->pre_vectors + affd->post_vectors; int vecs = maxvec - resv; - int cpus; + int ret; - /* Stabilize the cpumasks */ get_online_cpus(); - cpus = cpumask_weight(cpu_online_mask); + ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv; put_online_cpus(); - - return min(cpus, vecs) + resv; + return ret; } diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 0119b9d467ae..d30a0dd5cc02 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -53,7 +53,7 @@ unsigned long probe_irq_on(void) if (desc->irq_data.chip->irq_set_type) desc->irq_data.chip->irq_set_type(&desc->irq_data, IRQ_TYPE_PROBE); - irq_startup(desc, false); + irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE); } raw_spin_unlock_irq(&desc->lock); } @@ -70,7 +70,7 @@ unsigned long probe_irq_on(void) raw_spin_lock_irq(&desc->lock); if (!desc->action && irq_settings_can_probe(desc)) { desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; - if (irq_startup(desc, false)) + if (irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE)) desc->istate |= IRQS_PENDING; } raw_spin_unlock_irq(&desc->lock); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c94da688ee9b..fc89eeb8a6b4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -185,37 +185,131 @@ static void irq_state_set_masked(struct irq_desc *desc) irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); } -int irq_startup(struct irq_desc *desc, bool resend) +static void irq_state_clr_started(struct irq_desc *desc) { - int ret = 0; + irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED); +} - irq_state_clr_disabled(desc); - desc->depth = 0; +static void irq_state_set_started(struct irq_desc *desc) +{ + irqd_set(&desc->irq_data, IRQD_IRQ_STARTED); +} + +enum { + IRQ_STARTUP_NORMAL, + IRQ_STARTUP_MANAGED, + IRQ_STARTUP_ABORT, +}; + +#ifdef CONFIG_SMP +static int +__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + + if (!irqd_affinity_is_managed(d)) + return IRQ_STARTUP_NORMAL; + + irqd_clr_managed_shutdown(d); - irq_domain_activate_irq(&desc->irq_data); - if (desc->irq_data.chip->irq_startup) { - ret = desc->irq_data.chip->irq_startup(&desc->irq_data); + if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) { + /* + * Catch code which fiddles with enable_irq() on a managed + * and potentially shutdown IRQ. Chained interrupt + * installment or irq auto probing should not happen on + * managed irqs either. Emit a warning, break the affinity + * and start it up as a normal interrupt. + */ + if (WARN_ON_ONCE(force)) + return IRQ_STARTUP_NORMAL; + /* + * The interrupt was requested, but there is no online CPU + * in it's affinity mask. Put it into managed shutdown + * state and let the cpu hotplug mechanism start it up once + * a CPU in the mask becomes available. + */ + irqd_set_managed_shutdown(d); + return IRQ_STARTUP_ABORT; + } + return IRQ_STARTUP_MANAGED; +} +#else +static int +__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +{ + return IRQ_STARTUP_NORMAL; +} +#endif + +static int __irq_startup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + int ret = 0; + + irq_domain_activate_irq(d); + if (d->chip->irq_startup) { + ret = d->chip->irq_startup(d); + irq_state_clr_disabled(desc); irq_state_clr_masked(desc); } else { irq_enable(desc); } + irq_state_set_started(desc); + return ret; +} + +int irq_startup(struct irq_desc *desc, bool resend, bool force) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct cpumask *aff = irq_data_get_affinity_mask(d); + int ret = 0; + + desc->depth = 0; + + if (irqd_is_started(d)) { + irq_enable(desc); + } else { + switch (__irq_startup_managed(desc, aff, force)) { + case IRQ_STARTUP_NORMAL: + ret = __irq_startup(desc); + irq_setup_affinity(desc); + break; + case IRQ_STARTUP_MANAGED: + ret = __irq_startup(desc); + irq_set_affinity_locked(d, aff, false); + break; + case IRQ_STARTUP_ABORT: + return 0; + } + } if (resend) check_irq_resend(desc); + return ret; } +static void __irq_disable(struct irq_desc *desc, bool mask); + void irq_shutdown(struct irq_desc *desc) { - irq_state_set_disabled(desc); - desc->depth = 1; - if (desc->irq_data.chip->irq_shutdown) - desc->irq_data.chip->irq_shutdown(&desc->irq_data); - else if (desc->irq_data.chip->irq_disable) - desc->irq_data.chip->irq_disable(&desc->irq_data); - else - desc->irq_data.chip->irq_mask(&desc->irq_data); + if (irqd_is_started(&desc->irq_data)) { + desc->depth = 1; + if (desc->irq_data.chip->irq_shutdown) { + desc->irq_data.chip->irq_shutdown(&desc->irq_data); + irq_state_set_disabled(desc); + irq_state_set_masked(desc); + } else { + __irq_disable(desc, true); + } + irq_state_clr_started(desc); + } + /* + * This must be called even if the interrupt was never started up, + * because the activation can happen before the interrupt is + * available for request/startup. It has it's own state tracking so + * it's safe to call it unconditionally. + */ irq_domain_deactivate_irq(&desc->irq_data); - irq_state_set_masked(desc); } void irq_enable(struct irq_desc *desc) @@ -228,6 +322,17 @@ void irq_enable(struct irq_desc *desc) irq_state_clr_masked(desc); } +static void __irq_disable(struct irq_desc *desc, bool mask) +{ + irq_state_set_disabled(desc); + if (desc->irq_data.chip->irq_disable) { + desc->irq_data.chip->irq_disable(&desc->irq_data); + irq_state_set_masked(desc); + } else if (mask) { + mask_irq(desc); + } +} + /** * irq_disable - Mark interrupt disabled * @desc: irq descriptor which should be disabled @@ -250,13 +355,7 @@ void irq_enable(struct irq_desc *desc) */ void irq_disable(struct irq_desc *desc) { - irq_state_set_disabled(desc); - if (desc->irq_data.chip->irq_disable) { - desc->irq_data.chip->irq_disable(&desc->irq_data); - irq_state_set_masked(desc); - } else if (irq_settings_disable_unlazy(desc)) { - mask_irq(desc); - } + __irq_disable(desc, irq_settings_disable_unlazy(desc)); } void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) @@ -851,7 +950,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = &chained_action; - irq_startup(desc, true); + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); } } @@ -903,6 +1002,13 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) if (!desc) return; + + /* + * Warn when a driver sets the no autoenable flag on an already + * active interrupt. + */ + WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); + irq_settings_clr_and_set(desc, clr, set); irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 011f8c4c63da..aee8f7ec40af 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -14,37 +14,99 @@ #include "internals.h" +/* For !GENERIC_IRQ_EFFECTIVE_AFF_MASK this looks at general affinity mask */ +static inline bool irq_needs_fixup(struct irq_data *d) +{ + const struct cpumask *m = irq_data_get_effective_affinity_mask(d); + + return cpumask_test_cpu(smp_processor_id(), m); +} + static bool migrate_one_irq(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = d->common->affinity; - struct irq_chip *c; - bool ret = false; + struct irq_chip *chip = irq_data_get_irq_chip(d); + bool maskchip = !irq_can_move_pcntxt(d) && !irqd_irq_masked(d); + const struct cpumask *affinity; + bool brokeaff = false; + int err; /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. + * IRQ chip might be already torn down, but the irq descriptor is + * still in the radix tree. Also if the chip has no affinity setter, + * nothing can be done here. */ - if (irqd_is_per_cpu(d) || - !cpumask_test_cpu(smp_processor_id(), affinity)) + if (!chip || !chip->irq_set_affinity) { + pr_debug("IRQ %u: Unable to migrate away\n", d->irq); return false; + } + + /* + * No move required, if: + * - Interrupt is per cpu + * - Interrupt is not started + * - Affinity mask does not include this CPU. + * + * Note: Do not check desc->action as this might be a chained + * interrupt. + */ + if (irqd_is_per_cpu(d) || !irqd_is_started(d) || !irq_needs_fixup(d)) { + /* + * If an irq move is pending, abort it if the dying CPU is + * the sole target. + */ + irq_fixup_move_pending(desc, false); + return false; + } + + /* + * Complete an eventually pending irq move cleanup. If this + * interrupt was moved in hard irq context, then the vectors need + * to be cleaned up. It can't wait until this interrupt actually + * happens and this CPU was involved. + */ + irq_force_complete_move(desc); + + /* + * If there is a setaffinity pending, then try to reuse the pending + * mask, so the last change of the affinity does not get lost. If + * there is no move pending or the pending mask does not contain + * any online CPU, use the current affinity mask. + */ + if (irq_fixup_move_pending(desc, true)) + affinity = irq_desc_get_pending_mask(desc); + else + affinity = irq_data_get_affinity_mask(d); + + /* Mask the chip for interrupts which cannot move in process context */ + if (maskchip && chip->irq_mask) + chip->irq_mask(d); if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + /* + * If the interrupt is managed, then shut it down and leave + * the affinity untouched. + */ + if (irqd_affinity_is_managed(d)) { + irqd_set_managed_shutdown(d); + irq_shutdown(desc); + return false; + } affinity = cpu_online_mask; - ret = true; + brokeaff = true; } - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) { - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - } else { - int r = irq_do_set_affinity(d, affinity, false); - if (r) - pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", - d->irq, r); + err = irq_do_set_affinity(d, affinity, true); + if (err) { + pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", + d->irq, err); + brokeaff = false; } - return ret; + if (maskchip && chip->irq_unmask) + chip->irq_unmask(d); + + return brokeaff; } /** @@ -59,11 +121,8 @@ static bool migrate_one_irq(struct irq_desc *desc) */ void irq_migrate_all_off_this_cpu(void) { - unsigned int irq; struct irq_desc *desc; - unsigned long flags; - - local_irq_save(flags); + unsigned int irq; for_each_active_irq(irq) { bool affinity_broken; @@ -73,10 +132,53 @@ void irq_migrate_all_off_this_cpu(void) affinity_broken = migrate_one_irq(desc); raw_spin_unlock(&desc->lock); - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + if (affinity_broken) { + pr_warn_ratelimited("IRQ %u: no longer affine to CPU%u\n", irq, smp_processor_id()); + } + } +} + +static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = irq_data_get_affinity_mask(data); + + if (!irqd_affinity_is_managed(data) || !desc->action || + !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) + return; + + if (irqd_is_managed_and_shutdown(data)) { + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + return; + } + + /* + * If the interrupt can only be directed to a single target + * CPU then it is already assigned to a CPU in the affinity + * mask. No point in trying to move it around. + */ + if (!irqd_is_single_target(data)) + irq_set_affinity_locked(data, affinity, false); +} + +/** + * irq_affinity_online_cpu - Restore affinity for managed interrupts + * @cpu: Upcoming CPU for which interrupts should be restored + */ +int irq_affinity_online_cpu(unsigned int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + + irq_lock_sparse(); + for_each_active_irq(irq) { + desc = irq_to_desc(irq); + raw_spin_lock_irq(&desc->lock); + irq_restore_affinity_of_irq(desc, cpu); + raw_spin_unlock_irq(&desc->lock); } + irq_unlock_sparse(); - local_irq_restore(flags); + return 0; } diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c new file mode 100644 index 000000000000..dbd6e78db213 --- /dev/null +++ b/kernel/irq/debugfs.c @@ -0,0 +1,220 @@ +/* + * Copyright 2017 Thomas Gleixner <tglx@linutronix.de> + * + * This file is licensed under the GPL V2. + */ +#include <linux/debugfs.h> +#include <linux/irqdomain.h> +#include <linux/irq.h> + +#include "internals.h" + +static struct dentry *irq_dir; + +struct irq_bit_descr { + unsigned int mask; + char *name; +}; +#define BIT_MASK_DESCR(m) { .mask = m, .name = #m } + +static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, + const struct irq_bit_descr *sd, int size) +{ + int i; + + for (i = 0; i < size; i++, sd++) { + if (state & sd->mask) + seq_printf(m, "%*s%s\n", ind + 12, "", sd->name); + } +} + +#ifdef CONFIG_SMP +static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + struct cpumask *msk; + + msk = irq_data_get_affinity_mask(data); + seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk)); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + msk = irq_data_get_effective_affinity_mask(data); + seq_printf(m, "effectiv: %*pbl\n", cpumask_pr_args(msk)); +#endif +#ifdef CONFIG_GENERIC_PENDING_IRQ + msk = desc->pending_mask; + seq_printf(m, "pending: %*pbl\n", cpumask_pr_args(msk)); +#endif +} +#else +static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { } +#endif + +static const struct irq_bit_descr irqchip_flags[] = { + BIT_MASK_DESCR(IRQCHIP_SET_TYPE_MASKED), + BIT_MASK_DESCR(IRQCHIP_EOI_IF_HANDLED), + BIT_MASK_DESCR(IRQCHIP_MASK_ON_SUSPEND), + BIT_MASK_DESCR(IRQCHIP_ONOFFLINE_ENABLED), + BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE), + BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), + BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), +}; + +static void +irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind) +{ + struct irq_chip *chip = data->chip; + + if (!chip) { + seq_printf(m, "chip: None\n"); + return; + } + seq_printf(m, "%*schip: %s\n", ind, "", chip->name); + seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags); + irq_debug_show_bits(m, ind, chip->flags, irqchip_flags, + ARRAY_SIZE(irqchip_flags)); +} + +static void +irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind) +{ + seq_printf(m, "%*sdomain: %s\n", ind, "", + data->domain ? data->domain->name : ""); + seq_printf(m, "%*shwirq: 0x%lx\n", ind + 1, "", data->hwirq); + irq_debug_show_chip(m, data, ind + 1); +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (!data->parent_data) + return; + seq_printf(m, "%*sparent:\n", ind + 1, ""); + irq_debug_show_data(m, data->parent_data, ind + 4); +#endif +} + +static const struct irq_bit_descr irqdata_states[] = { + BIT_MASK_DESCR(IRQ_TYPE_EDGE_RISING), + BIT_MASK_DESCR(IRQ_TYPE_EDGE_FALLING), + BIT_MASK_DESCR(IRQ_TYPE_LEVEL_HIGH), + BIT_MASK_DESCR(IRQ_TYPE_LEVEL_LOW), + BIT_MASK_DESCR(IRQD_LEVEL), + + BIT_MASK_DESCR(IRQD_ACTIVATED), + BIT_MASK_DESCR(IRQD_IRQ_STARTED), + BIT_MASK_DESCR(IRQD_IRQ_DISABLED), + BIT_MASK_DESCR(IRQD_IRQ_MASKED), + BIT_MASK_DESCR(IRQD_IRQ_INPROGRESS), + + BIT_MASK_DESCR(IRQD_PER_CPU), + BIT_MASK_DESCR(IRQD_NO_BALANCING), + + BIT_MASK_DESCR(IRQD_SINGLE_TARGET), + BIT_MASK_DESCR(IRQD_MOVE_PCNTXT), + BIT_MASK_DESCR(IRQD_AFFINITY_SET), + BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), + BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), + BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + + BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), + + BIT_MASK_DESCR(IRQD_WAKEUP_STATE), + BIT_MASK_DESCR(IRQD_WAKEUP_ARMED), +}; + +static const struct irq_bit_descr irqdesc_states[] = { + BIT_MASK_DESCR(_IRQ_NOPROBE), + BIT_MASK_DESCR(_IRQ_NOREQUEST), + BIT_MASK_DESCR(_IRQ_NOTHREAD), + BIT_MASK_DESCR(_IRQ_NOAUTOEN), + BIT_MASK_DESCR(_IRQ_NESTED_THREAD), + BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID), + BIT_MASK_DESCR(_IRQ_IS_POLLED), + BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY), +}; + +static const struct irq_bit_descr irqdesc_istates[] = { + BIT_MASK_DESCR(IRQS_AUTODETECT), + BIT_MASK_DESCR(IRQS_SPURIOUS_DISABLED), + BIT_MASK_DESCR(IRQS_POLL_INPROGRESS), + BIT_MASK_DESCR(IRQS_ONESHOT), + BIT_MASK_DESCR(IRQS_REPLAY), + BIT_MASK_DESCR(IRQS_WAITING), + BIT_MASK_DESCR(IRQS_PENDING), + BIT_MASK_DESCR(IRQS_SUSPENDED), +}; + + +static int irq_debug_show(struct seq_file *m, void *p) +{ + struct irq_desc *desc = m->private; + struct irq_data *data; + + raw_spin_lock_irq(&desc->lock); + data = irq_desc_get_irq_data(desc); + seq_printf(m, "handler: %pf\n", desc->handle_irq); + seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors); + irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states, + ARRAY_SIZE(irqdesc_states)); + seq_printf(m, "istate: 0x%08x\n", desc->istate); + irq_debug_show_bits(m, 0, desc->istate, irqdesc_istates, + ARRAY_SIZE(irqdesc_istates)); + seq_printf(m, "ddepth: %u\n", desc->depth); + seq_printf(m, "wdepth: %u\n", desc->wake_depth); + seq_printf(m, "dstate: 0x%08x\n", irqd_get(data)); + irq_debug_show_bits(m, 0, irqd_get(data), irqdata_states, + ARRAY_SIZE(irqdata_states)); + seq_printf(m, "node: %d\n", irq_data_get_node(data)); + irq_debug_show_masks(m, desc); + irq_debug_show_data(m, data, 0); + raw_spin_unlock_irq(&desc->lock); + return 0; +} + +static int irq_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_debug_show, inode->i_private); +} + +static const struct file_operations dfs_irq_ops = { + .open = irq_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc) +{ + char name [10]; + + if (!irq_dir || !desc || desc->debugfs_file) + return; + + sprintf(name, "%d", irq); + desc->debugfs_file = debugfs_create_file(name, 0444, irq_dir, desc, + &dfs_irq_ops); +} + +void irq_remove_debugfs_entry(struct irq_desc *desc) +{ + if (desc->debugfs_file) + debugfs_remove(desc->debugfs_file); +} + +static int __init irq_debugfs_init(void) +{ + struct dentry *root_dir; + int irq; + + root_dir = debugfs_create_dir("irq", NULL); + if (!root_dir) + return -ENOMEM; + + irq_domain_debugfs_init(root_dir); + + irq_dir = debugfs_create_dir("irqs", root_dir); + + irq_lock_sparse(); + for_each_active_irq(irq) + irq_add_debugfs_entry(irq, irq_to_desc(irq)); + irq_unlock_sparse(); + + return 0; +} +__initcall(irq_debugfs_init); diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 1613bfd48365..194c506d9d20 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -4,6 +4,8 @@ #include <linux/gfp.h> #include <linux/irq.h> +#include "internals.h" + /* * Device resource management aware IRQ request/free implementation. */ @@ -198,3 +200,87 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, return base; } EXPORT_SYMBOL_GPL(__devm_irq_alloc_descs); + +#ifdef CONFIG_GENERIC_IRQ_CHIP +/** + * devm_irq_alloc_generic_chip - Allocate and initialize a generic chip + * for a managed device + * @dev: Device to allocate the generic chip for + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, + unsigned int irq_base, void __iomem *reg_base, + irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); + + gc = devm_kzalloc(dev, sz, GFP_KERNEL); + if (gc) + irq_init_generic_chip(gc, name, num_ct, + irq_base, reg_base, handler); + + return gc; +} +EXPORT_SYMBOL_GPL(devm_irq_alloc_generic_chip); + +struct irq_generic_chip_devres { + struct irq_chip_generic *gc; + u32 msk; + unsigned int clr; + unsigned int set; +}; + +static void devm_irq_remove_generic_chip(struct device *dev, void *res) +{ + struct irq_generic_chip_devres *this = res; + + irq_remove_generic_chip(this->gc, this->msk, this->clr, this->set); +} + +/** + * devm_irq_setup_generic_chip - Setup a range of interrupts with a generic + * chip for a managed device + * + * @dev: Device to setup the generic chip for + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. + */ +int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, + u32 msk, enum irq_gc_flags flags, + unsigned int clr, unsigned int set) +{ + struct irq_generic_chip_devres *dr; + + dr = devres_alloc(devm_irq_remove_generic_chip, + sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + irq_setup_generic_chip(gc, msk, flags, clr, set); + + dr->gc = gc; + dr->msk = msk; + dr->clr = clr; + dr->set = set; + devres_add(dev, dr); + + return 0; +} +EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip); +#endif /* CONFIG_GENERIC_IRQ_CHIP */ diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index ee32870079c9..f7086b78ad6e 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -201,10 +201,9 @@ static void irq_writel_be(u32 val, void __iomem *addr) iowrite32be(val, addr); } -static void -irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, - int num_ct, unsigned int irq_base, - void __iomem *reg_base, irq_flow_handler_t handler) +void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, + int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) { raw_spin_lock_init(&gc->lock); gc->num_ct = num_ct; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index bc226e783bd2..5fd105e252c3 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -66,7 +66,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags); extern void __disable_irq(struct irq_desc *desc); extern void __enable_irq(struct irq_desc *desc); -extern int irq_startup(struct irq_desc *desc, bool resend); +#define IRQ_RESEND true +#define IRQ_NORESEND false + +#define IRQ_START_FORCE true +#define IRQ_START_COND false + +extern int irq_startup(struct irq_desc *desc, bool resend, bool force); + extern void irq_shutdown(struct irq_desc *desc); extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); @@ -109,13 +116,19 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask); +extern int irq_select_affinity_usr(unsigned int irq); extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force); +#ifdef CONFIG_SMP +extern int irq_setup_affinity(struct irq_desc *desc); +#else +static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } +#endif + /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { @@ -169,6 +182,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) +static inline unsigned int irqd_get(struct irq_data *d) +{ + return __irqd_to_state(d); +} + /* * Manipulation functions for irq_data.state */ @@ -182,6 +200,16 @@ static inline void irqd_clr_move_pending(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING; } +static inline void irqd_set_managed_shutdown(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN; +} + +static inline void irqd_clr_managed_shutdown(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN; +} + static inline void irqd_clear(struct irq_data *d, unsigned int mask) { __irqd_to_state(d) &= ~mask; @@ -226,3 +254,82 @@ irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } static inline void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } #endif + +#ifdef CONFIG_GENERIC_IRQ_CHIP +void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, + int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler); +#else +static inline void +irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, + int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) { } +#endif /* CONFIG_GENERIC_IRQ_CHIP */ + +#ifdef CONFIG_GENERIC_PENDING_IRQ +static inline bool irq_can_move_pcntxt(struct irq_data *data) +{ + return irqd_can_move_in_process_context(data); +} +static inline bool irq_move_pending(struct irq_data *data) +{ + return irqd_is_setaffinity_pending(data); +} +static inline void +irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) +{ + cpumask_copy(desc->pending_mask, mask); +} +static inline void +irq_get_pending(struct cpumask *mask, struct irq_desc *desc) +{ + cpumask_copy(mask, desc->pending_mask); +} +static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) +{ + return desc->pending_mask; +} +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); +#else /* CONFIG_GENERIC_PENDING_IRQ */ +static inline bool irq_can_move_pcntxt(struct irq_data *data) +{ + return true; +} +static inline bool irq_move_pending(struct irq_data *data) +{ + return false; +} +static inline void +irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) +{ +} +static inline void +irq_get_pending(struct cpumask *mask, struct irq_desc *desc) +{ +} +static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) +{ + return NULL; +} +static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) +{ + return false; +} +#endif /* !CONFIG_GENERIC_PENDING_IRQ */ + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); +void irq_remove_debugfs_entry(struct irq_desc *desc); +# ifdef CONFIG_IRQ_DOMAIN +void irq_domain_debugfs_init(struct dentry *root); +# else +static inline void irq_domain_debugfs_init(struct dentry *root); +# endif +#else /* CONFIG_GENERIC_IRQ_DEBUGFS */ +static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) +{ +} +static inline void irq_remove_debugfs_entry(struct irq_desc *d) +{ +} +#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 00bb0aeea1d0..35a95fadcfda 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -54,14 +54,25 @@ static void __init init_irq_default_affinity(void) #endif #ifdef CONFIG_SMP -static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) +static int alloc_masks(struct irq_desc *desc, int node) { if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, - gfp, node)) + GFP_KERNEL, node)) return -ENOMEM; +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity, + GFP_KERNEL, node)) { + free_cpumask_var(desc->irq_common_data.affinity); + return -ENOMEM; + } +#endif + #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { + if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) { +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + free_cpumask_var(desc->irq_common_data.effective_affinity); +#endif free_cpumask_var(desc->irq_common_data.affinity); return -ENOMEM; } @@ -86,7 +97,7 @@ static void desc_smp_init(struct irq_desc *desc, int node, #else static inline int -alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; } +alloc_masks(struct irq_desc *desc, int node) { return 0; } static inline void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { } #endif @@ -324,6 +335,9 @@ static void free_masks(struct irq_desc *desc) free_cpumask_var(desc->pending_mask); #endif free_cpumask_var(desc->irq_common_data.affinity); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + free_cpumask_var(desc->irq_common_data.effective_affinity); +#endif } #else static inline void free_masks(struct irq_desc *desc) { } @@ -344,9 +358,8 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, struct module *owner) { struct irq_desc *desc; - gfp_t gfp = GFP_KERNEL; - desc = kzalloc_node(sizeof(*desc), gfp, node); + desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node); if (!desc) return NULL; /* allocate based on nr_cpu_ids */ @@ -354,7 +367,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, if (!desc->kstat_irqs) goto err_desc; - if (alloc_masks(desc, gfp, node)) + if (alloc_masks(desc, node)) goto err_kstat; raw_spin_lock_init(&desc->lock); @@ -394,6 +407,7 @@ static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); + irq_remove_debugfs_entry(desc); unregister_irq_proc(irq, desc); /* @@ -480,7 +494,8 @@ int __init early_irq_init(void) /* Let arch update nr_irqs and return the nr of preallocated irqs */ initcnt = arch_probe_nr_irqs(); - printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); + printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n", + NR_IRQS, nr_irqs, initcnt); if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS)) nr_irqs = IRQ_BITMAP_BITS; @@ -516,14 +531,14 @@ int __init early_irq_init(void) init_irq_default_affinity(); - printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); + printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS); desc = irq_desc; count = ARRAY_SIZE(irq_desc); for (i = 0; i < count; i++) { desc[i].kstat_irqs = alloc_percpu(unsigned int); - alloc_masks(&desc[i], GFP_KERNEL, node); + alloc_masks(&desc[i], node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); desc_set_defaults(i, &desc[i], node, NULL, NULL); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 31805f237396..14fe862aa2e3 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -26,39 +26,69 @@ static struct irq_domain *irq_default_domain; static void irq_domain_check_hierarchy(struct irq_domain *domain); struct irqchip_fwid { - struct fwnode_handle fwnode; - char *name; + struct fwnode_handle fwnode; + unsigned int type; + char *name; void *data; }; +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static void debugfs_add_domain_dir(struct irq_domain *d); +static void debugfs_remove_domain_dir(struct irq_domain *d); +#else +static inline void debugfs_add_domain_dir(struct irq_domain *d) { } +static inline void debugfs_remove_domain_dir(struct irq_domain *d) { } +#endif + /** * irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for * identifying an irq domain - * @data: optional user-provided data + * @type: Type of irqchip_fwnode. See linux/irqdomain.h + * @name: Optional user provided domain name + * @id: Optional user provided id if name != NULL + * @data: Optional user-provided data * - * Allocate a struct device_node, and return a poiner to the embedded + * Allocate a struct irqchip_fwid, and return a poiner to the embedded * fwnode_handle (or NULL on failure). + * + * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are + * solely to transport name information to irqdomain creation code. The + * node is not stored. For other types the pointer is kept in the irq + * domain struct. */ -struct fwnode_handle *irq_domain_alloc_fwnode(void *data) +struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, void *data) { struct irqchip_fwid *fwid; - char *name; + char *n; fwid = kzalloc(sizeof(*fwid), GFP_KERNEL); - name = kasprintf(GFP_KERNEL, "irqchip@%p", data); - if (!fwid || !name) { + switch (type) { + case IRQCHIP_FWNODE_NAMED: + n = kasprintf(GFP_KERNEL, "%s", name); + break; + case IRQCHIP_FWNODE_NAMED_ID: + n = kasprintf(GFP_KERNEL, "%s-%d", name, id); + break; + default: + n = kasprintf(GFP_KERNEL, "irqchip@%p", data); + break; + } + + if (!fwid || !n) { kfree(fwid); - kfree(name); + kfree(n); return NULL; } - fwid->name = name; + fwid->type = type; + fwid->name = n; fwid->data = data; fwid->fwnode.type = FWNODE_IRQCHIP; return &fwid->fwnode; } -EXPORT_SYMBOL_GPL(irq_domain_alloc_fwnode); +EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode); /** * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle @@ -97,26 +127,82 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, void *host_data) { struct device_node *of_node = to_of_node(fwnode); + struct irqchip_fwid *fwid; struct irq_domain *domain; + static atomic_t unknown_domains; + domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); if (WARN_ON(!domain)) return NULL; + if (fwnode && is_fwnode_irqchip(fwnode)) { + fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + + switch (fwid->type) { + case IRQCHIP_FWNODE_NAMED: + case IRQCHIP_FWNODE_NAMED_ID: + domain->name = kstrdup(fwid->name, GFP_KERNEL); + if (!domain->name) { + kfree(domain); + return NULL; + } + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + break; + default: + domain->fwnode = fwnode; + domain->name = fwid->name; + break; + } + } else if (of_node) { + char *name; + + /* + * DT paths contain '/', which debugfs is legitimately + * unhappy about. Replace them with ':', which does + * the trick and is not as offensive as '\'... + */ + name = kstrdup(of_node_full_name(of_node), GFP_KERNEL); + if (!name) { + kfree(domain); + return NULL; + } + + strreplace(name, '/', ':'); + + domain->name = name; + domain->fwnode = fwnode; + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } + + if (!domain->name) { + if (fwnode) { + pr_err("Invalid fwnode type (%d) for irqdomain\n", + fwnode->type); + } + domain->name = kasprintf(GFP_KERNEL, "unknown-%d", + atomic_inc_return(&unknown_domains)); + if (!domain->name) { + kfree(domain); + return NULL; + } + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } + of_node_get(of_node); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); domain->ops = ops; domain->host_data = host_data; - domain->fwnode = fwnode; domain->hwirq_max = hwirq_max; domain->revmap_size = size; domain->revmap_direct_max_irq = direct_max; irq_domain_check_hierarchy(domain); mutex_lock(&irq_domain_mutex); + debugfs_add_domain_dir(domain); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); @@ -136,6 +222,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_add); void irq_domain_remove(struct irq_domain *domain) { mutex_lock(&irq_domain_mutex); + debugfs_remove_domain_dir(domain); WARN_ON(!radix_tree_empty(&domain->revmap_tree)); @@ -152,10 +239,43 @@ void irq_domain_remove(struct irq_domain *domain) pr_debug("Removed domain %s\n", domain->name); of_node_put(irq_domain_get_of_node(domain)); + if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) + kfree(domain->name); kfree(domain); } EXPORT_SYMBOL_GPL(irq_domain_remove); +void irq_domain_update_bus_token(struct irq_domain *domain, + enum irq_domain_bus_token bus_token) +{ + char *name; + + if (domain->bus_token == bus_token) + return; + + mutex_lock(&irq_domain_mutex); + + domain->bus_token = bus_token; + + name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token); + if (!name) { + mutex_unlock(&irq_domain_mutex); + return; + } + + debugfs_remove_domain_dir(domain); + + if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) + kfree(domain->name); + else + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + + domain->name = name; + debugfs_add_domain_dir(domain); + + mutex_unlock(&irq_domain_mutex); +} + /** * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs * @of_node: pointer to interrupt controller's device tree node. @@ -344,6 +464,7 @@ void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) irq_data->domain = NULL; irq_data->hwirq = 0; + domain->mapcount--; /* Clear reverse map for this hwirq */ if (hwirq < domain->revmap_size) { @@ -395,6 +516,7 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, domain->name = irq_data->chip->name; } + domain->mapcount++; if (hwirq < domain->revmap_size) { domain->linear_revmap[hwirq] = virq; } else { @@ -746,13 +868,54 @@ unsigned int irq_find_mapping(struct irq_domain *domain, EXPORT_SYMBOL_GPL(irq_find_mapping); #ifdef CONFIG_IRQ_DOMAIN_DEBUG +static void virq_debug_show_one(struct seq_file *m, struct irq_desc *desc) +{ + struct irq_domain *domain; + struct irq_data *data; + + domain = desc->irq_data.domain; + data = &desc->irq_data; + + while (domain) { + unsigned int irq = data->irq; + unsigned long hwirq = data->hwirq; + struct irq_chip *chip; + bool direct; + + if (data == &desc->irq_data) + seq_printf(m, "%5d ", irq); + else + seq_printf(m, "%5d+ ", irq); + seq_printf(m, "0x%05lx ", hwirq); + + chip = irq_data_get_irq_chip(data); + seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); + + seq_printf(m, data ? "0x%p " : " %p ", + irq_data_get_irq_chip_data(data)); + + seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); + direct = (irq == hwirq) && (irq < domain->revmap_direct_max_irq); + seq_printf(m, "%6s%-8s ", + (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", + direct ? "(DIRECT)" : ""); + seq_printf(m, "%s\n", domain->name); +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + domain = domain->parent; + data = data->parent_data; +#else + domain = NULL; +#endif + } +} + static int virq_debug_show(struct seq_file *m, void *private) { unsigned long flags; struct irq_desc *desc; struct irq_domain *domain; struct radix_tree_iter iter; - void *data, **slot; + void **slot; int i; seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", @@ -760,15 +923,26 @@ static int virq_debug_show(struct seq_file *m, void *private) mutex_lock(&irq_domain_mutex); list_for_each_entry(domain, &irq_domain_list, link) { struct device_node *of_node; + const char *name; + int count = 0; + of_node = irq_domain_get_of_node(domain); + if (of_node) + name = of_node_full_name(of_node); + else if (is_fwnode_irqchip(domain->fwnode)) + name = container_of(domain->fwnode, struct irqchip_fwid, + fwnode)->name; + else + name = ""; + radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) count++; seq_printf(m, "%c%-16s %6u %10u %10u %s\n", domain == irq_default_domain ? '*' : ' ', domain->name, domain->revmap_size + count, domain->revmap_size, domain->revmap_direct_max_irq, - of_node ? of_node_full_name(of_node) : ""); + name); } mutex_unlock(&irq_domain_mutex); @@ -782,30 +956,7 @@ static int virq_debug_show(struct seq_file *m, void *private) continue; raw_spin_lock_irqsave(&desc->lock, flags); - domain = desc->irq_data.domain; - - if (domain) { - struct irq_chip *chip; - int hwirq = desc->irq_data.hwirq; - bool direct; - - seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05x ", hwirq); - - chip = irq_desc_get_chip(desc); - seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); - - data = irq_desc_get_chip_data(desc); - seq_printf(m, data ? "0x%p " : " %p ", data); - - seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); - direct = (i == hwirq) && (i < domain->revmap_direct_max_irq); - seq_printf(m, "%6s%-8s ", - (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", - direct ? "(DIRECT)" : ""); - seq_printf(m, "%s\n", desc->irq_data.domain->name); - } - + virq_debug_show_one(m, desc); raw_spin_unlock_irqrestore(&desc->lock, flags); } @@ -973,6 +1124,7 @@ static void irq_domain_insert_irq(int virq) struct irq_domain *domain = data->domain; irq_hw_number_t hwirq = data->hwirq; + domain->mapcount++; if (hwirq < domain->revmap_size) { domain->linear_revmap[hwirq] = virq; } else { @@ -1002,6 +1154,7 @@ static void irq_domain_remove_irq(int virq) struct irq_domain *domain = data->domain; irq_hw_number_t hwirq = data->hwirq; + domain->mapcount--; if (hwirq < domain->revmap_size) { domain->linear_revmap[hwirq] = 0; } else { @@ -1189,43 +1342,18 @@ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_common(domain, virq, nr_irqs); } -static bool irq_domain_is_auto_recursive(struct irq_domain *domain) -{ - return domain->flags & IRQ_DOMAIN_FLAG_AUTO_RECURSIVE; -} - -static void irq_domain_free_irqs_recursive(struct irq_domain *domain, +static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { domain->ops->free(domain, irq_base, nr_irqs); - if (irq_domain_is_auto_recursive(domain)) { - BUG_ON(!domain->parent); - irq_domain_free_irqs_recursive(domain->parent, irq_base, - nr_irqs); - } } -int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, +int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { - int ret = 0; - struct irq_domain *parent = domain->parent; - bool recursive = irq_domain_is_auto_recursive(domain); - - BUG_ON(recursive && !parent); - if (recursive) - ret = irq_domain_alloc_irqs_recursive(parent, irq_base, - nr_irqs, arg); - if (ret < 0) - return ret; - - ret = domain->ops->alloc(domain, irq_base, nr_irqs, arg); - if (ret < 0 && recursive) - irq_domain_free_irqs_recursive(parent, irq_base, nr_irqs); - - return ret; + return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } /** @@ -1286,7 +1414,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, } mutex_lock(&irq_domain_mutex); - ret = irq_domain_alloc_irqs_recursive(domain, virq, nr_irqs, arg); + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); if (ret < 0) { mutex_unlock(&irq_domain_mutex); goto out_free_irq_data; @@ -1321,7 +1449,7 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) mutex_lock(&irq_domain_mutex); for (i = 0; i < nr_irqs; i++) irq_domain_remove_irq(virq + i); - irq_domain_free_irqs_recursive(data->domain, virq, nr_irqs); + irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs); mutex_unlock(&irq_domain_mutex); irq_domain_free_irq_data(virq, nr_irqs); @@ -1341,15 +1469,11 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { - /* irq_domain_alloc_irqs_recursive() has called parent's alloc() */ - if (irq_domain_is_auto_recursive(domain)) - return 0; + if (!domain->parent) + return -ENOSYS; - domain = domain->parent; - if (domain) - return irq_domain_alloc_irqs_recursive(domain, irq_base, - nr_irqs, arg); - return -ENOSYS; + return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base, + nr_irqs, arg); } EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); @@ -1364,10 +1488,10 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { - /* irq_domain_free_irqs_recursive() will call parent's free */ - if (!irq_domain_is_auto_recursive(domain) && domain->parent) - irq_domain_free_irqs_recursive(domain->parent, irq_base, - nr_irqs); + if (!domain->parent) + return; + + irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs); } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); @@ -1487,3 +1611,78 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain) { } #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static struct dentry *domain_dir; + +static void +irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) +{ + seq_printf(m, "%*sname: %s\n", ind, "", d->name); + seq_printf(m, "%*ssize: %u\n", ind + 1, "", + d->revmap_size + d->revmap_direct_max_irq); + seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); + seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (!d->parent) + return; + seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name); + irq_domain_debug_show_one(m, d->parent, ind + 4); +#endif +} + +static int irq_domain_debug_show(struct seq_file *m, void *p) +{ + struct irq_domain *d = m->private; + + /* Default domain? Might be NULL */ + if (!d) { + if (!irq_default_domain) + return 0; + d = irq_default_domain; + } + irq_domain_debug_show_one(m, d, 0); + return 0; +} + +static int irq_domain_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_domain_debug_show, inode->i_private); +} + +static const struct file_operations dfs_domain_ops = { + .open = irq_domain_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void debugfs_add_domain_dir(struct irq_domain *d) +{ + if (!d->name || !domain_dir || d->debugfs_file) + return; + d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d, + &dfs_domain_ops); +} + +static void debugfs_remove_domain_dir(struct irq_domain *d) +{ + if (d->debugfs_file) + debugfs_remove(d->debugfs_file); +} + +void __init irq_domain_debugfs_init(struct dentry *root) +{ + struct irq_domain *d; + + domain_dir = debugfs_create_dir("domains", root); + if (!domain_dir) + return; + + debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops); + mutex_lock(&irq_domain_mutex); + list_for_each_entry(d, &irq_domain_list, link) + debugfs_add_domain_dir(d); + mutex_unlock(&irq_domain_mutex); +} +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 070be980c37a..3577c091ac7b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -168,34 +168,6 @@ void irq_set_thread_affinity(struct irq_desc *desc) set_bit(IRQTF_AFFINITY, &action->thread_flags); } -#ifdef CONFIG_GENERIC_PENDING_IRQ -static inline bool irq_can_move_pcntxt(struct irq_data *data) -{ - return irqd_can_move_in_process_context(data); -} -static inline bool irq_move_pending(struct irq_data *data) -{ - return irqd_is_setaffinity_pending(data); -} -static inline void -irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) -{ - cpumask_copy(desc->pending_mask, mask); -} -static inline void -irq_get_pending(struct cpumask *mask, struct irq_desc *desc) -{ - cpumask_copy(mask, desc->pending_mask); -} -#else -static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; } -static inline bool irq_move_pending(struct irq_data *data) { return false; } -static inline void -irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } -static inline void -irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } -#endif - int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -345,15 +317,18 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); /* * Generic version of the affinity autoselector. */ -static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) +int irq_setup_affinity(struct irq_desc *desc) { struct cpumask *set = irq_default_affinity; - int node = irq_desc_get_node(desc); + int ret, node = irq_desc_get_node(desc); + static DEFINE_RAW_SPINLOCK(mask_lock); + static struct cpumask mask; /* Excludes PER_CPU and NO_BALANCE interrupts */ if (!__irq_can_set_affinity(desc)) return 0; + raw_spin_lock(&mask_lock); /* * Preserve the managed affinity setting and a userspace affinity * setup, but make sure that one of the targets is online. @@ -367,46 +342,40 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); } - cpumask_and(mask, cpu_online_mask, set); + cpumask_and(&mask, cpu_online_mask, set); if (node != NUMA_NO_NODE) { const struct cpumask *nodemask = cpumask_of_node(node); /* make sure at least one of the cpus in nodemask is online */ - if (cpumask_intersects(mask, nodemask)) - cpumask_and(mask, mask, nodemask); + if (cpumask_intersects(&mask, nodemask)) + cpumask_and(&mask, &mask, nodemask); } - irq_do_set_affinity(&desc->irq_data, mask, false); - return 0; + ret = irq_do_set_affinity(&desc->irq_data, &mask, false); + raw_spin_unlock(&mask_lock); + return ret; } #else /* Wrapper for ALPHA specific affinity selector magic */ -static inline int setup_affinity(struct irq_desc *d, struct cpumask *mask) +int irq_setup_affinity(struct irq_desc *desc) { - return irq_select_affinity(irq_desc_get_irq(d)); + return irq_select_affinity(irq_desc_get_irq(desc)); } #endif /* - * Called when affinity is set via /proc/irq + * Called when a bogus affinity is set via /proc/irq */ -int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask) +int irq_select_affinity_usr(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; int ret; raw_spin_lock_irqsave(&desc->lock, flags); - ret = setup_affinity(desc, mask); + ret = irq_setup_affinity(desc); raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } - -#else -static inline int -setup_affinity(struct irq_desc *desc, struct cpumask *mask) -{ - return 0; -} #endif /** @@ -533,9 +502,15 @@ void __enable_irq(struct irq_desc *desc) goto err_out; /* Prevent probing on this irq: */ irq_settings_set_noprobe(desc); - irq_enable(desc); - check_irq_resend(desc); - /* fall-through */ + /* + * Call irq_startup() not irq_enable() here because the + * interrupt might be marked NOAUTOEN. So irq_startup() + * needs to be invoked when it gets enabled the first + * time. If it was already started up, then irq_startup() + * will invoke irq_enable() under the hood. + */ + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + break; } default: desc->depth--; @@ -1122,7 +1097,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; unsigned long flags, thread_mask = 0; int ret, nested, shared = 0; - cpumask_var_t mask; if (!desc) return -EINVAL; @@ -1181,11 +1155,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } } - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { - ret = -ENOMEM; - goto out_thread; - } - /* * Drivers are often written to work w/o knowledge about the * underlying irq chip implementation, so a request for a @@ -1250,7 +1219,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ if (thread_mask == ~0UL) { ret = -EBUSY; - goto out_mask; + goto out_unlock; } /* * The thread_mask for the action is or'ed to @@ -1294,7 +1263,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n", irq); ret = -EINVAL; - goto out_mask; + goto out_unlock; } if (!shared) { @@ -1302,7 +1271,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (ret) { pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", new->name, irq, desc->irq_data.chip->name); - goto out_mask; + goto out_unlock; } init_waitqueue_head(&desc->wait_for_threads); @@ -1312,8 +1281,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) ret = __irq_set_trigger(desc, new->flags & IRQF_TRIGGER_MASK); - if (ret) - goto out_mask; + if (ret) { + irq_release_resources(desc); + goto out_unlock; + } } desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ @@ -1328,20 +1299,25 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_ONESHOT) desc->istate |= IRQS_ONESHOT; - if (irq_settings_can_autoenable(desc)) - irq_startup(desc, true); - else - /* Undo nested disables: */ - desc->depth = 1; - /* Exclude IRQ from balancing if requested */ if (new->flags & IRQF_NOBALANCING) { irq_settings_set_no_balancing(desc); irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } - /* Set default affinity mask once everything is setup */ - setup_affinity(desc, mask); + if (irq_settings_can_autoenable(desc)) { + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + } else { + /* + * Shared interrupts do not go well with disabling + * auto enable. The sharing interrupt might request + * it while it's still disabled and then wait for + * interrupts forever. + */ + WARN_ON_ONCE(new->flags & IRQF_SHARED); + /* Undo nested disables: */ + desc->depth = 1; + } } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; @@ -1382,10 +1358,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) wake_up_process(new->secondary->thread); register_irq_proc(irq, desc); + irq_add_debugfs_entry(irq, desc); new->dir = NULL; register_handler_proc(irq, new); - free_cpumask_var(mask); - return 0; mismatch: @@ -1398,9 +1373,8 @@ mismatch: } ret = -EBUSY; -out_mask: +out_unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); - free_cpumask_var(mask); out_thread: if (new->thread) { diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 37ddb7bda651..6ca054a3f91d 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -4,6 +4,36 @@ #include "internals.h" +/** + * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU + * @desc: Interrupt descpriptor to clean up + * @force_clear: If set clear the move pending bit unconditionally. + * If not set, clear it only when the dying CPU is the + * last one in the pending mask. + * + * Returns true if the pending bit was set and the pending mask contains an + * online CPU other than the dying CPU. + */ +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + + if (!irqd_is_setaffinity_pending(data)) + return false; + + /* + * The outgoing CPU might be the last online target in a pending + * interrupt move. If that's the case clear the pending move bit. + */ + if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) { + irqd_clr_move_pending(data); + return false; + } + if (force_clear) + irqd_clr_move_pending(data); + return true; +} + void irq_move_masked_irq(struct irq_data *idata) { struct irq_desc *desc = irq_data_to_desc(idata); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ddc2f5427f75..48eadf416c24 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -265,13 +265,20 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { + struct irq_domain *domain; + if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS) msi_domain_update_dom_ops(info); if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) msi_domain_update_chip_ops(info); - return irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, - fwnode, &msi_domain_ops, info); + domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, + fwnode, &msi_domain_ops, info); + + if (domain && !domain->name && info->chip) + domain->name = info->chip->name; + + return domain; } int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, @@ -308,7 +315,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, ops->set_desc(arg, desc); /* Assumes the domain mutex is held! */ - ret = irq_domain_alloc_irqs_recursive(domain, virq, 1, arg); + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); if (ret) break; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index c53edad7b459..7f9642a1e267 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -37,19 +37,47 @@ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP -static int show_irq_affinity(int type, struct seq_file *m, void *v) +enum { + AFFINITY, + AFFINITY_LIST, + EFFECTIVE, + EFFECTIVE_LIST, +}; + +static int show_irq_affinity(int type, struct seq_file *m) { struct irq_desc *desc = irq_to_desc((long)m->private); - const struct cpumask *mask = desc->irq_common_data.affinity; + const struct cpumask *mask; + switch (type) { + case AFFINITY: + case AFFINITY_LIST: + mask = desc->irq_common_data.affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ - if (irqd_is_setaffinity_pending(&desc->irq_data)) - mask = desc->pending_mask; + if (irqd_is_setaffinity_pending(&desc->irq_data)) + mask = desc->pending_mask; #endif - if (type) + break; + case EFFECTIVE: + case EFFECTIVE_LIST: +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + mask = desc->irq_common_data.effective_affinity; + break; +#else + return -EINVAL; +#endif + }; + + switch (type) { + case AFFINITY_LIST: + case EFFECTIVE_LIST: seq_printf(m, "%*pbl\n", cpumask_pr_args(mask)); - else + break; + case AFFINITY: + case EFFECTIVE: seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); + break; + } return 0; } @@ -80,12 +108,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) int no_irq_affinity; static int irq_affinity_proc_show(struct seq_file *m, void *v) { - return show_irq_affinity(0, m, v); + return show_irq_affinity(AFFINITY, m); } static int irq_affinity_list_proc_show(struct seq_file *m, void *v) { - return show_irq_affinity(1, m, v); + return show_irq_affinity(AFFINITY_LIST, m); } @@ -120,9 +148,11 @@ static ssize_t write_irq_affinity(int type, struct file *file, * one online CPU still has to be targeted. */ if (!cpumask_intersects(new_value, cpu_online_mask)) { - /* Special case for empty set - allow the architecture - code to set default SMP affinity. */ - err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count; + /* + * Special case for empty set - allow the architecture code + * to set default SMP affinity. + */ + err = irq_select_affinity_usr(irq) ? -EINVAL : count; } else { irq_set_affinity(irq, new_value); err = count; @@ -183,6 +213,44 @@ static const struct file_operations irq_affinity_list_proc_fops = { .write = irq_affinity_list_proc_write, }; +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static int irq_effective_aff_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(EFFECTIVE, m); +} + +static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(EFFECTIVE_LIST, m); +} + +static int irq_effective_aff_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode)); +} + +static int irq_effective_aff_list_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, irq_effective_aff_list_proc_show, + PDE_DATA(inode)); +} + +static const struct file_operations irq_effective_aff_proc_fops = { + .open = irq_effective_aff_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations irq_effective_aff_list_proc_fops = { + .open = irq_effective_aff_list_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + static int default_affinity_show(struct seq_file *m, void *v) { seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity)); @@ -324,6 +392,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { static DEFINE_MUTEX(register_lock); + void __maybe_unused *irqp = (void *)(unsigned long) irq; char name [MAX_NAMELEN]; if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) @@ -349,20 +418,25 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) #ifdef CONFIG_SMP /* create /proc/irq/<irq>/smp_affinity */ proc_create_data("smp_affinity", 0644, desc->dir, - &irq_affinity_proc_fops, (void *)(long)irq); + &irq_affinity_proc_fops, irqp); /* create /proc/irq/<irq>/affinity_hint */ proc_create_data("affinity_hint", 0444, desc->dir, - &irq_affinity_hint_proc_fops, (void *)(long)irq); + &irq_affinity_hint_proc_fops, irqp); /* create /proc/irq/<irq>/smp_affinity_list */ proc_create_data("smp_affinity_list", 0644, desc->dir, - &irq_affinity_list_proc_fops, (void *)(long)irq); + &irq_affinity_list_proc_fops, irqp); proc_create_data("node", 0444, desc->dir, - &irq_node_proc_fops, (void *)(long)irq); + &irq_node_proc_fops, irqp); +# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + proc_create_data("effective_affinity", 0444, desc->dir, + &irq_effective_aff_proc_fops, irqp); + proc_create_data("effective_affinity_list", 0444, desc->dir, + &irq_effective_aff_list_proc_fops, irqp); +# endif #endif - proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); @@ -381,6 +455,10 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) remove_proc_entry("affinity_hint", desc->dir); remove_proc_entry("smp_affinity_list", desc->dir); remove_proc_entry("node", desc->dir); +# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + remove_proc_entry("effective_affinity", desc->dir); + remove_proc_entry("effective_affinity_list", desc->dir); +# endif #endif remove_proc_entry("spurious", desc->dir); diff --git a/kernel/power/process.c b/kernel/power/process.c index 78672d324a6e..c7209f060eeb 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,7 +132,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(true); + pm_wakeup_clear(); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c0248c74d6d4..15e6baef5c73 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -72,8 +72,6 @@ static void freeze_begin(void) static void freeze_enter(void) { - trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true); - spin_lock_irq(&suspend_freeze_lock); if (pm_wakeup_pending()) goto out; @@ -100,27 +98,6 @@ static void freeze_enter(void) out: suspend_freeze_state = FREEZE_STATE_NONE; spin_unlock_irq(&suspend_freeze_lock); - - trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false); -} - -static void s2idle_loop(void) -{ - do { - freeze_enter(); - - if (freeze_ops && freeze_ops->wake) - freeze_ops->wake(); - - dpm_resume_noirq(PMSG_RESUME); - if (freeze_ops && freeze_ops->sync) - freeze_ops->sync(); - - if (pm_wakeup_pending()) - break; - - pm_wakeup_clear(false); - } while (!dpm_suspend_noirq(PMSG_SUSPEND)); } void freeze_wake(void) @@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) * all the devices are suspended. */ if (state == PM_SUSPEND_FREEZE) { - s2idle_loop(); - goto Platform_early_resume; + trace_suspend_resume(TPS("machine_suspend"), state, true); + freeze_enter(); + trace_suspend_resume(TPS("machine_suspend"), state, false); + goto Platform_wake; } error = disable_nonboot_cpus(); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a1aecf44ab07..a1db38abac5b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -269,7 +269,6 @@ static struct console *exclusive_console; #define MAX_CMDLINECONSOLES 8 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; -static int console_cmdline_cnt; static int preferred_console = -1; int console_set_on_cmdline; @@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options, * See if this tty is not yet registered, and * if we have a slot free. */ - for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) { + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { if (strcmp(c->name, name) == 0 && c->index == idx) { - if (brl_options) - return 0; - - /* - * Maintain an invariant that will help to find if - * the matching console is preferred, see - * register_console(): - * - * The last non-braille console is always - * the preferred one. - */ - if (i != console_cmdline_cnt - 1) - swap(console_cmdline[i], - console_cmdline[console_cmdline_cnt - 1]); - - preferred_console = console_cmdline_cnt - 1; - + if (!brl_options) + preferred_console = i; return 0; } } @@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options, braille_set_options(c, brl_options); c->index = idx; - console_cmdline_cnt++; return 0; } /* @@ -2477,23 +2462,12 @@ void register_console(struct console *newcon) } /* - * See if this console matches one we selected on the command line. - * - * There may be several entries in the console_cmdline array matching - * with the same console, one with newcon->match(), another by - * name/index: - * - * pl011,mmio,0x87e024000000,115200 -- added from SPCR - * ttyAMA0 -- added from command line - * - * Traverse the console_cmdline array in reverse order to be - * sure that if this console is preferred then it will be the first - * matching entry. We use the invariant that is maintained in - * __add_preferred_console(). + * See if this console matches one we selected on + * the command line. */ - for (i = console_cmdline_cnt - 1; i >= 0; i--) { - c = console_cmdline + i; - + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { if (!newcon->match || newcon->match(newcon, c->name, c->index, c->options) != 0) { /* default matching */ diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 584d8a983883..dea03614263f 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. + * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) @@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->completed) & 0x1; - __this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); + this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). - * Must be called from process context. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 36e1f82faed1..32798eb14853 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -97,8 +97,9 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. - * Returns an index that must be passed to the matching srcu_read_unlock(). + * srcu_struct. Can be invoked from irq/bh handlers, but the matching + * __srcu_read_unlock() must be in the same handler instance. Returns an + * index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) { @@ -112,7 +113,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); /* * Removes the count for the old reader from the appropriate element of - * the srcu_struct. Must be called from process context. + * the srcu_struct. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 3ae8474557df..157654fa436a 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -357,7 +357,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. + * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) @@ -365,7 +365,7 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->srcu_idx) & 0x1; - __this_cpu_inc(sp->sda->srcu_lock_count[idx]); + this_cpu_inc(sp->sda->srcu_lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -375,7 +375,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). - * Must be called from process context. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 803c3bc274c4..326d4f88e2b1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5605,7 +5605,7 @@ void idle_task_exit(void) BUG_ON(cpu_online(smp_processor_id())); if (mm != &init_mm) { - switch_mm_irqs_off(mm, &init_mm, current); + switch_mm(mm, &init_mm, current); finish_arch_post_lock_switch(); } mmdrop(mm); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 622eed1b7658..076a2e31951c 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, if (sg_policy->next_freq == next_freq) return; - if (sg_policy->next_freq > next_freq) - next_freq = (sg_policy->next_freq + next_freq) >> 1; - sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d71109321841..c77e4b1d51c0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3563,7 +3563,7 @@ static inline void check_schedstat_required(void) trace_sched_stat_runtime_enabled()) { printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " "stat_blocked and stat_runtime require the " - "kernel parameter schedstats=enabled or " + "kernel parameter schedstats=enable or " "kernel.sched_schedstats=1\n"); } #endif diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 5cb5b0008d97..ee2f4202d82a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; - start = ktime_add(start, base->gettime()); + start = ktime_add_safe(start, base->gettime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); @@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) overrun++; } - alarm->node.expires = ktime_add(alarm->node.expires, interval); + alarm->node.expires = ktime_add_safe(alarm->node.expires, interval); return overrun; } EXPORT_SYMBOL_GPL(alarm_forward); @@ -660,13 +660,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, /* start the timer */ timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval); + + /* + * Rate limit to the tick as a hot fix to prevent DOS. Will be + * mopped up later. + */ + if (timr->it.alarm.interval < TICK_NSEC) + timr->it.alarm.interval = TICK_NSEC; + exp = timespec64_to_ktime(new_setting->it_value); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now; now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); - exp = ktime_add(now, exp); + exp = ktime_add_safe(now, exp); } alarm_start(&timr->it.alarm.alarmtimer, exp); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 987e496bb51a..b398c2ea69b2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -37,9 +37,11 @@ static int tick_broadcast_forced; static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); #ifdef CONFIG_TICK_ONESHOT +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif @@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, /** * tick_broadcast_setup_oneshot - setup the broadcast device */ -void tick_broadcast_setup_oneshot(struct clock_event_device *bc) +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { int cpu = smp_processor_id(); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f738251000fe..be0ac01f2e12 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) -extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); @@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } |