Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 15
-rw-r--r-- | kernel/events/core.c | 23
-rw-r--r-- | kernel/fork.c | 65
-rw-r--r-- | kernel/irq/Makefile | 3
-rw-r--r-- | kernel/irq/affinity.c | 12
-rw-r--r-- | kernel/irq/chip.c | 4
-rw-r--r-- | kernel/irq/internals.h | 21
-rw-r--r-- | kernel/irq/irqdesc.c | 16
-rw-r--r-- | kernel/irq/irqdomain.c | 4
-rw-r--r-- | kernel/irq/timings.c | 453
-rw-r--r-- | kernel/power/suspend.c | 3
-rw-r--r-- | kernel/ptrace.c | 7
-rw-r--r-- | kernel/signal.c | 5
-rw-r--r-- | kernel/smp.c | 12
-rw-r--r-- | kernel/softirq.c | 2
-rw-r--r-- | kernel/stop_machine.c | 19
-rw-r--r-- | kernel/trace/ftrace.c | 10
-rw-r--r-- | kernel/trace/trace.c | 24
-rw-r--r-- | kernel/up.c | 3
19 files changed, 564 insertions, 137 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 077fde6fb953..e84c0873559e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -522,7 +522,7 @@ static int bringup_wait_for_ap(unsigned int cpu) /* * SMT soft disabling on X86 requires to bring the CPU out of the * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The - * CPU marked itself as booted_once in cpu_notify_starting() so the + * CPU marked itself as booted_once in notify_cpu_starting() so the * cpu_smt_allowed() check will now return false if this is not the * primary sibling. */ @@ -1221,6 +1221,13 @@ int freeze_secondary_cpus(int primary) for_each_online_cpu(cpu) { if (cpu == primary) continue; + + if (pm_wakeup_pending()) { + pr_info("Wakeup pending. Abort CPU freeze\n"); + error = -EBUSY; + break; + } + trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1, CPUHP_OFFLINE); trace_suspend_resume(TPS("CPU_OFF"), cpu, false); @@ -1964,6 +1971,9 @@ static ssize_t write_cpuhp_fail(struct device *dev, if (ret) return ret; + if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) + return -EINVAL; + /* * Cannot fail STARTING/DYING callbacks. */ @@ -2339,6 +2349,9 @@ static int __init mitigations_parse_cmdline(char *arg) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + else + pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", + arg); return 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index e2d014395fc6..29e5f7880a4b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (perf_event_check_period(event, value)) return -EINVAL; + if (!event->attr.freq && (value & (1ULL << 63))) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -5923,7 +5926,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; - } else if (current->mm) { + } else if (!(current->flags & PF_KTHREAD)) { perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; @@ -10033,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu) } EXPORT_SYMBOL_GPL(perf_pmu_unregister); +static inline bool has_extended_regs(struct perf_event *event) +{ + return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) || + (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK); +} + static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) { struct perf_event_context *ctx = NULL; @@ -10064,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) perf_event_ctx_unlock(event->group_leader, ctx); if (!ret) { + if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) && + has_extended_regs(event)) + ret = -EOPNOTSUPP; + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && - event_has_any_exclude_flag(event)) { - if (event->destroy) - event->destroy(event); + event_has_any_exclude_flag(event)) ret = -EINVAL; - } + + if (ret && event->destroy) + event->destroy(event); } if (ret) diff --git a/kernel/fork.c b/kernel/fork.c index 4722f1a320bf..a83ef7243ccc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); - return page ? 
page_address(page) : NULL; + if (likely(page)) { + tsk->stack = page_address(page); + return tsk->stack; + } + return NULL; #endif } @@ -1712,31 +1716,6 @@ const struct file_operations pidfd_fops = { #endif }; -/** - * pidfd_create() - Create a new pid file descriptor. - * - * @pid: struct pid that the pidfd will reference - * - * This creates a new pid file descriptor with the O_CLOEXEC flag set. - * - * Note, that this function can only be called after the fd table has - * been unshared to avoid leaking the pidfd to the new process. - * - * Return: On success, a cloexec pidfd is returned. - * On error, a negative errno number will be returned. - */ -static int pidfd_create(struct pid *pid) -{ - int fd; - - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), - O_RDWR | O_CLOEXEC); - if (fd < 0) - put_pid(pid); - - return fd; -} - static void __delayed_free_task(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); @@ -1774,6 +1753,7 @@ static __latent_entropy struct task_struct *copy_process( int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; + struct file *pidfile = NULL; /* * Don't allow sharing the root directory with processes in a different @@ -1822,8 +1802,6 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* * - CLONE_PARENT_SETTID is useless for pidfds and also * parent_tidptr is used to return pidfds. @@ -1834,16 +1812,6 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) - return ERR_PTR(-EINVAL); } /* @@ -2058,11 +2026,21 @@ static __latent_entropy struct task_struct *copy_process( * if the fd table isn't shared). 
*/ if (clone_flags & CLONE_PIDFD) { - retval = pidfd_create(pid); + retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (retval < 0) goto bad_fork_free_pid; pidfd = retval; + + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, + O_RDWR | O_CLOEXEC); + if (IS_ERR(pidfile)) { + put_unused_fd(pidfd); + retval = PTR_ERR(pidfile); + goto bad_fork_free_pid; + } + get_pid(pid); /* held by pidfile now */ + retval = put_user(pidfd, parent_tidptr); if (retval) goto bad_fork_put_pidfd; @@ -2180,6 +2158,9 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } + /* past the last point of failure */ + if (pidfile) + fd_install(pidfd, pidfile); init_task_pid_links(p); if (likely(p->pid)) { @@ -2246,8 +2227,10 @@ bad_fork_cancel_cgroup: bad_fork_cgroup_threadgroup_change_end: cgroup_threadgroup_change_end(current); bad_fork_put_pidfd: - if (clone_flags & CLONE_PIDFD) - ksys_close(pidfd); + if (clone_flags & CLONE_PIDFD) { + fput(pidfile); + put_unused_fd(pidfd); + } bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index ff6e352e3a6c..b4f53717d143 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -2,6 +2,9 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o obj-$(CONFIG_IRQ_TIMINGS) += timings.o +ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) + CFLAGS_timings.o += -DDEBUG +endif obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f18cd5aa33e8..4352b08ae48d 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -94,8 +94,7 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, return nodes; } -static int __irq_build_affinity_masks(const struct irq_affinity *affd, - unsigned int startvec, +static int __irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, unsigned int firstvec, cpumask_var_t *node_to_cpumask, @@ -171,8 +170,7 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd, * 1) spread present CPU on these vectors * 2) spread other possible CPUs on these vectors */ -static int irq_build_affinity_masks(const struct irq_affinity *affd, - unsigned int startvec, unsigned int numvecs, +static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, unsigned int firstvec, struct irq_affinity_desc *masks) { @@ -197,7 +195,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, build_node_to_cpumask(node_to_cpumask); /* Spread on present CPUs starting from affd->pre_vectors */ - nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, + nr_present = __irq_build_affinity_masks(curvec, numvecs, firstvec, node_to_cpumask, cpu_present_mask, nmsk, masks); @@ -212,7 +210,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, else curvec = firstvec + nr_present; cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, + nr_others = __irq_build_affinity_masks(curvec, numvecs, firstvec, node_to_cpumask, npresmsk, nmsk, masks); put_online_cpus(); @@ -295,7 +293,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) unsigned int this_vecs = affd->set_size[i]; int ret; - ret = irq_build_affinity_masks(affd, curvec, this_vecs, + ret = irq_build_affinity_masks(curvec, this_vecs, curvec, masks); if (ret) { kfree(masks); 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 29d6c7d070b4..04c850fb70cb 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -748,6 +748,8 @@ void handle_fasteoi_nmi(struct irq_desc *desc) unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; + __kstat_incr_irqs_this_cpu(desc); + trace_irq_handler_entry(irq, action); /* * NMIs cannot be shared, there is only one action. @@ -962,6 +964,8 @@ void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; + __kstat_incr_irqs_this_cpu(desc); + trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 70c3053bc1f6..21f9927ff5ad 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -354,6 +354,16 @@ static inline int irq_timing_decode(u64 value, u64 *timestamp) return value & U16_MAX; } +static __always_inline void irq_timings_push(u64 ts, int irq) +{ + struct irq_timings *timings = this_cpu_ptr(&irq_timings); + + timings->values[timings->count & IRQ_TIMINGS_MASK] = + irq_timing_encode(ts, irq); + + timings->count++; +} + /* * The function record_irq_time is only called in one place in the * interrupts handler. We want this function always inline so the code @@ -367,15 +377,8 @@ static __always_inline void record_irq_time(struct irq_desc *desc) if (!static_branch_likely(&irq_timing_enabled)) return; - if (desc->istate & IRQS_TIMINGS) { - struct irq_timings *timings = this_cpu_ptr(&irq_timings); - - timings->values[timings->count & IRQ_TIMINGS_MASK] = - irq_timing_encode(local_clock(), - irq_desc_get_irq(desc)); - - timings->count++; - } + if (desc->istate & IRQS_TIMINGS) + irq_timings_push(local_clock(), irq_desc_get_irq(desc)); } #else static inline void irq_remove_timings(struct irq_desc *desc) {} diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index c52b737ab8e3..9484e88dabc2 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, * @hwirq: The HW irq number to convert to a logical one * @regs: Register file coming from the low-level handling code * + * This function must be called from an NMI context. + * * Returns: 0 on success, or -EINVAL if conversion has failed */ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, @@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, unsigned int irq; int ret = 0; - nmi_enter(); + /* + * NMI context needs to be setup earlier in order to deal with tracing. 
+ */ + WARN_ON(!in_nmi()); irq = irq_find_mapping(domain, hwirq); @@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, else ret = -EINVAL; - nmi_exit(); set_irq_regs(old_regs); return ret; } @@ -946,6 +950,11 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } +static bool irq_is_nmi(struct irq_desc *desc) +{ + return desc->istate & IRQS_NMI; +} + /** * kstat_irqs - Get the statistics for an interrupt * @irq: The interrupt number @@ -963,7 +972,8 @@ unsigned int kstat_irqs(unsigned int irq) if (!desc || !desc->kstat_irqs) return 0; if (!irq_settings_is_per_cpu_devid(desc) && - !irq_settings_is_per_cpu(desc)) + !irq_settings_is_per_cpu(desc) && + !irq_is_nmi(desc)) return desc->tot_count; for_each_possible_cpu(cpu) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index a453e229f99c..3078d0e48bba 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); * @ops: domain callbacks * @host_data: Controller private data pointer * - * Allocates and initialize and irq_domain structure. + * Allocates and initializes an irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, @@ -139,7 +139,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); - if (WARN_ON(!domain)) + if (!domain) return NULL; if (fwnode && is_fwnode_irqchip(fwnode)) { diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 90c735da15d0..e960d7ce7bcc 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define pr_fmt(fmt) "irq_timings: " fmt #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/static_key.h> +#include <linux/init.h> #include <linux/interrupt.h> #include <linux/idr.h> #include <linux/irq.h> @@ -261,12 +263,29 @@ void irq_timings_disable(void) #define EMA_ALPHA_VAL 64 #define EMA_ALPHA_SHIFT 7 -#define PREDICTION_PERIOD_MIN 2 +#define PREDICTION_PERIOD_MIN 3 #define PREDICTION_PERIOD_MAX 5 #define PREDICTION_FACTOR 4 #define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ #define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ +/* + * Number of elements in the circular buffer: If it happens it was + * flushed before, then the number of elements could be smaller than + * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is + * used as we wrapped. The index begins from zero when we did not + * wrap. That could be done in a nicer way with the proper circular + * array structure type but with the cost of extra computation in the + * interrupt handler hot path. We choose efficiency. + */ +#define for_each_irqts(i, irqts) \ + for (i = irqts->count < IRQ_TIMINGS_SIZE ? 
\ + 0 : irqts->count & IRQ_TIMINGS_MASK, \ + irqts->count = min(IRQ_TIMINGS_SIZE, \ + irqts->count); \ + irqts->count > 0; irqts->count--, \ + i = (i + 1) & IRQ_TIMINGS_MASK) + struct irqt_stat { u64 last_ts; u64 ema_time[PREDICTION_BUFFER_SIZE]; @@ -297,7 +316,16 @@ static u64 irq_timings_ema_new(u64 value, u64 ema_old) static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) { - int i; + int period; + + /* + * Move the beginning pointer to the end minus the max period x 3. + * We are at the point we can begin searching the pattern + */ + buffer = &buffer[len - (period_max * 3)]; + + /* Adjust the length to the maximum allowed period x 3 */ + len = period_max * 3; /* * The buffer contains the suite of intervals, in a ilog2 @@ -306,21 +334,45 @@ static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) * period beginning at the end of the buffer. We do that for * each suffix. */ - for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) { + for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) { - int *begin = &buffer[len - (i * 3)]; - int *ptr = begin; + /* + * The first comparison always succeed because the + * suffix is deduced from the first n-period bytes of + * the buffer and we compare the initial suffix with + * itself, so we can skip the first iteration. + */ + int idx = period; + size_t size = period; /* * We look if the suite with period 'i' repeat * itself. If it is truncated at the end, as it * repeats we can use the period to find out the next - * element. + * element with the modulo. */ - while (!memcmp(ptr, begin, i * sizeof(*ptr))) { - ptr += i; - if (ptr >= &buffer[len]) - return begin[((i * 3) % i)]; + while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) { + + /* + * Move the index in a period basis + */ + idx += size; + + /* + * If this condition is reached, all previous + * memcmp were successful, so the period is + * found. + */ + if (idx == len) + return buffer[len % period]; + + /* + * If the remaining elements to compare are + * smaller than the period, readjust the size + * of the comparison for the last iteration. + */ + if (len - idx < period) + size = len - idx; } } @@ -380,11 +432,43 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) return irqs->last_ts + irqs->ema_time[index]; } +static __always_inline int irq_timings_interval_index(u64 interval) +{ + /* + * The PREDICTION_FACTOR increase the interval size for the + * array of exponential average. + */ + u64 interval_us = (interval >> 10) / PREDICTION_FACTOR; + + return likely(interval_us) ? ilog2(interval_us) : 0; +} + +static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, + u64 interval) +{ + int index; + + /* + * Get the index in the ema table for this interrupt. + */ + index = irq_timings_interval_index(interval); + + /* + * Store the index as an element of the pattern in another + * circular array. + */ + irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; + + irqs->ema_time[index] = irq_timings_ema_new(interval, + irqs->ema_time[index]); + + irqs->count++; +} + static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) { u64 old_ts = irqs->last_ts; u64 interval; - int index; /* * The timestamps are absolute time values, we need to compute @@ -415,24 +499,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) return; } - /* - * Get the index in the ema table for this interrupt. 
The - * PREDICTION_FACTOR increase the interval size for the array - * of exponential average. - */ - index = likely(interval) ? - ilog2((interval >> 10) / PREDICTION_FACTOR) : 0; - - /* - * Store the index as an element of the pattern in another - * circular array. - */ - irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; - - irqs->ema_time[index] = irq_timings_ema_new(interval, - irqs->ema_time[index]); - - irqs->count++; + __irq_timings_store(irq, irqs, interval); } /** @@ -493,11 +560,7 @@ u64 irq_timings_next_event(u64 now) * model while decrementing the counter because we consume the * data from our circular buffer. */ - - i = (irqts->count & IRQ_TIMINGS_MASK) - 1; - irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); - - for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { + for_each_irqts(i, irqts) { irq = irq_timing_decode(irqts->values[i], &ts); s = idr_find(&irqt_stats, irq); if (s) @@ -564,3 +627,325 @@ int irq_timings_alloc(int irq) return 0; } + +#ifdef CONFIG_TEST_IRQ_TIMINGS +struct timings_intervals { + u64 *intervals; + size_t count; +}; + +/* + * Intervals are given in nanosecond base + */ +static u64 intervals0[] __initdata = { + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, +}; + +static u64 intervals1[] __initdata = { + 223947000, 1240000, 1384000, 1386000, 1386000, + 217416000, 1236000, 1384000, 1386000, 1387000, + 214719000, 1241000, 1386000, 1387000, 1384000, + 213696000, 1234000, 1384000, 1386000, 1388000, + 219904000, 1240000, 1385000, 1389000, 1385000, + 212240000, 1240000, 1386000, 1386000, 1386000, + 214415000, 1236000, 1384000, 1386000, 1387000, + 214276000, 1234000, +}; + +static u64 intervals2[] __initdata = { + 4000, 3000, 5000, 100000, + 3000, 3000, 5000, 117000, + 4000, 4000, 5000, 112000, + 4000, 3000, 4000, 110000, + 3000, 5000, 3000, 117000, + 4000, 4000, 5000, 112000, + 4000, 3000, 4000, 110000, + 3000, 4000, 5000, 112000, + 4000, +}; + +static u64 intervals3[] __initdata = { + 1385000, 212240000, 1240000, + 1386000, 214415000, 1236000, + 1384000, 214276000, 1234000, + 1386000, 214415000, 1236000, + 1385000, 212240000, 1240000, + 1386000, 214415000, 1236000, + 1384000, 214276000, 1234000, + 1386000, 214415000, 1236000, + 1385000, 212240000, 1240000, +}; + +static u64 intervals4[] __initdata = { + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, +}; + +static struct timings_intervals tis[] __initdata = { + { intervals0, ARRAY_SIZE(intervals0) }, + { intervals1, ARRAY_SIZE(intervals1) }, + { intervals2, ARRAY_SIZE(intervals2) }, + { intervals3, ARRAY_SIZE(intervals3) }, + { intervals4, ARRAY_SIZE(intervals4) }, +}; + +static int __init irq_timings_test_next_index(struct timings_intervals *ti) +{ + int _buffer[IRQ_TIMINGS_SIZE]; + int buffer[IRQ_TIMINGS_SIZE]; + int index, start, i, count, period_max; + + count = ti->count - 1; + + period_max = count > (3 * PREDICTION_PERIOD_MAX) ? + PREDICTION_PERIOD_MAX : count / 3; + + /* + * Inject all values except the last one which will be used + * to compare with the next index result. 
+ */ + pr_debug("index suite: "); + + for (i = 0; i < count; i++) { + index = irq_timings_interval_index(ti->intervals[i]); + _buffer[i & IRQ_TIMINGS_MASK] = index; + pr_cont("%d ", index); + } + + start = count < IRQ_TIMINGS_SIZE ? 0 : + count & IRQ_TIMINGS_MASK; + + count = min_t(int, count, IRQ_TIMINGS_SIZE); + + for (i = 0; i < count; i++) { + int index = (start + i) & IRQ_TIMINGS_MASK; + buffer[i] = _buffer[index]; + } + + index = irq_timings_next_event_index(buffer, count, period_max); + i = irq_timings_interval_index(ti->intervals[ti->count - 1]); + + if (index != i) { + pr_err("Expected (%d) and computed (%d) next indexes differ\n", + i, index); + return -EINVAL; + } + + return 0; +} + +static int __init irq_timings_next_index_selftest(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(tis); i++) { + + pr_info("---> Injecting intervals number #%d (count=%zd)\n", + i, tis[i].count); + + ret = irq_timings_test_next_index(&tis[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_test_irqs(struct timings_intervals *ti) +{ + struct irqt_stat __percpu *s; + struct irqt_stat *irqs; + int i, index, ret, irq = 0xACE5; + + ret = irq_timings_alloc(irq); + if (ret) { + pr_err("Failed to allocate irq timings\n"); + return ret; + } + + s = idr_find(&irqt_stats, irq); + if (!s) { + ret = -EIDRM; + goto out; + } + + irqs = this_cpu_ptr(s); + + for (i = 0; i < ti->count; i++) { + + index = irq_timings_interval_index(ti->intervals[i]); + pr_debug("%d: interval=%llu ema_index=%d\n", + i, ti->intervals[i], index); + + __irq_timings_store(irq, irqs, ti->intervals[i]); + if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { + pr_err("Failed to store in the circular buffer\n"); + goto out; + } + } + + if (irqs->count != ti->count) { + pr_err("Count differs\n"); + goto out; + } + + ret = 0; +out: + irq_timings_free(irq); + + return ret; +} + +static int __init irq_timings_irqs_selftest(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(tis); i++) { + pr_info("---> Injecting intervals number #%d (count=%zd)\n", + i, tis[i].count); + ret = irq_timings_test_irqs(&tis[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_test_irqts(struct irq_timings *irqts, + unsigned count) +{ + int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0; + int i, irq, oirq = 0xBEEF; + u64 ots = 0xDEAD, ts; + + /* + * Fill the circular buffer by using the dedicated function. + */ + for (i = 0; i < count; i++) { + pr_debug("%d: index=%d, ts=%llX irq=%X\n", + i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i); + + irq_timings_push(ots + i, oirq + i); + } + + /* + * Compute the first elements values after the index wrapped + * up or not. + */ + ots += start; + oirq += start; + + /* + * Test the circular buffer count is correct. + */ + pr_debug("---> Checking timings array count (%d) is right\n", count); + if (WARN_ON(irqts->count != count)) + return -EINVAL; + + /* + * Test the macro allowing to browse all the irqts. 
+ */ + pr_debug("---> Checking the for_each_irqts() macro\n"); + for_each_irqts(i, irqts) { + + irq = irq_timing_decode(irqts->values[i], &ts); + + pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n", + i, ts, ots, irq, oirq); + + if (WARN_ON(ts != ots || irq != oirq)) + return -EINVAL; + + ots++; oirq++; + } + + /* + * The circular buffer should have be flushed when browsed + * with for_each_irqts + */ + pr_debug("---> Checking timings array is empty after browsing it\n"); + if (WARN_ON(irqts->count)) + return -EINVAL; + + return 0; +} + +static int __init irq_timings_irqts_selftest(void) +{ + struct irq_timings *irqts = this_cpu_ptr(&irq_timings); + int i, ret; + + /* + * Test the circular buffer with different number of + * elements. The purpose is to test at the limits (empty, half + * full, full, wrapped with the cursor at the boundaries, + * wrapped several times, etc ... + */ + int count[] = { 0, + IRQ_TIMINGS_SIZE >> 1, + IRQ_TIMINGS_SIZE, + IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1), + 2 * IRQ_TIMINGS_SIZE, + (2 * IRQ_TIMINGS_SIZE) + 3, + }; + + for (i = 0; i < ARRAY_SIZE(count); i++) { + + pr_info("---> Checking the timings with %d/%d values\n", + count[i], IRQ_TIMINGS_SIZE); + + ret = irq_timings_test_irqts(irqts, count[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_selftest(void) +{ + int ret; + + pr_info("------------------- selftest start -----------------\n"); + + /* + * At this point, we don't except any subsystem to use the irq + * timings but us, so it should not be enabled. + */ + if (static_branch_unlikely(&irq_timing_enabled)) { + pr_warn("irq timings already initialized, skipping selftest\n"); + return 0; + } + + ret = irq_timings_irqts_selftest(); + if (ret) + goto out; + + ret = irq_timings_irqs_selftest(); + if (ret) + goto out; + + ret = irq_timings_next_index_selftest(); +out: + pr_info("---------- selftest end with %s -----------\n", + ret ? "failure" : "success"); + + return ret; +} +early_initcall(irq_timings_selftest); +#endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 9505101ed2bc..096211299c07 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state) pm_suspend_target_state = state; + if (state == PM_SUSPEND_TO_IDLE) + pm_set_suspend_no_platform(); + error = platform_suspend_begin(state); if (error) goto Close; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8456b6e2205f..83a531cea2f3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -79,9 +79,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, */ static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - rcu_read_lock(); - __ptrace_link(child, new_parent, __task_cred(new_parent)); - rcu_read_unlock(); + __ptrace_link(child, new_parent, current_cred()); } /** @@ -118,6 +116,9 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +#ifdef TIF_SYSCALL_EMU + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); +#endif child->parent = child->real_parent; list_del_init(&child->ptrace_entry); diff --git a/kernel/signal.c b/kernel/signal.c index d622eac9d169..edf8915ddd54 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask); * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed in from userland for the syscalls. 
*/ -void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) +void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved, + bool interrupted) { if (!usigmask) @@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) * Restoring sigmask here can lead to delivering signals that the above * syscalls are intended to block because of the sigmask passed in. */ - if (signal_pending(current)) { + if (interrupted) { current->saved_sigmask = *sigsaved; set_restore_sigmask(); return; diff --git a/kernel/smp.c b/kernel/smp.c index d155374632eb..616d4d114847 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -34,7 +34,7 @@ struct call_function_data { cpumask_var_t cpumask_ipi; }; -static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); +static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data); static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); @@ -487,13 +487,11 @@ EXPORT_SYMBOL(smp_call_function_many); * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(smp_call_func_t func, void *info, int wait) +void smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); preempt_enable(); - - return 0; } EXPORT_SYMBOL(smp_call_function); @@ -594,18 +592,16 @@ void __init smp_init(void) * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead * of local_irq_disable/enable(). */ -int on_each_cpu(void (*func) (void *info), void *info, int wait) +void on_each_cpu(void (*func) (void *info), void *info, int wait) { unsigned long flags; - int ret = 0; preempt_disable(); - ret = smp_call_function(func, info, wait); + smp_call_function(func, info, wait); local_irq_save(flags); func(info); local_irq_restore(flags); preempt_enable(); - return ret; } EXPORT_SYMBOL(on_each_cpu); diff --git a/kernel/softirq.c b/kernel/softirq.c index a6b81c6b6bff..0427a86743a4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -649,7 +649,7 @@ static int takeover_tasklets(unsigned int cpu) /* Find end, append list for that CPU. */ if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head; - this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); + __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); per_cpu(tasklet_vec, cpu).head = NULL; per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2b5a6754646f..b4f83f7bdf86 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -177,12 +177,18 @@ static void ack_state(struct multi_stop_data *msdata) set_state(msdata, msdata->state + 1); } +void __weak stop_machine_yield(const struct cpumask *cpumask) +{ + cpu_relax(); +} + /* This is the cpu_stop function which stops the CPU. 
*/ static int multi_cpu_stop(void *data) { struct multi_stop_data *msdata = data; enum multi_stop_state curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; + const struct cpumask *cpumask; unsigned long flags; bool is_active; @@ -192,15 +198,18 @@ static int multi_cpu_stop(void *data) */ local_save_flags(flags); - if (!msdata->active_cpus) - is_active = cpu == cpumask_first(cpu_online_mask); - else - is_active = cpumask_test_cpu(cpu, msdata->active_cpus); + if (!msdata->active_cpus) { + cpumask = cpu_online_mask; + is_active = cpu == cpumask_first(cpumask); + } else { + cpumask = msdata->active_cpus; + is_active = cpumask_test_cpu(cpu, cpumask); + } /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax_yield(); + stop_machine_yield(cpumask); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 38277af44f5c..576c41644e77 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -34,7 +34,6 @@ #include <linux/hash.h> #include <linux/rcupdate.h> #include <linux/kprobes.h> -#include <linux/memory.h> #include <trace/events/sched.h> @@ -2611,12 +2610,10 @@ static void ftrace_run_update_code(int command) { int ret; - mutex_lock(&text_mutex); - ret = ftrace_arch_code_modify_prepare(); FTRACE_WARN_ON(ret); if (ret) - goto out_unlock; + return; /* * By default we use stop_machine() to modify the code. @@ -2628,9 +2625,6 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); - -out_unlock: - mutex_unlock(&text_mutex); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, @@ -5784,7 +5778,6 @@ void ftrace_module_enable(struct module *mod) struct ftrace_page *pg; mutex_lock(&ftrace_lock); - mutex_lock(&text_mutex); if (ftrace_disabled) goto out_unlock; @@ -5846,7 +5839,6 @@ void ftrace_module_enable(struct module *mod) ftrace_arch_code_modify_post_process(); out_unlock: - mutex_unlock(&text_mutex); mutex_unlock(&ftrace_lock); process_cached_mods(mod->name); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83e08b78dbee..c3aabb576fe5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, break; } #endif - if (!tr->allocated_snapshot) { + if (tr->allocated_snapshot) + ret = resize_buffer_duplicate_size(&tr->max_buffer, + &tr->trace_buffer, iter->cpu_file); + else ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) - break; - } + if (ret < 0) + break; local_irq_disable(); /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) @@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file, return count; } +static int tracing_err_log_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + + return 0; +} + static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, .llseek = seq_lseek, - .release = tracing_release_generic_tr, + .release = tracing_err_log_release, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) diff --git a/kernel/up.c b/kernel/up.c index 483c9962c999..862b460ab97a 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -35,14 +35,13 @@ int 
smp_call_function_single_async(int cpu, call_single_data_t *csd) } EXPORT_SYMBOL(smp_call_function_single_async); -int on_each_cpu(smp_call_func_t func, void *info, int wait) +void on_each_cpu(smp_call_func_t func, void *info, int wait) { unsigned long flags; local_irq_save(flags); func(info); local_irq_restore(flags); - return 0; } EXPORT_SYMBOL(on_each_cpu);
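
As an aside on the kernel/irq/timings.c hunks above: the reworked irq_timings_next_event_index() looks at the last period_max * 3 entries of the interval-index buffer, tries each candidate period from PREDICTION_PERIOD_MAX down to PREDICTION_PERIOD_MIN, and, if the suffix repeats with that period, predicts the next element with a modulo lookup. Below is a minimal standalone userspace sketch of that suffix-period search; the function name, constants and sample data are illustrative only and are not taken from the kernel sources, and the truncated final comparison is handled the same way as in the patch.

/*
 * Standalone sketch of the suffix-period search performed by
 * irq_timings_next_event_index().  Names and constants here are
 * illustrative, not the kernel's.
 */
#include <stdio.h>
#include <string.h>

#define PERIOD_MIN 3
#define PERIOD_MAX 5

static int next_event_index(const int *buffer, size_t len, int period_max)
{
	int period;

	/* Only the last period_max * 3 samples are considered. */
	buffer += len - (size_t)(period_max * 3);
	len = (size_t)(period_max * 3);

	for (period = period_max; period >= PERIOD_MIN; period--) {
		size_t idx = period;
		size_t size = period;

		/* Compare the leading block against each following block. */
		while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
			idx += size;

			/* All blocks matched: the suffix repeats with this period. */
			if (idx == len)
				return buffer[len % period];

			/* Shorten the final, truncated comparison. */
			if (len - idx < (size_t)period)
				size = len - idx;
		}
	}

	return -1; /* no repeating pattern found */
}

int main(void)
{
	/* A period-4 pattern 3, 5, 8, 10 truncated after "8". */
	int samples[] = { 3, 5, 8, 10, 3, 5, 8, 10, 3, 5, 8, 10, 3, 5, 8 };

	printf("next index: %d\n",
	       next_event_index(samples, sizeof(samples) / sizeof(samples[0]),
				PERIOD_MAX));
	return 0;
}

Run against the sample buffer this prints "next index: 10", i.e. the element that continues the period-4 pattern, which mirrors how the kernel picks the EMA slot for the predicted next interrupt interval.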