Diffstat (limited to 'kernel/events/core.c')
-rw-r--r-- | kernel/events/core.c | 196
1 file changed, 130 insertions, 66 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 440eefc67397..a33d9a2bcbd7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,8 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/module.h>
+#include <linux/mman.h>

 #include "internal.h"

@@ -607,7 +609,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
         if (!f.file)
                 return -EBADF;

-        css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+        css = css_tryget_online_from_dir(f.file->f_dentry,
+                                         &perf_event_cgrp_subsys);
         if (IS_ERR(css)) {
                 ret = PTR_ERR(css);
                 goto out;
@@ -1677,6 +1680,8 @@ event_sched_in(struct perf_event *event,
         u64 tstamp = perf_event_time(event);
         int ret = 0;

+        lockdep_assert_held(&ctx->lock);
+
         if (event->state <= PERF_EVENT_STATE_OFF)
                 return 0;

@@ -2970,6 +2975,22 @@ out:
         local_irq_restore(flags);
 }

+void perf_event_exec(void)
+{
+        struct perf_event_context *ctx;
+        int ctxn;
+
+        rcu_read_lock();
+        for_each_task_context_nr(ctxn) {
+                ctx = current->perf_event_ctxp[ctxn];
+                if (!ctx)
+                        continue;
+
+                perf_event_enable_on_exec(ctx);
+        }
+        rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -3244,9 +3265,13 @@ static void __free_event(struct perf_event *event)
         if (event->ctx)
                 put_ctx(event->ctx);

+        if (event->pmu)
+                module_put(event->pmu->module);
+
         call_rcu(&event->rcu_head, free_event_rcu);
 }

-static void free_event(struct perf_event *event)
+
+static void _free_event(struct perf_event *event)
 {
         irq_work_sync(&event->pending);

@@ -3267,42 +3292,31 @@ static void free_event(struct perf_event *event)

         if (is_cgroup_event(event))
                 perf_detach_cgroup(event);
-
         __free_event(event);
 }

-int perf_event_release_kernel(struct perf_event *event)
+/*
+ * Used to free events which have a known refcount of 1, such as in error paths
+ * where the event isn't exposed yet and inherited events.
+ */
+static void free_event(struct perf_event *event)
 {
-        struct perf_event_context *ctx = event->ctx;
-
-        WARN_ON_ONCE(ctx->parent_ctx);
-        /*
-         * There are two ways this annotation is useful:
-         *
-         * 1) there is a lock recursion from perf_event_exit_task
-         *    see the comment there.
-         *
-         * 2) there is a lock-inversion with mmap_sem through
-         *    perf_event_read_group(), which takes faults while
-         *    holding ctx->mutex, however this is called after
-         *    the last filedesc died, so there is no possibility
-         *    to trigger the AB-BA case.
-         */
-        mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-        perf_remove_from_context(event, true);
-        mutex_unlock(&ctx->mutex);
-
-        free_event(event);
+        if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
+                                "unexpected event refcount: %ld; ptr=%p\n",
+                                atomic_long_read(&event->refcount), event)) {
+                /* leak to avoid use-after-free */
+                return;
+        }

-        return 0;
+        _free_event(event);
 }
-EXPORT_SYMBOL_GPL(perf_event_release_kernel);

 /*
  * Called when the last reference to the file is gone.
  */
 static void put_event(struct perf_event *event)
 {
+        struct perf_event_context *ctx = event->ctx;
         struct task_struct *owner;

         if (!atomic_long_dec_and_test(&event->refcount))
@@ -3341,9 +3355,33 @@ static void put_event(struct perf_event *event)
                 put_task_struct(owner);
         }

-        perf_event_release_kernel(event);
+        WARN_ON_ONCE(ctx->parent_ctx);
+        /*
+         * There are two ways this annotation is useful:
+         *
+         * 1) there is a lock recursion from perf_event_exit_task
+         *    see the comment there.
+         *
+         * 2) there is a lock-inversion with mmap_sem through
+         *    perf_event_read_group(), which takes faults while
+         *    holding ctx->mutex, however this is called after
+         *    the last filedesc died, so there is no possibility
+         *    to trigger the AB-BA case.
+         */
+        mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+        perf_remove_from_context(event, true);
+        mutex_unlock(&ctx->mutex);
+
+        _free_event(event);
 }

+int perf_event_release_kernel(struct perf_event *event)
+{
+        put_event(event);
+        return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_release(struct inode *inode, struct file *file)
 {
         put_event(file->private_data);
@@ -5054,21 +5092,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
                        NULL);
 }

-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
         struct perf_comm_event comm_event;
-        struct perf_event_context *ctx;
-        int ctxn;
-
-        rcu_read_lock();
-        for_each_task_context_nr(ctxn) {
-                ctx = task->perf_event_ctxp[ctxn];
-                if (!ctx)
-                        continue;
-
-                perf_event_enable_on_exec(ctx);
-        }
-        rcu_read_unlock();

         if (!atomic_read(&nr_comm_events))
                 return;
@@ -5080,7 +5106,7 @@ void perf_event_comm(struct task_struct *task)
                 .event_id = {
                         .header = {
                                 .type = PERF_RECORD_COMM,
-                                .misc = 0,
+                                .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
                                 /* .size */
                         },
                         /* .pid */
@@ -5103,6 +5129,7 @@ struct perf_mmap_event {
         int             maj, min;
         u64             ino;
         u64             ino_generation;
+        u32             prot, flags;

         struct {
                 struct perf_event_header        header;
@@ -5144,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
                 mmap_event->event_id.header.size += sizeof(mmap_event->min);
                 mmap_event->event_id.header.size += sizeof(mmap_event->ino);
                 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
+                mmap_event->event_id.header.size += sizeof(mmap_event->prot);
+                mmap_event->event_id.header.size += sizeof(mmap_event->flags);
         }

         perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5162,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
                 perf_output_put(&handle, mmap_event->min);
                 perf_output_put(&handle, mmap_event->ino);
                 perf_output_put(&handle, mmap_event->ino_generation);
+                perf_output_put(&handle, mmap_event->prot);
+                perf_output_put(&handle, mmap_event->flags);
         }

         __output_copy(&handle, mmap_event->file_name,
@@ -5180,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
         struct file *file = vma->vm_file;
         int maj = 0, min = 0;
         u64 ino = 0, gen = 0;
+        u32 prot = 0, flags = 0;
         unsigned int size;
         char tmp[16];
         char *buf = NULL;
@@ -5210,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                 gen = inode->i_generation;
                 maj = MAJOR(dev);
                 min = MINOR(dev);
+
+                if (vma->vm_flags & VM_READ)
+                        prot |= PROT_READ;
+                if (vma->vm_flags & VM_WRITE)
+                        prot |= PROT_WRITE;
+                if (vma->vm_flags & VM_EXEC)
+                        prot |= PROT_EXEC;
+
+                if (vma->vm_flags & VM_MAYSHARE)
+                        flags = MAP_SHARED;
+                else
+                        flags = MAP_PRIVATE;
+
+                if (vma->vm_flags & VM_DENYWRITE)
+                        flags |= MAP_DENYWRITE;
+                if (vma->vm_flags & VM_MAYEXEC)
+                        flags |= MAP_EXECUTABLE;
+                if (vma->vm_flags & VM_LOCKED)
+                        flags |= MAP_LOCKED;
+                if (vma->vm_flags & VM_HUGETLB)
+                        flags |= MAP_HUGETLB;
+
                 goto got_name;
         } else {
                 name = (char *)arch_vma_name(vma);
@@ -5250,6 +5304,8 @@ got_name:
         mmap_event->min = min;
         mmap_event->ino = ino;
         mmap_event->ino_generation = gen;
+        mmap_event->prot = prot;
+        mmap_event->flags = flags;

         if (!(vma->vm_flags & VM_EXEC))
                 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5290,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
                 /* .min (attr_mmap2 only) */
                 /* .ino (attr_mmap2 only) */
                 /* .ino_generation (attr_mmap2 only) */
+                /* .prot (attr_mmap2 only) */
+                /* .flags (attr_mmap2 only) */
         };

         perf_event_mmap_event(&mmap_event);
@@ -6578,6 +6636,7 @@ free_pdc:
         free_percpu(pmu->pmu_disable_count);
         goto unlock;
 }
+EXPORT_SYMBOL_GPL(perf_pmu_register);

 void perf_pmu_unregister(struct pmu *pmu)
 {
@@ -6599,6 +6658,7 @@ void perf_pmu_unregister(struct pmu *pmu)
         put_device(pmu->dev);
         free_pmu_context(pmu);
 }
+EXPORT_SYMBOL_GPL(perf_pmu_unregister);

 struct pmu *perf_init_event(struct perf_event *event)
 {
@@ -6612,6 +6672,10 @@ struct pmu *perf_init_event(struct perf_event *event)
         pmu = idr_find(&pmu_idr, event->attr.type);
         rcu_read_unlock();
         if (pmu) {
+                if (!try_module_get(pmu->module)) {
+                        pmu = ERR_PTR(-ENODEV);
+                        goto unlock;
+                }
                 event->pmu = pmu;
                 ret = pmu->event_init(event);
                 if (ret)
@@ -6620,6 +6684,10 @@ struct pmu *perf_init_event(struct perf_event *event)
         }

         list_for_each_entry_rcu(pmu, &pmus, entry) {
+                if (!try_module_get(pmu->module)) {
+                        pmu = ERR_PTR(-ENODEV);
+                        goto unlock;
+                }
                 event->pmu = pmu;
                 ret = pmu->event_init(event);
                 if (!ret)
@@ -6798,6 +6866,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 err_pmu:
         if (event->destroy)
                 event->destroy(event);
+        module_put(pmu->module);
 err_ns:
         if (event->ns)
                 put_pid_ns(event->ns);
@@ -6861,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
         if (ret)
                 return -EFAULT;

-        /* disabled for now */
-        if (attr->mmap2)
-                return -EINVAL;
-
         if (attr->__reserved_1)
                 return -EINVAL;

@@ -7067,20 +7132,33 @@ SYSCALL_DEFINE5(perf_event_open,
                 }
         }

+        if (task && group_leader &&
+            group_leader->attr.inherit != attr.inherit) {
+                err = -EINVAL;
+                goto err_task;
+        }
+
         get_online_cpus();

         event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
                                  NULL, NULL);
         if (IS_ERR(event)) {
                 err = PTR_ERR(event);
-                goto err_task;
+                goto err_cpus;
         }

         if (flags & PERF_FLAG_PID_CGROUP) {
                 err = perf_cgroup_connect(pid, event, &attr, group_leader);
                 if (err) {
                         __free_event(event);
-                        goto err_task;
+                        goto err_cpus;
+                }
+        }
+
+        if (is_sampling_event(event)) {
+                if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+                        err = -ENOTSUPP;
+                        goto err_alloc;
                 }
         }

@@ -7242,8 +7320,9 @@ err_context:
         put_ctx(ctx);
 err_alloc:
         free_event(event);
-err_task:
+err_cpus:
         put_online_cpus();
+err_task:
         if (task)
                 put_task_struct(task);
 err_group_fd:
@@ -7379,7 +7458,7 @@ __perf_event_exit_task(struct perf_event *child_event,
                          struct perf_event_context *child_ctx,
                          struct task_struct *child)
 {
-        perf_remove_from_context(child_event, !!child_event->parent);
+        perf_remove_from_context(child_event, true);

         /*
          * It can happen that the parent exits first, and has events
@@ -7394,7 +7473,7 @@ __perf_event_exit_task(struct perf_event *child_event,

 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-        struct perf_event *child_event, *tmp;
+        struct perf_event *child_event, *next;
         struct perf_event_context *child_ctx;
         unsigned long flags;

@@ -7448,24 +7527,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
          */
         mutex_lock(&child_ctx->mutex);

-again:
-        list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
-                                 group_entry)
-                __perf_event_exit_task(child_event, child_ctx, child);
-
-        list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
-                                 group_entry)
+        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
                 __perf_event_exit_task(child_event, child_ctx, child);

-        /*
-         * If the last event was a group event, it will have appended all
-         * its siblings to the list, but we obtained 'tmp' before that which
-         * will still point to the list head terminating the iteration.
-         */
-        if (!list_empty(&child_ctx->pinned_groups) ||
-            !list_empty(&child_ctx->flexible_groups))
-                goto again;
-
         mutex_unlock(&child_ctx->mutex);

         put_ctx(child_ctx);
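
Not part of the diff above, but for context: with perf_pmu_register()/perf_pmu_unregister() now exported and the core taking a reference on pmu->module around each event (try_module_get() in perf_init_event(), module_put() in __free_event() and the perf_event_alloc() error path), a PMU driver built as a module only has to point .module at itself. The skeleton below is a minimal, hypothetical sketch of such a driver; the demo_* names are invented and the counter programming is stubbed out, so it illustrates the registration flow rather than any real hardware.

#include <linux/module.h>
#include <linux/perf_event.h>

/* All demo_* names are made up for illustration only. */

static void demo_event_start(struct perf_event *event, int flags)
{
        /* program the counter here */
}

static void demo_event_stop(struct perf_event *event, int flags)
{
        /* stop the counter and fold its value into event->count here */
}

static int demo_event_add(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_START)
                demo_event_start(event, flags);
        return 0;
}

static void demo_event_del(struct perf_event *event, int flags)
{
        demo_event_stop(event, PERF_EF_UPDATE);
}

static void demo_event_read(struct perf_event *event)
{
        /* refresh event->count here */
}

static int demo_event_init(struct perf_event *event)
{
        /* event->pmu is set by perf_init_event() before this callback runs. */
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
        return 0;
}

static struct pmu demo_pmu = {
        .module         = THIS_MODULE,  /* lets the core pin this module per event */
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = demo_event_init,
        .add            = demo_event_add,
        .del            = demo_event_del,
        .start          = demo_event_start,
        .stop           = demo_event_stop,
        .read           = demo_event_read,
};

static int __init demo_pmu_init(void)
{
        /* -1: let the core allocate a dynamic PMU type id (visible under /sys/bus/event_source). */
        return perf_pmu_register(&demo_pmu, "demo", -1);
}

static void __exit demo_pmu_exit(void)
{
        perf_pmu_unregister(&demo_pmu);
}

module_init(demo_pmu_init);
module_exit(demo_pmu_exit);
MODULE_LICENSE("GPL");

Because the core now drops its module reference only from __free_event(), the module cannot be unloaded while any of its events are still alive, which is what makes exporting perf_pmu_register()/perf_pmu_unregister() to modules safe.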