summaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events')
-rw-r--r--kernel/events/core.c49
-rw-r--r--kernel/events/uprobes.c278
2 files changed, 310 insertions, 17 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5a97f34bc14c..8c490130c4fb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8376,30 +8376,39 @@ static struct pmu perf_tracepoint = {
*
* PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
* if not set, create kprobe/uprobe
+ *
+ * The following values specify a reference counter (or semaphore in the
+ * terminology of tools like dtrace, systemtap, etc.) Userspace Statically
+ * Defined Tracepoints (USDT). Currently, we use 40 bit for the offset.
+ *
+ * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as th offset
+ * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left
*/
enum perf_probe_config {
PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */
+ PERF_UPROBE_REF_CTR_OFFSET_BITS = 32,
+ PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
};
PMU_FORMAT_ATTR(retprobe, "config:0");
+#endif
-static struct attribute *probe_attrs[] = {
+#ifdef CONFIG_KPROBE_EVENTS
+static struct attribute *kprobe_attrs[] = {
&format_attr_retprobe.attr,
NULL,
};
-static struct attribute_group probe_format_group = {
+static struct attribute_group kprobe_format_group = {
.name = "format",
- .attrs = probe_attrs,
+ .attrs = kprobe_attrs,
};
-static const struct attribute_group *probe_attr_groups[] = {
- &probe_format_group,
+static const struct attribute_group *kprobe_attr_groups[] = {
+ &kprobe_format_group,
NULL,
};
-#endif
-#ifdef CONFIG_KPROBE_EVENTS
static int perf_kprobe_event_init(struct perf_event *event);
static struct pmu perf_kprobe = {
.task_ctx_nr = perf_sw_context,
@@ -8409,7 +8418,7 @@ static struct pmu perf_kprobe = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
- .attr_groups = probe_attr_groups,
+ .attr_groups = kprobe_attr_groups,
};
static int perf_kprobe_event_init(struct perf_event *event)
@@ -8441,6 +8450,24 @@ static int perf_kprobe_event_init(struct perf_event *event)
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
+PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
+
+static struct attribute *uprobe_attrs[] = {
+ &format_attr_retprobe.attr,
+ &format_attr_ref_ctr_offset.attr,
+ NULL,
+};
+
+static struct attribute_group uprobe_format_group = {
+ .name = "format",
+ .attrs = uprobe_attrs,
+};
+
+static const struct attribute_group *uprobe_attr_groups[] = {
+ &uprobe_format_group,
+ NULL,
+};
+
static int perf_uprobe_event_init(struct perf_event *event);
static struct pmu perf_uprobe = {
.task_ctx_nr = perf_sw_context,
@@ -8450,12 +8477,13 @@ static struct pmu perf_uprobe = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
- .attr_groups = probe_attr_groups,
+ .attr_groups = uprobe_attr_groups,
};
static int perf_uprobe_event_init(struct perf_event *event)
{
int err;
+ unsigned long ref_ctr_offset;
bool is_retprobe;
if (event->attr.type != perf_uprobe.type)
@@ -8471,7 +8499,8 @@ static int perf_uprobe_event_init(struct perf_event *event)
return -EOPNOTSUPP;
is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
- err = perf_uprobe_init(event, is_retprobe);
+ ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
+ err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
if (err)
return err;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2bf792d22087..96d4bee83489 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -73,6 +73,7 @@ struct uprobe {
struct uprobe_consumer *consumers;
struct inode *inode; /* Also hold a ref to inode */
loff_t offset;
+ loff_t ref_ctr_offset;
unsigned long flags;
/*
@@ -88,6 +89,15 @@ struct uprobe {
struct arch_uprobe arch;
};
+struct delayed_uprobe {
+ struct list_head list;
+ struct uprobe *uprobe;
+ struct mm_struct *mm;
+};
+
+static DEFINE_MUTEX(delayed_uprobe_lock);
+static LIST_HEAD(delayed_uprobe_list);
+
/*
* Execute out of line area: anonymous executable mapping installed
* by the probed task to execute the copy of the original instruction
@@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
return 1;
}
+static struct delayed_uprobe *
+delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct delayed_uprobe *du;
+
+ list_for_each_entry(du, &delayed_uprobe_list, list)
+ if (du->uprobe == uprobe && du->mm == mm)
+ return du;
+ return NULL;
+}
+
+static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct delayed_uprobe *du;
+
+ if (delayed_uprobe_check(uprobe, mm))
+ return 0;
+
+ du = kzalloc(sizeof(*du), GFP_KERNEL);
+ if (!du)
+ return -ENOMEM;
+
+ du->uprobe = uprobe;
+ du->mm = mm;
+ list_add(&du->list, &delayed_uprobe_list);
+ return 0;
+}
+
+static void delayed_uprobe_delete(struct delayed_uprobe *du)
+{
+ if (WARN_ON(!du))
+ return;
+ list_del(&du->list);
+ kfree(du);
+}
+
+static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct list_head *pos, *q;
+ struct delayed_uprobe *du;
+
+ if (!uprobe && !mm)
+ return;
+
+ list_for_each_safe(pos, q, &delayed_uprobe_list) {
+ du = list_entry(pos, struct delayed_uprobe, list);
+
+ if (uprobe && du->uprobe != uprobe)
+ continue;
+ if (mm && du->mm != mm)
+ continue;
+
+ delayed_uprobe_delete(du);
+ }
+}
+
+static bool valid_ref_ctr_vma(struct uprobe *uprobe,
+ struct vm_area_struct *vma)
+{
+ unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
+
+ return uprobe->ref_ctr_offset &&
+ vma->vm_file &&
+ file_inode(vma->vm_file) == uprobe->inode &&
+ (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+ vma->vm_start <= vaddr &&
+ vma->vm_end > vaddr;
+}
+
+static struct vm_area_struct *
+find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
+{
+ struct vm_area_struct *tmp;
+
+ for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
+ if (valid_ref_ctr_vma(uprobe, tmp))
+ return tmp;
+
+ return NULL;
+}
+
+static int
+__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
+{
+ void *kaddr;
+ struct page *page;
+ struct vm_area_struct *vma;
+ int ret;
+ short *ptr;
+
+ if (!vaddr || !d)
+ return -EINVAL;
+
+ ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+ FOLL_WRITE, &page, &vma, NULL);
+ if (unlikely(ret <= 0)) {
+ /*
+ * We are asking for 1 page. If get_user_pages_remote() fails,
+ * it may return 0, in that case we have to return error.
+ */
+ return ret == 0 ? -EBUSY : ret;
+ }
+
+ kaddr = kmap_atomic(page);
+ ptr = kaddr + (vaddr & ~PAGE_MASK);
+
+ if (unlikely(*ptr + d < 0)) {
+ pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
+ "curr val: %d, delta: %d\n", vaddr, *ptr, d);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *ptr += d;
+ ret = 0;
+out:
+ kunmap_atomic(kaddr);
+ put_page(page);
+ return ret;
+}
+
+static void update_ref_ctr_warn(struct uprobe *uprobe,
+ struct mm_struct *mm, short d)
+{
+ pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
+ "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+ d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
+ (unsigned long long) uprobe->offset,
+ (unsigned long long) uprobe->ref_ctr_offset, mm);
+}
+
+static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
+ short d)
+{
+ struct vm_area_struct *rc_vma;
+ unsigned long rc_vaddr;
+ int ret = 0;
+
+ rc_vma = find_ref_ctr_vma(uprobe, mm);
+
+ if (rc_vma) {
+ rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
+ ret = __update_ref_ctr(mm, rc_vaddr, d);
+ if (ret)
+ update_ref_ctr_warn(uprobe, mm, d);
+
+ if (d > 0)
+ return ret;
+ }
+
+ mutex_lock(&delayed_uprobe_lock);
+ if (d > 0)
+ ret = delayed_uprobe_add(uprobe, mm);
+ else
+ delayed_uprobe_remove(uprobe, mm);
+ mutex_unlock(&delayed_uprobe_lock);
+
+ return ret;
+}
+
/*
* NOTE:
* Expect the breakpoint instruction to be the smallest size instruction for
@@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t opcode)
{
+ struct uprobe *uprobe;
struct page *old_page, *new_page;
struct vm_area_struct *vma;
- int ret;
+ int ret, is_register, ref_ctr_updated = 0;
+
+ is_register = is_swbp_insn(&opcode);
+ uprobe = container_of(auprobe, struct uprobe, arch);
retry:
/* Read the page with vaddr into memory */
@@ -317,6 +491,15 @@ retry:
if (ret <= 0)
goto put_old;
+ /* We are going to replace instruction, update ref_ctr. */
+ if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
+ ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
+ if (ret)
+ goto put_old;
+
+ ref_ctr_updated = 1;
+ }
+
ret = anon_vma_prepare(vma);
if (ret)
goto put_old;
@@ -337,6 +520,11 @@ put_old:
if (unlikely(ret == -EAGAIN))
goto retry;
+
+ /* Revert back reference counter if instruction update failed. */
+ if (ret && is_register && ref_ctr_updated)
+ update_ref_ctr(uprobe, mm, -1);
+
return ret;
}
@@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe)
static void put_uprobe(struct uprobe *uprobe)
{
- if (atomic_dec_and_test(&uprobe->ref))
+ if (atomic_dec_and_test(&uprobe->ref)) {
+ /*
+ * If application munmap(exec_vma) before uprobe_unregister()
+ * gets called, we don't get a chance to remove uprobe from
+ * delayed_uprobe_list from remove_breakpoint(). Do it here.
+ */
+ delayed_uprobe_remove(uprobe, NULL);
kfree(uprobe);
+ }
}
static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -484,7 +679,18 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
return u;
}
-static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+static void
+ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
+{
+ pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
+ "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
+ uprobe->inode->i_ino, (unsigned long long) uprobe->offset,
+ (unsigned long long) cur_uprobe->ref_ctr_offset,
+ (unsigned long long) uprobe->ref_ctr_offset);
+}
+
+static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
+ loff_t ref_ctr_offset)
{
struct uprobe *uprobe, *cur_uprobe;
@@ -494,6 +700,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->inode = inode;
uprobe->offset = offset;
+ uprobe->ref_ctr_offset = ref_ctr_offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
@@ -501,6 +708,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
cur_uprobe = insert_uprobe(uprobe);
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
+ if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
+ ref_ctr_mismatch_warn(cur_uprobe, uprobe);
+ put_uprobe(cur_uprobe);
+ kfree(uprobe);
+ return ERR_PTR(-EINVAL);
+ }
kfree(uprobe);
uprobe = cur_uprobe;
}
@@ -895,7 +1108,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister);
* else return 0 (success)
*/
static int __uprobe_register(struct inode *inode, loff_t offset,
- struct uprobe_consumer *uc)
+ loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
struct uprobe *uprobe;
int ret;
@@ -912,9 +1125,12 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
return -EINVAL;
retry:
- uprobe = alloc_uprobe(inode, offset);
+ uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
if (!uprobe)
return -ENOMEM;
+ if (IS_ERR(uprobe))
+ return PTR_ERR(uprobe);
+
/*
* We can race with uprobe_unregister()->delete_uprobe().
* Check uprobe_is_active() and retry if it is false.
@@ -938,10 +1154,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
int uprobe_register(struct inode *inode, loff_t offset,
struct uprobe_consumer *uc)
{
- return __uprobe_register(inode, offset, uc);
+ return __uprobe_register(inode, offset, 0, uc);
}
EXPORT_SYMBOL_GPL(uprobe_register);
+int uprobe_register_refctr(struct inode *inode, loff_t offset,
+ loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+{
+ return __uprobe_register(inode, offset, ref_ctr_offset, uc);
+}
+EXPORT_SYMBOL_GPL(uprobe_register_refctr);
+
/*
* uprobe_apply - unregister an already registered probe.
* @inode: the file in which the probe has to be removed.
@@ -1060,6 +1283,35 @@ static void build_probe_list(struct inode *inode,
spin_unlock(&uprobes_treelock);
}
+/* @vma contains reference counter, not the probed instruction. */
+static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
+{
+ struct list_head *pos, *q;
+ struct delayed_uprobe *du;
+ unsigned long vaddr;
+ int ret = 0, err = 0;
+
+ mutex_lock(&delayed_uprobe_lock);
+ list_for_each_safe(pos, q, &delayed_uprobe_list) {
+ du = list_entry(pos, struct delayed_uprobe, list);
+
+ if (du->mm != vma->vm_mm ||
+ !valid_ref_ctr_vma(du->uprobe, vma))
+ continue;
+
+ vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
+ ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
+ if (ret) {
+ update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
+ if (!err)
+ err = ret;
+ }
+ delayed_uprobe_delete(du);
+ }
+ mutex_unlock(&delayed_uprobe_lock);
+ return err;
+}
+
/*
* Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
*
@@ -1072,7 +1324,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
struct uprobe *uprobe, *u;
struct inode *inode;
- if (no_uprobe_events() || !valid_vma(vma, true))
+ if (no_uprobe_events())
+ return 0;
+
+ if (vma->vm_file &&
+ (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+ test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
+ delayed_ref_ctr_inc(vma);
+
+ if (!valid_vma(vma, true))
return 0;
inode = file_inode(vma->vm_file);
@@ -1246,6 +1506,10 @@ void uprobe_clear_state(struct mm_struct *mm)
{
struct xol_area *area = mm->uprobes_state.xol_area;
+ mutex_lock(&delayed_uprobe_lock);
+ delayed_uprobe_remove(NULL, mm);
+ mutex_unlock(&delayed_uprobe_lock);
+
if (!area)
return;
OpenPOWER on IntegriCloud