From b68e0749100e1b901bf11330f149b321c082178e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:31 +0200 Subject: uprobes: Change the callsite of uprobe_copy_process() Preparation for the next patches. Move the callsite of uprobe_copy_process() in copy_process() down to the succesfull return. We do not care if copy_process() fails, uprobe_free_utask() won't be called in this case so the wrong ->utask != NULL doesn't matter. OTOH, with this change we know that copy_process() can't fail when uprobe_copy_process() is called, the new task should either return to user-mode or call do_exit(). This way uprobe_copy_process() can: 1. setup p->utask != NULL if necessary 2. setup uprobes_state.xol_area 3. use task_work_add(p) Also, move the definition of uprobe_copy_process() down so that it can see get_utask(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad8e1bdca70e..db7a1dcb3dd6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1344,14 +1344,6 @@ void uprobe_free_utask(struct task_struct *t) t->utask = NULL; } -/* - * Called in context of a new clone/fork from copy_process. - */ -void uprobe_copy_process(struct task_struct *t) -{ - t->utask = NULL; -} - /* * Allocate a uprobe_task object for the task if if necessary. * Called when the thread hits a breakpoint. @@ -1367,6 +1359,14 @@ static struct uprobe_task *get_utask(void) return current->utask; } +/* + * Called in context of a new clone/fork from copy_process. + */ +void uprobe_copy_process(struct task_struct *t) +{ + t->utask = NULL; +} + /* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. -- cgit v1.2.1 From 6441ec8b7c108b72789d120562b9f1d976e4aaaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:35 +0200 Subject: uprobes: Introduce __create_xol_area() No functional changes, preparation. Extract the code which actually allocates/installs the new area into the new helper, __create_xol_area(). While at it remove the unnecessary "ret = ENOMEM" and "ret = 0" in xol_add_vma(), they both have no effect. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index db7a1dcb3dd6..ad17d813e73e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1096,16 +1096,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon } /* Slot allocation for XOL */ -static int xol_add_vma(struct xol_area *area) +static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { - struct mm_struct *mm = current->mm; int ret = -EALREADY; down_write(&mm->mmap_sem); if (mm->uprobes_state.xol_area) goto fail; - ret = -ENOMEM; /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (area->vaddr & ~PAGE_MASK) { @@ -1120,28 +1118,17 @@ static int xol_add_vma(struct xol_area *area) smp_wmb(); /* pairs with get_xol_area() */ mm->uprobes_state.xol_area = area; - ret = 0; fail: up_write(&mm->mmap_sem); return ret; } -/* - * get_xol_area - Allocate process's xol_area if necessary. - * This area will be used for storing instructions for execution out of line. - * - * Returns the allocated area or NULL. - */ -static struct xol_area *get_xol_area(void) +static struct xol_area *__create_xol_area(void) { struct mm_struct *mm = current->mm; - struct xol_area *area; uprobe_opcode_t insn = UPROBE_SWBP_INSN; - - area = mm->uprobes_state.xol_area; - if (area) - goto ret; + struct xol_area *area; area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) @@ -1155,13 +1142,13 @@ static struct xol_area *get_xol_area(void) if (!area->page) goto free_bitmap; - /* allocate first slot of task's xol_area for the return probes */ + init_waitqueue_head(&area->wq); + /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); - copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); atomic_set(&area->slot_count, 1); - init_waitqueue_head(&area->wq); + copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); - if (!xol_add_vma(area)) + if (!xol_add_vma(mm, area)) return area; __free_page(area->page); @@ -1170,9 +1157,25 @@ static struct xol_area *get_xol_area(void) free_area: kfree(area); out: + return NULL; +} + +/* + * get_xol_area - Allocate process's xol_area if necessary. + * This area will be used for storing instructions for execution out of line. + * + * Returns the allocated area or NULL. + */ +static struct xol_area *get_xol_area(void) +{ + struct mm_struct *mm = current->mm; + struct xol_area *area; + + if (!mm->uprobes_state.xol_area) + __create_xol_area(); + area = mm->uprobes_state.xol_area; - ret: - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ return area; } -- cgit v1.2.1 From af0d95af79773f7637107cd3871aaabcb425f15a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:38 +0200 Subject: uprobes: Teach __create_xol_area() to accept the predefined vaddr Currently xol_add_vma() uses get_unmapped_area() for area->vaddr, but the next patches need to use the fixed address. So this patch adds the new "vaddr" argument to __create_xol_area() which should be used as area->vaddr if it is nonzero. xol_add_vma() doesn't bother to verify that the predefined addr is not used, insert_vm_struct() should fail if find_vma_links() detects the overlap with the existing vma. Also, __create_xol_area() doesn't need __GFP_ZERO to allocate area. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad17d813e73e..7d12a45842a7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1104,11 +1104,14 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) if (mm->uprobes_state.xol_area) goto fail; - /* Try to map as high as possible, this is only a hint. */ - area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); - if (area->vaddr & ~PAGE_MASK) { - ret = area->vaddr; - goto fail; + if (!area->vaddr) { + /* Try to map as high as possible, this is only a hint. */ + area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, + PAGE_SIZE, 0, 0); + if (area->vaddr & ~PAGE_MASK) { + ret = area->vaddr; + goto fail; + } } ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, @@ -1124,13 +1127,13 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) return ret; } -static struct xol_area *__create_xol_area(void) +static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; - area = kzalloc(sizeof(*area), GFP_KERNEL); + area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1142,6 +1145,7 @@ static struct xol_area *__create_xol_area(void) if (!area->page) goto free_bitmap; + area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); @@ -1172,7 +1176,7 @@ static struct xol_area *get_xol_area(void) struct xol_area *area; if (!mm->uprobes_state.xol_area) - __create_xol_area(); + __create_xol_area(0); area = mm->uprobes_state.xol_area; smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ -- cgit v1.2.1 From 248d3a7b2f100078c5f6878351177859380582e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:41 +0200 Subject: uprobes: Change uprobe_copy_process() to dup return_instances uprobe_copy_process() assumes that the new child doesn't need ->utask, it should be allocated by demand. But this is not true if the forking task has the pending ret- probes, the child should report them as well and thus it needs the copy of parent's ->return_instances chain. Otherwise the child crashes when it returns from the probed function. Alternatively we could cleanup the child's stack, but this needs per-arch changes and this is not what we want. At least systemtap expects a .return in the child too. Note: this change alone doesn't fix the problem, see the next change. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7d12a45842a7..1c6cda68a555 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1366,12 +1366,55 @@ static struct uprobe_task *get_utask(void) return current->utask; } +static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) +{ + struct uprobe_task *n_utask; + struct return_instance **p, *o, *n; + + n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + if (!n_utask) + return -ENOMEM; + t->utask = n_utask; + + p = &n_utask->return_instances; + for (o = o_utask->return_instances; o; o = o->next) { + n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); + if (!n) + return -ENOMEM; + + *n = *o; + atomic_inc(&n->uprobe->ref); + n->next = NULL; + + *p = n; + p = &n->next; + n_utask->depth++; + } + + return 0; +} + +static void uprobe_warn(struct task_struct *t, const char *msg) +{ + pr_warn("uprobe: %s:%d failed to %s\n", + current->comm, current->pid, msg); +} + /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t) { + struct uprobe_task *utask = current->utask; + struct mm_struct *mm = current->mm; + t->utask = NULL; + + if (mm == t->mm || !utask || !utask->return_instances) + return; + + if (dup_utask(t, utask)) + return uprobe_warn(t, "dup ret instances"); } /* -- cgit v1.2.1 From aa59c53fd4599c91ccf9629af0c2777b89929076 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:44 +0200 Subject: uprobes: Change uprobe_copy_process() to dup xol_area This finally fixes the serious bug in uretprobes: a forked child crashes if the parent called fork() with the pending ret probe. Trivial test-case: # perf probe -x /lib/libc.so.6 __fork%return # perf record -e probe_libc:__fork perl -le 'fork || print "OK"' (the child doesn't print "OK", it is killed by SIGSEGV) If the child returns from the probed function it actually returns to trampoline_vaddr, because it got the copy of parent's stack mangled by prepare_uretprobe() when the parent entered this func. It crashes because a) this address is not mapped and b) until the previous change it doesn't have the proper->return_instances info. This means that uprobe_copy_process() has to create xol_area which has the trampoline slot, and its vaddr should be equal to parent's xol_area->vaddr. Unfortunately, uprobe_copy_process() can not simply do __create_xol_area(child, xol_area->vaddr). This could actually work but perf_event_mmap() doesn't expect the usage of foreign ->mm. So we offload this to task_work_run(), and pass the argument via not yet used utask->vaddr. We know that this vaddr is fine for install_special_mapping(), the necessary hole was recently "created" by dup_mmap() which skips the parent's VM_DONTCOPY area, and nobody else could use the new mm. Unfortunately, this also means that we can not handle the errors properly, we obviously can not abort the already completed fork(). So we simply print the warning if GFP_KERNEL allocation (the only possible reason) fails. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1c6cda68a555..9f282e14925d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -35,6 +35,7 @@ #include /* notifier mechanism */ #include "../../mm/internal.h" /* munlock_vma_page */ #include +#include #include @@ -1400,6 +1401,17 @@ static void uprobe_warn(struct task_struct *t, const char *msg) current->comm, current->pid, msg); } +static void dup_xol_work(struct callback_head *work) +{ + kfree(work); + + if (current->flags & PF_EXITING) + return; + + if (!__create_xol_area(current->utask->vaddr)) + uprobe_warn(current, "dup xol area"); +} + /* * Called in context of a new clone/fork from copy_process. */ @@ -1407,6 +1419,8 @@ void uprobe_copy_process(struct task_struct *t) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; + struct callback_head *work; + struct xol_area *area; t->utask = NULL; @@ -1415,6 +1429,20 @@ void uprobe_copy_process(struct task_struct *t) if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); + + /* The task can fork() after dup_xol_work() fails */ + area = mm->uprobes_state.xol_area; + if (!area) + return uprobe_warn(t, "dup xol area"); + + /* TODO: move it into the union in uprobe_task */ + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return uprobe_warn(t, "dup xol area"); + + utask->vaddr = area->vaddr; + init_task_work(work, dup_xol_work); + task_work_add(t, work, true); } /* -- cgit v1.2.1 From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 19:39:37 +0200 Subject: uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK uprobe_copy_process() does nothing if the child shares ->mm with the forking process, but there is a special case: CLONE_VFORK. In this case it would be more correct to do dup_utask() but avoid dup_xol(). This is not that important, the child should not unwind its stack too much, this can corrupt the parent's stack, but at least we need this to allow to ret-probe __vfork() itself. Note: in theory, it would be better to check task_pt_regs(p)->sp instead of CLONE_VFORK, we need to dup_utask() if and only if the child can return from the function called by the parent. But this needs the arch-dependant helper, and I think that nobody actually does clone(same_stack, CLONE_VM). Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9f282e14925d..ae9e1d2ef256 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1415,7 +1415,7 @@ static void dup_xol_work(struct callback_head *work) /* * Called in context of a new clone/fork from copy_process. */ -void uprobe_copy_process(struct task_struct *t) +void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; @@ -1424,7 +1424,10 @@ void uprobe_copy_process(struct task_struct *t) t->utask = NULL; - if (mm == t->mm || !utask || !utask->return_instances) + if (!utask || !utask->return_instances) + return; + + if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) @@ -1435,6 +1438,9 @@ void uprobe_copy_process(struct task_struct *t) if (!area) return uprobe_warn(t, "dup xol area"); + if (mm == t->mm) + return; + /* TODO: move it into the union in uprobe_task */ work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) -- cgit v1.2.1 From 736e89d9f782a7dd9a38ecda13b2db916fa72f33 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 31 Oct 2013 19:28:22 +0100 Subject: uprobes: Kill module_init() and module_exit() Turn module_init() into __initcall() and kill module_exit(). This code can't be compiled as a module so these module_*() calls only add the confusion, especially if arch-dependant code needs its own initialization hooks. Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ae9e1d2ef256..0012c8ebb098 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1941,9 +1941,4 @@ static int __init init_uprobes(void) return register_die_notifier(&uprobe_exception_nb); } -module_init(init_uprobes); - -static void __exit exit_uprobes(void) -{ -} -module_exit(exit_uprobes); +__initcall(init_uprobes); -- cgit v1.2.1 From 8a8de66c4f6ebd0f6d3da026ec24339aa5d1db12 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 4 Nov 2013 20:27:13 +0100 Subject: uprobes: Introduce arch_uprobe->ixol Currently xol_get_insn_slot() assumes that we should simply copy arch_uprobe->insn[] which is (ignoring arch_uprobe_analyze_insn) just the copy of the original insn. This is not true for arm which needs to create another insn to execute it out-of-line. So this patch simply adds the new member, ->ixol into the union. This doesn't make any difference for x86 and powerpc, but arm can divorce insn/ixol and initialize the correct xol insn in arch_uprobe_analyze_insn(). Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0012c8ebb098..fbcff61b5099 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1264,7 +1264,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) return 0; /* Initialize the slot */ - copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES); + copy_to_page(area->page, xol_vaddr, + uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); /* * We probably need flush_icache_user_range() but it needs vma. * This should work on supported architectures too. -- cgit v1.2.1 From f72d41fa902fb19a9b63028202a400b0ce497491 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 5 Nov 2013 19:50:39 +0100 Subject: uprobes: Export write_opcode() as uprobe_write_opcode() set_swbp() and set_orig_insn() are __weak, but this is pointless because write_opcode() is static. Export write_opcode() as uprobe_write_opcode() for the upcoming arm port, this way it can actually override set_swbp() and use __opcode_to_mem_arm(bpinsn) instead if UPROBE_SWBP_INSN. Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index fbcff61b5099..0ac346ae5edb 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -245,12 +245,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and - * write_opcode accordingly. This would never be a problem for archs that - * have fixed length instructions. + * uprobe_write_opcode accordingly. This would never be a problem for archs + * that have fixed length instructions. */ /* - * write_opcode - write the opcode at a given virtual address. + * uprobe_write_opcode - write the opcode at a given virtual address. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. @@ -261,7 +261,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * For mm @mm, write the opcode at @vaddr. * Return 0 (success) or a negative errno. */ -static int write_opcode(struct mm_struct *mm, unsigned long vaddr, +int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; @@ -315,7 +315,7 @@ put_old: */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(mm, vaddr, UPROBE_SWBP_INSN); + return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } /** @@ -330,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); + return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -577,7 +577,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, if (ret) goto out; - /* write_opcode() assumes we don't cross page boundary */ + /* uprobe_write_opcode() assumes we don't cross page boundary */ BUG_ON((uprobe->offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); -- cgit v1.2.1 From 70d7f98722a7a1df1a55d6a92d0ce959c7aba9fd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 8 Nov 2013 16:35:55 +0100 Subject: uprobes: Fix the wrong usage of current->utask in uprobe_copy_process() Commit aa59c53fd459 "uprobes: Change uprobe_copy_process() to dup xol_area" has a stupid typo, we need to setup t->utask->vaddr but the code wrongly uses current->utask. Even with this bug dup_xol_work() works "in practice", but only because get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE) likely returns the same address every time. Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0ac346ae5edb..5e5695038d2d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1447,7 +1447,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) if (!work) return uprobe_warn(t, "dup xol area"); - utask->vaddr = area->vaddr; + t->utask->vaddr = area->vaddr; init_task_work(work, dup_xol_work); task_work_add(t, work, true); } -- cgit v1.2.1 From 2ded0980a6e4ae96bdd84bda66c7240967d86f3c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 Nov 2013 19:41:57 +0100 Subject: uprobes: Fix the memory out of bound overwrite in copy_insn() 1. copy_insn() doesn't look very nice, all calculations are confusing and it is not immediately clear why do we read the 2nd page first. 2. The usage of inode->i_size is wrong on 32-bit machines. 3. "Instruction at end of binary" logic is simply wrong, it doesn't handle the case when uprobe->offset > inode->i_size. In this case "bytes" overflows, and __copy_insn() writes to the memory outside of uprobe->arch.insn. Yes, uprobe_register() checks i_size_read(), but this file can be truncated after that. All i_size checks are racy, we do this only to catch the obvious mistakes. Change copy_insn() to call __copy_insn() in a loop, simplify and fix the bytes/nbytes calculations. Note: we do not care if we read extra bytes after inode->i_size if we got the valid page. This is fine because the task gets the same page after page-fault, and arch_uprobe_analyze_insn() can't know how many bytes were actually read anyway. Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'kernel/events/uprobes.c') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5e5695038d2d..24b7d6ca871b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -504,9 +504,8 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) return ret; } -static int -__copy_insn(struct address_space *mapping, struct file *filp, char *insn, - unsigned long nbytes, loff_t offset) +static int __copy_insn(struct address_space *mapping, struct file *filp, + void *insn, int nbytes, loff_t offset) { struct page *page; @@ -528,28 +527,28 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, static int copy_insn(struct uprobe *uprobe, struct file *filp) { - struct address_space *mapping; - unsigned long nbytes; - int bytes; - - nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); - mapping = uprobe->inode->i_mapping; + struct address_space *mapping = uprobe->inode->i_mapping; + loff_t offs = uprobe->offset; + void *insn = uprobe->arch.insn; + int size = MAX_UINSN_BYTES; + int len, err = -EIO; - /* Instruction at end of binary; copy only available bytes */ - if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size) - bytes = uprobe->inode->i_size - uprobe->offset; - else - bytes = MAX_UINSN_BYTES; + /* Copy only available bytes, -EIO if nothing was read */ + do { + if (offs >= i_size_read(uprobe->inode)) + break; - /* Instruction at the page-boundary; copy bytes in second page */ - if (nbytes < bytes) { - int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, - bytes - nbytes, uprobe->offset + nbytes); + len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); + err = __copy_insn(mapping, filp, insn, len, offs); if (err) - return err; - bytes = nbytes; - } - return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); + break; + + insn += len; + offs += len; + size -= len; + } while (size); + + return err; } static int prepare_uprobe(struct uprobe *uprobe, struct file *file, -- cgit v1.2.1