From e9d54cae8f03e7f963a12f44bd50d68f49b9ea36 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Fri, 30 Jan 2009 17:38:59 +0000 Subject: x86 mmiotrace: WARN_ONCE if dis/arming a page fails Print a full warning once, if arming or disarming a page fails. Also, if initial arming fails, do not handle the page further. This avoids the possibility of a page failing to arm and then later claiming to have handled any fault on that page. WARN_ONCE added by Pekka Paalanen. Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm/kmmio.c') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93d82038af4b..fb1f11546fcd 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -105,7 +105,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_page_present(unsigned long addr, bool present, +static int set_page_present(unsigned long addr, bool present, unsigned int *pglevel) { pteval_t pteval; @@ -116,7 +116,7 @@ static void set_page_present(unsigned long addr, bool present, if (!pte) { pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return; + return -1; } if (pglevel) @@ -140,22 +140,27 @@ static void set_page_present(unsigned long addr, bool present, default: pr_err("kmmio: unexpected page level 0x%x.\n", level); - return; + return -1; } __flush_tlb_one(addr); + + return 0; } /** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, false, pglevel); + int ret = set_page_present(page & PAGE_MASK, false, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + return ret; } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, true, pglevel); + int ret = set_page_present(page & PAGE_MASK, true, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); } /* @@ -326,9 +331,13 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add_rcu(&f->list, kmmio_page_list(f->page)); - arm_kmmio_fault_page(f->page, NULL); + if (arm_kmmio_fault_page(f->page, NULL)) { + kfree(f); + return -1; + } + + list_add_rcu(&f->list, kmmio_page_list(f->page)); return 0; } -- cgit v1.2.1 From 5359b585fb5edb3db34d6cd491e1475b098c61d3 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:11:58 +0200 Subject: x86 mmiotrace: fix save/restore page table state From baa99e2b32449ec7bf147c234adfa444caecac8a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 22 Feb 2009 20:02:43 +0200 Blindly setting _PAGE_PRESENT in disarm_kmmio_fault_page() overlooks the possibility, that the page was not present when it was armed. Make arm_kmmio_fault_page() store the previous page presence in struct kmmio_fault_page and use it on disarm. This patch was originally written by Stuart Bennett, but Pekka Paalanen rewrote it a little different. 
Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 66 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 22 deletions(-) (limited to 'arch/x86/mm/kmmio.c') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index fb1f11546fcd..be361eb828c8 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -32,6 +32,8 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ + bool old_presence; /* page presence prior to arming */ + bool armed; /* * Number of times this page has been registered as a part @@ -105,8 +107,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static int set_page_present(unsigned long addr, bool present, - unsigned int *pglevel) +static int set_page_presence(unsigned long addr, bool present, bool *old) { pteval_t pteval; pmdval_t pmdval; @@ -119,20 +120,21 @@ static int set_page_present(unsigned long addr, bool present, return -1; } - if (pglevel) - *pglevel = level; - switch (level) { case PG_LEVEL_2M: pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + pmdval = pmd_val(*pmd); + *old = !!(pmdval & _PAGE_PRESENT); + pmdval &= ~_PAGE_PRESENT; if (present) pmdval |= _PAGE_PRESENT; set_pmd(pmd, __pmd(pmdval)); break; case PG_LEVEL_4K: - pteval = pte_val(*pte) & ~_PAGE_PRESENT; + pteval = pte_val(*pte); + *old = !!(pteval & _PAGE_PRESENT); + pteval &= ~_PAGE_PRESENT; if (present) pteval |= _PAGE_PRESENT; set_pte_atomic(pte, __pte(pteval)); @@ -148,19 +150,39 @@ static int set_page_present(unsigned long addr, bool present, return 0; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/* + * Mark the given page as not present. Access to it will trigger a fault. + * + * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the + * protection is ignored here. RCU read lock is assumed held, so the struct + * will not disappear unexpectedly. Furthermore, the caller must guarantee, + * that double arming the same virtual address (page) cannot occur. + * + * Double disarming on the other hand is allowed, and may occur when a fault + * and mmiotrace shutdown happen simultaneously. + */ +static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, false, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + int ret; + WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + if (f->armed) { + pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, f->old_presence); + } + ret = set_page_presence(f->page, false, &f->old_presence); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + f->armed = true; return ret; } -/** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/** Restore the given page to saved presence state. 
*/ +static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, true, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); + bool tmp; + int ret = set_page_presence(f->page, f->old_presence, &tmp); + WARN_ONCE(ret < 0, + KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + f->armed = false; } /* @@ -207,7 +229,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage->page, NULL); + disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* * On SMP we sometimes get recursive probe hits on the @@ -249,7 +271,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage->page, NULL); + disarm_kmmio_fault_page(ctx->fpage); /* * If another cpu accesses the same page while we are stepping, @@ -288,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; @@ -320,19 +342,19 @@ static int add_kmmio_fault_page(unsigned long page) f = get_kmmio_fault_page(page); if (f) { if (!f->count) - arm_kmmio_fault_page(f->page, NULL); + arm_kmmio_fault_page(f); f->count++; return 0; } - f = kmalloc(sizeof(*f), GFP_ATOMIC); + f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) return -1; f->count = 1; f->page = page; - if (arm_kmmio_fault_page(f->page, NULL)) { + if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } @@ -356,7 +378,7 @@ static void release_kmmio_fault_page(unsigned long page, f->count--; BUG_ON(f->count < 0); if (!f->count) { - disarm_kmmio_fault_page(f->page, NULL); + disarm_kmmio_fault_page(f); f->release_next = *release_list; *release_list = f; } -- cgit v1.2.1 From 0b700a6a253b6a3b3059bb9a9247a73490ee33fb Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:12:48 +0200 Subject: x86 mmiotrace: split set_page_presence() From 36772dcb6ffbbb68254cbfc379a103acd2fbfefc Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 28 Feb 2009 21:34:59 +0200 Split set_page_presence() in kmmio.c into two more functions set_pmd_presence() and set_pte_presence(). Purely code reorganization, no functional changes. 
Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm/kmmio.c') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index be361eb828c8..d29777520af3 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -107,12 +107,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } +static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) +{ + pmdval_t v = pmd_val(*pmd); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(v)); +} + +static void set_pte_presence(pte_t *pte, bool present, bool *old) +{ + pteval_t v = pte_val(*pte); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(v)); +} + static int set_page_presence(unsigned long addr, bool present, bool *old) { - pteval_t pteval; - pmdval_t pmdval; unsigned int level; - pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); if (!pte) { @@ -122,31 +139,17 @@ static int set_page_presence(unsigned long addr, bool present, bool *old) switch (level) { case PG_LEVEL_2M: - pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd); - *old = !!(pmdval & _PAGE_PRESENT); - pmdval &= ~_PAGE_PRESENT; - if (present) - pmdval |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(pmdval)); + set_pmd_presence((pmd_t *)pte, present, old); break; - case PG_LEVEL_4K: - pteval = pte_val(*pte); - *old = !!(pteval & _PAGE_PRESENT); - pteval &= ~_PAGE_PRESENT; - if (present) - pteval |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(pteval)); + set_pte_presence(pte, present, old); break; - default: pr_err("kmmio: unexpected page level 0x%x.\n", level); return -1; } __flush_tlb_one(addr); - return 0; } -- cgit v1.2.1 From 3e39aa156a24ce386da378784edd0f748c770087 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Thu, 5 Feb 2009 11:02:02 +0000 Subject: x86 mmiotrace: improve handling of secondary faults Upgrade some kmmio.c debug messages to warnings. Allow secondary faults on probed pages to fall through, and only log secondary faults that are not due to non-present pages. Patch edited by Pekka Paalanen. Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch/x86/mm/kmmio.c') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index d29777520af3..4c66bd3a240d 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -232,28 +232,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* - * On SMP we sometimes get recursive probe hits on the - * same address. Context is already saved, fall out. + * A second fault on the same page means some other + * condition needs handling by do_page_fault(), the + * page really not being present is the most common. */ - pr_debug("kmmio: duplicate probe hit on CPU %d, for " - "address 0x%08lx.\n", - smp_processor_id(), addr); - ret = 1; - goto no_kmmio_ctx; - } - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at least - * prevents a panic. 
- */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " + pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); + + if (!faultpage->old_presence) + pr_info("kmmio: unexpected secondary hit for " + "address 0x%08lx on CPU %d.\n", addr, + smp_processor_id()); + } else { + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at + * least prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); + disarm_kmmio_fault_page(faultpage); + } goto no_kmmio_ctx; } ctx->active++; @@ -305,7 +309,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + pr_warning("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } -- cgit v1.2.1 From 340430c572f7b2b275d39965e88bafa71693cb23 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 24 Feb 2009 21:44:15 +0200 Subject: x86 mmiotrace: fix race with release_kmmio_fault_page() There was a theoretical possibility to a race between arming a page in post_kmmio_handler() and disarming the page in release_kmmio_fault_page(): cpu0 cpu1 ------------------------------------------------------------------ mmiotrace shutdown enter release_kmmio_fault_page fault on the page disarm the page disarm the page handle the MMIO access re-arm the page put the page on release list remove_kmmio_fault_pages() fault on the page page not known to mmiotrace fall back to do_page_fault() *KABOOM* (This scenario also shows the double disarm case which is allowed.) Fixed by acquiring kmmio_lock in post_kmmio_handler() and checking if the page is being released from mmiotrace. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm/kmmio.c') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4c66bd3a240d..9f205030d9aa 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -38,7 +38,8 @@ struct kmmio_fault_page { /* * Number of times this page has been registered as a part * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU). + * Used only by writers (RCU) and post_kmmio_handler(). + * Protected by kmmio_lock, when linked into kmmio_page_table. */ int count; }; @@ -317,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage); + /* Prevent racing against release_kmmio_fault_page(). */ + spin_lock(&kmmio_lock); + if (ctx->fpage->count) + arm_kmmio_fault_page(ctx->fpage); + spin_unlock(&kmmio_lock); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; -- cgit v1.2.1 From d0fc63f7bd07cb779a06dc1cdd0c5a14e7f5d562 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Sun, 8 Mar 2009 20:21:35 +0200 Subject: x86 mmiotrace: fix remove_kmmio_fault_pages() Impact: fix race+crash in mmiotrace The list manipulation in remove_kmmio_fault_pages() was broken. 
If more than one consecutive kmmio_fault_page was re-added during the grace period between unregister_kmmio_probe() and remove_kmmio_fault_pages(), the list manipulation failed to remove pages from the release list. After a second grace period the pages get into rcu_free_kmmio_fault_pages() and raise a BUG_ON() kernel crash. The list manipulation is fixed to properly remove pages from the release list. This bug has been present from the very beginning of mmiotrace in the mainline kernel. It was introduced in 0fd0e3da ("x86: mmiotrace full patch, preview 1"); An urgent fix for Linus. Tested by Stuart (on 32-bit) and Pekka (on amd and intel 64-bit systems, nouveau and nvidia proprietary). Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen LKML-Reference: <20090308202135.34933feb@daedalus.pq.iki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm/kmmio.c') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9f205030d9aa..6a518dd08a36 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -451,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); + struct kmmio_delayed_release *dr = + container_of(head, struct kmmio_delayed_release, rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) + if (!p->count) { list_del_rcu(&p->list); - else + prevp = &p->release_next; + } else { *prevp = p->release_next; - prevp = &p->release_next; + } p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } -- cgit v1.2.1
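For readers skimming the series, below is a small, self-contained user-space sketch of the behaviour these patches converge on: arming saves the page's previous presence, disarming restores that saved state instead of blindly setting _PAGE_PRESENT (commit 5359b585), and re-arming after single-stepping is skipped once the page's registration count has dropped to zero (commit 340430c5). It is an illustrative model only: struct fault_page, arm_page(), disarm_page() and post_handler_rearm() are simplified stand-ins, not the kernel's kmmio API, and a plain bool stands in for the _PAGE_PRESENT bit.

/*
 * Illustrative user-space model of the arm/disarm logic this series
 * arrives at.  NOT kernel code: the struct and functions below are
 * simplified stand-ins for struct kmmio_fault_page,
 * arm_kmmio_fault_page() and disarm_kmmio_fault_page().
 */
#include <stdbool.h>
#include <stdio.h>

struct fault_page {
	unsigned long page;	/* address of the traced page */
	bool present;		/* models the _PAGE_PRESENT bit */
	bool old_presence;	/* presence saved when the page was armed */
	bool armed;
	int count;		/* registrations; 0 means "being released" */
};

/* Clear the present bit, remembering what it was (as in commit 5359b585). */
static int arm_page(struct fault_page *f)
{
	if (f->armed)
		fprintf(stderr, "double-arm of 0x%08lx\n", f->page);
	f->old_presence = f->present;
	f->present = false;
	f->armed = true;
	return 0;
}

/* Restore the saved presence instead of forcing the bit back on. */
static void disarm_page(struct fault_page *f)
{
	f->present = f->old_presence;
	f->armed = false;
}

/*
 * Re-arm after single-stepping, but only if the page is still registered
 * (models the kmmio_lock + count check added in commit 340430c5).
 */
static void post_handler_rearm(struct fault_page *f)
{
	if (f->count)		/* taken under kmmio_lock in the real code */
		arm_page(f);
}

int main(void)
{
	/* A page that was already non-present before tracing started. */
	struct fault_page f = { .page = 0xd0000000UL, .present = false,
				.count = 1 };

	arm_page(&f);
	disarm_page(&f);	/* restores "not present", no blind set */
	printf("present after disarm: %d (expected 0)\n", f.present);

	f.count = 0;		/* page has been put on the release list */
	post_handler_rearm(&f);	/* must not re-arm a page being released */
	printf("armed after release: %d (expected 0)\n", f.armed);
	return 0;
}

Running the model prints both expected values; these are the same two invariants that the WARN_ONCE calls and the kmmio_lock/count check enforce in the real kmmio.c.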