diff options
Diffstat (limited to 'arch/x86_64/mm/fault.c')
-rw-r--r-- | arch/x86_64/mm/fault.c | 67 |
1 files changed, 48 insertions, 19 deletions
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 55250593d8c9..3751b4788e28 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -5,7 +5,6 @@ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. */ -#include <linux/config.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -41,6 +40,35 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + +/* Hook to register for page fault notifications */ +int register_page_fault_notifier(struct notifier_block *nb) +{ + vmalloc_sync_all(); + return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); +} +EXPORT_SYMBOL_GPL(register_page_fault_notifier); + +int unregister_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + +static inline int notify_page_fault(enum die_val val, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig + }; + return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); +} + void bust_spinlocks(int yes) { int loglevel_save = console_loglevel; @@ -68,7 +96,7 @@ void bust_spinlocks(int yes) static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code) { - unsigned char *instr; + unsigned char __user *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; @@ -77,7 +105,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, if (error_code & PF_INSTR) return 0; - instr = (unsigned char *)convert_rip_to_linear(current, regs); + instr = (unsigned char __user *)convert_rip_to_linear(current, regs); max_instr = instr + 15; if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) @@ -88,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned char instr_hi; unsigned char instr_lo; - if (__get_user(opcode, instr)) + if (__get_user(opcode, (char __user *)instr)) break; instr_hi = opcode & 0xf0; @@ -126,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (__get_user(opcode, instr)) + if (__get_user(opcode, (char __user *)instr)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); @@ -142,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, static int bad_address(void *p) { unsigned long dummy; - return __get_user(dummy, (unsigned long *)p); + return __get_user(dummy, (unsigned long __user *)p); } void dump_pagetable(unsigned long address) @@ -160,7 +188,7 @@ void dump_pagetable(unsigned long address) printk("PGD %lx ", pgd_val(*pgd)); if (!pgd_present(*pgd)) goto ret; - pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address); + pud = pud_offset(pgd, address); if (bad_address(pud)) goto bad; printk("PUD %lx ", pud_val(*pud)); if (!pud_present(*pud)) goto ret; @@ -216,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) int unhandled_signal(struct task_struct *tsk, int sig) { - if (tsk->pid == 1) + if (is_init(tsk)) return 1; if (tsk->ptrace & PT_PTRACED) return 0; @@ -265,7 +293,7 @@ static int vmalloc_fault(unsigned long address) if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); /* Below here mismatches are bugs because these lower tables are shared */ @@ -274,7 +302,7 @@ static int vmalloc_fault(unsigned long address) pud_ref = pud_offset(pgd_ref, address); if (pud_none(*pud_ref)) return -1; - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) BUG(); pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); @@ -348,7 +376,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (vmalloc_fault(address) >= 0) return; } - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; /* @@ -358,7 +386,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; @@ -383,7 +411,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunatly, in the case of an - * erroneous fault occuring in a code path which already holds mmap_sem + * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user * space from well defined areas of code, which are listed in the @@ -410,8 +438,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (error_code & 4) { - // XXX: align red zone size with ABI - if (address + 128 < regs->rsp) + /* Allow userspace just enough access below the stack pointer + * to let the 'enter' instruction work. + */ + if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp) goto bad_area; } if (expand_stack(vma, address)) @@ -434,7 +464,7 @@ good_area: case PF_PROT: /* read, present */ goto bad_area; case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; } @@ -534,7 +564,6 @@ no_context: printk(KERN_ALERT "Unable to handle kernel paging request"); printk(" at %016lx RIP: \n" KERN_ALERT,address); printk_address(regs->rip); - printk("\n"); dump_pagetable(address); tsk->thread.cr2 = address; tsk->thread.trap_no = 14; @@ -551,7 +580,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); goto again; } @@ -606,7 +635,7 @@ void vmalloc_sync_all(void) if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } spin_unlock(&pgd_lock); set_bit(pgd_index(address), insync); |