diff options
-rw-r--r-- | arch/x86/mm/fault_32.c | 100 | ||||
-rw-r--r-- | arch/x86/mm/fault_64.c | 93 |
2 files changed, 177 insertions, 16 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 7d9ecbbba745..4da4625c6968 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -48,7 +48,11 @@ static inline int notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ +#ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { +#else + if (!user_mode(regs)) { +#endif preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -430,11 +434,15 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, #endif /* + * X86_32 * Handle a fault on the vmalloc or module mapping area * + * X86_64 + * Handle a fault on the vmalloc area + * * This assumes no large pages in there. */ -static inline int vmalloc_fault(unsigned long address) +static int vmalloc_fault(unsigned long address) { #ifdef CONFIG_X86_32 unsigned long pgd_paddr; @@ -509,6 +517,9 @@ int show_unhandled_signals = 1; * and the problem, and then passes it off to one of the appropriate * routines. */ +#ifdef CONFIG_X86_64 +asmlinkage +#endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk; @@ -517,6 +528,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long address; int write, si_code; int fault; +#ifdef CONFIG_X86_64 + unsigned long flags; +#endif /* * We can fault from pretty much anywhere, with unknown IRQ state. @@ -548,6 +562,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 9) == 0. */ +#ifdef CONFIG_X86_32 if (unlikely(address >= TASK_SIZE)) { if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) @@ -570,7 +585,45 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) */ if (in_atomic() || !mm) goto bad_area_nosemaphore; +#else /* CONFIG_X86_64 */ + if (unlikely(address >= TASK_SIZE64)) { + /* + * Don't check for the module range here: its PML4 + * is always initialized because it's shared with the main + * kernel text. Only vmalloc may need PML4 syncups. + */ + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && + ((address >= VMALLOC_START && address < VMALLOC_END))) { + if (vmalloc_fault(address) >= 0) + return; + } + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. + */ + goto bad_area_nosemaphore; + } + if (likely(regs->flags & X86_EFLAGS_IF)) + local_irq_enable(); + + if (unlikely(error_code & PF_RSVD)) + pgtable_bad(address, regs, error_code); + + /* + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault. + */ + if (unlikely(in_atomic() || !mm)) + goto bad_area_nosemaphore; + /* + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet. + */ + if (user_mode_vm(regs)) + error_code |= PF_USER; +again: +#endif /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -596,7 +649,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) vma = find_vma(mm, address); if (!vma) goto bad_area; +#ifdef CONFIG_X86_32 if (vma->vm_start <= address) +#else + if (likely(vma->vm_start <= address)) +#endif goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; @@ -634,7 +691,9 @@ good_area: goto bad_area; } - survive: +#ifdef CONFIG_X86_32 +survive: +#endif /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -705,6 +764,7 @@ bad_area_nosemaphore: print_vma_addr(" in ", regs->ip); printk("\n"); } + tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); @@ -722,9 +782,13 @@ no_context: return; /* + * X86_32 * Valid to do another page fault here, because if this fault * had been triggered by is_prefetch fixup_exception would have * handled it. + * + * X86_64 + * Hall of shame of CPU/BIOS bugs. */ if (is_prefetch(regs, address, error_code)) return; @@ -736,7 +800,7 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - +#ifdef CONFIG_X86_32 bust_spinlocks(1); show_fault_oops(regs, error_code, address); @@ -747,6 +811,20 @@ no_context: die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); +#else /* CONFIG_X86_64 */ + flags = oops_begin(); + + show_fault_oops(regs, error_code, address); + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + if (__die("Oops", regs, error_code)) + regs = NULL; + /* Executive summary in case the body of the oops scrolled away */ + printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, SIGKILL); +#endif /* * We ran out of memory, or some other thing happened to us that made @@ -754,11 +832,18 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); +#ifdef CONFIG_X86_32 if (is_global_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } +#else + if (is_global_init(current)) { + yield(); + goto again; + } +#endif printk("VM: killing process %s\n", tsk->comm); if (error_code & PF_USER) do_group_exit(SIGKILL); @@ -770,17 +855,22 @@ do_sigbus: /* Kernel mode? Handle exceptions or die */ if (!(error_code & PF_USER)) goto no_context; - +#ifdef CONFIG_X86_32 /* User space => ok to do another page fault */ if (is_prefetch(regs, address, error_code)) return; - +#endif tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } +#ifdef CONFIG_X86_64 +DEFINE_SPINLOCK(pgd_lock); +LIST_HEAD(pgd_list); +#endif + void vmalloc_sync_all(void) { #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index edca689c62d5..0902719388bc 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -51,7 +51,11 @@ static inline int notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ +#ifdef CONFIG_X86_32 + if (!user_mode_vm(regs)) { +#else if (!user_mode(regs)) { +#endif preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -433,6 +437,10 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, #endif /* + * X86_32 + * Handle a fault on the vmalloc or module mapping area + * + * X86_64 * Handle a fault on the vmalloc area * * This assumes no large pages in there. @@ -512,16 +520,20 @@ int show_unhandled_signals = 1; * and the problem, and then passes it off to one of the appropriate * routines. */ -asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int write, fault; + int write, si_code; + int fault; +#ifdef CONFIG_X86_64 unsigned long flags; - int si_code; +#endif /* * We can fault from pretty much anywhere, with unknown IRQ state. @@ -553,6 +565,30 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 9) == 0. */ +#ifdef CONFIG_X86_32 + if (unlikely(address >= TASK_SIZE)) { + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && + vmalloc_fault(address) >= 0) + return; + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. + */ + goto bad_area_nosemaphore; + } + + /* It's safe to allow irq's after cr2 has been saved and the vmalloc + fault has been handled. */ + if (regs->flags & (X86_EFLAGS_IF|VM_MASK)) + local_irq_enable(); + + /* + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault. + */ + if (in_atomic() || !mm) + goto bad_area_nosemaphore; +#else /* CONFIG_X86_64 */ if (unlikely(address >= TASK_SIZE64)) { /* * Don't check for the module range here: its PML4 @@ -570,7 +606,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, */ goto bad_area_nosemaphore; } - if (likely(regs->flags & X86_EFLAGS_IF)) local_irq_enable(); @@ -590,8 +625,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, */ if (user_mode_vm(regs)) error_code |= PF_USER; - - again: +again: +#endif /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -617,7 +652,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, vma = find_vma(mm, address); if (!vma) goto bad_area; +#ifdef CONFIG_X86_32 + if (vma->vm_start <= address) +#else if (likely(vma->vm_start <= address)) +#endif goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; @@ -655,6 +694,9 @@ good_area: goto bad_area; } +#ifdef CONFIG_X86_32 +survive: +#endif /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -730,7 +772,6 @@ bad_area_nosemaphore: /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); return; } @@ -744,9 +785,14 @@ no_context: return; /* + * X86_32 + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * X86_64 * Hall of shame of CPU/BIOS bugs. */ - if (is_prefetch(regs, address, error_code)) return; @@ -757,7 +803,18 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ +#ifdef CONFIG_X86_32 + bust_spinlocks(1); + + show_fault_oops(regs, error_code, address); + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); +#else /* CONFIG_X86_64 */ flags = oops_begin(); show_fault_oops(regs, error_code, address); @@ -770,6 +827,7 @@ no_context: /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); oops_end(flags, regs, SIGKILL); +#endif /* * We ran out of memory, or some other thing happened to us that made @@ -777,10 +835,18 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); +#ifdef CONFIG_X86_32 + if (is_global_init(tsk)) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } +#else if (is_global_init(current)) { yield(); goto again; } +#endif printk("VM: killing process %s\n", tsk->comm); if (error_code & PF_USER) do_group_exit(SIGKILL); @@ -792,16 +858,21 @@ do_sigbus: /* Kernel mode? Handle exceptions or die */ if (!(error_code & PF_USER)) goto no_context; - +#ifdef CONFIG_X86_32 + /* User space => ok to do another page fault */ + if (is_prefetch(regs, address, error_code)) + return; +#endif tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); - return; } +#ifdef CONFIG_X86_64 DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); +#endif void vmalloc_sync_all(void) { |