From 1b2ee1266ea647713dbaf44825967c180dfc8d76 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:21 -0800 Subject: mm/core: Do not enforce PKEY permissions on remote mm access We try to enforce protection keys in software the same way that we do in hardware. (See long example below). But, we only want to do this when accessing our *own* process's memory. If GDB set PKRU[6].AD=1 (disable access to PKEY 6), then tried to PTRACE_POKE a target process which just happened to have some mprotect_pkey(pkey=6) memory, we do *not* want to deny the debugger access to that memory. PKRU is fundamentally a thread-local structure and we do not want to enforce it on access to _another_ thread's data. This gets especially tricky when we have workqueues or other delayed-work mechanisms that might run in a random process's context. We can check that we only enforce pkeys when operating on our *own* mm, but delayed work gets performed when a random user context is active. We might end up with a situation where a delayed-work gup fails when running randomly under its "own" task but succeeds when running under another process. We want to avoid that. To avoid that, we use the new GUP flag: FOLL_REMOTE and add a fault flag: FAULT_FLAG_REMOTE. They indicate that we are walking an mm which is not guranteed to be the same as current->mm and should not be subject to protection key enforcement. Thanks to Jerome Glisse for pointing out this scenario. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Alexey Kardashevskiy Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boaz Harrosh Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Dave Chinner Cc: Dave Hansen Cc: David Gibson Cc: Denys Vlasenko Cc: Dominik Dingel Cc: Dominik Vogt Cc: Eric B Munson Cc: Geliang Tang Cc: Guan Xuetao Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Hugh Dickins Cc: Jan Kara Cc: Jason Low Cc: Jerome Marchand Cc: Joerg Roedel Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Laurent Dufour Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Mikulas Patocka Cc: Minchan Kim Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Shachar Raindel Cc: Vlastimil Babka Cc: Xie XiuQi Cc: iommu@lists.linux-foundation.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- mm/gup.c | 15 ++++++++++----- mm/ksm.c | 10 ++++++++-- mm/memory.c | 3 ++- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/mm/gup.c b/mm/gup.c index e0f5f3574d16..d276760163b3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -365,6 +365,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, return -ENOENT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; + if (*flags & FOLL_REMOTE) + fault_flags |= FAULT_FLAG_REMOTE; if (nonblocking) fault_flags |= FAULT_FLAG_ALLOW_RETRY; if (*flags & FOLL_NOWAIT) @@ -415,11 +417,13 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) { vm_flags_t vm_flags = vma->vm_flags; + int write = (gup_flags & FOLL_WRITE); + int foreign = (gup_flags & FOLL_REMOTE); if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; - if (gup_flags & FOLL_WRITE) { + if (write) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; @@ -445,7 +449,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } - if (!arch_vma_access_permitted(vma, (gup_flags & FOLL_WRITE))) + if (!arch_vma_access_permitted(vma, write, foreign)) return -EFAULT; return 0; } @@ -615,7 +619,8 @@ EXPORT_SYMBOL(__get_user_pages); bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { - bool write = !!(fault_flags & FAULT_FLAG_WRITE); + bool write = !!(fault_flags & FAULT_FLAG_WRITE); + bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE); vm_flags_t vm_flags = write ? VM_WRITE : VM_READ; if (!(vm_flags & vma->vm_flags)) @@ -623,9 +628,9 @@ bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) /* * The architecture might have a hardware protection - * mechanism other than read/write that can deny access + * mechanism other than read/write that can deny access. */ - if (!arch_vma_access_permitted(vma, write)) + if (!arch_vma_access_permitted(vma, write, foreign)) return false; return true; diff --git a/mm/ksm.c b/mm/ksm.c index c2013f638d11..b99e828172f6 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -359,6 +359,10 @@ static inline bool ksm_test_exit(struct mm_struct *mm) * in case the application has unmapped and remapped mm,addr meanwhile. * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. + * + * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context + * of the process that owns 'vma'. We also do not want to enforce + * protection keys here anyway. */ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) { @@ -367,12 +371,14 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); - page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION); + page = follow_page(vma, addr, + FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma->vm_mm, vma, addr, - FAULT_FLAG_WRITE); + FAULT_FLAG_WRITE | + FAULT_FLAG_REMOTE); else ret = VM_FAULT_WRITE; put_page(page); diff --git a/mm/memory.c b/mm/memory.c index d7e84fe6504d..76c44e5dffa2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3379,7 +3379,8 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd; pte_t *pte; - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE)) + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; if (unlikely(is_vm_hugetlb_page(vma))) -- cgit v1.2.1