From 5d7476374564645b1a2d299e242ad7b17b1104ee Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:10 -0700 Subject: mm: remove vm_insert_mixed() All callers are now converted to vmf_insert_mixed() so convert vmf_insert_mixed() from being a compatibility wrapper into the real function. Link: http://lkml.kernel.org/r/20180828145728.11873-3-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 21a5e6e4758b..200aaf291e98 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1693,13 +1693,19 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, return insert_pfn(vma, addr, pfn, pgprot, mkwrite); } -int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - pfn_t pfn) +vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn) { - return __vm_insert_mixed(vma, addr, pfn, false); + int err = __vm_insert_mixed(vma, addr, pfn, false); + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; } -EXPORT_SYMBOL(vm_insert_mixed); +EXPORT_SYMBOL(vmf_insert_mixed); /* * If the insertion of PTE failed because someone else already added a -- cgit v1.2.3 From f5e6d1d5f8f3080aa7a51acea1f77085f45abe9c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:13 -0700 Subject: mm: introduce vmf_insert_pfn_prot() Like vm_insert_pfn_prot(), but returns a vm_fault_t instead of an errno. Also unexport vm_insert_pfn_prot as it has no modular users. Link: http://lkml.kernel.org/r/20180828145728.11873-4-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/memory.c | 47 +++++++++++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'mm/memory.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecc6f9347756..f1293bdc6de2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2506,6 +2506,8 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); +vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, diff --git a/mm/memory.c b/mm/memory.c index 200aaf291e98..b3eecb3aa65f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1596,21 +1596,6 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_pfn); -/** - * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot - * @vma: user vma to map to - * @addr: target user address of this page - * @pfn: source kernel pfn - * @pgprot: pgprot flags for the inserted page - * - * This is exactly like vm_insert_pfn, except that it allows drivers to - * to override pgprot on a per-page basis. - * - * This only makes sense for IO mappings, and it makes no sense for - * cow mappings. In general, using multiple vmas is preferable; - * vm_insert_pfn_prot should only be used if using multiple VMAs is - * impractical. - */ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { @@ -1640,7 +1625,37 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, return ret; } -EXPORT_SYMBOL(vm_insert_pfn_prot); + +/** + * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * @pgprot: pgprot flags for the inserted page + * + * This is exactly like vmf_insert_pfn(), except that it allows drivers to + * to override pgprot on a per-page basis. + * + * This only makes sense for IO mappings, and it makes no sense for + * COW mappings. In general, using multiple vmas is preferable; + * vm_insert_pfn_prot should only be used if using multiple VMAs is + * impractical. + * + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ + int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); + + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL(vmf_insert_pfn_prot); static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) { -- cgit v1.2.3 From bc12e6ad9617831727e4201e7cbf5c3b868cc8fd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:21 -0700 Subject: mm: make vm_insert_pfn_prot() static Now this is no longer used outside mm/memory.c, make it static. Link: http://lkml.kernel.org/r/20180828145728.11873-6-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- mm/memory.c | 50 +++++++++++++++++++++++++------------------------- 2 files changed, 25 insertions(+), 27 deletions(-) (limited to 'mm/memory.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index f1293bdc6de2..0f5db0455e61 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2504,8 +2504,6 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); -int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/memory.c b/mm/memory.c index b3eecb3aa65f..6365144f8267 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1572,31 +1572,7 @@ out: return retval; } -/** - * vm_insert_pfn - insert single pfn into user vma - * @vma: user vma to map to - * @addr: target user address of this page - * @pfn: source kernel pfn - * - * Similar to vm_insert_page, this allows drivers to insert individual pages - * they've allocated into a user vma. Same comments apply. - * - * This function should only be called from a vm_ops->fault handler, and - * in that case the handler should return NULL. - * - * vma cannot be a COW mapping. - * - * As this is called only for pages that do not currently exist, we - * do not need to flush old virtual caches or the TLB. - */ -int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn) -{ - return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); -} -EXPORT_SYMBOL(vm_insert_pfn); - -int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, +static int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { int ret; @@ -1626,6 +1602,30 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, return ret; } +/** + * vm_insert_pfn - insert single pfn into user vma + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * + * Similar to vm_insert_page, this allows drivers to insert individual pages + * they've allocated into a user vma. Same comments apply. + * + * This function should only be called from a vm_ops->fault handler, and + * in that case the handler should return NULL. + * + * vma cannot be a COW mapping. + * + * As this is called only for pages that do not currently exist, we + * do not need to flush old virtual caches or the TLB. + */ +int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn) +{ + return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); +} +EXPORT_SYMBOL(vm_insert_pfn); + /** * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to -- cgit v1.2.3 From ae2b01f37044c10e975d22116755df56252b09d8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:29 -0700 Subject: mm: remove vm_insert_pfn() All callers are now converted to vmf_insert_pfn() so convert vmf_insert_pfn() from being a compatibility wrapper around vm_insert_pfn() to being a compatibility wrapper around vmf_insert_pfn_prot(). Link: http://lkml.kernel.org/r/20180828145728.11873-8-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 +-------------- mm/memory.c | 54 +++++++++++++++++++++++++++++------------------------- 2 files changed, 30 insertions(+), 39 deletions(-) (limited to 'mm/memory.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0f5db0455e61..737279bb479c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2502,7 +2502,7 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); -int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, +vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); @@ -2525,19 +2525,6 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } -static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn) -{ - int err = vm_insert_pfn(vma, addr, pfn); - - if (err == -ENOMEM) - return VM_FAULT_OOM; - if (err < 0 && err != -EBUSY) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; -} - static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) diff --git a/mm/memory.c b/mm/memory.c index 6365144f8267..08653d0a795a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1602,30 +1602,6 @@ static int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, return ret; } -/** - * vm_insert_pfn - insert single pfn into user vma - * @vma: user vma to map to - * @addr: target user address of this page - * @pfn: source kernel pfn - * - * Similar to vm_insert_page, this allows drivers to insert individual pages - * they've allocated into a user vma. Same comments apply. - * - * This function should only be called from a vm_ops->fault handler, and - * in that case the handler should return NULL. - * - * vma cannot be a COW mapping. - * - * As this is called only for pages that do not currently exist, we - * do not need to flush old virtual caches or the TLB. - */ -int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn) -{ - return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); -} -EXPORT_SYMBOL(vm_insert_pfn); - /** * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to @@ -1638,9 +1614,10 @@ EXPORT_SYMBOL(vm_insert_pfn); * * This only makes sense for IO mappings, and it makes no sense for * COW mappings. In general, using multiple vmas is preferable; - * vm_insert_pfn_prot should only be used if using multiple VMAs is + * vmf_insert_pfn_prot should only be used if using multiple VMAs is * impractical. * + * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, @@ -1657,6 +1634,33 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vmf_insert_pfn_prot); +/** + * vmf_insert_pfn - insert single pfn into user vma + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * + * Similar to vm_insert_page, this allows drivers to insert individual pages + * they've allocated into a user vma. Same comments apply. + * + * This function should only be called from a vm_ops->fault handler, and + * in that case the handler should return the result of this function. + * + * vma cannot be a COW mapping. + * + * As this is called only for pages that do not currently exist, we + * do not need to flush old virtual caches or the TLB. + * + * Context: Process context. May allocate using %GFP_KERNEL. + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn) +{ + return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); +} +EXPORT_SYMBOL(vmf_insert_pfn); + static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) { /* these checks mirror the abort conditions in vm_normal_page */ -- cgit v1.2.3 From 6d958546ff611c9ae09b181e628c1c5d5da5ebda Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:33 -0700 Subject: mm: inline vm_insert_pfn_prot() into caller vm_insert_pfn_prot() is only called from vmf_insert_pfn_prot(), so inline it and convert some of the errnos into vm_fault codes earlier. Link: http://lkml.kernel.org/r/20180828145728.11873-9-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 55 ++++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 31 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 08653d0a795a..40b692fa4b99 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1572,36 +1572,6 @@ out: return retval; } -static int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, pgprot_t pgprot) -{ - int ret; - /* - * Technically, architectures with pte_special can avoid all these - * restrictions (same for remap_pfn_range). However we would like - * consistency in testing and feature parity among all, so we should - * try to keep these invariants in place for everybody. - */ - BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); - BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == - (VM_PFNMAP|VM_MIXEDMAP)); - BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); - BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); - - if (addr < vma->vm_start || addr >= vma->vm_end) - return -EFAULT; - - if (!pfn_modify_allowed(pfn, pgprot)) - return -EACCES; - - track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); - - ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, - false); - - return ret; -} - /** * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to @@ -1623,7 +1593,30 @@ static int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { - int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); + int err; + + /* + * Technically, architectures with pte_special can avoid all these + * restrictions (same for remap_pfn_range). However we would like + * consistency in testing and feature parity among all, so we should + * try to keep these invariants in place for everybody. + */ + BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); + BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == + (VM_PFNMAP|VM_MIXEDMAP)); + BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); + BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); + + if (addr < vma->vm_start || addr >= vma->vm_end) + return VM_FAULT_SIGBUS; + + if (!pfn_modify_allowed(pfn, pgprot)) + return VM_FAULT_SIGBUS; + + track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); + + err = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, + false); if (err == -ENOMEM) return VM_FAULT_OOM; -- cgit v1.2.3 From 79f3aa5ba989a1fa6e2ef189f2abdfcee25ba663 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:37 -0700 Subject: mm: convert __vm_insert_mixed() to vm_fault_t Both of its callers currently convert its errno return into a vm_fault_t, so move the conversion into __vm_insert_mixed(). Link: http://lkml.kernel.org/r/20180828145728.11873-10-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 40b692fa4b99..a016fd1198a9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1668,20 +1668,21 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) return false; } -static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - pfn_t pfn, bool mkwrite) +static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, + unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; + int err; BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) - return -EFAULT; + return VM_FAULT_SIGBUS; track_pfn_insert(vma, &pgprot, pfn); if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) - return -EACCES; + return VM_FAULT_SIGBUS; /* * If we don't have pte special, then we have to use the pfn_valid() @@ -1700,15 +1701,10 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, * result in pfn_t_has_page() == false. */ page = pfn_to_page(pfn_t_to_pfn(pfn)); - return insert_page(vma, addr, page, pgprot); + err = insert_page(vma, addr, page, pgprot); + } else { + err = insert_pfn(vma, addr, pfn, pgprot, mkwrite); } - return insert_pfn(vma, addr, pfn, pgprot, mkwrite); -} - -vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - pfn_t pfn) -{ - int err = __vm_insert_mixed(vma, addr, pfn, false); if (err == -ENOMEM) return VM_FAULT_OOM; @@ -1717,6 +1713,12 @@ vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, return VM_FAULT_NOPAGE; } + +vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn) +{ + return __vm_insert_mixed(vma, addr, pfn, false); +} EXPORT_SYMBOL(vmf_insert_mixed); /* @@ -1724,18 +1726,10 @@ EXPORT_SYMBOL(vmf_insert_mixed); * different entry in the mean time, we treat that as success as we assume * the same entry was actually inserted. */ - vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { - int err; - - err = __vm_insert_mixed(vma, addr, pfn, true); - if (err == -ENOMEM) - return VM_FAULT_OOM; - if (err < 0 && err != -EBUSY) - return VM_FAULT_SIGBUS; - return VM_FAULT_NOPAGE; + return __vm_insert_mixed(vma, addr, pfn, true); } EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); -- cgit v1.2.3 From 9b5a8e00d479bb5e55f6902bf50877c080d9506d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 26 Oct 2018 15:04:40 -0700 Subject: mm: convert insert_pfn() to vm_fault_t All callers convert its errno into a vm_fault_t, so convert it to return a vm_fault_t directly. Link: http://lkml.kernel.org/r/20180828145728.11873-11-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Nicolas Pitre Cc: Souptick Joarder Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index a016fd1198a9..6abc74f41bc0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1520,19 +1520,16 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_page); -static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, +static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, pgprot_t prot, bool mkwrite) { struct mm_struct *mm = vma->vm_mm; - int retval; pte_t *pte, entry; spinlock_t *ptl; - retval = -ENOMEM; pte = get_locked_pte(mm, addr, &ptl); if (!pte) - goto out; - retval = -EBUSY; + return VM_FAULT_OOM; if (!pte_none(*pte)) { if (mkwrite) { /* @@ -1565,11 +1562,9 @@ out_mkwrite: set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ - retval = 0; out_unlock: pte_unmap_unlock(pte, ptl); -out: - return retval; + return VM_FAULT_NOPAGE; } /** @@ -1593,8 +1588,6 @@ out: vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { - int err; - /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like @@ -1615,15 +1608,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); - err = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, + return insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, false); - - if (err == -ENOMEM) - return VM_FAULT_OOM; - if (err < 0 && err != -EBUSY) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; } EXPORT_SYMBOL(vmf_insert_pfn_prot); @@ -1703,7 +1689,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, page = pfn_to_page(pfn_t_to_pfn(pfn)); err = insert_page(vma, addr, page, pgprot); } else { - err = insert_pfn(vma, addr, pfn, pgprot, mkwrite); + return insert_pfn(vma, addr, pfn, pgprot, mkwrite); } if (err == -ENOMEM) -- cgit v1.2.3 From ff09d7ec9786be4ad7589aa987d7dc66e2dd9160 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 26 Oct 2018 15:09:01 -0700 Subject: mm/memory.c: recheck page table entry with page table lock held We clear the pte temporarily during read/modify/write update of the pte. If we take a page fault while the pte is cleared, the application can get SIGBUS. One such case is with remap_pfn_range without a backing vm_ops->fault callback. do_fault will return SIGBUS in that case. cpu 0 cpu1 mprotect() ptep_modify_prot_start()/pte cleared. . . page fault. . . prep_modify_prot_commit() Fix this by taking page table lock and rechecking for pte_none. [aneesh.kumar@linux.ibm.com: fix crash observed with syzkaller run] Link: http://lkml.kernel.org/r/87va6bwlfg.fsf@linux.ibm.com Link: http://lkml.kernel.org/r/20180926031858.9692-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Cc: Willem de Bruijn Cc: Eric Dumazet Cc: Ido Schimmel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 6abc74f41bc0..072139579d89 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3496,10 +3496,36 @@ static vm_fault_t do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; - /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ - if (!vma->vm_ops->fault) - ret = VM_FAULT_SIGBUS; - else if (!(vmf->flags & FAULT_FLAG_WRITE)) + /* + * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND + */ + if (!vma->vm_ops->fault) { + /* + * If we find a migration pmd entry or a none pmd entry, which + * should never happen, return SIGBUS + */ + if (unlikely(!pmd_present(*vmf->pmd))) + ret = VM_FAULT_SIGBUS; + else { + vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, + vmf->pmd, + vmf->address, + &vmf->ptl); + /* + * Make sure this is not a temporary clearing of pte + * by holding ptl and checking again. A R/M/W update + * of pte involves: take ptl, clearing the pte so that + * we don't have concurrent modification by hardware + * followed by an update. + */ + if (unlikely(pte_none(*vmf->pte))) + ret = VM_FAULT_SIGBUS; + else + ret = VM_FAULT_NOPAGE; + + pte_unmap_unlock(vmf->pte, vmf->ptl); + } + } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); else if (!(vma->vm_flags & VM_SHARED)) ret = do_cow_fault(vmf); -- cgit v1.2.3 From f2c57d91b0d96aa13ccff4e3b178038f17b00658 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 30 Oct 2018 15:10:47 -0700 Subject: mm: Fix warning in insert_pfn() In DAX mode a write pagefault can race with write(2) in the following way: CPU0 CPU1 write fault for mapped zero page (hole) dax_iomap_rw() iomap_apply() xfs_file_iomap_begin() - allocates blocks dax_iomap_actor() invalidate_inode_pages2_range() - invalidates radix tree entries in given range dax_iomap_pte_fault() grab_mapping_entry() - no entry found, creates empty ... xfs_file_iomap_begin() - finds already allocated block ... vmf_insert_mixed_mkwrite() - WARNs and does nothing because there is still zero page mapped in PTE unmap_mapping_pages() This race results in WARN_ON from insert_pfn() and is occasionally triggered by fstest generic/344. Note that the race is otherwise harmless as before write(2) on CPU0 is finished, we will invalidate page tables properly and thus user of mmap will see modified data from write(2) from that point on. So just restrict the warning only to the case when the PFN in PTE is not zero page. Link: http://lkml.kernel.org/r/20180824154542.26872-1-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Andrew Morton Cc: Ross Zwisler Cc: Dan Williams Cc: Dave Jiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 072139579d89..4ad2d293ddc2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1537,10 +1537,15 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, * in may not match the PFN we have mapped if the * mapped PFN is a writeable COW page. In the mkwrite * case we are creating a writable PTE for a shared - * mapping and we expect the PFNs to match. + * mapping and we expect the PFNs to match. If they + * don't match, we are likely racing with block + * allocation and mapping invalidation so just skip the + * update. */ - if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn))) + if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; + } entry = *pte; goto out_mkwrite; } else -- cgit v1.2.3