diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 1 | ||||
-rw-r--r-- | mm/backing-dev.c | 186 | ||||
-rw-r--r-- | mm/filemap.c | 33 | ||||
-rw-r--r-- | mm/gup.c | 148 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 3 | ||||
-rw-r--r-- | mm/percpu.c | 40 | ||||
-rw-r--r-- | mm/swap.c | 10 | ||||
-rw-r--r-- | mm/usercopy.c | 19 | ||||
-rw-r--r-- | mm/vmalloc.c | 2 |
10 files changed, 281 insertions, 163 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 9b8fccb969dc..beb7a455915d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -312,7 +312,6 @@ config NEED_BOUNCE_POOL config NR_QUICK int depends on QUICKLIST - default "2" if AVR32 default "1" config VIRT_TO_BUS diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c6f2a37028c2..f028a9a472fd 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -12,8 +12,6 @@ #include <linux/device.h> #include <trace/events/writeback.h> -static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); - struct backing_dev_info noop_backing_dev_info = { .name = "noop", .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, @@ -242,6 +240,8 @@ static __init int bdi_class_init(void) } postcore_initcall(bdi_class_init); +static int bdi_init(struct backing_dev_info *bdi); + static int __init default_bdi_init(void) { int err; @@ -294,6 +294,8 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, memset(wb, 0, sizeof(*wb)); + if (wb != &bdi->wb) + bdi_get(bdi); wb->bdi = bdi; wb->last_old_flush = jiffies; INIT_LIST_HEAD(&wb->b_dirty); @@ -314,8 +316,10 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, wb->dirty_sleep = jiffies; wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); - if (!wb->congested) - return -ENOMEM; + if (!wb->congested) { + err = -ENOMEM; + goto out_put_bdi; + } err = fprop_local_init_percpu(&wb->completions, gfp); if (err) @@ -335,9 +339,14 @@ out_destroy_stat: fprop_local_destroy_percpu(&wb->completions); out_put_cong: wb_congested_put(wb->congested); +out_put_bdi: + if (wb != &bdi->wb) + bdi_put(bdi); return err; } +static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb); + /* * Remove bdi from the global list and shutdown any threads we have running */ @@ -347,10 +356,18 @@ static void wb_shutdown(struct bdi_writeback *wb) spin_lock_bh(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { spin_unlock_bh(&wb->work_lock); + /* + * Wait for wb shutdown to finish 
if someone else is just + * running wb_shutdown(). Otherwise we could proceed to wb / + * bdi destruction before wb_shutdown() is finished. + */ + wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE); return; } + set_bit(WB_shutting_down, &wb->state); spin_unlock_bh(&wb->work_lock); + cgwb_remove_from_bdi_list(wb); /* * Drain work list and shutdown the delayed_work. !WB_registered * tells wb_workfn() that @wb is dying and its work_list needs to @@ -359,6 +376,12 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); + /* + * Make sure bit gets cleared after shutdown is finished. Matches with + * the barrier provided by test_and_clear_bit() above. + */ + smp_wmb(); + clear_bit(WB_shutting_down, &wb->state); } static void wb_exit(struct bdi_writeback *wb) @@ -372,6 +395,8 @@ static void wb_exit(struct bdi_writeback *wb) fprop_local_destroy_percpu(&wb->completions); wb_congested_put(wb->congested); + if (wb != &wb->bdi->wb) + bdi_put(wb->bdi); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -381,11 +406,9 @@ static void wb_exit(struct bdi_writeback *wb) /* * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree, * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU - * protected. cgwb_release_wait is used to wait for the completion of cgwb - * releases from bdi destruction path. + * protected. 
*/ static DEFINE_SPINLOCK(cgwb_lock); -static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait); /** * wb_congested_get_create - get or create a wb_congested @@ -438,7 +461,7 @@ retry: return NULL; atomic_set(&new_congested->refcnt, 0); - new_congested->bdi = bdi; + new_congested->__bdi = bdi; new_congested->blkcg_id = blkcg_id; goto retry; @@ -466,10 +489,10 @@ void wb_congested_put(struct bdi_writeback_congested *congested) } /* bdi might already have been destroyed leaving @congested unlinked */ - if (congested->bdi) { + if (congested->__bdi) { rb_erase(&congested->rb_node, - &congested->bdi->cgwb_congested_tree); - congested->bdi = NULL; + &congested->__bdi->cgwb_congested_tree); + congested->__bdi = NULL; } spin_unlock_irqrestore(&cgwb_lock, flags); @@ -480,11 +503,6 @@ static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); - struct backing_dev_info *bdi = wb->bdi; - - spin_lock_irq(&cgwb_lock); - list_del_rcu(&wb->bdi_node); - spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); @@ -495,9 +513,6 @@ static void cgwb_release_workfn(struct work_struct *work) percpu_ref_exit(&wb->refcnt); wb_exit(wb); kfree_rcu(wb, rcu); - - if (atomic_dec_and_test(&bdi->usage_cnt)) - wake_up_all(&cgwb_release_wait); } static void cgwb_release(struct percpu_ref *refcnt) @@ -517,6 +532,13 @@ static void cgwb_kill(struct bdi_writeback *wb) percpu_ref_kill(&wb->refcnt); } +static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) +{ + spin_lock_irq(&cgwb_lock); + list_del_rcu(&wb->bdi_node); + spin_unlock_irq(&cgwb_lock); +} + static int cgwb_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) { @@ -580,7 +602,6 @@ static int cgwb_create(struct backing_dev_info *bdi, /* we might have raced another instance of this function */ ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb); if (!ret) { - atomic_inc(&bdi->usage_cnt); list_add_tail_rcu(&wb->bdi_node, 
&bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); @@ -670,7 +691,6 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; - atomic_set(&bdi->usage_cnt, 1); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { @@ -680,29 +700,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return ret; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) +static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { struct radix_tree_iter iter; void **slot; + struct bdi_writeback *wb; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - spin_unlock_irq(&cgwb_lock); - /* - * All cgwb's must be shutdown and released before returning. Drain - * the usage counter to wait for all cgwb's ever created on @bdi. - */ - atomic_dec(&bdi->usage_cnt); - wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); - /* - * Grab back our reference so that we hold it when @bdi gets - * re-registered. 
- */ - atomic_inc(&bdi->usage_cnt); + while (!list_empty(&bdi->wb_list)) { + wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, + bdi_node); + spin_unlock_irq(&cgwb_lock); + wb_shutdown(wb); + spin_lock_irq(&cgwb_lock); + } + spin_unlock_irq(&cgwb_lock); } /** @@ -752,11 +769,18 @@ static void cgwb_bdi_exit(struct backing_dev_info *bdi) rb_entry(rbn, struct bdi_writeback_congested, rb_node); rb_erase(rbn, &bdi->cgwb_congested_tree); - congested->bdi = NULL; /* mark @congested unlinked */ + congested->__bdi = NULL; /* mark @congested unlinked */ } spin_unlock_irq(&cgwb_lock); } +static void cgwb_bdi_register(struct backing_dev_info *bdi) +{ + spin_lock_irq(&cgwb_lock); + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); + spin_unlock_irq(&cgwb_lock); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) @@ -777,16 +801,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return 0; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } +static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { } static void cgwb_bdi_exit(struct backing_dev_info *bdi) { wb_congested_put(bdi->wb_congested); } +static void cgwb_bdi_register(struct backing_dev_info *bdi) +{ + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); +} + +static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) +{ + list_del_rcu(&wb->bdi_node); +} + #endif /* CONFIG_CGROUP_WRITEBACK */ -int bdi_init(struct backing_dev_info *bdi) +static int bdi_init(struct backing_dev_info *bdi) { int ret; @@ -802,11 +836,8 @@ int bdi_init(struct backing_dev_info *bdi) ret = cgwb_bdi_init(bdi); - list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); - return ret; } -EXPORT_SYMBOL(bdi_init); struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) { @@ -823,22 +854,20 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) } return bdi; } +EXPORT_SYMBOL(bdi_alloc_node); -int bdi_register(struct backing_dev_info *bdi, 
struct device *parent, - const char *fmt, ...) +int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) { - va_list args; struct device *dev; if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; - va_start(args, fmt); - dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args); - va_end(args); + dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); if (IS_ERR(dev)) return PTR_ERR(dev); + cgwb_bdi_register(bdi); bdi->dev = dev; bdi_debug_register(bdi, dev_name(dev)); @@ -851,20 +880,25 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, trace_writeback_bdi_register(bdi); return 0; } -EXPORT_SYMBOL(bdi_register); +EXPORT_SYMBOL(bdi_register_va); -int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) +int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...) { - return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev)); + va_list args; + int ret; + + va_start(args, fmt); + ret = bdi_register_va(bdi, fmt, args); + va_end(args); + return ret; } -EXPORT_SYMBOL(bdi_register_dev); +EXPORT_SYMBOL(bdi_register); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner) { int rc; - rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt), - MINOR(owner->devt)); + rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt)); if (rc) return rc; /* Leaking owner reference... 
*/ @@ -892,7 +926,7 @@ void bdi_unregister(struct backing_dev_info *bdi) /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); wb_shutdown(&bdi->wb); - cgwb_bdi_destroy(bdi); + cgwb_bdi_unregister(bdi); if (bdi->dev) { bdi_debug_unregister(bdi); @@ -906,19 +940,16 @@ void bdi_unregister(struct backing_dev_info *bdi) } } -static void bdi_exit(struct backing_dev_info *bdi) -{ - WARN_ON_ONCE(bdi->dev); - wb_exit(&bdi->wb); - cgwb_bdi_exit(bdi); -} - static void release_bdi(struct kref *ref) { struct backing_dev_info *bdi = container_of(ref, struct backing_dev_info, refcnt); - bdi_exit(bdi); + if (test_bit(WB_registered, &bdi->wb.state)) + bdi_unregister(bdi); + WARN_ON_ONCE(bdi->dev); + wb_exit(&bdi->wb); + cgwb_bdi_exit(bdi); kfree(bdi); } @@ -926,38 +957,7 @@ void bdi_put(struct backing_dev_info *bdi) { kref_put(&bdi->refcnt, release_bdi); } - -void bdi_destroy(struct backing_dev_info *bdi) -{ - bdi_unregister(bdi); - bdi_exit(bdi); -} -EXPORT_SYMBOL(bdi_destroy); - -/* - * For use from filesystems to quickly init and register a bdi associated - * with dirty writeback - */ -int bdi_setup_and_register(struct backing_dev_info *bdi, char *name) -{ - int err; - - bdi->name = name; - bdi->capabilities = 0; - err = bdi_init(bdi); - if (err) - return err; - - err = bdi_register(bdi, NULL, "%.28s-%ld", name, - atomic_long_inc_return(&bdi_seq)); - if (err) { - bdi_destroy(bdi); - return err; - } - - return 0; -} -EXPORT_SYMBOL(bdi_setup_and_register); +EXPORT_SYMBOL(bdi_put); static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), diff --git a/mm/filemap.c b/mm/filemap.c index 1694623a6289..dc59c5f35b37 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -519,7 +519,7 @@ EXPORT_SYMBOL(filemap_write_and_wait); * * Write out and wait upon file offsets lstart->lend, inclusive. 
* - * Note that `lend' is inclusive (describes the last byte to be written) so + * Note that @lend is inclusive (describes the last byte to be written) so * that this function can be used to write to the very end-of-file (end = -1). */ int filemap_write_and_wait_range(struct address_space *mapping, @@ -1277,12 +1277,14 @@ EXPORT_SYMBOL(find_lock_entry); * * PCG flags modify how the page is returned. * - * FGP_ACCESSED: the page will be marked accessed - * FGP_LOCK: Page is return locked - * FGP_CREAT: If page is not present then a new page is allocated using - * @gfp_mask and added to the page cache and the VM's LRU - * list. The page is returned locked and with an increased - * refcount. Otherwise, %NULL is returned. + * @fgp_flags can be: + * + * - FGP_ACCESSED: the page will be marked accessed + * - FGP_LOCK: Page is return locked + * - FGP_CREAT: If page is not present then a new page is allocated using + * @gfp_mask and added to the page cache and the VM's LRU + * list. The page is returned locked and with an increased + * refcount. Otherwise, NULL is returned. * * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even * if the GFP flags specified for FGP_CREAT are atomic. 
@@ -2033,7 +2035,6 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iov_iter data = *iter; loff_t size; size = i_size_read(inode); @@ -2044,11 +2045,12 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) file_accessed(file); - retval = mapping->a_ops->direct_IO(iocb, &data); + retval = mapping->a_ops->direct_IO(iocb, iter); if (retval >= 0) { iocb->ki_pos += retval; - iov_iter_advance(iter, retval); + count -= retval; } + iov_iter_revert(iter, count - iov_iter_count(iter)); /* * Btrfs can have a short DIO read if we encounter @@ -2059,7 +2061,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) * the rest of the read. Buffered reads will not work for * DAX files, so don't bother trying. */ - if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size || + if (retval < 0 || !count || iocb->ki_pos >= size || IS_DAX(inode)) goto out; } @@ -2704,7 +2706,6 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) ssize_t written; size_t write_len; pgoff_t end; - struct iov_iter data; write_len = iov_iter_count(from); end = (pos + write_len - 1) >> PAGE_SHIFT; @@ -2733,8 +2734,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) } } - data = *from; - written = mapping->a_ops->direct_IO(iocb, &data); + written = mapping->a_ops->direct_IO(iocb, from); /* * Finally, try again to invalidate clean pages which might have been @@ -2751,13 +2751,14 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) if (written > 0) { pos += written; - iov_iter_advance(from, written); + write_len -= written; if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { i_size_write(inode, pos); mark_inode_dirty(inode); } iocb->ki_pos = pos; } + iov_iter_revert(from, write_len - iov_iter_count(from)); out: return written; } @@ -3001,7 +3002,7 @@ 
EXPORT_SYMBOL(generic_file_write_iter); * @gfp_mask: memory allocation flags (and I/O mode) * * The address_space is to try to release any data against the page - * (presumably at page->private). If the release was successful, return `1'. + * (presumably at page->private). If the release was successful, return '1'. * Otherwise return zero. * * This may also be called if PG_fscache is set on a page, indicating that the diff --git a/mm/gup.c b/mm/gup.c --- a/mm/gup.c +++ b/mm/gup.c @@ -1189,34 +1189,57 @@ struct page *get_dump_page(unsigned long addr) */ #ifdef CONFIG_HAVE_GENERIC_RCU_GUP +#ifndef gup_get_pte +/* + * We assume that the PTE can be read atomically. If this is not the case for + * your architecture, please provide the helper. + */ +static inline pte_t gup_get_pte(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + +static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) +{ + while ((*nr) - nr_start) { + struct page *page = pages[--(*nr)]; + + ClearPageReferenced(page); + put_page(page); + } +} + #ifdef __HAVE_ARCH_PTE_SPECIAL static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { + struct dev_pagemap *pgmap = NULL; + int nr_start = *nr, ret = 0; pte_t *ptep, *ptem; - int ret = 0; ptem = ptep = pte_offset_map(&pmd, addr); do { - /* - * In the line below we are assuming that the pte can be read - * atomically. If this is not the case for your architecture, - * please wrap this in a helper function! - * - * for an example see gup_get_pte in arch/x86/mm/gup.c - */ - pte_t pte = READ_ONCE(*ptep); + pte_t pte = gup_get_pte(ptep); struct page *head, *page; /* * Similar to the PMD case below, NUMA hinting must take slow * path using the pte_protnone check. 
*/ - if (!pte_present(pte) || pte_special(pte) || - pte_protnone(pte) || (write && !pte_write(pte))) + if (pte_protnone(pte)) goto pte_unmap; - if (!arch_pte_access_permitted(pte, write)) + if (!pte_access_permitted(pte, write)) + goto pte_unmap; + + if (pte_devmap(pte)) { + pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + goto pte_unmap; + } + } else if (pte_special(pte)) goto pte_unmap; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); @@ -1232,6 +1255,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } VM_BUG_ON_PAGE(compound_head(page) != head, page); + + put_dev_pagemap(pgmap); + SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -1261,15 +1287,76 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } #endif /* __HAVE_ARCH_PTE_SPECIAL */ +#ifdef __HAVE_ARCH_PTE_DEVMAP +static int __gup_device_huge(unsigned long pfn, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + int nr_start = *nr; + struct dev_pagemap *pgmap = NULL; + + do { + struct page *page = pfn_to_page(pfn); + + pgmap = get_dev_pagemap(pfn, pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + SetPageReferenced(page); + pages[*nr] = page; + get_page(page); + put_dev_pagemap(pgmap); + (*nr)++; + pfn++; + } while (addr += PAGE_SIZE, addr != end); + return 1; +} + +static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + unsigned long fault_pfn; + + fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + return __gup_device_huge(fault_pfn, addr, end, pages, nr); +} + +static int __gup_device_huge_pud(pud_t pud, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + unsigned long fault_pfn; + + fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + return __gup_device_huge(fault_pfn, addr, end, pages, nr); +} +#else +static int 
__gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + BUILD_BUG(); + return 0; +} + +static int __gup_device_huge_pud(pud_t pud, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + BUILD_BUG(); + return 0; +} +#endif + static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct page *head, *page; int refs; - if (write && !pmd_write(orig)) + if (!pmd_access_permitted(orig, write)) return 0; + if (pmd_devmap(orig)) + return __gup_device_huge_pmd(orig, addr, end, pages, nr); + refs = 0; head = pmd_page(orig); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); @@ -1293,6 +1380,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; } + SetPageReferenced(head); return 1; } @@ -1302,9 +1390,12 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, struct page *head, *page; int refs; - if (write && !pud_write(orig)) + if (!pud_access_permitted(orig, write)) return 0; + if (pud_devmap(orig)) + return __gup_device_huge_pud(orig, addr, end, pages, nr); + refs = 0; head = pud_page(orig); page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); @@ -1328,6 +1419,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 0; } + SetPageReferenced(head); return 1; } @@ -1338,9 +1430,10 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, int refs; struct page *head, *page; - if (write && !pgd_write(orig)) + if (!pgd_access_permitted(orig, write)) return 0; + BUILD_BUG_ON(pgd_devmap(orig)); refs = 0; head = pgd_page(orig); page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); @@ -1364,6 +1457,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, return 0; } + SetPageReferenced(head); return 1; } @@ -1520,6 +1614,21 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, return nr; } +#ifndef gup_fast_permitted +/* 
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or + * we need to fall back to the slow version: + */ +bool gup_fast_permitted(unsigned long start, int nr_pages, int write) +{ + unsigned long len, end; + + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + return end >= start; +} +#endif + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address @@ -1539,11 +1648,14 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - int nr, ret; + int nr = 0, ret = 0; start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; + + if (gup_fast_permitted(start, nr_pages, write)) { + nr = __get_user_pages_fast(start, nr_pages, write, pages); + ret = nr; + } if (nr < nr_pages) { /* Try to get the remaining pages with get_user_pages */ diff --git a/mm/memory.c b/mm/memory.c index 235ba51b2fbf..6ff5d729ded0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4298,7 +4298,7 @@ void __might_fault(const char *file, int line) * get paged out, therefore we'll never actually fault, and the * below annotations will generate false positives. */ - if (segment_eq(get_fs(), KERNEL_DS)) + if (uaccess_kernel()) return; if (pagefault_disabled()) return; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07efbc3a8656..bd01501efab9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4247,7 +4247,8 @@ EXPORT_SYMBOL(free_pages_exact); * nr_free_zone_pages() counts the number of counts pages which are beyond the * high watermark within all zones at or below a given zone index. 
For each * zone, the number of pages is calculated as: - * managed_pages - high_pages + * + * nr_free_zone_pages = managed_pages - high_pages */ static unsigned long nr_free_zone_pages(int offset) { diff --git a/mm/percpu.c b/mm/percpu.c index 60a6488e9e6d..e0aa8ae7bde7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1284,18 +1284,7 @@ void free_percpu(void __percpu *ptr) } EXPORT_SYMBOL_GPL(free_percpu); -/** - * is_kernel_percpu_address - test whether address is from static percpu area - * @addr: address to test - * - * Test whether @addr belongs to in-kernel static percpu area. Module - * static percpu areas are not considered. For those, use - * is_module_percpu_address(). - * - * RETURNS: - * %true if @addr is from in-kernel static percpu area, %false otherwise. - */ -bool is_kernel_percpu_address(unsigned long addr) +bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr) { #ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; @@ -1304,16 +1293,39 @@ bool is_kernel_percpu_address(unsigned long addr) for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(base, cpu); + void *va = (void *)addr; - if ((void *)addr >= start && (void *)addr < start + static_size) + if (va >= start && va < start + static_size) { + if (can_addr) { + *can_addr = (unsigned long) (va - start); + *can_addr += (unsigned long) + per_cpu_ptr(base, get_boot_cpu_id()); + } return true; - } + } + } #endif /* on UP, can't distinguish from other static vars, always false */ return false; } /** + * is_kernel_percpu_address - test whether address is from static percpu area + * @addr: address to test + * + * Test whether @addr belongs to in-kernel static percpu area. Module + * static percpu areas are not considered. For those, use + * is_module_percpu_address(). + * + * RETURNS: + * %true if @addr is from in-kernel static percpu area, %false otherwise. 
+ */ +bool is_kernel_percpu_address(unsigned long addr) +{ + return __is_kernel_percpu_address(addr, NULL); +} + +/** * per_cpu_ptr_to_phys - convert translated percpu address to physical address * @addr: the address to be converted to physical address * diff --git a/mm/swap.c b/mm/swap.c index 5dabf444d724..d8d9ee9e311a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -97,6 +97,16 @@ static void __put_compound_page(struct page *page) void __put_page(struct page *page) { + if (is_zone_device_page(page)) { + put_dev_pagemap(page->pgmap); + + /* + * The page belongs to the device that created pgmap. Do + * not return it to page allocator. + */ + return; + } + if (unlikely(PageCompound(page))) __put_compound_page(page); else diff --git a/mm/usercopy.c b/mm/usercopy.c index d155e12563b1..a9852b24715d 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -19,15 +19,9 @@ #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> +#include <linux/thread_info.h> #include <asm/sections.h> -enum { - BAD_STACK = -1, - NOT_STACK = 0, - GOOD_FRAME, - GOOD_STACK, -}; - /* * Checks if a given pointer and length is contained by the current * stack frame (if possible). @@ -206,17 +200,6 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, { struct page *page; - /* - * Some architectures (arm64) return true for virt_addr_valid() on - * vmalloced addresses. Work around this by checking for vmalloc - * first. 
- * - * We also need to check for module addresses explicitly since we - * may copy static data from modules to userspace - */ - if (is_vmalloc_or_module_addr(ptr)) - return NULL; - if (!virt_addr_valid(ptr)) return NULL; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0b057628a7ba..b52aeed3f58e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1579,7 +1579,7 @@ void vfree_atomic(const void *addr) * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling * conventions for vfree() arch-depenedent would be a really bad idea) * - * NOTE: assumes that the object at *addr has a size >= sizeof(llist_node) + * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node) */ void vfree(const void *addr) { |