diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 12 | ||||
-rw-r--r-- | mm/backing-dev.c | 2 | ||||
-rw-r--r-- | mm/debug.c | 5 | ||||
-rw-r--r-- | mm/dmapool.c | 2 | ||||
-rw-r--r-- | mm/failslab.c | 8 | ||||
-rw-r--r-- | mm/filemap.c | 6 | ||||
-rw-r--r-- | mm/huge_memory.c | 21 | ||||
-rw-r--r-- | mm/hugetlb.c | 39 | ||||
-rw-r--r-- | mm/hugetlb_cgroup.c | 2 | ||||
-rw-r--r-- | mm/internal.h | 28 | ||||
-rw-r--r-- | mm/kasan/kasan.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 10 | ||||
-rw-r--r-- | mm/memory-failure.c | 7 | ||||
-rw-r--r-- | mm/memory.c | 8 | ||||
-rw-r--r-- | mm/mempool.c | 10 | ||||
-rw-r--r-- | mm/migrate.c | 4 | ||||
-rw-r--r-- | mm/oom_kill.c | 17 | ||||
-rw-r--r-- | mm/page-writeback.c | 4 | ||||
-rw-r--r-- | mm/page_alloc.c | 710 | ||||
-rw-r--r-- | mm/readahead.c | 4 | ||||
-rw-r--r-- | mm/shmem.c | 2 | ||||
-rw-r--r-- | mm/slab.c | 37 | ||||
-rw-r--r-- | mm/slab.h | 2 | ||||
-rw-r--r-- | mm/slab_common.c | 6 | ||||
-rw-r--r-- | mm/slob.c | 2 | ||||
-rw-r--r-- | mm/slub.c | 319 | ||||
-rw-r--r-- | mm/swap.c | 4 | ||||
-rw-r--r-- | mm/vmalloc.c | 9 | ||||
-rw-r--r-- | mm/vmscan.c | 8 | ||||
-rw-r--r-- | mm/vmstat.c | 2 | ||||
-rw-r--r-- | mm/zbud.c | 2 | ||||
-rw-r--r-- | mm/zpool.c | 18 | ||||
-rw-r--r-- | mm/zsmalloc.c | 49 | ||||
-rw-r--r-- | mm/zswap.c | 87 |
34 files changed, 705 insertions, 743 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 0d9fdcd01e47..97a4e06b15c0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -200,18 +200,6 @@ config MEMORY_HOTREMOVE depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION -# -# If we have space for more page flags then we can enable additional -# optimizations and functionality. -# -# Regular Sparsemem takes page flag bits for the sectionid if it does not -# use a virtual memmap. Disable extended page flags for 32 bit platforms -# that require the use of a sectionid in the page flags. -# -config PAGEFLAGS_EXTENDED - def_bool y - depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM - # Heavily threaded applications may benefit from splitting the mm-wide # page_table_lock, so that faults on different parts of the user address # space can be handled with less contention: split it at this NR_CPUS. diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 619984fc07ec..8ed2ffd963c5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -637,7 +637,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, { struct bdi_writeback *wb; - might_sleep_if(gfp & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (!memcg_css->parent) return &bdi->wb; diff --git a/mm/debug.c b/mm/debug.c index e784110fb51d..668aa35191ca 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -25,12 +25,7 @@ static const struct trace_print_flags pageflag_names[] = { {1UL << PG_private, "private" }, {1UL << PG_private_2, "private_2" }, {1UL << PG_writeback, "writeback" }, -#ifdef CONFIG_PAGEFLAGS_EXTENDED {1UL << PG_head, "head" }, - {1UL << PG_tail, "tail" }, -#else - {1UL << PG_compound, "compound" }, -#endif {1UL << PG_swapcache, "swapcache" }, {1UL << PG_mappedtodisk, "mappedtodisk" }, {1UL << PG_reclaim, "reclaim" }, diff --git a/mm/dmapool.c b/mm/dmapool.c index 312a716fa14c..57312b5d6e12 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -326,7 +326,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, size_t offset; void *retval; - might_sleep_if(mem_flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(mem_flags)); spin_lock_irqsave(&pool->lock, flags); list_for_each_entry(page, &pool->page_list, page_list) { diff --git a/mm/failslab.c b/mm/failslab.c index 98fb490311eb..79171b4a5826 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -3,11 +3,11 @@ static struct { struct fault_attr attr; - bool ignore_gfp_wait; + bool ignore_gfp_reclaim; bool cache_filter; } failslab = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = true, + .ignore_gfp_reclaim = true, .cache_filter = false, }; @@ -16,7 +16,7 @@ bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) if (gfpflags & __GFP_NOFAIL) return false; - if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) + if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) return false; if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) @@ -42,7 +42,7 @@ static int __init failslab_debugfs_init(void) return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &failslab.ignore_gfp_wait)) + &failslab.ignore_gfp_reclaim)) goto fail; if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) diff --git a/mm/filemap.c b/mm/filemap.c index 58e04e26f996..1bb007624b53 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1722,7 +1722,7 @@ no_cached_page: goto out; } error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { page_cache_release(page); if (error == -EEXIST) { @@ -1824,7 +1824,7 @@ static int page_cache_read(struct file *file, pgoff_t offset) return -ENOMEM; ret = add_to_page_cache_lru(page, mapping, offset, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) @@ -2713,7 +2713,7 @@ EXPORT_SYMBOL(generic_file_write_iter); * page is known to the local caching routines. * * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). + * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). * */ int try_to_release_page(struct page *page, gfp_t gfp_mask) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f5c08b46fef8..62fe06bb7d04 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void) for_each_populated_zone(zone) nr_zones++; - /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ + /* Ensure 2 pageblocks are free to assist fragmentation avoidance */ recommended_min = pageblock_nr_pages * nr_zones * 2; /* @@ -151,7 +151,7 @@ static int start_stop_khugepaged(void) if (!khugepaged_thread) khugepaged_thread = kthread_run(khugepaged, NULL, "khugepaged"); - if (unlikely(IS_ERR(khugepaged_thread))) { + if (IS_ERR(khugepaged_thread)) { pr_err("khugepaged: kthread_run(khugepaged) failed\n"); err = PTR_ERR(khugepaged_thread); khugepaged_thread = NULL; @@ -786,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) { - return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; + return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp; } /* Caller must hold page table lock. */ @@ -1755,8 +1755,7 @@ static void __split_huge_page_refcount(struct page *page, (1L << PG_unevictable))); page_tail->flags |= (1L << PG_dirty); - /* clear PageTail before overwriting first_page */ - smp_wmb(); + clear_compound_head(page_tail); if (page_is_young(page)) set_page_young(page_tail); @@ -2010,7 +2009,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) + if (*vm_flags & VM_NO_THP) return -EINVAL; *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; @@ -2026,7 +2025,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) + if (*vm_flags & VM_NO_THP) return -EINVAL; *vm_flags &= ~VM_HUGEPAGE; *vm_flags |= VM_NOHUGEPAGE; @@ -2413,8 +2412,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) static struct page * khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - int node) + unsigned long address, int node) { VM_BUG_ON_PAGE(*hpage, *hpage); @@ -2481,8 +2479,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) static struct page * khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - int node) + unsigned long address, int node) { up_read(&mm->mmap_sem); VM_BUG_ON(!*hpage); @@ -2530,7 +2527,7 @@ static void collapse_huge_page(struct mm_struct *mm, __GFP_THISNODE; /* release the mmap_sem read lock. */ - new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node); + new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node); if (!new_page) return; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 74ef0c6a25dd..827bb02a43a4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -994,23 +994,22 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) #if defined(CONFIG_CMA) && defined(CONFIG_X86_64) static void destroy_compound_gigantic_page(struct page *page, - unsigned long order) + unsigned int order) { int i; int nr_pages = 1 << order; struct page *p = page + 1; for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { - __ClearPageTail(p); + clear_compound_head(p); set_page_refcounted(p); - p->first_page = NULL; } set_compound_order(page, 0); __ClearPageHead(page); } -static void free_gigantic_page(struct page *page, unsigned order) +static void free_gigantic_page(struct page *page, unsigned int order) { free_contig_range(page_to_pfn(page), 1 << order); } @@ -1054,7 +1053,7 @@ static bool zone_spans_last_pfn(const struct zone *zone, return zone_spans_pfn(zone, last_pfn); } -static struct page *alloc_gigantic_page(int nid, unsigned order) +static struct page *alloc_gigantic_page(int nid, unsigned int order) { unsigned long nr_pages = 1 << order; unsigned long ret, pfn, flags; @@ -1090,7 +1089,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order) } static void prep_new_huge_page(struct hstate *h, struct page *page, int nid); -static void prep_compound_gigantic_page(struct page *page, unsigned long order); +static void prep_compound_gigantic_page(struct page *page, unsigned int order); static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid) { @@ -1123,9 +1122,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h, static inline bool gigantic_page_supported(void) { return true; } #else static inline bool gigantic_page_supported(void) { return false; } -static inline void free_gigantic_page(struct page *page, unsigned order) { } +static inline void free_gigantic_page(struct page *page, unsigned int order) { } static inline void destroy_compound_gigantic_page(struct page *page, - unsigned long order) { } + unsigned int order) { } static inline int alloc_fresh_gigantic_page(struct hstate *h, nodemask_t *nodes_allowed) { return 0; } #endif @@ -1146,7 +1145,7 @@ static void update_and_free_page(struct hstate *h, struct page *page) 1 << PG_writeback); } VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); - set_compound_page_dtor(page, NULL); + set_compound_page_dtor(page, NULL_COMPOUND_DTOR); set_page_refcounted(page); if (hstate_is_gigantic(h)) { destroy_compound_gigantic_page(page, huge_page_order(h)); @@ -1242,7 +1241,7 @@ void free_huge_page(struct page *page) static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { INIT_LIST_HEAD(&page->lru); - set_compound_page_dtor(page, free_huge_page); + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); spin_lock(&hugetlb_lock); set_hugetlb_cgroup(page, NULL); h->nr_huge_pages++; @@ -1251,7 +1250,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) put_page(page); /* free it into the hugepage allocator */ } -static void prep_compound_gigantic_page(struct page *page, unsigned long order) +static void prep_compound_gigantic_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; @@ -1276,10 +1275,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) */ __ClearPageReserved(p); set_page_count(p, 0); - p->first_page = page; - /* Make sure p->first_page is always valid for PageTail() */ - smp_wmb(); - __SetPageTail(p); + set_compound_head(p, page); } } @@ -1294,7 +1290,7 @@ int PageHuge(struct page *page) return 0; page = compound_head(page); - return get_compound_page_dtor(page) == free_huge_page; + return page[1].compound_dtor == HUGETLB_PAGE_DTOR; } EXPORT_SYMBOL_GPL(PageHuge); @@ -1568,7 +1564,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h, if (page) { INIT_LIST_HEAD(&page->lru); r_nid = page_to_nid(page); - set_compound_page_dtor(page, free_huge_page); + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); set_hugetlb_cgroup(page, NULL); /* * We incremented the global counters already @@ -1972,7 +1968,8 @@ found: return 1; } -static void __init prep_compound_huge_page(struct page *page, int order) +static void __init prep_compound_huge_page(struct page *page, + unsigned int order) { if (unlikely(order > (MAX_ORDER - 1))) prep_compound_gigantic_page(page, order); @@ -2141,7 +2138,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, * First take pages out of surplus state. Then make up the * remaining difference by allocating fresh huge pages. * - * We might race with alloc_buddy_huge_page() here and be unable + * We might race with __alloc_buddy_huge_page() here and be unable * to convert a surplus huge page to a normal huge page. That is * not critical, though, it just means the overall size of the * pool might be one hugepage larger than it needs to be, but @@ -2183,7 +2180,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, * By placing pages into the surplus state independent of the * overcommit value, we are allowing the surplus pool size to * exceed overcommit. There are few sane options here. Since - * alloc_buddy_huge_page() is checking the global counter, + * __alloc_buddy_huge_page() is checking the global counter, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. @@ -2683,7 +2680,7 @@ static int __init hugetlb_init(void) module_init(hugetlb_init); /* Should be called on processing a hugepagesz=... option */ -void __init hugetlb_add_hstate(unsigned order) +void __init hugetlb_add_hstate(unsigned int order) { struct hstate *h; unsigned long i; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 33d59abe91f1..d8fb10de0f14 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -385,7 +385,7 @@ void __init hugetlb_cgroup_file_init(void) /* * Add cgroup control files only if the huge page consists * of more than two normal pages. This is because we use - * page[2].lru.next for storing cgroup details. + * page[2].private for storing cgroup details. */ if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) __hugetlb_cgroup_file_init(hstate_index(h)); diff --git a/mm/internal.h b/mm/internal.h index d4b807d6c963..38e24b89e4c4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -14,6 +14,25 @@ #include <linux/fs.h> #include <linux/mm.h> +/* + * The set of flags that only affect watermark checking and reclaim + * behaviour. This is used by the MM to obey the caller constraints + * about IO, FS and watermark checking while ignoring placement + * hints such as HIGHMEM usage. + */ +#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ + __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ + __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) + +/* The GFP flags allowed during early boot */ +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) + +/* Control allocation cpuset and node placement constraints */ +#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) + +/* Do not use these with a slab allocator */ +#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) + void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); @@ -61,9 +80,9 @@ static inline void __get_page_tail_foll(struct page *page, * speculative page access (like in * page_cache_get_speculative()) on tail pages. */ - VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); + VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page); if (get_page_head) - atomic_inc(&page->first_page->_count); + atomic_inc(&compound_head(page)->_count); get_huge_page_tail(page); } @@ -129,6 +148,7 @@ struct alloc_context { int classzone_idx; int migratetype; enum zone_type high_zoneidx; + bool spread_dirty_pages; }; /* @@ -157,7 +177,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order); -extern void prep_compound_page(struct page *page, unsigned long order); +extern void prep_compound_page(struct page *page, unsigned int order); #ifdef CONFIG_MEMORY_FAILURE extern bool is_free_buddy_page(struct page *page); #endif @@ -215,7 +235,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, * page cannot be allocated or merged in parallel. Alternatively, it must * handle invalid values gracefully, and use page_order_unsafe() below. */ -static inline unsigned long page_order(struct page *page) +static inline unsigned int page_order(struct page *page) { /* PageBuddy() must be checked by the caller */ return page_private(page); diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d41b21bce6a0..bc0a8d8b8f42 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -19,6 +19,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/kmemleak.h> #include <linux/memblock.h> #include <linux/memory.h> #include <linux/mm.h> @@ -444,6 +445,7 @@ int kasan_module_alloc(void *addr, size_t size) if (ret) { find_vm_area(addr)->flags |= VM_KASAN; + kmemleak_ignore(ret); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bc502e590366..9acfb165eb52 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2046,7 +2046,7 @@ retry: if (unlikely(task_in_memcg_oom(current))) goto nomem; - if (!(gfp_mask & __GFP_WAIT)) + if (!gfpflags_allow_blocking(gfp_mask)) goto nomem; mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); @@ -2120,7 +2120,7 @@ done_restock: /* * If the hierarchy is above the normal consumption range, schedule * reclaim on returning to userland. We can perform reclaim here - * if __GFP_WAIT but let's always punt for simplicity and so that + * if __GFP_RECLAIM but let's always punt for simplicity and so that * GFP_KERNEL can consistently be used during reclaim. @memcg is * not recorded as it most likely matches current's and won't * change in the meantime. As high limit is checked again before @@ -2801,7 +2801,7 @@ static unsigned long tree_stat(struct mem_cgroup *memcg, return val; } -static inline unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) +static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) { unsigned long val; @@ -4364,8 +4364,8 @@ static int mem_cgroup_do_precharge(unsigned long count) { int ret; - /* Try a single bulk charge without reclaim first */ - ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); + /* Try a single bulk charge without reclaim first, kswapd may wake */ + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count); if (!ret) { mc.precharge += count; return ret; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 16a0ec385320..8424b64711ac 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -776,8 +776,6 @@ static int me_huge_page(struct page *p, unsigned long pfn) #define lru (1UL << PG_lru) #define swapbacked (1UL << PG_swapbacked) #define head (1UL << PG_head) -#define tail (1UL << PG_tail) -#define compound (1UL << PG_compound) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) @@ -800,12 +798,7 @@ static struct page_state { */ { slab, slab, MF_MSG_SLAB, me_kernel }, -#ifdef CONFIG_PAGEFLAGS_EXTENDED { head, head, MF_MSG_HUGE, me_huge_page }, - { tail, tail, MF_MSG_HUGE, me_huge_page }, -#else - { compound, compound, MF_MSG_HUGE, me_huge_page }, -#endif { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean }, diff --git a/mm/memory.c b/mm/memory.c index deb679c31f2a..c387430f06c3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3015,9 +3015,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else { /* * The fault handler has no page to lock, so it holds - * i_mmap_lock for write to protect against truncate. + * i_mmap_lock for read to protect against truncate. */ - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } goto uncharge_out; } @@ -3031,9 +3031,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else { /* * The fault handler has no page to lock, so it holds - * i_mmap_lock for write to protect against truncate. + * i_mmap_lock for read to protect against truncate. */ - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } return ret; uncharge_out: diff --git a/mm/mempool.c b/mm/mempool.c index 4c533bc51d73..004d42b1dfaf 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -320,13 +320,13 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) gfp_t gfp_temp; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ gfp_mask |= __GFP_NOWARN; /* failures are OK */ - gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); + gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO); repeat_alloc: @@ -349,7 +349,7 @@ repeat_alloc: } /* - * We use gfp mask w/o __GFP_WAIT or IO for the first round. If + * We use gfp mask w/o direct reclaim or IO for the first round. If * alloc failed with that and @pool was empty, retry immediately. */ if (gfp_temp != gfp_mask) { @@ -358,8 +358,8 @@ repeat_alloc: goto repeat_alloc; } - /* We must not sleep if !__GFP_WAIT */ - if (!(gfp_mask & __GFP_WAIT)) { + /* We must not sleep if !__GFP_DIRECT_RECLAIM */ + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { spin_unlock_irqrestore(&pool->lock, flags); return NULL; } diff --git a/mm/migrate.c b/mm/migrate.c index 2834faba719a..7890d0bb5e23 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~GFP_IOFS, 0); + ~(__GFP_IO | __GFP_FS), 0); return newpage; } @@ -1752,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_dropref; new_page = alloc_pages_node(node, - (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM, HPAGE_PMD_ORDER); if (!new_page) goto out_fail; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index e4778285d8d1..d13a33918fa2 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -118,6 +118,15 @@ found: return t; } +/* + * order == -1 means the oom kill is required by sysrq, otherwise only + * for display purposes. + */ +static inline bool is_sysrq_oom(struct oom_control *oc) +{ + return oc->order == -1; +} + /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask) @@ -265,7 +274,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { - if (oc->order != -1) + if (!is_sysrq_oom(oc)) return OOM_SCAN_ABORT; } if (!task->mm) @@ -278,7 +287,7 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, if (oom_task_origin(task)) return OOM_SCAN_SELECT; - if (task_will_free_mem(task) && oc->order != -1) + if (task_will_free_mem(task) && !is_sysrq_oom(oc)) return OOM_SCAN_ABORT; return OOM_SCAN_OK; @@ -629,7 +638,7 @@ void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint, return; } /* Do not panic for oom kills triggered by sysrq */ - if (oc->order == -1) + if (is_sysrq_oom(oc)) return; dump_header(oc, NULL, memcg); panic("Out of memory: %s panic_on_oom is enabled\n", @@ -709,7 +718,7 @@ bool out_of_memory(struct oom_control *oc) p = select_bad_process(oc, &points, totalpages); /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!p && oc->order != -1) { + if (!p && !is_sysrq_oom(oc)) { dump_header(oc, NULL, NULL); panic("Out of memory and no killable processes...\n"); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2c90357c34ea..3e4d65445fa7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1542,7 +1542,9 @@ static void balance_dirty_pages(struct address_space *mapping, for (;;) { unsigned long now = jiffies; unsigned long dirty, thresh, bg_thresh; - unsigned long m_dirty, m_thresh, m_bg_thresh; + unsigned long m_dirty = 0; /* stop bogus uninit warnings */ + unsigned long m_thresh = 0; + unsigned long m_bg_thresh = 0; /* * Unstable writes are a feature of certain networked diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 446bb36ee59d..17a3c66639a9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -169,19 +169,19 @@ void pm_restrict_gfp_mask(void) WARN_ON(!mutex_is_locked(&pm_mutex)); WARN_ON(saved_gfp_mask); saved_gfp_mask = gfp_allowed_mask; - gfp_allowed_mask &= ~GFP_IOFS; + gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS); } bool pm_suspended_storage(void) { - if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS) + if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) return false; return true; } #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE -int pageblock_order __read_mostly; +unsigned int pageblock_order __read_mostly; #endif static void __free_pages_ok(struct page *page, unsigned int order); @@ -229,6 +229,15 @@ static char * const zone_names[MAX_NR_ZONES] = { #endif }; +static void free_compound_page(struct page *page); +compound_page_dtor * const compound_page_dtors[] = { + NULL, + free_compound_page, +#ifdef CONFIG_HUGETLB_PAGE + free_huge_page, +#endif +}; + int min_free_kbytes = 1024; int user_min_free_kbytes = -1; @@ -436,15 +445,15 @@ out: /* * Higher-order pages are called "compound pages". They are structured thusly: * - * The first PAGE_SIZE page is called the "head page". + * The first PAGE_SIZE page is called the "head page" and have PG_head set. * - * The remaining PAGE_SIZE pages are called "tail pages". + * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded + * in bit 0 of page->compound_head. The rest of bits is pointer to head page. * - * All pages have PG_compound set. All tail pages have their ->first_page - * pointing at the head page. + * The first tail page's ->compound_dtor holds the offset in array of compound + * page destructors. See compound_page_dtors. * - * The first tail page's ->lru.next holds the address of the compound page's - * put_page() function. Its ->lru.prev holds the order of allocation. + * The first tail page's ->compound_order holds the order of allocation. * This usage means that zero-order pages may not be compound. */ @@ -453,21 +462,18 @@ static void free_compound_page(struct page *page) __free_pages_ok(page, compound_order(page)); } -void prep_compound_page(struct page *page, unsigned long order) +void prep_compound_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; - set_compound_page_dtor(page, free_compound_page); + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); set_compound_order(page, order); __SetPageHead(page); for (i = 1; i < nr_pages; i++) { struct page *p = page + i; set_page_count(p, 0); - p->first_page = page; - /* Make sure p->first_page is always valid for PageTail() */ - smp_wmb(); - __SetPageTail(p); + set_compound_head(p, page); } } @@ -656,7 +662,7 @@ static inline void __free_one_page(struct page *page, unsigned long combined_idx; unsigned long uninitialized_var(buddy_idx); struct page *buddy; - int max_order = MAX_ORDER; + unsigned int max_order = MAX_ORDER; VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); @@ -669,7 +675,7 @@ static inline void __free_one_page(struct page *page, * pageblock. Without this, pageblock isolation * could cause incorrect freepage accounting. */ - max_order = min(MAX_ORDER, pageblock_order + 1); + max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); } else { __mod_zone_freepage_state(zone, 1 << order, migratetype); } @@ -817,7 +823,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, if (unlikely(has_isolate_pageblock(zone))) mt = get_pageblock_migratetype(page); - /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); } while (--to_free && --batch_free && !list_empty(list)); @@ -846,17 +851,30 @@ static void free_one_page(struct zone *zone, static int free_tail_pages_check(struct page *head_page, struct page *page) { - if (!IS_ENABLED(CONFIG_DEBUG_VM)) - return 0; + int ret = 1; + + /* + * We rely page->lru.next never has bit 0 set, unless the page + * is PageTail(). Let's make sure that's true even for poisoned ->lru. + */ + BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1); + + if (!IS_ENABLED(CONFIG_DEBUG_VM)) { + ret = 0; + goto out; + } if (unlikely(!PageTail(page))) { bad_page(page, "PageTail not set", 0); - return 1; + goto out; } - if (unlikely(page->first_page != head_page)) { - bad_page(page, "first_page not consistent", 0); - return 1; + if (unlikely(compound_head(page) != head_page)) { + bad_page(page, "compound_head not consistent", 0); + goto out; } - return 0; + ret = 0; +out: + clear_compound_head(page); + return ret; } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -923,6 +941,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end) struct page *page = pfn_to_page(start_pfn); init_reserved_page(start_pfn); + + /* Avoid false-positive PageTail() */ + INIT_LIST_HEAD(&page->lru); + SetPageReserved(page); } } @@ -1417,15 +1439,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * the free lists for the desirable migrate type are depleted */ static int fallbacks[MIGRATE_TYPES][4] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, #ifdef CONFIG_CMA - [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ #endif - [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ #ifdef CONFIG_MEMORY_ISOLATION - [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */ #endif }; @@ -1450,7 +1471,7 @@ int move_freepages(struct zone *zone, int migratetype) { struct page *page; - unsigned long order; + unsigned int order; int pages_moved = 0; #ifndef CONFIG_HOLES_IN_ZONE @@ -1563,7 +1584,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt) static void steal_suitable_fallback(struct zone *zone, struct page *page, int start_type) { - int current_order = page_order(page); + unsigned int current_order = page_order(page); int pages; /* Take ownership for orders >= pageblock_order */ @@ -1598,7 +1619,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, *can_steal = false; for (i = 0;; i++) { fallback_mt = fallbacks[migratetype][i]; - if (fallback_mt == MIGRATE_RESERVE) + if (fallback_mt == MIGRATE_TYPES) break; if (list_empty(&area->free_list[fallback_mt])) @@ -1617,6 +1638,101 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, return -1; } +/* + * Reserve a pageblock for exclusive use of high-order atomic allocations if + * there are no empty page blocks that contain a page with a suitable order + */ +static void reserve_highatomic_pageblock(struct page *page, struct zone *zone, + unsigned int alloc_order) +{ + int mt; + unsigned long max_managed, flags; + + /* + * Limit the number reserved to 1 pageblock or roughly 1% of a zone. + * Check is race-prone but harmless. + */ + max_managed = (zone->managed_pages / 100) + pageblock_nr_pages; + if (zone->nr_reserved_highatomic >= max_managed) + return; + + spin_lock_irqsave(&zone->lock, flags); + + /* Recheck the nr_reserved_highatomic limit under the lock */ + if (zone->nr_reserved_highatomic >= max_managed) + goto out_unlock; + + /* Yoink! */ + mt = get_pageblock_migratetype(page); + if (mt != MIGRATE_HIGHATOMIC && + !is_migrate_isolate(mt) && !is_migrate_cma(mt)) { + zone->nr_reserved_highatomic += pageblock_nr_pages; + set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); + move_freepages_block(zone, page, MIGRATE_HIGHATOMIC); + } + +out_unlock: + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * Used when an allocation is about to fail under memory pressure. This + * potentially hurts the reliability of high-order allocations when under + * intense memory pressure but failed atomic allocations should be easier + * to recover from than an OOM. + */ +static void unreserve_highatomic_pageblock(const struct alloc_context *ac) +{ + struct zonelist *zonelist = ac->zonelist; + unsigned long flags; + struct zoneref *z; + struct zone *zone; + struct page *page; + int order; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx, + ac->nodemask) { + /* Preserve at least one pageblock */ + if (zone->nr_reserved_highatomic <= pageblock_nr_pages) + continue; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < MAX_ORDER; order++) { + struct free_area *area = &(zone->free_area[order]); + + if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + continue; + + page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next, + struct page, lru); + + /* + * It should never happen but changes to locking could + * inadvertently allow a per-cpu drain to add pages + * to MIGRATE_HIGHATOMIC while unreserving so be safe + * and watch for underflows. + */ + zone->nr_reserved_highatomic -= min(pageblock_nr_pages, + zone->nr_reserved_highatomic); + + /* + * Convert to ac->migratetype and avoid the normal + * pageblock stealing heuristics. Minimally, the caller + * is doing the work and needs the pages. More + * importantly, if the block was always converted to + * MIGRATE_UNMOVABLE or another type then the number + * of pageblocks that cannot be completely freed + * may increase. + */ + set_pageblock_migratetype(page, ac->migratetype); + move_freepages_block(zone, page, ac->migratetype); + spin_unlock_irqrestore(&zone->lock, flags); + return; + } + spin_unlock_irqrestore(&zone->lock, flags); + } +} + /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) @@ -1672,29 +1788,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) * Call me with the zone->lock already held. */ static struct page *__rmqueue(struct zone *zone, unsigned int order, - int migratetype) + int migratetype, gfp_t gfp_flags) { struct page *page; -retry_reserve: page = __rmqueue_smallest(zone, order, migratetype); - - if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { + if (unlikely(!page)) { if (migratetype == MIGRATE_MOVABLE) page = __rmqueue_cma_fallback(zone, order); if (!page) page = __rmqueue_fallback(zone, order, migratetype); - - /* - * Use MIGRATE_RESERVE rather than fail an allocation. goto - * is used because __rmqueue_smallest is an inline function - * and we want just one call site - */ - if (!page) { - migratetype = MIGRATE_RESERVE; - goto retry_reserve; - } } trace_mm_page_alloc_zone_locked(page, order, migratetype); @@ -1714,7 +1818,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, spin_lock(&zone->lock); for (i = 0; i < count; ++i) { - struct page *page = __rmqueue(zone, order, migratetype); + struct page *page = __rmqueue(zone, order, migratetype, 0); if (unlikely(page == NULL)) break; @@ -2086,7 +2190,7 @@ int split_free_page(struct page *page) static inline struct page *buffered_rmqueue(struct zone *preferred_zone, struct zone *zone, unsigned int order, - gfp_t gfp_flags, int migratetype) + gfp_t gfp_flags, int alloc_flags, int migratetype) { unsigned long flags; struct page *page; @@ -2129,7 +2233,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, WARN_ON_ONCE(order > 1); } spin_lock_irqsave(&zone->lock, flags); - page = __rmqueue(zone, order, migratetype); + + page = NULL; + if (alloc_flags & ALLOC_HARDER) { + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); + } + if (!page) + page = __rmqueue(zone, order, migratetype, gfp_flags); spin_unlock(&zone->lock); if (!page) goto failed; @@ -2160,11 +2272,11 @@ static struct { struct fault_attr attr; bool ignore_gfp_highmem; - bool ignore_gfp_wait; + bool ignore_gfp_reclaim; u32 min_order; } fail_page_alloc = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = true, + .ignore_gfp_reclaim = true, .ignore_gfp_highmem = true, .min_order = 1, }; @@ -2183,7 +2295,8 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) return false; if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) return false; - if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT)) + if (fail_page_alloc.ignore_gfp_reclaim && + (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; return should_fail(&fail_page_alloc.attr, 1 << order); @@ -2202,7 +2315,7 @@ static int __init fail_page_alloc_debugfs(void) return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &fail_page_alloc.ignore_gfp_wait)) + &fail_page_alloc.ignore_gfp_reclaim)) goto fail; if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir, &fail_page_alloc.ignore_gfp_highmem)) @@ -2232,42 +2345,77 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) #endif /* CONFIG_FAIL_PAGE_ALLOC */ /* - * Return true if free pages are above 'mark'. This takes into account the order - * of the allocation. + * Return true if free base pages are above 'mark'. For high-order checks it + * will return true of the order-0 watermark is reached and there is at least + * one free page of a suitable size. Checking now avoids taking the zone lock + * to check in the allocation paths if no pages are free. */ static bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags, long free_pages) { - /* free_pages may go negative - that's OK */ long min = mark; int o; - long free_cma = 0; + const int alloc_harder = (alloc_flags & ALLOC_HARDER); + /* free_pages may go negative - that's OK */ free_pages -= (1 << order) - 1; + if (alloc_flags & ALLOC_HIGH) min -= min / 2; - if (alloc_flags & ALLOC_HARDER) + + /* + * If the caller does not have rights to ALLOC_HARDER then subtract + * the high-atomic reserves. This will over-estimate the size of the + * atomic reserve but it avoids a search. + */ + if (likely(!alloc_harder)) + free_pages -= z->nr_reserved_highatomic; + else min -= min / 4; + #ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */ if (!(alloc_flags & ALLOC_CMA)) - free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); + free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES); #endif - if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) + /* + * Check watermarks for an order-0 allocation request. If these + * are not met, then a high-order request also cannot go ahead + * even if a suitable page happened to be free. + */ + if (free_pages <= min + z->lowmem_reserve[classzone_idx]) return false; - for (o = 0; o < order; o++) { - /* At the next order, this order's pages become unavailable */ - free_pages -= z->free_area[o].nr_free << o; - /* Require fewer higher order pages to be free */ - min >>= 1; + /* If this is an order-0 request then the watermark is fine */ + if (!order) + return true; + + /* For a high-order request, check at least one suitable page is free */ + for (o = order; o < MAX_ORDER; o++) { + struct free_area *area = &z->free_area[o]; + int mt; + + if (!area->nr_free) + continue; + + if (alloc_harder) + return true; + + for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { + if (!list_empty(&area->free_list[mt])) + return true; + } - if (free_pages <= min) - return false; +#ifdef CONFIG_CMA + if ((alloc_flags & ALLOC_CMA) && + !list_empty(&area->free_list[MIGRATE_CMA])) { + return true; + } +#endif } - return true; + return false; } bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, @@ -2278,134 +2426,18 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, } bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags) + unsigned long mark, int classzone_idx) { long free_pages = zone_page_state(z, NR_FREE_PAGES); if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); - return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, + return __zone_watermark_ok(z, order, mark, classzone_idx, 0, free_pages); } #ifdef CONFIG_NUMA -/* - * zlc_setup - Setup for "zonelist cache". Uses cached zone data to - * skip over zones that are not allowed by the cpuset, or that have - * been recently (in last second) found to be nearly full. See further - * comments in mmzone.h. Reduces cache footprint of zonelist scans - * that have to skip over a lot of full or unallowed zones. - * - * If the zonelist cache is present in the passed zonelist, then - * returns a pointer to the allowed node mask (either the current - * tasks mems_allowed, or node_states[N_MEMORY].) - * - * If the zonelist cache is not available for this zonelist, does - * nothing and returns NULL. - * - * If the fullzones BITMAP in the zonelist cache is stale (more than - * a second since last zap'd) then we zap it out (clear its bits.) - * - * We hold off even calling zlc_setup, until after we've checked the - * first zone in the zonelist, on the theory that most allocations will - * be satisfied from that first zone, so best to examine that zone as - * quickly as we can. - */ -static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - nodemask_t *allowednodes; /* zonelist_cache approximation */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return NULL; - - if (time_after(jiffies, zlc->last_full_zap + HZ)) { - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); - zlc->last_full_zap = jiffies; - } - - allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? - &cpuset_current_mems_allowed : - &node_states[N_MEMORY]; - return allowednodes; -} - -/* - * Given 'z' scanning a zonelist, run a couple of quick checks to see - * if it is worth looking at further for free memory: - * 1) Check that the zone isn't thought to be full (doesn't have its - * bit set in the zonelist_cache fullzones BITMAP). - * 2) Check that the zones node (obtained from the zonelist_cache - * z_to_n[] mapping) is allowed in the passed in allowednodes mask. - * Return true (non-zero) if zone is worth looking at further, or - * else return false (zero) if it is not. - * - * This check -ignores- the distinction between various watermarks, - * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is - * found to be full for any variation of these watermarks, it will - * be considered full for up to one second by all requests, unless - * we are so low on memory on all allowed nodes that we are forced - * into the second scan of the zonelist. - * - * In the second scan we ignore this zonelist cache and exactly - * apply the watermarks to all zones, even it is slower to do so. - * We are low on memory in the second scan, and should leave no stone - * unturned looking for a free page. - */ -static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z, - nodemask_t *allowednodes) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - int i; /* index of *z in zonelist zones */ - int n; /* node that zone *z is on */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return 1; - - i = z - zonelist->_zonerefs; - n = zlc->z_to_n[i]; - - /* This zone is worth trying if it is allowed but not full */ - return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones); -} - -/* - * Given 'z' scanning a zonelist, set the corresponding bit in - * zlc->fullzones, so that subsequent attempts to allocate a page - * from that zone don't waste time re-examining it. - */ -static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - int i; /* index of *z in zonelist zones */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return; - - i = z - zonelist->_zonerefs; - - set_bit(i, zlc->fullzones); -} - -/* - * clear all zones full, called after direct reclaim makes progress so that - * a zone that was recently full is not skipped over for up to a second - */ -static void zlc_clear_zones_full(struct zonelist *zonelist) -{ - struct zonelist_cache *zlc; /* cached zonelist speedup info */ - - zlc = zonelist->zlcache_ptr; - if (!zlc) - return; - - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); -} - static bool zone_local(struct zone *local_zone, struct zone *zone) { return local_zone->node == zone->node; @@ -2416,28 +2448,7 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < RECLAIM_DISTANCE; } - #else /* CONFIG_NUMA */ - -static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) -{ - return NULL; -} - -static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z, - nodemask_t *allowednodes) -{ - return 1; -} - -static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z) -{ -} - -static void zlc_clear_zones_full(struct zonelist *zonelist) -{ -} - static bool zone_local(struct zone *local_zone, struct zone *zone) { return true; @@ -2447,7 +2458,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return true; } - #endif /* CONFIG_NUMA */ static void reset_alloc_batches(struct zone *preferred_zone) @@ -2474,11 +2484,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct zoneref *z; struct page *page = NULL; struct zone *zone; - nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ - int zlc_active = 0; /* set if using zonelist_cache */ - int did_zlc_setup = 0; /* just call zlc_setup() one time */ - bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && - (gfp_mask & __GFP_WRITE); int nr_fair_skipped = 0; bool zonelist_rescan; @@ -2493,9 +2498,6 @@ zonelist_scan: ac->nodemask) { unsigned long mark; - if (IS_ENABLED(CONFIG_NUMA) && zlc_active && - !zlc_zone_worth_trying(zonelist, z, allowednodes)) - continue; if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed(zone, gfp_mask)) @@ -2533,14 +2535,14 @@ zonelist_scan: * * XXX: For now, allow allocations to potentially * exceed the per-zone dirty limit in the slowpath - * (ALLOC_WMARK_LOW unset) before going into reclaim, + * (spread_dirty_pages unset) before going into reclaim, * which is important when on a NUMA setup the allowed * zones are together not big enough to reach the * global limit. The proper fix for these situations * will require awareness of zones in the * dirty-throttling and the flusher threads. */ - if (consider_zone_dirty && !zone_dirty_ok(zone)) + if (ac->spread_dirty_pages && !zone_dirty_ok(zone)) continue; mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; @@ -2553,28 +2555,8 @@ zonelist_scan: if (alloc_flags & ALLOC_NO_WATERMARKS) goto try_this_zone; - if (IS_ENABLED(CONFIG_NUMA) && - !did_zlc_setup && nr_online_nodes > 1) { - /* - * we do zlc_setup if there are multiple nodes - * and before considering the first zone allowed - * by the cpuset. - */ - allowednodes = zlc_setup(zonelist, alloc_flags); - zlc_active = 1; - did_zlc_setup = 1; - } - if (zone_reclaim_mode == 0 || !zone_allows_reclaim(ac->preferred_zone, zone)) - goto this_zone_full; - - /* - * As we may have just activated ZLC, check if the first - * eligible zone has failed zone_reclaim recently. - */ - if (IS_ENABLED(CONFIG_NUMA) && zlc_active && - !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; ret = zone_reclaim(zone, gfp_mask, order); @@ -2591,34 +2573,26 @@ zonelist_scan: ac->classzone_idx, alloc_flags)) goto try_this_zone; - /* - * Failed to reclaim enough to meet watermark. - * Only mark the zone full if checking the min - * watermark or if we failed to reclaim just - * 1<<order pages or else the page allocator - * fastpath will prematurely mark zones full - * when the watermark is between the low and - * min watermarks. - */ - if (((alloc_flags & ALLOC_WMARK_MASK) == ALLOC_WMARK_MIN) || - ret == ZONE_RECLAIM_SOME) - goto this_zone_full; - continue; } } try_this_zone: page = buffered_rmqueue(ac->preferred_zone, zone, order, - gfp_mask, ac->migratetype); + gfp_mask, alloc_flags, ac->migratetype); if (page) { if (prep_new_page(page, order, gfp_mask, alloc_flags)) goto try_this_zone; + + /* + * If this is a high-order atomic allocation then check + * if the pageblock should be reserved for the future + */ + if (unlikely(order && (alloc_flags & ALLOC_HARDER))) + reserve_highatomic_pageblock(page, zone, order); + return page; } -this_zone_full: - if (IS_ENABLED(CONFIG_NUMA) && zlc_active) - zlc_mark_zone_full(zonelist, z); } /* @@ -2639,12 +2613,6 @@ this_zone_full: zonelist_rescan = true; } - if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) { - /* Disable zlc cache for second zonelist scan */ - zlc_active = 0; - zonelist_rescan = true; - } - if (zonelist_rescan) goto zonelist_scan; @@ -2669,7 +2637,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...) { unsigned int filter = SHOW_MEM_FILTER_NODES; @@ -2686,7 +2654,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) if (test_thread_flag(TIF_MEMDIE) || (current->flags & (PF_MEMALLOC | PF_EXITING))) filter &= ~SHOW_MEM_FILTER_NODES; - if (in_interrupt() || !(gfp_mask & __GFP_WAIT)) + if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM)) filter &= ~SHOW_MEM_FILTER_NODES; if (fmt) { @@ -2703,7 +2671,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) va_end(args); } - pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n", + pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n", current->comm, order, gfp_mask); dump_stack(); @@ -2889,19 +2857,17 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, if (unlikely(!(*did_some_progress))) return NULL; - /* After successful reclaim, reconsider all zones for allocation */ - if (IS_ENABLED(CONFIG_NUMA)) - zlc_clear_zones_full(ac->zonelist); - retry: page = get_page_from_freelist(gfp_mask, order, alloc_flags & ~ALLOC_NO_WATERMARKS, ac); /* * If an allocation failed after direct reclaim, it could be because - * pages are pinned on the per-cpu lists. Drain them and try again + * pages are pinned on the per-cpu lists or in high alloc reserves. + * Shrink them them and try again */ if (!page && !drained) { + unreserve_highatomic_pageblock(ac); drain_all_pages(NULL); drained = true; goto retry; @@ -2946,7 +2912,6 @@ static inline int gfp_to_alloc_flags(gfp_t gfp_mask) { int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; - const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD)); /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); @@ -2955,11 +2920,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * The caller may dip into page reserves a bit more if the caller * cannot run direct reclaim, or if the caller has realtime scheduling * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will - * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH). + * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH). */ alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); - if (atomic) { + if (gfp_mask & __GFP_ATOMIC) { /* * Not worth trying to allocate harder for __GFP_NOMEMALLOC even * if it can't schedule. @@ -2996,11 +2961,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS); } +static inline bool is_thp_gfp_mask(gfp_t gfp_mask) +{ + return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE; +} + static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { - const gfp_t wait = gfp_mask & __GFP_WAIT; + bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; struct page *page = NULL; int alloc_flags; unsigned long pages_reclaimed = 0; @@ -3021,15 +2991,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, } /* + * We also sanity check to catch abuse of atomic reserves being used by + * callers that are not in atomic context. + */ + if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) == + (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) + gfp_mask &= ~__GFP_ATOMIC; + + /* * If this allocation cannot block and it is for a specific node, then * fail early. There's no need to wakeup kswapd or retry for a * speculative node-specific allocation. */ - if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) + if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim) goto nopage; retry: - if (!(gfp_mask & __GFP_NO_KSWAPD)) + if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, ac); /* @@ -3072,8 +3050,8 @@ retry: } } - /* Atomic allocations - we can't balance anything */ - if (!wait) { + /* Caller is not willing to reclaim, we can't balance anything */ + if (!can_direct_reclaim) { /* * All existing users of the deprecated __GFP_NOFAIL are * blockable, so warn of any new users that actually allow this @@ -3103,7 +3081,7 @@ retry: goto got_pg; /* Checks for THP-specific high-order allocations */ - if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) { + if (is_thp_gfp_mask(gfp_mask)) { /* * If compaction is deferred for high-order allocations, it is * because sync compaction recently failed. If this is the case @@ -3138,8 +3116,7 @@ retry: * fault, so use asynchronous memory compaction for THP unless it is * khugepaged trying to collapse. */ - if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE || - (current->flags & PF_KTHREAD)) + if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD)) migration_mode = MIGRATE_SYNC_LIGHT; /* Try direct reclaim and then allocating */ @@ -3210,7 +3187,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, lockdep_trace_alloc(gfp_mask); - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); if (should_fail_alloc_page(gfp_mask, order)) return NULL; @@ -3231,6 +3208,10 @@ retry_cpuset: /* We set it here, as __alloc_pages_slowpath might have changed it */ ac.zonelist = zonelist; + + /* Dirty zone balancing only done in the fast path */ + ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); + /* The preferred zone is used for statistics later */ preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask ? : &cpuset_current_mems_allowed, @@ -3249,6 +3230,7 @@ retry_cpuset: * complete. */ alloc_mask = memalloc_noio_flags(gfp_mask); + ac.spread_dirty_pages = false; page = __alloc_pages_slowpath(alloc_mask, order, &ac); } @@ -3467,7 +3449,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order) } } -static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +static void *make_alloc_exact(unsigned long addr, unsigned int order, + size_t size) { if (addr) { unsigned long alloc_end = addr + (PAGE_SIZE << order); @@ -3517,7 +3500,7 @@ EXPORT_SYMBOL(alloc_pages_exact); */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { - unsigned order = get_order(size); + unsigned int order = get_order(size); struct page *p = alloc_pages_node(nid, gfp_mask, order); if (!p) return NULL; @@ -3666,7 +3649,6 @@ static void show_migration_types(unsigned char type) [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_MOVABLE] = 'M', - [MIGRATE_RESERVE] = 'R', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif @@ -3819,7 +3801,8 @@ void show_free_areas(unsigned int filter) } for_each_populated_zone(zone) { - unsigned long nr[MAX_ORDER], flags, order, total = 0; + unsigned int order; + unsigned long nr[MAX_ORDER], flags, total = 0; unsigned char types[MAX_ORDER]; if (skip_free_areas_node(filter, zone_to_nid(zone))) @@ -4168,7 +4151,7 @@ static void build_zonelists(pg_data_t *pgdat) nodemask_t used_mask; int local_node, prev_node; struct zonelist *zonelist; - int order = current_zonelist_order; + unsigned int order = current_zonelist_order; /* initialize zonelists */ for (i = 0; i < MAX_ZONELISTS; i++) { @@ -4212,20 +4195,6 @@ static void build_zonelists(pg_data_t *pgdat) build_thisnode_zonelists(pgdat); } -/* Construct the zonelist performance cache - see further mmzone.h */ -static void build_zonelist_cache(pg_data_t *pgdat) -{ - struct zonelist *zonelist; - struct zonelist_cache *zlc; - struct zoneref *z; - - zonelist = &pgdat->node_zonelists[0]; - zonelist->zlcache_ptr = zlc = &zonelist->zlcache; - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); - for (z = zonelist->_zonerefs; z->zone; z++) - zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); -} - #ifdef CONFIG_HAVE_MEMORYLESS_NODES /* * Return node id of node used for "local" allocations. @@ -4286,12 +4255,6 @@ static void build_zonelists(pg_data_t *pgdat) zonelist->_zonerefs[j].zone_idx = 0; } -/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ -static void build_zonelist_cache(pg_data_t *pgdat) -{ - pgdat->node_zonelists[0].zlcache_ptr = NULL; -} - #endif /* CONFIG_NUMA */ /* @@ -4332,14 +4295,12 @@ static int __build_all_zonelists(void *data) if (self && !node_online(self->node_id)) { build_zonelists(self); - build_zonelist_cache(self); } for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); build_zonelists(pgdat); - build_zonelist_cache(pgdat); } /* @@ -4499,120 +4460,6 @@ static inline unsigned long wait_table_bits(unsigned long size) } /* - * Check if a pageblock contains reserved pages - */ -static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long pfn; - - for (pfn = start_pfn; pfn < end_pfn; pfn++) { - if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn))) - return 1; - } - return 0; -} - -/* - * Mark a number of pageblocks as MIGRATE_RESERVE. The number - * of blocks reserved is based on min_wmark_pages(zone). The memory within - * the reserve will tend to store contiguous free pages. Setting min_free_kbytes - * higher will lead to a bigger reserve which will get freed as contiguous - * blocks as reclaim kicks in - */ -static void setup_zone_migrate_reserve(struct zone *zone) -{ - unsigned long start_pfn, pfn, end_pfn, block_end_pfn; - struct page *page; - unsigned long block_migratetype; - int reserve; - int old_reserve; - - /* - * Get the start pfn, end pfn and the number of blocks to reserve - * We have to be careful to be aligned to pageblock_nr_pages to - * make sure that we always check pfn_valid for the first page in - * the block. - */ - start_pfn = zone->zone_start_pfn; - end_pfn = zone_end_pfn(zone); - start_pfn = roundup(start_pfn, pageblock_nr_pages); - reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> - pageblock_order; - - /* - * Reserve blocks are generally in place to help high-order atomic - * allocations that are short-lived. A min_free_kbytes value that - * would result in more than 2 reserve blocks for atomic allocations - * is assumed to be in place to help anti-fragmentation for the - * future allocation of hugepages at runtime. - */ - reserve = min(2, reserve); - old_reserve = zone->nr_migrate_reserve_block; - - /* When memory hot-add, we almost always need to do nothing */ - if (reserve == old_reserve) - return; - zone->nr_migrate_reserve_block = reserve; - - for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { - if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone))) - return; - - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - - /* Watch out for overlapping nodes */ - if (page_to_nid(page) != zone_to_nid(zone)) - continue; - - block_migratetype = get_pageblock_migratetype(page); - - /* Only test what is necessary when the reserves are not met */ - if (reserve > 0) { - /* - * Blocks with reserved pages will never free, skip - * them. - */ - block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); - if (pageblock_is_reserved(pfn, block_end_pfn)) - continue; - - /* If this block is reserved, account for it */ - if (block_migratetype == MIGRATE_RESERVE) { - reserve--; - continue; - } - - /* Suitable for reserving if this block is movable */ - if (block_migratetype == MIGRATE_MOVABLE) { - set_pageblock_migratetype(page, - MIGRATE_RESERVE); - move_freepages_block(zone, page, - MIGRATE_RESERVE); - reserve--; - continue; - } - } else if (!old_reserve) { - /* - * At boot time we don't need to scan the whole zone - * for turning off MIGRATE_RESERVE. - */ - break; - } - - /* - * If the reserve is met and this is a previous reserved block, - * take it back - */ - if (block_migratetype == MIGRATE_RESERVE) { - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(zone, page, MIGRATE_MOVABLE); - } - } -} - -/* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is * done. Non-atomic initialization, single-pass. @@ -4651,9 +4498,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * movable at startup. This will force kernel allocations * to reserve their blocks rather than leaking throughout * the address space during boot when many long-lived - * kernel allocations are made. Later some blocks near - * the start are marked MIGRATE_RESERVE by - * setup_zone_migrate_reserve() + * kernel allocations are made. * * bitmap is created for zone's valid pfn range. but memmap * can be created for invalid pages (for alignment) @@ -5421,6 +5266,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) { + unsigned long __maybe_unused start = 0; unsigned long __maybe_unused offset = 0; /* Skip empty nodes */ @@ -5428,9 +5274,11 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) return; #ifdef CONFIG_FLAT_NODE_MEM_MAP + start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); + offset = pgdat->node_start_pfn - start; /* ia64 gets its own node_mem_map, before this, without bootmem */ if (!pgdat->node_mem_map) { - unsigned long size, start, end; + unsigned long size, end; struct page *map; /* @@ -5438,8 +5286,6 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) * aligned but the node_mem_map endpoints must be in order * for the buddy allocator to function correctly. */ - start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); - offset = pgdat->node_start_pfn - start; end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); @@ -6214,7 +6060,6 @@ static void __setup_per_zone_wmarks(void) high_wmark_pages(zone) - low_wmark_pages(zone) - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -6836,7 +6681,8 @@ int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype) { unsigned long outer_start, outer_end; - int ret = 0, order; + unsigned int order; + int ret = 0; struct compact_control cc = { .nr_migratepages = 0, diff --git a/mm/readahead.c b/mm/readahead.c index 998ad592f408..ba22d7fe0afb 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -90,7 +90,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, page = list_to_page(pages); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) { + mapping_gfp_constraint(mapping, GFP_KERNEL))) { read_cache_pages_invalidate_page(mapping, page); continue; } @@ -128,7 +128,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, struct page *page = list_to_page(pages); list_del(&page->lru); if (!add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) { + mapping_gfp_constraint(mapping, GFP_KERNEL))) { mapping->a_ops->readpage(filp, page); } page_cache_release(page); diff --git a/mm/shmem.c b/mm/shmem.c index 3b8b73928398..9187eee4128b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -73,6 +73,8 @@ static struct vfsmount *shm_mnt; #include <asm/uaccess.h> #include <asm/pgtable.h> +#include "internal.h" + #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) diff --git a/mm/slab.c b/mm/slab.c index 272e809404d5..4765c97ce690 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1031,12 +1031,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } /* - * Construct gfp mask to allocate from a specific node but do not invoke reclaim - * or warn about failures. + * Construct gfp mask to allocate from a specific node but do not direct reclaim + * or warn about failures. kswapd may still wake to reclaim in the background. */ static inline gfp_t gfp_exact_node(gfp_t flags) { - return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; + return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM; } #endif @@ -1889,21 +1889,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) freelist = page->freelist; slab_destroy_debugcheck(cachep, page); - if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { - struct rcu_head *head; - - /* - * RCU free overloads the RCU head over the LRU. - * slab_page has been overloeaded over the LRU, - * however it is not used from now on so that - * we can use it safely. - */ - head = (void *)&page->rcu_head; - call_rcu(head, kmem_rcu_free); - - } else { + if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) + call_rcu(&page->rcu_head, kmem_rcu_free); + else kmem_freepages(cachep, page); - } /* * From now on, we don't use freelist @@ -2633,7 +2622,7 @@ static int cache_grow(struct kmem_cache *cachep, offset *= cachep->colour_off; - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_enable(); /* @@ -2663,7 +2652,7 @@ static int cache_grow(struct kmem_cache *cachep, cache_init_objs(cachep, page); - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); check_irq_off(); spin_lock(&n->list_lock); @@ -2677,7 +2666,7 @@ static int cache_grow(struct kmem_cache *cachep, opps1: kmem_freepages(cachep, page); failed: - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); return 0; } @@ -2869,7 +2858,7 @@ force_grow: static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) { - might_sleep_if(flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(flags)); #if DEBUG kmem_flagcheck(cachep, flags); #endif @@ -3057,11 +3046,11 @@ retry: */ struct page *page; - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_enable(); kmem_flagcheck(cache, flags); page = kmem_getpages(cache, local_flags, numa_mem_id()); - if (local_flags & __GFP_WAIT) + if (gfpflags_allow_blocking(local_flags)) local_irq_disable(); if (page) { /* @@ -3430,7 +3419,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slab.h b/mm/slab.h index 27492eb678f7..7b6087197997 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -170,7 +170,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, * may be allocated or freed using these operations. */ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_MEMCG_KMEM /* diff --git a/mm/slab_common.c b/mm/slab_common.c index d88e97c10a2e..3c6a86b4ec25 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -112,7 +112,7 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) kmem_cache_free(s, p[i]); } -bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, +int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void **p) { size_t i; @@ -121,10 +121,10 @@ bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void *x = p[i] = kmem_cache_alloc(s, flags); if (!x) { __kmem_cache_free_bulk(s, i, p); - return false; + return 0; } } - return true; + return i; } #ifdef CONFIG_MEMCG_KMEM diff --git a/mm/slob.c b/mm/slob.c index 0d7e5df74d1f..17e8f8cc7c53 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -617,7 +617,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slub.c b/mm/slub.c index 75a5fa92ac2a..46997517406e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1065,11 +1065,15 @@ bad: return 0; } +/* Supports checking bulk free of a constructed freelist */ static noinline struct kmem_cache_node *free_debug_processing( - struct kmem_cache *s, struct page *page, void *object, + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, unsigned long addr, unsigned long *flags) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + void *object = head; + int cnt = 0; spin_lock_irqsave(&n->list_lock, *flags); slab_lock(page); @@ -1077,6 +1081,9 @@ static noinline struct kmem_cache_node *free_debug_processing( if (!check_slab(s, page)) goto fail; +next_object: + cnt++; + if (!check_valid_pointer(s, page, object)) { slab_err(s, page, "Invalid object pointer 0x%p", object); goto fail; @@ -1107,8 +1114,19 @@ static noinline struct kmem_cache_node *free_debug_processing( if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); trace(s, page, object, 0); + /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ init_object(s, object, SLUB_RED_INACTIVE); + + /* Reached end of constructed freelist yet? */ + if (object != tail) { + object = get_freepointer(s, object); + goto next_object; + } out: + if (cnt != bulk_cnt) + slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", + bulk_cnt, cnt); + slab_unlock(page); /* * Keep node_lock to preserve integrity @@ -1204,7 +1222,7 @@ unsigned long kmem_cache_flags(unsigned long object_size, return flags; } -#else +#else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) {} @@ -1212,7 +1230,8 @@ static inline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { return 0; } static inline struct kmem_cache_node *free_debug_processing( - struct kmem_cache *s, struct page *page, void *object, + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, unsigned long addr, unsigned long *flags) { return NULL; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) @@ -1265,7 +1284,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, { flags &= gfp_allowed_mask; lockdep_trace_alloc(flags); - might_sleep_if(flags & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(flags)); if (should_failslab(s->object_size, flags, s->flags)) return NULL; @@ -1273,14 +1292,21 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, return memcg_kmem_get_cache(s, flags); } -static inline void slab_post_alloc_hook(struct kmem_cache *s, - gfp_t flags, void *object) +static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, + size_t size, void **p) { + size_t i; + flags &= gfp_allowed_mask; - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); + for (i = 0; i < size; i++) { + void *object = p[i]; + + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); + kmemleak_alloc_recursive(object, s->object_size, 1, + s->flags, flags); + kasan_slab_alloc(s, object); + } memcg_kmem_put_cache(s); - kasan_slab_alloc(s, object); } static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -1308,6 +1334,29 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) kasan_slab_free(s, x); } +static inline void slab_free_freelist_hook(struct kmem_cache *s, + void *head, void *tail) +{ +/* + * Compiler cannot detect this function can be removed if slab_free_hook() + * evaluates to nothing. Thus, catch all relevant config debug options here. + */ +#if defined(CONFIG_KMEMCHECK) || \ + defined(CONFIG_LOCKDEP) || \ + defined(CONFIG_DEBUG_KMEMLEAK) || \ + defined(CONFIG_DEBUG_OBJECTS_FREE) || \ + defined(CONFIG_KASAN) + + void *object = head; + void *tail_obj = tail ? : head; + + do { + slab_free_hook(s, object); + } while ((object != tail_obj) && + (object = get_freepointer(s, object))); +#endif +} + static void setup_object(struct kmem_cache *s, struct page *page, void *object) { @@ -1353,7 +1402,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) flags &= gfp_allowed_mask; - if (flags & __GFP_WAIT) + if (gfpflags_allow_blocking(flags)) local_irq_enable(); flags |= s->allocflags; @@ -1363,8 +1412,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) * so we fall-back to the minimum order allocation. */ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; - if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min)) - alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT; + if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) + alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM; page = alloc_slab_page(s, alloc_gfp, node, oo); if (unlikely(!page)) { @@ -1424,7 +1473,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->frozen = 1; out: - if (flags & __GFP_WAIT) + if (gfpflags_allow_blocking(flags)) local_irq_disable(); if (!page) return NULL; @@ -1507,10 +1556,7 @@ static void free_slab(struct kmem_cache *s, struct page *page) VM_BUG_ON(s->reserved != sizeof(*head)); head = page_address(page) + offset; } else { - /* - * RCU free overloads the RCU head over the LRU - */ - head = (void *)&page->lru; + head = &page->rcu_head; } call_rcu(head, rcu_free_slab); @@ -2298,23 +2344,15 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) * And if we were unable to get a new slab from the partial slab lists then * we need to allocate a new slab. This is the slowest path since it involves * a call to the page allocator and the setup of a new slab. + * + * Version of __slab_alloc to use when we know that interrupts are + * already disabled (which is the case for bulk allocation). */ -static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, struct kmem_cache_cpu *c) { void *freelist; struct page *page; - unsigned long flags; - - local_irq_save(flags); -#ifdef CONFIG_PREEMPT - /* - * We may have been preempted and rescheduled on a different - * cpu before disabling interrupts. Need to reload cpu area - * pointer. - */ - c = this_cpu_ptr(s->cpu_slab); -#endif page = c->page; if (!page) @@ -2372,7 +2410,6 @@ load_freelist: VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); - local_irq_restore(flags); return freelist; new_slab: @@ -2389,7 +2426,6 @@ new_slab: if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); - local_irq_restore(flags); return NULL; } @@ -2405,11 +2441,35 @@ new_slab: deactivate_slab(s, page, get_freepointer(s, freelist)); c->page = NULL; c->freelist = NULL; - local_irq_restore(flags); return freelist; } /* + * Another one that disabled interrupt and compensates for possible + * cpu changes by refetching the per cpu area pointer. + */ +static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) +{ + void *p; + unsigned long flags; + + local_irq_save(flags); +#ifdef CONFIG_PREEMPT + /* + * We may have been preempted and rescheduled on a different + * cpu before disabling interrupts. Need to reload cpu area + * pointer. + */ + c = this_cpu_ptr(s->cpu_slab); +#endif + + p = ___slab_alloc(s, gfpflags, node, addr, c); + local_irq_restore(flags); + return p; +} + +/* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into their functions. So no function call * overhead for requests that can be satisfied on the fastpath. @@ -2422,7 +2482,7 @@ new_slab: static __always_inline void *slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr) { - void **object; + void *object; struct kmem_cache_cpu *c; struct page *page; unsigned long tid; @@ -2501,7 +2561,7 @@ redo: if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->object_size); - slab_post_alloc_hook(s, gfpflags, object); + slab_post_alloc_hook(s, gfpflags, 1, &object); return object; } @@ -2572,10 +2632,11 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr) + void *head, void *tail, int cnt, + unsigned long addr) + { void *prior; - void **object = (void *)x; int was_frozen; struct page new; unsigned long counters; @@ -2585,7 +2646,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, stat(s, FREE_SLOWPATH); if (kmem_cache_debug(s) && - !(n = free_debug_processing(s, page, x, addr, &flags))) + !(n = free_debug_processing(s, page, head, tail, cnt, + addr, &flags))) return; do { @@ -2595,10 +2657,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } prior = page->freelist; counters = page->counters; - set_freepointer(s, object, prior); + set_freepointer(s, tail, prior); new.counters = counters; was_frozen = new.frozen; - new.inuse--; + new.inuse -= cnt; if ((!new.inuse || !prior) && !was_frozen) { if (kmem_cache_has_cpu_partial(s) && !prior) { @@ -2629,7 +2691,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } while (!cmpxchg_double_slab(s, page, prior, counters, - object, new.counters, + head, new.counters, "__slab_free")); if (likely(!n)) { @@ -2694,15 +2756,20 @@ slab_empty: * * If fastpath is not possible then fall back to __slab_free where we deal * with all sorts of special processing. + * + * Bulk free of a freelist with several objects (all pointing to the + * same page) possible by specifying head and tail ptr, plus objects + * count (cnt). Bulk free indicated by tail pointer being set. */ -static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, unsigned long addr) +static __always_inline void slab_free(struct kmem_cache *s, struct page *page, + void *head, void *tail, int cnt, + unsigned long addr) { - void **object = (void *)x; + void *tail_obj = tail ? : head; struct kmem_cache_cpu *c; unsigned long tid; - slab_free_hook(s, x); + slab_free_freelist_hook(s, head, tail); redo: /* @@ -2721,19 +2788,19 @@ redo: barrier(); if (likely(page == c->page)) { - set_freepointer(s, object, c->freelist); + set_freepointer(s, tail_obj, c->freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, - object, next_tid(tid)))) { + head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); goto redo; } stat(s, FREE_FASTPATH); } else - __slab_free(s, page, x, addr); + __slab_free(s, page, head, tail_obj, cnt, addr); } @@ -2742,59 +2809,116 @@ void kmem_cache_free(struct kmem_cache *s, void *x) s = cache_from_obj(s, x); if (!s) return; - slab_free(s, virt_to_head_page(x), x, _RET_IP_); + slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); trace_kmem_cache_free(_RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); -/* Note that interrupts must be enabled when calling this function. */ -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) -{ - struct kmem_cache_cpu *c; +struct detached_freelist { struct page *page; - int i; + void *tail; + void *freelist; + int cnt; +}; - local_irq_disable(); - c = this_cpu_ptr(s->cpu_slab); +/* + * This function progressively scans the array with free objects (with + * a limited look ahead) and extract objects belonging to the same + * page. It builds a detached freelist directly within the given + * page/objects. This can happen without any need for + * synchronization, because the objects are owned by running process. + * The freelist is build up as a single linked list in the objects. + * The idea is, that this detached freelist can then be bulk + * transferred to the real freelist(s), but only requiring a single + * synchronization primitive. Look ahead in the array is limited due + * to performance reasons. + */ +static int build_detached_freelist(struct kmem_cache *s, size_t size, + void **p, struct detached_freelist *df) +{ + size_t first_skipped_index = 0; + int lookahead = 3; + void *object; - for (i = 0; i < size; i++) { - void *object = p[i]; + /* Always re-init detached_freelist */ + df->page = NULL; - BUG_ON(!object); - /* kmem cache debug support */ - s = cache_from_obj(s, object); - if (unlikely(!s)) - goto exit; - slab_free_hook(s, object); + do { + object = p[--size]; + } while (!object && size); - page = virt_to_head_page(object); + if (!object) + return 0; - if (c->page == page) { - /* Fastpath: local CPU free */ - set_freepointer(s, object, c->freelist); - c->freelist = object; - } else { - c->tid = next_tid(c->tid); - local_irq_enable(); - /* Slowpath: overhead locked cmpxchg_double_slab */ - __slab_free(s, page, object, _RET_IP_); - local_irq_disable(); - c = this_cpu_ptr(s->cpu_slab); + /* Start new detached freelist */ + set_freepointer(s, object, NULL); + df->page = virt_to_head_page(object); + df->tail = object; + df->freelist = object; + p[size] = NULL; /* mark object processed */ + df->cnt = 1; + + while (size) { + object = p[--size]; + if (!object) + continue; /* Skip processed objects */ + + /* df->page is always set at this point */ + if (df->page == virt_to_head_page(object)) { + /* Opportunity build freelist */ + set_freepointer(s, object, df->freelist); + df->freelist = object; + df->cnt++; + p[size] = NULL; /* mark object processed */ + + continue; } + + /* Limit look ahead search */ + if (!--lookahead) + break; + + if (!first_skipped_index) + first_skipped_index = size + 1; } -exit: - c->tid = next_tid(c->tid); - local_irq_enable(); + + return first_skipped_index; +} + + +/* Note that interrupts must be enabled when calling this function. */ +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +{ + if (WARN_ON(!size)) + return; + + do { + struct detached_freelist df; + struct kmem_cache *s; + + /* Support for memcg */ + s = cache_from_obj(orig_s, p[size - 1]); + + size = build_detached_freelist(s, size, p, &df); + if (unlikely(!df.page)) + continue; + + slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); /* Note that interrupts must be enabled when calling this function. */ -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) { struct kmem_cache_cpu *c; int i; + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, flags); + if (unlikely(!s)) + return false; /* * Drain objects in the per cpu slab, while disabling local * IRQs, which protects against PREEMPT and interrupts @@ -2807,36 +2931,20 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { - local_irq_enable(); /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist */ - p[i] = __slab_alloc(s, flags, NUMA_NO_NODE, + p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_, c); - if (unlikely(!p[i])) { - __kmem_cache_free_bulk(s, i, p); - return false; - } - local_irq_disable(); + if (unlikely(!p[i])) + goto error; + c = this_cpu_ptr(s->cpu_slab); continue; /* goto for-loop */ } - - /* kmem_cache debug support */ - s = slab_pre_alloc_hook(s, flags); - if (unlikely(!s)) { - __kmem_cache_free_bulk(s, i, p); - c->tid = next_tid(c->tid); - local_irq_enable(); - return false; - } - c->freelist = get_freepointer(s, object); p[i] = object; - - /* kmem_cache debug support */ - slab_post_alloc_hook(s, flags, object); } c->tid = next_tid(c->tid); local_irq_enable(); @@ -2849,7 +2957,14 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, memset(p[j], 0, s->object_size); } - return true; + /* memcg and kmem_cache debug support */ + slab_post_alloc_hook(s, flags, size, p); + return i; +error: + local_irq_enable(); + slab_post_alloc_hook(s, flags, i, p); + __kmem_cache_free_bulk(s, i, p); + return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); @@ -3514,7 +3629,7 @@ void kfree(const void *x) __free_kmem_pages(page, compound_order(page)); return; } - slab_free(page->slab_cache, page, object, _RET_IP_); + slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kfree); diff --git a/mm/swap.c b/mm/swap.c index 983f692a47fd..39395fb549c0 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -201,7 +201,7 @@ out_put_single: __put_single_page(page); return; } - VM_BUG_ON_PAGE(page_head != page->first_page, page); + VM_BUG_ON_PAGE(page_head != compound_head(page), page); /* * We can release the refcount taken by * get_page_unless_zero() now that @@ -262,7 +262,7 @@ static void put_compound_page(struct page *page) * Case 3 is possible, as we may race with * __split_huge_page_refcount tearing down a THP page. */ - page_head = compound_head_by_tail(page); + page_head = compound_head(page); if (!__compound_tail_refcounted(page_head)) put_unrefcounted_compound_page(page_head, page); else diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9db9ef5e8481..8e3c9c5a3042 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -35,6 +35,8 @@ #include <asm/tlbflush.h> #include <asm/shmparam.h> +#include "internal.h" + struct vfree_deferred { struct llist_head list; struct work_struct wq; @@ -1441,7 +1443,6 @@ struct vm_struct *remove_vm_area(const void *addr) vmap_debug_free_range(va->va_start, va->va_end); kasan_free_shadow(vm); free_unmap_vmap_area(va); - vm->size -= PAGE_SIZE; return vm; } @@ -1466,8 +1467,8 @@ static void __vunmap(const void *addr, int deallocate_pages) return; } - debug_check_no_locks_freed(addr, area->size); - debug_check_no_obj_freed(addr, area->size); + debug_check_no_locks_freed(addr, get_vm_area_size(area)); + debug_check_no_obj_freed(addr, get_vm_area_size(area)); if (deallocate_pages) { int i; @@ -1617,7 +1618,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, goto fail; } area->pages[i] = page; - if (gfp_mask & __GFP_WAIT) + if (gfpflags_allow_blocking(gfp_mask)) cond_resched(); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 55721b619aee..2aec4241b42a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1476,7 +1476,7 @@ static int too_many_isolated(struct zone *zone, int file, * won't get blocked by normal direct-reclaimers, forming a circular * deadlock. */ - if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS) + if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) inactive >>= 3; return isolated > inactive; @@ -2477,7 +2477,7 @@ static inline bool compaction_ready(struct zone *zone, int order) balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); watermark = high_wmark_pages(zone) + balance_gap + (2UL << order); - watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); + watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0); /* * If compaction is deferred, reclaim up to a point where @@ -2960,7 +2960,7 @@ static bool zone_balanced(struct zone *zone, int order, unsigned long balance_gap, int classzone_idx) { if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + - balance_gap, classzone_idx, 0)) + balance_gap, classzone_idx)) return false; if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone, @@ -3791,7 +3791,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) /* * Do not scan if the allocation should not be delayed. */ - if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC)) + if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) return ZONE_RECLAIM_NOSCAN; /* diff --git a/mm/vmstat.c b/mm/vmstat.c index ffcb4f58bf3e..879a2be23325 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -923,7 +923,7 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Unmovable", "Reclaimable", "Movable", - "Reserve", + "HighAtomic", #ifdef CONFIG_CMA "CMA", #endif diff --git a/mm/zbud.c b/mm/zbud.c index fa48bcdff9d5..d8a181fd779b 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -137,7 +137,7 @@ static const struct zbud_ops zbud_zpool_ops = { .evict = zbud_zpool_evict }; -static void *zbud_zpool_create(char *name, gfp_t gfp, +static void *zbud_zpool_create(const char *name, gfp_t gfp, const struct zpool_ops *zpool_ops, struct zpool *zpool) { diff --git a/mm/zpool.c b/mm/zpool.c index 8f670d3e8706..fd3ff719c32c 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -18,8 +18,6 @@ #include <linux/zpool.h> struct zpool { - char *type; - struct zpool_driver *driver; void *pool; const struct zpool_ops *ops; @@ -73,7 +71,8 @@ int zpool_unregister_driver(struct zpool_driver *driver) } EXPORT_SYMBOL(zpool_unregister_driver); -static struct zpool_driver *zpool_get_driver(char *type) +/* this assumes @type is null-terminated. */ +static struct zpool_driver *zpool_get_driver(const char *type) { struct zpool_driver *driver; @@ -113,6 +112,8 @@ static void zpool_put_driver(struct zpool_driver *driver) * not be loaded, and calling @zpool_create_pool() with the pool type will * fail. * + * The @type string must be null-terminated. + * * Returns: true if @type pool is available, false if not */ bool zpool_has_pool(char *type) @@ -145,9 +146,11 @@ EXPORT_SYMBOL(zpool_has_pool); * * Implementations must guarantee this to be thread-safe. * + * The @type and @name strings must be null-terminated. + * * Returns: New zpool on success, NULL on failure. */ -struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, +struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, const struct zpool_ops *ops) { struct zpool_driver *driver; @@ -174,7 +177,6 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, return NULL; } - zpool->type = driver->type; zpool->driver = driver; zpool->pool = driver->create(name, gfp, ops, zpool); zpool->ops = ops; @@ -208,7 +210,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, */ void zpool_destroy_pool(struct zpool *zpool) { - pr_debug("destroying pool type %s\n", zpool->type); + pr_debug("destroying pool type %s\n", zpool->driver->type); spin_lock(&pools_lock); list_del(&zpool->list); @@ -228,9 +230,9 @@ void zpool_destroy_pool(struct zpool *zpool) * * Returns: The type of zpool. */ -char *zpool_get_type(struct zpool *zpool) +const char *zpool_get_type(struct zpool *zpool) { - return zpool->type; + return zpool->driver->type; } /** diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index f135b1b6fcdc..9f15bdd9163c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -16,7 +16,7 @@ * struct page(s) to form a zspage. * * Usage of struct page fields: - * page->first_page: points to the first component (0-order) page + * page->private: points to the first component (0-order) page * page->index (union with page->freelist): offset of the first object * starting in this page. For the first page, this is * always 0, so we use this field (aka freelist) to point @@ -26,8 +26,7 @@ * * For _first_ page only: * - * page->private (union with page->first_page): refers to the - * component page after the first page + * page->private: refers to the component page after the first page * If the page is first_page for huge object, it stores handle. * Look at size_class->huge. * page->freelist: points to the first free object in zspage. @@ -38,6 +37,7 @@ * page->lru: links together first pages of various zspages. * Basically forming list of zspages in a fullness group. * page->mapping: class index and fullness group of the zspage + * page->inuse: the number of objects that are used in this zspage * * Usage of struct page flags: * PG_private: identifies the first component page @@ -58,7 +58,7 @@ #include <linux/cpumask.h> #include <linux/cpu.h> #include <linux/vmalloc.h> -#include <linux/hardirq.h> +#include <linux/preempt.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/debugfs.h> @@ -166,9 +166,14 @@ enum zs_stat_type { OBJ_USED, CLASS_ALMOST_FULL, CLASS_ALMOST_EMPTY, - NR_ZS_STAT_TYPE, }; +#ifdef CONFIG_ZSMALLOC_STAT +#define NR_ZS_STAT_TYPE (CLASS_ALMOST_EMPTY + 1) +#else +#define NR_ZS_STAT_TYPE (OBJ_USED + 1) +#endif + struct zs_size_stat { unsigned long objs[NR_ZS_STAT_TYPE]; }; @@ -237,7 +242,7 @@ struct link_free { }; struct zs_pool { - char *name; + const char *name; struct size_class **size_class; struct kmem_cache *handle_cachep; @@ -311,7 +316,7 @@ static void record_obj(unsigned long handle, unsigned long obj) #ifdef CONFIG_ZPOOL -static void *zs_zpool_create(char *name, gfp_t gfp, +static void *zs_zpool_create(const char *name, gfp_t gfp, const struct zpool_ops *zpool_ops, struct zpool *zpool) { @@ -447,19 +452,23 @@ static int get_size_class_index(int size) static inline void zs_stat_inc(struct size_class *class, enum zs_stat_type type, unsigned long cnt) { - class->stats.objs[type] += cnt; + if (type < NR_ZS_STAT_TYPE) + class->stats.objs[type] += cnt; } static inline void zs_stat_dec(struct size_class *class, enum zs_stat_type type, unsigned long cnt) { - class->stats.objs[type] -= cnt; + if (type < NR_ZS_STAT_TYPE) + class->stats.objs[type] -= cnt; } static inline unsigned long zs_stat_get(struct size_class *class, enum zs_stat_type type) { - return class->stats.objs[type]; + if (type < NR_ZS_STAT_TYPE) + return class->stats.objs[type]; + return 0; } #ifdef CONFIG_ZSMALLOC_STAT @@ -548,7 +557,7 @@ static const struct file_operations zs_stat_size_ops = { .release = single_release, }; -static int zs_pool_stat_create(char *name, struct zs_pool *pool) +static int zs_pool_stat_create(const char *name, struct zs_pool *pool) { struct dentry *entry; @@ -588,7 +597,7 @@ static void __exit zs_stat_exit(void) { } -static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) +static inline int zs_pool_stat_create(const char *name, struct zs_pool *pool) { return 0; } @@ -764,7 +773,7 @@ static struct page *get_first_page(struct page *page) if (is_first_page(page)) return page; else - return page->first_page; + return (struct page *)page_private(page); } static struct page *get_next_page(struct page *page) @@ -824,7 +833,7 @@ static unsigned long obj_to_head(struct size_class *class, struct page *page, { if (class->huge) { VM_BUG_ON(!is_first_page(page)); - return *(unsigned long *)page_private(page); + return page_private(page); } else return *(unsigned long *)obj; } @@ -949,7 +958,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) * Allocate individual pages and link them together as: * 1. first page->private = first sub-page * 2. all sub-pages are linked together using page->lru - * 3. each sub-page is linked to the first page using page->first_page + * 3. each sub-page is linked to the first page using page->private * * For each size class, First/Head pages are linked together using * page->lru. Also, we set PG_private to identify the first page @@ -974,7 +983,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) if (i == 1) set_page_private(first_page, (unsigned long)page); if (i >= 1) - page->first_page = first_page; + set_page_private(page, (unsigned long)first_page); if (i >= 2) list_add(&page->lru, &prev_page->lru); if (i == class->pages_per_zspage - 1) /* last page */ @@ -1428,8 +1437,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class, struct page *first_page, *f_page; unsigned long f_objidx, f_offset; void *vaddr; - int class_idx; - enum fullness_group fullness; BUG_ON(!obj); @@ -1437,7 +1444,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class, obj_to_location(obj, &f_page, &f_objidx); first_page = get_first_page(f_page); - get_zspage_mapping(first_page, &class_idx, &fullness); f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); vaddr = kmap_atomic(f_page); @@ -1822,9 +1828,6 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker, struct zs_pool *pool = container_of(shrinker, struct zs_pool, shrinker); - if (!pool->shrinker_enabled) - return 0; - for (i = zs_size_classes - 1; i >= 0; i--) { class = pool->size_class[i]; if (!class) @@ -1866,7 +1869,7 @@ static int zs_register_shrinker(struct zs_pool *pool) * On success, a pointer to the newly created pool is returned, * otherwise NULL. */ -struct zs_pool *zs_create_pool(char *name, gfp_t flags) +struct zs_pool *zs_create_pool(const char *name, gfp_t flags) { int i; struct zs_pool *pool; diff --git a/mm/zswap.c b/mm/zswap.c index 4043df7c672f..025f8dc723de 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -82,33 +82,27 @@ module_param_named(enabled, zswap_enabled, bool, 0644); /* Crypto compressor to use */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" -static char zswap_compressor[CRYPTO_MAX_ALG_NAME] = ZSWAP_COMPRESSOR_DEFAULT; -static struct kparam_string zswap_compressor_kparam = { - .string = zswap_compressor, - .maxlen = sizeof(zswap_compressor), -}; +static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; static int zswap_compressor_param_set(const char *, const struct kernel_param *); static struct kernel_param_ops zswap_compressor_param_ops = { .set = zswap_compressor_param_set, - .get = param_get_string, + .get = param_get_charp, + .free = param_free_charp, }; module_param_cb(compressor, &zswap_compressor_param_ops, - &zswap_compressor_kparam, 0644); + &zswap_compressor, 0644); /* Compressed storage zpool to use */ #define ZSWAP_ZPOOL_DEFAULT "zbud" -static char zswap_zpool_type[32 /* arbitrary */] = ZSWAP_ZPOOL_DEFAULT; -static struct kparam_string zswap_zpool_kparam = { - .string = zswap_zpool_type, - .maxlen = sizeof(zswap_zpool_type), -}; +static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; static int zswap_zpool_param_set(const char *, const struct kernel_param *); static struct kernel_param_ops zswap_zpool_param_ops = { - .set = zswap_zpool_param_set, - .get = param_get_string, + .set = zswap_zpool_param_set, + .get = param_get_charp, + .free = param_free_charp, }; -module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_kparam, 0644); +module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644); /* The maximum percentage of memory that the compressed pool can occupy */ static unsigned int zswap_max_pool_percent = 20; @@ -342,7 +336,7 @@ static void zswap_entry_put(struct zswap_tree *tree, static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, pgoff_t offset) { - struct zswap_entry *entry = NULL; + struct zswap_entry *entry; entry = zswap_rb_search(root, offset); if (entry) @@ -571,7 +565,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { struct zswap_pool *pool; - gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; + gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) { @@ -615,19 +609,29 @@ error: return NULL; } -static struct zswap_pool *__zswap_pool_create_fallback(void) +static __init struct zswap_pool *__zswap_pool_create_fallback(void) { if (!crypto_has_comp(zswap_compressor, 0, 0)) { + if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) { + pr_err("default compressor %s not available\n", + zswap_compressor); + return NULL; + } pr_err("compressor %s not available, using default %s\n", zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); - strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT, - sizeof(zswap_compressor)); + param_free_charp(&zswap_compressor); + zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; } if (!zpool_has_pool(zswap_zpool_type)) { + if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { + pr_err("default zpool %s not available\n", + zswap_zpool_type); + return NULL; + } pr_err("zpool %s not available, using default %s\n", zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); - strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT, - sizeof(zswap_zpool_type)); + param_free_charp(&zswap_zpool_type); + zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; } return zswap_pool_create(zswap_zpool_type, zswap_compressor); @@ -684,43 +688,39 @@ static void zswap_pool_put(struct zswap_pool *pool) * param callbacks **********************************/ +/* val must be a null-terminated string */ static int __zswap_param_set(const char *val, const struct kernel_param *kp, char *type, char *compressor) { struct zswap_pool *pool, *put_pool = NULL; - char str[kp->str->maxlen], *s; + char *s = strstrip((char *)val); int ret; - /* - * kp is either zswap_zpool_kparam or zswap_compressor_kparam, defined - * at the top of this file, so maxlen is CRYPTO_MAX_ALG_NAME (64) or - * 32 (arbitrary). - */ - strlcpy(str, val, kp->str->maxlen); - s = strim(str); + /* no change required */ + if (!strcmp(s, *(char **)kp->arg)) + return 0; /* if this is load-time (pre-init) param setting, * don't create a pool; that's done during init. */ if (!zswap_init_started) - return param_set_copystring(s, kp); - - /* no change required */ - if (!strncmp(kp->str->string, s, kp->str->maxlen)) - return 0; + return param_set_charp(s, kp); if (!type) { - type = s; - if (!zpool_has_pool(type)) { - pr_err("zpool %s not available\n", type); + if (!zpool_has_pool(s)) { + pr_err("zpool %s not available\n", s); return -ENOENT; } + type = s; } else if (!compressor) { - compressor = s; - if (!crypto_has_comp(compressor, 0, 0)) { - pr_err("compressor %s not available\n", compressor); + if (!crypto_has_comp(s, 0, 0)) { + pr_err("compressor %s not available\n", s); return -ENOENT; } + compressor = s; + } else { + WARN_ON(1); + return -EINVAL; } spin_lock(&zswap_pools_lock); @@ -736,7 +736,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, } if (pool) - ret = param_set_copystring(s, kp); + ret = param_set_charp(s, kp); else ret = -EINVAL; @@ -1011,7 +1011,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, /* store */ len = dlen + sizeof(struct zswap_header); ret = zpool_malloc(entry->pool->zpool, len, - __GFP_NORETRY | __GFP_NOWARN, &handle); + __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM, + &handle); if (ret == -ENOSPC) { zswap_reject_compress_poor++; goto put_dstmem; |