diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 187 |
1 files changed, 96 insertions, 91 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4785a8a2040e..3c4eb750a199 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -694,34 +694,27 @@ void prep_compound_page(struct page *page, unsigned int order) #ifdef CONFIG_DEBUG_PAGEALLOC unsigned int _debug_guardpage_minorder; -#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT -DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled); -#else +bool _debug_pagealloc_enabled_early __read_mostly + = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); +EXPORT_SYMBOL(_debug_pagealloc_enabled_early); DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); -#endif EXPORT_SYMBOL(_debug_pagealloc_enabled); DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled); static int __init early_debug_pagealloc(char *buf) { - bool enable = false; - - if (kstrtobool(buf, &enable)) - return -EINVAL; - - if (enable) - static_branch_enable(&_debug_pagealloc_enabled); - - return 0; + return kstrtobool(buf, &_debug_pagealloc_enabled_early); } early_param("debug_pagealloc", early_debug_pagealloc); -static void init_debug_guardpage(void) +void init_debug_pagealloc(void) { if (!debug_pagealloc_enabled()) return; + static_branch_enable(&_debug_pagealloc_enabled); + if (!debug_guardpage_minorder()) return; @@ -1186,7 +1179,7 @@ static __always_inline bool free_pages_prepare(struct page *page, */ arch_free_page(page, order); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) kernel_map_pages(page, 1 << order, 0); kasan_free_nondeferred_pages(page, order); @@ -1207,7 +1200,7 @@ static bool free_pcp_prepare(struct page *page) static bool bulkfree_pcp_prepare(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return free_pages_check(page); else return false; @@ -1221,7 +1214,7 @@ static bool bulkfree_pcp_prepare(struct page *page) */ static bool free_pcp_prepare(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return free_pages_prepare(page, 0, true); else 
return free_pages_prepare(page, 0, false); @@ -1973,10 +1966,6 @@ void __init page_alloc_init_late(void) for_each_populated_zone(zone) set_zone_contiguous(zone); - -#ifdef CONFIG_DEBUG_PAGEALLOC - init_debug_guardpage(); -#endif } #ifdef CONFIG_CMA @@ -2106,7 +2095,7 @@ static inline bool free_pages_prezeroed(void) */ static inline bool check_pcp_refill(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return check_new_page(page); else return false; @@ -2128,7 +2117,7 @@ static inline bool check_pcp_refill(struct page *page) } static inline bool check_new_pcp(struct page *page) { - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) return check_new_page(page); else return false; @@ -2155,7 +2144,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_refcounted(page); arch_alloc_page(page, order); - if (debug_pagealloc_enabled()) + if (debug_pagealloc_enabled_static()) kernel_map_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); kernel_poison_pages(page, 1 << order, 1); @@ -4476,8 +4465,11 @@ retry_cpuset: if (page) goto got_pg; - if (order >= pageblock_order && (gfp_mask & __GFP_IO) && - !(gfp_mask & __GFP_RETRY_MAYFAIL)) { + /* + * Checks for costly allocations with __GFP_NORETRY, which + * includes some THP page fault allocations + */ + if (costly_order && (gfp_mask & __GFP_NORETRY)) { /* * If allocating entire pageblock(s) and compaction * failed because all zones are below low watermarks @@ -4498,23 +4490,6 @@ retry_cpuset: if (compact_result == COMPACT_SKIPPED || compact_result == COMPACT_DEFERRED) goto nopage; - } - - /* - * Checks for costly allocations with __GFP_NORETRY, which - * includes THP page fault allocations - */ - if (costly_order && (gfp_mask & __GFP_NORETRY)) { - /* - * If compaction is deferred for high-order allocations, - * it is because sync compaction recently failed. 
If - * this is the case and the caller requested a THP - * allocation, we do not want to heavily disrupt the - * system, so we fail the allocation instead of entering - * direct reclaim. - */ - if (compact_result == COMPACT_DEFERRED) - goto nopage; /* * Looks like reclaim/compaction is worth trying, but @@ -5873,6 +5848,23 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) return false; } +#ifdef CONFIG_SPARSEMEM +/* Skip PFNs that belong to non-present sections */ +static inline __meminit unsigned long next_pfn(unsigned long pfn) +{ + const unsigned long section_nr = pfn_to_section_nr(++pfn); + + if (present_section_nr(section_nr)) + return pfn; + return section_nr_to_pfn(next_present_section_nr(section_nr)); +} +#else +static inline __meminit unsigned long next_pfn(unsigned long pfn) +{ + return pfn++; +} +#endif + /* * Initially all pages are reserved - free ones are freed * up by memblock_free_all() once the early boot process is @@ -5906,16 +5898,20 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, } #endif - for (pfn = start_pfn; pfn < end_pfn; pfn++) { + for (pfn = start_pfn; pfn < end_pfn; ) { /* * There can be holes in boot-time mem_map[]s handed to this * function. They do not exist on hotplugged memory. 
*/ if (context == MEMMAP_EARLY) { - if (!early_pfn_valid(pfn)) + if (!early_pfn_valid(pfn)) { + pfn = next_pfn(pfn); continue; - if (!early_pfn_in_nid(pfn, nid)) + } + if (!early_pfn_in_nid(pfn, nid)) { + pfn++; continue; + } if (overlap_memmap_init(zone, &pfn)) continue; if (defer_init(nid, pfn, end_pfn)) @@ -5943,16 +5939,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, set_pageblock_migratetype(page, MIGRATE_MOVABLE); cond_resched(); } + pfn++; } } #ifdef CONFIG_ZONE_DEVICE void __ref memmap_init_zone_device(struct zone *zone, unsigned long start_pfn, - unsigned long size, + unsigned long nr_pages, struct dev_pagemap *pgmap) { - unsigned long pfn, end_pfn = start_pfn + size; + unsigned long pfn, end_pfn = start_pfn + nr_pages; struct pglist_data *pgdat = zone->zone_pgdat; struct vmem_altmap *altmap = pgmap_altmap(pgmap); unsigned long zone_idx = zone_idx(zone); @@ -5969,7 +5966,7 @@ void __ref memmap_init_zone_device(struct zone *zone, */ if (altmap) { start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap); - size = end_pfn - start_pfn; + nr_pages = end_pfn - start_pfn; } for (pfn = start_pfn; pfn < end_pfn; pfn++) { @@ -6016,7 +6013,7 @@ void __ref memmap_init_zone_device(struct zone *zone, } pr_info("%s initialised %lu pages in %ums\n", __func__, - size, jiffies_to_msecs(jiffies - start)); + nr_pages, jiffies_to_msecs(jiffies - start)); } #endif @@ -6915,10 +6912,10 @@ void __init free_area_init_node(int nid, unsigned long *zones_size, #if !defined(CONFIG_FLAT_NODE_MEM_MAP) /* - * Zero all valid struct pages in range [spfn, epfn), return number of struct - * pages zeroed + * Initialize all valid struct pages in the range [spfn, epfn) and mark them + * PageReserved(). Return the number of struct pages that were initialized. 
*/ -static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) +static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) { unsigned long pfn; u64 pgcnt = 0; @@ -6929,7 +6926,13 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) + pageblock_nr_pages - 1; continue; } - mm_zero_struct_page(pfn_to_page(pfn)); + /* + * Use a fake node/zone (0) for now. Some of these pages + * (in memblock.reserved but not in memblock.memory) will + * get re-initialized via reserve_bootmem_region() later. + */ + __init_single_page(pfn_to_page(pfn), pfn, 0, 0); + __SetPageReserved(pfn_to_page(pfn)); pgcnt++; } @@ -6941,14 +6944,15 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) * initialized by going through __init_single_page(). But, there are some * struct pages which are reserved in memblock allocator and their fields * may be accessed (for example page_to_pfn() on some configuration accesses - * flags). We must explicitly zero those struct pages. + * flags). We must explicitly initialize those struct pages. * * This function also addresses a similar issue where struct pages are left * uninitialized because the physical address range is not covered by * memblock.memory or memblock.reserved. That could happen when memblock - * layout is manually configured via memmap=. + * layout is manually configured via memmap=, or when the highest physical + * address (max_pfn) does not end on a section boundary. 
*/ -void __init zero_resv_unavail(void) +static void __init init_unavailable_mem(void) { phys_addr_t start, end; u64 i, pgcnt; @@ -6961,10 +6965,20 @@ void __init zero_resv_unavail(void) for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) { if (next < start) - pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start)); + pgcnt += init_unavailable_range(PFN_DOWN(next), + PFN_UP(start)); next = end; } - pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn); + + /* + * Early sections always have a fully populated memmap for the whole + * section - see pfn_valid(). If the last section has holes at the + * end and that section is marked "online", the memmap will be + * considered initialized. Make sure that memmap has a well defined + * state. + */ + pgcnt += init_unavailable_range(PFN_DOWN(next), + round_up(max_pfn, PAGES_PER_SECTION)); /* * Struct pages that do not have backing memory. This could be because @@ -6973,6 +6987,10 @@ void __init zero_resv_unavail(void) if (pgcnt) pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); } +#else +static inline void __init init_unavailable_mem(void) +{ +} #endif /* !CONFIG_FLAT_NODE_MEM_MAP */ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP @@ -7402,7 +7420,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - zero_resv_unavail(); + init_unavailable_mem(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid, NULL, @@ -7597,7 +7615,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) void __init free_area_init(unsigned long *zones_size) { - zero_resv_unavail(); + init_unavailable_mem(); free_area_init_node(0, zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } @@ -8179,20 +8197,22 @@ void *__init alloc_large_system_hash(const char *tablename, /* * This function checks whether pageblock includes unmovable pages or not. 
- * If @count is not zero, it is okay to include less @count unmovable pages * * PageLRU check without isolation or lru_lock could race so that * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable * check without lock_page also may miss some movable non-lru pages at * race condition. So you can't expect this function should be exact. + * + * Returns a page without holding a reference. If the caller wants to + * dereference that page (e.g., dumping), it has to make sure that it + * cannot get removed (e.g., via memory unplug) concurrently. + * */ -bool has_unmovable_pages(struct zone *zone, struct page *page, int count, - int migratetype, int flags) +struct page *has_unmovable_pages(struct zone *zone, struct page *page, + int migratetype, int flags) { - unsigned long found; unsigned long iter = 0; unsigned long pfn = page_to_pfn(page); - const char *reason = "unmovable page"; /* * TODO we could make this much more efficient by not checking every @@ -8209,22 +8229,19 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * so consider them movable here.
*/ if (is_migrate_cma(migratetype)) - return false; + return NULL; - reason = "CMA page"; - goto unmovable; + return page; } - for (found = 0; iter < pageblock_nr_pages; iter++) { - unsigned long check = pfn + iter; - - if (!pfn_valid_within(check)) + for (; iter < pageblock_nr_pages; iter++) { + if (!pfn_valid_within(pfn + iter)) continue; - page = pfn_to_page(check); + page = pfn_to_page(pfn + iter); if (PageReserved(page)) - goto unmovable; + return page; /* * If the zone is movable and we have ruled out all reserved @@ -8244,7 +8261,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unsigned int skip_pages; if (!hugepage_migration_supported(page_hstate(head))) - goto unmovable; + return page; skip_pages = compound_nr(head) - (page - head); iter += skip_pages - 1; @@ -8270,11 +8287,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) continue; - if (__PageMovable(page)) + if (__PageMovable(page) || PageLRU(page)) continue; - if (!PageLRU(page)) - found++; /* * If there are RECLAIMABLE pages, we need to check * it. But now, memory offline itself doesn't call @@ -8288,15 +8303,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * is set to both of a memory hole page and a _used_ kernel * page at boot. */ - if (found > count) - goto unmovable; + return page; } - return false; -unmovable: - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); - if (flags & REPORT_FAILURE) - dump_page(pfn_to_page(pfn + iter), reason); - return true; + return NULL; } #ifdef CONFIG_CONTIG_ALLOC @@ -8700,10 +8709,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) BUG_ON(!PageBuddy(page)); order = page_order(page); offlined_pages += 1 << order; -#ifdef CONFIG_DEBUG_VM - pr_info("remove from free list %lx %d %lx\n", - pfn, 1 << order, end_pfn); -#endif del_page_from_free_area(page, &zone->free_area[order]); pfn += (1 << order); } |