Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  187
1 file changed, 96 insertions(+), 91 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4785a8a2040e..3c4eb750a199 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -694,34 +694,27 @@ void prep_compound_page(struct page *page, unsigned int order)
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
-#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
-DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
-#else
+bool _debug_pagealloc_enabled_early __read_mostly
+ = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
-#endif
EXPORT_SYMBOL(_debug_pagealloc_enabled);
DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static int __init early_debug_pagealloc(char *buf)
{
- bool enable = false;
-
- if (kstrtobool(buf, &enable))
- return -EINVAL;
-
- if (enable)
- static_branch_enable(&_debug_pagealloc_enabled);
-
- return 0;
+ return kstrtobool(buf, &_debug_pagealloc_enabled_early);
}
early_param("debug_pagealloc", early_debug_pagealloc);
-static void init_debug_guardpage(void)
+void init_debug_pagealloc(void)
{
if (!debug_pagealloc_enabled())
return;
+ static_branch_enable(&_debug_pagealloc_enabled);
+
if (!debug_guardpage_minorder())
return;
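
For context, a minimal sketch of what the companion header helpers might look like (the exact definitions live outside this file, e.g. in include/linux/mm.h, and are an assumption here, not part of this hunk): debug_pagealloc_enabled() reads the plain boolean and is safe even before jump labels are initialized, while debug_pagealloc_enabled_static() uses the static key and is only valid once init_debug_pagealloc() above has run.

	/* Sketch only -- assumed companion helpers, not part of this diff. */
	extern bool _debug_pagealloc_enabled_early;
	DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);

	static inline bool debug_pagealloc_enabled(void)
	{
		/* Usable at any point during boot. */
		return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
			_debug_pagealloc_enabled_early;
	}

	static inline bool debug_pagealloc_enabled_static(void)
	{
		/* Hot-path variant: patched out when debugging is disabled. */
		if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC))
			return false;
		return static_branch_unlikely(&_debug_pagealloc_enabled);
	}
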
@@ -1186,7 +1179,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
*/
arch_free_page(page, order);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 0);
kasan_free_nondeferred_pages(page, order);
@@ -1207,7 +1200,7 @@ static bool free_pcp_prepare(struct page *page)
static bool bulkfree_pcp_prepare(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return free_pages_check(page);
else
return false;
@@ -1221,7 +1214,7 @@ static bool bulkfree_pcp_prepare(struct page *page)
*/
static bool free_pcp_prepare(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return free_pages_prepare(page, 0, true);
else
return free_pages_prepare(page, 0, false);
@@ -1973,10 +1966,6 @@ void __init page_alloc_init_late(void)
for_each_populated_zone(zone)
set_zone_contiguous(zone);
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- init_debug_guardpage();
-#endif
}
#ifdef CONFIG_CMA
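
Because the guard-page setup no longer runs from page_alloc_init_late(), the renamed and exported init_debug_pagealloc() has to be called from early init instead. A sketch of the assumed call site (most likely mm_init() in init/main.c; that hunk is not part of this diff):

	static void __init mm_init(void)
	{
		/* ... earlier setup elided ... */
		init_debug_pagealloc();	/* flip the static key before any hot-path check */
		mem_init();
		/* ... rest of mm init elided ... */
	}
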
@@ -2106,7 +2095,7 @@ static inline bool free_pages_prezeroed(void)
*/
static inline bool check_pcp_refill(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return check_new_page(page);
else
return false;
@@ -2128,7 +2117,7 @@ static inline bool check_pcp_refill(struct page *page)
}
static inline bool check_new_pcp(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return check_new_page(page);
else
return false;
@@ -2155,7 +2144,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_refcounted(page);
arch_alloc_page(page, order);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
kernel_poison_pages(page, 1 << order, 1);
@@ -4476,8 +4465,11 @@ retry_cpuset:
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
- !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+ /*
+ * Checks for costly allocations with __GFP_NORETRY, which
+ * includes some THP page fault allocations
+ */
+ if (costly_order && (gfp_mask & __GFP_NORETRY)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
@@ -4498,23 +4490,6 @@ retry_cpuset:
if (compact_result == COMPACT_SKIPPED ||
compact_result == COMPACT_DEFERRED)
goto nopage;
- }
-
- /*
- * Checks for costly allocations with __GFP_NORETRY, which
- * includes THP page fault allocations
- */
- if (costly_order && (gfp_mask & __GFP_NORETRY)) {
- /*
- * If compaction is deferred for high-order allocations,
- * it is because sync compaction recently failed. If
- * this is the case and the caller requested a THP
- * allocation, we do not want to heavily disrupt the
- * system, so we fail the allocation instead of entering
- * direct reclaim.
- */
- if (compact_result == COMPACT_DEFERRED)
- goto nopage;
/*
* Looks like reclaim/compaction is worth trying, but
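
Taken together, the two hunks above mean a costly __GFP_NORETRY allocation now bails out when compaction was either skipped or deferred. Condensed into a hypothetical helper (for illustration only; no such function exists in the kernel):

	static bool bail_out_costly_noretry(unsigned int order, gfp_t gfp_mask,
					    enum compact_result compact_result)
	{
		const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;

		if (!costly_order || !(gfp_mask & __GFP_NORETRY))
			return false;

		/*
		 * Compaction was skipped (all zones below low watermarks) or
		 * deferred (sync compaction recently failed): failing the
		 * allocation is cheaper than disrupting the system with
		 * direct reclaim for e.g. a THP fault.
		 */
		return compact_result == COMPACT_SKIPPED ||
		       compact_result == COMPACT_DEFERRED;
	}
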
@@ -5873,6 +5848,23 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
return false;
}
+#ifdef CONFIG_SPARSEMEM
+/* Skip PFNs that belong to non-present sections */
+static inline __meminit unsigned long next_pfn(unsigned long pfn)
+{
+ const unsigned long section_nr = pfn_to_section_nr(++pfn);
+
+ if (present_section_nr(section_nr))
+ return pfn;
+ return section_nr_to_pfn(next_present_section_nr(section_nr));
+}
+#else
+static inline __meminit unsigned long next_pfn(unsigned long pfn)
+{
+ return pfn + 1;
+}
+#endif
+
/*
* Initially all pages are reserved - free ones are freed
* up by memblock_free_all() once the early boot process is
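
next_pfn() either advances by one page (when the next pfn's section is present) or jumps straight to the first pfn of the next present section. A standalone model of that section arithmetic, assuming 128 MiB sections with 4 KiB pages (32768 pages per section, as on x86-64); the helper names mirror the kernel ones, but this is plain userspace C, not kernel code:

	#include <stdio.h>

	#define PFN_SECTION_SHIFT 15			/* assumed geometry */
	#define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)

	static unsigned long pfn_to_section_nr(unsigned long pfn)
	{
		return pfn >> PFN_SECTION_SHIFT;
	}

	static unsigned long section_nr_to_pfn(unsigned long sec)
	{
		return sec << PFN_SECTION_SHIFT;
	}

	int main(void)
	{
		unsigned long pfn = 0x12345;

		/*
		 * Where the loop would resume if pfn's section were absent:
		 * the first pfn of the next present section (modelled here
		 * as simply the following section).
		 */
		printf("pfn %#lx is in section %lu; next section starts at pfn %#lx\n",
		       pfn, pfn_to_section_nr(pfn),
		       section_nr_to_pfn(pfn_to_section_nr(pfn) + 1));
		return 0;
	}
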
@@ -5906,16 +5898,20 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
}
#endif
- for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ for (pfn = start_pfn; pfn < end_pfn; ) {
/*
* There can be holes in boot-time mem_map[]s handed to this
* function. They do not exist on hotplugged memory.
*/
if (context == MEMMAP_EARLY) {
- if (!early_pfn_valid(pfn))
+ if (!early_pfn_valid(pfn)) {
+ pfn = next_pfn(pfn);
continue;
- if (!early_pfn_in_nid(pfn, nid))
+ }
+ if (!early_pfn_in_nid(pfn, nid)) {
+ pfn++;
continue;
+ }
if (overlap_memmap_init(zone, &pfn))
continue;
if (defer_init(nid, pfn, end_pfn))
@@ -5943,16 +5939,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}
+ pfn++;
}
}
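
Pieced together from the hunks above, the MEMMAP_EARLY handling in the loop now reads roughly as follows (reconstructed from the diff context; the __init_single_page()/migratetype body in the middle is elided):

	for (pfn = start_pfn; pfn < end_pfn; ) {
		if (context == MEMMAP_EARLY) {
			if (!early_pfn_valid(pfn)) {
				/* Hole: skip ahead, possibly a whole section. */
				pfn = next_pfn(pfn);
				continue;
			}
			if (!early_pfn_in_nid(pfn, nid)) {
				pfn++;
				continue;
			}
			if (overlap_memmap_init(zone, &pfn))
				continue;
			if (defer_init(nid, pfn, end_pfn))
				break;
		}

		/* ... initialize the struct page for pfn ... */

		pfn++;
	}
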
#ifdef CONFIG_ZONE_DEVICE
void __ref memmap_init_zone_device(struct zone *zone,
unsigned long start_pfn,
- unsigned long size,
+ unsigned long nr_pages,
struct dev_pagemap *pgmap)
{
- unsigned long pfn, end_pfn = start_pfn + size;
+ unsigned long pfn, end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
struct vmem_altmap *altmap = pgmap_altmap(pgmap);
unsigned long zone_idx = zone_idx(zone);
@@ -5969,7 +5966,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
*/
if (altmap) {
start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
- size = end_pfn - start_pfn;
+ nr_pages = end_pfn - start_pfn;
}
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
@@ -6016,7 +6013,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
}
pr_info("%s initialised %lu pages in %ums\n", __func__,
- size, jiffies_to_msecs(jiffies - start));
+ nr_pages, jiffies_to_msecs(jiffies - start));
}
#endif
@@ -6915,10 +6912,10 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
/*
- * Zero all valid struct pages in range [spfn, epfn), return number of struct
- * pages zeroed
+ * Initialize all valid struct pages in the range [spfn, epfn) and mark them
+ * PageReserved(). Return the number of struct pages that were initialized.
*/
-static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
+static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
{
unsigned long pfn;
u64 pgcnt = 0;
@@ -6929,7 +6926,13 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
+ pageblock_nr_pages - 1;
continue;
}
- mm_zero_struct_page(pfn_to_page(pfn));
+ /*
+ * Use a fake node/zone (0) for now. Some of these pages
+ * (in memblock.reserved but not in memblock.memory) will
+ * get re-initialized via reserve_bootmem_region() later.
+ */
+ __init_single_page(pfn_to_page(pfn), pfn, 0, 0);
+ __SetPageReserved(pfn_to_page(pfn));
pgcnt++;
}
@@ -6941,14 +6944,15 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
* initialized by going through __init_single_page(). But, there are some
* struct pages which are reserved in memblock allocator and their fields
* may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly zero those struct pages.
+ * flags). We must explicitly initialize those struct pages.
*
* This function also addresses a similar issue where struct pages are left
* uninitialized because the physical address range is not covered by
* memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=.
+ * layout is manually configured via memmap=, or when the highest physical
+ * address (max_pfn) does not end on a section boundary.
*/
-void __init zero_resv_unavail(void)
+static void __init init_unavailable_mem(void)
{
phys_addr_t start, end;
u64 i, pgcnt;
@@ -6961,10 +6965,20 @@ void __init zero_resv_unavail(void)
for_each_mem_range(i, &memblock.memory, NULL,
NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
if (next < start)
- pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start));
+ pgcnt += init_unavailable_range(PFN_DOWN(next),
+ PFN_UP(start));
next = end;
}
- pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn);
+
+ /*
+ * Early sections always have a fully populated memmap for the whole
+ * section - see pfn_valid(). If the last section has holes at the
+ * end and that section is marked "online", the memmap will be
+ * considered initialized. Make sure that memmap has a well defined
+ * state.
+ */
+ pgcnt += init_unavailable_range(PFN_DOWN(next),
+ round_up(max_pfn, PAGES_PER_SECTION));
/*
* Struct pages that do not have backing memory. This could be because
@@ -6973,6 +6987,10 @@ void __init zero_resv_unavail(void)
if (pgcnt)
pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
}
+#else
+static inline void __init init_unavailable_mem(void)
+{
+}
#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
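
The round_up() added above is what guarantees the memmap of a partially populated last section ends up fully initialized. A standalone illustration of the rounding, again assuming 32768 pages per section; round_up() here is a local power-of-two macro, not the kernel's:

	#include <stdio.h>

	#define PAGES_PER_SECTION 32768UL	/* assumed: 128 MiB / 4 KiB */
	#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)

	int main(void)
	{
		unsigned long max_pfn = 0x23fff0;	/* ends mid-section */

		printf("max_pfn %#lx rounds up to the section boundary %#lx\n",
		       max_pfn, round_up(max_pfn, PAGES_PER_SECTION));
		return 0;
	}
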
@@ -7402,7 +7420,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
- zero_resv_unavail();
+ init_unavailable_mem();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, NULL,
@@ -7597,7 +7615,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
void __init free_area_init(unsigned long *zones_size)
{
- zero_resv_unavail();
+ init_unavailable_mem();
free_area_init_node(0, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}
@@ -8179,20 +8197,22 @@ void *__init alloc_large_system_hash(const char *tablename,
/*
* This function checks whether pageblock includes unmovable pages or not.
- * If @count is not zero, it is okay to include less @count unmovable pages
*
* PageLRU check without isolation or lru_lock could race so that
* MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
* check without lock_page also may miss some movable non-lru pages at
* race condition. So you can't expect this function should be exact.
+ *
+ * Returns a page without holding a reference. If the caller wants to
+ * dereference that page (e.g., dumping), it has to make sure that it
+ * cannot get removed (e.g., via memory unplug) concurrently.
+ *
*/
-bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
- int migratetype, int flags)
+struct page *has_unmovable_pages(struct zone *zone, struct page *page,
+ int migratetype, int flags)
{
- unsigned long found;
unsigned long iter = 0;
unsigned long pfn = page_to_pfn(page);
- const char *reason = "unmovable page";
/*
* TODO we could make this much more efficient by not checking every
@@ -8209,22 +8229,19 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
* so consider them movable here.
*/
if (is_migrate_cma(migratetype))
- return false;
+ return NULL;
- reason = "CMA page";
- goto unmovable;
+ return page;
}
- for (found = 0; iter < pageblock_nr_pages; iter++) {
- unsigned long check = pfn + iter;
-
- if (!pfn_valid_within(check))
+ for (; iter < pageblock_nr_pages; iter++) {
+ if (!pfn_valid_within(pfn + iter))
continue;
- page = pfn_to_page(check);
+ page = pfn_to_page(pfn + iter);
if (PageReserved(page))
- goto unmovable;
+ return page;
/*
* If the zone is movable and we have ruled out all reserved
@@ -8244,7 +8261,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
unsigned int skip_pages;
if (!hugepage_migration_supported(page_hstate(head)))
- goto unmovable;
+ return page;
skip_pages = compound_nr(head) - (page - head);
iter += skip_pages - 1;
@@ -8270,11 +8287,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
continue;
- if (__PageMovable(page))
+ if (__PageMovable(page) || PageLRU(page))
continue;
- if (!PageLRU(page))
- found++;
/*
* If there are RECLAIMABLE pages, we need to check
* it. But now, memory offline itself doesn't call
@@ -8288,15 +8303,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
* is set to both of a memory hole page and a _used_ kernel
* page at boot.
*/
- if (found > count)
- goto unmovable;
+ return page;
}
- return false;
-unmovable:
- WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
- if (flags & REPORT_FAILURE)
- dump_page(pfn_to_page(pfn + iter), reason);
- return true;
+ return NULL;
}
#ifdef CONFIG_CONTIG_ALLOC
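
With the new return value, a caller that wants the old boolean semantics plus the diagnostic dump would do something along these lines (a sketch of how e.g. set_migratetype_isolate() in mm/page_isolation.c might consume it; the actual caller changes are not part of this diff):

	struct page *unmovable;

	unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags);
	if (!unmovable) {
		/* Pageblock is isolatable: go ahead with the migratetype change. */
		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
	} else if (isol_flags & REPORT_FAILURE) {
		/*
		 * No reference is held on the returned page; only dump it
		 * while the caller guarantees it cannot vanish concurrently
		 * (e.g. under memory hotplug protection).
		 */
		dump_page(unmovable, "unmovable page");
	}
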
@@ -8700,10 +8709,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
BUG_ON(!PageBuddy(page));
order = page_order(page);
offlined_pages += 1 << order;
-#ifdef CONFIG_DEBUG_VM
- pr_info("remove from free list %lx %d %lx\n",
- pfn, 1 << order, end_pfn);
-#endif
del_page_from_free_area(page, &zone->free_area[order]);
pfn += (1 << order);
}