summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c83
-rw-r--r--mm/filemap.c8
-rw-r--r--mm/frame_vector.c5
-rw-r--r--mm/gup.c25
-rw-r--r--mm/huge_memory.c36
-rw-r--r--mm/hugetlb.c5
-rw-r--r--mm/internal.h12
-rw-r--r--mm/kasan/kasan.c8
-rw-r--r--mm/khugepaged.c11
-rw-r--r--mm/kmemcheck.c2
-rw-r--r--mm/ksm.c3
-rw-r--r--mm/memblock.c23
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory-failure.c20
-rw-r--r--mm/memory.c78
-rw-r--r--mm/mlock.c5
-rw-r--r--mm/mmap.c160
-rw-r--r--mm/mmu_notifier.c14
-rw-r--r--mm/nommu.c8
-rw-r--r--mm/page_alloc.c199
-rw-r--r--mm/page_isolation.c5
-rw-r--r--mm/rmap.c4
-rw-r--r--mm/slab.c6
-rw-r--r--mm/slab.h4
-rw-r--r--mm/slab_common.c6
-rw-r--r--mm/slob.c6
-rw-r--r--mm/slub.c18
-rw-r--r--mm/swap_cgroup.c3
-rw-r--r--mm/swap_slots.c19
-rw-r--r--mm/swap_state.c2
-rw-r--r--mm/swapfile.c10
-rw-r--r--mm/truncate.c21
-rw-r--r--mm/util.c61
-rw-r--r--mm/vmalloc.c30
-rw-r--r--mm/vmpressure.c6
-rw-r--r--mm/vmscan.c38
-rw-r--r--mm/vmstat.c2
37 files changed, 604 insertions, 344 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 09c5282ebdd2..613c59e928cb 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -89,11 +89,6 @@ static void map_pages(struct list_head *list)
list_splice(&tmp_list, list);
}
-static inline bool migrate_async_suitable(int migratetype)
-{
- return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
-}
-
#ifdef CONFIG_COMPACTION
int PageMovable(struct page *page)
@@ -988,6 +983,22 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
+static bool suitable_migration_source(struct compact_control *cc,
+ struct page *page)
+{
+ int block_mt;
+
+ if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
+ return true;
+
+ block_mt = get_pageblock_migratetype(page);
+
+ if (cc->migratetype == MIGRATE_MOVABLE)
+ return is_migrate_movable(block_mt);
+ else
+ return block_mt == cc->migratetype;
+}
+
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct compact_control *cc,
struct page *page)
@@ -1007,7 +1018,7 @@ static bool suitable_migration_target(struct compact_control *cc,
return true;
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
- if (migrate_async_suitable(get_pageblock_migratetype(page)))
+ if (is_migrate_movable(get_pageblock_migratetype(page)))
return true;
/* Otherwise skip the block */
@@ -1242,8 +1253,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
* Async compaction is optimistic to see if the minimum amount
* of work satisfies the allocation.
*/
- if (cc->mode == MIGRATE_ASYNC &&
- !migrate_async_suitable(get_pageblock_migratetype(page)))
+ if (!suitable_migration_source(cc, page))
continue;
/* Perform the isolation */
@@ -1276,11 +1286,11 @@ static inline bool is_via_compact_memory(int order)
return order == -1;
}
-static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc,
- const int migratetype)
+static enum compact_result __compact_finished(struct zone *zone,
+ struct compact_control *cc)
{
unsigned int order;
- unsigned long watermark;
+ const int migratetype = cc->migratetype;
if (cc->contended || fatal_signal_pending(current))
return COMPACT_CONTENDED;
@@ -1308,12 +1318,16 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
if (is_via_compact_memory(cc->order))
return COMPACT_CONTINUE;
- /* Compaction run is not finished if the watermark is not met */
- watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK];
-
- if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
- cc->alloc_flags))
- return COMPACT_CONTINUE;
+ if (cc->finishing_block) {
+ /*
+ * We have finished the pageblock, but better check again that
+ * we really succeeded.
+ */
+ if (IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
+ cc->finishing_block = false;
+ else
+ return COMPACT_CONTINUE;
+ }
/* Direct compactor: Is a suitable page free? */
for (order = cc->order; order < MAX_ORDER; order++) {
@@ -1335,20 +1349,40 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_
* other migratetype buddy lists.
*/
if (find_suitable_fallback(area, order, migratetype,
- true, &can_steal) != -1)
- return COMPACT_SUCCESS;
+ true, &can_steal) != -1) {
+
+ /* movable pages are OK in any pageblock */
+ if (migratetype == MIGRATE_MOVABLE)
+ return COMPACT_SUCCESS;
+
+ /*
+ * We are stealing for a non-movable allocation. Make
+ * sure we finish compacting the current pageblock
+ * first so it is as free as possible and we won't
+ * have to steal another one soon. This only applies
+ * to sync compaction, as async compaction operates
+ * on pageblocks of the same migratetype.
+ */
+ if (cc->mode == MIGRATE_ASYNC ||
+ IS_ALIGNED(cc->migrate_pfn,
+ pageblock_nr_pages)) {
+ return COMPACT_SUCCESS;
+ }
+
+ cc->finishing_block = true;
+ return COMPACT_CONTINUE;
+ }
}
return COMPACT_NO_SUITABLE_PAGE;
}
static enum compact_result compact_finished(struct zone *zone,
- struct compact_control *cc,
- const int migratetype)
+ struct compact_control *cc)
{
int ret;
- ret = __compact_finished(zone, cc, migratetype);
+ ret = __compact_finished(zone, cc);
trace_mm_compaction_finished(zone, cc->order, ret);
if (ret == COMPACT_NO_SUITABLE_PAGE)
ret = COMPACT_CONTINUE;
@@ -1481,9 +1515,9 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
enum compact_result ret;
unsigned long start_pfn = zone->zone_start_pfn;
unsigned long end_pfn = zone_end_pfn(zone);
- const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
const bool sync = cc->mode != MIGRATE_ASYNC;
+ cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
cc->classzone_idx);
/* Compaction is likely to fail */
@@ -1533,8 +1567,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
migrate_prep_local();
- while ((ret = compact_finished(zone, cc, migratetype)) ==
- COMPACT_CONTINUE) {
+ while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
int err;
switch (isolate_migratepages(zone, cc)) {
diff --git a/mm/filemap.c b/mm/filemap.c
index 681da61080bc..6f1be573a5e6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2050,7 +2050,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
iocb->ki_pos += retval;
count -= retval;
}
- iov_iter_revert(iter, iov_iter_count(iter) - count);
+ iov_iter_revert(iter, count - iov_iter_count(iter));
/*
* Btrfs can have a short DIO read if we encounter
@@ -2791,12 +2791,6 @@ ssize_t generic_perform_write(struct file *file,
ssize_t written = 0;
unsigned int flags = 0;
- /*
- * Copies from kernel address space cannot fail (NFSD is a big user).
- */
- if (!iter_is_iovec(i))
- flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index db77dcb38afd..72ebec18629c 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -200,10 +200,7 @@ struct frame_vector *frame_vector_create(unsigned int nr_frames)
* Avoid higher order allocations, use vmalloc instead. It should
* be rare anyway.
*/
- if (size <= PAGE_SIZE)
- vec = kmalloc(size, GFP_KERNEL);
- else
- vec = vmalloc(size);
+ vec = kvmalloc(size, GFP_KERNEL);
if (!vec)
return NULL;
vec->nr_allocated = nr_frames;
diff --git a/mm/gup.c b/mm/gup.c
index d9e6fddcc51f..576c4df58882 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
/* mlock all present pages, but do not fault in new pages */
if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
return -ENOENT;
- /* For mm_populate(), just skip the stack guard page. */
- if ((*flags & FOLL_POPULATE) &&
- (stack_guard_page_start(vma, address) ||
- stack_guard_page_end(vma, address + PAGE_SIZE)))
- return -ENOENT;
if (*flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
if (*flags & FOLL_REMOTE)
@@ -407,12 +402,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
ret = handle_mm_fault(vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
- if (ret & VM_FAULT_OOM)
- return -ENOMEM;
- if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
- return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
- if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
- return -EFAULT;
+ int err = vm_fault_to_errno(ret, *flags);
+
+ if (err)
+ return err;
BUG();
}
@@ -723,12 +716,10 @@ retry:
ret = handle_mm_fault(vma, address, fault_flags);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
- if (ret & VM_FAULT_OOM)
- return -ENOMEM;
- if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
- return -EHWPOISON;
- if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
- return -EFAULT;
+ int err = vm_fault_to_errno(ret, 0);
+
+ if (err)
+ return err;
BUG();
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b787c4cfda0e..88c6167f194d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,7 +715,8 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
}
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write)
+ pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write,
+ pgtable_t pgtable)
{
struct mm_struct *mm = vma->vm_mm;
pmd_t entry;
@@ -729,6 +730,12 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
entry = pmd_mkyoung(pmd_mkdirty(entry));
entry = maybe_pmd_mkwrite(entry, vma);
}
+
+ if (pgtable) {
+ pgtable_trans_huge_deposit(mm, pmd, pgtable);
+ atomic_long_inc(&mm->nr_ptes);
+ }
+
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
spin_unlock(ptl);
@@ -738,6 +745,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, pfn_t pfn, bool write)
{
pgprot_t pgprot = vma->vm_page_prot;
+ pgtable_t pgtable = NULL;
/*
* If we had pmd_special, we could avoid all these restrictions,
* but we need to be consistent with PTEs and architectures that
@@ -752,9 +760,15 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return VM_FAULT_SIGBUS;
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm, addr);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
track_pfn_insert(vma, &pgprot, pfn);
- insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
+ insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
@@ -1412,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
*/
if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
page = pmd_page(*vmf->pmd);
+ if (!get_page_unless_zero(page))
+ goto out_unlock;
spin_unlock(vmf->ptl);
wait_on_page_locked(page);
+ put_page(page);
goto out;
}
@@ -1445,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
+ page_nid = -1;
+ if (!get_page_unless_zero(page))
+ goto out_unlock;
spin_unlock(vmf->ptl);
wait_on_page_locked(page);
- page_nid = -1;
+ put_page(page);
goto out;
}
@@ -1611,12 +1631,13 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (vma_is_dax(vma)) {
+ if (arch_needs_pgtable_deposit())
+ zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
if (is_huge_zero_pmd(orig_pmd))
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
} else if (is_huge_zero_pmd(orig_pmd)) {
- pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
- atomic_long_dec(&tlb->mm->nr_ptes);
+ zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
} else {
@@ -1625,10 +1646,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page)) {
- pgtable_t pgtable;
- pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
- pte_free(tlb->mm, pgtable);
- atomic_long_dec(&tlb->mm->nr_ptes);
+ zap_deposited_table(tlb->mm, pmd);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
} else {
if (arch_needs_pgtable_deposit())
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e5828875f7bb..3eedb187e549 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
if (ret & VM_FAULT_ERROR) {
+ int err = vm_fault_to_errno(ret, flags);
+
+ if (err)
+ return err;
+
remainder = 0;
break;
}
diff --git a/mm/internal.h b/mm/internal.h
index 04d08ef91224..0e4f558412fb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -183,6 +183,7 @@ extern int user_min_free_kbytes;
struct compact_control {
struct list_head freepages; /* List of free pages to migrate to */
struct list_head migratepages; /* List of pages being migrated */
+ struct zone *zone;
unsigned long nr_freepages; /* Number of isolated free pages */
unsigned long nr_migratepages; /* Number of pages to migrate */
unsigned long total_migrate_scanned;
@@ -190,17 +191,18 @@ struct compact_control {
unsigned long free_pfn; /* isolate_freepages search base */
unsigned long migrate_pfn; /* isolate_migratepages search base */
unsigned long last_migrated_pfn;/* Not yet flushed page being freed */
+ const gfp_t gfp_mask; /* gfp mask of a direct compactor */
+ int order; /* order a direct compactor needs */
+ int migratetype; /* migratetype of direct compactor */
+ const unsigned int alloc_flags; /* alloc flags of a direct compactor */
+ const int classzone_idx; /* zone index of a direct compactor */
enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */
bool ignore_block_suitable; /* Scan blocks considered unsuitable */
bool direct_compaction; /* False from kcompactd or /proc/... */
bool whole_zone; /* Whole zone should/has been scanned */
- int order; /* order a direct compactor needs */
- const gfp_t gfp_mask; /* gfp mask of a direct compactor */
- const unsigned int alloc_flags; /* alloc flags of a direct compactor */
- const int classzone_idx; /* zone index of a direct compactor */
- struct zone *zone;
bool contended; /* Signal lock or sched contention */
+ bool finishing_block; /* Finishing current pageblock */
};
unsigned long
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 9348d27088c1..c81549d5c833 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
*size += sizeof(struct kasan_alloc_meta);
/* Add free meta. */
- if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+ if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
cache->object_size < sizeof(struct kasan_free_meta)) {
cache->kasan_info.free_meta_offset = *size;
*size += sizeof(struct kasan_free_meta);
@@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
/* RCU slabs could be legally used after free within the RCU period */
- if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
return;
kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
@@ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object)
s8 shadow_byte;
/* RCU slabs could be legally used after free within the RCU period */
- if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
return false;
shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
@@ -691,7 +691,7 @@ int kasan_module_alloc(void *addr, size_t size)
ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
shadow_start + shadow_size,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ GFP_KERNEL | __GFP_ZERO,
PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
__builtin_return_address(0));
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7cb9c88bb4a3..945fd1ca49b5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -612,7 +612,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
spinlock_t *ptl)
{
pte_t *_pte;
- for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+ for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+ _pte++, page++, address += PAGE_SIZE) {
pte_t pteval = *_pte;
struct page *src_page;
@@ -651,9 +652,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
}
-
- address += PAGE_SIZE;
- page++;
+ cond_resched();
}
}
@@ -907,8 +906,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
return false;
}
/* check if the pmd is still valid */
- if (mm_find_pmd(mm, address) != pmd)
+ if (mm_find_pmd(mm, address) != pmd) {
+ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
+ }
}
if (ret & VM_FAULT_ERROR) {
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index 5bf191756a4a..2d5959c5f7c5 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
{
/* TODO: RCU freeing is unsupported for now; hide false positives. */
- if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+ if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU))
kmemcheck_mark_freed(object, size);
}
diff --git a/mm/ksm.c b/mm/ksm.c
index d9fc0e456128..216184af0e19 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
goto out;
if (PageTransCompound(page)) {
- err = split_huge_page(page);
- if (err)
+ if (split_huge_page(page))
goto out_unlock;
}
diff --git a/mm/memblock.c b/mm/memblock.c
index b049c9b2dba8..7b8a5db76a2f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
}
}
+extern unsigned long __init_memblock
+memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+ struct memblock_region *rgn;
+ unsigned long size = 0;
+ int idx;
+
+ for_each_memblock_type((&memblock.reserved), rgn) {
+ phys_addr_t start, end;
+
+ if (rgn->base + rgn->size < start_addr)
+ continue;
+ if (rgn->base > end_addr)
+ continue;
+
+ start = rgn->base;
+ end = start + rgn->size;
+ size += end - start;
+ }
+
+ return size;
+}
+
void __init_memblock __memblock_dump_all(void)
{
pr_info("MEMBLOCK configuration:\n");
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff73899af61a..94172089f52f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5528,7 +5528,7 @@ static void uncharge_list(struct list_head *page_list)
next = page->lru.next;
VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
+ VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
if (!page->mem_cgroup)
continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 73066b80d14a..ecc183fd94f3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -539,6 +539,13 @@ static int delete_from_lru_cache(struct page *p)
*/
ClearPageActive(p);
ClearPageUnevictable(p);
+
+ /*
+ * Poisoned page might never drop its ref count to 0 so we have
+ * to uncharge it manually from its memcg.
+ */
+ mem_cgroup_uncharge(p);
+
/*
* drop the page count elevated by isolate_lru_page()
*/
@@ -1177,7 +1184,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* page_remove_rmap() in try_to_unmap_one(). So to determine page status
* correctly, we save a copy of the page flags at this time.
*/
- page_flags = p->flags;
+ if (PageHuge(p))
+ page_flags = hpage->flags;
+ else
+ page_flags = p->flags;
/*
* unpoison always clear PG_hwpoison inside page lock
@@ -1588,12 +1598,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
if (ret) {
pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
pfn, ret, page->flags, &page->flags);
- /*
- * We know that soft_offline_huge_page() tries to migrate
- * only one hugepage pointed to by hpage, so we need not
- * run through the pagelist here.
- */
- putback_active_hugepage(hpage);
+ if (!list_empty(&pagelist))
+ putback_movable_pages(&pagelist);
if (ret > 0)
ret = -EIO;
} else {
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d729ded0..bb11c474857e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2855,40 +2855,6 @@ out_release:
}
/*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
- address &= PAGE_MASK;
- if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
- struct vm_area_struct *prev = vma->vm_prev;
-
- /*
- * Is there a mapping abutting this one below?
- *
- * That's only ok if it's the same stack mapping
- * that has gotten split..
- */
- if (prev && prev->vm_end == address)
- return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
- return expand_downwards(vma, address - PAGE_SIZE);
- }
- if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
- struct vm_area_struct *next = vma->vm_next;
-
- /* As VM_GROWSDOWN but s/below/above/ */
- if (next && next->vm_start == address + PAGE_SIZE)
- return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
- return expand_upwards(vma, address + PAGE_SIZE);
- }
- return 0;
-}
-
-/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
if (vma->vm_flags & VM_SHARED)
return VM_FAULT_SIGBUS;
- /* Check if we need to add a guard page to the stack */
- if (check_stack_guard_page(vma, vmf->address) < 0)
- return VM_FAULT_SIGSEGV;
-
/*
* Use pte_alloc() instead of pte_alloc_map(). We can't run
* pte_offset_map() on pmds where a huge pmd might be created
@@ -3029,6 +2991,17 @@ static int __do_fault(struct vm_fault *vmf)
return ret;
}
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+ return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
static int pte_alloc_one_map(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@@ -3052,18 +3025,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
map_pte:
/*
* If a huge pmd materialized under us just retry later. Use
- * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
- * didn't become pmd_trans_huge under us and then back to pmd_none, as
- * a result of MADV_DONTNEED running immediately after a huge pmd fault
- * in a different thread of this mm, in turn leading to a misleading
- * pmd_trans_huge() retval. All we have to ensure is that it is a
- * regular pmd that we can walk with pte_offset_map() and we can do that
- * through an atomic read in C, which is what pmd_trans_unstable()
- * provides.
+ * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+ * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+ * under us and then back to pmd_none, as a result of MADV_DONTNEED
+ * running immediately after a huge pmd fault in a different thread of
+ * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+ * All we have to ensure is that it is a regular pmd that we can walk
+ * with pte_offset_map() and we can do that through an atomic read in
+ * C, which is what pmd_trans_unstable() provides.
*/
- if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+ if (pmd_devmap_trans_unstable(vmf->pmd))
return VM_FAULT_NOPAGE;
+ /*
+ * At this point we know that our vmf->pmd points to a page of ptes
+ * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+ * for the duration of the fault. If a racing MADV_DONTNEED runs and
+ * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+ * be valid and we will re-check to make sure the vmf->pte isn't
+ * pte_none() under vmf->ptl protection when we return to
+ * alloc_set_pte().
+ */
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
return 0;
@@ -3690,7 +3672,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
vmf->pte = NULL;
} else {
/* See comment in pte_alloc_one_map() */
- if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+ if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
/*
* A regular pmd is established and it can't morph into a huge
diff --git a/mm/mlock.c b/mm/mlock.c
index c483c5c20b4b..b562b5523a65 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -284,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
{
int i;
int nr = pagevec_count(pvec);
- int delta_munlocked;
+ int delta_munlocked = -nr;
struct pagevec pvec_putback;
int pgrescued = 0;
@@ -304,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
continue;
else
__munlock_isolation_failed(page);
+ } else {
+ delta_munlocked++;
}
/*
@@ -315,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
pagevec_add(&pvec_putback, pvec->pages[i]);
pvec->pages[i] = NULL;
}
- delta_munlocked = -nr + pagevec_count(&pvec_putback);
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
spin_unlock_irq(zone_lru_lock(zone));
diff --git a/mm/mmap.c b/mm/mmap.c
index f82741e199c0..a5e3dcd75e79 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
unsigned long retval;
unsigned long newbrk, oldbrk;
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *next;
unsigned long min_brk;
bool populate;
LIST_HEAD(uf);
@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
}
/* Check against existing mmap mappings. */
- if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+ next = find_vma(mm, oldbrk);
+ if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
goto out;
/* Ok, looks good - let it rip. */
@@ -253,10 +255,22 @@ out:
static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
- unsigned long max, subtree_gap;
- max = vma->vm_start;
- if (vma->vm_prev)
- max -= vma->vm_prev->vm_end;
+ unsigned long max, prev_end, subtree_gap;
+
+ /*
+ * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+ * allow two stack_guard_gaps between them here, and when choosing
+ * an unmapped area; whereas when expanding we only require one.
+ * That's a little inconsistent, but keeps the code here simpler.
+ */
+ max = vm_start_gap(vma);
+ if (vma->vm_prev) {
+ prev_end = vm_end_gap(vma->vm_prev);
+ if (max > prev_end)
+ max -= prev_end;
+ else
+ max = 0;
+ }
if (vma->vm_rb.rb_left) {
subtree_gap = rb_entry(vma->vm_rb.rb_left,
struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm)
anon_vma_unlock_read(anon_vma);
}
- highest_address = vma->vm_end;
+ highest_address = vm_end_gap(vma);
vma = vma->vm_next;
i++;
}
@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
if (vma->vm_next)
vma_gap_update(vma->vm_next);
else
- mm->highest_vm_end = vma->vm_end;
+ mm->highest_vm_end = vm_end_gap(vma);
/*
* vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -856,7 +870,7 @@ again:
vma_gap_update(vma);
if (end_changed) {
if (!next)
- mm->highest_vm_end = end;
+ mm->highest_vm_end = vm_end_gap(vma);
else if (!adjust_next)
vma_gap_update(next);
}
@@ -941,7 +955,7 @@ again:
* mm->highest_vm_end doesn't need any update
* in remove_next == 1 case.
*/
- VM_WARN_ON(mm->highest_vm_end != end);
+ VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
}
}
if (insert && file)
@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
while (true) {
/* Visit left subtree if it looks promising */
- gap_end = vma->vm_start;
+ gap_end = vm_start_gap(vma);
if (gap_end >= low_limit && vma->vm_rb.rb_left) {
struct vm_area_struct *left =
rb_entry(vma->vm_rb.rb_left,
@@ -1798,12 +1812,13 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
}
}
- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
check_current:
/* Check if current node has a suitable gap */
if (gap_start > high_limit)
return -ENOMEM;
- if (gap_end >= low_limit && gap_end - gap_start >= length)
+ if (gap_end >= low_limit &&
+ gap_end > gap_start && gap_end - gap_start >= length)
goto found;
/* Visit right subtree if it looks promising */
@@ -1825,8 +1840,8 @@ check_current:
vma = rb_entry(rb_parent(prev),
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_left) {
- gap_start = vma->vm_prev->vm_end;
- gap_end = vma->vm_start;
+ gap_start = vm_end_gap(vma->vm_prev);
+ gap_end = vm_start_gap(vma);
goto check_current;
}
}
@@ -1890,7 +1905,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
while (true) {
/* Visit right subtree if it looks promising */
- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
if (gap_start <= high_limit && vma->vm_rb.rb_right) {
struct vm_area_struct *right =
rb_entry(vma->vm_rb.rb_right,
@@ -1903,10 +1918,11 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
check_current:
/* Check if current node has a suitable gap */
- gap_end = vma->vm_start;
+ gap_end = vm_start_gap(vma);
if (gap_end < low_limit)
return -ENOMEM;
- if (gap_start <= high_limit && gap_end - gap_start >= length)
+ if (gap_start <= high_limit &&
+ gap_end > gap_start && gap_end - gap_start >= length)
goto found;
/* Visit left subtree if it looks promising */
@@ -1929,7 +1945,7 @@ check_current:
struct vm_area_struct, vm_rb);
if (prev == vma->vm_rb.rb_right) {
gap_start = vma->vm_prev ?
- vma->vm_prev->vm_end : 0;
+ vm_end_gap(vma->vm_prev) : 0;
goto check_current;
}
}
@@ -1967,7 +1983,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct vm_unmapped_area_info info;
if (len > TASK_SIZE - mmap_min_addr)
@@ -1978,9 +1994,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
if (addr) {
addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
+ vma = find_vma_prev(mm, addr, &prev);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
@@ -2003,7 +2020,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
@@ -2018,9 +2035,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
+ vma = find_vma_prev(mm, addr, &prev);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
@@ -2155,21 +2173,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
* update accounting. This is shared with both the
* grow-up and grow-down cases.
*/
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+ unsigned long size, unsigned long grow)
{
struct mm_struct *mm = vma->vm_mm;
struct rlimit *rlim = current->signal->rlim;
- unsigned long new_start, actual_size;
+ unsigned long new_start;
/* address space limit tests */
if (!may_expand_vm(mm, vma->vm_flags, grow))
return -ENOMEM;
/* Stack limit test */
- actual_size = size;
- if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
- actual_size -= PAGE_SIZE;
- if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
return -ENOMEM;
/* mlock limit tests */
@@ -2207,16 +2223,32 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
+ struct vm_area_struct *next;
+ unsigned long gap_addr;
int error = 0;
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
- /* Guard against wrapping around to address 0. */
- if (address < PAGE_ALIGN(address+4))
- address = PAGE_ALIGN(address+4);
- else
+ /* Guard against exceeding limits of the address space. */
+ address &= PAGE_MASK;
+ if (address >= TASK_SIZE)
return -ENOMEM;
+ address += PAGE_SIZE;
+
+ /* Enforce stack_guard_gap */
+ gap_addr = address + stack_guard_gap;
+
+ /* Guard against overflow */
+ if (gap_addr < address || gap_addr > TASK_SIZE)
+ gap_addr = TASK_SIZE;
+
+ next = vma->vm_next;
+ if (next && next->vm_start < gap_addr) {
+ if (!(next->vm_flags & VM_GROWSUP))
+ return -ENOMEM;
+ /* Check that both stack segments have the same anon_vma? */
+ }
/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
@@ -2261,7 +2293,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
if (vma->vm_next)
vma_gap_update(vma->vm_next);
else
- mm->highest_vm_end = address;
+ mm->highest_vm_end = vm_end_gap(vma);
spin_unlock(&mm->page_table_lock);
perf_event_mmap(vma);
@@ -2282,6 +2314,8 @@ int expand_downwards(struct vm_area_struct *vma,
unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
+ struct vm_area_struct *prev;
+ unsigned long gap_addr;
int error;
address &= PAGE_MASK;
@@ -2289,6 +2323,17 @@ int expand_downwards(struct vm_area_struct *vma,
if (error)
return error;
+ /* Enforce stack_guard_gap */
+ gap_addr = address - stack_guard_gap;
+ if (gap_addr > address)
+ return -ENOMEM;
+ prev = vma->vm_prev;
+ if (prev && prev->vm_end > gap_addr) {
+ if (!(prev->vm_flags & VM_GROWSDOWN))
+ return -ENOMEM;
+ /* Check that both stack segments have the same anon_vma? */
+ }
+
/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
@@ -2343,28 +2388,25 @@ int expand_downwards(struct vm_area_struct *vma,
return error;
}
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+ unsigned long val;
+ char *endptr;
+
+ val = simple_strtoul(p, &endptr, 10);
+ if (!*endptr)
+ stack_guard_gap = val << PAGE_SHIFT;
+
+ return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
#ifdef CONFIG_STACK_GROWSUP
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
- struct vm_area_struct *next;
-
- address &= PAGE_MASK;
- next = vma->vm_next;
- if (next && next->vm_start == address + PAGE_SIZE) {
- if (!(next->vm_flags & VM_GROWSUP))
- return -ENOMEM;
- }
return expand_upwards(vma, address);
}
@@ -2386,14 +2428,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
#else
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
- struct vm_area_struct *prev;
-
- address &= PAGE_MASK;
- prev = vma->vm_prev;
- if (prev && prev->vm_end == address) {
- if (!(prev->vm_flags & VM_GROWSDOWN))
- return -ENOMEM;
- }
return expand_downwards(vma, address);
}
@@ -2491,7 +2525,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
vma->vm_prev = prev;
vma_gap_update(vma);
} else
- mm->highest_vm_end = prev ? prev->vm_end : 0;
+ mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
tail_vma->vm_next = NULL;
/* Kill the cache */
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index a7652acd2ab9..54ca54562928 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,7 +21,7 @@
#include <linux/slab.h>
/* global SRCU for all MMs */
-static struct srcu_struct srcu;
+DEFINE_STATIC_SRCU(srcu);
/*
* This function allows mmu_notifier::release callback to delay a call to
@@ -252,12 +252,6 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
BUG_ON(atomic_read(&mm->mm_users) <= 0);
- /*
- * Verify that mmu_notifier_init() already run and the global srcu is
- * initialized.
- */
- BUG_ON(!srcu.per_cpu_ref);
-
ret = -ENOMEM;
mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
if (unlikely(!mmu_notifier_mm))
@@ -406,9 +400,3 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
-
-static int __init mmu_notifier_init(void)
-{
- return init_srcu_struct(&srcu);
-}
-subsys_initcall(mmu_notifier_init);
diff --git a/mm/nommu.c b/mm/nommu.c
index 2d131b97a851..fc184f597d59 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -237,12 +237,16 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
+void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
+{
+ return __vmalloc(size, flags, PAGE_KERNEL);
+}
+
void *vmalloc_user(unsigned long size)
{
void *ret;
- ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
if (ret) {
struct vm_area_struct *vma;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2c25de46c58f..2302f250d6b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static inline void reset_deferred_meminit(pg_data_t *pgdat)
{
+ unsigned long max_initialise;
+ unsigned long reserved_lowmem;
+
+ /*
+ * Initialise at least 2G of a node but also take into account that
+ * two large system hashes that can take up 1GB for 0.25TB/node.
+ */
+ max_initialise = max(2UL << (30 - PAGE_SHIFT),
+ (pgdat->node_spanned_pages >> 8));
+
+ /*
+ * Compensate the all the memblock reservations (e.g. crash kernel)
+ * from the initial estimation to make sure we will initialize enough
+ * memory to boot.
+ */
+ reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+ pgdat->node_start_pfn + max_initialise);
+ max_initialise += reserved_lowmem;
+
+ pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
pgdat->first_deferred_pfn = ULONG_MAX;
}
@@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat,
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
{
- unsigned long max_initialise;
-
/* Always populate low zones for address-contrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
- /*
- * Initialise at least 2G of a node but also take into account that
- * two large system hashes that can take up 1GB for 0.25TB/node.
- */
- max_initialise = max(2UL << (30 - PAGE_SHIFT),
- (pgdat->node_spanned_pages >> 8));
-
(*nr_initialised)++;
- if ((*nr_initialised > max_initialise) &&
+ if ((*nr_initialised > pgdat->static_init_size) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false;
@@ -1832,9 +1843,9 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
* Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
-int move_freepages(struct zone *zone,
+static int move_freepages(struct zone *zone,
struct page *start_page, struct page *end_page,
- int migratetype)
+ int migratetype, int *num_movable)
{
struct page *page;
unsigned int order;
@@ -1851,6 +1862,9 @@ int move_freepages(struct zone *zone,
VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif
+ if (num_movable)
+ *num_movable = 0;
+
for (page = start_page; page <= end_page;) {
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
@@ -1861,6 +1875,15 @@ int move_freepages(struct zone *zone,
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
if (!PageBuddy(page)) {
+ /*
+ * We assume that pages that could be isolated for
+ * migration are movable. But we don't actually try
+ * isolating, as that would be expensive.
+ */
+ if (num_movable &&
+ (PageLRU(page) || __PageMovable(page)))
+ (*num_movable)++;
+
page++;
continue;
}
@@ -1876,7 +1899,7 @@ int move_freepages(struct zone *zone,
}
int move_freepages_block(struct zone *zone, struct page *page,
- int migratetype)
+ int migratetype, int *num_movable)
{
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
@@ -1893,7 +1916,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
if (!zone_spans_pfn(zone, end_pfn))
return 0;
- return move_freepages(zone, start_page, end_page, migratetype);
+ return move_freepages(zone, start_page, end_page, migratetype,
+ num_movable);
}
static void change_pageblock_range(struct page *pageblock_page,
@@ -1943,28 +1967,79 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
/*
* This function implements actual steal behaviour. If order is large enough,
* we can steal whole pageblock. If not, we first move freepages in this
- * pageblock and check whether half of pages are moved or not. If half of
- * pages are moved, we can change migratetype of pageblock and permanently
- * use it's pages as requested migratetype in the future.
+ * pageblock to our migratetype and determine how many already-allocated pages
+ * are there in the pageblock with a compatible migratetype. If at least half
+ * of pages are free or compatible, we can change migratetype of the pageblock
+ * itself, so pages freed in the future will be put on the correct free list.
*/
static void steal_suitable_fallback(struct zone *zone, struct page *page,
- int start_type)
+ int start_type, bool whole_block)
{
unsigned int current_order = page_order(page);
- int pages;
+ struct free_area *area;
+ int free_pages, movable_pages, alike_pages;
+ int old_block_type;
+
+ old_block_type = get_pageblock_migratetype(page);
+
+ /*
+ * This can happen due to races and we want to prevent broken
+ * highatomic accounting.
+ */
+ if (is_migrate_highatomic(old_block_type))
+ goto single_page;
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
change_pageblock_range(page, current_order, start_type);
- return;
+ goto single_page;
}
- pages = move_freepages_block(zone, page, start_type);
+ /* We are not allowed to try stealing from the whole block */
+ if (!whole_block)
+ goto single_page;
- /* Claim the whole block if over half of it is free */
- if (pages >= (1 << (pageblock_order-1)) ||
+ free_pages = move_freepages_block(zone, page, start_type,
+ &movable_pages);
+ /*
+ * Determine how many pages are compatible with our allocation.
+ * For movable allocation, it's the number of movable pages which
+ * we just obtained. For other types it's a bit more tricky.
+ */
+ if (start_type == MIGRATE_MOVABLE) {
+ alike_pages = movable_pages;
+ } else {
+ /*
+ * If we are falling back a RECLAIMABLE or UNMOVABLE allocation
+ * to MOVABLE pageblock, consider all non-movable pages as
+ * compatible. If it's UNMOVABLE falling back to RECLAIMABLE or
+ * vice versa, be conservative since we can't distinguish the
+ * exact migratetype of non-movable pages.
+ */
+ if (old_block_type == MIGRATE_MOVABLE)
+ alike_pages = pageblock_nr_pages
+ - (free_pages + movable_pages);
+ else
+ alike_pages = 0;
+ }
+
+ /* moving whole block can fail due to zone boundary conditions */
+ if (!free_pages)
+ goto single_page;
+
+ /*
+ * If a sufficient number of pages in the block are either free or of
+ * comparable migratability as our allocation, claim the whole block.
+ */
+ if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
page_group_by_mobility_disabled)
set_pageblock_migratetype(page, start_type);
+
+ return;
+
+single_page:
+ area = &zone->free_area[current_order];
+ list_move(&page->lru, &area->free_list[start_type]);
}
/*
@@ -2034,7 +2109,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
&& !is_migrate_cma(mt)) {
zone->nr_reserved_highatomic += pageblock_nr_pages;
set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
- move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+ move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
}
out_unlock:
@@ -2111,7 +2186,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* may increase.
*/
set_pageblock_migratetype(page, ac->migratetype);
- ret = move_freepages_block(zone, page, ac->migratetype);
+ ret = move_freepages_block(zone, page, ac->migratetype,
+ NULL);
if (ret) {
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
@@ -2123,8 +2199,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
return false;
}
-/* Remove an element from the buddy allocator from the fallback list */
-static inline struct page *
+/*
+ * Try finding a free buddy page on the fallback list and put it on the free
+ * list of requested migratetype, possibly along with other pages from the same
+ * block, depending on fragmentation avoidance heuristics. Returns true if
+ * fallback was found so that __rmqueue_smallest() can grab it.
+ */
+static inline bool
__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
{
struct free_area *area;
@@ -2145,32 +2226,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
page = list_first_entry(&area->free_list[fallback_mt],
struct page, lru);
- if (can_steal && !is_migrate_highatomic_page(page))
- steal_suitable_fallback(zone, page, start_migratetype);
-
- /* Remove the page from the freelists */
- area->nr_free--;
- list_del(&page->lru);
- rmv_page_order(page);
- expand(zone, page, order, current_order, area,
- start_migratetype);
- /*
- * The pcppage_migratetype may differ from pageblock's
- * migratetype depending on the decisions in
- * find_suitable_fallback(). This is OK as long as it does not
- * differ for MIGRATE_CMA pageblocks. Those can be used as
- * fallback only via special __rmqueue_cma_fallback() function
- */
- set_pcppage_migratetype(page, start_migratetype);
+ steal_suitable_fallback(zone, page, start_migratetype,
+ can_steal);
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, fallback_mt);
- return page;
+ return true;
}
- return NULL;
+ return false;
}
/*
@@ -2182,13 +2248,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
{
struct page *page;
+retry:
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) {
if (migratetype == MIGRATE_MOVABLE)
page = __rmqueue_cma_fallback(zone, order);
- if (!page)
- page = __rmqueue_fallback(zone, order, migratetype);
+ if (!page && __rmqueue_fallback(zone, order, migratetype))
+ goto retry;
}
trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3227,14 +3294,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
enum compact_priority prio, enum compact_result *compact_result)
{
struct page *page;
+ unsigned int noreclaim_flag;
if (!order)
return NULL;
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
prio);
- current->flags &= ~PF_MEMALLOC;
+ memalloc_noreclaim_restore(noreclaim_flag);
if (*compact_result <= COMPACT_INACTIVE)
return NULL;
@@ -3381,12 +3449,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
{
struct reclaim_state reclaim_state;
int progress;
+ unsigned int noreclaim_flag;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
@@ -3396,7 +3465,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
- current->flags &= ~PF_MEMALLOC;
+ memalloc_noreclaim_restore(noreclaim_flag);
cond_resched();
@@ -3609,6 +3678,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+ const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
unsigned long did_some_progress;
@@ -3676,12 +3746,17 @@ retry_cpuset:
/*
* For costly allocations, try direct compaction first, as it's likely
- * that we have enough base pages and don't need to reclaim. Don't try
- * that for allocations that are allowed to ignore watermarks, as the
- * ALLOC_NO_WATERMARKS attempt didn't yet happen.
+ * that we have enough base pages and don't need to reclaim. For non-
+ * movable high-order allocations, do that as well, as compaction will
+ * try prevent permanent fragmentation by migrating from blocks of the
+ * same migratetype.
+ * Don't try this for allocations that are allowed to ignore
+ * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
- if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
- !gfp_pfmemalloc_allowed(gfp_mask)) {
+ if (can_direct_reclaim &&
+ (costly_order ||
+ (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
+ && !gfp_pfmemalloc_allowed(gfp_mask)) {
page = __alloc_pages_direct_compact(gfp_mask, order,
alloc_flags, ac,
INIT_COMPACT_PRIORITY,
@@ -3693,7 +3768,7 @@ retry_cpuset:
* Checks for costly allocations with __GFP_NORETRY, which
* includes THP page fault allocations
*/
- if (gfp_mask & __GFP_NORETRY) {
+ if (costly_order && (gfp_mask & __GFP_NORETRY)) {
/*
* If compaction is deferred for high-order allocations,
* it is because sync compaction recently failed. If
@@ -3774,7 +3849,7 @@ retry:
* Do not retry costly high order allocations unless they are
* __GFP_REPEAT
*/
- if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
+ if (costly_order && !(gfp_mask & __GFP_REPEAT))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -3806,7 +3881,9 @@ retry:
goto got_pg;
/* Avoid allocations with no watermarks from looping endlessly */
- if (test_thread_flag(TIF_MEMDIE))
+ if (test_thread_flag(TIF_MEMDIE) &&
+ (alloc_flags == ALLOC_NO_WATERMARKS ||
+ (gfp_mask & __GFP_NOMEMALLOC)))
goto nopage;
/* Retry as long as the OOM killer is making progress */
@@ -6072,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
/* pg_data_t should be reset to zero when it's allocated */
WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
- reset_deferred_meminit(pgdat);
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
pgdat->per_cpu_nodestats = NULL;
@@ -6094,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
(unsigned long)pgdat->node_mem_map);
#endif
+ reset_deferred_meminit(pgdat);
free_area_init_core(pgdat);
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 7927bbb54a4e..5092e4ef00c8 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -66,7 +66,8 @@ out:
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
zone->nr_isolate_pageblock++;
- nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
+ nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
+ NULL);
__mod_zone_freepage_state(zone, -nr_pages, migratetype);
}
@@ -120,7 +121,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
* pageblock scanning for freepage moving.
*/
if (!isolated_page) {
- nr_pages = move_freepages_block(zone, page, migratetype);
+ nr_pages = move_freepages_block(zone, page, migratetype, NULL);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
}
set_pageblock_migratetype(page, migratetype);
diff --git a/mm/rmap.c b/mm/rmap.c
index 3ff241f714eb..d405f0e0ee96 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data)
void __init anon_vma_init(void)
{
anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
- 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
+ 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
anon_vma_ctor);
anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
SLAB_PANIC|SLAB_ACCOUNT);
@@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
* If this page is still mapped, then its anon_vma cannot have been
* freed. But if it has been unmapped, we have no security against the
* anon_vma structure being freed and reused (for another anon_vma:
- * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+ * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
* above cannot corrupt).
*/
if (!page_mapped(page)) {
diff --git a/mm/slab.c b/mm/slab.c
index 1880d482a0cb..2a31ee3c5814 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
freelist = page->freelist;
slab_destroy_debugcheck(cachep, page);
- if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
+ if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
call_rcu(&page->rcu_head, kmem_rcu_free);
else
kmem_freepages(cachep, page);
@@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
cachep->num = 0;
- if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU)
+ if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
return false;
left = calculate_slab_order(cachep, size,
@@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2 * sizeof(unsigned long long)))
flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
- if (!(flags & SLAB_DESTROY_BY_RCU))
+ if (!(flags & SLAB_TYPESAFE_BY_RCU))
flags |= SLAB_POISON;
#endif
#endif
diff --git a/mm/slab.h b/mm/slab.h
index 65e7c3fcac72..9cfcf099709c 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
- SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
#if defined(CONFIG_DEBUG_SLAB)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
* back there or track user information then we can
* only use the space before that information.
*/
- if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+ if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
return s->inuse;
/*
* Else we can use all the padding etc for the allocation
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 09d0e849b07f..01a0fe2eb332 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
* Set of flags that will prevent slab merging
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
- SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+ SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
SLAB_FAILSLAB | SLAB_KASAN)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
@@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
struct kmem_cache *s, *s2;
/*
- * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
+ * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
* @slab_caches_to_rcu_destroy list. The slab pages are freed
* through RCU and and the associated kmem_cache are dereferenced
* while freeing the pages, so the kmem_caches should be freed only
@@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s)
memcg_unlink_cache(s);
list_del(&s->list);
- if (s->flags & SLAB_DESTROY_BY_RCU) {
+ if (s->flags & SLAB_TYPESAFE_BY_RCU) {
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
schedule_work(&slab_caches_to_rcu_destroy_work);
} else {
diff --git a/mm/slob.c b/mm/slob.c
index eac04d4357ec..1bae78d71096 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp)
/*
* struct slob_rcu is inserted at the tail of allocated slob blocks, which
- * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
+ * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
* the block using call_rcu.
*/
struct slob_rcu {
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize);
int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
{
- if (flags & SLAB_DESTROY_BY_RCU) {
+ if (flags & SLAB_TYPESAFE_BY_RCU) {
/* leave room for rcu footer at the end of object */
c->size += sizeof(struct slob_rcu);
}
@@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head)
void kmem_cache_free(struct kmem_cache *c, void *b)
{
kmemleak_free_recursive(b, c->flags);
- if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
+ if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
struct slob_rcu *slob_rcu;
slob_rcu = b + (c->size - sizeof(struct slob_rcu));
slob_rcu->size = c->size;
diff --git a/mm/slub.c b/mm/slub.c
index 7f4bc7027ed5..7449593fca72 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h)
static void free_slab(struct kmem_cache *s, struct page *page)
{
- if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
+ if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
struct rcu_head *head;
if (need_reserve_slab_rcu) {
@@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
* slab_free_freelist_hook() could have put the items into quarantine.
* If so, no need to free them.
*/
- if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
+ if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
return;
do_slab_free(s, page, head, tail, cnt, addr);
}
@@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
* the slab may touch the object after free or before allocation
* then we should never poison the object itself.
*/
- if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
+ if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
!s->ctor)
s->flags |= __OBJECT_POISON;
else
@@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
*/
s->inuse = size;
- if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
+ if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
s->ctor)) {
/*
* Relocate free pointer after the object if it is not
@@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
s->reserved = 0;
- if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+ if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
s->reserved = sizeof(struct rcu_head);
if (!calculate_sizes(s, -1))
@@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma);
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
+ return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
}
SLAB_ATTR_RO(destroy_by_rcu);
@@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
char mbuf[64];
char *buf;
struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+ ssize_t len;
if (!attr || !attr->store || !attr->show)
continue;
@@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
buf = buffer;
}
- attr->show(root_cache, buf);
- attr->store(s, buf, strlen(buf));
+ len = attr->show(root_cache, buf);
+ if (len > 0)
+ attr->store(s, buf, len);
}
if (buffer)
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index ac6318a064d3..3405b4ee1757 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -48,6 +48,9 @@ static int swap_cgroup_prepare(int type)
if (!page)
goto not_enough_page;
ctrl->map[idx] = page;
+
+ if (!(idx % SWAP_CLUSTER_MAX))
+ cond_resched();
}
return 0;
not_enough_page:
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index aa1c415f4abd..58f6c78f1dad 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -31,6 +31,7 @@
#include <linux/cpumask.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
+#include <linux/mm.h>
#ifdef CONFIG_SWAP
@@ -119,16 +120,18 @@ static int alloc_swap_slot_cache(unsigned int cpu)
/*
* Do allocation outside swap_slots_cache_mutex
- * as vzalloc could trigger reclaim and get_swap_page,
+ * as kvzalloc could trigger reclaim and get_swap_page,
* which can lock swap_slots_cache_mutex.
*/
- slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+ slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
+ GFP_KERNEL);
if (!slots)
return -ENOMEM;
- slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+ slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
+ GFP_KERNEL);
if (!slots_ret) {
- vfree(slots);
+ kvfree(slots);
return -ENOMEM;
}
@@ -152,9 +155,9 @@ static int alloc_swap_slot_cache(unsigned int cpu)
out:
mutex_unlock(&swap_slots_cache_mutex);
if (slots)
- vfree(slots);
+ kvfree(slots);
if (slots_ret)
- vfree(slots_ret);
+ kvfree(slots_ret);
return 0;
}
@@ -171,7 +174,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
cache->cur = 0;
cache->nr = 0;
if (free_slots && cache->slots) {
- vfree(cache->slots);
+ kvfree(cache->slots);
cache->slots = NULL;
}
mutex_unlock(&cache->alloc_lock);
@@ -186,7 +189,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
}
spin_unlock_irq(&cache->free_lock);
if (slots)
- vfree(slots);
+ kvfree(slots);
}
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7bfb9bd1ca21..539b8885e3d1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -523,7 +523,7 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
unsigned int i, nr;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
- spaces = vzalloc(sizeof(struct address_space) * nr);
+ spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL);
if (!spaces)
return -ENOMEM;
for (i = 0; i < nr; i++) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b86b2aca3fb9..4f6cba1b6632 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2270,8 +2270,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
vfree(swap_map);
- vfree(cluster_info);
- vfree(frontswap_map);
+ kvfree(cluster_info);
+ kvfree(frontswap_map);
/* Destroy swap account information */
swap_cgroup_swapoff(p->type);
exit_swap_address_space(p->type);
@@ -2794,7 +2794,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
- cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info));
+ cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
+ GFP_KERNEL);
if (!cluster_info) {
error = -ENOMEM;
goto bad_swap;
@@ -2827,7 +2828,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP))
- frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
+ frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
+ GFP_KERNEL);
if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
/*
diff --git a/mm/truncate.c b/mm/truncate.c
index 83a059e8cd1d..6479ed2afc53 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping,
/*
* Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
*/
static int invalidate_exceptional_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
- /* Handled by shmem itself */
- if (shmem_mapping(mapping))
+ /* Handled by shmem itself, or for DAX we do nothing. */
+ if (shmem_mapping(mapping) || dax_mapping(mapping))
return 1;
- if (dax_mapping(mapping))
- return dax_invalidate_mapping_entry(mapping, index);
clear_shadow_entry(mapping, index, entry);
return 1;
}
@@ -689,7 +686,17 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
cond_resched();
index++;
}
-
+ /*
+ * For DAX we invalidate page tables after invalidating radix tree. We
+ * could invalidate page tables while invalidating each entry however
+ * that would be expensive. And doing range unmapping before doesn't
+ * work as we have no cheap way to find whether radix tree entry didn't
+ * get remapped later.
+ */
+ if (dax_mapping(mapping)) {
+ unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+ (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+ }
out:
cleancache_invalidate_inode(mapping);
return ret;
diff --git a/mm/util.c b/mm/util.c
index 656dc5e37a87..26be6407abd7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -329,6 +329,67 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(vm_mmap);
+/**
+ * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
+ * @node: numa node to allocate from
+ *
+ * Uses kmalloc to get the memory but if the allocation fails then falls back
+ * to the vmalloc allocator. Use kvfree for freeing the memory.
+ *
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
+ * is supported only for large (>32kB) allocations, and it should be used only if
+ * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
+ *
+ * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people.
+ */
+void *kvmalloc_node(size_t size, gfp_t flags, int node)
+{
+ gfp_t kmalloc_flags = flags;
+ void *ret;
+
+ /*
+ * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
+ * so the given set of flags has to be compatible.
+ */
+ WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
+
+ /*
+ * We want to attempt a large physically contiguous block first because
+ * it is less likely to fragment multiple larger blocks and therefore
+ * contribute to a long term fragmentation less than vmalloc fallback.
+ * However make sure that larger requests are not too disruptive - no
+ * OOM killer and no allocation failure warnings as we have a fallback.
+ */
+ if (size > PAGE_SIZE) {
+ kmalloc_flags |= __GFP_NOWARN;
+
+ /*
+ * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly
+ * requests because there is no other way to tell the allocator
+ * that we want to fail rather than retry endlessly.
+ */
+ if (!(kmalloc_flags & __GFP_REPEAT) ||
+ (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ kmalloc_flags |= __GFP_NORETRY;
+ }
+
+ ret = kmalloc_node(size, kmalloc_flags, node);
+
+ /*
+ * It doesn't really make sense to fallback to vmalloc for sub page
+ * requests
+ */
+ if (ret || size <= PAGE_SIZE)
+ return ret;
+
+ return __vmalloc_node_flags_caller(size, node, flags,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kvmalloc_node);
+
void kvfree(const void *addr)
{
if (is_vmalloc_addr(addr))
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b52aeed3f58e..34a1c3e46ed7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -521,7 +521,7 @@ overflow:
}
}
- if (printk_ratelimit())
+ if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
size);
kfree(va);
@@ -1658,7 +1658,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page **pages;
unsigned int nr_pages, array_size, i;
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
+ const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN;
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1786,6 +1786,13 @@ fail:
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
+ *
+ * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ * and __GFP_NOFAIL are not supported
+ *
+ * Any use of gfp flags outside of GFP_KERNEL should be consulted
+ * with mm people.
+ *
*/
static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
@@ -1809,6 +1816,13 @@ static inline void *__vmalloc_node_flags(unsigned long size,
node, __builtin_return_address(0));
}
+
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+ void *caller)
+{
+ return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
+
/**
* vmalloc - allocate virtually contiguous memory
* @size: allocation size
@@ -1821,7 +1835,7 @@ static inline void *__vmalloc_node_flags(unsigned long size,
void *vmalloc(unsigned long size)
{
return __vmalloc_node_flags(size, NUMA_NO_NODE,
- GFP_KERNEL | __GFP_HIGHMEM);
+ GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc);
@@ -1838,7 +1852,7 @@ EXPORT_SYMBOL(vmalloc);
void *vzalloc(unsigned long size)
{
return __vmalloc_node_flags(size, NUMA_NO_NODE,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc);
@@ -1855,7 +1869,7 @@ void *vmalloc_user(unsigned long size)
void *ret;
ret = __vmalloc_node(size, SHMLBA,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ GFP_KERNEL | __GFP_ZERO,
PAGE_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
if (ret) {
@@ -1879,7 +1893,7 @@ EXPORT_SYMBOL(vmalloc_user);
*/
void *vmalloc_node(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
+ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
@@ -1899,7 +1913,7 @@ EXPORT_SYMBOL(vmalloc_node);
void *vzalloc_node(unsigned long size, int node)
{
return __vmalloc_node_flags(size, node,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc_node);
@@ -1921,7 +1935,7 @@ EXPORT_SYMBOL(vzalloc_node);
void *vmalloc_exec(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
+ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC,
NUMA_NO_NODE, __builtin_return_address(0));
}
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 6063581f705c..ce0618bfa8d0 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -115,9 +115,9 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
unsigned long pressure = 0;
/*
- * reclaimed can be greater than scanned in cases
- * like THP, where the scanned is 1 and reclaimed
- * could be 512
+ * reclaimed can be greater than scanned for things such as reclaimed
+ * slab pages. shrink_node() just adds reclaimed pages without a
+ * related increment to scanned pages.
*/
if (reclaimed >= scanned)
goto out;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4e7ed65842af..8ad39bbc79e6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1449,7 +1449,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
*
* Appropriate locks must be held before calling this function.
*
- * @nr_to_scan: The number of pages to look through on the list.
+ * @nr_to_scan: The number of eligible pages to look through on the list.
* @lruvec: The LRU vector to pull pages from.
* @dst: The temp list to put pages on to.
* @nr_scanned: The number of pages that were scanned.
@@ -1469,11 +1469,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
unsigned long skipped = 0;
- unsigned long scan, nr_pages;
+ unsigned long scan, total_scan, nr_pages;
LIST_HEAD(pages_skipped);
- for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
- !list_empty(src); scan++) {
+ scan = 0;
+ for (total_scan = 0;
+ scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+ total_scan++) {
struct page *page;
page = lru_to_page(src);
@@ -1487,6 +1489,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
continue;
}
+ /*
+ * Do not count skipped pages because that makes the function
+ * return with no isolated pages if the LRU mostly contains
+ * ineligible pages. This causes the VM to not reclaim any
+ * pages, triggering a premature OOM.
+ */
+ scan++;
switch (__isolate_lru_page(page, mode)) {
case 0:
nr_pages = hpage_nr_pages(page);
@@ -1524,9 +1533,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
skipped += nr_skipped[zid];
}
}
- *nr_scanned = scan;
+ *nr_scanned = total_scan;
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
- scan, skipped, nr_taken, mode, lru);
+ total_scan, skipped, nr_taken, mode, lru);
update_lru_sizes(lruvec, lru, nr_zone_taken);
return nr_taken;
}
@@ -3036,6 +3045,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
struct zonelist *zonelist;
unsigned long nr_reclaimed;
int nid;
+ unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
@@ -3062,9 +3072,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.gfp_mask,
sc.reclaim_idx);
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
- current->flags &= ~PF_MEMALLOC;
+ memalloc_noreclaim_restore(noreclaim_flag);
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
@@ -3589,8 +3599,9 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
unsigned long nr_reclaimed;
+ unsigned int noreclaim_flag;
- p->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3599,7 +3610,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
p->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
- p->flags &= ~PF_MEMALLOC;
+ memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed;
}
@@ -3764,6 +3775,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
struct task_struct *p = current;
struct reclaim_state reclaim_state;
int classzone_idx = gfp_zone(gfp_mask);
+ unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)),
@@ -3781,7 +3793,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
* and we also need to be able to write out pages for RECLAIM_WRITE
* and RECLAIM_UNMAP.
*/
- p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
+ noreclaim_flag = memalloc_noreclaim_save();
+ p->flags |= PF_SWAPWRITE;
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3797,7 +3810,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
}
p->reclaim_state = NULL;
- current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
+ current->flags &= ~PF_SWAPWRITE;
+ memalloc_noreclaim_restore(noreclaim_flag);
lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f5fa1bd1eb16..76f73670200a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1359,8 +1359,6 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
return zone == compare;
}
- /* The zone must be somewhere! */
- WARN_ON_ONCE(1);
return false;
}
OpenPOWER on IntegriCloud