Diffstat (limited to 'mm')
-rw-r--r--  mm/compaction.c      |  7
-rw-r--r--  mm/kmemleak.c        | 30
-rw-r--r--  mm/memory-failure.c  | 22
-rw-r--r--  mm/page_alloc.c      |  6
-rw-r--r--  mm/page_ext.c        | 23
-rw-r--r--  mm/page_owner.c      | 55
-rw-r--r--  mm/slab.c            |  3
-rw-r--r--  mm/slub.c            | 35
8 files changed, 101 insertions, 80 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index ce08b39d85d4..672d3c78c6ab 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 
 	/* Ensure the start of the pageblock or zone is online and valid */
 	block_pfn = pageblock_start_pfn(pfn);
-	block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+	block_pfn = max(block_pfn, zone->zone_start_pfn);
+	block_page = pfn_to_online_page(block_pfn);
 	if (block_page) {
 		page = block_page;
 		pfn = block_pfn;
 	}
 
 	/* Ensure the end of the pageblock or zone is online and valid */
-	block_pfn += pageblock_nr_pages;
+	block_pfn = pageblock_end_pfn(pfn) - 1;
 	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
 	end_page = pfn_to_online_page(block_pfn);
 	if (!end_page)
@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
 
 		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
 		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
-	} while (page < end_page);
+	} while (page <= end_page);
 
 	return false;
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 03a8d84badad..244607663363 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -527,6 +527,16 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
 }
 
 /*
+ * Remove an object from the object_tree_root and object_list. Must be called
+ * with the kmemleak_lock held _if_ kmemleak is still enabled.
+ */
+static void __remove_object(struct kmemleak_object *object)
+{
+	rb_erase(&object->rb_node, &object_tree_root);
+	list_del_rcu(&object->object_list);
+}
+
+/*
  * Look up an object in the object search tree and remove it from both
  * object_tree_root and object_list. The returned object's use_count should be
  * at least 1, as initially set by create_object().
@@ -538,10 +548,8 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
 
 	write_lock_irqsave(&kmemleak_lock, flags);
 	object = lookup_object(ptr, alias);
-	if (object) {
-		rb_erase(&object->rb_node, &object_tree_root);
-		list_del_rcu(&object->object_list);
-	}
+	if (object)
+		__remove_object(object);
 	write_unlock_irqrestore(&kmemleak_lock, flags);
 
 	return object;
@@ -1834,12 +1842,16 @@ static const struct file_operations kmemleak_fops = {
 
 static void __kmemleak_do_cleanup(void)
 {
-	struct kmemleak_object *object;
+	struct kmemleak_object *object, *tmp;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(object, &object_list, object_list)
-		delete_object_full(object->pointer);
-	rcu_read_unlock();
+	/*
+	 * Kmemleak has already been disabled, no need for RCU list traversal
+	 * or kmemleak_lock held.
+	 */
+	list_for_each_entry_safe(object, tmp, &object_list, object_list) {
+		__remove_object(object);
+		__delete_object(object);
+	}
 }
 
 /*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7ef849da8278..0ae72b6acee7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -199,7 +199,6 @@ struct to_kill {
 	struct task_struct *tsk;
 	unsigned long addr;
 	short size_shift;
-	char addr_valid;
 };
 
 /*
@@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 		}
 	}
 	tk->addr = page_address_in_vma(p, vma);
-	tk->addr_valid = 1;
 	if (is_zone_device_page(p))
 		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
 	else
 		tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
 
 	/*
-	 * In theory we don't have to kill when the page was
-	 * munmaped. But it could be also a mremap. Since that's
-	 * likely very rare kill anyways just out of paranoia, but use
-	 * a SIGKILL because the error is not contained anymore.
+	 * Send SIGKILL if "tk->addr == -EFAULT". Also, as
+	 * "tk->size_shift" is always non-zero for !is_zone_device_page(),
+	 * so "tk->size_shift == 0" effectively checks no mapping on
+	 * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
+	 * to a process' address space, it's possible not all N VMAs
+	 * contain mappings for the page, but at least one VMA does.
+	 * Only deliver SIGBUS with payload derived from the VMA that
+	 * has a mapping for the page.
 	 */
-	if (tk->addr == -EFAULT || tk->size_shift == 0) {
+	if (tk->addr == -EFAULT) {
 		pr_info("Memory failure: Unable to find user space address %lx in %s\n",
 			page_to_pfn(p), tsk->comm);
-		tk->addr_valid = 0;
+	} else if (tk->size_shift == 0) {
+		kfree(tk);
+		return;
 	}
 	get_task_struct(tsk);
 	tk->tsk = tsk;
@@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
 			 * make sure the process doesn't catch the
 			 * signal and then access the memory. Just kill it.
 			 */
-			if (fail || tk->addr_valid == 0) {
+			if (fail || tk->addr == -EFAULT) {
 				pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
 				       pfn, tk->tsk->comm, tk->tsk->pid);
 				do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c0b2e0306720..ecc3dbad606b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4473,12 +4473,14 @@ retry_cpuset:
 		if (page)
 			goto got_pg;
 
-		if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+		if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
+		    !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
 			/*
 			 * If allocating entire pageblock(s) and compaction
 			 * failed because all zones are below low watermarks
 			 * or is prohibited because it recently failed at this
-			 * order, fail immediately.
+			 * order, fail immediately unless the allocator has
+			 * requested compaction and reclaim retry.
 			 *
 			 * Reclaim is
 			 *  - potentially very expensive because zones are far
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 5f5769c7db3b..4ade843ff588 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = {
 #endif
 };
 
+unsigned long page_ext_size = sizeof(struct page_ext);
+
 static unsigned long total_usage;
-static unsigned long extra_mem;
 
 static bool __init invoke_need_callbacks(void)
 {
@@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void)
 
 	for (i = 0; i < entries; i++) {
 		if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
-			page_ext_ops[i]->offset = sizeof(struct page_ext) +
-						extra_mem;
-			extra_mem += page_ext_ops[i]->size;
+			page_ext_ops[i]->offset = page_ext_size;
+			page_ext_size += page_ext_ops[i]->size;
 			need = true;
 		}
 	}
@@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void)
 	}
 }
 
-static unsigned long get_entry_size(void)
-{
-	return sizeof(struct page_ext) + extra_mem;
-}
-
 static inline struct page_ext *get_entry(void *base, unsigned long index)
 {
-	return base + get_entry_size() * index;
+	return base + page_ext_size * index;
 }
 
 #if !defined(CONFIG_SPARSEMEM)
@@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid)
 		!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
 		nr_pages += MAX_ORDER_NR_PAGES;
 
-	table_size = get_entry_size() * nr_pages;
+	table_size = page_ext_size * nr_pages;
 
 	base = memblock_alloc_try_nid(
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
 	if (section->page_ext)
 		return 0;
 
-	table_size = get_entry_size() * PAGES_PER_SECTION;
+	table_size = page_ext_size * PAGES_PER_SECTION;
 	base = alloc_page_ext(table_size, nid);
 
 	/*
@@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
 	 * we need to apply a mask.
 	 */
 	pfn &= PAGE_SECTION_MASK;
-	section->page_ext = (void *)base - get_entry_size() * pfn;
+	section->page_ext = (void *)base - page_ext_size * pfn;
 	total_usage += table_size;
 	return 0;
 }
@@ -267,7 +262,7 @@ static void free_page_ext(void *addr)
 		struct page *page = virt_to_page(addr);
 		size_t table_size;
 
-		table_size = get_entry_size() * PAGES_PER_SECTION;
+		table_size = page_ext_size * PAGES_PER_SECTION;
 
 		BUG_ON(PageReserved(page));
 		kmemleak_free(addr);
diff --git a/mm/page_owner.c b/mm/page_owner.c
index dee931184788..e327bcd0380e 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -24,12 +24,10 @@ struct page_owner {
 	short last_migrate_reason;
 	gfp_t gfp_mask;
 	depot_stack_handle_t handle;
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	depot_stack_handle_t free_handle;
-#endif
 };
 
-static bool page_owner_disabled = true;
+static bool page_owner_enabled = false;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
 static depot_stack_handle_t dummy_handle;
@@ -44,7 +42,7 @@ static int __init early_page_owner_param(char *buf)
 		return -EINVAL;
 
 	if (strcmp(buf, "on") == 0)
-		page_owner_disabled = false;
+		page_owner_enabled = true;
 
 	return 0;
 }
@@ -52,10 +50,7 @@ early_param("page_owner", early_page_owner_param);
 
 static bool need_page_owner(void)
 {
-	if (page_owner_disabled)
-		return false;
-
-	return true;
+	return page_owner_enabled;
 }
 
 static __always_inline depot_stack_handle_t create_dummy_stack(void)
@@ -84,7 +79,7 @@ static noinline void register_early_stack(void)
 
 static void init_page_owner(void)
 {
-	if (page_owner_disabled)
+	if (!page_owner_enabled)
 		return;
 
 	register_dummy_stack();
@@ -148,25 +143,19 @@ void __reset_page_owner(struct page *page, unsigned int order)
 {
 	int i;
 	struct page_ext *page_ext;
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	depot_stack_handle_t handle = 0;
 	struct page_owner *page_owner;
 
-	if (debug_pagealloc_enabled())
-		handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-#endif
+	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
 
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return;
 	for (i = 0; i < (1 << order); i++) {
-		page_ext = lookup_page_ext(page + i);
-		if (unlikely(!page_ext))
-			continue;
-		__clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
-#ifdef CONFIG_DEBUG_PAGEALLOC
-		if (debug_pagealloc_enabled()) {
-			page_owner = get_page_owner(page_ext);
-			page_owner->free_handle = handle;
-		}
-#endif
+		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+		page_owner = get_page_owner(page_ext);
+		page_owner->free_handle = handle;
+		page_ext = page_ext_next(page_ext);
 	}
 }
 
@@ -184,9 +173,9 @@ static inline void __set_page_owner_handle(struct page *page,
 		page_owner->gfp_mask = gfp_mask;
 		page_owner->last_migrate_reason = -1;
 		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
-		__set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
 
-		page_ext = lookup_page_ext(page + i);
+		page_ext = page_ext_next(page_ext);
 	}
 }
 
@@ -224,12 +213,10 @@ void __split_page_owner(struct page *page, unsigned int order)
 	if (unlikely(!page_ext))
 		return;
 
-	page_owner = get_page_owner(page_ext);
-	page_owner->order = 0;
-	for (i = 1; i < (1 << order); i++) {
-		page_ext = lookup_page_ext(page + i);
+	for (i = 0; i < (1 << order); i++) {
 		page_owner = get_page_owner(page_ext);
 		page_owner->order = 0;
+		page_ext = page_ext_next(page_ext);
 	}
 }
 
@@ -260,7 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 	 * the new page, which will be freed.
 	 */
 	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
-	__set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
+	__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
 }
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -320,7 +307,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 		if (unlikely(!page_ext))
 			continue;
 
-		if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
 			continue;
 
 		page_owner = get_page_owner(page_ext);
@@ -435,7 +422,7 @@ void __dump_page_owner(struct page *page)
 		return;
 	}
 
-	if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
 		pr_alert("page_owner tracks the page as allocated\n");
 	else
 		pr_alert("page_owner tracks the page as freed\n");
@@ -451,7 +438,6 @@ void __dump_page_owner(struct page *page)
 		stack_trace_print(entries, nr_entries, 0);
 	}
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	handle = READ_ONCE(page_owner->free_handle);
 	if (!handle) {
 		pr_alert("page_owner free stack trace missing\n");
@@ -460,7 +446,6 @@
 		pr_alert("page last free stack trace:\n");
 		stack_trace_print(entries, nr_entries, 0);
 	}
-#endif
 
 	if (page_owner->last_migrate_reason != -1)
 		pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -527,7 +512,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 		 * Although we do have the info about past allocation of free
 		 * pages, it's not relevant for current memory usage.
 		 */
-		if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
 			continue;
 
 		page_owner = get_page_owner(page_ext);
diff --git a/mm/slab.c b/mm/slab.c
index 9df370558e5d..66e5d8032bae 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 
 /**
  * __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
  *
  * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
  * safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
  */
 size_t __ksize(const void *objp)
 {
diff --git a/mm/slub.c b/mm/slub.c
index 3d63ae320d31..b25c807a111f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2672,6 +2672,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 }
 
 /*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+						   void *obj)
+{
+	if (unlikely(slab_want_init_on_free(s)) && obj)
+		memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
+}
+
+/*
 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 * have the fastpath folded into their functions. So no function call
 * overhead for requests that can be satisfied on the fastpath.
@@ -2759,12 +2770,8 @@ redo:
 		prefetch_freepointer(s, next_object);
 		stat(s, ALLOC_FASTPATH);
 	}
-	/*
-	 * If the object has been wiped upon free, make sure it's fully
-	 * initialized by zeroing out freelist pointer.
-	 */
-	if (unlikely(slab_want_init_on_free(s)) && object)
-		memset(object + s->offset, 0, sizeof(void *));
+
+	maybe_wipe_obj_freeptr(s, object);
 
 	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
 		memset(object, 0, s->object_size);
@@ -3178,10 +3185,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 				goto error;
 
 			c = this_cpu_ptr(s->cpu_slab);
+			maybe_wipe_obj_freeptr(s, p[i]);
+
 			continue; /* goto for-loop */
 		}
 		c->freelist = get_freepointer(s, object);
 		p[i] = object;
+		maybe_wipe_obj_freeptr(s, p[i]);
 	}
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
@@ -4846,7 +4856,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		}
 	}
 
-	get_online_mems();
+	/*
+	 * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
+	 * already held which will conflict with an existing lock order:
+	 *
+	 * mem_hotplug_lock->slab_mutex->kernfs_mutex
+	 *
+	 * We don't really need mem_hotplug_lock (to hold off
+	 * slab_mem_going_offline_callback) here because slab's memory hot
+	 * unplug code doesn't destroy the kmem_cache->node[] data.
+	 */
+
 #ifdef CONFIG_SLUB_DEBUG
 	if (flags & SO_ALL) {
 		struct kmem_cache_node *n;
@@ -4887,7 +4907,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		x += sprintf(buf + x, " N%d=%lu",
 			     node, nodes[node]);
 #endif
-	put_online_mems();
 	kfree(nodes);
 	return x + sprintf(buf + x, "\n");
 }