diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig.debug | 2 | ||||
-rw-r--r-- | mm/filemap.c | 161 | ||||
-rw-r--r-- | mm/huge_memory.c | 2 | ||||
-rw-r--r-- | mm/ksm.c | 3 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 10 | ||||
-rw-r--r-- | mm/mmap.c | 13 | ||||
-rw-r--r-- | mm/page-writeback.c | 26 | ||||
-rw-r--r-- | mm/slab.c | 114 | ||||
-rw-r--r-- | mm/slub.c | 65 | ||||
-rw-r--r-- | mm/workingset.c | 10 |
11 files changed, 192 insertions, 216 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 22f4cd96acb0..afcc550877ff 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -76,8 +76,6 @@ config PAGE_POISONING_ZERO no longer necessary to write zeros when GFP_ZERO is used on allocation. - Enabling page poisoning with this option will disable hibernation - If unsure, say N bool diff --git a/mm/filemap.c b/mm/filemap.c index 8a287dfc5372..4bad32dd4b3b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -110,36 +110,94 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ +static int page_cache_tree_insert(struct address_space *mapping, + struct page *page, void **shadowp) +{ + struct radix_tree_node *node; + void **slot; + int error; + + error = __radix_tree_create(&mapping->page_tree, page->index, 0, + &node, &slot); + if (error) + return error; + if (*slot) { + void *p; + + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + if (!radix_tree_exceptional_entry(p)) + return -EEXIST; + + mapping->nrexceptional--; + if (!dax_mapping(mapping)) { + if (shadowp) + *shadowp = p; + if (node) + workingset_node_shadows_dec(node); + } else { + /* DAX can replace empty locked entry with a hole */ + WARN_ON_ONCE(p != + (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | + RADIX_DAX_ENTRY_LOCK)); + /* DAX accounts exceptional entries as normal pages */ + if (node) + workingset_node_pages_dec(node); + /* Wakeup waiters for exceptional entry lock */ + dax_wake_mapping_entry_waiter(mapping, page->index, + false); + } + } + radix_tree_replace_slot(slot, page); + mapping->nrpages++; + if (node) { + workingset_node_pages_inc(node); + /* + * Don't track node that contains actual pages. + * + * Avoid acquiring the list_lru lock if already + * untracked. The list_empty() test is safe as + * node->private_list is protected by + * mapping->tree_lock. + */ + if (!list_empty(&node->private_list)) + list_lru_del(&workingset_shadow_nodes, + &node->private_list); + } + return 0; +} + static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { - struct radix_tree_node *node; int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(nr != 1 && shadow, page); - if (shadow) { - mapping->nrexceptional += nr; - /* - * Make sure the nrexceptional update is committed before - * the nrpages update so that final truncate racing - * with reclaim does not see both counters 0 at the - * same time and miss a shadow entry. - */ - smp_wmb(); - } - mapping->nrpages -= nr; - for (i = 0; i < nr; i++) { - node = radix_tree_replace_clear_tags(&mapping->page_tree, - page->index + i, shadow); + struct radix_tree_node *node; + void **slot; + + __radix_tree_lookup(&mapping->page_tree, page->index + i, + &node, &slot); + + radix_tree_clear_tags(&mapping->page_tree, node, slot); + if (!node) { VM_BUG_ON_PAGE(nr != 1, page); - return; + /* + * We need a node to properly account shadow + * entries. Don't plant any without. XXX + */ + shadow = NULL; } + radix_tree_replace_slot(slot, shadow); + + if (!node) + break; + workingset_node_pages_dec(node); if (shadow) workingset_node_shadows_inc(node); @@ -163,6 +221,18 @@ static void page_cache_tree_delete(struct address_space *mapping, &node->private_list); } } + + if (shadow) { + mapping->nrexceptional += nr; + /* + * Make sure the nrexceptional update is committed before + * the nrpages update so that final truncate racing + * with reclaim does not see both counters 0 at the + * same time and miss a shadow entry. + */ + smp_wmb(); + } + mapping->nrpages -= nr; } /* @@ -561,9 +631,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(old, NULL); - error = radix_tree_insert(&mapping->page_tree, offset, new); + error = page_cache_tree_insert(mapping, new, NULL); BUG_ON(error); - mapping->nrpages++; /* * hugetlb pages do not participate in page cache accounting. @@ -584,62 +653,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -static int page_cache_tree_insert(struct address_space *mapping, - struct page *page, void **shadowp) -{ - struct radix_tree_node *node; - void **slot; - int error; - - error = __radix_tree_create(&mapping->page_tree, page->index, 0, - &node, &slot); - if (error) - return error; - if (*slot) { - void *p; - - p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); - if (!radix_tree_exceptional_entry(p)) - return -EEXIST; - - mapping->nrexceptional--; - if (!dax_mapping(mapping)) { - if (shadowp) - *shadowp = p; - if (node) - workingset_node_shadows_dec(node); - } else { - /* DAX can replace empty locked entry with a hole */ - WARN_ON_ONCE(p != - (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK)); - /* DAX accounts exceptional entries as normal pages */ - if (node) - workingset_node_pages_dec(node); - /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, - false); - } - } - radix_tree_replace_slot(slot, page); - mapping->nrpages++; - if (node) { - workingset_node_pages_inc(node); - /* - * Don't track node that contains actual pages. - * - * Avoid acquiring the list_lru lock if already - * untracked. The list_empty() test is safe as - * node->private_list is protected by - * mapping->tree_lock. - */ - if (!list_empty(&node->private_list)) - list_lru_del(&workingset_shadow_nodes, - &node->private_list); - } - return 0; -} - static int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 53ae6d00656a..283583fcb1e7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1165,7 +1165,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd) } /* See similar comment in do_numa_page for explanation */ - if (!(vma->vm_flags & VM_WRITE)) + if (!pmd_write(pmd)) flags |= TNF_NO_GROUP; /* @@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void) { struct rmap_item *rmap_item; - rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); + rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | + __GFP_NORETRY | __GFP_NOWARN); if (rmap_item) ksm_rmap_items++; return rmap_item; diff --git a/mm/memory.c b/mm/memory.c index 793fe0f9841c..f1a68049edff 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3395,7 +3395,7 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. */ - if (!(vma->vm_flags & VM_WRITE)) + if (!pte_write(pte)) flags |= TNF_NO_GROUP; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b58906b6215c..9d29ba0f7192 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1555,8 +1555,8 @@ static struct page *new_node_page(struct page *page, unsigned long private, { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; int nid = page_to_nid(page); - nodemask_t nmask = node_online_map; - struct page *new_page; + nodemask_t nmask = node_states[N_MEMORY]; + struct page *new_page = NULL; /* * TODO: allocate a destination hugepage from a nearest neighbor node, @@ -1567,14 +1567,14 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - if (nid != next_node_in(nid, nmask)) - node_clear(nid, nmask); + node_clear(nid, nmask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages_nodemask(gfp_mask, 0, + if (!nodes_empty(nmask)) + new_page = __alloc_pages_nodemask(gfp_mask, 0, node_zonelist(nid, gfp_mask), &nmask); if (!new_page) new_page = __alloc_pages(gfp_mask, 0, diff --git a/mm/mmap.c b/mm/mmap.c index ca9d91bca0d6..7a0707a48047 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,6 +88,11 @@ static void unmap_region(struct mm_struct *mm, * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes * + * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and + * MAP_PRIVATE: + * r: (no) no + * w: (no) no + * x: (yes) yes */ pgprot_t protection_map[16] = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, @@ -3063,6 +3068,14 @@ out: return ERR_PTR(ret); } +bool vma_is_special_mapping(const struct vm_area_struct *vma, + const struct vm_special_mapping *sm) +{ + return vma->vm_private_data == sm && + (vma->vm_ops == &special_mapping_vmops || + vma->vm_ops == &legacy_special_mapping_vmops); +} + /* * Called with mm->mmap_sem held for writing. * Insert a new vma covering the given region, with the given flags. diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f4cd7d8005c9..28d6f36a2d79 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2080,26 +2080,12 @@ void writeback_set_ratelimit(void) ratelimit_pages = 16; } -static int -ratelimit_handler(struct notifier_block *self, unsigned long action, - void *hcpu) +static int page_writeback_cpu_online(unsigned int cpu) { - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - case CPU_DEAD: - writeback_set_ratelimit(); - return NOTIFY_OK; - default: - return NOTIFY_DONE; - } + writeback_set_ratelimit(); + return 0; } -static struct notifier_block ratelimit_nb = { - .notifier_call = ratelimit_handler, - .next = NULL, -}; - /* * Called early on to tune the page writeback dirty limits. * @@ -2122,8 +2108,10 @@ void __init page_writeback_init(void) { BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL)); - writeback_set_ratelimit(); - register_cpu_notifier(&ratelimit_nb); + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online", + page_writeback_cpu_online, NULL); + cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL, + page_writeback_cpu_online); } /** diff --git a/mm/slab.c b/mm/slab.c index b67271024135..090fb26b3a39 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -886,6 +886,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) return 0; } +#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP) /* * Allocates and initializes node for a node on each slab cache, used for * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node @@ -908,6 +909,7 @@ static int init_cache_node_node(int node) return 0; } +#endif static int setup_kmem_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp, bool force_change) @@ -975,6 +977,8 @@ fail: return ret; } +#ifdef CONFIG_SMP + static void cpuup_canceled(long cpu) { struct kmem_cache *cachep; @@ -1075,65 +1079,54 @@ bad: return -ENOMEM; } -static int cpuup_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +int slab_prepare_cpu(unsigned int cpu) { - long cpu = (long)hcpu; - int err = 0; + int err; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - mutex_lock(&slab_mutex); - err = cpuup_prepare(cpu); - mutex_unlock(&slab_mutex); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - start_cpu_timer(cpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - /* - * Shutdown cache reaper. Note that the slab_mutex is - * held so that if cache_reap() is invoked it cannot do - * anything expensive but will only modify reap_work - * and reschedule the timer. - */ - cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu)); - /* Now the cache_reaper is guaranteed to be not running. */ - per_cpu(slab_reap_work, cpu).work.func = NULL; - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - start_cpu_timer(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - /* - * Even if all the cpus of a node are down, we don't free the - * kmem_cache_node of any cache. This to avoid a race between - * cpu_down, and a kmalloc allocation from another cpu for - * memory from the node of the cpu going down. The node - * structure is usually allocated from kmem_cache_create() and - * gets destroyed at kmem_cache_destroy(). - */ - /* fall through */ + mutex_lock(&slab_mutex); + err = cpuup_prepare(cpu); + mutex_unlock(&slab_mutex); + return err; +} + +/* + * This is called for a failed online attempt and for a successful + * offline. + * + * Even if all the cpus of a node are down, we don't free the + * kmem_list3 of any cache. This to avoid a race between cpu_down, and + * a kmalloc allocation from another cpu for memory from the node of + * the cpu going down. The list3 structure is usually allocated from + * kmem_cache_create() and gets destroyed at kmem_cache_destroy(). + */ +int slab_dead_cpu(unsigned int cpu) +{ + mutex_lock(&slab_mutex); + cpuup_canceled(cpu); + mutex_unlock(&slab_mutex); + return 0; +} #endif - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - mutex_lock(&slab_mutex); - cpuup_canceled(cpu); - mutex_unlock(&slab_mutex); - break; - } - return notifier_from_errno(err); + +static int slab_online_cpu(unsigned int cpu) +{ + start_cpu_timer(cpu); + return 0; } -static struct notifier_block cpucache_notifier = { - &cpuup_callback, NULL, 0 -}; +static int slab_offline_cpu(unsigned int cpu) +{ + /* + * Shutdown cache reaper. Note that the slab_mutex is held so + * that if cache_reap() is invoked it cannot do anything + * expensive but will only modify reap_work and reschedule the + * timer. + */ + cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu)); + /* Now the cache_reaper is guaranteed to be not running. */ + per_cpu(slab_reap_work, cpu).work.func = NULL; + return 0; +} #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) /* @@ -1336,12 +1329,6 @@ void __init kmem_cache_init_late(void) /* Done! */ slab_state = FULL; - /* - * Register a cpu startup notifier callback that initializes - * cpu_cache_get for all new cpus - */ - register_cpu_notifier(&cpucache_notifier); - #ifdef CONFIG_NUMA /* * Register a memory hotplug callback that initializes and frees @@ -1358,13 +1345,14 @@ void __init kmem_cache_init_late(void) static int __init cpucache_init(void) { - int cpu; + int ret; /* * Register the timers that return unneeded pages to the page allocator */ - for_each_online_cpu(cpu) - start_cpu_timer(cpu); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online", + slab_online_cpu, slab_offline_cpu); + WARN_ON(ret < 0); /* Done! */ slab_state = FULL; diff --git a/mm/slub.c b/mm/slub.c index 9adae58462f8..2b3e740609e9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -194,10 +194,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) #define __OBJECT_POISON 0x80000000UL /* Poison object */ #define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */ -#ifdef CONFIG_SMP -static struct notifier_block slab_notifier; -#endif - /* * Tracking user of a slab. */ @@ -2305,6 +2301,25 @@ static void flush_all(struct kmem_cache *s) } /* + * Use the cpu notifier to insure that the cpu slabs are flushed when + * necessary. + */ +static int slub_cpu_dead(unsigned int cpu) +{ + struct kmem_cache *s; + unsigned long flags; + + mutex_lock(&slab_mutex); + list_for_each_entry(s, &slab_caches, list) { + local_irq_save(flags); + __flush_cpu_slab(s, cpu); + local_irq_restore(flags); + } + mutex_unlock(&slab_mutex); + return 0; +} + +/* * Check if the objects in a per cpu structure fit numa * locality expectations. */ @@ -4144,9 +4159,8 @@ void __init kmem_cache_init(void) /* Setup random freelists for each cache */ init_freelist_randomization(); -#ifdef CONFIG_SMP - register_cpu_notifier(&slab_notifier); -#endif + cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, + slub_cpu_dead); pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n", cache_line_size(), @@ -4210,43 +4224,6 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags) return err; } -#ifdef CONFIG_SMP -/* - * Use the cpu notifier to insure that the cpu slabs are flushed when - * necessary. - */ -static int slab_cpuup_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - struct kmem_cache *s; - unsigned long flags; - - switch (action) { - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_caches, list) { - local_irq_save(flags); - __flush_cpu_slab(s, cpu); - local_irq_restore(flags); - } - mutex_unlock(&slab_mutex); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block slab_notifier = { - .notifier_call = slab_cpuup_callback -}; - -#endif - void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) { struct kmem_cache *s; diff --git a/mm/workingset.c b/mm/workingset.c index 69551cfae97b..617475f529f4 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ - - BUG_ON(!node->count); - BUG_ON(node->count & RADIX_TREE_COUNT_MASK); + BUG_ON(!workingset_node_shadows(node)); + BUG_ON(workingset_node_pages(node)); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { BUG_ON(!radix_tree_exceptional_entry(node->slots[i])); node->slots[i] = NULL; - BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); - node->count -= 1U << RADIX_TREE_COUNT_SHIFT; + workingset_node_shadows_dec(node); BUG_ON(!mapping->nrexceptional); mapping->nrexceptional--; } } - BUG_ON(node->count); + BUG_ON(workingset_node_shadows(node)); inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); if (!__radix_tree_delete_node(&mapping->page_tree, node)) BUG(); |