diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile | 7 | ||||
-rw-r--r-- | mm/compaction.c | 4 | ||||
-rw-r--r-- | mm/debug.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 312 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 3 | ||||
-rw-r--r-- | mm/mincore.c | 23 | ||||
-rw-r--r-- | mm/page_alloc.c | 82 | ||||
-rw-r--r-- | mm/shuffle.c | 207 | ||||
-rw-r--r-- | mm/shuffle.h | 64 | ||||
-rw-r--r-- | mm/vmalloc.c | 32 | ||||
-rw-r--r-- | mm/vmscan.c | 6 | ||||
-rw-r--r-- | mm/workingset.c | 7 |
12 files changed, 562 insertions, 187 deletions
diff --git a/mm/Makefile b/mm/Makefile index d210cc9d6f80..ac5e5ba78874 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -33,7 +33,7 @@ mmu-$(CONFIG_MMU) += process_vm_access.o endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ - maccess.o page_alloc.o page-writeback.o \ + maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ @@ -41,6 +41,11 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ interval_tree.o list_lru.o workingset.o \ debug.o $(mmu-y) +# Give 'page_alloc' its own module-parameter namespace +page-alloc-y := page_alloc.o +page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o + +obj-y += page-alloc.o obj-y += init-mm.o obj-y += memblock.o diff --git a/mm/compaction.c b/mm/compaction.c index 6cc4bea33dcb..cbac7277978a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1888,13 +1888,13 @@ static enum compact_result __compact_finished(struct compact_control *cc) bool can_steal; /* Job done if page is free of the right migratetype */ - if (!list_empty(&area->free_list[migratetype])) + if (!free_area_empty(area, migratetype)) return COMPACT_SUCCESS; #ifdef CONFIG_CMA /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ if (migratetype == MIGRATE_MOVABLE && - !list_empty(&area->free_list[MIGRATE_CMA])) + !free_area_empty(area, MIGRATE_CMA)) return COMPACT_SUCCESS; #endif /* diff --git a/mm/debug.c b/mm/debug.c index eee9c221280c..8345bb6e4769 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -67,7 +67,7 @@ void __dump_page(struct page *page, const char *reason) */ mapcount = PageSlab(page) ? 
0 : page_mapcount(page); - pr_warn("page:%px count:%d mapcount:%d mapping:%px index:%#lx", + pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx", page, page_ref_count(page), mapcount, page->mapping, page_to_pgoff(page)); if (PageCompound(page)) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 287933005e11..e50a2db5b4ff 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -687,10 +687,119 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) return mz; } -static unsigned long memcg_sum_events(struct mem_cgroup *memcg, - int event) +/** + * __mod_memcg_state - update cgroup memory statistics + * @memcg: the memory cgroup + * @idx: the stat item - can be enum memcg_stat_item or enum node_stat_item + * @val: delta to add to the counter, can be negative + */ +void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) +{ + long x; + + if (mem_cgroup_disabled()) + return; + + x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]); + if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { + struct mem_cgroup *mi; + + atomic_long_add(x, &memcg->vmstats_local[idx]); + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + atomic_long_add(x, &mi->vmstats[idx]); + x = 0; + } + __this_cpu_write(memcg->vmstats_percpu->stat[idx], x); +} + +static struct mem_cgroup_per_node * +parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid) +{ + struct mem_cgroup *parent; + + parent = parent_mem_cgroup(pn->memcg); + if (!parent) + return NULL; + return mem_cgroup_nodeinfo(parent, nid); +} + +/** + * __mod_lruvec_state - update lruvec memory statistics + * @lruvec: the lruvec + * @idx: the stat item + * @val: delta to add to the counter, can be negative + * + * The lruvec is the intersection of the NUMA node and a cgroup. This + * function updates all three counters that are affected by a + * change of state at this level: per-node, per-cgroup, per-lruvec. 
+ */ +void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val) { - return atomic_long_read(&memcg->events[event]); + pg_data_t *pgdat = lruvec_pgdat(lruvec); + struct mem_cgroup_per_node *pn; + struct mem_cgroup *memcg; + long x; + + /* Update node */ + __mod_node_page_state(pgdat, idx, val); + + if (mem_cgroup_disabled()) + return; + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + memcg = pn->memcg; + + /* Update memcg */ + __mod_memcg_state(memcg, idx, val); + + /* Update lruvec */ + x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); + if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { + struct mem_cgroup_per_node *pi; + + atomic_long_add(x, &pn->lruvec_stat_local[idx]); + for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id)) + atomic_long_add(x, &pi->lruvec_stat[idx]); + x = 0; + } + __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); +} + +/** + * __count_memcg_events - account VM events in a cgroup + * @memcg: the memory cgroup + * @idx: the event item + * @count: the number of events that occurred + */ +void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, + unsigned long count) +{ + unsigned long x; + + if (mem_cgroup_disabled()) + return; + + x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]); + if (unlikely(x > MEMCG_CHARGE_BATCH)) { + struct mem_cgroup *mi; + + atomic_long_add(x, &memcg->vmevents_local[idx]); + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + atomic_long_add(x, &mi->vmevents[idx]); + x = 0; + } + __this_cpu_write(memcg->vmstats_percpu->events[idx], x); +} + +static unsigned long memcg_events(struct mem_cgroup *memcg, int event) +{ + return atomic_long_read(&memcg->vmevents[event]); +} + +static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) +{ + return atomic_long_read(&memcg->vmevents_local[event]); } static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, @@ -722,7 +831,7 @@ static void 
mem_cgroup_charge_statistics(struct mem_cgroup *memcg, nr_pages = -nr_pages; /* for event */ } - __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages); + __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages); } static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, @@ -730,8 +839,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, { unsigned long val, next; - val = __this_cpu_read(memcg->stat_cpu->nr_page_events); - next = __this_cpu_read(memcg->stat_cpu->targets[target]); + val = __this_cpu_read(memcg->vmstats_percpu->nr_page_events); + next = __this_cpu_read(memcg->vmstats_percpu->targets[target]); /* from time_after() in jiffies.h */ if ((long)(next - val) < 0) { switch (target) { @@ -747,7 +856,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, default: break; } - __this_cpu_write(memcg->stat_cpu->targets[target], next); + __this_cpu_write(memcg->vmstats_percpu->targets[target], next); return true; } return false; @@ -1325,12 +1434,14 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) if (memcg1_stats[i] == MEMCG_SWAP && !do_swap_account) continue; pr_cont(" %s:%luKB", memcg1_stat_names[i], - K(memcg_page_state(iter, memcg1_stats[i]))); + K(memcg_page_state_local(iter, + memcg1_stats[i]))); } for (i = 0; i < NR_LRU_LISTS; i++) pr_cont(" %s:%luKB", mem_cgroup_lru_names[i], - K(memcg_page_state(iter, NR_LRU_BASE + i))); + K(memcg_page_state_local(iter, + NR_LRU_BASE + i))); pr_cont("\n"); } @@ -2076,7 +2187,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) static int memcg_hotplug_cpu_dead(unsigned int cpu) { struct memcg_stock_pcp *stock; - struct mem_cgroup *memcg; + struct mem_cgroup *memcg, *mi; stock = &per_cpu(memcg_stock, cpu); drain_stock(stock); @@ -2088,9 +2199,12 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) int nid; long x; - x = this_cpu_xchg(memcg->stat_cpu->count[i], 0); - if (x) - atomic_long_add(x, &memcg->stat[i]); + x = 
this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0); + if (x) { + atomic_long_add(x, &memcg->vmstats_local[i]); + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + atomic_long_add(x, &memcg->vmstats[i]); + } if (i >= NR_VM_NODE_STAT_ITEMS) continue; @@ -2100,17 +2214,24 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) pn = mem_cgroup_nodeinfo(memcg, nid); x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0); - if (x) - atomic_long_add(x, &pn->lruvec_stat[i]); + if (x) { + atomic_long_add(x, &pn->lruvec_stat_local[i]); + do { + atomic_long_add(x, &pn->lruvec_stat[i]); + } while ((pn = parent_nodeinfo(pn, nid))); + } } } for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { long x; - x = this_cpu_xchg(memcg->stat_cpu->events[i], 0); - if (x) - atomic_long_add(x, &memcg->events[i]); + x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0); + if (x) { + atomic_long_add(x, &memcg->vmevents_local[i]); + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + atomic_long_add(x, &memcg->vmevents[i]); + } } } @@ -2940,50 +3061,15 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, return retval; } -struct accumulated_stats { - unsigned long stat[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - unsigned long lru_pages[NR_LRU_LISTS]; - const unsigned int *stats_array; - const unsigned int *events_array; - int stats_size; - int events_size; -}; - -static void accumulate_memcg_tree(struct mem_cgroup *memcg, - struct accumulated_stats *acc) -{ - struct mem_cgroup *mi; - int i; - - for_each_mem_cgroup_tree(mi, memcg) { - for (i = 0; i < acc->stats_size; i++) - acc->stat[i] += memcg_page_state(mi, - acc->stats_array ? acc->stats_array[i] : i); - - for (i = 0; i < acc->events_size; i++) - acc->events[i] += memcg_sum_events(mi, - acc->events_array ? 
acc->events_array[i] : i); - - for (i = 0; i < NR_LRU_LISTS; i++) - acc->lru_pages[i] += memcg_page_state(mi, - NR_LRU_BASE + i); - } -} - static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) { - unsigned long val = 0; + unsigned long val; if (mem_cgroup_is_root(memcg)) { - struct mem_cgroup *iter; - - for_each_mem_cgroup_tree(iter, memcg) { - val += memcg_page_state(iter, MEMCG_CACHE); - val += memcg_page_state(iter, MEMCG_RSS); - if (swap) - val += memcg_page_state(iter, MEMCG_SWAP); - } + val = memcg_page_state(memcg, MEMCG_CACHE) + + memcg_page_state(memcg, MEMCG_RSS); + if (swap) + val += memcg_page_state(memcg, MEMCG_SWAP); } else { if (!swap) val = page_counter_read(&memcg->memory); @@ -3324,7 +3410,7 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - nr += lruvec_page_state(lruvec, NR_LRU_BASE + lru); + nr += lruvec_page_state_local(lruvec, NR_LRU_BASE + lru); } return nr; } @@ -3338,7 +3424,7 @@ static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - nr += memcg_page_state(memcg, NR_LRU_BASE + lru); + nr += memcg_page_state_local(memcg, NR_LRU_BASE + lru); } return nr; } @@ -3414,7 +3500,6 @@ static int memcg_stat_show(struct seq_file *m, void *v) unsigned long memory, memsw; struct mem_cgroup *mi; unsigned int i; - struct accumulated_stats acc; BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); @@ -3423,17 +3508,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; seq_printf(m, "%s %lu\n", memcg1_stat_names[i], - memcg_page_state(memcg, memcg1_stats[i]) * + memcg_page_state_local(memcg, memcg1_stats[i]) * PAGE_SIZE); } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) seq_printf(m, "%s %lu\n", memcg1_event_names[i], 
- memcg_sum_events(memcg, memcg1_events[i])); + memcg_events_local(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], - memcg_page_state(memcg, NR_LRU_BASE + i) * + memcg_page_state_local(memcg, NR_LRU_BASE + i) * PAGE_SIZE); /* Hierarchical information */ @@ -3448,27 +3533,21 @@ static int memcg_stat_show(struct seq_file *m, void *v) seq_printf(m, "hierarchical_memsw_limit %llu\n", (u64)memsw * PAGE_SIZE); - memset(&acc, 0, sizeof(acc)); - acc.stats_size = ARRAY_SIZE(memcg1_stats); - acc.stats_array = memcg1_stats; - acc.events_size = ARRAY_SIZE(memcg1_events); - acc.events_array = memcg1_events; - accumulate_memcg_tree(memcg, &acc); - for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], - (u64)acc.stat[i] * PAGE_SIZE); + (u64)memcg_page_state(memcg, i) * PAGE_SIZE); } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], - (u64)acc.events[i]); + (u64)memcg_events(memcg, i)); for (i = 0; i < NR_LRU_LISTS; i++) seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], - (u64)acc.lru_pages[i] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * + PAGE_SIZE); #ifdef CONFIG_DEBUG_VM { @@ -3901,11 +3980,11 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) */ static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx) { - long x = atomic_long_read(&memcg->stat[idx]); + long x = atomic_long_read(&memcg->vmstats[idx]); int cpu; for_each_online_cpu(cpu) - x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx]; + x += per_cpu_ptr(memcg->vmstats_percpu, cpu)->stat[idx]; if (x < 0) x = 0; return x; @@ -4445,7 +4524,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); - free_percpu(memcg->stat_cpu); + free_percpu(memcg->vmstats_percpu); 
kfree(memcg); } @@ -4474,8 +4553,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg->id.id < 0) goto fail; - memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu); - if (!memcg->stat_cpu) + memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu); + if (!memcg->vmstats_percpu) goto fail; for_each_node(node) @@ -5561,7 +5640,6 @@ static int memory_events_show(struct seq_file *m, void *v) static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); - struct accumulated_stats acc; int i; /* @@ -5575,31 +5653,27 @@ static int memory_stat_show(struct seq_file *m, void *v) * Current memory state: */ - memset(&acc, 0, sizeof(acc)); - acc.stats_size = MEMCG_NR_STAT; - acc.events_size = NR_VM_EVENT_ITEMS; - accumulate_memcg_tree(memcg, &acc); - seq_printf(m, "anon %llu\n", - (u64)acc.stat[MEMCG_RSS] * PAGE_SIZE); + (u64)memcg_page_state(memcg, MEMCG_RSS) * PAGE_SIZE); seq_printf(m, "file %llu\n", - (u64)acc.stat[MEMCG_CACHE] * PAGE_SIZE); + (u64)memcg_page_state(memcg, MEMCG_CACHE) * PAGE_SIZE); seq_printf(m, "kernel_stack %llu\n", - (u64)acc.stat[MEMCG_KERNEL_STACK_KB] * 1024); + (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * 1024); seq_printf(m, "slab %llu\n", - (u64)(acc.stat[NR_SLAB_RECLAIMABLE] + - acc.stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); + (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) + + memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE)) * + PAGE_SIZE); seq_printf(m, "sock %llu\n", - (u64)acc.stat[MEMCG_SOCK] * PAGE_SIZE); + (u64)memcg_page_state(memcg, MEMCG_SOCK) * PAGE_SIZE); seq_printf(m, "shmem %llu\n", - (u64)acc.stat[NR_SHMEM] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_SHMEM) * PAGE_SIZE); seq_printf(m, "file_mapped %llu\n", - (u64)acc.stat[NR_FILE_MAPPED] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_FILE_MAPPED) * PAGE_SIZE); seq_printf(m, "file_dirty %llu\n", - (u64)acc.stat[NR_FILE_DIRTY] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_FILE_DIRTY) * 
PAGE_SIZE); seq_printf(m, "file_writeback %llu\n", - (u64)acc.stat[NR_WRITEBACK] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_WRITEBACK) * PAGE_SIZE); /* * TODO: We should eventually replace our own MEMCG_RSS_HUGE counter @@ -5608,43 +5682,47 @@ static int memory_stat_show(struct seq_file *m, void *v) * where the page->mem_cgroup is set up and stable. */ seq_printf(m, "anon_thp %llu\n", - (u64)acc.stat[MEMCG_RSS_HUGE] * PAGE_SIZE); + (u64)memcg_page_state(memcg, MEMCG_RSS_HUGE) * PAGE_SIZE); for (i = 0; i < NR_LRU_LISTS; i++) seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i], - (u64)acc.lru_pages[i] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * + PAGE_SIZE); seq_printf(m, "slab_reclaimable %llu\n", - (u64)acc.stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) * + PAGE_SIZE); seq_printf(m, "slab_unreclaimable %llu\n", - (u64)acc.stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE); + (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE) * + PAGE_SIZE); /* Accumulated memory events */ - seq_printf(m, "pgfault %lu\n", acc.events[PGFAULT]); - seq_printf(m, "pgmajfault %lu\n", acc.events[PGMAJFAULT]); + seq_printf(m, "pgfault %lu\n", memcg_events(memcg, PGFAULT)); + seq_printf(m, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT)); seq_printf(m, "workingset_refault %lu\n", - acc.stat[WORKINGSET_REFAULT]); + memcg_page_state(memcg, WORKINGSET_REFAULT)); seq_printf(m, "workingset_activate %lu\n", - acc.stat[WORKINGSET_ACTIVATE]); + memcg_page_state(memcg, WORKINGSET_ACTIVATE)); seq_printf(m, "workingset_nodereclaim %lu\n", - acc.stat[WORKINGSET_NODERECLAIM]); - - seq_printf(m, "pgrefill %lu\n", acc.events[PGREFILL]); - seq_printf(m, "pgscan %lu\n", acc.events[PGSCAN_KSWAPD] + - acc.events[PGSCAN_DIRECT]); - seq_printf(m, "pgsteal %lu\n", acc.events[PGSTEAL_KSWAPD] + - acc.events[PGSTEAL_DIRECT]); - seq_printf(m, "pgactivate %lu\n", acc.events[PGACTIVATE]); - seq_printf(m, "pgdeactivate %lu\n", acc.events[PGDEACTIVATE]); - 
seq_printf(m, "pglazyfree %lu\n", acc.events[PGLAZYFREE]); - seq_printf(m, "pglazyfreed %lu\n", acc.events[PGLAZYFREED]); + memcg_page_state(memcg, WORKINGSET_NODERECLAIM)); + + seq_printf(m, "pgrefill %lu\n", memcg_events(memcg, PGREFILL)); + seq_printf(m, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + + memcg_events(memcg, PGSCAN_DIRECT)); + seq_printf(m, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + + memcg_events(memcg, PGSTEAL_DIRECT)); + seq_printf(m, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE)); + seq_printf(m, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE)); + seq_printf(m, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE)); + seq_printf(m, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED)); #ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_printf(m, "thp_fault_alloc %lu\n", acc.events[THP_FAULT_ALLOC]); + seq_printf(m, "thp_fault_alloc %lu\n", + memcg_events(memcg, THP_FAULT_ALLOC)); seq_printf(m, "thp_collapse_alloc %lu\n", - acc.events[THP_COLLAPSE_ALLOC]); + memcg_events(memcg, THP_COLLAPSE_ALLOC)); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return 0; @@ -6080,7 +6158,7 @@ static void uncharge_batch(const struct uncharge_gather *ug) __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); - __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages); + __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, nr_pages); memcg_check_events(ug->memcg, ug->dummy_page); local_irq_restore(flags); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6c0c4f48638e..328878b6799d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -39,6 +39,7 @@ #include <asm/tlbflush.h> #include "internal.h" +#include "shuffle.h" /* * online_page_callback contains pointer to current page onlining function. 
@@ -891,6 +892,8 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ zone->zone_pgdat->node_present_pages += onlined_pages; pgdat_resize_unlock(zone->zone_pgdat, &flags); + shuffle_zone(zone); + if (onlined_pages) { node_states_set_node(nid, &arg); if (need_zonelists_rebuild) diff --git a/mm/mincore.c b/mm/mincore.c index 218099b5ed31..c3f058bd0faf 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -169,6 +169,22 @@ out: return 0; } +static inline bool can_do_mincore(struct vm_area_struct *vma) +{ + if (vma_is_anonymous(vma)) + return true; + if (!vma->vm_file) + return false; + /* + * Reveal pagecache information only for non-anonymous mappings that + * correspond to the files the calling process could (if tried) open + * for writing; otherwise we'd be including shared non-exclusive + * mappings, which opens a side channel. + */ + return inode_owner_or_capable(file_inode(vma->vm_file)) || + inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; +} + /* * Do a chunk of "sys_mincore()". 
We've already checked * all the arguments, we hold the mmap semaphore: we should @@ -189,8 +205,13 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v vma = find_vma(current->mm, addr); if (!vma || addr < vma->vm_start) return -ENOMEM; - mincore_walk.mm = vma->vm_mm; end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); + if (!can_do_mincore(vma)) { + unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); + memset(vec, 1, pages); + return pages; + } + mincore_walk.mm = vma->vm_mm; err = walk_page_range(addr, end, &mincore_walk); if (err < 0) return err; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f2f3fb4921d1..3b13d3914176 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -43,6 +43,7 @@ #include <linux/mempolicy.h> #include <linux/memremap.h> #include <linux/stop_machine.h> +#include <linux/random.h> #include <linux/sort.h> #include <linux/pfn.h> #include <linux/backing-dev.h> @@ -72,6 +73,7 @@ #include <asm/tlbflush.h> #include <asm/div64.h> #include "internal.h" +#include "shuffle.h" /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); @@ -755,12 +757,6 @@ static inline void set_page_order(struct page *page, unsigned int order) __SetPageBuddy(page); } -static inline void rmv_page_order(struct page *page) -{ - __ClearPageBuddy(page); - set_page_private(page, 0); -} - /* * This function checks whether a page is free && is the buddy * we can coalesce a page and its buddy if @@ -918,13 +914,10 @@ continue_merging: * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page, * merge with it and move up one order. 
*/ - if (page_is_guard(buddy)) { + if (page_is_guard(buddy)) clear_page_guard(zone, buddy, order, migratetype); - } else { - list_del(&buddy->lru); - zone->free_area[order].nr_free--; - rmv_page_order(buddy); - } + else + del_page_from_free_area(buddy, &zone->free_area[order]); combined_pfn = buddy_pfn & pfn; page = page + (combined_pfn - pfn); pfn = combined_pfn; @@ -966,7 +959,8 @@ done_merging: * so it's less likely to be used soon and more likely to be merged * as a higher order page */ - if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) { + if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn) + && !is_shuffle_order(order)) { struct page *higher_page, *higher_buddy; combined_pfn = buddy_pfn & pfn; higher_page = page + (combined_pfn - pfn); @@ -974,15 +968,18 @@ done_merging: higher_buddy = higher_page + (buddy_pfn - combined_pfn); if (pfn_valid_within(buddy_pfn) && page_is_buddy(higher_page, higher_buddy, order + 1)) { - list_add_tail(&page->lru, - &zone->free_area[order].free_list[migratetype]); - goto out; + add_to_free_area_tail(page, &zone->free_area[order], + migratetype); + return; } } - list_add(&page->lru, &zone->free_area[order].free_list[migratetype]); -out: - zone->free_area[order].nr_free++; + if (is_shuffle_order(order)) + add_to_free_area_random(page, &zone->free_area[order], + migratetype); + else + add_to_free_area(page, &zone->free_area[order], migratetype); + } /* @@ -1874,9 +1871,9 @@ _deferred_grow_zone(struct zone *zone, unsigned int order) void __init page_alloc_init_late(void) { struct zone *zone; + int nid; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT - int nid; /* There will be num_node_state(N_MEMORY) threads */ atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY)); @@ -1900,6 +1897,9 @@ void __init page_alloc_init_late(void) /* Discard memblock private memory */ memblock_discard(); + for_each_node_state(nid, N_MEMORY) + shuffle_free_memory(NODE_DATA(nid)); + for_each_populated_zone(zone) set_zone_contiguous(zone); } @@ 
-1970,8 +1970,7 @@ static inline void expand(struct zone *zone, struct page *page, if (set_page_guard(zone, &page[size], high, migratetype)) continue; - list_add(&page[size].lru, &area->free_list[migratetype]); - area->nr_free++; + add_to_free_area(&page[size], area, migratetype); set_page_order(&page[size], high); } } @@ -1986,7 +1985,7 @@ static void check_new_page_bad(struct page *page) if (unlikely(page->mapping != NULL)) bad_reason = "non-NULL mapping"; if (unlikely(page_ref_count(page) != 0)) - bad_reason = "nonzero _count"; + bad_reason = "nonzero _refcount"; if (unlikely(page->flags & __PG_HWPOISON)) { bad_reason = "HWPoisoned (hardware-corrupted)"; bad_flags = __PG_HWPOISON; @@ -2113,13 +2112,10 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, /* Find a page of the appropriate size in the preferred list */ for (current_order = order; current_order < MAX_ORDER; ++current_order) { area = &(zone->free_area[current_order]); - page = list_first_entry_or_null(&area->free_list[migratetype], - struct page, lru); + page = get_page_from_free_area(area, migratetype); if (!page) continue; - list_del(&page->lru); - rmv_page_order(page); - area->nr_free--; + del_page_from_free_area(page, area); expand(zone, page, order, current_order, area, migratetype); set_pcppage_migratetype(page, migratetype); return page; @@ -2205,8 +2201,7 @@ static int move_freepages(struct zone *zone, } order = page_order(page); - list_move(&page->lru, - &zone->free_area[order].free_list[migratetype]); + move_to_free_area(page, &zone->free_area[order], migratetype); page += 1 << order; pages_moved += 1 << order; } @@ -2394,7 +2389,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, single_page: area = &zone->free_area[current_order]; - list_move(&page->lru, &area->free_list[start_type]); + move_to_free_area(page, area, start_type); } /* @@ -2418,7 +2413,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, if (fallback_mt 
== MIGRATE_TYPES) break; - if (list_empty(&area->free_list[fallback_mt])) + if (free_area_empty(area, fallback_mt)) continue; if (can_steal_fallback(order, migratetype)) @@ -2505,9 +2500,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, for (order = 0; order < MAX_ORDER; order++) { struct free_area *area = &(zone->free_area[order]); - page = list_first_entry_or_null( - &area->free_list[MIGRATE_HIGHATOMIC], - struct page, lru); + page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); if (!page) continue; @@ -2630,8 +2623,7 @@ find_smallest: VM_BUG_ON(current_order == MAX_ORDER); do_steal: - page = list_first_entry(&area->free_list[fallback_mt], - struct page, lru); + page = get_page_from_free_area(area, fallback_mt); steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, can_steal); @@ -3068,6 +3060,7 @@ EXPORT_SYMBOL_GPL(split_page); int __isolate_free_page(struct page *page, unsigned int order) { + struct free_area *area = &page_zone(page)->free_area[order]; unsigned long watermark; struct zone *zone; int mt; @@ -3092,9 +3085,8 @@ int __isolate_free_page(struct page *page, unsigned int order) } /* Remove page from free list */ - list_del(&page->lru); - zone->free_area[order].nr_free--; - rmv_page_order(page); + + del_page_from_free_area(page, area); /* * Set the pageblock if the isolated page is at least half of a @@ -3391,13 +3383,13 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, continue; for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { - if (!list_empty(&area->free_list[mt])) + if (!free_area_empty(area, mt)) return true; } #ifdef CONFIG_CMA if ((alloc_flags & ALLOC_CMA) && - !list_empty(&area->free_list[MIGRATE_CMA])) { + !free_area_empty(area, MIGRATE_CMA)) { return true; } #endif @@ -5324,7 +5316,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) types[order] = 0; for (type = 0; type < MIGRATE_TYPES; type++) { - if (!list_empty(&area->free_list[type])) + if 
(!free_area_empty(area, type))
 					types[order] |= 1 << type;
 			}
 		}
@@ -8497,9 +8489,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		pr_info("remove from free list %lx %d %lx\n",
 			pfn, 1 << order, end_pfn);
 #endif
-		list_del(&page->lru);
-		rmv_page_order(page);
-		zone->free_area[order].nr_free--;
+		del_page_from_free_area(page, &zone->free_area[order]);
 		for (i = 0; i < (1 << order); i++)
 			SetPageReserved((page+i));
 		pfn += (1 << order);
diff --git a/mm/shuffle.c b/mm/shuffle.c
new file mode 100644
index 000000000000..3ce12481b1dc
--- /dev/null
+++ b/mm/shuffle.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/mmzone.h>
+#include <linux/random.h>
+#include <linux/moduleparam.h>
+#include "internal.h"
+#include "shuffle.h"
+
+DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
+static unsigned long shuffle_state __ro_after_init;
+
+/*
+ * Depending on the architecture, module parameter parsing may run
+ * before, or after the cache detection. SHUFFLE_FORCE_DISABLE prevents,
+ * or reverts the enabling of the shuffle implementation. SHUFFLE_ENABLE
+ * attempts to turn on the implementation, but aborts if it finds
+ * SHUFFLE_FORCE_DISABLE already set.
+ */
+__meminit void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
+{
+	if (ctl == SHUFFLE_FORCE_DISABLE)
+		set_bit(SHUFFLE_FORCE_DISABLE, &shuffle_state);
+
+	if (test_bit(SHUFFLE_FORCE_DISABLE, &shuffle_state)) {
+		if (test_and_clear_bit(SHUFFLE_ENABLE, &shuffle_state))
+			static_branch_disable(&page_alloc_shuffle_key);
+	} else if (ctl == SHUFFLE_ENABLE
+			&& !test_and_set_bit(SHUFFLE_ENABLE, &shuffle_state))
+		static_branch_enable(&page_alloc_shuffle_key);
+}
+
+static bool shuffle_param;
+static int shuffle_show(char *buffer, const struct kernel_param *kp)
+{
+	return sprintf(buffer, "%c\n", test_bit(SHUFFLE_ENABLE, &shuffle_state)
+			? 'Y' : 'N');
+}
+
+static __meminit int shuffle_store(const char *val,
+		const struct kernel_param *kp)
+{
+	int rc = param_set_bool(val, kp);
+
+	if (rc < 0)
+		return rc;
+	if (shuffle_param)
+		page_alloc_shuffle(SHUFFLE_ENABLE);
+	else
+		page_alloc_shuffle(SHUFFLE_FORCE_DISABLE);
+	return 0;
+}
+module_param_call(shuffle, shuffle_store, shuffle_show, &shuffle_param, 0400);
+
+/*
+ * For two pages to be swapped in the shuffle, they must be free (on a
+ * 'free_area' lru), have the same order, and have the same migratetype.
+ */
+static struct page * __meminit shuffle_valid_page(unsigned long pfn, int order)
+{
+	struct page *page;
+
+	/*
+	 * Given we're dealing with randomly selected pfns in a zone we
+	 * need to ask questions like...
+	 */
+
+	/* ...is the pfn even in the memmap? */
+	if (!pfn_valid_within(pfn))
+		return NULL;
+
+	/* ...is the pfn in a present section or a hole? */
+	if (!pfn_present(pfn))
+		return NULL;
+
+	/* ...is the page free and currently on a free_area list? */
+	page = pfn_to_page(pfn);
+	if (!PageBuddy(page))
+		return NULL;
+
+	/*
+	 * ...is the page on the same list as the page we will
+	 * shuffle it with?
+	 */
+	if (page_order(page) != order)
+		return NULL;
+
+	return page;
+}
+
+/*
+ * Fisher-Yates shuffle the freelist which prescribes iterating through an
+ * array, pfns in this case, and randomly swapping each entry with another in
+ * the span, end_pfn - start_pfn.
+ *
+ * To keep the implementation simple it does not attempt to correct for sources
+ * of bias in the distribution, like modulo bias or pseudo-random number
+ * generator bias. I.e. the expectation is that this shuffling raises the bar
+ * for attacks that exploit the predictability of page allocations, but need not
+ * be a perfect shuffle.
+ */
+#define SHUFFLE_RETRY 10
+void __meminit __shuffle_zone(struct zone *z)
+{
+	unsigned long i, flags;
+	unsigned long start_pfn = z->zone_start_pfn;
+	unsigned long end_pfn = zone_end_pfn(z);
+	const int order = SHUFFLE_ORDER;
+	const int order_pages = 1 << order;
+
+	spin_lock_irqsave(&z->lock, flags);
+	start_pfn = ALIGN(start_pfn, order_pages);
+	for (i = start_pfn; i < end_pfn; i += order_pages) {
+		unsigned long j;
+		int migratetype, retry;
+		struct page *page_i, *page_j;
+
+		/*
+		 * We expect page_i, in the sub-range of a zone being added
+		 * (@start_pfn to @end_pfn), to more likely be valid compared to
+		 * page_j randomly selected in the span @zone_start_pfn to
+		 * @spanned_pages.
+		 */
+		page_i = shuffle_valid_page(i, order);
+		if (!page_i)
+			continue;
+
+		for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
+			/*
+			 * Pick a random order aligned page in the zone span as
+			 * a swap target. If the selected pfn is a hole, retry
+			 * up to SHUFFLE_RETRY attempts to find a random valid pfn
+			 * in the zone.
+			 */
+			j = z->zone_start_pfn +
+				ALIGN_DOWN(get_random_long() % z->spanned_pages,
+						order_pages);
+			page_j = shuffle_valid_page(j, order);
+			if (page_j && page_j != page_i)
+				break;
+		}
+		if (retry >= SHUFFLE_RETRY) {
+			pr_debug("%s: failed to swap %#lx\n", __func__, i);
+			continue;
+		}
+
+		/*
+		 * Each migratetype corresponds to its own list, make sure the
+		 * types match otherwise we're moving pages to lists where they
+		 * do not belong.
+		 */
+		migratetype = get_pageblock_migratetype(page_i);
+		if (get_pageblock_migratetype(page_j) != migratetype) {
+			pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
+			continue;
+		}
+
+		list_swap(&page_i->lru, &page_j->lru);
+
+		pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);
+
+		/* take it easy on the zone lock */
+		if ((i % (100 * order_pages)) == 0) {
+			spin_unlock_irqrestore(&z->lock, flags);
+			cond_resched();
+			spin_lock_irqsave(&z->lock, flags);
+		}
+	}
+	spin_unlock_irqrestore(&z->lock, flags);
+}
+
+/**
+ * __shuffle_free_memory - reduce the predictability of the page allocator
+ * @pgdat: node page data
+ */
+void __meminit __shuffle_free_memory(pg_data_t *pgdat)
+{
+	struct zone *z;
+
+	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+		shuffle_zone(z);
+}
+
+void add_to_free_area_random(struct page *page, struct free_area *area,
+		int migratetype)
+{
+	static u64 rand;
+	static u8 rand_bits;
+
+	/*
+	 * The lack of locking is deliberate. If 2 threads race to
+	 * update the rand state it just adds to the entropy.
+	 */
+	if (rand_bits == 0) {
+		rand_bits = 64;
+		rand = get_random_u64();
+	}
+
+	if (rand & 1)
+		add_to_free_area(page, area, migratetype);
+	else
+		add_to_free_area_tail(page, area, migratetype);
+	rand_bits--;
+	rand >>= 1;
+}
diff --git a/mm/shuffle.h b/mm/shuffle.h
new file mode 100644
index 000000000000..777a257a0d2f
--- /dev/null
+++ b/mm/shuffle.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _MM_SHUFFLE_H
+#define _MM_SHUFFLE_H
+#include <linux/jump_label.h>
+
+/*
+ * SHUFFLE_ENABLE is called from the command line enabling path, or by
+ * platform-firmware enabling that indicates the presence of a
+ * direct-mapped memory-side-cache. SHUFFLE_FORCE_DISABLE is called from
+ * the command line path and overrides any previous or future
+ * SHUFFLE_ENABLE.
+ */
+enum mm_shuffle_ctl {
+	SHUFFLE_ENABLE,
+	SHUFFLE_FORCE_DISABLE,
+};
+
+#define SHUFFLE_ORDER (MAX_ORDER-1)
+
+#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR
+DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
+extern void page_alloc_shuffle(enum mm_shuffle_ctl ctl);
+extern void __shuffle_free_memory(pg_data_t *pgdat);
+static inline void shuffle_free_memory(pg_data_t *pgdat)
+{
+	if (!static_branch_unlikely(&page_alloc_shuffle_key))
+		return;
+	__shuffle_free_memory(pgdat);
+}
+
+extern void __shuffle_zone(struct zone *z);
+static inline void shuffle_zone(struct zone *z)
+{
+	if (!static_branch_unlikely(&page_alloc_shuffle_key))
+		return;
+	__shuffle_zone(z);
+}
+
+static inline bool is_shuffle_order(int order)
+{
+	if (!static_branch_unlikely(&page_alloc_shuffle_key))
+		return false;
+	return order >= SHUFFLE_ORDER;
+}
+#else
+static inline void shuffle_free_memory(pg_data_t *pgdat)
+{
+}
+
+static inline void shuffle_zone(struct zone *z)
+{
+}
+
+static inline void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
+{
+}
+
+static inline bool is_shuffle_order(int order)
+{
+	return false;
+}
+#endif
+#endif /* _MM_SHUFFLE_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e5e9e1fcac01..67bbb8d2a0a8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -633,7 +633,7 @@ static unsigned long lazy_max_pages(void)
 	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
 }
 
-static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
 
 /*
  * Serialize vmap purging.  There is no actual criticial section protected
@@ -651,7 +651,7 @@ static void purge_fragmented_blocks_allcpus(void);
  */
 void set_iounmap_nonlazy(void)
 {
-	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
+	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
 }
 
 /*
@@ -659,34 +659,40 @@ void set_iounmap_nonlazy(void)
  */
 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 {
+	unsigned long resched_threshold;
 	struct llist_node *valist;
 	struct vmap_area *va;
 	struct vmap_area *n_va;
-	bool do_free = false;
 
 	lockdep_assert_held(&vmap_purge_lock);
 
 	valist = llist_del_all(&vmap_purge_list);
+	if (unlikely(valist == NULL))
+		return false;
+
+	/*
+	 * TODO: to calculate a flush range without looping.
+	 * The list can be up to lazy_max_pages() elements.
+	 */
 	llist_for_each_entry(va, valist, purge_list) {
 		if (va->va_start < start)
 			start = va->va_start;
 		if (va->va_end > end)
 			end = va->va_end;
-		do_free = true;
 	}
 
-	if (!do_free)
-		return false;
-
 	flush_tlb_kernel_range(start, end);
+	resched_threshold = lazy_max_pages() << 1;
 
 	spin_lock(&vmap_area_lock);
 	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
-		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
+		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
 
 		__free_vmap_area(va);
-		atomic_sub(nr, &vmap_lazy_nr);
-		cond_resched_lock(&vmap_area_lock);
+		atomic_long_sub(nr, &vmap_lazy_nr);
+
+		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
+			cond_resched_lock(&vmap_area_lock);
 	}
 	spin_unlock(&vmap_area_lock);
 	return true;
@@ -722,10 +728,10 @@ static void purge_vmap_area_lazy(void)
  */
 static void free_vmap_area_noflush(struct vmap_area *va)
 {
-	int nr_lazy;
+	unsigned long nr_lazy;
 
-	nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
-				    &vmap_lazy_nr);
+	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
+				PAGE_SHIFT, &vmap_lazy_nr);
 
 	/* After this point, we may free va at any time */
 	llist_add(&va->purge_list, &vmap_purge_list);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d96c54703948..7acd0afdfc2a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -346,7 +346,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
 	int zid;
 
 	if (!mem_cgroup_disabled())
-		lru_size = lruvec_page_state(lruvec, NR_LRU_BASE + lru);
+		lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
 	else
 		lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
 
@@ -2150,7 +2150,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 	 * is being established. Disable active list protection to get
 	 * rid of the stale workingset quickly.
 	 */
-	refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
+	refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
 	if (file && actual_reclaim && lruvec->refaults != refaults) {
 		inactive_ratio = 0;
 	} else {
@@ -2912,7 +2912,7 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
 		struct lruvec *lruvec;
 
 		lruvec = mem_cgroup_lruvec(pgdat, memcg);
-		refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
+		refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
 		lruvec->refaults = refaults;
 	} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
 }
diff --git a/mm/workingset.c b/mm/workingset.c
index 6419baebd306..e0b4edcb88c8 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -430,9 +430,10 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 
 		lruvec = mem_cgroup_lruvec(NODE_DATA(sc->nid), sc->memcg);
 		for (pages = 0, i = 0; i < NR_LRU_LISTS; i++)
-			pages += lruvec_page_state(lruvec, NR_LRU_BASE + i);
-		pages += lruvec_page_state(lruvec, NR_SLAB_RECLAIMABLE);
-		pages += lruvec_page_state(lruvec, NR_SLAB_UNRECLAIMABLE);
+			pages += lruvec_page_state_local(lruvec,
+							 NR_LRU_BASE + i);
+		pages += lruvec_page_state_local(lruvec, NR_SLAB_RECLAIMABLE);
+		pages += lruvec_page_state_local(lruvec, NR_SLAB_UNRECLAIMABLE);
 	} else
 #endif
 		pages = node_present_pages(sc->nid);
|