Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c   |  20
-rw-r--r--  mm/cleancache.c    |  10
-rw-r--r--  mm/cma_debug.c     |  25
-rw-r--r--  mm/compaction.c    |   2
-rw-r--r--  mm/debug.c         |  18
-rw-r--r--  mm/dmapool.c       |   2
-rw-r--r--  mm/failslab.c      |   2
-rw-r--r--  mm/frontswap.c     |  11
-rw-r--r--  mm/gup.c           |   2
-rw-r--r--  mm/huge_memory.c   |   2
-rw-r--r--  mm/hugetlb.c       |   8
-rw-r--r--  mm/init-mm.c       |  11
-rw-r--r--  mm/kasan/kasan.c   |   5
-rw-r--r--  mm/ksm.c           |  14
-rw-r--r--  mm/memblock.c      |  15
-rw-r--r--  mm/memcontrol.c    |  27
-rw-r--r--  mm/memfd.c         |   2
-rw-r--r--  mm/memory.c        |  31
-rw-r--r--  mm/mempolicy.c     |   1
-rw-r--r--  mm/mmap.c          |  67
-rw-r--r--  mm/mremap.c        |   4
-rw-r--r--  mm/nommu.c         |  12
-rw-r--r--  mm/oom_kill.c      |   2
-rw-r--r--  mm/page_alloc.c    |  26
-rw-r--r--  mm/page_idle.c     |   2
-rw-r--r--  mm/page_owner.c    |   4
-rw-r--r--  mm/rmap.c          |   8
-rw-r--r--  mm/shmem.c         |  57
-rw-r--r--  mm/slab_common.c   |  41
-rw-r--r--  mm/slub.c          |   7
-rw-r--r--  mm/swapfile.c      |   2
-rw-r--r--  mm/vmalloc.c       |   4
-rw-r--r--  mm/vmstat.c        |   2
-rw-r--r--  mm/zsmalloc.c      |   5
-rw-r--r--  mm/zswap.c         |  47
35 files changed, 287 insertions, 211 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 347cc834c04a..2e5d3df0853d 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -359,15 +359,8 @@ static void wb_shutdown(struct bdi_writeback *wb)
     spin_lock_bh(&wb->work_lock);
     if (!test_and_clear_bit(WB_registered, &wb->state)) {
         spin_unlock_bh(&wb->work_lock);
-        /*
-         * Wait for wb shutdown to finish if someone else is just
-         * running wb_shutdown(). Otherwise we could proceed to wb /
-         * bdi destruction before wb_shutdown() is finished.
-         */
-        wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
         return;
     }
-    set_bit(WB_shutting_down, &wb->state);
     spin_unlock_bh(&wb->work_lock);

     cgwb_remove_from_bdi_list(wb);
@@ -379,12 +372,6 @@ static void wb_shutdown(struct bdi_writeback *wb)
     mod_delayed_work(bdi_wq, &wb->dwork, 0);
     flush_delayed_work(&wb->dwork);
     WARN_ON(!list_empty(&wb->work_list));
-    /*
-     * Make sure bit gets cleared after shutdown is finished. Matches with
-     * the barrier provided by test_and_clear_bit() above.
-     */
-    smp_wmb();
-    clear_and_wake_up_bit(WB_shutting_down, &wb->state);
 }

 static void wb_exit(struct bdi_writeback *wb)
@@ -508,10 +495,12 @@ static void cgwb_release_workfn(struct work_struct *work)
     struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                         release_work);

+    mutex_lock(&wb->bdi->cgwb_release_mutex);
     wb_shutdown(wb);

     css_put(wb->memcg_css);
     css_put(wb->blkcg_css);
+    mutex_unlock(&wb->bdi->cgwb_release_mutex);

     fprop_local_destroy_percpu(&wb->memcg_completions);
     percpu_ref_exit(&wb->refcnt);
@@ -697,6 +686,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)

     INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
     bdi->cgwb_congested_tree = RB_ROOT;
+    mutex_init(&bdi->cgwb_release_mutex);

     ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
     if (!ret) {
@@ -717,7 +707,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
     spin_lock_irq(&cgwb_lock);
     radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
         cgwb_kill(*slot);
+    spin_unlock_irq(&cgwb_lock);

+    mutex_lock(&bdi->cgwb_release_mutex);
+    spin_lock_irq(&cgwb_lock);
     while (!list_empty(&bdi->wb_list)) {
         wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
                       bdi_node);
@@ -726,6 +719,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
         spin_lock_irq(&cgwb_lock);
     }
     spin_unlock_irq(&cgwb_lock);
+    mutex_unlock(&bdi->cgwb_release_mutex);
 }

 /**
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 126548b5a292..2bf12da9baa0 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -307,12 +307,10 @@ static int __init init_cleancache(void)
     struct dentry *root = debugfs_create_dir("cleancache", NULL);
     if (root == NULL)
         return -ENXIO;
-    debugfs_create_u64("succ_gets", S_IRUGO, root, &cleancache_succ_gets);
-    debugfs_create_u64("failed_gets", S_IRUGO,
-                root, &cleancache_failed_gets);
-    debugfs_create_u64("puts", S_IRUGO, root, &cleancache_puts);
-    debugfs_create_u64("invalidates", S_IRUGO,
-                root, &cleancache_invalidates);
+    debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets);
+    debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets);
+    debugfs_create_u64("puts", 0444, root, &cleancache_puts);
+    debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates);
 #endif
     return 0;
 }
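The backing-dev.c hunks above replace the old WB_shutting_down bit (with its wait_on_bit()/clear_and_wake_up_bit() handshake) by a per-bdi mutex that serializes cgwb release work against cgwb_bdi_unregister(). Since a sleeping lock cannot be taken under the cgwb_lock spinlock, the unregister path drops the spinlock, takes the mutex, and re-acquires the spinlock. A minimal userspace sketch of that lock-ordering pattern, with pthreads standing in for the kernel primitives (all names here are illustrative, not kernel APIs):

    /*
     * Sketch of the "drop spinlock, take mutex, retake spinlock" ordering
     * used in cgwb_bdi_unregister(). pthread_spin_init(&list_lock, 0)
     * must run before first use.
     */
    #include <pthread.h>

    static pthread_spinlock_t list_lock;          /* stands in for cgwb_lock */
    static pthread_mutex_t release_mutex =        /* stands in for cgwb_release_mutex */
            PTHREAD_MUTEX_INITIALIZER;

    void unregister_all(void)
    {
        pthread_spin_lock(&list_lock);
        /* ... mark entries dead (cgwb_kill() in the real code) ... */
        pthread_spin_unlock(&list_lock);     /* drop before the sleeping lock */

        pthread_mutex_lock(&release_mutex);  /* excludes release workers */
        pthread_spin_lock(&list_lock);
        /* ... walk and shut down the remaining entries ... */
        pthread_spin_unlock(&list_lock);
        pthread_mutex_unlock(&release_mutex);
    }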
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index 275df8b5b22e..f23467291cfb 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -172,23 +172,18 @@ static void cma_debugfs_add_one(struct cma *cma, int idx)

     tmp = debugfs_create_dir(name, cma_debugfs_root);

-    debugfs_create_file("alloc", S_IWUSR, tmp, cma,
-                &cma_alloc_fops);
-
-    debugfs_create_file("free", S_IWUSR, tmp, cma,
-                &cma_free_fops);
-
-    debugfs_create_file("base_pfn", S_IRUGO, tmp,
-                &cma->base_pfn, &cma_debugfs_fops);
-    debugfs_create_file("count", S_IRUGO, tmp,
-                &cma->count, &cma_debugfs_fops);
-    debugfs_create_file("order_per_bit", S_IRUGO, tmp,
-                &cma->order_per_bit, &cma_debugfs_fops);
-    debugfs_create_file("used", S_IRUGO, tmp, cma, &cma_used_fops);
-    debugfs_create_file("maxchunk", S_IRUGO, tmp, cma, &cma_maxchunk_fops);
+    debugfs_create_file("alloc", 0200, tmp, cma, &cma_alloc_fops);
+    debugfs_create_file("free", 0200, tmp, cma, &cma_free_fops);
+    debugfs_create_file("base_pfn", 0444, tmp,
+                &cma->base_pfn, &cma_debugfs_fops);
+    debugfs_create_file("count", 0444, tmp, &cma->count, &cma_debugfs_fops);
+    debugfs_create_file("order_per_bit", 0444, tmp,
+                &cma->order_per_bit, &cma_debugfs_fops);
+    debugfs_create_file("used", 0444, tmp, cma, &cma_used_fops);
+    debugfs_create_file("maxchunk", 0444, tmp, cma, &cma_maxchunk_fops);

     u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32));
-    debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s);
+    debugfs_create_u32_array("bitmap", 0444, tmp, (u32 *)cma->bitmap, u32s);
 }

 static int __init cma_debugfs_init(void)
diff --git a/mm/compaction.c b/mm/compaction.c
index 29bd1df18b98..faca45ebe62d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1899,7 +1899,7 @@ static ssize_t sysfs_compact_node(struct device *dev,
     return count;
 }

-static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
+static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);

 int compaction_register_node(struct node *node)
 {
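The recurring change in these hunks (and several below) swaps the symbolic permission macros for octal literals, which is what checkpatch now prefers; the values are identical. A self-contained check, noting that S_IRUGO is a kernel-internal helper, so it is re-derived here the same way include/linux/stat.h defines it:

    /* Octal mode literals vs. the symbolic macros: identical values. */
    #include <assert.h>
    #include <sys/stat.h>

    #define S_IRUGO (S_IRUSR | S_IRGRP | S_IROTH)   /* as in the kernel */

    int main(void)
    {
        assert(S_IRUGO == 0444);                    /* world-readable */
        assert(S_IWUSR == 0200);                    /* owner-writable */
        assert((S_IRUSR | S_IWUSR) == 0600);
        assert((S_IFREG | 0600) == (S_IFREG | S_IRUSR | S_IWUSR));
        return 0;
    }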
diff --git a/mm/debug.c b/mm/debug.c
index 56e2d9125ea5..38c926520c97 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -43,12 +43,25 @@ const struct trace_print_flags vmaflag_names[] = {

 void __dump_page(struct page *page, const char *reason)
 {
+    bool page_poisoned = PagePoisoned(page);
+    int mapcount;
+
+    /*
+     * If struct page is poisoned don't access Page*() functions as that
+     * leads to recursive loop. Page*() check for poisoned pages, and calls
+     * dump_page() when detected.
+     */
+    if (page_poisoned) {
+        pr_emerg("page:%px is uninitialized and poisoned", page);
+        goto hex_only;
+    }
+
     /*
      * Avoid VM_BUG_ON() in page_mapcount().
      * page->_mapcount space in struct page is used by sl[aou]b pages to
      * encode own info.
      */
-    int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+    mapcount = PageSlab(page) ? 0 : page_mapcount(page);

     pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
           page, page_ref_count(page), mapcount,
@@ -60,6 +73,7 @@ void __dump_page(struct page *page, const char *reason)

     pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags);

+hex_only:
     print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32,
             sizeof(unsigned long), page,
             sizeof(struct page), false);
@@ -68,7 +82,7 @@ void __dump_page(struct page *page, const char *reason)
         pr_alert("page dumped because: %s\n", reason);

 #ifdef CONFIG_MEMCG
-    if (page->mem_cgroup)
+    if (!page_poisoned && page->mem_cgroup)
         pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
 #endif
 }
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 4d90a64b2fdc..6d4b97e7e9e9 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -105,7 +105,7 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf)
     return PAGE_SIZE - size;
 }

-static DEVICE_ATTR(pools, S_IRUGO, show_pools, NULL);
+static DEVICE_ATTR(pools, 0444, show_pools, NULL);

 /**
  * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
diff --git a/mm/failslab.c b/mm/failslab.c
index 1f2f248e3601..b135ebb88b6f 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -42,7 +42,7 @@ __setup("failslab=", setup_failslab);
 static int __init failslab_debugfs_init(void)
 {
     struct dentry *dir;
-    umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+    umode_t mode = S_IFREG | 0600;

     dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
     if (IS_ERR(dir))
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 4f5476a0f955..157e5bf63504 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -486,12 +486,11 @@ static int __init init_frontswap(void)
     struct dentry *root = debugfs_create_dir("frontswap", NULL);
     if (root == NULL)
         return -ENXIO;
-    debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads);
-    debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores);
-    debugfs_create_u64("failed_stores", S_IRUGO, root,
-                &frontswap_failed_stores);
-    debugfs_create_u64("invalidates", S_IRUGO,
-                root, &frontswap_invalidates);
+    debugfs_create_u64("loads", 0444, root, &frontswap_loads);
+    debugfs_create_u64("succ_stores", 0444, root, &frontswap_succ_stores);
+    debugfs_create_u64("failed_stores", 0444, root,
+                &frontswap_failed_stores);
+    debugfs_create_u64("invalidates", 0444, root, &frontswap_invalidates);
 #endif
     return 0;
 }
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1238,8 +1238,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
     int locked = 0;
     long ret = 0;

-    VM_BUG_ON(start & ~PAGE_MASK);
-    VM_BUG_ON(len != PAGE_ALIGN(len));
     end = start + len;

     for (nstart = start; nstart < end; nstart = nend) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1cd7c1a57a14..25346bd99364 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2084,6 +2084,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
         if (vma_is_dax(vma))
             return;
         page = pmd_page(_pmd);
+        if (!PageDirty(page) && pmd_dirty(_pmd))
+            set_page_dirty(page);
         if (!PageReferenced(page) && pmd_young(_pmd))
             SetPageReferenced(page);
         page_remove_rmap(page, true);
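The debug.c change above tests PagePoisoned() before calling any other Page*() helper: those helpers themselves detect poisoned pages and call dump_page(), so entering __dump_page() with one would recurse. A toy model of the guard, with stand-in types and a hypothetical poison pattern rather than the kernel's:

    /* Model of the recursion guard in __dump_page(): helpers that
     * validate their argument must not run while reporting an invalid
     * argument. All names below are illustrative. */
    #include <stdbool.h>
    #include <stdio.h>

    struct page { unsigned long flags; };
    #define POISON_PATTERN 0xffffffffUL     /* hypothetical poison value */

    static bool page_poisoned(const struct page *page)
    {
        return page->flags == POISON_PATTERN;
    }

    static void dump_page(const struct page *page)
    {
        if (page_poisoned(page)) {
            /* report raw bits only; no validating helpers past this point */
            printf("page %p is uninitialized and poisoned\n", (void *)page);
            return;
        }
        /* safe: helpers that would recurse on a poisoned page */
        printf("page %p flags %#lx\n", (void *)page, page->flags);
    }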
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3612fbb32e9d..3103099f64fd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2163,6 +2163,7 @@ static void __init gather_bootmem_prealloc(void)
          */
         if (hstate_is_gigantic(h))
             adjust_managed_page_count(page, 1 << h->order);
+        cond_resched();
     }
 }

@@ -3166,6 +3167,13 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
     return 0;
 }

+/*
+ * When a new function is introduced to vm_operations_struct and added
+ * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops.
+ * This is because under System V memory model, mappings created via
+ * shmget/shmat with "huge page" specified are backed by hugetlbfs files,
+ * their original vm_ops are overwritten with shm_vm_ops.
+ */
 const struct vm_operations_struct hugetlb_vm_ops = {
     .fault = hugetlb_vm_op_fault,
     .open = hugetlb_vm_op_open,
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f0179c9c04c2..a787a319211e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
 #define INIT_MM_CONTEXT(name)
 #endif

+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on the maximum CPU
+ * number the system can see. That way we allocate only as much memory for
+ * mm_cpumask() as needed for the hundreds, or thousands of processes that
+ * a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
 struct mm_struct init_mm = {
     .mm_rb      = RB_ROOT,
     .pgd        = swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
     .arg_lock   = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
     .mmlist     = LIST_HEAD_INIT(init_mm.mmlist),
     .user_ns    = &init_user_ns,
+    .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
     INIT_MM_CONTEXT(init_mm)
 };
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index f185455b3406..c3bd5209da38 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -619,12 +619,13 @@ void kasan_kfree_large(void *ptr, unsigned long ip)
 int kasan_module_alloc(void *addr, size_t size)
 {
     void *ret;
+    size_t scaled_size;
     size_t shadow_size;
     unsigned long shadow_start;

     shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
-    shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
-            PAGE_SIZE);
+    scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT;
+    shadow_size = round_up(scaled_size, PAGE_SIZE);

     if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
         return -EINVAL;
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -216,6 +216,8 @@ struct rmap_item {
 #define SEQNR_MASK  0x0ff   /* low bits of unstable tree seqnr */
 #define UNSTABLE_FLAG   0x100   /* is a node of the unstable tree */
 #define STABLE_FLAG 0x200   /* is listed from the stable tree */
+#define KSM_FLAG_MASK   (SEQNR_MASK|UNSTABLE_FLAG|STABLE_FLAG)
+                /* to mask all the flags */

 /* The stable and unstable tree heads */
 static struct rb_root one_stable_tree[1] = { RB_ROOT };
@@ -2598,10 +2600,15 @@ again:
         anon_vma_lock_read(anon_vma);
         anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
                            0, ULONG_MAX) {
+            unsigned long addr;
+
             cond_resched();
             vma = vmac->vma;
-            if (rmap_item->address < vma->vm_start ||
-                rmap_item->address >= vma->vm_end)
+
+            /* Ignore the stable/unstable/sqnr flags */
+            addr = rmap_item->address & ~KSM_FLAG_MASK;
+
+            if (addr < vma->vm_start || addr >= vma->vm_end)
                 continue;
             /*
              * Initially we examine only the vma which covers this
@@ -2615,8 +2622,7 @@ again:
             if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
                 continue;

-            if (!rwc->rmap_one(page, vma,
-                    rmap_item->address, rwc->arg)) {
+            if (!rwc->rmap_one(page, vma, addr, rwc->arg)) {
                 anon_vma_unlock_read(anon_vma);
                 return;
             }
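The kasan.c hunk fixes shadow sizing. With one shadow byte covering KASAN_SHADOW_SCALE_SIZE (8) bytes, `size >> KASAN_SHADOW_SCALE_SHIFT` floors the division and can drop a partial shadow byte; adding KASAN_SHADOW_MASK first turns it into a ceiling division. The arithmetic in plain C:

    /* The rounding bug fixed in kasan_module_alloc(): the shadow for
     * `size` bytes must be ceil(size / 8) bytes, not floor(size / 8). */
    #include <assert.h>
    #include <stddef.h>

    #define KASAN_SHADOW_SCALE_SHIFT 3
    #define KASAN_SHADOW_MASK ((1UL << KASAN_SHADOW_SCALE_SHIFT) - 1)
    #define PAGE_SIZE 4096UL
    #define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
        size_t size = 8 * PAGE_SIZE + 1;    /* needs 4097 shadow bytes */

        size_t old = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE);
        size_t new = round_up((size + KASAN_SHADOW_MASK) >>
                      KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE);

        assert(old == 4096);    /* one page: last shadow byte uncovered */
        assert(new == 8192);    /* two pages: covers all 4097 bytes */
        return 0;
    }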
diff --git a/mm/memblock.c b/mm/memblock.c
index 93ad42bc8a73..4b5d245fafc1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,6 +20,7 @@
 #include <linux/kmemleak.h>
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>

 #include <asm/sections.h>
 #include <linux/io.h>
@@ -227,7 +228,8 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
          * so we use WARN_ONCE() here to see the stack trace if
          * fail happens.
          */
-        WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n");
+        WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
+              "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
     }

     return __memblock_find_range_top_down(start, end, size, align, nid,
@@ -1224,6 +1226,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
     return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }

+#if defined(CONFIG_NO_BOOTMEM)
 /**
  * memblock_virt_alloc_internal - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
@@ -1431,6 +1434,7 @@ void * __init memblock_virt_alloc_try_nid(
               (u64)max_addr);
     return NULL;
 }
+#endif

 /**
  * __memblock_free_early - free boot memory block
@@ -1808,10 +1812,13 @@ static int __init memblock_init_debugfs(void)
     struct dentry *root = debugfs_create_dir("memblock", NULL);
     if (!root)
         return -ENXIO;
-    debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops);
-    debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops);
+    debugfs_create_file("memory", 0444, root,
+                &memblock.memory, &memblock_debug_fops);
+    debugfs_create_file("reserved", 0444, root,
+                &memblock.reserved, &memblock_debug_fops);
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
-    debugfs_create_file("physmem", S_IRUGO, root, &memblock.physmem, &memblock_debug_fops);
+    debugfs_create_file("physmem", 0444, root,
+                &memblock.physmem, &memblock_debug_fops);
 #endif

     return 0;
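The memblock hunk keys the WARN_ONCE() on IS_ENABLED(CONFIG_MEMORY_HOTREMOVE) so the splat only fires on configurations where a bottom-up allocation failure actually matters. IS_ENABLED() collapses to a compile-time 0 or 1 via a preprocessor placeholder trick; a simplified sketch of the mechanism from include/linux/kconfig.h (built-in case only; the real macro also handles CONFIG_FOO_MODULE):

    /* When CONFIG_FOO is defined as 1, the placeholder expands to "0,"
     * and the second argument (1) is selected; for an undefined option
     * the junk token shifts the arguments so the fallback 0 is taken. */
    #include <assert.h>

    #define __ARG_PLACEHOLDER_1 0,
    #define __take_second_arg(__ignored, val, ...) val
    #define __is_defined(x) ___is_defined(x)
    #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
    #define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)

    #define CONFIG_ENABLED_EXAMPLE 1
    /* CONFIG_DISABLED_EXAMPLE intentionally left undefined */

    int main(void)
    {
        assert(__is_defined(CONFIG_ENABLED_EXAMPLE) == 1);
        assert(__is_defined(CONFIG_DISABLED_EXAMPLE) == 0);
        return 0;
    }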
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c1e64d60ed02..b2173f7e5164 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -850,7 +850,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
     int nid;
     int i;

-    while ((memcg = parent_mem_cgroup(memcg))) {
+    for (; memcg; memcg = parent_mem_cgroup(memcg)) {
         for_each_node(nid) {
             mz = mem_cgroup_nodeinfo(memcg, nid);
             for (i = 0; i <= DEF_PRIORITY; i++) {
@@ -3550,7 +3550,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)

     seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
     seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
-    seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+    seq_printf(sf, "oom_kill %lu\n",
+           atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
     return 0;
 }

@@ -4036,6 +4037,14 @@ static struct cftype mem_cgroup_legacy_files[] = {

 static DEFINE_IDR(mem_cgroup_idr);

+static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
+{
+    if (memcg->id.id > 0) {
+        idr_remove(&mem_cgroup_idr, memcg->id.id);
+        memcg->id.id = 0;
+    }
+}
+
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
     VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
@@ -4046,8 +4055,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
     VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
     if (atomic_sub_and_test(n, &memcg->id.ref)) {
-        idr_remove(&mem_cgroup_idr, memcg->id.id);
-        memcg->id.id = 0;
+        mem_cgroup_id_remove(memcg);

         /* Memcg ID pins CSS */
         css_put(&memcg->css);
@@ -4184,8 +4192,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
     idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
     return memcg;
 fail:
-    if (memcg->id.id > 0)
-        idr_remove(&mem_cgroup_idr, memcg->id.id);
+    mem_cgroup_id_remove(memcg);
     __mem_cgroup_free(memcg);
     return NULL;
 }
@@ -4244,6 +4251,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)

     return &memcg->css;
 fail:
+    mem_cgroup_id_remove(memcg);
     mem_cgroup_free(memcg);
     return ERR_PTR(-ENOMEM);
 }
@@ -5239,7 +5247,8 @@ static int memory_events_show(struct seq_file *m, void *v)
            atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
     seq_printf(m, "oom %lu\n",
            atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
-    seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+    seq_printf(m, "oom_kill %lu\n",
+           atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));

     return 0;
 }
@@ -5480,6 +5489,10 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
     elow = memcg->memory.low;

     parent = parent_mem_cgroup(memcg);
+    /* No parent means a non-hierarchical mode on v1 memcg */
+    if (!parent)
+        return MEMCG_PROT_NONE;
+
     if (parent == root)
         goto exit;
diff --git a/mm/memfd.c b/mm/memfd.c
index 27069518e3c5..2bb5e257080e 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -326,7 +326,7 @@ SYSCALL_DEFINE2(memfd_create,
         goto err_fd;
     }
     file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
-    file->f_flags |= O_RDWR | O_LARGEFILE;
+    file->f_flags |= O_LARGEFILE;

     if (flags & MFD_ALLOW_SEALING) {
         file_seals = memfd_file_seals_ptr(file);
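The memcontrol.c hunks factor the two ID-release sites into mem_cgroup_id_remove(), which is safe to call more than once because it zeroes the ID after removal; that lets the new css_alloc failure path call it without tracking whether the ID was already dropped. The shape of the helper, as a generic sketch with stand-in types:

    /* Idempotent ID release: clear the ID on first release so error
     * paths can call this unconditionally. Illustrative names only. */
    #include <assert.h>

    struct obj { int id; };

    static void id_table_remove(int id)
    {
        (void)id;   /* e.g. idr_remove() in the kernel */
    }

    static void obj_id_remove(struct obj *o)
    {
        if (o->id > 0) {
            id_table_remove(o->id);
            o->id = 0;      /* a second call becomes a no-op */
        }
    }

    int main(void)
    {
        struct obj o = { .id = 42 };

        obj_id_remove(&o);  /* releases id 42 */
        obj_id_remove(&o);  /* safe: already released */
        assert(o.id == 0);
        return 0;
    }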
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..3d0a74ab70f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_

 #ifdef CONFIG_HAVE_RCU_TABLE_FREE

-/*
- * See the comment near struct mmu_table_batch.
- */
-
 static void tlb_remove_table_smp_sync(void *arg)
 {
-    /* Simply deliver the interrupt */
+    struct mm_struct __maybe_unused *mm = arg;
+    /*
+     * On most architectures this does nothing. Simply delivering the
+     * interrupt is enough to prevent races with software page table
+     * walking like that done in get_user_pages_fast.
+     *
+     * See the comment near struct mmu_table_batch.
+     */
+    tlb_flush_remove_tables_local(mm);
 }

-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
 {
     /*
      * This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
      * It is however sufficient for software page-table walkers that rely on
      * IRQ disabling. See the comment near struct mmu_table_batch.
      */
-    smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+    smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
     __tlb_remove_table(table);
 }

@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
 {
     struct mmu_table_batch **batch = &tlb->batch;

+    tlb_flush_remove_tables(tlb->mm);
+
     if (*batch) {
         call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
         *batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
     if (*batch == NULL) {
         *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
         if (*batch == NULL) {
-            tlb_remove_table_one(table);
+            tlb_remove_table_one(table, tlb);
             return;
         }
         (*batch)->nr = 0;
@@ -1417,11 +1423,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
     do {
         next = pmd_addr_end(addr, end);
         if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
-            if (next - addr != HPAGE_PMD_SIZE) {
-                VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
-                    !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+            if (next - addr != HPAGE_PMD_SIZE)
                 __split_huge_pmd(vma, pmd, addr, false, NULL);
-            } else if (zap_huge_pmd(tlb, vma, pmd, addr))
+            else if (zap_huge_pmd(tlb, vma, pmd, addr))
                 goto next;
             /* fall through */
         }
@@ -4397,6 +4401,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
         return -EINVAL;

     maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+    if (!maddr)
+        return -ENOMEM;
+
     if (write)
         memcpy_toio(maddr + offset, buf, len);
     else
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9ac49ef17b4e..01f1a14facc4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2505,6 +2505,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)

         /* Create pseudo-vma that contains just the policy */
         memset(&pvma, 0, sizeof(struct vm_area_struct));
+        vma_init(&pvma, NULL);
         pvma.vm_end = TASK_SIZE;    /* policy covers entire file */
         mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
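The mempolicy.c hunk (and the shmem.c one further down) now run vma_init() on on-stack pseudo-VMAs so they get the same baseline state as VMAs from vm_area_alloc(): an owning mm, a never-NULL dummy vm_ops, and an initialized anon_vma_chain. A self-contained model of roughly what vma_init() does at this point in the series; the struct layouts below are reduced stand-ins, not the kernel's, so treat this as an approximation:

    #include <stddef.h>
    #include <string.h>

    struct list_head { struct list_head *next, *prev; };
    struct mm_struct;
    struct vm_operations_struct { void (*open)(void *); };

    struct vm_area_struct {
        struct mm_struct *vm_mm;
        const struct vm_operations_struct *vm_ops;
        struct list_head anon_vma_chain;
    };

    static void INIT_LIST_HEAD(struct list_head *list)
    {
        list->next = list;
        list->prev = list;
    }

    static void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
    {
        static const struct vm_operations_struct dummy_vm_ops = {0};

        vma->vm_mm = mm;
        vma->vm_ops = &dummy_vm_ops;    /* never NULL: callers skip checks */
        INIT_LIST_HEAD(&vma->anon_vma_chain);
    }

    int main(void)
    {
        struct vm_area_struct pvma;

        memset(&pvma, 0, sizeof(pvma)); /* as mpol_shared_policy_init does */
        vma_init(&pvma, NULL);
        return 0;
    }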
diff --git a/mm/mmap.c b/mm/mmap.c
index d1eb87ef4b1a..17bbf4d3e24f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -182,12 +182,12 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
     if (vma->vm_file)
         fput(vma->vm_file);
     mpol_put(vma_policy(vma));
-    kmem_cache_free(vm_area_cachep, vma);
+    vm_area_free(vma);
     return next;
 }

-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
-
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
+        struct list_head *uf);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
     unsigned long retval;
@@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
         goto out;

     /* Ok, looks good - let it rip. */
-    if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
+    if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
         goto out;

 set_brk:
@@ -911,7 +911,7 @@ again:
             anon_vma_merge(vma, next);
         mm->map_count--;
         mpol_put(vma_policy(next));
-        kmem_cache_free(vm_area_cachep, next);
+        vm_area_free(next);
         /*
          * In mprotect's case 6 (see comments on vma_merge),
          * we must remove another next too. It would clutter
@@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
      * specific mapper. the address has already been validated, but
      * not unmapped, but the maps are removed from the list.
      */
-    vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+    vma = vm_area_alloc(mm);
     if (!vma) {
         error = -ENOMEM;
         goto unacct_error;
     }

-    vma->vm_mm = mm;
     vma->vm_start = addr;
     vma->vm_end = addr + len;
     vma->vm_flags = vm_flags;
     vma->vm_page_prot = vm_get_page_prot(vm_flags);
     vma->vm_pgoff = pgoff;
-    INIT_LIST_HEAD(&vma->anon_vma_chain);

     if (file) {
         if (vm_flags & VM_DENYWRITE) {
@@ -1780,6 +1778,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
         error = shmem_zero_setup(vma);
         if (error)
             goto free_vma;
+    } else {
+        vma_set_anonymous(vma);
     }

     vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -1832,7 +1832,7 @@ allow_write_and_free_vma:
     if (vm_flags & VM_DENYWRITE)
         allow_write_access(file);
free_vma:
-    kmem_cache_free(vm_area_cachep, vma);
+    vm_area_free(vma);
unacct_error:
     if (charged)
         vm_unacct_memory(charged);
@@ -2620,15 +2620,10 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
             return err;
     }

-    new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+    new = vm_area_dup(vma);
     if (!new)
         return -ENOMEM;

-    /* most fields are the same, copy all, and then fixup */
-    *new = *vma;
-
-    INIT_LIST_HEAD(&new->anon_vma_chain);
-
     if (new_below)
         new->vm_end = addr;
     else {
@@ -2669,7 +2664,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 out_free_mpol:
     mpol_put(vma_policy(new));
 out_free_vma:
-    kmem_cache_free(vm_area_cachep, new);
+    vm_area_free(new);
     return err;
 }

@@ -2929,21 +2924,14 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
+static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
 {
     struct mm_struct *mm = current->mm;
     struct vm_area_struct *vma, *prev;
-    unsigned long len;
     struct rb_node **rb_link, *rb_parent;
     pgoff_t pgoff = addr >> PAGE_SHIFT;
     int error;

-    len = PAGE_ALIGN(request);
-    if (len < request)
-        return -ENOMEM;
-    if (!len)
-        return 0;
-
     /* Until we need other flags, refuse anything except VM_EXEC. */
     if ((flags & (~VM_EXEC)) != 0)
         return -EINVAL;
@@ -2991,14 +2979,13 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
     /*
      * create a vma struct for an anonymous mapping
      */
-    vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+    vma = vm_area_alloc(mm);
     if (!vma) {
         vm_unacct_memory(len >> PAGE_SHIFT);
         return -ENOMEM;
     }

-    INIT_LIST_HEAD(&vma->anon_vma_chain);
-    vma->vm_mm = mm;
+    vma_set_anonymous(vma);
     vma->vm_start = addr;
     vma->vm_end = addr + len;
     vma->vm_pgoff = pgoff;
@@ -3015,18 +3002,20 @@ out:
     return 0;
 }

-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
-{
-    return do_brk_flags(addr, len, 0, uf);
-}
-
-int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
+int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 {
     struct mm_struct *mm = current->mm;
+    unsigned long len;
     int ret;
     bool populate;
     LIST_HEAD(uf);

+    len = PAGE_ALIGN(request);
+    if (len < request)
+        return -ENOMEM;
+    if (!len)
+        return 0;
+
     if (down_write_killable(&mm->mmap_sem))
         return -EINTR;

@@ -3207,16 +3196,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         }
         *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
     } else {
-        new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+        new_vma = vm_area_dup(vma);
         if (!new_vma)
             goto out;
-        *new_vma = *vma;
         new_vma->vm_start = addr;
         new_vma->vm_end = addr + len;
         new_vma->vm_pgoff = pgoff;
         if (vma_dup_policy(vma, new_vma))
             goto out_free_vma;
-        INIT_LIST_HEAD(&new_vma->anon_vma_chain);
         if (anon_vma_clone(new_vma, vma))
             goto out_free_mempol;
         if (new_vma->vm_file)
@@ -3231,7 +3218,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 out_free_mempol:
     mpol_put(vma_policy(new_vma));
 out_free_vma:
-    kmem_cache_free(vm_area_cachep, new_vma);
+    vm_area_free(new_vma);
 out:
     return NULL;
 }
@@ -3355,12 +3342,10 @@ static struct vm_area_struct *__install_special_mapping(
     int ret;
     struct vm_area_struct *vma;

-    vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+    vma = vm_area_alloc(mm);
     if (unlikely(vma == NULL))
         return ERR_PTR(-ENOMEM);

-    INIT_LIST_HEAD(&vma->anon_vma_chain);
-    vma->vm_mm = mm;
     vma->vm_start = addr;
     vma->vm_end = addr + len;

@@ -3381,7 +3366,7 @@ static struct vm_area_struct *__install_special_mapping(
     return vma;

out:
-    kmem_cache_free(vm_area_cachep, vma);
+    vm_area_free(vma);
     return ERR_PTR(ret);
 }
diff --git a/mm/mremap.c b/mm/mremap.c
index 049470aa1e3e..5c2e18505f75 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -191,8 +191,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         drop_rmap_locks(vma);
 }

-#define LATENCY_LIMIT  (64 * PAGE_SIZE)
-
 unsigned long move_page_tables(struct vm_area_struct *vma,
         unsigned long old_addr, struct vm_area_struct *new_vma,
         unsigned long new_addr, unsigned long len,
@@ -247,8 +245,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
         next = (new_addr + PMD_SIZE) & PMD_MASK;
         if (extent > next - new_addr)
             extent = next - new_addr;
-        if (extent > LATENCY_LIMIT)
-            extent = LATENCY_LIMIT;
         move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
               new_pmd, new_addr, need_rmap_locks, &need_flush);
     }
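The mmap.c hunks above move the PAGE_ALIGN(request) rounding and its overflow check out of do_brk() and into vm_brk_flags(), so the request is validated once at the entry point. The `if (len < request)` test after aligning is an overflow check: rounding a request up near the top of the address space wraps to a smaller value. Demonstrated in plain C:

    /* Why "len = PAGE_ALIGN(request); if (len < request)" rejects
     * requests that wrap past the top of the address space. */
    #include <assert.h>
    #include <limits.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long ok = PAGE_ALIGN(5000UL);
        unsigned long bad = PAGE_ALIGN(ULONG_MAX - 100);

        assert(ok == 8192 && ok >= 5000UL);   /* normal round-up */
        assert(bad < ULONG_MAX - 100);        /* wrapped: return -ENOMEM */
        return 0;
    }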
diff --git a/mm/nommu.c b/mm/nommu.c
index 4452d8bd9ae4..9fc9e43335b6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -769,7 +769,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
     if (vma->vm_file)
         fput(vma->vm_file);
     put_nommu_region(vma->vm_region);
-    kmem_cache_free(vm_area_cachep, vma);
+    vm_area_free(vma);
 }

 /*
@@ -1145,6 +1145,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
         if (ret < len)
             memset(base + ret, 0, len - ret);

+    } else {
+        vma_set_anonymous(vma);
     }

     return 0;
@@ -1204,7 +1206,7 @@ unsigned long do_mmap(struct file *file,
     if (!region)
         goto error_getting_region;

-    vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+    vma = vm_area_alloc(current->mm);
     if (!vma)
         goto error_getting_vma;

@@ -1212,7 +1214,6 @@ unsigned long do_mmap(struct file *file,
     region->vm_flags = vm_flags;
     region->vm_pgoff = pgoff;

-    INIT_LIST_HEAD(&vma->anon_vma_chain);
     vma->vm_flags = vm_flags;
     vma->vm_pgoff = pgoff;

@@ -1368,7 +1369,7 @@ error:
     kmem_cache_free(vm_region_jar, region);
     if (vma->vm_file)
         fput(vma->vm_file);
-    kmem_cache_free(vm_area_cachep, vma);
+    vm_area_free(vma);
     return ret;

sharing_violation:
@@ -1469,14 +1470,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
     if (!region)
         return -ENOMEM;

-    new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+    new = vm_area_dup(vma);
     if (!new) {
         kmem_cache_free(vm_region_jar, region);
         return -ENOMEM;
     }

     /* most fields are the same, copy all, and then fixup */
-    *new = *vma;
     *region = *vma->vm_region;
     new->vm_region = region;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6694348b27e9..84081e77bc51 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -913,7 +913,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)

     /* Raise event before sending signal: task reaper must see this */
     count_vm_event(OOM_KILL);
-    count_memcg_event_mm(mm, OOM_KILL);
+    memcg_memory_event_mm(mm, MEMCG_OOM_KILL);

     /*
      * We should send SIGKILL before granting access to memory reserves
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07b3c23762ad..3222193c46c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3061,7 +3061,7 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)

 static int __init fail_page_alloc_debugfs(void)
 {
-    umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+    umode_t mode = S_IFREG | 0600;
     struct dentry *dir;

     dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
@@ -6383,7 +6383,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
     free_area_init_core(pgdat);
 }

-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
 /*
  * Only struct pages that are backed by physical memory are zeroed and
  * initialized by going through __init_single_page(). But, there are some
@@ -6421,7 +6421,7 @@ void __paginginit zero_resv_unavail(void)
     if (pgcnt)
         pr_info("Reserved but unavailable: %lld pages", pgcnt);
 }
-#endif /* CONFIG_HAVE_MEMBLOCK */
+#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */

 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP

@@ -6847,6 +6847,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
     /* Initialise every node */
     mminit_verify_pageflags_layout();
     setup_nr_node_ids();
+    zero_resv_unavail();
     for_each_online_node(nid) {
         pg_data_t *pgdat = NODE_DATA(nid);
         free_area_init_node(nid, NULL,
@@ -6857,7 +6858,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
             node_set_state(nid, N_MEMORY);
         check_for_memory(pgdat, nid);
     }
-    zero_resv_unavail();
 }

 static int __init cmdline_parse_core(char *p, unsigned long *core,
@@ -6939,9 +6939,21 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
     start = (void *)PAGE_ALIGN((unsigned long)start);
     end = (void *)((unsigned long)end & PAGE_MASK);
     for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+        struct page *page = virt_to_page(pos);
+        void *direct_map_addr;
+
+        /*
+         * 'direct_map_addr' might be different from 'pos'
+         * because some architectures' virt_to_page()
+         * work with aliases.  Getting the direct map
+         * address ensures that we get a _writeable_
+         * alias for the memset().
+         */
+        direct_map_addr = page_address(page);
         if ((unsigned int)poison <= 0xFF)
-            memset(pos, poison, PAGE_SIZE);
-        free_reserved_page(virt_to_page(pos));
+            memset(direct_map_addr, poison, PAGE_SIZE);
+
+        free_reserved_page(page);
     }

     if (pages && s)
@@ -7033,9 +7045,9 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)

 void __init free_area_init(unsigned long *zones_size)
 {
+    zero_resv_unavail();
     free_area_init_node(0, zones_size,
             __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
-    zero_resv_unavail();
 }

 static int page_alloc_cpu_dead(unsigned int cpu)
diff --git a/mm/page_idle.c b/mm/page_idle.c
index e412a63b2b74..6302bc62c27d 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -201,7 +201,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj,
 }

 static struct bin_attribute page_idle_bitmap_attr =
-        __BIN_ATTR(bitmap, S_IRUSR | S_IWUSR,
+        __BIN_ATTR(bitmap, 0600,
            page_idle_bitmap_read, page_idle_bitmap_write, 0);

 static struct bin_attribute *page_idle_bin_attrs[] = {
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 75d21a2259b3..d80adfe702d3 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -631,8 +631,8 @@ static int __init pageowner_init(void)
         return 0;
     }

-    dentry = debugfs_create_file("page_owner", S_IRUSR, NULL,
-            NULL, &proc_page_owner_operations);
+    dentry = debugfs_create_file("page_owner", 0400, NULL,
+                     NULL, &proc_page_owner_operations);

     return PTR_ERR_OR_ZERO(dentry);
 }
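The free_reserved_area() hunk stops poisoning through the caller-supplied pointer and instead poisons through page_address(virt_to_page(pos)): the pointer passed in can be a non-writable alias of the page (a kernel-text mapping, for instance), while the direct-map alias is guaranteed writable. A toy, entirely illustrative model of "translate to the canonical alias before writing", where "physical memory" is just an array:

    #include <assert.h>
    #include <string.h>

    #define PAGES 4
    #define PAGE_SZ 64

    static unsigned char phys[PAGES][PAGE_SZ];  /* "physical" pages */

    /* virt_to_page() analogue: any alias -> page frame number */
    static int virt_to_pfn(const unsigned char *alias_base,
                   const unsigned char *p)
    {
        return (int)((p - alias_base) / PAGE_SZ);
    }

    /* page_address() analogue: pfn -> canonical writable alias */
    static unsigned char *direct_map_addr(int pfn)
    {
        return phys[pfn];
    }

    int main(void)
    {
        /* pretend pos came in through a possibly read-only alias */
        const unsigned char *alias_base = &phys[0][0];
        const unsigned char *pos = &phys[2][0];

        memset(direct_map_addr(virt_to_pfn(alias_base, pos)),
               0xcc, PAGE_SZ);                  /* poison via direct map */
        assert(phys[2][0] == 0xcc);
        return 0;
    }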
When userfaultfd is active, we must not drop + * this page though, as its main user (postcopy + * migration) will not expect userfaults on already + * copied pages. */ dec_mm_counter(mm, mm_counter(page)); /* We have to invalidate as we cleared the pte */ diff --git a/mm/shmem.c b/mm/shmem.c index e9a7ac74823d..96bcc51fb9ec 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1421,6 +1421,7 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma, { /* Create a pseudo vma that just contains the policy */ memset(vma, 0, sizeof(*vma)); + vma_init(vma, NULL); /* Bias interleave by inode number to distribute better across nodes */ vma->vm_pgoff = index + info->vfs_inode.i_ino; vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); @@ -3013,7 +3014,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s if (len > PAGE_SIZE) return -ENAMETOOLONG; - inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); + inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0, + VM_NORESERVE); if (!inode) return -ENOSPC; @@ -3445,7 +3447,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) sbinfo->max_blocks << (PAGE_SHIFT - 10)); if (sbinfo->max_inodes != shmem_default_max_inodes()) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); - if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) + if (sbinfo->mode != (0777 | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) seq_printf(seq, ",uid=%u", @@ -3486,7 +3488,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) if (!sbinfo) return -ENOMEM; - sbinfo->mode = S_IRWXUGO | S_ISVTX; + sbinfo->mode = 0777 | S_ISVTX; sbinfo->uid = current_fsuid(); sbinfo->gid = current_fsgid(); sb->s_fs_info = sbinfo; @@ -3895,18 +3897,11 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); /* common code */ -static const struct dentry_operations anon_ops = { - .d_dname = simple_dname -}; - static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags, unsigned int i_flags) { - struct file *res; struct inode *inode; - struct path path; - struct super_block *sb; - struct qstr this; + struct file *res; if (IS_ERR(mnt)) return ERR_CAST(mnt); @@ -3917,41 +3912,21 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l if (shmem_acct_size(flags, size)) return ERR_PTR(-ENOMEM); - res = ERR_PTR(-ENOMEM); - this.name = name; - this.len = strlen(name); - this.hash = 0; /* will go */ - sb = mnt->mnt_sb; - path.mnt = mntget(mnt); - path.dentry = d_alloc_pseudo(sb, &this); - if (!path.dentry) - goto put_memory; - d_set_d_op(path.dentry, &anon_ops); - - res = ERR_PTR(-ENOSPC); - inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); - if (!inode) - goto put_memory; - + inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0, + flags); + if (unlikely(!inode)) { + shmem_unacct_size(flags, size); + return ERR_PTR(-ENOSPC); + } inode->i_flags |= i_flags; - d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); /* It is unlinked */ res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); + if (!IS_ERR(res)) + res = alloc_file_pseudo(inode, mnt, name, O_RDWR, + &shmem_file_operations); if (IS_ERR(res)) - goto put_path; - - res = alloc_file(&path, FMODE_WRITE | FMODE_READ, - &shmem_file_operations); - if (IS_ERR(res)) - goto put_path; - - return res; - -put_memory: - shmem_unacct_size(flags, size); -put_path: - path_put(&path); + iput(inode); 
diff --git a/mm/shmem.c b/mm/shmem.c
index e9a7ac74823d..96bcc51fb9ec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1421,6 +1421,7 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
 {
     /* Create a pseudo vma that just contains the policy */
     memset(vma, 0, sizeof(*vma));
+    vma_init(vma, NULL);
     /* Bias interleave by inode number to distribute better across nodes */
     vma->vm_pgoff = index + info->vfs_inode.i_ino;
     vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
@@ -3013,7 +3014,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
     if (len > PAGE_SIZE)
         return -ENAMETOOLONG;

-    inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
+    inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0,
+                VM_NORESERVE);
     if (!inode)
         return -ENOSPC;

@@ -3445,7 +3447,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
             sbinfo->max_blocks << (PAGE_SHIFT - 10));
     if (sbinfo->max_inodes != shmem_default_max_inodes())
         seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
-    if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
+    if (sbinfo->mode != (0777 | S_ISVTX))
         seq_printf(seq, ",mode=%03ho", sbinfo->mode);
     if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
         seq_printf(seq, ",uid=%u",
@@ -3486,7 +3488,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
     if (!sbinfo)
         return -ENOMEM;

-    sbinfo->mode = S_IRWXUGO | S_ISVTX;
+    sbinfo->mode = 0777 | S_ISVTX;
     sbinfo->uid = current_fsuid();
     sbinfo->gid = current_fsgid();
     sb->s_fs_info = sbinfo;
@@ -3895,18 +3897,11 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);

 /* common code */

-static const struct dentry_operations anon_ops = {
-    .d_dname = simple_dname
-};
-
 static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size,
                        unsigned long flags, unsigned int i_flags)
 {
-    struct file *res;
     struct inode *inode;
-    struct path path;
-    struct super_block *sb;
-    struct qstr this;
+    struct file *res;

     if (IS_ERR(mnt))
         return ERR_CAST(mnt);
@@ -3917,41 +3912,21 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
     if (shmem_acct_size(flags, size))
         return ERR_PTR(-ENOMEM);

-    res = ERR_PTR(-ENOMEM);
-    this.name = name;
-    this.len = strlen(name);
-    this.hash = 0; /* will go */
-    sb = mnt->mnt_sb;
-    path.mnt = mntget(mnt);
-    path.dentry = d_alloc_pseudo(sb, &this);
-    if (!path.dentry)
-        goto put_memory;
-    d_set_d_op(path.dentry, &anon_ops);
-
-    res = ERR_PTR(-ENOSPC);
-    inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
-    if (!inode)
-        goto put_memory;
-
+    inode = shmem_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0,
+                flags);
+    if (unlikely(!inode)) {
+        shmem_unacct_size(flags, size);
+        return ERR_PTR(-ENOSPC);
+    }
     inode->i_flags |= i_flags;
-    d_instantiate(path.dentry, inode);
     inode->i_size = size;
     clear_nlink(inode); /* It is unlinked */
     res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
+    if (!IS_ERR(res))
+        res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+                &shmem_file_operations);
     if (IS_ERR(res))
-        goto put_path;
-
-    res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-          &shmem_file_operations);
-    if (IS_ERR(res))
-        goto put_path;
-
-    return res;
-
-put_memory:
-    shmem_unacct_size(flags, size);
-put_path:
-    path_put(&path);
+        iput(inode);
     return res;
 }
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 98dcdc352062..2296caf87bfb 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -136,6 +136,7 @@ void slab_init_memcg_params(struct kmem_cache *s)
     s->memcg_params.root_cache = NULL;
     RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
     INIT_LIST_HEAD(&s->memcg_params.children);
+    s->memcg_params.dying = false;
 }

 static int init_memcg_params(struct kmem_cache *s,
@@ -566,10 +567,14 @@ static int shutdown_cache(struct kmem_cache *s)
     list_del(&s->list);

     if (s->flags & SLAB_TYPESAFE_BY_RCU) {
+#ifdef SLAB_SUPPORTS_SYSFS
+        sysfs_slab_unlink(s);
+#endif
         list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
         schedule_work(&slab_caches_to_rcu_destroy_work);
     } else {
 #ifdef SLAB_SUPPORTS_SYSFS
+        sysfs_slab_unlink(s);
         sysfs_slab_release(s);
 #else
         slab_kmem_cache_release(s);
@@ -608,7 +613,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
      * The memory cgroup could have been offlined while the cache
      * creation work was pending.
      */
-    if (memcg->kmem_state != KMEM_ONLINE)
+    if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
         goto out_unlock;

     idx = memcg_cache_id(memcg);
@@ -712,6 +717,9 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
         WARN_ON_ONCE(s->memcg_params.deact_fn))
         return;

+    if (s->memcg_params.root_cache->memcg_params.dying)
+        return;
+
     /* pin memcg so that @s doesn't get destroyed in the middle */
     css_get(&s->memcg_params.memcg->css);

@@ -823,11 +831,36 @@ static int shutdown_memcg_caches(struct kmem_cache *s)
         return -EBUSY;
     return 0;
 }
+
+static void flush_memcg_workqueue(struct kmem_cache *s)
+{
+    mutex_lock(&slab_mutex);
+    s->memcg_params.dying = true;
+    mutex_unlock(&slab_mutex);
+
+    /*
+     * SLUB deactivates the kmem_caches through call_rcu_sched. Make
+     * sure all registered rcu callbacks have been invoked.
+     */
+    if (IS_ENABLED(CONFIG_SLUB))
+        rcu_barrier_sched();
+
+    /*
+     * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
+     * deactivates the memcg kmem_caches through workqueue. Make sure all
+     * previous workitems on workqueue are processed.
+     */
+    flush_workqueue(memcg_kmem_cache_wq);
+}
 #else
 static inline int shutdown_memcg_caches(struct kmem_cache *s)
 {
     return 0;
 }
+
+static inline void flush_memcg_workqueue(struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */

 void slab_kmem_cache_release(struct kmem_cache *s)
@@ -845,6 +878,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
     if (unlikely(!s))
         return;

+    flush_memcg_workqueue(s);
+
     get_online_cpus();
     get_online_mems();

@@ -1212,9 +1247,9 @@ void cache_random_seq_destroy(struct kmem_cache *cachep)
 #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
 #ifdef CONFIG_SLAB
-#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
+#define SLABINFO_RIGHTS (0600)
 #else
-#define SLABINFO_RIGHTS S_IRUSR
+#define SLABINFO_RIGHTS (0400)
 #endif

 static void print_slabinfo_header(struct seq_file *m)
diff --git a/mm/slub.c b/mm/slub.c
index a3b8467c14af..51258eff4178 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5667,7 +5667,6 @@ static void sysfs_slab_remove_workfn(struct work_struct *work)
     kset_unregister(s->memcg_kset);
 #endif
     kobject_uevent(&s->kobj, KOBJ_REMOVE);
-    kobject_del(&s->kobj);
 out:
     kobject_put(&s->kobj);
 }
@@ -5752,6 +5751,12 @@ static void sysfs_slab_remove(struct kmem_cache *s)
     schedule_work(&s->kobj_remove_work);
 }

+void sysfs_slab_unlink(struct kmem_cache *s)
+{
+    if (slab_state >= FULL)
+        kobject_del(&s->kobj);
+}
+
 void sysfs_slab_release(struct kmem_cache *s)
 {
     if (slab_state >= FULL)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 925cf795a652..2cc2972eedaf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -100,7 +100,7 @@ atomic_t nr_rotate_swap = ATOMIC_INIT(0);

 static inline unsigned char swap_count(unsigned char ent)
 {
-    return ent & ~SWAP_HAS_CACHE;  /* may include SWAP_HAS_CONT flag */
+    return ent & ~SWAP_HAS_CACHE;  /* may include COUNT_CONTINUED flag */
 }

 /* returns 1 if swap entry is freed */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 89efac3a020e..cfea25be7754 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2741,11 +2741,11 @@ static const struct seq_operations vmalloc_op = {
 static int __init proc_vmalloc_init(void)
 {
     if (IS_ENABLED(CONFIG_NUMA))
-        proc_create_seq_private("vmallocinfo", S_IRUSR, NULL,
+        proc_create_seq_private("vmallocinfo", 0400, NULL,
                 &vmalloc_op,
                 nr_node_ids * sizeof(unsigned int), NULL);
     else
-        proc_create_seq("vmallocinfo", S_IRUSR, NULL, &vmalloc_op);
+        proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
     return 0;
 }
 module_init(proc_vmalloc_init);
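The slab_common.c and slub.c hunks above close a destroy-vs-async-work race: kmem_cache_destroy() first publishes a `dying` flag under slab_mutex (so no new memcg cache creation or deactivation work is queued), then drains both deferred paths, RCU callbacks via rcu_barrier_sched() and pending work items via flush_workqueue(), before tearing the cache down. The general shutdown pattern in miniature, with illustrative names and stdatomic standing in for the kernel's locking:

    /* 1) publish "dying" so no new asynchronous work is queued,
     * 2) drain everything queued before the flag became visible,
     * 3) only then free the object. */
    #include <stdatomic.h>
    #include <stdbool.h>

    struct cache {
        atomic_bool dying;
        /* ... */
    };

    /* producer: refuses work once dying is set (compare the
     * root_cache->memcg_params.dying checks added above) */
    bool queue_async_work(struct cache *c)
    {
        if (atomic_load(&c->dying))
            return false;
        /* ... enqueue a work item referencing c ... */
        return true;
    }

    void drain_all_work(void)
    {
        /* stand-in for rcu_barrier_sched() + flush_workqueue() */
    }

    void cache_destroy(struct cache *c)
    {
        atomic_store(&c->dying, true);  /* step 1: stop new work */
        drain_all_work();               /* step 2: wait out queued work */
        /* step 3: teardown; no work item can still reference c */
    }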
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 75eda9c2b260..8ba0870ecddd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1796,11 +1796,9 @@ static void vmstat_update(struct work_struct *w)
          * to occur in the future. Keep on running the
          * update worker thread.
          */
-        preempt_disable();
         queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
             this_cpu_ptr(&vmstat_work),
             round_jiffies_relative(sysctl_stat_interval));
-        preempt_enable();
     }
 }
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 61cb05dc950c..8d87e973a4f5 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -661,8 +661,9 @@ static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
     }
     pool->stat_dentry = entry;

-    entry = debugfs_create_file("classes", S_IFREG | S_IRUGO,
-            pool->stat_dentry, pool, &zs_stats_size_fops);
+    entry = debugfs_create_file("classes", S_IFREG | 0444,
+                    pool->stat_dentry, pool,
+                    &zs_stats_size_fops);
     if (!entry) {
         pr_warn("%s: debugfs file entry <%s> creation failed\n",
                 name, "classes");
diff --git a/mm/zswap.c b/mm/zswap.c
index 61a5c41972db..cd91fd9d96b8 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1026,6 +1026,15 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
             ret = -ENOMEM;
             goto reject;
         }
+
+        /* A second zswap_is_full() check after
+         * zswap_shrink() to make sure it's now
+         * under the max_pool_percent
+         */
+        if (zswap_is_full()) {
+            ret = -ENOMEM;
+            goto reject;
+        }
     }

     /* allocate entry */
@@ -1256,26 +1265,26 @@ static int __init zswap_debugfs_init(void)
     if (!zswap_debugfs_root)
         return -ENOMEM;

-    debugfs_create_u64("pool_limit_hit", S_IRUGO,
-            zswap_debugfs_root, &zswap_pool_limit_hit);
-    debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
-            zswap_debugfs_root, &zswap_reject_reclaim_fail);
-    debugfs_create_u64("reject_alloc_fail", S_IRUGO,
-            zswap_debugfs_root, &zswap_reject_alloc_fail);
-    debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
-            zswap_debugfs_root, &zswap_reject_kmemcache_fail);
-    debugfs_create_u64("reject_compress_poor", S_IRUGO,
-            zswap_debugfs_root, &zswap_reject_compress_poor);
-    debugfs_create_u64("written_back_pages", S_IRUGO,
-            zswap_debugfs_root, &zswap_written_back_pages);
-    debugfs_create_u64("duplicate_entry", S_IRUGO,
-            zswap_debugfs_root, &zswap_duplicate_entry);
-    debugfs_create_u64("pool_total_size", S_IRUGO,
-            zswap_debugfs_root, &zswap_pool_total_size);
-    debugfs_create_atomic_t("stored_pages", S_IRUGO,
-            zswap_debugfs_root, &zswap_stored_pages);
+    debugfs_create_u64("pool_limit_hit", 0444,
+               zswap_debugfs_root, &zswap_pool_limit_hit);
+    debugfs_create_u64("reject_reclaim_fail", 0444,
+               zswap_debugfs_root, &zswap_reject_reclaim_fail);
+    debugfs_create_u64("reject_alloc_fail", 0444,
+               zswap_debugfs_root, &zswap_reject_alloc_fail);
+    debugfs_create_u64("reject_kmemcache_fail", 0444,
+               zswap_debugfs_root, &zswap_reject_kmemcache_fail);
+    debugfs_create_u64("reject_compress_poor", 0444,
+               zswap_debugfs_root, &zswap_reject_compress_poor);
+    debugfs_create_u64("written_back_pages", 0444,
+               zswap_debugfs_root, &zswap_written_back_pages);
+    debugfs_create_u64("duplicate_entry", 0444,
+               zswap_debugfs_root, &zswap_duplicate_entry);
+    debugfs_create_u64("pool_total_size", 0444,
+               zswap_debugfs_root, &zswap_pool_total_size);
+    debugfs_create_atomic_t("stored_pages", 0444,
+                zswap_debugfs_root, &zswap_stored_pages);
     debugfs_create_atomic_t("same_filled_pages", 0444,
-            zswap_debugfs_root, &zswap_same_filled_pages);
+                zswap_debugfs_root, &zswap_same_filled_pages);

     return 0;
 }
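The zswap hunk re-tests zswap_is_full() after zswap_shrink(): shrinking evicts a single entry and runs concurrently with other stores, so the store path must verify that reclaim actually brought the pool under max_pool_percent rather than assume it did. The check-reclaim-recheck shape, as a small runnable model with a counter standing in for the pool:

    #include <errno.h>
    #include <stdbool.h>

    static long pool_pages;
    static const long pool_limit = 100;   /* max_pool_percent analogue */

    static bool pool_is_full(void) { return pool_pages >= pool_limit; }
    static void pool_shrink(void)  { if (pool_pages > 0) pool_pages--; }

    int store_page(void)
    {
        if (pool_is_full()) {
            pool_shrink();
            /* shrink freed at most one entry, and other writers may
             * have refilled the pool meanwhile: re-test before storing */
            if (pool_is_full())
                return -ENOMEM;
        }
        pool_pages++;   /* ... compress and insert the page ... */
        return 0;
    }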