diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 2 | ||||
-rw-r--r-- | mm/cma.c | 3 | ||||
-rw-r--r-- | mm/filemap.c | 7 | ||||
-rw-r--r-- | mm/gup.c | 3 | ||||
-rw-r--r-- | mm/huge_memory.c | 9 | ||||
-rw-r--r-- | mm/hugetlb.c | 66 | ||||
-rw-r--r-- | mm/kmemleak.c | 8 | ||||
-rw-r--r-- | mm/list_lru.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 9 | ||||
-rw-r--r-- | mm/memory-failure.c | 12 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 29 | ||||
-rw-r--r-- | mm/mremap.c | 30 | ||||
-rw-r--r-- | mm/nommu.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 135 | ||||
-rw-r--r-- | mm/shmem.c | 2 | ||||
-rw-r--r-- | mm/slab.c | 45 | ||||
-rw-r--r-- | mm/slab.h | 1 | ||||
-rw-r--r-- | mm/slab_common.c | 4 | ||||
-rw-r--r-- | mm/swapfile.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
20 files changed, 177 insertions, 196 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index be0ee11fa0d9..86e3e0e74d20 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -187,7 +187,7 @@ config MEMORY_HOTPLUG bool "Allow for memory hot-add" depends on SPARSEMEM || X86_64_ACPI_NUMA depends on ARCH_ENABLE_MEMORY_HOTPLUG - depends on !KASAN + depends on COMPILE_TEST || !KASAN config MEMORY_HOTPLUG_SPARSE def_bool y @@ -385,6 +385,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align) bitmap_maxno = cma_bitmap_maxno(cma); bitmap_count = cma_bitmap_pages_to_bits(cma, count); + if (bitmap_count > bitmap_maxno) + return NULL; + for (;;) { mutex_lock(&cma->lock); bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap, diff --git a/mm/filemap.c b/mm/filemap.c index 849f459ad078..50b52fe51937 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -790,9 +790,7 @@ EXPORT_SYMBOL(__page_cache_alloc); */ wait_queue_head_t *page_waitqueue(struct page *page) { - const struct zone *zone = page_zone(page); - - return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)]; + return bit_waitqueue(page, 0); } EXPORT_SYMBOL(page_waitqueue); @@ -1734,6 +1732,9 @@ find_page: if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; + /* pipes can't handle partially uptodate pages */ + if (unlikely(iter->type & ITER_PIPE)) + goto page_not_up_to_date; if (!trylock_page(page)) goto page_not_up_to_date; /* Did it get truncated before we got the lock? */ @@ -526,7 +526,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. */ -long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *nonblocking) @@ -631,7 +631,6 @@ next_page: } while (nr_pages); return i; } -EXPORT_SYMBOL(__get_user_pages); bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cdcd25cb30fe..eff3de359d50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1426,11 +1426,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd) + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; struct mm_struct *mm = vma->vm_mm; + bool force_flush = false; if ((old_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK) || @@ -1455,6 +1456,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd)) + force_flush = true; pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1467,6 +1470,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); if (new_ptl != old_ptl) spin_unlock(new_ptl); + if (force_flush) + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); + else + *need_flush = true; spin_unlock(old_ptl); return true; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ec49d9ef1eef..418bf01a50ed 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h, * is not the case is if a reserve map was changed between calls. It * is the responsibility of the caller to notice the difference and * take appropriate action. + * + * vma_add_reservation is used in error paths where a reservation must + * be restored when a newly allocated huge page must be freed. It is + * to be called after calling vma_needs_reservation to determine if a + * reservation exists. */ enum vma_resv_mode { VMA_NEEDS_RESV, VMA_COMMIT_RESV, VMA_END_RESV, + VMA_ADD_RESV, }; static long __vma_reservation_common(struct hstate *h, struct vm_area_struct *vma, unsigned long addr, @@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h, region_abort(resv, idx, idx + 1); ret = 0; break; + case VMA_ADD_RESV: + if (vma->vm_flags & VM_MAYSHARE) + ret = region_add(resv, idx, idx + 1); + else { + region_abort(resv, idx, idx + 1); + ret = region_del(resv, idx, idx + 1); + } + break; default: BUG(); } @@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h, (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV); } +static long vma_add_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) +{ + return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); +} + +/* + * This routine is called to restore a reservation on error paths. In the + * specific error paths, a huge page was allocated (via alloc_huge_page) + * and is about to be freed. If a reservation for the page existed, + * alloc_huge_page would have consumed the reservation and set PagePrivate + * in the newly allocated page. When the page is freed via free_huge_page, + * the global reservation count will be incremented if PagePrivate is set. + * However, free_huge_page can not adjust the reserve map. Adjust the + * reserve map here to be consistent with global reserve count adjustments + * to be made by free_huge_page. + */ +static void restore_reserve_on_error(struct hstate *h, + struct vm_area_struct *vma, unsigned long address, + struct page *page) +{ + if (unlikely(PagePrivate(page))) { + long rc = vma_needs_reservation(h, vma, address); + + if (unlikely(rc < 0)) { + /* + * Rare out of memory condition in reserve map + * manipulation. Clear PagePrivate so that + * global reserve count will not be incremented + * by free_huge_page. This will make it appear + * as though the reservation for this page was + * consumed. This may prevent the task from + * faulting in the page at a later time. This + * is better than inconsistent global huge page + * accounting of reserve counts. + */ + ClearPagePrivate(page); + } else if (rc) { + rc = vma_add_reservation(h, vma, address); + if (unlikely(rc < 0)) + /* + * See above comment about rare out of + * memory condition. + */ + ClearPagePrivate(page); + } else + vma_end_reservation(h, vma, address); + } +} + struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { @@ -3498,6 +3562,7 @@ retry_avoidcopy: spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out_release_all: + restore_reserve_on_error(h, vma, address, new_page); put_page(new_page); out_release_old: put_page(old_page); @@ -3680,6 +3745,7 @@ backout: spin_unlock(ptl); backout_unlocked: unlock_page(page); + restore_reserve_on_error(h, vma, address, page); put_page(page); goto out; } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a5e453cf05c4..d1380ed93fdf 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1414,6 +1414,7 @@ static void kmemleak_scan(void) /* data/bss scanning */ scan_large_block(_sdata, _edata); scan_large_block(__bss_start, __bss_stop); + scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init); #ifdef CONFIG_SMP /* per-cpu sections scanning */ @@ -1453,8 +1454,11 @@ static void kmemleak_scan(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - scan_block(task_stack_page(p), task_stack_page(p) + - THREAD_SIZE, NULL); + void *stack = try_get_task_stack(p); + if (stack) { + scan_block(stack, stack + THREAD_SIZE, NULL); + put_task_stack(p); + } } while_each_thread(g, p); read_unlock(&tasklist_lock); } diff --git a/mm/list_lru.c b/mm/list_lru.c index 1d05cb9d363d..234676e31edd 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, err = memcg_init_list_lru(lru, memcg_aware); if (err) { kfree(lru->node); + /* Do this so a list_lru_destroy() doesn't crash: */ + lru->node = NULL; goto out; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae052b5e3315..0f870ba43942 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1917,6 +1917,15 @@ retry: current->flags & PF_EXITING)) goto force; + /* + * Prevent unbounded recursion when reclaim operations need to + * allocate memory. This might exceed the limits temporarily, + * but we prefer facilitating memory reclaim and getting back + * under the limit over triggering OOM kills in these cases. + */ + if (unlikely(current->flags & PF_MEMALLOC)) + goto force; + if (unlikely(task_in_memcg_oom(current))) goto nomem; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index de88f33519c0..19e796d36a62 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1112,10 +1112,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } if (!PageHuge(p) && PageTransHuge(hpage)) { - lock_page(hpage); - if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) { - unlock_page(hpage); - if (!PageAnon(hpage)) + lock_page(p); + if (!PageAnon(p) || unlikely(split_huge_page(p))) { + unlock_page(p); + if (!PageAnon(p)) pr_err("Memory failure: %#lx: non anonymous thp\n", pfn); else @@ -1126,9 +1126,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) put_hwpoison_page(p); return -EBUSY; } - unlock_page(hpage); - get_hwpoison_page(p); - put_hwpoison_page(hpage); + unlock_page(p); VM_BUG_ON_PAGE(!page_count(p), p); hpage = compound_head(p); } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 962927309b6e..cad4b9125695 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) unsigned long i, pfn, end_pfn, nr_pages; int node = pgdat->node_id; struct page *page; - struct zone *zone; nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT; page = virt_to_page(pgdat); @@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) for (i = 0; i < nr_pages; i++, page++) get_page_bootmem(node, page, NODE_INFO); - zone = &pgdat->node_zones[0]; - for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { - if (zone_is_initialized(zone)) { - nr_pages = zone->wait_table_hash_nr_entries - * sizeof(wait_queue_head_t); - nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; - page = virt_to_page(zone->wait_table); - - for (i = 0; i < nr_pages; i++, page++) - get_page_bootmem(node, page, NODE_INFO); - } - } - pfn = pgdat->node_start_pfn; end_pfn = pgdat_end_pfn(pgdat); @@ -2131,7 +2117,6 @@ void try_offline_node(int nid) unsigned long start_pfn = pgdat->node_start_pfn; unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; unsigned long pfn; - int i; for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { unsigned long section_nr = pfn_to_section_nr(pfn); @@ -2158,20 +2143,6 @@ void try_offline_node(int nid) */ node_set_offline(nid); unregister_one_node(nid); - - /* free waittable in each zone */ - for (i = 0; i < MAX_NR_ZONES; i++) { - struct zone *zone = pgdat->node_zones + i; - - /* - * wait_table may be allocated from boot memory, - * here only free if it's allocated by vmalloc. - */ - if (is_vmalloc_addr(zone->wait_table)) { - vfree(zone->wait_table); - zone->wait_table = NULL; - } - } } EXPORT_SYMBOL(try_offline_node); diff --git a/mm/mremap.c b/mm/mremap.c index da22ad2a5678..6ccecc03f56a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -104,11 +104,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks) + unsigned long new_addr, bool need_rmap_locks, bool *need_flush) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + bool force_flush = false; + unsigned long len = old_end - old_addr; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -146,6 +148,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte++, new_addr += PAGE_SIZE) { if (pte_none(*old_pte)) continue; + + /* + * We are remapping a dirty PTE, make sure to + * flush TLB before we drop the PTL for the + * old PTE or we may race with page_mkclean(). + */ + if (pte_present(*old_pte) && pte_dirty(*old_pte)) + force_flush = true; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -156,6 +166,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); + else + *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -201,13 +215,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd); + old_end, old_pmd, new_pmd, + &need_flush); if (need_rmap_locks) drop_rmap_locks(vma); - if (moved) { - need_flush = true; + if (moved) continue; - } } split_huge_pmd(vma, old_pmd, old_addr); if (pmd_trans_unstable(old_pmd)) @@ -220,11 +233,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = next - new_addr; if (extent > LATENCY_LIMIT) extent = LATENCY_LIMIT; - move_ptes(vma, old_pmd, old_addr, old_addr + extent, - new_vma, new_pmd, new_addr, need_rmap_locks); - need_flush = true; + move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, + new_pmd, new_addr, need_rmap_locks, &need_flush); } - if (likely(need_flush)) + if (need_flush) flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); diff --git a/mm/nommu.c b/mm/nommu.c index db5fd1795298..8b8faaf2a9e9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -109,7 +109,7 @@ unsigned int kobjsize(const void *objp) return PAGE_SIZE << compound_order(page); } -long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int foll_flags, struct page **pages, struct vm_area_struct **vmas, int *nonblocking) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2b3bf6767d54..6de9440e3ae2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -92,7 +92,7 @@ int _node_numa_mem_[MAX_NUMNODES]; #endif #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY -volatile u64 latent_entropy __latent_entropy; +volatile unsigned long latent_entropy __latent_entropy; EXPORT_SYMBOL(latent_entropy); #endif @@ -3658,7 +3658,7 @@ retry: /* Make sure we know about allocations which stall for too long */ if (time_after(jiffies, alloc_start + stall_timeout)) { warn_alloc(gfp_mask, - "page alloction stalls for %ums, order:%u\n", + "page allocation stalls for %ums, order:%u", jiffies_to_msecs(jiffies-alloc_start), order); stall_timeout += 10 * HZ; } @@ -4224,7 +4224,7 @@ static void show_migration_types(unsigned char type) } *p = '\0'; - printk("(%s) ", tmp); + printk(KERN_CONT "(%s) ", tmp); } /* @@ -4335,7 +4335,8 @@ void show_free_areas(unsigned int filter) free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; show_node(zone); - printk("%s" + printk(KERN_CONT + "%s" " free:%lukB" " min:%lukB" " low:%lukB" @@ -4382,8 +4383,8 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_FREE_CMA_PAGES))); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %ld", zone->lowmem_reserve[i]); - printk("\n"); + printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); + printk(KERN_CONT "\n"); } for_each_populated_zone(zone) { @@ -4394,7 +4395,7 @@ void show_free_areas(unsigned int filter) if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); - printk("%s: ", zone->name); + printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { @@ -4412,11 +4413,12 @@ void show_free_areas(unsigned int filter) } spin_unlock_irqrestore(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { - printk("%lu*%lukB ", nr[order], K(1UL) << order); + printk(KERN_CONT "%lu*%lukB ", + nr[order], K(1UL) << order); if (nr[order]) show_migration_types(types[order]); } - printk("= %lukB\n", K(total)); + printk(KERN_CONT "= %lukB\n", K(total)); } hugetlb_show_meminfo(); @@ -4977,72 +4979,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) } /* - * Helper functions to size the waitqueue hash table. - * Essentially these want to choose hash table sizes sufficiently - * large so that collisions trying to wait on pages are rare. - * But in fact, the number of active page waitqueues on typical - * systems is ridiculously low, less than 200. So this is even - * conservative, even though it seems large. - * - * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to - * waitqueues, i.e. the size of the waitq table given the number of pages. - */ -#define PAGES_PER_WAITQUEUE 256 - -#ifndef CONFIG_MEMORY_HOTPLUG -static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) -{ - unsigned long size = 1; - - pages /= PAGES_PER_WAITQUEUE; - - while (size < pages) - size <<= 1; - - /* - * Once we have dozens or even hundreds of threads sleeping - * on IO we've got bigger problems than wait queue collision. - * Limit the size of the wait table to a reasonable size. - */ - size = min(size, 4096UL); - - return max(size, 4UL); -} -#else -/* - * A zone's size might be changed by hot-add, so it is not possible to determine - * a suitable size for its wait_table. So we use the maximum size now. - * - * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie: - * - * i386 (preemption config) : 4096 x 16 = 64Kbyte. - * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte. - * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte. - * - * The maximum entries are prepared when a zone's memory is (512K + 256) pages - * or more by the traditional way. (See above). It equals: - * - * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte. - * ia64(16K page size) : = ( 8G + 4M)byte. - * powerpc (64K page size) : = (32G +16M)byte. - */ -static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) -{ - return 4096UL; -} -#endif - -/* - * This is an integer logarithm so that shifts can be used later - * to extract the more random high bits from the multiplicative - * hash function before the remainder is taken. - */ -static inline unsigned long wait_table_bits(unsigned long size) -{ - return ffz(~size); -} - -/* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is * done. Non-atomic initialization, single-pass. @@ -5304,49 +5240,6 @@ void __init setup_per_cpu_pageset(void) alloc_percpu(struct per_cpu_nodestat); } -static noinline __ref -int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) -{ - int i; - size_t alloc_size; - - /* - * The per-page waitqueue mechanism uses hashed waitqueues - * per zone. - */ - zone->wait_table_hash_nr_entries = - wait_table_hash_nr_entries(zone_size_pages); - zone->wait_table_bits = - wait_table_bits(zone->wait_table_hash_nr_entries); - alloc_size = zone->wait_table_hash_nr_entries - * sizeof(wait_queue_head_t); - - if (!slab_is_available()) { - zone->wait_table = (wait_queue_head_t *) - memblock_virt_alloc_node_nopanic( - alloc_size, zone->zone_pgdat->node_id); - } else { - /* - * This case means that a zone whose size was 0 gets new memory - * via memory hot-add. - * But it may be the case that a new node was hot-added. In - * this case vmalloc() will not be able to use this new node's - * memory - this wait_table must be initialized to use this new - * node itself as well. - * To use this new node's memory, further consideration will be - * necessary. - */ - zone->wait_table = vmalloc(alloc_size); - } - if (!zone->wait_table) - return -ENOMEM; - - for (i = 0; i < zone->wait_table_hash_nr_entries; ++i) - init_waitqueue_head(zone->wait_table + i); - - return 0; -} - static __meminit void zone_pcp_init(struct zone *zone) { /* @@ -5367,10 +5260,7 @@ int __meminit init_currently_empty_zone(struct zone *zone, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; - int ret; - ret = zone_wait_table_init(zone, size); - if (ret) - return ret; + pgdat->nr_zones = zone_idx(zone) + 1; zone->zone_start_pfn = zone_start_pfn; @@ -5382,6 +5272,7 @@ int __meminit init_currently_empty_zone(struct zone *zone, zone_start_pfn, (zone_start_pfn + size)); zone_init_free_lists(zone); + zone->initialized = 1; return 0; } diff --git a/mm/shmem.c b/mm/shmem.c index ad7813d73ea7..166ebf5d2bce 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1483,6 +1483,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, copy_highpage(newpage, oldpage); flush_dcache_page(newpage); + __SetPageLocked(newpage); + __SetPageSwapBacked(newpage); SetPageUptodate(newpage); set_page_private(newpage, swap_index); SetPageSwapCache(newpage); diff --git a/mm/slab.c b/mm/slab.c index 090fb26b3a39..0b0550ca85b4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) spin_lock_init(&parent->list_lock); parent->free_objects = 0; parent->free_touched = 0; + parent->num_slabs = 0; } #define MAKE_LIST(cachep, listp, slab, nodeid) \ @@ -966,7 +967,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, * guaranteed to be valid until irq is re-enabled, because it will be * freed after synchronize_sched(). */ - if (force_change) + if (old_shared && force_change) synchronize_sched(); fail: @@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) for_each_kmem_cache_node(cachep, node, n) { unsigned long active_objs = 0, num_objs = 0, free_objects = 0; unsigned long active_slabs = 0, num_slabs = 0; + unsigned long num_slabs_partial = 0, num_slabs_free = 0; + unsigned long num_slabs_full; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->slabs_full, lru) { - active_objs += cachep->num; - active_slabs++; - } + num_slabs = n->num_slabs; list_for_each_entry(page, &n->slabs_partial, lru) { active_objs += page->active; - active_slabs++; + num_slabs_partial++; } list_for_each_entry(page, &n->slabs_free, lru) - num_slabs++; + num_slabs_free++; free_objects += n->free_objects; spin_unlock_irqrestore(&n->list_lock, flags); - num_slabs += active_slabs; num_objs = num_slabs * cachep->num; + active_slabs = num_slabs - num_slabs_free; + num_slabs_full = num_slabs - + (num_slabs_partial + num_slabs_free); + active_objs += (num_slabs_full * cachep->num); + pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", node, active_slabs, num_slabs, active_objs, num_objs, free_objects); @@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache, page = list_entry(p, struct page, lru); list_del(&page->lru); + n->num_slabs--; /* * Safe to drop the lock. The slab is no longer linked * to the cache. @@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) list_add_tail(&page->lru, &(n->slabs_free)); else fixup_slab_list(cachep, n, page, &list); + + n->num_slabs++; STATS_INC_GROWN(cachep); n->free_objects += cachep->num - page->active; spin_unlock(&n->list_lock); @@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, page = list_last_entry(&n->slabs_free, struct page, lru); list_move(&page->lru, list); + n->num_slabs--; } } @@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) unsigned long num_objs; unsigned long active_slabs = 0; unsigned long num_slabs, free_objects = 0, shared_avail = 0; + unsigned long num_slabs_partial = 0, num_slabs_free = 0; + unsigned long num_slabs_full = 0; const char *name; char *error = NULL; int node; @@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) check_irq_on(); spin_lock_irq(&n->list_lock); - list_for_each_entry(page, &n->slabs_full, lru) { - if (page->active != cachep->num && !error) - error = "slabs_full accounting error"; - active_objs += cachep->num; - active_slabs++; - } + num_slabs += n->num_slabs; + list_for_each_entry(page, &n->slabs_partial, lru) { if (page->active == cachep->num && !error) error = "slabs_partial accounting error"; if (!page->active && !error) error = "slabs_partial accounting error"; active_objs += page->active; - active_slabs++; + num_slabs_partial++; } + list_for_each_entry(page, &n->slabs_free, lru) { if (page->active && !error) error = "slabs_free accounting error"; - num_slabs++; + num_slabs_free++; } + free_objects += n->free_objects; if (n->shared) shared_avail += n->shared->avail; spin_unlock_irq(&n->list_lock); } - num_slabs += active_slabs; num_objs = num_slabs * cachep->num; + active_slabs = num_slabs - num_slabs_free; + num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free); + active_objs += (num_slabs_full * cachep->num); + if (num_objs - active_objs != free_objects && !error) error = "free_objects accounting error"; diff --git a/mm/slab.h b/mm/slab.h index 9653f2e2591a..bc05fdc3edce 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -432,6 +432,7 @@ struct kmem_cache_node { struct list_head slabs_partial; /* partial list first, better asm code */ struct list_head slabs_full; struct list_head slabs_free; + unsigned long num_slabs; unsigned long free_objects; unsigned int free_limit; unsigned int colour_next; /* Per-node cache coloring */ diff --git a/mm/slab_common.c b/mm/slab_common.c index 71f0b28a1bec..329b03843863 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -533,8 +533,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, s = create_cache(cache_name, root_cache->object_size, root_cache->size, root_cache->align, - root_cache->flags, root_cache->ctor, - memcg, root_cache); + root_cache->flags & CACHE_CREATE_MASK, + root_cache->ctor, memcg, root_cache); /* * If we could not create a memcg cache, do not complain, because * that's not critical at all as we can always proceed with the root diff --git a/mm/swapfile.c b/mm/swapfile.c index 2210de290b54..f30438970cd1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2224,6 +2224,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p, swab32s(&swap_header->info.version); swab32s(&swap_header->info.last_page); swab32s(&swap_header->info.nr_badpages); + if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) + return 0; for (i = 0; i < swap_header->info.nr_badpages; i++) swab32s(&swap_header->info.badpages[i]); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 744f926af442..76fda2268148 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, sc.gfp_mask, sc.reclaim_idx); + current->flags |= PF_MEMALLOC; nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + current->flags &= ~PF_MEMALLOC; trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); |