summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/cma.c3
-rw-r--r--mm/filemap.c7
-rw-r--r--mm/gup.c3
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/hugetlb.c66
-rw-r--r--mm/kmemleak.c8
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/memcontrol.c9
-rw-r--r--mm/memory-failure.c12
-rw-r--r--mm/memory_hotplug.c29
-rw-r--r--mm/mremap.c30
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/page_alloc.c135
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/slab.c45
-rw-r--r--mm/slab.h1
-rw-r--r--mm/slab_common.c4
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/vmscan.c2
20 files changed, 177 insertions, 196 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index be0ee11fa0d9..86e3e0e74d20 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -187,7 +187,7 @@ config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on ARCH_ENABLE_MEMORY_HOTPLUG
- depends on !KASAN
+ depends on COMPILE_TEST || !KASAN
config MEMORY_HOTPLUG_SPARSE
def_bool y
diff --git a/mm/cma.c b/mm/cma.c
index 384c2cb51b56..c960459eda7e 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -385,6 +385,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
bitmap_maxno = cma_bitmap_maxno(cma);
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+ if (bitmap_count > bitmap_maxno)
+ return NULL;
+
for (;;) {
mutex_lock(&cma->lock);
bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
diff --git a/mm/filemap.c b/mm/filemap.c
index 849f459ad078..50b52fe51937 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -790,9 +790,7 @@ EXPORT_SYMBOL(__page_cache_alloc);
*/
wait_queue_head_t *page_waitqueue(struct page *page)
{
- const struct zone *zone = page_zone(page);
-
- return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
+ return bit_waitqueue(page, 0);
}
EXPORT_SYMBOL(page_waitqueue);
@@ -1734,6 +1732,9 @@ find_page:
if (inode->i_blkbits == PAGE_SHIFT ||
!mapping->a_ops->is_partially_uptodate)
goto page_not_up_to_date;
+ /* pipes can't handle partially uptodate pages */
+ if (unlikely(iter->type & ITER_PIPE))
+ goto page_not_up_to_date;
if (!trylock_page(page))
goto page_not_up_to_date;
/* Did it get truncated before we got the lock? */
diff --git a/mm/gup.c b/mm/gup.c
index 7aa113c2d373..ec4f82704b6f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -526,7 +526,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
*/
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
@@ -631,7 +631,6 @@ next_page:
} while (nr_pages);
return i;
}
-EXPORT_SYMBOL(__get_user_pages);
bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cdcd25cb30fe..eff3de359d50 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1426,11 +1426,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd)
+ pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
struct mm_struct *mm = vma->vm_mm;
+ bool force_flush = false;
if ((old_addr & ~HPAGE_PMD_MASK) ||
(new_addr & ~HPAGE_PMD_MASK) ||
@@ -1455,6 +1456,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+ if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd))
+ force_flush = true;
pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
VM_BUG_ON(!pmd_none(*new_pmd));
@@ -1467,6 +1470,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
+ if (force_flush)
+ flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+ else
+ *need_flush = true;
spin_unlock(old_ptl);
return true;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ec49d9ef1eef..418bf01a50ed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
* is not the case is if a reserve map was changed between calls. It
* is the responsibility of the caller to notice the difference and
* take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed. It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
*/
enum vma_resv_mode {
VMA_NEEDS_RESV,
VMA_COMMIT_RESV,
VMA_END_RESV,
+ VMA_ADD_RESV,
};
static long __vma_reservation_common(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
region_abort(resv, idx, idx + 1);
ret = 0;
break;
+ case VMA_ADD_RESV:
+ if (vma->vm_flags & VM_MAYSHARE)
+ ret = region_add(resv, idx, idx + 1);
+ else {
+ region_abort(resv, idx, idx + 1);
+ ret = region_del(resv, idx, idx + 1);
+ }
+ break;
default:
BUG();
}
@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
(void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
}
+static long vma_add_reservation(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths. In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed. If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page. When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map. Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long address,
+ struct page *page)
+{
+ if (unlikely(PagePrivate(page))) {
+ long rc = vma_needs_reservation(h, vma, address);
+
+ if (unlikely(rc < 0)) {
+ /*
+ * Rare out of memory condition in reserve map
+ * manipulation. Clear PagePrivate so that
+ * global reserve count will not be incremented
+ * by free_huge_page. This will make it appear
+ * as though the reservation for this page was
+ * consumed. This may prevent the task from
+ * faulting in the page at a later time. This
+ * is better than inconsistent global huge page
+ * accounting of reserve counts.
+ */
+ ClearPagePrivate(page);
+ } else if (rc) {
+ rc = vma_add_reservation(h, vma, address);
+ if (unlikely(rc < 0))
+ /*
+ * See above comment about rare out of
+ * memory condition.
+ */
+ ClearPagePrivate(page);
+ } else
+ vma_end_reservation(h, vma, address);
+ }
+}
+
struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve)
{
@@ -3498,6 +3562,7 @@ retry_avoidcopy:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out_release_all:
+ restore_reserve_on_error(h, vma, address, new_page);
put_page(new_page);
out_release_old:
put_page(old_page);
@@ -3680,6 +3745,7 @@ backout:
spin_unlock(ptl);
backout_unlocked:
unlock_page(page);
+ restore_reserve_on_error(h, vma, address, page);
put_page(page);
goto out;
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a5e453cf05c4..d1380ed93fdf 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1414,6 +1414,7 @@ static void kmemleak_scan(void)
/* data/bss scanning */
scan_large_block(_sdata, _edata);
scan_large_block(__bss_start, __bss_stop);
+ scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init);
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
@@ -1453,8 +1454,11 @@ static void kmemleak_scan(void)
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- scan_block(task_stack_page(p), task_stack_page(p) +
- THREAD_SIZE, NULL);
+ void *stack = try_get_task_stack(p);
+ if (stack) {
+ scan_block(stack, stack + THREAD_SIZE, NULL);
+ put_task_stack(p);
+ }
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 1d05cb9d363d..234676e31edd 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
err = memcg_init_list_lru(lru, memcg_aware);
if (err) {
kfree(lru->node);
+ /* Do this so a list_lru_destroy() doesn't crash: */
+ lru->node = NULL;
goto out;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae052b5e3315..0f870ba43942 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1917,6 +1917,15 @@ retry:
current->flags & PF_EXITING))
goto force;
+ /*
+ * Prevent unbounded recursion when reclaim operations need to
+ * allocate memory. This might exceed the limits temporarily,
+ * but we prefer facilitating memory reclaim and getting back
+ * under the limit over triggering OOM kills in these cases.
+ */
+ if (unlikely(current->flags & PF_MEMALLOC))
+ goto force;
+
if (unlikely(task_in_memcg_oom(current)))
goto nomem;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index de88f33519c0..19e796d36a62 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1112,10 +1112,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
if (!PageHuge(p) && PageTransHuge(hpage)) {
- lock_page(hpage);
- if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
- unlock_page(hpage);
- if (!PageAnon(hpage))
+ lock_page(p);
+ if (!PageAnon(p) || unlikely(split_huge_page(p))) {
+ unlock_page(p);
+ if (!PageAnon(p))
pr_err("Memory failure: %#lx: non anonymous thp\n",
pfn);
else
@@ -1126,9 +1126,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
put_hwpoison_page(p);
return -EBUSY;
}
- unlock_page(hpage);
- get_hwpoison_page(p);
- put_hwpoison_page(hpage);
+ unlock_page(p);
VM_BUG_ON_PAGE(!page_count(p), p);
hpage = compound_head(p);
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 962927309b6e..cad4b9125695 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
unsigned long i, pfn, end_pfn, nr_pages;
int node = pgdat->node_id;
struct page *page;
- struct zone *zone;
nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
page = virt_to_page(pgdat);
@@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
for (i = 0; i < nr_pages; i++, page++)
get_page_bootmem(node, page, NODE_INFO);
- zone = &pgdat->node_zones[0];
- for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
- if (zone_is_initialized(zone)) {
- nr_pages = zone->wait_table_hash_nr_entries
- * sizeof(wait_queue_head_t);
- nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
- page = virt_to_page(zone->wait_table);
-
- for (i = 0; i < nr_pages; i++, page++)
- get_page_bootmem(node, page, NODE_INFO);
- }
- }
-
pfn = pgdat->node_start_pfn;
end_pfn = pgdat_end_pfn(pgdat);
@@ -2131,7 +2117,6 @@ void try_offline_node(int nid)
unsigned long start_pfn = pgdat->node_start_pfn;
unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
unsigned long pfn;
- int i;
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -2158,20 +2143,6 @@ void try_offline_node(int nid)
*/
node_set_offline(nid);
unregister_one_node(nid);
-
- /* free waittable in each zone */
- for (i = 0; i < MAX_NR_ZONES; i++) {
- struct zone *zone = pgdat->node_zones + i;
-
- /*
- * wait_table may be allocated from boot memory,
- * here only free if it's allocated by vmalloc.
- */
- if (is_vmalloc_addr(zone->wait_table)) {
- vfree(zone->wait_table);
- zone->wait_table = NULL;
- }
- }
}
EXPORT_SYMBOL(try_offline_node);
diff --git a/mm/mremap.c b/mm/mremap.c
index da22ad2a5678..6ccecc03f56a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -104,11 +104,13 @@ static pte_t move_soft_dirty_pte(pte_t pte)
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
unsigned long old_addr, unsigned long old_end,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
- unsigned long new_addr, bool need_rmap_locks)
+ unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
{
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
+ bool force_flush = false;
+ unsigned long len = old_end - old_addr;
/*
* When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
@@ -146,6 +148,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
new_pte++, new_addr += PAGE_SIZE) {
if (pte_none(*old_pte))
continue;
+
+ /*
+ * We are remapping a dirty PTE, make sure to
+ * flush TLB before we drop the PTL for the
+ * old PTE or we may race with page_mkclean().
+ */
+ if (pte_present(*old_pte) && pte_dirty(*old_pte))
+ force_flush = true;
pte = ptep_get_and_clear(mm, old_addr, old_pte);
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
@@ -156,6 +166,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
+ if (force_flush)
+ flush_tlb_range(vma, old_end - len, old_end);
+ else
+ *need_flush = true;
pte_unmap_unlock(old_pte - 1, old_ptl);
if (need_rmap_locks)
drop_rmap_locks(vma);
@@ -201,13 +215,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_huge_pmd(vma, old_addr, new_addr,
- old_end, old_pmd, new_pmd);
+ old_end, old_pmd, new_pmd,
+ &need_flush);
if (need_rmap_locks)
drop_rmap_locks(vma);
- if (moved) {
- need_flush = true;
+ if (moved)
continue;
- }
}
split_huge_pmd(vma, old_pmd, old_addr);
if (pmd_trans_unstable(old_pmd))
@@ -220,11 +233,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
extent = next - new_addr;
if (extent > LATENCY_LIMIT)
extent = LATENCY_LIMIT;
- move_ptes(vma, old_pmd, old_addr, old_addr + extent,
- new_vma, new_pmd, new_addr, need_rmap_locks);
- need_flush = true;
+ move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
+ new_pmd, new_addr, need_rmap_locks, &need_flush);
}
- if (likely(need_flush))
+ if (need_flush)
flush_tlb_range(vma, old_end-len, old_addr);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
diff --git a/mm/nommu.c b/mm/nommu.c
index db5fd1795298..8b8faaf2a9e9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -109,7 +109,7 @@ unsigned int kobjsize(const void *objp)
return PAGE_SIZE << compound_order(page);
}
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int foll_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b3bf6767d54..6de9440e3ae2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -92,7 +92,7 @@ int _node_numa_mem_[MAX_NUMNODES];
#endif
#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
-volatile u64 latent_entropy __latent_entropy;
+volatile unsigned long latent_entropy __latent_entropy;
EXPORT_SYMBOL(latent_entropy);
#endif
@@ -3658,7 +3658,7 @@ retry:
/* Make sure we know about allocations which stall for too long */
if (time_after(jiffies, alloc_start + stall_timeout)) {
warn_alloc(gfp_mask,
- "page alloction stalls for %ums, order:%u\n",
+ "page allocation stalls for %ums, order:%u",
jiffies_to_msecs(jiffies-alloc_start), order);
stall_timeout += 10 * HZ;
}
@@ -4224,7 +4224,7 @@ static void show_migration_types(unsigned char type)
}
*p = '\0';
- printk("(%s) ", tmp);
+ printk(KERN_CONT "(%s) ", tmp);
}
/*
@@ -4335,7 +4335,8 @@ void show_free_areas(unsigned int filter)
free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
show_node(zone);
- printk("%s"
+ printk(KERN_CONT
+ "%s"
" free:%lukB"
" min:%lukB"
" low:%lukB"
@@ -4382,8 +4383,8 @@ void show_free_areas(unsigned int filter)
K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
- printk(" %ld", zone->lowmem_reserve[i]);
- printk("\n");
+ printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+ printk(KERN_CONT "\n");
}
for_each_populated_zone(zone) {
@@ -4394,7 +4395,7 @@ void show_free_areas(unsigned int filter)
if (skip_free_areas_node(filter, zone_to_nid(zone)))
continue;
show_node(zone);
- printk("%s: ", zone->name);
+ printk(KERN_CONT "%s: ", zone->name);
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
@@ -4412,11 +4413,12 @@ void show_free_areas(unsigned int filter)
}
spin_unlock_irqrestore(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
- printk("%lu*%lukB ", nr[order], K(1UL) << order);
+ printk(KERN_CONT "%lu*%lukB ",
+ nr[order], K(1UL) << order);
if (nr[order])
show_migration_types(types[order]);
}
- printk("= %lukB\n", K(total));
+ printk(KERN_CONT "= %lukB\n", K(total));
}
hugetlb_show_meminfo();
@@ -4977,72 +4979,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
}
/*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE 256
-
-#ifndef CONFIG_MEMORY_HOTPLUG
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
- unsigned long size = 1;
-
- pages /= PAGES_PER_WAITQUEUE;
-
- while (size < pages)
- size <<= 1;
-
- /*
- * Once we have dozens or even hundreds of threads sleeping
- * on IO we've got bigger problems than wait queue collision.
- * Limit the size of the wait table to a reasonable size.
- */
- size = min(size, 4096UL);
-
- return max(size, 4UL);
-}
-#else
-/*
- * A zone's size might be changed by hot-add, so it is not possible to determine
- * a suitable size for its wait_table. So we use the maximum size now.
- *
- * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie:
- *
- * i386 (preemption config) : 4096 x 16 = 64Kbyte.
- * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
- * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte.
- *
- * The maximum entries are prepared when a zone's memory is (512K + 256) pages
- * or more by the traditional way. (See above). It equals:
- *
- * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte.
- * ia64(16K page size) : = ( 8G + 4M)byte.
- * powerpc (64K page size) : = (32G +16M)byte.
- */
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
- return 4096UL;
-}
-#endif
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
- return ffz(~size);
-}
-
-/*
* Initially all pages are reserved - free ones are freed
* up by free_all_bootmem() once the early boot process is
* done. Non-atomic initialization, single-pass.
@@ -5304,49 +5240,6 @@ void __init setup_per_cpu_pageset(void)
alloc_percpu(struct per_cpu_nodestat);
}
-static noinline __ref
-int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
- int i;
- size_t alloc_size;
-
- /*
- * The per-page waitqueue mechanism uses hashed waitqueues
- * per zone.
- */
- zone->wait_table_hash_nr_entries =
- wait_table_hash_nr_entries(zone_size_pages);
- zone->wait_table_bits =
- wait_table_bits(zone->wait_table_hash_nr_entries);
- alloc_size = zone->wait_table_hash_nr_entries
- * sizeof(wait_queue_head_t);
-
- if (!slab_is_available()) {
- zone->wait_table = (wait_queue_head_t *)
- memblock_virt_alloc_node_nopanic(
- alloc_size, zone->zone_pgdat->node_id);
- } else {
- /*
- * This case means that a zone whose size was 0 gets new memory
- * via memory hot-add.
- * But it may be the case that a new node was hot-added. In
- * this case vmalloc() will not be able to use this new node's
- * memory - this wait_table must be initialized to use this new
- * node itself as well.
- * To use this new node's memory, further consideration will be
- * necessary.
- */
- zone->wait_table = vmalloc(alloc_size);
- }
- if (!zone->wait_table)
- return -ENOMEM;
-
- for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
- init_waitqueue_head(zone->wait_table + i);
-
- return 0;
-}
-
static __meminit void zone_pcp_init(struct zone *zone)
{
/*
@@ -5367,10 +5260,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
unsigned long size)
{
struct pglist_data *pgdat = zone->zone_pgdat;
- int ret;
- ret = zone_wait_table_init(zone, size);
- if (ret)
- return ret;
+
pgdat->nr_zones = zone_idx(zone) + 1;
zone->zone_start_pfn = zone_start_pfn;
@@ -5382,6 +5272,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
zone_start_pfn, (zone_start_pfn + size));
zone_init_free_lists(zone);
+ zone->initialized = 1;
return 0;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index ad7813d73ea7..166ebf5d2bce 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1483,6 +1483,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
copy_highpage(newpage, oldpage);
flush_dcache_page(newpage);
+ __SetPageLocked(newpage);
+ __SetPageSwapBacked(newpage);
SetPageUptodate(newpage);
set_page_private(newpage, swap_index);
SetPageSwapCache(newpage);
diff --git a/mm/slab.c b/mm/slab.c
index 090fb26b3a39..0b0550ca85b4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
+ parent->num_slabs = 0;
}
#define MAKE_LIST(cachep, listp, slab, nodeid) \
@@ -966,7 +967,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
* guaranteed to be valid until irq is re-enabled, because it will be
* freed after synchronize_sched().
*/
- if (force_change)
+ if (old_shared && force_change)
synchronize_sched();
fail:
@@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
+ unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+ unsigned long num_slabs_full;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->slabs_full, lru) {
- active_objs += cachep->num;
- active_slabs++;
- }
+ num_slabs = n->num_slabs;
list_for_each_entry(page, &n->slabs_partial, lru) {
active_objs += page->active;
- active_slabs++;
+ num_slabs_partial++;
}
list_for_each_entry(page, &n->slabs_free, lru)
- num_slabs++;
+ num_slabs_free++;
free_objects += n->free_objects;
spin_unlock_irqrestore(&n->list_lock, flags);
- num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
+ active_slabs = num_slabs - num_slabs_free;
+ num_slabs_full = num_slabs -
+ (num_slabs_partial + num_slabs_free);
+ active_objs += (num_slabs_full * cachep->num);
+
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
node, active_slabs, num_slabs, active_objs, num_objs,
free_objects);
@@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache,
page = list_entry(p, struct page, lru);
list_del(&page->lru);
+ n->num_slabs--;
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
@@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
list_add_tail(&page->lru, &(n->slabs_free));
else
fixup_slab_list(cachep, n, page, &list);
+
+ n->num_slabs++;
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
spin_unlock(&n->list_lock);
@@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
page = list_last_entry(&n->slabs_free, struct page, lru);
list_move(&page->lru, list);
+ n->num_slabs--;
}
}
@@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
unsigned long num_objs;
unsigned long active_slabs = 0;
unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+ unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+ unsigned long num_slabs_full = 0;
const char *name;
char *error = NULL;
int node;
@@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
check_irq_on();
spin_lock_irq(&n->list_lock);
- list_for_each_entry(page, &n->slabs_full, lru) {
- if (page->active != cachep->num && !error)
- error = "slabs_full accounting error";
- active_objs += cachep->num;
- active_slabs++;
- }
+ num_slabs += n->num_slabs;
+
list_for_each_entry(page, &n->slabs_partial, lru) {
if (page->active == cachep->num && !error)
error = "slabs_partial accounting error";
if (!page->active && !error)
error = "slabs_partial accounting error";
active_objs += page->active;
- active_slabs++;
+ num_slabs_partial++;
}
+
list_for_each_entry(page, &n->slabs_free, lru) {
if (page->active && !error)
error = "slabs_free accounting error";
- num_slabs++;
+ num_slabs_free++;
}
+
free_objects += n->free_objects;
if (n->shared)
shared_avail += n->shared->avail;
spin_unlock_irq(&n->list_lock);
}
- num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
+ active_slabs = num_slabs - num_slabs_free;
+ num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+ active_objs += (num_slabs_full * cachep->num);
+
if (num_objs - active_objs != free_objects && !error)
error = "free_objects accounting error";
diff --git a/mm/slab.h b/mm/slab.h
index 9653f2e2591a..bc05fdc3edce 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,6 +432,7 @@ struct kmem_cache_node {
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
+ unsigned long num_slabs;
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 71f0b28a1bec..329b03843863 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -533,8 +533,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
s = create_cache(cache_name, root_cache->object_size,
root_cache->size, root_cache->align,
- root_cache->flags, root_cache->ctor,
- memcg, root_cache);
+ root_cache->flags & CACHE_CREATE_MASK,
+ root_cache->ctor, memcg, root_cache);
/*
* If we could not create a memcg cache, do not complain, because
* that's not critical at all as we can always proceed with the root
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2210de290b54..f30438970cd1 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2224,6 +2224,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
swab32s(&swap_header->info.version);
swab32s(&swap_header->info.last_page);
swab32s(&swap_header->info.nr_badpages);
+ if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
+ return 0;
for (i = 0; i < swap_header->info.nr_badpages; i++)
swab32s(&swap_header->info.badpages[i]);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 744f926af442..76fda2268148 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.gfp_mask,
sc.reclaim_idx);
+ current->flags |= PF_MEMALLOC;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+ current->flags &= ~PF_MEMALLOC;
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
OpenPOWER on IntegriCloud