diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/fremap.c | 8 | ||||
-rw-r--r-- | mm/huge_memory.c | 3 | ||||
-rw-r--r-- | mm/memcontrol.c | 2 | ||||
-rw-r--r-- | mm/memory-failure.c | 10 | ||||
-rw-r--r-- | mm/mlock.c | 44 |
5 files changed, 48 insertions, 19 deletions
diff --git a/mm/fremap.c b/mm/fremap.c index 5bff08147768..bbc4d660221a 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -208,9 +208,10 @@ get_write_lock: if (mapping_cap_account_dirty(mapping)) { unsigned long addr; struct file *file = get_file(vma->vm_file); + /* mmap_region may free vma; grab the info now */ + vm_flags = vma->vm_flags; - addr = mmap_region(file, start, size, - vma->vm_flags, pgoff); + addr = mmap_region(file, start, size, vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; @@ -218,7 +219,7 @@ get_write_lock: BUG_ON(addr != start); err = 0; } - goto out; + goto out_freed; } mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); @@ -253,6 +254,7 @@ get_write_lock: out: if (vma) vm_flags = vma->vm_flags; +out_freed: if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7de1bf85f683..9c0b17295ba0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -883,9 +883,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_unlock; } - /* mmap_sem prevents this happening but warn if that changes */ - WARN_ON(pmd_trans_migrating(pmd)); - if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ spin_unlock(src_ptl); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bf5e89457149..7f1a356153c0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -338,7 +338,7 @@ struct mem_cgroup { static size_t memcg_size(void) { return sizeof(struct mem_cgroup) + - nr_node_ids * sizeof(struct mem_cgroup_per_node); + nr_node_ids * sizeof(struct mem_cgroup_per_node *); } /* internal only representation about the status of kmem accounting. */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index db08af92c6fc..fabe55046c1d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -938,6 +938,16 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, BUG_ON(!PageHWPoison(p)); return SWAP_FAIL; } + /* + * We pinned the head page for hwpoison handling, + * now we split the thp and we are interested in + * the hwpoisoned raw page, so move the refcount + * to it. + */ + if (hpage != p) { + put_page(hpage); + get_page(p); + } /* THP is split, so ppage should be the real poisoned page. */ ppage = p; } diff --git a/mm/mlock.c b/mm/mlock.c index d480cd6fc475..192e6eebe4f2 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page) /** * munlock_vma_page - munlock a vma page - * @page - page to be unlocked + * @page - page to be unlocked, either a normal page or THP page head + * + * returns the size of the page as a page mask (0 for normal page, + * HPAGE_PMD_NR - 1 for THP head page) * * called from munlock()/munmap() path with page supposedly on the LRU. * When we munlock a page, because the vma where we found the page is being @@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int page_mask = 0; + unsigned int nr_pages; BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { - unsigned int nr_pages = hpage_nr_pages(page); + nr_pages = hpage_nr_pages(page); mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - page_mask = nr_pages - 1; if (!isolate_lru_page(page)) __munlock_isolated_page(page); else __munlock_isolation_failed(page); + } else { + nr_pages = hpage_nr_pages(page); } - return page_mask; + /* + * Regardless of the original PageMlocked flag, we determine nr_pages + * after touching the flag. This leaves a possible race with a THP page + * split, such that a whole THP page was munlocked, but nr_pages == 1. + * Returning a smaller mask due to that is OK, the worst that can + * happen is subsequent useless scanning of the former tail pages. + * The NR_MLOCK accounting can however become broken. + */ + return nr_pages - 1; } /** @@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked = -nr; + int delta_munlocked; struct pagevec pvec_putback; int pgrescued = 0; + pagevec_init(&pvec_putback, 0); + /* Phase 1: page isolation */ spin_lock_irq(&zone->lru_lock); for (i = 0; i < nr; i++) { @@ -318,18 +332,21 @@ skip_munlock: /* * We won't be munlocking this page in the next phase * but we still need to release the follow_page_mask() - * pin. + * pin. We cannot do it under lru_lock however. If it's + * the last pin, __page_cache_release would deadlock. */ + pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; - put_page(page); - delta_munlocked++; } } + delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(&zone->lru_lock); + /* Now we can release pins of pages that we are not munlocking */ + pagevec_release(&pvec_putback); + /* Phase 2: page munlock */ - pagevec_init(&pvec_putback, 0); for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; @@ -440,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page = NULL; - unsigned int page_mask, page_increm; + unsigned int page_mask; + unsigned long page_increm; struct pagevec pvec; struct zone *zone; int zoneid; @@ -490,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, goto next; } } - page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + /* It's a bug to munlock in the middle of a THP page */ + VM_BUG_ON((start >> PAGE_SHIFT) & page_mask); + page_increm = 1 + page_mask; start += page_increm * PAGE_SIZE; next: cond_resched(); |