Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 79
1 file changed, 54 insertions, 25 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 097c7a4bfbd9..279a818a39b1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -23,6 +23,7 @@
 #include <linux/pagemap.h>
 #include <linux/migrate.h>
 #include <linux/hashtable.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -716,21 +717,27 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                                         struct vm_area_struct *vma,
-                                        unsigned long haddr, pmd_t *pmd,
-                                        struct page *page, gfp_t gfp)
+                                        unsigned long address, pmd_t *pmd,
+                                        struct page *page, gfp_t gfp,
+                                        unsigned int flags)
 {
         struct mem_cgroup *memcg;
         pgtable_t pgtable;
         spinlock_t *ptl;
+        unsigned long haddr = address & HPAGE_PMD_MASK;
 
         VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-        if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
-                return VM_FAULT_OOM;
+        if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) {
+                put_page(page);
+                count_vm_event(THP_FAULT_FALLBACK);
+                return VM_FAULT_FALLBACK;
+        }
 
         pgtable = pte_alloc_one(mm, haddr);
         if (unlikely(!pgtable)) {
                 mem_cgroup_cancel_charge(page, memcg);
+                put_page(page);
                 return VM_FAULT_OOM;
         }
 
@@ -750,6 +757,21 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                 pte_free(mm, pgtable);
         } else {
                 pmd_t entry;
+
+                /* Deliver the page fault to userland */
+                if (userfaultfd_missing(vma)) {
+                        int ret;
+
+                        spin_unlock(ptl);
+                        mem_cgroup_cancel_charge(page, memcg);
+                        put_page(page);
+                        pte_free(mm, pgtable);
+                        ret = handle_userfault(vma, address, flags,
+                                               VM_UFFD_MISSING);
+                        VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+                        return ret;
+                }
+
                 entry = mk_huge_pmd(page, vma->vm_page_prot);
                 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                 page_add_new_anon_rmap(page, vma, haddr);
@@ -760,6 +782,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                 add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
                 atomic_long_inc(&mm->nr_ptes);
                 spin_unlock(ptl);
+                count_vm_event(THP_FAULT_ALLOC);
         }
 
         return 0;
@@ -771,19 +794,16 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
 }
 
 /* Caller must hold page table lock. */
-static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
+static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                 struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
                 struct page *zero_page)
 {
         pmd_t entry;
-        if (!pmd_none(*pmd))
-                return false;
         entry = mk_pmd(zero_page, vma->vm_page_prot);
         entry = pmd_mkhuge(entry);
         pgtable_trans_huge_deposit(mm, pmd, pgtable);
         set_pmd_at(mm, haddr, pmd, entry);
         atomic_long_inc(&mm->nr_ptes);
-        return true;
 }
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -806,6 +826,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 pgtable_t pgtable;
                 struct page *zero_page;
                 bool set;
+                int ret;
                 pgtable = pte_alloc_one(mm, haddr);
                 if (unlikely(!pgtable))
                         return VM_FAULT_OOM;
@@ -816,14 +837,28 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         return VM_FAULT_FALLBACK;
                 }
                 ptl = pmd_lock(mm, pmd);
-                set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
-                                zero_page);
-                spin_unlock(ptl);
+                ret = 0;
+                set = false;
+                if (pmd_none(*pmd)) {
+                        if (userfaultfd_missing(vma)) {
+                                spin_unlock(ptl);
+                                ret = handle_userfault(vma, address, flags,
+                                                       VM_UFFD_MISSING);
+                                VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+                        } else {
+                                set_huge_zero_page(pgtable, mm, vma,
+                                                   haddr, pmd,
+                                                   zero_page);
+                                spin_unlock(ptl);
+                                set = true;
+                        }
+                } else
+                        spin_unlock(ptl);
                 if (!set) {
                         pte_free(mm, pgtable);
                         put_huge_zero_page();
                 }
-                return 0;
+                return ret;
         }
         gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
         page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
@@ -831,14 +866,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 count_vm_event(THP_FAULT_FALLBACK);
                 return VM_FAULT_FALLBACK;
         }
-        if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
-                put_page(page);
-                count_vm_event(THP_FAULT_FALLBACK);
-                return VM_FAULT_FALLBACK;
-        }
-
-        count_vm_event(THP_FAULT_ALLOC);
-        return 0;
+        return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
+                                            flags);
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -873,16 +902,14 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          */
         if (is_huge_zero_pmd(pmd)) {
                 struct page *zero_page;
-                bool set;
                 /*
                  * get_huge_zero_page() will never allocate a new page here,
                  * since we already have a zero page to copy. It just takes a
                  * reference.
                  */
                 zero_page = get_huge_zero_page();
-                set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
+                set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
                                 zero_page);
-                BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
                 ret = 0;
                 goto out_unlock;
         }
@@ -2133,7 +2160,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
              _pte++, address += PAGE_SIZE) {
                 pte_t pteval = *_pte;
                 if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
-                        if (++none_or_zero <= khugepaged_max_ptes_none)
+                        if (!userfaultfd_armed(vma) &&
+                            ++none_or_zero <= khugepaged_max_ptes_none)
                                 continue;
                         else
                                 goto out;
@@ -2586,7 +2614,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
              _pte++, _address += PAGE_SIZE) {
                 pte_t pteval = *_pte;
                 if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
-                        if (++none_or_zero <= khugepaged_max_ptes_none)
+                        if (!userfaultfd_armed(vma) &&
+                            ++none_or_zero <= khugepaged_max_ptes_none)
                                 continue;
                         else
                                 goto out_unmap;
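
The hunks above wire userfaultfd into the anonymous THP fault paths: when a fault hits a VM_UFFD_MISSING range, __do_huge_pmd_anonymous_page() and the huge-zero-page path back out of the allocation and call handle_userfault() instead of installing a PMD, so a userspace monitor is notified and fills the range itself; khugepaged is likewise told not to collapse around still-missing ptes on armed VMAs. For context only, the sketch below shows roughly how such a monitor drives this path from userspace. It is not part of the patch: it assumes a kernel with the complete userfaultfd series applied (the userfaultfd syscall plus the UFFDIO_API, UFFDIO_REGISTER and UFFDIO_COPY ioctls), and REGION_SIZE and the 4MB region are arbitrary illustrative choices.

/*
 * Minimal userspace sketch (not from this patch): register an anonymous
 * region for missing-page faults and resolve each fault with UFFDIO_COPY.
 */
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define REGION_SIZE (4UL << 20)        /* illustrative: 4MB anonymous region */

int main(void)
{
        long page_size = sysconf(_SC_PAGESIZE);

        /* Open a userfaultfd and handshake on the API version. */
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        if (uffd < 0) { perror("userfaultfd"); return 1; }

        struct uffdio_api api = { .api = UFFD_API };
        if (ioctl(uffd, UFFDIO_API, &api)) { perror("UFFDIO_API"); return 1; }

        /* Anonymous mapping; large enough that THP may back it. */
        char *area = mmap(NULL, REGION_SIZE, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (area == MAP_FAILED) { perror("mmap"); return 1; }

        /* Register the range for missing faults (arms VM_UFFD_MISSING). */
        struct uffdio_register reg = {
                .range = { .start = (unsigned long)area, .len = REGION_SIZE },
                .mode  = UFFDIO_REGISTER_MODE_MISSING,
        };
        if (ioctl(uffd, UFFDIO_REGISTER, &reg)) { perror("UFFDIO_REGISTER"); return 1; }

        /* One page of payload used to resolve every fault in this sketch. */
        char *src = malloc(page_size);
        if (!src) return 1;
        memset(src, 0xaa, page_size);

        /*
         * Fault-handling loop: each read returns a uffd_msg describing the
         * faulting address; UFFDIO_COPY fills the page and wakes the faulter.
         * A real monitor runs this in a separate thread or process.
         */
        for (;;) {
                struct uffd_msg msg;
                struct pollfd pfd = { .fd = uffd, .events = POLLIN };

                if (poll(&pfd, 1, -1) < 0) { perror("poll"); break; }
                if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
                        continue;
                if (msg.event != UFFD_EVENT_PAGEFAULT)
                        continue;

                struct uffdio_copy copy = {
                        .dst  = msg.arg.pagefault.address & ~((unsigned long)page_size - 1),
                        .src  = (unsigned long)src,
                        .len  = page_size,
                        .mode = 0,
                };
                if (ioctl(uffd, UFFDIO_COPY, &copy)) { perror("UFFDIO_COPY"); break; }
        }
        return 0;
}

Any other thread or process that touches the registered range blocks in handle_userfault() until the monitor's UFFDIO_COPY installs the page and wakes it up; running the loop in the same thread that faults would simply deadlock, which is why the loop is kept separate above.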