Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 153
1 file changed, 74 insertions, 79 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 19f47d7b9b86..c467102a5cbc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -238,23 +238,13 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	__tlb_reset_range(tlb);
 }
 
-static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 {
-	if (!tlb->end)
-		return;
+	struct mmu_gather_batch *batch;
 
-	tlb_flush(tlb);
-	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
-	__tlb_reset_range(tlb);
-}
-
-static void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
-	struct mmu_gather_batch *batch;
-
 	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
 		free_pages_and_swap_cache(batch->pages, batch->nr);
 		batch->nr = 0;
@@ -326,20 +316,31 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 
-static void tlb_remove_table_smp_sync(void *arg)
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
+/*
+ * If we want tlb_remove_table() to imply TLB invalidates.
+ */
+static inline void tlb_table_invalidate(struct mmu_gather *tlb)
 {
-	struct mm_struct __maybe_unused *mm = arg;
+#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
 	/*
-	 * On most architectures this does nothing. Simply delivering the
-	 * interrupt is enough to prevent races with software page table
-	 * walking like that done in get_user_pages_fast.
-	 *
-	 * See the comment near struct mmu_table_batch.
+	 * Invalidate page-table caches used by hardware walkers. Then we still
+	 * need to RCU-sched wait while freeing the pages because software
+	 * walkers can still be in-flight.
 	 */
-	tlb_flush_remove_tables_local(mm);
+	tlb_flush_mmu_tlbonly(tlb);
+#endif
 }
 
-static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
+static void tlb_remove_table_smp_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
 {
 	/*
 	 * This isn't an RCU grace period and hence the page-tables cannot be
@@ -348,7 +349,7 @@ static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
 	 * It is however sufficient for software page-table walkers that rely on
 	 * IRQ disabling. See the comment near struct mmu_table_batch.
 	 */
-	smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
+	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
 	__tlb_remove_table(table);
 }
 
@@ -369,9 +370,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
-	tlb_flush_remove_tables(tlb->mm);
-
 	if (*batch) {
+		tlb_table_invalidate(tlb);
 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
 		*batch = NULL;
 	}
@@ -381,23 +381,16 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
-	/*
-	 * When there's less then two users of this mm there cannot be a
-	 * concurrent page-table walk.
-	 */
-	if (atomic_read(&tlb->mm->mm_users) < 2) {
-		__tlb_remove_table(table);
-		return;
-	}
-
 	if (*batch == NULL) {
 		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		if (*batch == NULL) {
-			tlb_remove_table_one(table, tlb);
+			tlb_table_invalidate(tlb);
+			tlb_remove_table_one(table);
 			return;
 		}
 		(*batch)->nr = 0;
 	}
+
 	(*batch)->tables[(*batch)->nr++] = table;
 	if ((*batch)->nr == MAX_TABLE_BATCH)
 		tlb_table_flush(tlb);
@@ -1029,7 +1022,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
 	 */
-	if (is_cow_mapping(vm_flags)) {
+	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -2384,9 +2377,9 @@ static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
  *
  * We do this without the lock held, so that it can sleep if it needs to.
  */
-static int do_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 {
-	int ret;
+	vm_fault_t ret;
 	struct page *page = vmf->page;
 	unsigned int old_flags = vmf->flags;
 
@@ -2490,7 +2483,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
  *   held to the old page, as well as updating the rmap.
  * - In any case, unlock the PTL and drop the reference we took to the old page.
  */
-static int wp_page_copy(struct vm_fault *vmf)
+static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -2638,7 +2631,7 @@ oom:
  * The function expects the page to be locked or other protection against
  * concurrent faults / writeback (such as DAX radix tree locks).
  */
-int finish_mkwrite_fault(struct vm_fault *vmf)
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
 	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
 	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
@@ -2659,12 +2652,12 @@ int finish_mkwrite_fault(struct vm_fault *vmf)
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
  */
-static int wp_pfn_shared(struct vm_fault *vmf)
+static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
 	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
-		int ret;
+		vm_fault_t ret;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		vmf->flags |= FAULT_FLAG_MKWRITE;
@@ -2677,7 +2670,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
 	return VM_FAULT_WRITE;
 }
 
-static int wp_page_shared(struct vm_fault *vmf)
+static vm_fault_t wp_page_shared(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2685,7 +2678,7 @@ static int wp_page_shared(struct vm_fault *vmf)
 	get_page(vmf->page);
 
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
-		int tmp;
+		vm_fault_t tmp;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		tmp = do_page_mkwrite(vmf);
@@ -2728,7 +2721,7 @@ static int wp_page_shared(struct vm_fault *vmf)
  * but allow concurrent faults), with pte both mapped and locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_wp_page(struct vm_fault *vmf)
+static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2904,7 +2897,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
  * We return with the mmap_sem locked or unlocked in the same cases
 * as does filemap_fault().
  */
-int do_swap_page(struct vm_fault *vmf)
+vm_fault_t do_swap_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *swapcache;
@@ -2913,7 +2906,7 @@ int do_swap_page(struct vm_fault *vmf)
 	pte_t pte;
 	int locked;
 	int exclusive = 0;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 		goto out;
@@ -3124,12 +3117,12 @@ out_release:
  * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_anonymous_page(struct vm_fault *vmf)
+static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	struct page *page;
-	int ret = 0;
+	vm_fault_t ret = 0;
 	pte_t entry;
 
 	/* File mapping without ->vm_ops ? */
@@ -3239,10 +3232,10 @@ oom:
  * released depending on flags and vma->vm_ops->fault() return value.
 * See filemap_fault() and __lock_page_retry().
  */
-static int __do_fault(struct vm_fault *vmf)
+static vm_fault_t __do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	ret = vma->vm_ops->fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
@@ -3276,7 +3269,7 @@ static int pmd_devmap_trans_unstable(pmd_t *pmd)
 	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
 }
 
-static int pte_alloc_one_map(struct vm_fault *vmf)
+static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
@@ -3352,13 +3345,14 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 	vmf->prealloc_pte = NULL;
 }
 
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	pmd_t entry;
-	int i, ret;
+	int i;
+	vm_fault_t ret;
 
 	if (!transhuge_vma_suitable(vma, haddr))
 		return VM_FAULT_FALLBACK;
@@ -3408,7 +3402,7 @@ out:
 	return ret;
 }
 #else
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	BUILD_BUG();
 	return 0;
@@ -3429,13 +3423,13 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
  * Target users are page handler itself and implementations of
 * vm_ops->map_pages.
  */
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	pte_t entry;
-	int ret;
+	vm_fault_t ret;
 
 	if (pmd_none(*vmf->pmd) && PageTransCompound(page) &&
 			IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
@@ -3494,10 +3488,10 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
  * The function expects the page to be locked and on success it consumes a
 * reference of a page being mapped (for the PTE which maps it).
  */
-int finish_fault(struct vm_fault *vmf)
+vm_fault_t finish_fault(struct vm_fault *vmf)
 {
 	struct page *page;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	/* Did we COW the page? */
 	if ((vmf->flags & FAULT_FLAG_WRITE) &&
@@ -3583,12 +3577,13 @@ late_initcall(fault_around_debugfs);
  * (and therefore to page order). This way it's easier to guarantee
 * that we don't cross page table boundaries.
  */
-static int do_fault_around(struct vm_fault *vmf)
+static vm_fault_t do_fault_around(struct vm_fault *vmf)
 {
 	unsigned long address = vmf->address, nr_pages, mask;
 	pgoff_t start_pgoff = vmf->pgoff;
 	pgoff_t end_pgoff;
-	int off, ret = 0;
+	int off;
+	vm_fault_t ret = 0;
 
 	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
@@ -3638,10 +3633,10 @@ out:
 	return ret;
 }
 
-static int do_read_fault(struct vm_fault *vmf)
+static vm_fault_t do_read_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	/*
 	 * Let's call ->map_pages() first and use ->fault() as fallback
@@ -3665,10 +3660,10 @@ static int do_read_fault(struct vm_fault *vmf)
 	return ret;
 }
 
-static int do_cow_fault(struct vm_fault *vmf)
+static vm_fault_t do_cow_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
@@ -3704,10 +3699,10 @@ uncharge_out:
 	return ret;
 }
 
-static int do_shared_fault(struct vm_fault *vmf)
+static vm_fault_t do_shared_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret, tmp;
+	vm_fault_t ret, tmp;
 
 	ret = __do_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3745,10 +3740,10 @@ static int do_shared_fault(struct vm_fault *vmf)
  * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
  */
-static int do_fault(struct vm_fault *vmf)
+static vm_fault_t do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
@@ -3783,7 +3778,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 	return mpol_misplaced(page, vma, addr);
 }
 
-static int do_numa_page(struct vm_fault *vmf)
+static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
@@ -3873,7 +3868,7 @@ out:
 	return 0;
 }
 
-static inline int create_huge_pmd(struct vm_fault *vmf)
+static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_anonymous_page(vmf);
@@ -3883,7 +3878,7 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
 }
 
 /* `inline' is required to avoid gcc 4.1.2 build error */
-static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3902,7 +3897,7 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
 	return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
 }
 
-static int create_huge_pud(struct vm_fault *vmf)
+static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
@@ -3914,7 +3909,7 @@ static int create_huge_pud(struct vm_fault *vmf)
 	return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
@@ -3941,7 +3936,7 @@ static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
  * The mmap_sem may have been released depending on flags and our return value.
 * See filemap_fault() and __lock_page_or_retry().
  */
-static int handle_pte_fault(struct vm_fault *vmf)
+static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
@@ -4029,8 +4024,8 @@ unlock:
  * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
  */
-static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags)
 {
 	struct vm_fault vmf = {
 		.vma = vma,
@@ -4043,7 +4038,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	p4d_t *p4d;
-	int ret;
+	vm_fault_t ret;
 
 	pgd = pgd_offset(mm, address);
 	p4d = p4d_alloc(mm, pgd, address);
@@ -4118,10 +4113,10 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
  */
-int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
+vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags)
 {
-	int ret;
+	vm_fault_t ret;
 
 	__set_current_state(TASK_RUNNING);
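The bulk of the hunks above are a mechanical conversion: fault handlers and their local ret/tmp variables switch from int to the dedicated vm_fault_t type, so VM_FAULT_* codes are no longer carried in plain integers that could be confused with -errno values. As a rough sketch of the convention this enforces (not taken from this patch; my_fault() and my_lookup_page() are hypothetical names used only for illustration), a vm_operations_struct ->fault() implementation under vm_fault_t looks roughly like:

/*
 * Illustrative sketch only, not part of this diff: a ->fault() handler
 * returning vm_fault_t. my_lookup_page() is a made-up helper standing in
 * for driver-specific page lookup; it is assumed to return a page without
 * holding an extra reference.
 */
static vm_fault_t my_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = my_lookup_page(vmf->vma->vm_file, vmf->pgoff);
	if (!page)
		return VM_FAULT_SIGBUS;	/* report failure as a fault code, not -EFAULT */

	get_page(page);			/* the core mm consumes this reference */
	vmf->page = page;		/* core mm maps the page and completes the fault */
	return 0;			/* success: only 0 or VM_FAULT_* bits */
}

The remaining hunks are separate fixes bundled into the same range: copy_one_pte() now skips write-protecting PTEs that are already read-only, and the mmu_gather changes make page-table freeing under CONFIG_HAVE_RCU_TABLE_FREE always go through the batched path, with tlb_table_invalidate() flushing hardware walker caches before the tables are handed to call_rcu_sched().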