Diffstat (limited to 'mm')

 -rw-r--r--  mm/backing-dev.c    | 17
 -rw-r--r--  mm/debug.c          | 28
 -rw-r--r--  mm/early_ioremap.c  |  2
 -rw-r--r--  mm/frame_vector.c   | 14
 -rw-r--r--  mm/gup.c            | 64
 -rw-r--r--  mm/huge_memory.c    | 36
 -rw-r--r--  mm/hugetlb.c        | 12
 -rw-r--r--  mm/kasan/report.c   |  8
 -rw-r--r--  mm/kmemcheck.c      |  1
 -rw-r--r--  mm/kmemleak.c       |  4
 -rw-r--r--  mm/madvise.c        |  4
 -rw-r--r--  mm/memcontrol.c     |  2
 -rw-r--r--  mm/memory.c         |  3
 -rw-r--r--  mm/mmap.c           | 18
 -rw-r--r--  mm/mprotect.c       |  6
 -rw-r--r--  mm/oom_kill.c       | 11
 -rw-r--r--  mm/page-writeback.c |  5
 -rw-r--r--  mm/page_alloc.c     | 26
 -rw-r--r--  mm/percpu.c         |  4
 -rw-r--r--  mm/shmem.c          | 10
 -rw-r--r--  mm/slab.c           | 23
 -rw-r--r--  mm/sparse.c         |  2
 -rw-r--r--  mm/vmscan.c         |  3
 -rw-r--r--  mm/zsmalloc.c       |  1

 24 files changed, 210 insertions, 94 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 74b52dfd5852..b5f940ce0143 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -113,11 +113,23 @@ static const struct file_operations bdi_debug_stats_fops = {  	.release	= single_release,  }; -static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) +static int bdi_debug_register(struct backing_dev_info *bdi, const char *name)  { +	if (!bdi_debug_root) +		return -ENOMEM; +  	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); +	if (!bdi->debug_dir) +		return -ENOMEM; +  	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,  					       bdi, &bdi_debug_stats_fops); +	if (!bdi->debug_stats) { +		debugfs_remove(bdi->debug_dir); +		return -ENOMEM; +	} + +	return 0;  }  static void bdi_debug_unregister(struct backing_dev_info *bdi) @@ -129,9 +141,10 @@ static void bdi_debug_unregister(struct backing_dev_info *bdi)  static inline void bdi_debug_init(void)  {  } -static inline void bdi_debug_register(struct backing_dev_info *bdi, +static inline int bdi_debug_register(struct backing_dev_info *bdi,  				      const char *name)  { +	return 0;  }  static inline void bdi_debug_unregister(struct backing_dev_info *bdi)  { diff --git a/mm/debug.c b/mm/debug.c index d947f3e03b0d..56e2d9125ea5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason)  	 */  	int mapcount = PageSlab(page) ? 0 : page_mapcount(page); -	pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", +	pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",  		  page, page_ref_count(page), mapcount,  		  page->mapping, page_to_pgoff(page));  	if (PageCompound(page)) @@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason)  #ifdef CONFIG_MEMCG  	if (page->mem_cgroup) -		pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); +		pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);  #endif  } @@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page);  void dump_vma(const struct vm_area_struct *vma)  { -	pr_emerg("vma %p start %p end %p\n" -		"next %p prev %p mm %p\n" -		"prot %lx anon_vma %p vm_ops %p\n" -		"pgoff %lx file %p private_data %p\n" +	pr_emerg("vma %px start %px end %px\n" +		"next %px prev %px mm %px\n" +		"prot %lx anon_vma %px vm_ops %px\n" +		"pgoff %lx file %px private_data %px\n"  		"flags: %#lx(%pGv)\n",  		vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,  		vma->vm_prev, vma->vm_mm, @@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma);  void dump_mm(const struct mm_struct *mm)  { -	pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" +	pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"  #ifdef CONFIG_MMU -		"get_unmapped_area %p\n" +		"get_unmapped_area %px\n"  #endif  		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" -		"pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" +		"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"  		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"  		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"  		"start_code %lx end_code %lx start_data %lx end_data %lx\n"  		"start_brk %lx brk %lx start_stack %lx\n"  		"arg_start %lx arg_end %lx env_start %lx env_end %lx\n" -		"binfmt %p flags %lx core_state %p\n" +		"binfmt %px flags %lx core_state %px\n"  #ifdef CONFIG_AIO -		"ioctx_table %p\n" +		"ioctx_table %px\n"  #endif  #ifdef CONFIG_MEMCG -		"owner %p " +		"owner %px "  #endif -		"exe_file %p\n" +		"exe_file %px\n"  #ifdef CONFIG_MMU_NOTIFIER -		
"mmu_notifier_mm %p\n" +		"mmu_notifier_mm %px\n"  #endif  #ifdef CONFIG_NUMA_BALANCING  		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index d04ac1ec0559..1826f191e72c 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -111,7 +111,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)  	enum fixed_addresses idx;  	int i, slot; -	WARN_ON(system_state != SYSTEM_BOOTING); +	WARN_ON(system_state >= SYSTEM_RUNNING);  	slot = -1;  	for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 2f98df0d460e..c64dca6e27c2 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -53,6 +53,20 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,  		ret = -EFAULT;  		goto out;  	} + +	/* +	 * While get_vaddr_frames() could be used for transient (kernel +	 * controlled lifetime) pinning of memory pages all current +	 * users establish long term (userspace controlled lifetime) +	 * page pinning. Treat get_vaddr_frames() like +	 * get_user_pages_longterm() and disallow it for filesystem-dax +	 * mappings. +	 */ +	if (vma_is_fsdax(vma)) { +		ret = -EOPNOTSUPP; +		goto out; +	} +  	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {  		vec->got_ref = true;  		vec->is_pfns = false; @@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,  }  EXPORT_SYMBOL(get_user_pages); +#ifdef CONFIG_FS_DAX +/* + * This is the same as get_user_pages() in that it assumes we are + * operating on the current task's mm, but it goes further to validate + * that the vmas associated with the address range are suitable for + * longterm elevated page reference counts. For example, filesystem-dax + * mappings are subject to the lifetime enforced by the filesystem and + * we need guarantees that longterm users like RDMA and V4L2 only + * establish mappings that have a kernel enforced revocation mechanism. + * + * "longterm" == userspace controlled elevated page count lifetime. + * Contrast this to iov_iter_get_pages() usages which are transient. + */ +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, +		unsigned int gup_flags, struct page **pages, +		struct vm_area_struct **vmas_arg) +{ +	struct vm_area_struct **vmas = vmas_arg; +	struct vm_area_struct *vma_prev = NULL; +	long rc, i; + +	if (!pages) +		return -EINVAL; + +	if (!vmas) { +		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *), +			       GFP_KERNEL); +		if (!vmas) +			return -ENOMEM; +	} + +	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas); + +	for (i = 0; i < rc; i++) { +		struct vm_area_struct *vma = vmas[i]; + +		if (vma == vma_prev) +			continue; + +		vma_prev = vma; + +		if (vma_is_fsdax(vma)) +			break; +	} + +	/* +	 * Either get_user_pages() failed, or the vma validation +	 * succeeded, in either case we don't need to put_page() before +	 * returning. +	 */ +	if (i >= rc) +		goto out; + +	for (i = 0; i < rc; i++) +		put_page(pages[i]); +	rc = -EOPNOTSUPP; +out: +	if (vmas != vmas_arg) +		kfree(vmas); +	return rc; +} +EXPORT_SYMBOL(get_user_pages_longterm); +#endif /* CONFIG_FS_DAX */ +  /**   * populate_vma_page_range() -  populate a range of pages in the vma.   
* @vma:   target vma diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86fe697e8bfb..0e7ded98d114 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -842,20 +842,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);  #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */  static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, -		pmd_t *pmd) +		pmd_t *pmd, int flags)  {  	pmd_t _pmd; -	/* -	 * We should set the dirty bit only for FOLL_WRITE but for now -	 * the dirty bit in the pmd is meaningless.  And if the dirty -	 * bit will become meaningful and we'll only set it with -	 * FOLL_WRITE, an atomic set_bit will be required on the pmd to -	 * set the young bit, instead of the current set_pmd_at. -	 */ -	_pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); +	_pmd = pmd_mkyoung(*pmd); +	if (flags & FOLL_WRITE) +		_pmd = pmd_mkdirty(_pmd);  	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, -				pmd, _pmd,  1)) +				pmd, _pmd, flags & FOLL_WRITE))  		update_mmu_cache_pmd(vma, addr, pmd);  } @@ -884,7 +879,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,  		return NULL;  	if (flags & FOLL_TOUCH) -		touch_pmd(vma, addr, pmd); +		touch_pmd(vma, addr, pmd, flags);  	/*  	 * device mapped pages can only be returned if the @@ -995,20 +990,15 @@ out:  #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD  static void touch_pud(struct vm_area_struct *vma, unsigned long addr, -		pud_t *pud) +		pud_t *pud, int flags)  {  	pud_t _pud; -	/* -	 * We should set the dirty bit only for FOLL_WRITE but for now -	 * the dirty bit in the pud is meaningless.  And if the dirty -	 * bit will become meaningful and we'll only set it with -	 * FOLL_WRITE, an atomic set_bit will be required on the pud to -	 * set the young bit, instead of the current set_pud_at. -	 */ -	_pud = pud_mkyoung(pud_mkdirty(*pud)); +	_pud = pud_mkyoung(*pud); +	if (flags & FOLL_WRITE) +		_pud = pud_mkdirty(_pud);  	if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, -				pud, _pud,  1)) +				pud, _pud, flags & FOLL_WRITE))  		update_mmu_cache_pud(vma, addr, pud);  } @@ -1031,7 +1021,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,  		return NULL;  	if (flags & FOLL_TOUCH) -		touch_pud(vma, addr, pud); +		touch_pud(vma, addr, pud, flags);  	/*  	 * device mapped pages can only be returned if the @@ -1424,7 +1414,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,  	page = pmd_page(*pmd);  	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);  	if (flags & FOLL_TOUCH) -		touch_pmd(vma, addr, pmd); +		touch_pmd(vma, addr, pmd, flags);  	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {  		/*  		 * We don't mlock() pte-mapped THPs. This way we can avoid diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 681b300185c0..9a334f5fb730 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)  	}  } +static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) +{ +	if (addr & ~(huge_page_mask(hstate_vma(vma)))) +		return -EINVAL; +	return 0; +} +  /*   * We cannot handle pagefaults against hugetlb pages at all.  
They cause   * handle_mm_fault() to try to instantiate regular-sized pages in the @@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {  	.fault = hugetlb_vm_op_fault,  	.open = hugetlb_vm_op_open,  	.close = hugetlb_vm_op_close, +	.split = hugetlb_vm_op_split,  };  static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, @@ -4627,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,  	pte_t *pte = NULL;  	pgd = pgd_offset(mm, addr); -	p4d = p4d_offset(pgd, addr); +	p4d = p4d_alloc(mm, pgd, addr); +	if (!p4d) +		return NULL;  	pud = pud_alloc(mm, p4d, addr);  	if (pud) {  		if (sz == PUD_SIZE) { diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 6bcfb01ba038..410c8235e671 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -134,7 +134,7 @@ static void print_error_description(struct kasan_access_info *info)  	pr_err("BUG: KASAN: %s in %pS\n",  		bug_type, (void *)info->ip); -	pr_err("%s of size %zu at addr %p by task %s/%d\n", +	pr_err("%s of size %zu at addr %px by task %s/%d\n",  		info->is_write ? "Write" : "Read", info->access_size,  		info->access_addr, current->comm, task_pid_nr(current));  } @@ -206,7 +206,7 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,  	const char *rel_type;  	int rel_bytes; -	pr_err("The buggy address belongs to the object at %p\n" +	pr_err("The buggy address belongs to the object at %px\n"  	       " which belongs to the cache %s of size %d\n",  		object, cache->name, cache->object_size); @@ -225,7 +225,7 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,  	}  	pr_err("The buggy address is located %d bytes %s of\n" -	       " %d-byte region [%p, %p)\n", +	       " %d-byte region [%px, %px)\n",  		rel_bytes, rel_type, cache->object_size, (void *)object_addr,  		(void *)(object_addr + cache->object_size));  } @@ -302,7 +302,7 @@ static void print_shadow_for_address(const void *addr)  		char shadow_buf[SHADOW_BYTES_PER_ROW];  		snprintf(buffer, sizeof(buffer), -			(i == 0) ? ">%p: " : " %p: ", kaddr); +			(i == 0) ? 
">%px: " : " %px: ", kaddr);  		/*  		 * We should not pass a shadow pointer to generic  		 * function, because generic functions may try to diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c deleted file mode 100644 index cec594032515..000000000000 --- a/mm/kmemcheck.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e4738d5e9b8c..f656ca27f6c2 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -127,7 +127,7 @@  /* GFP bitmask for kmemleak internal allocations */  #define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \  				 __GFP_NORETRY | __GFP_NOMEMALLOC | \ -				 __GFP_NOWARN) +				 __GFP_NOWARN | __GFP_NOFAIL)  /* scanning area inside a memory block */  struct kmemleak_scan_area { @@ -1523,6 +1523,8 @@ static void kmemleak_scan(void)  			if (page_count(page) == 0)  				continue;  			scan_block(page, page + 1, NULL); +			if (!(pfn & 63)) +				cond_resched();  		}  	}  	put_online_mems(); diff --git a/mm/madvise.c b/mm/madvise.c index 375cf32087e4..751e97aa2210 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma,  {  	struct file *file = vma->vm_file; +	*prev = vma;  #ifdef CONFIG_SWAP  	if (!file) { -		*prev = vma;  		force_swapin_readahead(vma, start, end);  		return 0;  	}  	if (shmem_mapping(file->f_mapping)) { -		*prev = vma;  		force_shm_swapin_readahead(vma, start, end,  					file->f_mapping);  		return 0; @@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma,  		return 0;  	} -	*prev = vma;  	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;  	if (end > vma->vm_end)  		end = vma->vm_end; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 50e6906314f8..ac2ffd5e02b9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)  	memcg_check_events(memcg, page);  	if (!mem_cgroup_is_root(memcg)) -		css_put(&memcg->css); +		css_put_many(&memcg->css, nr_entries);  }  /** diff --git a/mm/memory.c b/mm/memory.c index 85e7a87da79f..ca5674cbaff2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3831,7 +3831,8 @@ static inline int create_huge_pmd(struct vm_fault *vmf)  	return VM_FAULT_FALLBACK;  } -static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) +/* `inline' is required to avoid gcc 4.1.2 build error */ +static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)  {  	if (vma_is_anonymous(vmf->vma))  		return do_huge_pmd_wp_page(vmf, orig_pmd); diff --git a/mm/mmap.c b/mm/mmap.c index 924839fac0e6..9efdc021ad22 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,  	struct vm_area_struct *new;  	int err; -	if (is_vm_hugetlb_page(vma) && (addr & -					~(huge_page_mask(hstate_vma(vma))))) -		return -EINVAL; +	if (vma->vm_ops && vma->vm_ops->split) { +		err = vma->vm_ops->split(vma, addr); +		if (err) +			return err; +	}  	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);  	if (!new) @@ -3017,20 +3019,20 @@ void exit_mmap(struct mm_struct *mm)  	/* Use -1 here to ensure all VMAs in the mm are unmapped */  	unmap_vmas(&tlb, vma, 0, -1); -	set_bit(MMF_OOM_SKIP, &mm->flags); -	if (unlikely(tsk_is_oom_victim(current))) { +	if (unlikely(mm_is_oom_victim(mm))) {  		/*  		 * Wait for oom_reap_task() to stop working on this  		 * mm. 
Because MMF_OOM_SKIP is already set before  		 * calling down_read(), oom_reap_task() will not run  		 * on this "mm" post up_write().  		 * -		 * tsk_is_oom_victim() cannot be set from under us -		 * either because current->mm is already set to NULL +		 * mm_is_oom_victim() cannot be set from under us +		 * either because victim->mm is already set to NULL  		 * under task_lock before calling mmput and oom_mm is -		 * set not NULL by the OOM killer only if current->mm +		 * set not NULL by the OOM killer only if victim->mm  		 * is found not NULL while holding the task_lock.  		 */ +		set_bit(MMF_OOM_SKIP, &mm->flags);  		down_write(&mm->mmap_sem);  		up_write(&mm->mmap_sem);  	} diff --git a/mm/mprotect.c b/mm/mprotect.c index ec39f730a0bf..58b629bb70de 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,  		next = pmd_addr_end(addr, end);  		if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)  				&& pmd_none_or_clear_bad(pmd)) -			continue; +			goto next;  		/* invoke the mmu notifier if the pmd is populated */  		if (!mni_start) { @@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,  					}  					/* huge pmd was handled */ -					continue; +					goto next;  				}  			}  			/* fall through, the trans huge pmd just split */ @@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,  		this_pages = change_pte_range(vma, pmd, addr, next, newprot,  				 dirty_accountable, prot_numa);  		pages += this_pages; +next: +		cond_resched();  	} while (pmd++, addr = next, addr != end);  	if (mni_start) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c86fbd1b590e..29f855551efe 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)  	 */  	set_bit(MMF_UNSTABLE, &mm->flags); -	tlb_gather_mmu(&tlb, mm, 0, -1);  	for (vma = mm->mmap ; vma; vma = vma->vm_next) {  		if (!can_madv_dontneed_vma(vma))  			continue; @@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)  		 * we do not want to block exit_mmap by keeping mm ref  		 * count elevated without a good reason.  		 */ -		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) +		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { +			tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);  			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,  					 NULL); +			tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); +		}  	} -	tlb_finish_mmu(&tlb, 0, -1);  	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",  			task_pid_nr(tsk), tsk->comm,  			K(get_mm_counter(mm, MM_ANONPAGES)), @@ -682,8 +683,10 @@ static void mark_oom_victim(struct task_struct *tsk)  		return;  	/* oom_mm is bound to the signal struct life time. 
*/ -	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) +	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {  		mmgrab(tsk->signal->oom_mm); +		set_bit(MMF_OOM_VICTIM, &mm->flags); +	}  	/*  	 * Make sure that the task is woken up from uninterruptible sleep diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e7095030aa1f..586f31261c83 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)  	else  		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; -	if (unlikely(bg_thresh >= thresh)) { -		pr_warn("vm direct limit must be set greater than background limit.\n"); +	if (bg_thresh >= thresh)  		bg_thresh = thresh / 2; -	} -  	tsk = current;  	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {  		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d4096f4a5c1f..76c9688b6a0a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone)  	if (WARN_ON_ONCE(!mm_percpu_wq))  		return; -	/* Workqueues cannot recurse */ -	if (current->flags & PF_WQ_WORKER) -		return; -  	/*  	 * Do not drain if one is already in progress unless it's specific to  	 * a zone. Such callers are primarily CMA and memory hotplug and need @@ -2688,6 +2684,7 @@ void free_unref_page_list(struct list_head *list)  {  	struct page *page, *next;  	unsigned long flags, pfn; +	int batch_count = 0;  	/* Prepare pages for freeing */  	list_for_each_entry_safe(page, next, list, lru) { @@ -2704,6 +2701,16 @@ void free_unref_page_list(struct list_head *list)  		set_page_private(page, 0);  		trace_mm_page_free_batched(page);  		free_unref_page_commit(page, pfn); + +		/* +		 * Guard against excessive IRQ disabled times when we get +		 * a large list of pages to free. +		 */ +		if (++batch_count == SWAP_CLUSTER_MAX) { +			local_irq_restore(flags); +			batch_count = 0; +			local_irq_save(flags); +		}  	}  	local_irq_restore(flags);  } @@ -6253,6 +6260,8 @@ void __paginginit zero_resv_unavail(void)  	pgcnt = 0;  	for_each_resv_unavail_range(i, &start, &end) {  		for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { +			if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) +				continue;  			mm_zero_struct_page(pfn_to_page(pfn));  			pgcnt++;  		} @@ -7656,11 +7665,18 @@ int alloc_contig_range(unsigned long start, unsigned long end,  	/*  	 * In case of -EBUSY, we'd like to know which page causes problem. -	 * So, just fall through. We will check it in test_pages_isolated(). +	 * So, just fall through. test_pages_isolated() has a tracepoint +	 * which will report the busy page. +	 * +	 * It is possible that busy pages could become available before +	 * the call to test_pages_isolated, and the range will actually be +	 * allocated.  So, if we fall through be sure to clear ret so that +	 * -EBUSY is not accidentally used or returned to caller.  	 
*/  	ret = __alloc_contig_migrate_range(&cc, start, end);  	if (ret && ret != -EBUSY)  		goto done; +	ret =0;  	/*  	 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES diff --git a/mm/percpu.c b/mm/percpu.c index 79e3549cab0f..50e7fdf84055 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2719,7 +2719,11 @@ void __init setup_per_cpu_areas(void)  	if (pcpu_setup_first_chunk(ai, fc) < 0)  		panic("Failed to initialize percpu areas."); +#ifdef CONFIG_CRIS +#warning "the CRIS architecture has physical and virtual addresses confused" +#else  	pcpu_free_alloc_info(ai); +#endif  }  #endif	/* CONFIG_SMP */ diff --git a/mm/shmem.c b/mm/shmem.c index 4aa9307feab0..7fbe67be86fa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3776,7 +3776,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)  	 * tmpfs instance, limiting inodes to one per page of lowmem;  	 * but the internal instance is left unlimited.  	 */ -	if (!(sb->s_flags & MS_KERNMOUNT)) { +	if (!(sb->s_flags & SB_KERNMOUNT)) {  		sbinfo->max_blocks = shmem_default_max_blocks();  		sbinfo->max_inodes = shmem_default_max_inodes();  		if (shmem_parse_options(data, sbinfo, false)) { @@ -3784,12 +3784,12 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)  			goto failed;  		}  	} else { -		sb->s_flags |= MS_NOUSER; +		sb->s_flags |= SB_NOUSER;  	}  	sb->s_export_op = &shmem_export_ops; -	sb->s_flags |= MS_NOSEC; +	sb->s_flags |= SB_NOSEC;  #else -	sb->s_flags |= MS_NOUSER; +	sb->s_flags |= SB_NOUSER;  #endif  	spin_lock_init(&sbinfo->stat_lock); @@ -3809,7 +3809,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_xattr = shmem_xattr_handlers;  #endif  #ifdef CONFIG_TMPFS_POSIX_ACL -	sb->s_flags |= MS_POSIXACL; +	sb->s_flags |= SB_POSIXACL;  #endif  	uuid_gen(&sb->s_uuid); diff --git a/mm/slab.c b/mm/slab.c index 183e996dde5f..4e51ef954026 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1584,11 +1584,8 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)  		       *dbg_redzone2(cachep, objp));  	} -	if (cachep->flags & SLAB_STORE_USER) { -		pr_err("Last user: [<%p>](%pSR)\n", -		       *dbg_userword(cachep, objp), -		       *dbg_userword(cachep, objp)); -	} +	if (cachep->flags & SLAB_STORE_USER) +		pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));  	realobj = (char *)objp + obj_offset(cachep);  	size = cachep->object_size;  	for (i = 0; i < size && lines; i += 16, lines--) { @@ -1621,7 +1618,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)  			/* Mismatch ! 
*/  			/* Print header */  			if (lines == 0) { -				pr_err("Slab corruption (%s): %s start=%p, len=%d\n", +				pr_err("Slab corruption (%s): %s start=%px, len=%d\n",  				       print_tainted(), cachep->name,  				       realobj, size);  				print_objinfo(cachep, objp, 0); @@ -1650,13 +1647,13 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)  		if (objnr) {  			objp = index_to_obj(cachep, page, objnr - 1);  			realobj = (char *)objp + obj_offset(cachep); -			pr_err("Prev obj: start=%p, len=%d\n", realobj, size); +			pr_err("Prev obj: start=%px, len=%d\n", realobj, size);  			print_objinfo(cachep, objp, 2);  		}  		if (objnr + 1 < cachep->num) {  			objp = index_to_obj(cachep, page, objnr + 1);  			realobj = (char *)objp + obj_offset(cachep); -			pr_err("Next obj: start=%p, len=%d\n", realobj, size); +			pr_err("Next obj: start=%px, len=%d\n", realobj, size);  			print_objinfo(cachep, objp, 2);  		}  	} @@ -2608,7 +2605,7 @@ static void slab_put_obj(struct kmem_cache *cachep,  	/* Verify double free bug */  	for (i = page->active; i < cachep->num; i++) {  		if (get_free_obj(page, i) == objnr) { -			pr_err("slab: double free detected in cache '%s', objp %p\n", +			pr_err("slab: double free detected in cache '%s', objp %px\n",  			       cachep->name, objp);  			BUG();  		} @@ -2772,7 +2769,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)  	else  		slab_error(cache, "memory outside object was overwritten"); -	pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n", +	pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",  	       obj, redzone1, redzone2);  } @@ -3078,7 +3075,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,  		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||  				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {  			slab_error(cachep, "double free, or memory outside object was overwritten"); -			pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n", +			pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",  			       objp, *dbg_redzone1(cachep, objp),  			       *dbg_redzone2(cachep, objp));  		} @@ -3091,7 +3088,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,  		cachep->ctor(objp);  	if (ARCH_SLAB_MINALIGN &&  	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { -		pr_err("0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", +		pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",  		       objp, (int)ARCH_SLAB_MINALIGN);  	}  	return objp; @@ -4283,7 +4280,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)  		return;  	}  #endif -	seq_printf(m, "%p", (void *)address); +	seq_printf(m, "%px", (void *)address);  }  static int leaks_show(struct seq_file *m, void *p) diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..2609aba121e8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)  	if (unlikely(!mem_section)) {  		unsigned long size, align; -		size = sizeof(struct mem_section) * NR_SECTION_ROOTS; +		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;  		align = 1 << (INTERNODE_CACHE_SHIFT);  		mem_section = memblock_virt_alloc(size, align);  	} diff --git a/mm/vmscan.c b/mm/vmscan.c index c02c850ea349..47d5ced51f2d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker);   */  void unregister_shrinker(struct shrinker *shrinker)  { +	if (!shrinker->nr_deferred) +		return;  	down_write(&shrinker_rwsem);  	list_del(&shrinker->list);  	
up_write(&shrinker_rwsem);  	kfree(shrinker->nr_deferred); +	shrinker->nr_deferred = NULL;  }  EXPORT_SYMBOL(unregister_shrinker); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 685049a9048d..683c0651098c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -53,6 +53,7 @@  #include <linux/mount.h>  #include <linux/migrate.h>  #include <linux/pagemap.h> +#include <linux/fs.h>  #define ZSPAGE_MAGIC	0x58
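
As a usage illustration (not part of the patch above), a driver that keeps user pages pinned for a userspace-controlled lifetime, the RDMA-style case described by the new comment in mm/gup.c, might call get_user_pages_longterm() roughly as sketched below. The helper name pin_user_buffer_longterm() and its parameters are hypothetical; only the get_user_pages_longterm() signature comes from the patch.

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>

/* Pin nr_pages of user memory at uaddr for a long-lived device mapping. */
static long pin_user_buffer_longterm(unsigned long uaddr, unsigned long nr_pages,
				     bool writable, struct page ***pages_out)
{
	struct page **pages;
	long pinned;

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	/*
	 * Unlike plain get_user_pages(), this fails with -EOPNOTSUPP when
	 * any vma in the range is a filesystem-dax mapping, since such
	 * pages must not stay pinned for a userspace-controlled lifetime.
	 */
	pinned = get_user_pages_longterm(uaddr, nr_pages,
					 writable ? FOLL_WRITE : 0,
					 pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (pinned < 0) {
		kfree(pages);
		return pinned;
	}

	/*
	 * pinned may be less than nr_pages; the caller must handle short
	 * pins and eventually put_page() each page and kfree(pages).
	 */
	*pages_out = pages;
	return pinned;
}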

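Similarly, the vma ->split hook that __split_vma() now consults (and that hugetlbfs implements as hugetlb_vm_op_split() above) could be used by any mapping that must stay aligned to a fixed granularity. A minimal sketch, with hypothetical foo_* names and an assumed 2 MiB chunk size:

#include <linux/mm.h>

#define FOO_CHUNK_SIZE	(2UL << 20)	/* assumed 2 MiB mapping granularity */

/* Reject munmap()/mprotect() splits that are not chunk aligned. */
static int foo_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
	if (addr & (FOO_CHUNK_SIZE - 1))
		return -EINVAL;
	return 0;
}

static const struct vm_operations_struct foo_vm_ops = {
	.split = foo_vm_op_split,
	/* .fault, .open, .close, etc. as the driver requires */
};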
