Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  215
1 file changed, 133 insertions(+), 82 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ce04a649f6b..76d1ec29149b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -58,6 +59,20 @@
#define arch_rebalance_pgtables(addr, len) (addr)
#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
+const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
+int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
+const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
+int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
+#endif
+
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
+
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
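
The ignore_rlimit_data switch added above is a core_param(), so it can be set at boot (ignore_rlimit_data=0) and, because of the 0644 mode, flipped at run time via /sys/module/kernel/parameters/ignore_rlimit_data. The mmap_rnd_bits knobs, in turn, are consumed by each architecture's arch_mmap_rnd(); the sketch below is only modeled on how arm64/x86 use them and is not part of this diff; the compat test and the randomness helper (get_random_long() here, get_random_int() in older trees) vary by architecture and kernel version.

unsigned long arch_mmap_rnd(void)
{
	unsigned long rnd;

#ifdef CONFIG_COMPAT
	if (test_thread_flag(TIF_32BIT))	/* arch-specific "32-bit task" test */
		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
	else
#endif
		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

	/* the randomization offset is applied in whole pages */
	return rnd << PAGE_SHIFT;
}
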
@@ -375,8 +390,9 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
}
#ifdef CONFIG_DEBUG_VM_RB
-static int browse_rb(struct rb_root *root)
+static int browse_rb(struct mm_struct *mm)
{
+ struct rb_root *root = &mm->mm_rb;
int i = 0, j, bug = 0;
struct rb_node *nd, *pn = NULL;
unsigned long prev = 0, pend = 0;
@@ -399,12 +415,14 @@ static int browse_rb(struct rb_root *root)
vma->vm_start, vma->vm_end);
bug = 1;
}
+ spin_lock(&mm->page_table_lock);
if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
pr_emerg("free gap %lx, correct %lx\n",
vma->rb_subtree_gap,
vma_compute_subtree_gap(vma));
bug = 1;
}
+ spin_unlock(&mm->page_table_lock);
i++;
pn = nd;
prev = vma->vm_start;
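
For context, rb_subtree_gap caches, for each rb-tree node, the largest free gap immediately preceding any vma in that node's subtree; browse_rb() recomputes it and compares against the cached value. A condensed sketch of the recomputation, paraphrasing vma_compute_subtree_gap() from earlier in this file:

static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, subtree_gap;

	/* gap between this vma and its predecessor in address order */
	max = vma->vm_start;
	if (vma->vm_prev)
		max -= vma->vm_prev->vm_end;

	/* fold in the cached maxima of both rb-tree children */
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}

The page_table_lock taken around the check matches the writer side in expand_upwards()/expand_downwards() further down, which update vm_start/vm_end and the cached gaps under the same spinlock.
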
@@ -441,12 +459,16 @@ static void validate_mm(struct mm_struct *mm)
struct vm_area_struct *vma = mm->mmap;
while (vma) {
+ struct anon_vma *anon_vma = vma->anon_vma;
struct anon_vma_chain *avc;
- vma_lock_anon_vma(vma);
- list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
- anon_vma_interval_tree_verify(avc);
- vma_unlock_anon_vma(vma);
+ if (anon_vma) {
+ anon_vma_lock_read(anon_vma);
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ anon_vma_interval_tree_verify(avc);
+ anon_vma_unlock_read(anon_vma);
+ }
+
highest_address = vma->vm_end;
vma = vma->vm_next;
i++;
@@ -460,7 +482,7 @@ static void validate_mm(struct mm_struct *mm)
mm->highest_vm_end, highest_address);
bug = 1;
}
- i = browse_rb(&mm->mm_rb);
+ i = browse_rb(mm);
if (i != mm->map_count) {
if (i != -1)
pr_emerg("map_count %d rb %d\n", mm->map_count, i);
@@ -1208,24 +1230,6 @@ none:
return NULL;
}
-#ifdef CONFIG_PROC_FS
-void vm_stat_account(struct mm_struct *mm, unsigned long flags,
- struct file *file, long pages)
-{
- const unsigned long stack_flags
- = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
-
- mm->total_vm += pages;
-
- if (file) {
- mm->shared_vm += pages;
- if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
- mm->exec_vm += pages;
- } else if (flags & stack_flags)
- mm->stack_vm += pages;
-}
-#endif /* CONFIG_PROC_FS */
-
/*
* If a hint addr is less than mmap_min_addr change hint to be as
* low as possible but still greater than mmap_min_addr
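
The comment above describes round_hint_to_min(), which follows it in the file; roughly (abridged, shown only for context):

static inline unsigned long round_hint_to_min(unsigned long hint)
{
	hint &= PAGE_MASK;
	if (((void *)hint != NULL) &&
	    (hint < mmap_min_addr))
		return PAGE_ALIGN(mmap_min_addr);
	return hint;
}
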
@@ -1544,19 +1548,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long charged = 0;
/* Check against address space limit. */
- if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
+ if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
unsigned long nr_pages;
/*
* MAP_FIXED may remove pages of mappings that intersect with the
* requested mapping. Account for the pages it would unmap.
*/
- if (!(vm_flags & MAP_FIXED))
- return -ENOMEM;
-
nr_pages = count_vma_pages_range(mm, addr, addr + len);
- if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
+ if (!may_expand_vm(mm, vm_flags,
+ (len >> PAGE_SHIFT) - nr_pages))
return -ENOMEM;
}
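
The MAP_FIXED adjustment relies on count_vma_pages_range() to measure how much of the requested range is already mapped and will therefore be replaced rather than added; a sketch of that helper, paraphrased from the same file:

static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* find the first mapping that overlaps [addr, end) */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* add any further overlapping vmas */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}
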
@@ -1655,7 +1657,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
out:
perf_event_mmap(vma);
- vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+ vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current->mm)))
@@ -2102,7 +2104,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
unsigned long new_start, actual_size;
/* address space limit tests */
- if (!may_expand_vm(mm, grow))
+ if (!may_expand_vm(mm, vma->vm_flags, grow))
return -ENOMEM;
/* Stack limit test */
@@ -2147,32 +2149,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
- int error;
+ int error = 0;
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
- /*
- * We must make sure the anon_vma is allocated
- * so that the anon_vma locking is not a noop.
- */
+ /* Guard against wrapping around to address 0. */
+ if (address < PAGE_ALIGN(address+4))
+ address = PAGE_ALIGN(address+4);
+ else
+ return -ENOMEM;
+
+ /* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
- vma_lock_anon_vma(vma);
/*
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
- * Also guard against wrapping around to address 0.
*/
- if (address < PAGE_ALIGN(address+4))
- address = PAGE_ALIGN(address+4);
- else {
- vma_unlock_anon_vma(vma);
- return -ENOMEM;
- }
- error = 0;
+ anon_vma_lock_write(vma->anon_vma);
/* Somebody else might have raced and expanded it already */
if (address > vma->vm_end) {
@@ -2190,7 +2187,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
* updates, but we only hold a shared mmap_sem
* lock here, so we need to protect against
* concurrent vma expansions.
- * vma_lock_anon_vma() doesn't help here, as
+ * anon_vma_lock_write() doesn't help here, as
* we don't guarantee that all growable vmas
* in a mm share the same root anon vma.
* So, we reuse mm->page_table_lock to guard
@@ -2199,8 +2196,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
spin_lock(&mm->page_table_lock);
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
- vm_stat_account(mm, vma->vm_flags,
- vma->vm_file, grow);
+ vm_stat_account(mm, vma->vm_flags, grow);
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
anon_vma_interval_tree_post_update_vma(vma);
@@ -2214,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
}
}
- vma_unlock_anon_vma(vma);
+ anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(mm);
return error;
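
For reference, the vma_lock_anon_vma()/vma_unlock_anon_vma() wrappers that are open-coded away here were defined in include/linux/rmap.h roughly as follows (they are removed elsewhere in this series):

static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	if (anon_vma)
		down_write(&anon_vma->root->rwsem);
}

static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	if (anon_vma)
		up_write(&anon_vma->root->rwsem);
}

Since the expansion paths now call anon_vma_prepare() before taking the lock, vma->anon_vma is guaranteed to be set and the NULL check the wrapper performed is no longer needed.
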
@@ -2230,25 +2226,21 @@ int expand_downwards(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
int error;
- /*
- * We must make sure the anon_vma is allocated
- * so that the anon_vma locking is not a noop.
- */
- if (unlikely(anon_vma_prepare(vma)))
- return -ENOMEM;
-
address &= PAGE_MASK;
error = security_mmap_addr(address);
if (error)
return error;
- vma_lock_anon_vma(vma);
+ /* We must make sure the anon_vma is allocated. */
+ if (unlikely(anon_vma_prepare(vma)))
+ return -ENOMEM;
/*
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
+ anon_vma_lock_write(vma->anon_vma);
/* Somebody else might have raced and expanded it already */
if (address < vma->vm_start) {
@@ -2266,7 +2258,7 @@ int expand_downwards(struct vm_area_struct *vma,
* updates, but we only hold a shared mmap_sem
* lock here, so we need to protect against
* concurrent vma expansions.
- * vma_lock_anon_vma() doesn't help here, as
+ * anon_vma_lock_write() doesn't help here, as
* we don't guarantee that all growable vmas
* in a mm share the same root anon vma.
* So, we reuse mm->page_table_lock to guard
@@ -2275,8 +2267,7 @@ int expand_downwards(struct vm_area_struct *vma,
spin_lock(&mm->page_table_lock);
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
- vm_stat_account(mm, vma->vm_flags,
- vma->vm_file, grow);
+ vm_stat_account(mm, vma->vm_flags, grow);
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_start = address;
vma->vm_pgoff -= grow;
@@ -2288,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma,
}
}
}
- vma_unlock_anon_vma(vma);
+ anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(mm);
return error;
@@ -2390,7 +2381,7 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += nrpages;
- vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
+ vm_stat_account(mm, vma->vm_flags, -nrpages);
vma = remove_vma(vma);
} while (vma);
vm_unacct_memory(nr_accounted);
@@ -2673,12 +2664,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (!vma || !(vma->vm_flags & VM_SHARED))
goto out;
- if (start < vma->vm_start || start + size > vma->vm_end)
+ if (start < vma->vm_start)
goto out;
- if (pgoff == linear_page_index(vma, start)) {
- ret = 0;
- goto out;
+ if (start + size > vma->vm_end) {
+ struct vm_area_struct *next;
+
+ for (next = vma->vm_next; next; next = next->vm_next) {
+ /* hole between vmas ? */
+ if (next->vm_start != next->vm_prev->vm_end)
+ goto out;
+
+ if (next->vm_file != vma->vm_file)
+ goto out;
+
+ if (next->vm_flags != vma->vm_flags)
+ goto out;
+
+ if (start + size <= next->vm_end)
+ break;
+ }
+
+ if (!next)
+ goto out;
}
prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2688,9 +2696,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
flags &= MAP_NONBLOCK;
flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
if (vma->vm_flags & VM_LOCKED) {
+ struct vm_area_struct *tmp;
flags |= MAP_LOCKED;
+
/* drop PG_Mlocked flag for over-mapped range */
- munlock_vma_pages_range(vma, start, start + size);
+ for (tmp = vma; tmp && tmp->vm_start < start + size;
+ tmp = tmp->vm_next) {
+ munlock_vma_pages_range(tmp,
+ max(tmp->vm_start, start),
+ min(tmp->vm_end, start + size));
+ }
}
file = get_file(vma->vm_file);
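
A minimal userspace illustration of the case the new vma walk allows: a remap_file_pages() call whose range spans what the emulation has already split into several vmas. This is only a sketch; the temporary file and page counts are arbitrary. Before this change the second call below failed with EINVAL because start + size exceeded the first vma's end.

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	size_t len = 4 * pg;
	FILE *f = tmpfile();

	if (!f || ftruncate(fileno(f), len))
		return 1;

	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fileno(f), 0);
	if (p == MAP_FAILED)
		return 1;

	/* First call splits the mapping; the second spans the resulting vmas. */
	if (remap_file_pages(p + pg, pg, 0, 2, 0))
		perror("remap_file_pages #1");
	if (remap_file_pages(p, 3 * pg, 0, 0, 0))
		perror("remap_file_pages #2");

	munmap(p, len);
	return 0;
}
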
@@ -2760,7 +2775,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
}
/* Check against address space limits *after* clearing old maps... */
- if (!may_expand_vm(mm, len >> PAGE_SHIFT))
+ if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
@@ -2795,6 +2810,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
+ mm->data_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vma->vm_flags |= VM_SOFTDIRTY;
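
Together with the may_expand_vm() rework below, this means private writable memory from both brk() and anonymous mmap() is charged to data_vm (reported as VmData in /proc/<pid>/status) and checked against RLIMIT_DATA. A hedged userspace sketch, with an arbitrary 1 MiB limit: under the default ignore_rlimit_data=true the large mapping still succeeds and the kernel only prints the warning once; with ignore_rlimit_data=0 it fails with ENOMEM.

#include <stdio.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit r = { 1 << 20, 1 << 20 };		/* 1 MiB data limit */

	if (setrlimit(RLIMIT_DATA, &r))
		perror("setrlimit");

	/* 16 MiB of private writable anonymous memory: counted in data_vm */
	void *p = mmap(NULL, 16 << 20, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		perror("mmap");		/* ENOMEM once the limit is enforced */
	else
		puts("mmap succeeded (limit not enforced or not exceeded)");
	return 0;
}
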
@@ -2986,16 +3002,36 @@ out:
* Return true if the calling process may expand its vm space by the passed
* number of pages
*/
-int may_expand_vm(struct mm_struct *mm, unsigned long npages)
-{
- unsigned long cur = mm->total_vm; /* pages */
- unsigned long lim;
+bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
+{
+ if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
+ return false;
+
+ if (is_data_mapping(flags) &&
+ mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+ if (ignore_rlimit_data)
+ pr_warn_once("%s (%d): VmData %lu exceed data ulimit "
+ "%lu. Will be forbidden soon.\n",
+ current->comm, current->pid,
+ (mm->data_vm + npages) << PAGE_SHIFT,
+ rlimit(RLIMIT_DATA));
+ else
+ return false;
+ }
- lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
+ return true;
+}
- if (cur + npages > lim)
- return 0;
- return 1;
+void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
+{
+ mm->total_vm += npages;
+
+ if (is_exec_mapping(flags))
+ mm->exec_vm += npages;
+ else if (is_stack_mapping(flags))
+ mm->stack_vm += npages;
+ else if (is_data_mapping(flags))
+ mm->data_vm += npages;
}
static int special_mapping_fault(struct vm_area_struct *vma,
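
The is_exec_mapping()/is_stack_mapping()/is_data_mapping() predicates used above live in mm/internal.h, not in this file; in this kernel series they look roughly like the following (quoted from memory, treat as a sketch):

/* mm/internal.h (abridged) */
static inline bool is_exec_mapping(vm_flags_t flags)
{
	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

static inline bool is_stack_mapping(vm_flags_t flags)
{
	return (flags & VM_STACK) == VM_STACK;
}

static inline bool is_data_mapping(vm_flags_t flags)
{
	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}

So exec_vm counts non-writable executable file text, stack_vm counts growable stack vmas, and data_vm counts private writable mappings, which is the class now checked against RLIMIT_DATA.
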
@@ -3077,7 +3113,7 @@ static struct vm_area_struct *__install_special_mapping(
if (ret)
goto out;
- mm->total_vm += len >> PAGE_SHIFT;
+ vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
perf_event_mmap(vma);
@@ -3181,10 +3217,16 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
* mapping->flags is used to avoid taking the same lock twice, if more than one
* vma in this mm is backed by the same anon_vma or address_space.
*
- * We can take all the locks in random order because the VM code
- * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
- * takes more than one of them in a row. Secondly we're protected
- * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ * We take the locks in the following order, according to the comment at the
+ * beginning of mm/rmap.c:
+ *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
+ *     hugetlb mappings);
+ *   - all i_mmap_rwsem locks;
+ *   - all anon_vma->rwsem locks.
+ *
+ * We can take the locks within each of these classes in any order, because
+ * the VM code doesn't nest them and we are protected from parallel
+ * mm_take_all_locks() by mm_all_locks_mutex.
*
* mm_take_all_locks() and mm_drop_all_locks are expensive operations
* that may have to take thousand of locks.
@@ -3203,7 +3245,16 @@ int mm_take_all_locks(struct mm_struct *mm)
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (signal_pending(current))
goto out_unlock;
- if (vma->vm_file && vma->vm_file->f_mapping)
+ if (vma->vm_file && vma->vm_file->f_mapping &&
+ is_vm_hugetlb_page(vma))
+ vm_lock_mapping(mm, vma->vm_file->f_mapping);
+ }
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (signal_pending(current))
+ goto out_unlock;
+ if (vma->vm_file && vma->vm_file->f_mapping &&
+ !is_vm_hugetlb_page(vma))
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
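
For completeness, after the two file-mapping passes shown here the function takes the anon_vma locks last, matching the order documented in the new comment; roughly (abridged continuation, not part of the shown hunks):

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
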