Diffstat (limited to 'mm')
-rw-r--r--   mm/backing-dev.c   40
-rw-r--r--   mm/cma.c           66
-rw-r--r--   mm/memory.c         3
-rw-r--r--   mm/mmap.c          45
-rw-r--r--   mm/mprotect.c      20
-rw-r--r--   mm/slab.c           2
6 files changed, 106 insertions, 70 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 12a992b62576..0ae0df55000b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -40,7 +40,7 @@ LIST_HEAD(bdi_list);
/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;
-void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
+static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
{
if (wb1 < wb2) {
spin_lock(&wb1->list_lock);
@@ -376,13 +376,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
flush_delayed_work(&bdi->wb.dwork);
WARN_ON(!list_empty(&bdi->work_list));
-
- /*
- * This shouldn't be necessary unless @bdi for some reason has
- * unflushed dirty IO after work_list is drained. Do it anyway
- * just in case.
- */
- cancel_delayed_work_sync(&bdi->wb.dwork);
+ WARN_ON(delayed_work_pending(&bdi->wb.dwork));
}
/*
@@ -402,21 +396,15 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
void bdi_unregister(struct backing_dev_info *bdi)
{
- struct device *dev = bdi->dev;
-
- if (dev) {
+ if (bdi->dev) {
bdi_set_min_ratio(bdi, 0);
trace_writeback_bdi_unregister(bdi);
bdi_prune_sb(bdi);
bdi_wb_shutdown(bdi);
bdi_debug_unregister(bdi);
-
- spin_lock_bh(&bdi->wb_lock);
+ device_unregister(bdi->dev);
bdi->dev = NULL;
- spin_unlock_bh(&bdi->wb_lock);
-
- device_unregister(dev);
}
}
EXPORT_SYMBOL(bdi_unregister);
@@ -487,8 +475,17 @@ void bdi_destroy(struct backing_dev_info *bdi)
int i;
/*
- * Splice our entries to the default_backing_dev_info, if this
- * bdi disappears
+ * Splice our entries to the default_backing_dev_info. This
+ * condition shouldn't happen. @wb must be empty at this point and
+ * dirty inodes on it might cause other issues. This workaround is
+ * added by ce5f8e779519 ("writeback: splice dirty inode entries to
+ * default bdi on bdi_destroy()") without root-causing the issue.
+ *
+ * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com
+ * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350
+ *
+ * We should probably add WARN_ON() to find out whether it still
+ * happens and track it down if so.
*/
if (bdi_has_dirty_io(bdi)) {
struct bdi_writeback *dst = &default_backing_dev_info.wb;
@@ -503,12 +500,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
bdi_unregister(bdi);
- /*
- * If bdi_unregister() had already been called earlier, the dwork
- * could still be pending because bdi_prune_sb() can race with the
- * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
- */
- cancel_delayed_work_sync(&bdi->wb.dwork);
+ WARN_ON(delayed_work_pending(&bdi->wb.dwork));
for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
percpu_counter_destroy(&bdi->bdi_stat[i]);
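
The bdi_lock_two() hunk above only changes its linkage to static, but the visible body is the classic "lock the lower address first" pattern for taking two locks of the same class without deadlocking. A minimal userspace sketch of the same idea, with pthread mutexes standing in for the kernel's spinlocks (lock_two/unlock_two are hypothetical names, not kernel API), might look like this:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Take two mutexes of the same class in a globally consistent order
 * (lowest address first), so two threads passing the same pair in
 * opposite argument order cannot deadlock.  Mirrors the wb1 < wb2
 * comparison in bdi_lock_two().
 */
static void lock_two(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if ((uintptr_t)a < (uintptr_t)b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void unlock_two(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}

int main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock_two(&m1, &m2);	/* same order as lock_two(&m2, &m1) would take */
	unlock_two(&m1, &m2);
	puts("locked and unlocked both in address order");
	return 0;
}

The other hunks in this file turn defensive cancel_delayed_work_sync() calls into WARN_ON(delayed_work_pending()) assertions: cleanup for a state that should no longer occur becomes a loud report if it ever does.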
diff --git a/mm/cma.c b/mm/cma.c
index 474c644a0dc6..963bc4add9af 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -58,7 +58,9 @@ unsigned long cma_get_size(struct cma *cma)
static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
{
- return (1UL << (align_order >> cma->order_per_bit)) - 1;
+ if (align_order <= cma->order_per_bit)
+ return 0;
+ return (1UL << (align_order - cma->order_per_bit)) - 1;
}
static unsigned long cma_bitmap_maxno(struct cma *cma)
@@ -141,6 +143,54 @@ static int __init cma_init_reserved_areas(void)
core_initcall(cma_init_reserved_areas);
/**
+ * cma_init_reserved_mem() - create custom contiguous area from reserved memory
+ * @base: Base address of the reserved area
+ * @size: Size of the reserved area (in bytes),
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function creates custom contiguous area from already reserved memory.
+ */
+int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
+ int order_per_bit, struct cma **res_cma)
+{
+ struct cma *cma;
+ phys_addr_t alignment;
+
+ /* Sanity checks */
+ if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+ pr_err("Not enough slots for CMA reserved regions!\n");
+ return -ENOSPC;
+ }
+
+ if (!size || !memblock_is_region_reserved(base, size))
+ return -EINVAL;
+
+ /* ensure minimal alignment required by mm core */
+ alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+
+ /* alignment should be aligned with order_per_bit */
+ if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
+ return -EINVAL;
+
+ if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size)
+ return -EINVAL;
+
+ /*
+ * Each reserved area must be initialised later, when more kernel
+ * subsystems (like slab allocator) are available.
+ */
+ cma = &cma_areas[cma_area_count];
+ cma->base_pfn = PFN_DOWN(base);
+ cma->count = size >> PAGE_SHIFT;
+ cma->order_per_bit = order_per_bit;
+ *res_cma = cma;
+ cma_area_count++;
+
+ return 0;
+}
+
+/**
* cma_declare_contiguous() - reserve custom contiguous area
* @base: Base address of the reserved area optional, use 0 for any
* @size: Size of the reserved area (in bytes),
@@ -163,7 +213,6 @@ int __init cma_declare_contiguous(phys_addr_t base,
phys_addr_t alignment, unsigned int order_per_bit,
bool fixed, struct cma **res_cma)
{
- struct cma *cma;
phys_addr_t memblock_end = memblock_end_of_DRAM();
phys_addr_t highmem_start = __pa(high_memory);
int ret = 0;
@@ -235,16 +284,9 @@ int __init cma_declare_contiguous(phys_addr_t base,
}
}
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma = &cma_areas[cma_area_count];
- cma->base_pfn = PFN_DOWN(base);
- cma->count = size >> PAGE_SHIFT;
- cma->order_per_bit = order_per_bit;
- *res_cma = cma;
- cma_area_count++;
+ ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma);
+ if (ret)
+ goto err;
pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
(unsigned long)base);
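
The one-line cma_bitmap_aligned_mask() fix replaces a right shift with a subtraction: each bitmap bit covers 2^order_per_bit pages, so an allocation aligned to 2^align_order pages must start at a bitmap index that is a multiple of 2^(align_order - order_per_bit). A small standalone check of the two formulas (values chosen purely for illustration):

#include <stdio.h>

static unsigned long mask_buggy(int align_order, int order_per_bit)
{
	/* old code: shifted instead of subtracting */
	return (1UL << (align_order >> order_per_bit)) - 1;
}

static unsigned long mask_fixed(int align_order, int order_per_bit)
{
	/* alignment no coarser than one bitmap bit needs no mask at all */
	if (align_order <= order_per_bit)
		return 0;
	return (1UL << (align_order - order_per_bit)) - 1;
}

int main(void)
{
	int align_order = 8, order_per_bit = 1;

	printf("buggy mask: %lu, fixed mask: %lu\n",
	       mask_buggy(align_order, order_per_bit),	/* 15 */
	       mask_fixed(align_order, order_per_bit));	/* 127 */
	return 0;
}

With align_order = 8 and order_per_bit = 1, the old expression yields a mask of 15 instead of 127, so aligned allocations could be placed with far less alignment than requested.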
diff --git a/mm/memory.c b/mm/memory.c
index e229970e4223..1cc6bfbd872e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2053,7 +2053,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page) {
/*
- * VM_MIXEDMAP !pfn_valid() case
+ * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
+ * VM_PFNMAP VMA.
*
* We should not cow pages in a shared writeable mapping.
* Just mark the pages writable as we can't do any dirty
diff --git a/mm/mmap.c b/mm/mmap.c
index 93d28c7e5420..7f855206e7fb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -89,6 +89,25 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
}
EXPORT_SYMBOL(vm_get_page_prot);
+static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
+{
+ return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
+}
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+void vma_set_page_prot(struct vm_area_struct *vma)
+{
+ unsigned long vm_flags = vma->vm_flags;
+
+ vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+ if (vma_wants_writenotify(vma)) {
+ vm_flags &= ~VM_SHARED;
+ vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
+ vm_flags);
+ }
+}
+
+
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */
unsigned long sysctl_overcommit_kbytes __read_mostly;
@@ -1475,11 +1494,16 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
if (vma->vm_ops && vma->vm_ops->page_mkwrite)
return 1;
- /* The open routine did something to the protections already? */
+ /* The open routine did something to the protections that pgprot_modify
+ * won't preserve? */
if (pgprot_val(vma->vm_page_prot) !=
- pgprot_val(vm_get_page_prot(vm_flags)))
+ pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
return 0;
+ /* Do we need to track softdirty? */
+ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+ return 1;
+
/* Specialty mapping? */
if (vm_flags & VM_PFNMAP)
return 0;
@@ -1615,21 +1639,6 @@ munmap_back:
goto free_vma;
}
- if (vma_wants_writenotify(vma)) {
- pgprot_t pprot = vma->vm_page_prot;
-
- /* Can vma->vm_page_prot have changed??
- *
- * Answer: Yes, drivers may have changed it in their
- * f_op->mmap method.
- *
- * Ensures that vmas marked as uncached stay that way.
- */
- vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
- if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- }
-
vma_link(mm, vma, prev, rb_link, rb_parent);
/* Once vma denies write, undo our temporary denial count */
if (file) {
@@ -1663,6 +1672,8 @@ out:
*/
vma->vm_flags |= VM_SOFTDIRTY;
+ vma_set_page_prot(vma);
+
return addr;
unmap_and_free_vma:
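
The new vma_set_page_prot() centralizes bookkeeping that mmap_region() used to open-code. The property it leans on is that, on architectures that define it (e.g. x86), pgprot_modify() keeps attribute bits such as cacheability from the old protection while applying the access bits of the new one, which is why the removed pgprot_noncached() dance is no longer needed. A toy userspace model of that property, with an entirely made-up bit layout (PROT_ACCESS_MASK, PROT_SPECIAL_MASK and toy_pgprot_modify are hypothetical, not the kernel's definitions):

#include <stdio.h>

#define PROT_ACCESS_MASK  0x0fu	/* read/write/exec-style bits (hypothetical) */
#define PROT_SPECIAL_MASK 0xf0u	/* cacheability-style bits (hypothetical) */

/* Keep the "special" bits of oldprot, take the access bits from newprot. */
static unsigned int toy_pgprot_modify(unsigned int oldprot, unsigned int newprot)
{
	return (oldprot & PROT_SPECIAL_MASK) | (newprot & PROT_ACCESS_MASK);
}

int main(void)
{
	unsigned int uncached_rw = 0x10 | 0x03;	/* special bit + read/write */
	unsigned int plain_ro    = 0x01;	/* read-only, default caching */

	/* Downgrading to read-only preserves the uncached attribute: 0x11 */
	printf("0x%02x\n", toy_pgprot_modify(uncached_rw, plain_ro));
	return 0;
}

In the same spirit, vma_set_page_prot() applies the derivation twice: once for the nominal vm_flags and, when write notification is wanted, once more with VM_SHARED cleared so the first write to a page still faults.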
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c43d557941f8..ace93454ce8e 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -29,13 +29,6 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-#ifndef pgprot_modify
-static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
-{
- return newprot;
-}
-#endif
-
/*
* For a prot_numa update we only hold mmap_sem for read so there is a
* potential race with faulting where a pmd was temporarily none. This
@@ -93,7 +86,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
* Avoid taking write faults for pages we
* know to be dirty.
*/
- if (dirty_accountable && pte_dirty(ptent))
+ if (dirty_accountable && pte_dirty(ptent) &&
+ (pte_soft_dirty(ptent) ||
+ !(vma->vm_flags & VM_SOFTDIRTY)))
ptent = pte_mkwrite(ptent);
ptep_modify_prot_commit(mm, addr, pte, ptent);
updated = true;
@@ -320,13 +315,8 @@ success:
* held in write mode.
*/
vma->vm_flags = newflags;
- vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
- vm_get_page_prot(newflags));
-
- if (vma_wants_writenotify(vma)) {
- vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
- dirty_accountable = 1;
- }
+ dirty_accountable = vma_wants_writenotify(vma);
+ vma_set_page_prot(vma);
change_protection(vma, start, end, vma->vm_page_prot,
dirty_accountable, 0);
diff --git a/mm/slab.c b/mm/slab.c
index 154aac8411c5..eb2b2ea30130 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1992,7 +1992,7 @@ static struct array_cache __percpu *alloc_kmem_cache_cpus(
struct array_cache __percpu *cpu_cache;
size = sizeof(void *) * entries + sizeof(struct array_cache);
- cpu_cache = __alloc_percpu(size, 0);
+ cpu_cache = __alloc_percpu(size, sizeof(void *));
if (!cpu_cache)
return NULL;
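
The slab.c hunk changes the per-CPU cpu_cache allocation from alignment 0 (let the allocator choose) to an explicit sizeof(void *), presumably because the allocation is used as a struct array_cache followed by an array of object pointers and so needs at least pointer alignment. A hedged userspace sketch of the same sizing and alignment calculation (toy_array_cache is a made-up stand-in, not the kernel structure):

#include <stdio.h>
#include <stdlib.h>

/* Made-up stand-in for the kernel's struct array_cache. */
struct toy_array_cache {
	unsigned int avail;
	unsigned int limit;
	void *entry[];		/* trailing array of cached object pointers */
};

int main(void)
{
	size_t entries = 16;
	size_t size = sizeof(struct toy_array_cache) + entries * sizeof(void *);
	/* Request at least pointer-size alignment, as the patch now does. */
	struct toy_array_cache *cache = aligned_alloc(sizeof(void *), size);

	if (!cache)
		return 1;
	cache->avail = 0;
	cache->limit = (unsigned int)entries;
	printf("cache at %p, %zu bytes\n", (void *)cache, size);
	free(cache);
	return 0;
}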