Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 40
-rw-r--r-- | mm/cma.c | 66
-rw-r--r-- | mm/memory.c | 3
-rw-r--r-- | mm/mmap.c | 45
-rw-r--r-- | mm/mprotect.c | 20
-rw-r--r-- | mm/slab.c | 2
6 files changed, 106 insertions, 70 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 12a992b62576..0ae0df55000b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -40,7 +40,7 @@ LIST_HEAD(bdi_list);
 /* bdi_wq serves all asynchronous writeback tasks */
 struct workqueue_struct *bdi_wq;
 
-void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
+static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
 	if (wb1 < wb2) {
 		spin_lock(&wb1->list_lock);
@@ -376,13 +376,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 	flush_delayed_work(&bdi->wb.dwork);
 	WARN_ON(!list_empty(&bdi->work_list));
-
-	/*
-	 * This shouldn't be necessary unless @bdi for some reason has
-	 * unflushed dirty IO after work_list is drained. Do it anyway
-	 * just in case.
-	 */
-	cancel_delayed_work_sync(&bdi->wb.dwork);
+	WARN_ON(delayed_work_pending(&bdi->wb.dwork));
 }
 
 /*
@@ -402,21 +396,15 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
-	struct device *dev = bdi->dev;
-
-	if (dev) {
+	if (bdi->dev) {
 		bdi_set_min_ratio(bdi, 0);
 		trace_writeback_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
 		bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
-
-		spin_lock_bh(&bdi->wb_lock);
+		device_unregister(bdi->dev);
 		bdi->dev = NULL;
-		spin_unlock_bh(&bdi->wb_lock);
-
-		device_unregister(dev);
 	}
 }
 EXPORT_SYMBOL(bdi_unregister);
@@ -487,8 +475,17 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	int i;
 
 	/*
-	 * Splice our entries to the default_backing_dev_info, if this
-	 * bdi disappears
+	 * Splice our entries to the default_backing_dev_info. This
+	 * condition shouldn't happen. @wb must be empty at this point and
+	 * dirty inodes on it might cause other issues. This workaround is
+	 * added by ce5f8e779519 ("writeback: splice dirty inode entries to
+	 * default bdi on bdi_destroy()") without root-causing the issue.
+	 *
+	 * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com
+	 * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350
+	 *
+	 * We should probably add WARN_ON() to find out whether it still
+	 * happens and track it down if so.
 	 */
 	if (bdi_has_dirty_io(bdi)) {
 		struct bdi_writeback *dst = &default_backing_dev_info.wb;
@@ -503,12 +500,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
-	/*
-	 * If bdi_unregister() had already been called earlier, the dwork
-	 * could still be pending because bdi_prune_sb() can race with the
-	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
-	 */
-	cancel_delayed_work_sync(&bdi->wb.dwork);
+	WARN_ON(delayed_work_pending(&bdi->wb.dwork));
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
diff --git a/mm/cma.c b/mm/cma.c
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -58,7 +58,9 @@ unsigned long cma_get_size(struct cma *cma)
 
 static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
 {
-	return (1UL << (align_order >> cma->order_per_bit)) - 1;
+	if (align_order <= cma->order_per_bit)
+		return 0;
+	return (1UL << (align_order - cma->order_per_bit)) - 1;
 }
 
 static unsigned long cma_bitmap_maxno(struct cma *cma)
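Note on the cma_bitmap_aligned_mask() hunk above: the old formula used order_per_bit as a shift count ('>>') where it should have been subtracted. Each bitmap bit covers 2^order_per_bit pages, so a page-order alignment has to be reduced by order_per_bit before the mask is built. A standalone sketch of the two formulas with illustrative inputs (userspace C, not kernel code; mask_old, mask_new, and the sample values are made up for the demonstration):

#include <stdio.h>

/* Old formula: '>>' wrongly uses order_per_bit as a shift count. */
static unsigned long mask_old(int align_order, int order_per_bit)
{
        return (1UL << (align_order >> order_per_bit)) - 1;
}

/* Fixed formula: subtract to convert page order into bitmap-bit units. */
static unsigned long mask_new(int align_order, int order_per_bit)
{
        if (align_order <= order_per_bit)
                return 0;
        return (1UL << (align_order - order_per_bit)) - 1;
}

int main(void)
{
        /* Illustrative values: a request aligned to 2^9 pages on an area
         * whose bitmap tracks 2^1 pages per bit. */
        printf("old: 0x%lx\n", mask_old(9, 1)); /* 0xf: far too small */
        printf("new: 0x%lx\n", mask_new(9, 1)); /* 0xff: 2^(9-1) bits - 1 */
        return 0;
}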
@@ -141,6 +143,54 @@ static int __init cma_init_reserved_areas(void)
 core_initcall(cma_init_reserved_areas);
 
 /**
+ * cma_init_reserved_mem() - create custom contiguous area from reserved memory
+ * @base: Base address of the reserved area
+ * @size: Size of the reserved area (in bytes),
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function creates custom contiguous area from already reserved memory.
+ */
+int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
+				 int order_per_bit, struct cma **res_cma)
+{
+	struct cma *cma;
+	phys_addr_t alignment;
+
+	/* Sanity checks */
+	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+		pr_err("Not enough slots for CMA reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	if (!size || !memblock_is_region_reserved(base, size))
+		return -EINVAL;
+
+	/* ensure minimal alignment required by mm core */
+	alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+
+	/* alignment should be aligned with order_per_bit */
+	if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
+		return -EINVAL;
+
+	if (ALIGN(base, alignment) != base || ALIGN(size, alignment) != size)
+		return -EINVAL;
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma = &cma_areas[cma_area_count];
+	cma->base_pfn = PFN_DOWN(base);
+	cma->count = size >> PAGE_SHIFT;
+	cma->order_per_bit = order_per_bit;
+	*res_cma = cma;
+	cma_area_count++;
+
+	return 0;
+}
+
+/**
  * cma_declare_contiguous() - reserve custom contiguous area
  * @base: Base address of the reserved area optional, use 0 for any
  * @size: Size of the reserved area (in bytes),
@@ -163,7 +213,6 @@ int __init cma_declare_contiguous(phys_addr_t base,
 			phys_addr_t alignment, unsigned int order_per_bit,
 			bool fixed, struct cma **res_cma)
 {
-	struct cma *cma;
 	phys_addr_t memblock_end = memblock_end_of_DRAM();
 	phys_addr_t highmem_start = __pa(high_memory);
 	int ret = 0;
@@ -235,16 +284,9 @@ int __init cma_declare_contiguous(phys_addr_t base,
 		}
 	}
 
-	/*
-	 * Each reserved area must be initialised later, when more kernel
-	 * subsystems (like slab allocator) are available.
-	 */
-	cma = &cma_areas[cma_area_count];
-	cma->base_pfn = PFN_DOWN(base);
-	cma->count = size >> PAGE_SHIFT;
-	cma->order_per_bit = order_per_bit;
-	*res_cma = cma;
-	cma_area_count++;
+	ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma);
+	if (ret)
+		goto err;
 
 	pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
 		(unsigned long)base);
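The point of splitting cma_init_reserved_mem() out of cma_declare_contiguous() is that early boot code which has already carved out a region (via memblock or devicetree) can register it with CMA directly. A hedged sketch of such a caller follows; my_cma_setup, the base/size values, and the call placement are illustrative assumptions, and the function would have to run after the region is memblock-reserved but before cma_init_reserved_areas() activates the areas at core_initcall time:

#include <linux/cma.h>
#include <linux/memblock.h>
#include <linux/printk.h>
#include <linux/sizes.h>

static struct cma *my_cma;      /* hypothetical driver-private CMA area */

/* Hypothetical hook called from early arch/board setup. */
void __init my_cma_setup(void)
{
        phys_addr_t base = 0x20000000;  /* made-up, suitably aligned range */
        phys_addr_t size = 64 * SZ_1M;
        int ret;

        /* cma_init_reserved_mem() rejects ranges that are not already
         * memblock-reserved with -EINVAL, so reserve first. */
        if (memblock_reserve(base, size))
                return;

        /* order_per_bit = 0: the CMA bitmap tracks single pages. */
        ret = cma_init_reserved_mem(base, size, 0, &my_cma);
        if (ret)
                pr_warn("my_cma: cma_init_reserved_mem failed: %d\n", ret);
}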
diff --git a/mm/memory.c b/mm/memory.c
index e229970e4223..1cc6bfbd872e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2053,7 +2053,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
 		/*
-		 * VM_MIXEDMAP !pfn_valid() case
+		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
+		 * VM_PFNMAP VMA.
 		 *
 		 * We should not cow pages in a shared writeable mapping.
 		 * Just mark the pages writable as we can't do any dirty
diff --git a/mm/mmap.c b/mm/mmap.c
index 93d28c7e5420..7f855206e7fb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -89,6 +89,25 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
+static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
+{
+	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
+}
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+void vma_set_page_prot(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+	if (vma_wants_writenotify(vma)) {
+		vm_flags &= ~VM_SHARED;
+		vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
+						     vm_flags);
+	}
+}
+
+
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */
 unsigned long sysctl_overcommit_kbytes __read_mostly;
@@ -1475,11 +1494,16 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
 		return 1;
 
-	/* The open routine did something to the protections already? */
+	/* The open routine did something to the protections that pgprot_modify
+	 * won't preserve? */
 	if (pgprot_val(vma->vm_page_prot) !=
-	    pgprot_val(vm_get_page_prot(vm_flags)))
+	    pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
 		return 0;
 
+	/* Do we need to track softdirty? */
+	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+		return 1;
+
 	/* Specialty mapping? */
 	if (vm_flags & VM_PFNMAP)
 		return 0;
@@ -1615,21 +1639,6 @@ munmap_back:
 			goto free_vma;
 	}
 
-	if (vma_wants_writenotify(vma)) {
-		pgprot_t pprot = vma->vm_page_prot;
-
-		/* Can vma->vm_page_prot have changed??
-		 *
-		 * Answer: Yes, drivers may have changed it in their
-		 *         f_op->mmap method.
-		 *
-		 * Ensures that vmas marked as uncached stay that way.
-		 */
-		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
-		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
-			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	}
-
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
 	if (file) {
@@ -1663,6 +1672,8 @@ out:
 	 */
 	vma->vm_flags |= VM_SOFTDIRTY;
 
+	vma_set_page_prot(vma);
+
 	return addr;
 
 unmap_and_free_vma:
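vma_set_page_prot() above becomes the one place that derives vma->vm_page_prot from vma->vm_flags: when vma_wants_writenotify() reports that the kernel must catch the first write (page_mkwrite, dirty accounting, or, new in this diff, soft-dirty tracking), the protection is recomputed with VM_SHARED cleared, so shared writable pages start out write-protected and fault on the first store. The following is a standalone sketch of that decision using plain bitmasks (userspace C; the flag values, get_prot, and wants_writenotify are simplified stand-ins, not the kernel's logic):

#include <stdio.h>

#define VM_SHARED       0x1u
#define VM_SOFTDIRTY    0x2u
#define PROT_W          0x1u    /* stand-in for the writable pgprot bit */

/* Stand-in for vm_get_page_prot(): shared mappings get the write bit. */
static unsigned int get_prot(unsigned int flags)
{
        return (flags & VM_SHARED) ? PROT_W : 0;
}

/* Stand-in for vma_wants_writenotify(): here, only soft-dirty matters. */
static int wants_writenotify(unsigned int flags)
{
        return (flags & VM_SHARED) && !(flags & VM_SOFTDIRTY);
}

/* Mirrors the shape of vma_set_page_prot(): recompute the protection
 * without VM_SHARED when write notifications are wanted. */
static unsigned int set_page_prot(unsigned int flags)
{
        unsigned int prot = get_prot(flags);

        if (wants_writenotify(flags))
                prot = get_prot(flags & ~VM_SHARED);
        return prot;
}

int main(void)
{
        /* Soft-dirty cleared: first write must fault, so no write bit. */
        printf("notify: prot=%u\n", set_page_prot(VM_SHARED));
        /* Soft-dirty already set: nothing to catch, stays writable. */
        printf("no-op:  prot=%u\n", set_page_prot(VM_SHARED | VM_SOFTDIRTY));
        return 0;
}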
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c43d557941f8..ace93454ce8e 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -29,13 +29,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#ifndef pgprot_modify
-static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
-{
-	return newprot;
-}
-#endif
-
 /*
  * For a prot_numa update we only hold mmap_sem for read so there is a
  * potential race with faulting where a pmd was temporarily none. This
@@ -93,7 +86,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			 * Avoid taking write faults for pages we
 			 * know to be dirty.
 			 */
-			if (dirty_accountable && pte_dirty(ptent))
+			if (dirty_accountable && pte_dirty(ptent) &&
+			    (pte_soft_dirty(ptent) ||
+			     !(vma->vm_flags & VM_SOFTDIRTY)))
 				ptent = pte_mkwrite(ptent);
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
 			updated = true;
@@ -320,13 +315,8 @@ success:
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
-					  vm_get_page_prot(newflags));
-
-	if (vma_wants_writenotify(vma)) {
-		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
-		dirty_accountable = 1;
-	}
+	dirty_accountable = vma_wants_writenotify(vma);
+	vma_set_page_prot(vma);
 
 	change_protection(vma, start, end, vma->vm_page_prot,
 			  dirty_accountable, 0);
diff --git a/mm/slab.c b/mm/slab.c
index 154aac8411c5..eb2b2ea30130 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1992,7 +1992,7 @@ static struct array_cache __percpu *alloc_kmem_cache_cpus(
 	struct array_cache __percpu *cpu_cache;
 
 	size = sizeof(void *) * entries + sizeof(struct array_cache);
-	cpu_cache = __alloc_percpu(size, 0);
+	cpu_cache = __alloc_percpu(size, sizeof(void *));
 
 	if (!cpu_cache)
 		return NULL;
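The mm/slab.c change is a one-word alignment fix: __alloc_percpu(size, 0) gives no alignment guarantee beyond the allocator's minimum, and struct array_cache ends in an array of pointers, so an unaligned placement faults on strict-alignment architectures. Requesting sizeof(void *) matches the strictest member. A hedged sketch of the general pattern (foo_cache and alloc_foo_cache are invented for illustration; __alignof__ is the portable way to pick the bound):

#include <linux/percpu.h>

/* Invented example type: like array_cache, it ends in pointer slots. */
struct foo_cache {
        unsigned int avail;
        unsigned int limit;
        void *entries[];        /* pointer members need pointer alignment */
};

static struct foo_cache __percpu *alloc_foo_cache(int entries)
{
        size_t size = sizeof(struct foo_cache) + sizeof(void *) * entries;

        /* Never pass 0: request the type's natural alignment so pointer
         * loads from the flexible array are aligned on every arch. */
        return __alloc_percpu(size, __alignof__(struct foo_cache));
}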