diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 13 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 3 | ||||
-rw-r--r-- | mm/mempolicy.c | 11 | ||||
-rw-r--r-- | mm/oom_kill.c | 2 | ||||
-rw-r--r-- | mm/page-writeback.c | 32 | ||||
-rw-r--r-- | mm/percpu.c | 6 | ||||
-rw-r--r-- | mm/vmalloc.c | 2 |
8 files changed, 52 insertions, 19 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index c0018f2d50e0..5f0a3c91fdac 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1828,7 +1828,7 @@ repeat: page = __page_cache_alloc(gfp | __GFP_COLD); if (!page) return ERR_PTR(-ENOMEM); - err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); + err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { page_cache_release(page); if (err == -EEXIST) @@ -1925,10 +1925,7 @@ static struct page *wait_on_page_read(struct page *page) * @gfp: the page allocator flags to use if allocating * * This is the same as "read_mapping_page(mapping, index, NULL)", but with - * any new page allocations done using the specified allocation flags. Note - * that the Radix tree operations will still use GFP_KERNEL, so you can't - * expect to do this atomically or anything like that - but you can pass in - * other page requirements. + * any new page allocations done using the specified allocation flags. * * If the page does not get brought uptodate, return -EIO. */ @@ -2407,7 +2404,6 @@ static ssize_t generic_perform_write(struct file *file, iov_iter_count(i)); again: - /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the @@ -2463,7 +2459,10 @@ again: written += copied; balance_dirty_pages_ratelimited(mapping); - + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } } while (iov_iter_count(i)); return written ? written : status; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 73f17c0293c0..2316840b337a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -901,7 +901,6 @@ retry: h->resv_huge_pages += delta; ret = 0; - spin_unlock(&hugetlb_lock); /* Free the needed pages to the hugetlb pool */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) { if ((--needed) < 0) @@ -915,6 +914,7 @@ retry: VM_BUG_ON(page_count(page)); enqueue_huge_page(h, page); } + spin_unlock(&hugetlb_lock); /* Free unnecessary surplus pages to the buddy allocator */ free: diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6aff93c98aca..b63f5f7dfa07 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4907,9 +4907,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) int cpu; enable_swap_cgroup(); parent = NULL; - root_mem_cgroup = memcg; if (mem_cgroup_soft_limit_tree_init()) goto free_out; + root_mem_cgroup = memcg; for_each_possible_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); @@ -4948,7 +4948,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) return &memcg->css; free_out: __mem_cgroup_free(memcg); - root_mem_cgroup = NULL; return ERR_PTR(error); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index adc395481813..c3fdbcb17658 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -636,6 +636,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; + pgoff_t pgoff; unsigned long vmstart; unsigned long vmend; @@ -643,13 +644,21 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (!vma || vma->vm_start > start) return -EFAULT; + if (start > vma->vm_start) + prev = vma; + for (; vma && vma->vm_start < end; prev = vma, vma = next) { next = vma->vm_next; vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); + if (mpol_equal(vma_policy(vma), new_pol)) + continue; + + pgoff = vma->vm_pgoff + + ((vmstart - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, - vma->anon_vma, vma->vm_file, vma->vm_pgoff, + vma->anon_vma, vma->vm_file, pgoff, new_pol); if (prev) { vma = prev; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 76f2c5ae908e..069b64e521fc 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p, unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages) { - int points; + long points; if (oom_unkillable_task(p, mem, nodemask)) return 0; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 71252486bc6f..50f08241f981 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) * * Returns @bdi's dirty limit in pages. The term "dirty" in the context of * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages. - * And the "limit" in the name is not seriously taken as hard limit in - * balance_dirty_pages(). + * + * Note that balance_dirty_pages() will only seriously take it as a hard limit + * when sleeping max_pause per page is not enough to keep the dirty pages under + * control. For example, when the device is completely stalled due to some error + * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key. + * In the other normal situations, it acts more gently by throttling the tasks + * more (rather than completely block them) when the bdi dirty pages go high. * * It allocates high/low dirty limits to fast/slow devices, in order to prevent * - starving fast devices @@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, */ if (unlikely(bdi_thresh > thresh)) bdi_thresh = thresh; + /* + * It's very possible that bdi_thresh is close to 0 not because the + * device is slow, but that it has remained inactive for long time. + * Honour such devices a reasonable good (hopefully IO efficient) + * threshold, so that the occasional writes won't be blocked and active + * writes can rampup the threshold quickly. + */ bdi_thresh = max(bdi_thresh, (limit - dirty) / 8); /* * scale global setpoint to bdi's: @@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi, * * 8 serves as the safety ratio. */ - if (bdi_dirty) - t = min(t, bdi_dirty * HZ / (8 * bw + 1)); + t = min(t, bdi_dirty * HZ / (8 * bw + 1)); /* * The pause time will be settled within range (max_pause/4, max_pause). @@ -1136,6 +1147,19 @@ pause: if (task_ratelimit) break; + /* + * In the case of an unresponding NFS server and the NFS dirty + * pages exceeds dirty_thresh, give the other good bdi's a pipe + * to go through, so that tasks on them still remain responsive. + * + * In theory 1 page is enough to keep the comsumer-producer + * pipe going: the flusher cleans 1 page => the task dirties 1 + * more page. However bdi_dirty has accounting errors. So use + * the larger and more IO friendly bdi_stat_error. + */ + if (bdi_dirty <= bdi_stat_error(bdi)) + break; + if (fatal_signal_pending(current)) break; } diff --git a/mm/percpu.c b/mm/percpu.c index 3bb810a72006..716eb4acf2fc 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1023,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) if (!is_vmalloc_addr(addr)) return __pa(addr); else - return page_to_phys(vmalloc_to_page(addr)); + return page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr); } else - return page_to_phys(pcpu_addr_to_page(addr)); + return page_to_phys(pcpu_addr_to_page(addr)) + + offset_in_page(addr); } /** diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f87577042a86..21fdf46ad5aa 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1315,7 +1315,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, void *caller) { - static struct vmap_area *va; + struct vmap_area *va; struct vm_struct *area; BUG_ON(in_interrupt()); |