diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 102 |
1 files changed, 73 insertions, 29 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a13ded1938f0..a712fb9e04ce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg) } /* - * Spill all the per-cpu pages from all CPUs back into the buddy allocator + * Spill all the per-cpu pages from all CPUs back into the buddy allocator. + * + * Note that this code is protected against sending an IPI to an offline + * CPU but does not guarantee sending an IPI to newly hotplugged CPUs: + * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but + * nothing keeps CPUs from showing up after we populated the cpumask and + * before the call to on_each_cpu_mask(). */ void drain_all_pages(void) { - on_each_cpu(drain_local_pages, NULL, 1); + int cpu; + struct per_cpu_pageset *pcp; + struct zone *zone; + + /* + * Allocate in the BSS so we wont require allocation in + * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y + */ + static cpumask_t cpus_with_pcps; + + /* + * We don't care about racing with CPU hotplug event + * as offline notification will cause the notified + * cpu to drain that CPU pcps and on_each_cpu_mask + * disables preemption as part of its processing + */ + for_each_online_cpu(cpu) { + bool has_pcps = false; + for_each_populated_zone(zone) { + pcp = per_cpu_ptr(zone->pageset, cpu); + if (pcp->pcp.count) { + has_pcps = true; + break; + } + } + if (has_pcps) + cpumask_set_cpu(cpu, &cpus_with_pcps); + else + cpumask_clear_cpu(cpu, &cpus_with_pcps); + } + on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1); } #ifdef CONFIG_HIBERNATION @@ -1968,7 +2004,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - out_of_memory(zonelist, gfp_mask, order, nodemask); + out_of_memory(zonelist, gfp_mask, order, nodemask, false); out: clear_zonelist_oom(zonelist, gfp_mask); @@ -1990,7 +2026,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, if (!order) return NULL; - if (compaction_deferred(preferred_zone)) { + if (compaction_deferred(preferred_zone, order)) { *deferred_compaction = true; return NULL; } @@ -2012,6 +2048,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, if (page) { preferred_zone->compact_considered = 0; preferred_zone->compact_defer_shift = 0; + if (order >= preferred_zone->compact_order_failed) + preferred_zone->compact_order_failed = order + 1; count_vm_event(COMPACTSUCCESS); return page; } @@ -2028,7 +2066,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, * defer if the failure was a sync compaction failure. */ if (sync_migration) - defer_compaction(preferred_zone); + defer_compaction(preferred_zone, order); cond_resched(); } @@ -2306,6 +2344,10 @@ rebalance: if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { if (oom_killer_disabled) goto nopage; + /* Coredumps can quickly deplete all memory reserves */ + if ((current->flags & PF_DUMPCORE) && + !(gfp_mask & __GFP_NOFAIL)) + goto nopage; page = __alloc_pages_may_oom(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, @@ -2378,8 +2420,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, { enum zone_type high_zoneidx = gfp_zone(gfp_mask); struct zone *preferred_zone; - struct page *page; + struct page *page = NULL; int migratetype = allocflags_to_migratetype(gfp_mask); + unsigned int cpuset_mems_cookie; gfp_mask &= gfp_allowed_mask; @@ -2398,15 +2441,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (unlikely(!zonelist->_zonerefs->zone)) return NULL; - get_mems_allowed(); +retry_cpuset: + cpuset_mems_cookie = get_mems_allowed(); + /* The preferred zone is used for statistics later */ first_zones_zonelist(zonelist, high_zoneidx, nodemask ? : &cpuset_current_mems_allowed, &preferred_zone); - if (!preferred_zone) { - put_mems_allowed(); - return NULL; - } + if (!preferred_zone) + goto out; /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, @@ -2416,9 +2459,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); - put_mems_allowed(); trace_mm_page_alloc(page, order, gfp_mask, migratetype); + +out: + /* + * When updating a task's mems_allowed, it is possible to race with + * parallel threads in such a way that an allocation can fail while + * the mask is being updated. If a page allocation is about to fail, + * check if the cpuset changed during allocation and if so, retry. + */ + if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + goto retry_cpuset; + return page; } EXPORT_SYMBOL(__alloc_pages_nodemask); @@ -2632,13 +2685,15 @@ void si_meminfo_node(struct sysinfo *val, int nid) bool skip_free_areas_node(unsigned int flags, int nid) { bool ret = false; + unsigned int cpuset_mems_cookie; if (!(flags & SHOW_MEM_FILTER_NODES)) goto out; - get_mems_allowed(); - ret = !node_isset(nid, cpuset_current_mems_allowed); - put_mems_allowed(); + do { + cpuset_mems_cookie = get_mems_allowed(); + ret = !node_isset(nid, cpuset_current_mems_allowed); + } while (!put_mems_allowed(cpuset_mems_cookie)); out: return ret; } @@ -3925,18 +3980,6 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn) } } -int __init add_from_early_node_map(struct range *range, int az, - int nr_range, int nid) -{ - unsigned long start_pfn, end_pfn; - int i; - - /* need to go over early_node_map to find out good range for node */ - for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) - nr_range = add_range(range, az, nr_range, start_pfn, end_pfn); - return nr_range; -} - /** * sparse_memory_present_with_active_regions - Call memory_present for each active range * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. @@ -4521,7 +4564,7 @@ static unsigned long __init early_calculate_totalpages(void) * memory. When they don't, some nodes will have more kernelcore than * others */ -static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) +static void __init find_zone_movable_pfns_for_nodes(void) { int i, nid; unsigned long usable_startpfn; @@ -4713,7 +4756,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Find the PFNs that ZONE_MOVABLE begins at in each node */ memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); - find_zone_movable_pfns_for_nodes(zone_movable_pfn); + find_zone_movable_pfns_for_nodes(); /* Print out the zone ranges */ printk("Zone PFN ranges:\n"); @@ -4823,6 +4866,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, int cpu = (unsigned long)hcpu; if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + lru_add_drain_cpu(cpu); drain_pages(cpu); /* |