summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c52
-rw-r--r--mm/compaction.c156
-rw-r--r--mm/filemap.c7
-rw-r--r--mm/huge_memory.c1
-rw-r--r--mm/internal.h1
-rw-r--r--mm/memblock.c2
-rw-r--r--mm/memory_hotplug.c16
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/mmap.c7
-rw-r--r--mm/page-writeback.c1
-rw-r--r--mm/page_alloc.c40
-rw-r--r--mm/slab.c265
-rw-r--r--mm/slab.h19
-rw-r--r--mm/slab_common.c80
-rw-r--r--mm/slob.c60
-rw-r--r--mm/slub.c164
-rw-r--r--mm/vmscan.c1
17 files changed, 447 insertions, 427 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6b4718e2ee34..b41823cc05e6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -39,12 +39,6 @@ DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);
-static struct task_struct *sync_supers_tsk;
-static struct timer_list sync_supers_timer;
-
-static int bdi_sync_supers(void *);
-static void sync_supers_timer_fn(unsigned long);
-
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
{
if (wb1 < wb2) {
@@ -250,12 +244,6 @@ static int __init default_bdi_init(void)
{
int err;
- sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
- BUG_ON(IS_ERR(sync_supers_tsk));
-
- setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
- bdi_arm_supers_timer();
-
err = bdi_init(&default_backing_dev_info);
if (!err)
bdi_register(&default_backing_dev_info, NULL, "default");
@@ -270,46 +258,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
return wb_has_dirty_io(&bdi->wb);
}
-/*
- * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
- * or we risk deadlocking on ->s_umount. The longer term solution would be
- * to implement sync_supers_bdi() or similar and simply do it from the
- * bdi writeback thread individually.
- */
-static int bdi_sync_supers(void *unused)
-{
- set_user_nice(current, 0);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
-
- /*
- * Do this periodically, like kupdated() did before.
- */
- sync_supers();
- }
-
- return 0;
-}
-
-void bdi_arm_supers_timer(void)
-{
- unsigned long next;
-
- if (!dirty_writeback_interval)
- return;
-
- next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
- mod_timer(&sync_supers_timer, round_jiffies_up(next));
-}
-
-static void sync_supers_timer_fn(unsigned long unused)
-{
- wake_up_process(sync_supers_tsk);
- bdi_arm_supers_timer();
-}
-
static void wakeup_timer_fn(unsigned long data)
{
struct backing_dev_info *bdi = (struct backing_dev_info *)data;
diff --git a/mm/compaction.c b/mm/compaction.c
index e78cb9688421..7fcd3a52e68d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -51,6 +51,47 @@ static inline bool migrate_async_suitable(int migratetype)
}
/*
+ * Compaction requires the taking of some coarse locks that are potentially
+ * very heavily contended. Check if the process needs to be scheduled or
+ * if the lock is contended. For async compaction, back out in the event
+ * if contention is severe. For sync compaction, schedule.
+ *
+ * Returns true if the lock is held.
+ * Returns false if the lock is released and compaction should abort
+ */
+static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
+ bool locked, struct compact_control *cc)
+{
+ if (need_resched() || spin_is_contended(lock)) {
+ if (locked) {
+ spin_unlock_irqrestore(lock, *flags);
+ locked = false;
+ }
+
+ /* async aborts if taking too long or contended */
+ if (!cc->sync) {
+ if (cc->contended)
+ *cc->contended = true;
+ return false;
+ }
+
+ cond_resched();
+ if (fatal_signal_pending(current))
+ return false;
+ }
+
+ if (!locked)
+ spin_lock_irqsave(lock, *flags);
+ return true;
+}
+
+static inline bool compact_trylock_irqsave(spinlock_t *lock,
+ unsigned long *flags, struct compact_control *cc)
+{
+ return compact_checklock_irqsave(lock, flags, false, cc);
+}
+
+/*
* Isolate free pages onto a private freelist. Caller must hold zone->lock.
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free
* pages inside of the pageblock (even though it may still end up isolating
@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
}
/* Update the number of anon and file isolated pages in the zone */
-static void acct_isolated(struct zone *zone, struct compact_control *cc)
+static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
{
struct page *page;
unsigned int count[2] = { 0, };
@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
list_for_each_entry(page, &cc->migratepages, lru)
count[!!page_is_file_cache(page)]++;
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ /* If locked we can use the interrupt unsafe versions */
+ if (locked) {
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ } else {
+ mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+ mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ }
}
/* Similar to reclaim, but different enough that they don't share logic */
@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
struct list_head *migratelist = &cc->migratepages;
isolate_mode_t mode = 0;
struct lruvec *lruvec;
+ unsigned long flags;
+ bool locked;
/*
* Ensure that there are not too many pages isolated from the LRU
@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
/* Time to isolate some pages for migration */
cond_resched();
- spin_lock_irq(&zone->lru_lock);
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ locked = true;
for (; low_pfn < end_pfn; low_pfn++) {
struct page *page;
- bool locked = true;
/* give a chance to irqs before checking need_resched() */
if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
locked = false;
}
- if (need_resched() || spin_is_contended(&zone->lru_lock)) {
- if (locked)
- spin_unlock_irq(&zone->lru_lock);
- cond_resched();
- spin_lock_irq(&zone->lru_lock);
- if (fatal_signal_pending(current))
- break;
- } else if (!locked)
- spin_lock_irq(&zone->lru_lock);
+
+ /* Check if it is ok to still hold the lock */
+ locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
+ locked, cc);
+ if (!locked)
+ break;
/*
* migrate_pfn does not necessarily start aligned to a
@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
}
}
- acct_isolated(zone, cc);
+ acct_isolated(zone, locked, cc);
- spin_unlock_irq(&zone->lru_lock);
+ if (locked)
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
@@ -384,6 +431,20 @@ static bool suitable_migration_target(struct page *page)
}
/*
+ * Returns the start pfn of the last page block in a zone. This is the starting
+ * point for full compaction of a zone. Compaction searches for free pages from
+ * the end of each zone, while isolate_freepages_block scans forward inside each
+ * page block.
+ */
+static unsigned long start_free_pfn(struct zone *zone)
+{
+ unsigned long free_pfn;
+ free_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ free_pfn &= ~(pageblock_nr_pages-1);
+ return free_pfn;
+}
+
+/*
* Based on information in the current compact_control, find blocks
* suitable for isolating free pages from and then isolate them.
*/
@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone,
pfn -= pageblock_nr_pages) {
unsigned long isolated;
- /*
- * Skip ahead if another thread is compacting in the area
- * simultaneously. If we wrapped around, we can only skip
- * ahead if zone->compact_cached_free_pfn also wrapped to
- * above our starting point.
- */
- if (cc->order > 0 && (!cc->wrapped ||
- zone->compact_cached_free_pfn >
- cc->start_free_pfn))
- pfn = min(pfn, zone->compact_cached_free_pfn);
-
if (!pfn_valid(pfn))
continue;
@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
* are disabled
*/
isolated = 0;
- spin_lock_irqsave(&zone->lock, flags);
+
+ /*
+ * The zone lock must be held to isolate freepages. This
+ * unfortunately this is a very coarse lock and can be
+ * heavily contended if there are parallel allocations
+ * or parallel compactions. For async compaction do not
+ * spin on the lock
+ */
+ if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
+ break;
if (suitable_migration_target(page)) {
end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
isolated = isolate_freepages_block(pfn, end_pfn,
@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone,
*/
if (isolated) {
high_pfn = max(high_pfn, pfn);
- if (cc->order > 0)
+
+ /*
+ * If the free scanner has wrapped, update
+ * compact_cached_free_pfn to point to the highest
+ * pageblock with free pages. This reduces excessive
+ * scanning of full pageblocks near the end of the
+ * zone
+ */
+ if (cc->order > 0 && cc->wrapped)
zone->compact_cached_free_pfn = high_pfn;
}
}
@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone,
cc->free_pfn = high_pfn;
cc->nr_freepages = nr_freepages;
+
+ /* If compact_cached_free_pfn is reset then set it now */
+ if (cc->order > 0 && !cc->wrapped &&
+ zone->compact_cached_free_pfn == start_free_pfn(zone))
+ zone->compact_cached_free_pfn = high_pfn;
}
/*
@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
return ISOLATE_SUCCESS;
}
-/*
- * Returns the start pfn of the last page block in a zone. This is the starting
- * point for full compaction of a zone. Compaction searches for free pages from
- * the end of each zone, while isolate_freepages_block scans forward inside each
- * page block.
- */
-static unsigned long start_free_pfn(struct zone *zone)
-{
- unsigned long free_pfn;
- free_pfn = zone->zone_start_pfn + zone->spanned_pages;
- free_pfn &= ~(pageblock_nr_pages-1);
- return free_pfn;
-}
-
static int compact_finished(struct zone *zone,
struct compact_control *cc)
{
@@ -771,7 +829,7 @@ out:
static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync)
+ bool sync, bool *contended)
{
struct compact_control cc = {
.nr_freepages = 0,
@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
.sync = sync,
+ .contended = contended,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
*/
unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync)
+ bool sync, bool *contended)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS;
@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) {
int status;
- status = compact_zone_order(zone, order, gfp_mask, sync);
+ status = compact_zone_order(zone, order, gfp_mask, sync,
+ contended);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
if (cc->order > 0) {
int ok = zone_watermark_ok(zone, cc->order,
low_wmark_pages(zone), 0, 0);
- if (ok && cc->order > zone->compact_order_failed)
+ if (ok && cc->order >= zone->compact_order_failed)
zone->compact_order_failed = cc->order + 1;
/* Currently async compaction is never deferred. */
else if (!ok && cc->sync)
diff --git a/mm/filemap.c b/mm/filemap.c
index fa5ca304148e..384344575c37 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1412,12 +1412,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
retval = filemap_write_and_wait_range(mapping, pos,
pos + iov_length(iov, nr_segs) - 1);
if (!retval) {
- struct blk_plug plug;
-
- blk_start_plug(&plug);
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
- blk_finish_plug(&plug);
}
if (retval > 0) {
*ppos = pos + retval;
@@ -2527,14 +2523,12 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
- blk_start_plug(&plug);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
@@ -2545,7 +2539,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
- blk_finish_plug(&plug);
sb_end_write(inode->i_sb);
return ret;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 57c4b9309015..141dbb695097 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1811,7 +1811,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
src_page = pte_page(pteval);
copy_user_highpage(page, src_page, address, vma);
VM_BUG_ON(page_mapcount(src_page) != 1);
- VM_BUG_ON(page_count(src_page) != 2);
release_pte_page(src_page);
/*
* ptl mostly unnecessary, but preempt has to
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775a..b8c91b342e24 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -130,6 +130,7 @@ struct compact_control {
int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
+ bool *contended; /* True if a lock was contended */
};
unsigned long
diff --git a/mm/memblock.c b/mm/memblock.c
index 4d9393c7edc9..82aa349d2f7a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -246,7 +246,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
min(new_area_start, memblock.current_limit),
new_alloc_size, PAGE_SIZE);
- new_array = addr ? __va(addr) : 0;
+ new_array = addr ? __va(addr) : NULL;
}
if (!addr) {
pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3ad25f9d1fc1..6a5b90d0cfd7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -126,9 +126,6 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
struct mem_section *ms;
struct page *page, *memmap;
- if (!pfn_valid(start_pfn))
- return;
-
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);
@@ -187,9 +184,16 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
end_pfn = pfn + pgdat->node_spanned_pages;
/* register_section info */
- for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
- register_page_bootmem_info_section(pfn);
-
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ /*
+ * Some platforms can assign the same pfn to multiple nodes - on
+ * node0 as well as nodeN. To avoid registering a pfn against
+ * multiple nodes we check that this pfn does not already
+ * reside in some other node.
+ */
+ if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node))
+ register_page_bootmem_info_section(pfn);
+ }
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bd92431d4c49..4ada3be6e252 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2562,7 +2562,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
break;
default:
- BUG();
+ return -EINVAL;
}
l = strlen(policy_modes[mode]);
diff --git a/mm/mmap.c b/mm/mmap.c
index e3e86914f11a..ae18a48e7e4e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1356,9 +1356,8 @@ out:
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len);
- if (file && uprobe_mmap(vma))
- /* matching probes but cannot insert */
- goto unmap_and_free_vma;
+ if (file)
+ uprobe_mmap(vma);
return addr;
@@ -2309,7 +2308,7 @@ void exit_mmap(struct mm_struct *mm)
}
vm_unacct_memory(nr_accounted);
- BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+ WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}
/* Insert vm structure into process list sorted by address
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e5363f34e025..5ad5ce23c1e0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1532,7 +1532,6 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
- bdi_arm_supers_timer();
return 0;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 009ac285fea7..c13ea7538891 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -584,7 +584,7 @@ static inline void __free_one_page(struct page *page,
combined_idx = buddy_idx & page_idx;
higher_page = page + (combined_idx - page_idx);
buddy_idx = __find_buddy_index(combined_idx, order + 1);
- higher_buddy = page + (buddy_idx - combined_idx);
+ higher_buddy = higher_page + (buddy_idx - combined_idx);
if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
list_add_tail(&page->lru,
&zone->free_area[order].free_list[migratetype]);
@@ -1928,6 +1928,17 @@ this_zone_full:
zlc_active = 0;
goto zonelist_scan;
}
+
+ if (page)
+ /*
+ * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
+ * necessary to allocate the page. The expectation is
+ * that the caller is taking steps that will free more
+ * memory. The caller should avoid the page being used
+ * for !PFMEMALLOC purposes.
+ */
+ page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+
return page;
}
@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int migratetype, bool sync_migration,
- bool *deferred_compaction,
+ bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
struct page *page;
@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
- nodemask, sync_migration);
+ nodemask, sync_migration,
+ contended_compaction);
current->flags &= ~PF_MEMALLOC;
if (*did_some_progress != COMPACT_SKIPPED) {
@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int migratetype, bool sync_migration,
- bool *deferred_compaction,
+ bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
return NULL;
@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned long did_some_progress;
bool sync_migration = false;
bool deferred_compaction = false;
+ bool contended_compaction = false;
/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -2389,14 +2402,6 @@ rebalance:
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
if (page) {
- /*
- * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
- * necessary to allocate the page. The expectation is
- * that the caller is taking steps that will free more
- * memory. The caller should avoid the page being used
- * for !PFMEMALLOC purposes.
- */
- page->pfmemalloc = true;
goto got_pg;
}
}
@@ -2422,6 +2427,7 @@ rebalance:
nodemask,
alloc_flags, preferred_zone,
migratetype, sync_migration,
+ &contended_compaction,
&deferred_compaction,
&did_some_progress);
if (page)
@@ -2431,10 +2437,11 @@ rebalance:
/*
* If compaction is deferred for high-order allocations, it is because
* sync compaction recently failed. In this is the case and the caller
- * has requested the system not be heavily disrupted, fail the
- * allocation now instead of entering direct reclaim
+ * requested a movable allocation that does not heavily disrupt the
+ * system then fail the allocation instead of entering direct reclaim.
*/
- if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+ if ((deferred_compaction || contended_compaction) &&
+ (gfp_mask & __GFP_NO_KSWAPD))
goto nopage;
/* Try direct reclaim and then allocating */
@@ -2505,6 +2512,7 @@ rebalance:
nodemask,
alloc_flags, preferred_zone,
migratetype, sync_migration,
+ &contended_compaction,
&deferred_compaction,
&did_some_progress);
if (page)
@@ -2569,8 +2577,6 @@ retry_cpuset:
page = __alloc_pages_slowpath(gfp_mask, order,
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
- else
- page->pfmemalloc = false;
trace_mm_page_alloc(page, order, gfp_mask, migratetype);
diff --git a/mm/slab.c b/mm/slab.c
index ca3849fe0584..87c55b0e3e0e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -570,9 +570,9 @@ static struct arraycache_init initarray_generic =
{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
/* internal cache of cache description objs */
-static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
-static struct kmem_cache cache_cache = {
- .nodelists = cache_cache_nodelists,
+static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
+static struct kmem_cache kmem_cache_boot = {
+ .nodelists = kmem_cache_nodelists,
.batchcount = 1,
.limit = BOOT_CPUCACHE_ENTRIES,
.shared = 1,
@@ -795,6 +795,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
+#if DEBUG
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
static void __slab_error(const char *function, struct kmem_cache *cachep,
@@ -805,6 +806,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
dump_stack();
add_taint(TAINT_BAD_PAGE);
}
+#endif
/*
* By default on NUMA we use alien caches to stage the freeing of
@@ -969,7 +971,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
}
/* The caller cannot use PFMEMALLOC objects, find another one */
- for (i = 1; i < ac->avail; i++) {
+ for (i = 0; i < ac->avail; i++) {
/* If a !PFMEMALLOC object is found, swap them */
if (!is_obj_pfmemalloc(ac->entry[i])) {
objp = ac->entry[i];
@@ -986,7 +988,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
l3 = cachep->nodelists[numa_mem_id()];
if (!list_empty(&l3->slabs_free) && force_refill) {
struct slab *slabp = virt_to_slab(objp);
- ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+ ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
clear_obj_pfmemalloc(&objp);
recheck_pfmemalloc_active(cachep, ac);
return objp;
@@ -1018,7 +1020,7 @@ static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
{
if (unlikely(pfmemalloc_active)) {
/* Some pfmemalloc slabs exist, check if this is one */
- struct page *page = virt_to_page(objp);
+ struct page *page = virt_to_head_page(objp);
if (PageSlabPfmemalloc(page))
set_obj_pfmemalloc(&objp);
}
@@ -1587,15 +1589,17 @@ void __init kmem_cache_init(void)
int order;
int node;
+ kmem_cache = &kmem_cache_boot;
+
if (num_possible_nodes() == 1)
use_alien_caches = 0;
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
- cache_cache.nodelists[i] = NULL;
+ kmem_cache->nodelists[i] = NULL;
}
- set_up_list3s(&cache_cache, CACHE_CACHE);
+ set_up_list3s(kmem_cache, CACHE_CACHE);
/*
* Fragmentation resistance on low memory - only use bigger
@@ -1607,9 +1611,9 @@ void __init kmem_cache_init(void)
/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
- * 1) initialize the cache_cache cache: it contains the struct
- * kmem_cache structures of all caches, except cache_cache itself:
- * cache_cache is statically allocated.
+ * 1) initialize the kmem_cache cache: it contains the struct
+ * kmem_cache structures of all caches, except kmem_cache itself:
+ * kmem_cache is statically allocated.
* Initially an __init data area is used for the head array and the
* kmem_list3 structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap.
@@ -1618,43 +1622,43 @@ void __init kmem_cache_init(void)
* An __init data area is used for the head array.
* 3) Create the remaining kmalloc caches, with minimally sized
* head arrays.
- * 4) Replace the __init data head arrays for cache_cache and the first
+ * 4) Replace the __init data head arrays for kmem_cache and the first
* kmalloc cache with kmalloc allocated arrays.
- * 5) Replace the __init data for kmem_list3 for cache_cache and
+ * 5) Replace the __init data for kmem_list3 for kmem_cache and
* the other cache's with kmalloc allocated memory.
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/
node = numa_mem_id();
- /* 1) create the cache_cache */
+ /* 1) create the kmem_cache */
INIT_LIST_HEAD(&slab_caches);
- list_add(&cache_cache.list, &slab_caches);
- cache_cache.colour_off = cache_line_size();
- cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
- cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
+ list_add(&kmem_cache->list, &slab_caches);
+ kmem_cache->colour_off = cache_line_size();
+ kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
+ kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
/*
* struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
*/
- cache_cache.size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+ kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
nr_node_ids * sizeof(struct kmem_list3 *);
- cache_cache.object_size = cache_cache.size;
- cache_cache.size = ALIGN(cache_cache.size,
+ kmem_cache->object_size = kmem_cache->size;
+ kmem_cache->size = ALIGN(kmem_cache->object_size,
cache_line_size());
- cache_cache.reciprocal_buffer_size =
- reciprocal_value(cache_cache.size);
+ kmem_cache->reciprocal_buffer_size =
+ reciprocal_value(kmem_cache->size);
for (order = 0; order < MAX_ORDER; order++) {
- cache_estimate(order, cache_cache.size,
- cache_line_size(), 0, &left_over, &cache_cache.num);
- if (cache_cache.num)
+ cache_estimate(order, kmem_cache->size,
+ cache_line_size(), 0, &left_over, &kmem_cache->num);
+ if (kmem_cache->num)
break;
}
- BUG_ON(!cache_cache.num);
- cache_cache.gfporder = order;
- cache_cache.colour = left_over / cache_cache.colour_off;
- cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
+ BUG_ON(!kmem_cache->num);
+ kmem_cache->gfporder = order;
+ kmem_cache->colour = left_over / kmem_cache->colour_off;
+ kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
sizeof(struct slab), cache_line_size());
/* 2+3) create the kmalloc caches */
@@ -1667,19 +1671,22 @@ void __init kmem_cache_init(void)
* bug.
*/
- sizes[INDEX_AC].cs_cachep = __kmem_cache_create(names[INDEX_AC].name,
- sizes[INDEX_AC].cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_PANIC,
- NULL);
+ sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name;
+ sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size;
+ sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size;
+ sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
+ __kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
+ list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches);
if (INDEX_AC != INDEX_L3) {
- sizes[INDEX_L3].cs_cachep =
- __kmem_cache_create(names[INDEX_L3].name,
- sizes[INDEX_L3].cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_PANIC,
- NULL);
+ sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name;
+ sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size;
+ sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size;
+ sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
+ __kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
+ list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches);
}
slab_early_init = 0;
@@ -1693,20 +1700,23 @@ void __init kmem_cache_init(void)
* allow tighter packing of the smaller caches.
*/
if (!sizes->cs_cachep) {
- sizes->cs_cachep = __kmem_cache_create(names->name,
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_PANIC,
- NULL);
+ sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ sizes->cs_cachep->name = names->name;
+ sizes->cs_cachep->size = sizes->cs_size;
+ sizes->cs_cachep->object_size = sizes->cs_size;
+ sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN;
+ __kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
+ list_add(&sizes->cs_cachep->list, &slab_caches);
}
#ifdef CONFIG_ZONE_DMA
- sizes->cs_dmacachep = __kmem_cache_create(
- names->name_dma,
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
- SLAB_PANIC,
- NULL);
+ sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ sizes->cs_dmacachep->name = names->name_dma;
+ sizes->cs_dmacachep->size = sizes->cs_size;
+ sizes->cs_dmacachep->object_size = sizes->cs_size;
+ sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN;
+ __kmem_cache_create(sizes->cs_dmacachep,
+ ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC);
+ list_add(&sizes->cs_dmacachep->list, &slab_caches);
#endif
sizes++;
names++;
@@ -1717,15 +1727,15 @@ void __init kmem_cache_init(void)
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
- BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
- memcpy(ptr, cpu_cache_get(&cache_cache),
+ BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
+ memcpy(ptr, cpu_cache_get(kmem_cache),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
- cache_cache.array[smp_processor_id()] = ptr;
+ kmem_cache->array[smp_processor_id()] = ptr;
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
@@ -1746,7 +1756,7 @@ void __init kmem_cache_init(void)
int nid;
for_each_online_node(nid) {
- init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
+ init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
init_list(malloc_sizes[INDEX_AC].cs_cachep,
&initkmem_list3[SIZE_AC + nid], nid);
@@ -2195,27 +2205,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
}
}
-static void __kmem_cache_destroy(struct kmem_cache *cachep)
-{
- int i;
- struct kmem_list3 *l3;
-
- for_each_online_cpu(i)
- kfree(cachep->array[i]);
-
- /* NUMA: free the list3 structures */
- for_each_online_node(i) {
- l3 = cachep->nodelists[i];
- if (l3) {
- kfree(l3->shared);
- free_alien_cache(l3->alien);
- kfree(l3);
- }
- }
- kmem_cache_free(&cache_cache, cachep);
-}
-
-
/**
* calculate_slab_order - calculate size (page order) of slabs
* @cachep: pointer to the cache that is being created
@@ -2352,9 +2341,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
* Cannot be called within a int, but can be interrupted.
* The @ctor is run when new pages are allocated by the cache.
*
- * @name must be valid until the cache is destroyed. This implies that
- * the module calling this has to destroy the cache before getting unloaded.
- *
* The flags are
*
* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
@@ -2367,13 +2353,13 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
*/
-struct kmem_cache *
-__kmem_cache_create (const char *name, size_t size, size_t align,
- unsigned long flags, void (*ctor)(void *))
+int
+__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
{
size_t left_over, slab_size, ralign;
- struct kmem_cache *cachep = NULL;
gfp_t gfp;
+ int err;
+ size_t size = cachep->size;
#if DEBUG
#if FORCED_DEBUG
@@ -2445,8 +2431,8 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
ralign = ARCH_SLAB_MINALIGN;
}
/* 3) caller mandated alignment */
- if (ralign < align) {
- ralign = align;
+ if (ralign < cachep->align) {
+ ralign = cachep->align;
}
/* disable debug if necessary */
if (ralign > __alignof__(unsigned long long))
@@ -2454,21 +2440,14 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
/*
* 4) Store it.
*/
- align = ralign;
+ cachep->align = ralign;
if (slab_is_available())
gfp = GFP_KERNEL;
else
gfp = GFP_NOWAIT;
- /* Get cache's description obj. */
- cachep = kmem_cache_zalloc(&cache_cache, gfp);
- if (!cachep)
- return NULL;
-
cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
- cachep->object_size = size;
- cachep->align = align;
#if DEBUG
/*
@@ -2492,8 +2471,9 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
- && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
- cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
+ && cachep->object_size > cache_line_size()
+ && ALIGN(size, cachep->align) < PAGE_SIZE) {
+ cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
#endif
@@ -2513,18 +2493,15 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
*/
flags |= CFLGS_OFF_SLAB;
- size = ALIGN(size, align);
+ size = ALIGN(size, cachep->align);
- left_over = calculate_slab_order(cachep, size, align, flags);
+ left_over = calculate_slab_order(cachep, size, cachep->align, flags);
+
+ if (!cachep->num)
+ return -E2BIG;
- if (!cachep->num) {
- printk(KERN_ERR
- "kmem_cache_create: couldn't create cache %s.\n", name);
- kmem_cache_free(&cache_cache, cachep);
- return NULL;
- }
slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
- + sizeof(struct slab), align);
+ + sizeof(struct slab), cachep->align);
/*
* If the slab has been placed off-slab, and we have enough space then
@@ -2552,8 +2529,8 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
cachep->colour_off = cache_line_size();
/* Offset must be a multiple of the alignment. */
- if (cachep->colour_off < align)
- cachep->colour_off = align;
+ if (cachep->colour_off < cachep->align)
+ cachep->colour_off = cachep->align;
cachep->colour = left_over / cachep->colour_off;
cachep->slab_size = slab_size;
cachep->flags = flags;
@@ -2574,12 +2551,11 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
*/
BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
}
- cachep->ctor = ctor;
- cachep->name = name;
- if (setup_cpu_cache(cachep, gfp)) {
- __kmem_cache_destroy(cachep);
- return NULL;
+ err = setup_cpu_cache(cachep, gfp);
+ if (err) {
+ __kmem_cache_shutdown(cachep);
+ return err;
}
if (flags & SLAB_DEBUG_OBJECTS) {
@@ -2592,9 +2568,7 @@ __kmem_cache_create (const char *name, size_t size, size_t align,
slab_set_debugobj_lock_classes(cachep);
}
- /* cache setup completed, link it into the list */
- list_add(&cachep->list, &slab_caches);
- return cachep;
+ return 0;
}
#if DEBUG
@@ -2753,49 +2727,29 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
}
EXPORT_SYMBOL(kmem_cache_shrink);
-/**
- * kmem_cache_destroy - delete a cache
- * @cachep: the cache to destroy
- *
- * Remove a &struct kmem_cache object from the slab cache.
- *
- * It is expected this function will be called by a module when it is
- * unloaded. This will remove the cache completely, and avoid a duplicate
- * cache being allocated each time a module is loaded and unloaded, if the
- * module doesn't have persistent in-kernel storage across loads and unloads.
- *
- * The cache must be empty before calling this function.
- *
- * The caller must guarantee that no one will allocate memory from the cache
- * during the kmem_cache_destroy().
- */
-void kmem_cache_destroy(struct kmem_cache *cachep)
+int __kmem_cache_shutdown(struct kmem_cache *cachep)
{
- BUG_ON(!cachep || in_interrupt());
+ int i;
+ struct kmem_list3 *l3;
+ int rc = __cache_shrink(cachep);
- /* Find the cache in the chain of caches. */
- get_online_cpus();
- mutex_lock(&slab_mutex);
- /*
- * the chain is never empty, cache_cache is never destroyed
- */
- list_del(&cachep->list);
- if (__cache_shrink(cachep)) {
- slab_error(cachep, "Can't free all objects");
- list_add(&cachep->list, &slab_caches);
- mutex_unlock(&slab_mutex);
- put_online_cpus();
- return;
- }
+ if (rc)
+ return rc;
- if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
- rcu_barrier();
+ for_each_online_cpu(i)
+ kfree(cachep->array[i]);
- __kmem_cache_destroy(cachep);
- mutex_unlock(&slab_mutex);
- put_online_cpus();
+ /* NUMA: free the list3 structures */
+ for_each_online_node(i) {
+ l3 = cachep->nodelists[i];
+ if (l3) {
+ kfree(l3->shared);
+ free_alien_cache(l3->alien);
+ kfree(l3);
+ }
+ }
+ return 0;
}
-EXPORT_SYMBOL(kmem_cache_destroy);
/*
* Get the memory for a slab management obj.
@@ -3246,6 +3200,7 @@ force_grow:
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
+ node = numa_mem_id();
/* no objects in sight? abort */
if (!x && (ac->avail == 0 || force_refill))
@@ -3328,7 +3283,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
- if (cachep == &cache_cache)
+ if (cachep == kmem_cache)
return false;
return should_failslab(cachep->object_size, flags, cachep->flags);
diff --git a/mm/slab.h b/mm/slab.h
index db7848caaa25..7deeb449a301 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -25,9 +25,26 @@ extern enum slab_state slab_state;
/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;
+
+/* The list of all slab caches on the system */
extern struct list_head slab_caches;
-struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
+/* The slab cache that manages slab cache information */
+extern struct kmem_cache *kmem_cache;
+
+/* Functions provided by the slab allocators */
+extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);
+
+#ifdef CONFIG_SLUB
+struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *));
+#else
+static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
+ size_t align, unsigned long flags, void (*ctor)(void *))
+{ return NULL; }
+#endif
+
+
+int __kmem_cache_shutdown(struct kmem_cache *);
#endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8cf8b4962d6c..9c217255ac49 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -22,6 +22,7 @@
enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
+struct kmem_cache *kmem_cache;
#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
@@ -98,21 +99,92 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s = NULL;
+ int err = 0;
get_online_cpus();
mutex_lock(&slab_mutex);
- if (kmem_cache_sanity_check(name, size) == 0)
- s = __kmem_cache_create(name, size, align, flags, ctor);
+
+ if (!kmem_cache_sanity_check(name, size) == 0)
+ goto out_locked;
+
+
+ s = __kmem_cache_alias(name, size, align, flags, ctor);
+ if (s)
+ goto out_locked;
+
+ s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
+ if (s) {
+ s->object_size = s->size = size;
+ s->align = align;
+ s->ctor = ctor;
+ s->name = kstrdup(name, GFP_KERNEL);
+ if (!s->name) {
+ kmem_cache_free(kmem_cache, s);
+ err = -ENOMEM;
+ goto out_locked;
+ }
+
+ err = __kmem_cache_create(s, flags);
+ if (!err) {
+
+ s->refcount = 1;
+ list_add(&s->list, &slab_caches);
+
+ } else {
+ kfree(s->name);
+ kmem_cache_free(kmem_cache, s);
+ }
+ } else
+ err = -ENOMEM;
+
+out_locked:
mutex_unlock(&slab_mutex);
put_online_cpus();
- if (!s && (flags & SLAB_PANIC))
- panic("kmem_cache_create: Failed to create slab '%s'\n", name);
+ if (err) {
+
+ if (flags & SLAB_PANIC)
+ panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
+ name, err);
+ else {
+ printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
+ name, err);
+ dump_stack();
+ }
+
+ return NULL;
+ }
return s;
}
EXPORT_SYMBOL(kmem_cache_create);
+void kmem_cache_destroy(struct kmem_cache *s)
+{
+ get_online_cpus();
+ mutex_lock(&slab_mutex);
+ s->refcount--;
+ if (!s->refcount) {
+ list_del(&s->list);
+
+ if (!__kmem_cache_shutdown(s)) {
+ if (s->flags & SLAB_DESTROY_BY_RCU)
+ rcu_barrier();
+
+ kfree(s->name);
+ kmem_cache_free(kmem_cache, s);
+ } else {
+ list_add(&s->list, &slab_caches);
+ printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n",
+ s->name);
+ dump_stack();
+ }
+ }
+ mutex_unlock(&slab_mutex);
+ put_online_cpus();
+}
+EXPORT_SYMBOL(kmem_cache_destroy);
+
int slab_is_available(void)
{
return slab_state >= UP;
diff --git a/mm/slob.c b/mm/slob.c
index 8c00d22afc9e..a08e4681fd0d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -529,44 +529,24 @@ size_t ksize(const void *block)
}
EXPORT_SYMBOL(ksize);
-struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
- size_t align, unsigned long flags, void (*ctor)(void *))
+int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
{
- struct kmem_cache *c;
+ size_t align = c->size;
- c = slob_alloc(sizeof(struct kmem_cache),
- GFP_KERNEL, ARCH_KMALLOC_MINALIGN, NUMA_NO_NODE);
-
- if (c) {
- c->name = name;
- c->size = size;
- if (flags & SLAB_DESTROY_BY_RCU) {
- /* leave room for rcu footer at the end of object */
- c->size += sizeof(struct slob_rcu);
- }
- c->flags = flags;
- c->ctor = ctor;
- /* ignore alignment unless it's forced */
- c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
- if (c->align < ARCH_SLAB_MINALIGN)
- c->align = ARCH_SLAB_MINALIGN;
- if (c->align < align)
- c->align = align;
-
- kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
- c->refcount = 1;
+ if (flags & SLAB_DESTROY_BY_RCU) {
+ /* leave room for rcu footer at the end of object */
+ c->size += sizeof(struct slob_rcu);
}
- return c;
-}
+ c->flags = flags;
+ /* ignore alignment unless it's forced */
+ c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
+ if (c->align < ARCH_SLAB_MINALIGN)
+ c->align = ARCH_SLAB_MINALIGN;
+ if (c->align < align)
+ c->align = align;
-void kmem_cache_destroy(struct kmem_cache *c)
-{
- kmemleak_free(c);
- if (c->flags & SLAB_DESTROY_BY_RCU)
- rcu_barrier();
- slob_free(c, sizeof(struct kmem_cache));
+ return 0;
}
-EXPORT_SYMBOL(kmem_cache_destroy);
void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
@@ -634,14 +614,28 @@ unsigned int kmem_cache_size(struct kmem_cache *c)
}
EXPORT_SYMBOL(kmem_cache_size);
+int __kmem_cache_shutdown(struct kmem_cache *c)
+{
+ /* No way to check for remaining objects */
+ return 0;
+}
+
int kmem_cache_shrink(struct kmem_cache *d)
{
return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);
+struct kmem_cache kmem_cache_boot = {
+ .name = "kmem_cache",
+ .size = sizeof(struct kmem_cache),
+ .flags = SLAB_PANIC,
+ .align = ARCH_KMALLOC_MINALIGN,
+};
+
void __init kmem_cache_init(void)
{
+ kmem_cache = &kmem_cache_boot;
slab_state = UP;
}
diff --git a/mm/slub.c b/mm/slub.c
index f074f756405a..a0d698467f70 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -210,11 +210,7 @@ static void sysfs_slab_remove(struct kmem_cache *);
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s)
-{
- kfree(s->name);
- kfree(s);
-}
+static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif
@@ -626,7 +622,7 @@ static void object_err(struct kmem_cache *s, struct page *page,
print_trailer(s, page, object);
}
-static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
+static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
{
va_list args;
char buf[100];
@@ -1531,12 +1527,13 @@ static inline void *acquire_slab(struct kmem_cache *s,
}
static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
-static void *get_partial_node(struct kmem_cache *s,
- struct kmem_cache_node *n, struct kmem_cache_cpu *c)
+static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ struct kmem_cache_cpu *c, gfp_t flags)
{
struct page *page, *page2;
void *object = NULL;
@@ -1552,9 +1549,13 @@ static void *get_partial_node(struct kmem_cache *s,
spin_lock(&n->list_lock);
list_for_each_entry_safe(page, page2, &n->partial, lru) {
- void *t = acquire_slab(s, n, page, object == NULL);
+ void *t;
int available;
+ if (!pfmemalloc_match(page, flags))
+ continue;
+
+ t = acquire_slab(s, n, page, object == NULL);
if (!t)
break;
@@ -1621,7 +1622,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
n->nr_partial > s->min_partial) {
- object = get_partial_node(s, n, c);
+ object = get_partial_node(s, n, c, flags);
if (object) {
/*
* Return the object even if
@@ -1650,7 +1651,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
void *object;
int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
- object = get_partial_node(s, get_node(s, searchnode), c);
+ object = get_partial_node(s, get_node(s, searchnode), c, flags);
if (object || node != NUMA_NO_NODE)
return object;
@@ -1716,7 +1717,7 @@ static inline void note_cmpxchg_failure(const char *n,
stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
}
-void init_kmem_cache_cpus(struct kmem_cache *s)
+static void init_kmem_cache_cpus(struct kmem_cache *s)
{
int cpu;
@@ -1941,7 +1942,7 @@ static void unfreeze_partials(struct kmem_cache *s)
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
-int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
struct page *oldpage;
int pages;
@@ -2622,6 +2623,13 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
page = virt_to_head_page(x);
+ if (kmem_cache_debug(s) && page->slab != s) {
+ pr_err("kmem_cache_free: Wrong slab cache. %s but object"
+ " is from %s\n", page->slab->name, s->name);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
slab_free(s, page, x, _RET_IP_);
trace_kmem_cache_free(_RET_IP_, x);
@@ -3036,17 +3044,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
}
-static int kmem_cache_open(struct kmem_cache *s,
- const char *name, size_t size,
- size_t align, unsigned long flags,
- void (*ctor)(void *))
+static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
- memset(s, 0, kmem_size);
- s->name = name;
- s->ctor = ctor;
- s->object_size = size;
- s->align = align;
- s->flags = kmem_cache_flags(size, flags, name, ctor);
+ s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
s->reserved = 0;
if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
@@ -3108,7 +3108,6 @@ static int kmem_cache_open(struct kmem_cache *s,
else
s->cpu_partial = 30;
- s->refcount = 1;
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
#endif
@@ -3116,16 +3115,16 @@ static int kmem_cache_open(struct kmem_cache *s,
goto error;
if (alloc_kmem_cache_cpus(s))
- return 1;
+ return 0;
free_kmem_cache_nodes(s);
error:
if (flags & SLAB_PANIC)
panic("Cannot create slab %s size=%lu realsize=%u "
"order=%u offset=%u flags=%lx\n",
- s->name, (unsigned long)size, s->size, oo_order(s->oo),
+ s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
s->offset, flags);
- return 0;
+ return -EINVAL;
}
/*
@@ -3147,7 +3146,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
sizeof(long), GFP_ATOMIC);
if (!map)
return;
- slab_err(s, page, "%s", text);
+ slab_err(s, page, text, s->name);
slab_lock(page);
get_map(s, page, map);
@@ -3179,7 +3178,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
discard_slab(s, page);
} else {
list_slab_objects(s, page,
- "Objects remaining on kmem_cache_close()");
+ "Objects remaining in %s on kmem_cache_close()");
}
}
}
@@ -3192,7 +3191,6 @@ static inline int kmem_cache_close(struct kmem_cache *s)
int node;
flush_all(s);
- free_percpu(s->cpu_slab);
/* Attempt to free all objects */
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node);
@@ -3201,33 +3199,20 @@ static inline int kmem_cache_close(struct kmem_cache *s)
if (n->nr_partial || slabs_node(s, node))
return 1;
}
+ free_percpu(s->cpu_slab);
free_kmem_cache_nodes(s);
return 0;
}
-/*
- * Close a cache and release the kmem_cache structure
- * (must be used for caches created using kmem_cache_create)
- */
-void kmem_cache_destroy(struct kmem_cache *s)
+int __kmem_cache_shutdown(struct kmem_cache *s)
{
- mutex_lock(&slab_mutex);
- s->refcount--;
- if (!s->refcount) {
- list_del(&s->list);
- mutex_unlock(&slab_mutex);
- if (kmem_cache_close(s)) {
- printk(KERN_ERR "SLUB %s: %s called for cache that "
- "still has objects.\n", s->name, __func__);
- dump_stack();
- }
- if (s->flags & SLAB_DESTROY_BY_RCU)
- rcu_barrier();
+ int rc = kmem_cache_close(s);
+
+ if (!rc)
sysfs_slab_remove(s);
- } else
- mutex_unlock(&slab_mutex);
+
+ return rc;
}
-EXPORT_SYMBOL(kmem_cache_destroy);
/********************************************************************
* Kmalloc subsystem
@@ -3236,8 +3221,6 @@ EXPORT_SYMBOL(kmem_cache_destroy);
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);
-static struct kmem_cache *kmem_cache;
-
#ifdef CONFIG_ZONE_DMA
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
@@ -3283,14 +3266,17 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
{
struct kmem_cache *s;
- s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
+ s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+ s->name = name;
+ s->size = s->object_size = size;
+ s->align = ARCH_KMALLOC_MINALIGN;
/*
* This function is called with IRQs disabled during early-boot on
* single CPU so there's no need to take slab_mutex here.
*/
- if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
- flags, NULL))
+ if (kmem_cache_open(s, flags))
goto panic;
list_add(&s->list, &slab_caches);
@@ -3729,12 +3715,12 @@ void __init kmem_cache_init(void)
slub_max_order = 0;
kmem_size = offsetof(struct kmem_cache, node) +
- nr_node_ids * sizeof(struct kmem_cache_node *);
+ nr_node_ids * sizeof(struct kmem_cache_node *);
/* Allocate two kmem_caches from the page allocator */
kmalloc_size = ALIGN(kmem_size, cache_line_size());
order = get_order(2 * kmalloc_size);
- kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
+ kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order);
/*
* Must first have the slab cache available for the allocations of the
@@ -3743,9 +3729,10 @@ void __init kmem_cache_init(void)
*/
kmem_cache_node = (void *)kmem_cache + kmalloc_size;
- kmem_cache_open(kmem_cache_node, "kmem_cache_node",
- sizeof(struct kmem_cache_node),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ kmem_cache_node->name = "kmem_cache_node";
+ kmem_cache_node->size = kmem_cache_node->object_size =
+ sizeof(struct kmem_cache_node);
+ kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
@@ -3753,8 +3740,10 @@ void __init kmem_cache_init(void)
slab_state = PARTIAL;
temp_kmem_cache = kmem_cache;
- kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ kmem_cache->name = "kmem_cache";
+ kmem_cache->size = kmem_cache->object_size = kmem_size;
+ kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+
kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
memcpy(kmem_cache, temp_kmem_cache, kmem_size);
@@ -3943,11 +3932,10 @@ static struct kmem_cache *find_mergeable(size_t size,
return NULL;
}
-struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
+struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;
- char *n;
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
@@ -3961,36 +3949,29 @@ struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
if (sysfs_slab_alias(s, name)) {
s->refcount--;
- return NULL;
+ s = NULL;
}
- return s;
}
- n = kstrdup(name, GFP_KERNEL);
- if (!n)
- return NULL;
+ return s;
+}
- s = kmalloc(kmem_size, GFP_KERNEL);
- if (s) {
- if (kmem_cache_open(s, n,
- size, align, flags, ctor)) {
- int r;
+int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
+{
+ int err;
- list_add(&s->list, &slab_caches);
- mutex_unlock(&slab_mutex);
- r = sysfs_slab_add(s);
- mutex_lock(&slab_mutex);
+ err = kmem_cache_open(s, flags);
+ if (err)
+ return err;
- if (!r)
- return s;
+ mutex_unlock(&slab_mutex);
+ err = sysfs_slab_add(s);
+ mutex_lock(&slab_mutex);
- list_del(&s->list);
- kmem_cache_close(s);
- }
- kfree(s);
- }
- kfree(n);
- return NULL;
+ if (err)
+ kmem_cache_close(s);
+
+ return err;
}
#ifdef CONFIG_SMP
@@ -5220,14 +5201,6 @@ static ssize_t slab_attr_store(struct kobject *kobj,
return err;
}
-static void kmem_cache_release(struct kobject *kobj)
-{
- struct kmem_cache *s = to_slab(kobj);
-
- kfree(s->name);
- kfree(s);
-}
-
static const struct sysfs_ops slab_sysfs_ops = {
.show = slab_attr_show,
.store = slab_attr_store,
@@ -5235,7 +5208,6 @@ static const struct sysfs_ops slab_sysfs_ops = {
static struct kobj_type slab_ktype = {
.sysfs_ops = &slab_sysfs_ops,
- .release = kmem_cache_release
};
static int uevent_filter(struct kset *kset, struct kobject *kobj)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8d01243d9560..99b434b674c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3102,6 +3102,7 @@ int kswapd_run(int nid)
/* failure at boot is fatal */
BUG_ON(system_state == SYSTEM_BOOTING);
printk("Failed to start kswapd on node %d\n",nid);
+ pgdat->kswapd = NULL;
ret = -1;
}
return ret;
OpenPOWER on IntegriCloud