Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |   1
-rw-r--r--  mm/filemap.c         |  54
-rw-r--r--  mm/filemap.h         |  26
-rw-r--r--  mm/memory_hotplug.c  | 126
-rw-r--r--  mm/mempolicy.c       |  12
-rw-r--r--  mm/migrate.c         |  30
-rw-r--r--  mm/page-writeback.c  |   4
-rw-r--r--  mm/page_alloc.c      |  13
-rw-r--r--  mm/pdflush.c         |  15
-rw-r--r--  mm/readahead.c       |  20
-rw-r--r--  mm/rmap.c            |   9
-rw-r--r--  mm/shmem.c           |   2
-rw-r--r--  mm/slab.c            |  21
-rw-r--r--  mm/sparse.c          |   2
-rw-r--r--  mm/swap.c            |   3
-rw-r--r--  mm/vmscan.c          |  39
16 files changed, 278 insertions, 99 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 66e65ab39426..e76c023eb0bb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -116,6 +116,7 @@ config SPARSEMEM_EXTREME
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+ depends on (IA64 || X86 || PPC64)
comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
diff --git a/mm/filemap.c b/mm/filemap.c
index 807a463fd5ed..d504d6e98886 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -828,6 +828,32 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
}
EXPORT_SYMBOL(grab_cache_page_nowait);
+/*
+ * CD/DVDs are error prone. When a medium error occurs, the driver may fail
+ * a _large_ part of the i/o request. Imagine the worst scenario:
+ *
+ * ---R__________________________________________B__________
+ * ^ reading here ^ bad block(assume 4k)
+ *
+ * read(R) => miss => readahead(R...B) => media error => frustrating retries
+ * => failing the whole request => read(R) => read(R+1) =>
+ * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
+ * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
+ * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
+ *
+ * It is going insane. Fix it by quickly scaling down the readahead size.
+ */
+static void shrink_readahead_size_eio(struct file *filp,
+ struct file_ra_state *ra)
+{
+ if (!ra->ra_pages)
+ return;
+
+ ra->ra_pages /= 4;
+ printk(KERN_WARNING "Reducing readahead size to %luK\n",
+ ra->ra_pages << (PAGE_CACHE_SHIFT - 10));
+}
+
/**
* do_generic_mapping_read - generic file read routine
* @mapping: address_space to be read
@@ -985,6 +1011,7 @@ readpage:
}
unlock_page(page);
error = -EIO;
+ shrink_readahead_size_eio(filp, &ra);
goto readpage_error;
}
unlock_page(page);
@@ -1522,6 +1549,7 @@ page_not_uptodate:
* Things didn't work out. Return zero to tell the
* mm layer so, possibly freeing the page cache page first.
*/
+ shrink_readahead_size_eio(file, ra);
page_cache_release(page);
return NULL;
}
@@ -1892,7 +1920,7 @@ int remove_suid(struct dentry *dentry)
EXPORT_SYMBOL(remove_suid);
size_t
-__filemap_copy_from_user_iovec(char *vaddr,
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
{
size_t copied = 0, left = 0;
@@ -1908,12 +1936,8 @@ __filemap_copy_from_user_iovec(char *vaddr,
vaddr += copy;
iov++;
- if (unlikely(left)) {
- /* zero the rest of the target like __copy_from_user */
- if (bytes)
- memset(vaddr, 0, bytes);
+ if (unlikely(left))
break;
- }
}
return copied - left;
}
@@ -2071,14 +2095,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
do {
unsigned long index;
unsigned long offset;
- unsigned long maxlen;
size_t copied;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
index = pos >> PAGE_CACHE_SHIFT;
bytes = PAGE_CACHE_SIZE - offset;
- if (bytes > count)
- bytes = count;
+
+ /* Limit the size of the copy to the caller's write size */
+ bytes = min(bytes, count);
+
+ /*
+ * Limit the size of the copy to that of the current segment,
+ * because fault_in_pages_readable() doesn't know how to walk
+ * segments.
+ */
+ bytes = min(bytes, cur_iov->iov_len - iov_base);
/*
* Bring in the user page that we will copy from _first_.
@@ -2086,10 +2117,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
* same page as we're writing to, without it being marked
* up-to-date.
*/
- maxlen = cur_iov->iov_len - iov_base;
- if (maxlen > bytes)
- maxlen = bytes;
- fault_in_pages_readable(buf, maxlen);
+ fault_in_pages_readable(buf, bytes);
page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
if (!page) {
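To see how quickly the /= 4 decay in shrink_readahead_size_eio() above turns readahead off, here is a minimal user-space sketch; the starting value of 128 pages (512K with 4K pages) is an assumption for illustration, not a value taken from this patch.

    #include <stdio.h>

    int main(void)
    {
    	unsigned long ra_pages = 128;	/* assumed default, 4K pages */
    	int error;

    	for (error = 1; ra_pages; error++) {
    		ra_pages /= 4;	/* same decay as shrink_readahead_size_eio() */
    		printf("after media error %d: readahead = %luK\n",
    		       error, ra_pages * 4);
    	}
    	/* 128 -> 32 -> 8 -> 2 -> 0: a handful of errors disables readahead. */
    	return 0;
    }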
diff --git a/mm/filemap.h b/mm/filemap.h
index 5683cde22055..536979fb4ba7 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -16,15 +16,23 @@
#include <linux/uaccess.h>
size_t
-__filemap_copy_from_user_iovec(char *vaddr,
- const struct iovec *iov,
- size_t base,
- size_t bytes);
+__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+ const struct iovec *iov,
+ size_t base,
+ size_t bytes);
/*
* Copy as much as we can into the page and return the number of bytes which
* were sucessfully copied. If a fault is encountered then clear the page
* out to (offset+bytes) and return the number of bytes which were copied.
+ *
+ * NOTE: For this to work reliably we really want copy_from_user_inatomic_nocache
+ * to *NOT* zero any tail of the buffer that it failed to copy. If it does,
+ * and if the following non-atomic copy succeeds, then there is a small window
+ * where the target page contains neither the data before the write, nor the
+ * data after the write (it contains zero). A read at this time will see
+ * data that is inconsistent with any ordering of the read and the write.
+ * (This has been detected in practice).
*/
static inline size_t
filemap_copy_from_user(struct page *page, unsigned long offset,
@@ -60,13 +68,15 @@ filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
size_t copied;
kaddr = kmap_atomic(page, KM_USER0);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
+ copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+ base, bytes);
kunmap_atomic(kaddr, KM_USER0);
if (copied != bytes) {
kaddr = kmap(page);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
+ copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
+ base, bytes);
+ if (bytes - copied)
+ memset(kaddr + offset + copied, 0, bytes - copied);
kunmap(page);
}
return copied;
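The NOTE above boils down to an ordering rule: zero-fill the tail only after the slow retry, never between the two copy attempts. A hypothetical user-space analog of that pattern (the fast_copy()/copy_into_page() names and the fault simulation are invented; this is not the kernel API):

    #include <stdio.h>
    #include <string.h>

    /* Pretend the fast path faults after 'fault_at' bytes, like an atomic
     * copy_from_user that cannot take the fault; it does NOT zero the tail. */
    static size_t fast_copy(char *dst, const char *src, size_t len, size_t fault_at)
    {
    	size_t n = len < fault_at ? len : fault_at;

    	memcpy(dst, src, n);
    	return n;
    }

    static size_t copy_into_page(char *page, size_t offset,
    			     const char *buf, size_t bytes, size_t fault_at)
    {
    	size_t copied = fast_copy(page + offset, buf, bytes, fault_at);

    	if (copied != bytes) {
    		/* Slow retry (the kmap() path in the kernel); it may still
    		 * fall short if the source really is unreadable. */
    		copied = fast_copy(page + offset, buf, bytes, fault_at);
    		/* Only now is it safe to zero the tail: a concurrent reader
    		 * never sees zeroes that a later successful retry would have
    		 * overwritten with the real data. */
    		if (bytes - copied)
    			memset(page + offset + copied, 0, bytes - copied);
    	}
    	return copied;
    }

    int main(void)
    {
    	char page[16] = "oldoldolddata!";

    	printf("copied %zu of 7 bytes\n",
    	       copy_into_page(page, 0, "newdata", 7, 4 /* simulated fault */));
    	return 0;
    }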
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 841a077d5aeb..ea4038838b0a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,6 +21,7 @@
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
+#include <linux/ioport.h>
#include <asm/tlbflush.h>
@@ -126,6 +127,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
unsigned long i;
unsigned long flags;
unsigned long onlined_pages = 0;
+ struct resource res;
+ u64 section_end;
+ unsigned long start_pfn;
struct zone *zone;
int need_zonelists_rebuild = 0;
@@ -148,10 +152,27 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
if (!populated_zone(zone))
need_zonelists_rebuild = 1;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pfn_to_page(pfn + i);
- online_page(page);
- onlined_pages++;
+ res.start = (u64)pfn << PAGE_SHIFT;
+ res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
+ res.flags = IORESOURCE_MEM; /* we just need system ram */
+ section_end = res.end;
+
+ while (find_next_system_ram(&res) >= 0) {
+ start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
+ nr_pages = (unsigned long)
+ ((res.end + 1 - res.start) >> PAGE_SHIFT);
+
+ if (PageReserved(pfn_to_page(start_pfn))) {
+ /* this region's page is not onlined now */
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pfn_to_page(start_pfn + i);
+ online_page(page);
+ onlined_pages++;
+ }
+ }
+
+ res.start = res.end + 1;
+ res.end = section_end;
}
zone->present_pages += onlined_pages;
zone->zone_pgdat->node_present_pages += onlined_pages;
@@ -163,3 +184,100 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
vm_total_pages = nr_free_pagecache_pages();
return 0;
}
+
+static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
+{
+ struct pglist_data *pgdat;
+ unsigned long zones_size[MAX_NR_ZONES] = {0};
+ unsigned long zholes_size[MAX_NR_ZONES] = {0};
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+
+ pgdat = arch_alloc_nodedata(nid);
+ if (!pgdat)
+ return NULL;
+
+ arch_refresh_nodedata(nid, pgdat);
+
+ /* we can use NODE_DATA(nid) from here */
+
+ /* init node's zones as empty zones, we don't have any present pages.*/
+ free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+
+ return pgdat;
+}
+
+static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
+{
+ arch_refresh_nodedata(nid, NULL);
+ arch_free_nodedata(pgdat);
+ return;
+}
+
+/* add this memory to iomem resource */
+static void register_memory_resource(u64 start, u64 size)
+{
+ struct resource *res;
+
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(!res);
+
+ res->name = "System RAM";
+ res->start = start;
+ res->end = start + size - 1;
+ res->flags = IORESOURCE_MEM;
+ if (request_resource(&iomem_resource, res) < 0) {
+ printk("System RAM resource %llx - %llx cannot be added\n",
+ (unsigned long long)res->start, (unsigned long long)res->end);
+ kfree(res);
+ }
+}
+
+
+
+int add_memory(int nid, u64 start, u64 size)
+{
+ pg_data_t *pgdat = NULL;
+ int new_pgdat = 0;
+ int ret;
+
+ if (!node_online(nid)) {
+ pgdat = hotadd_new_pgdat(nid, start);
+ if (!pgdat)
+ return -ENOMEM;
+ new_pgdat = 1;
+ ret = kswapd_run(nid);
+ if (ret)
+ goto error;
+ }
+
+ /* call arch's memory hotadd */
+ ret = arch_add_memory(nid, start, size);
+
+ if (ret < 0)
+ goto error;
+
+ /* we online node here. we can't roll back from here. */
+ node_set_online(nid);
+
+ if (new_pgdat) {
+ ret = register_one_node(nid);
+ /*
+ * If sysfs file of new node can't create, cpu on the node
+ * can't be hot-added. There is no rollback way now.
+ * So, check by BUG_ON() to catch it reluctantly..
+ */
+ BUG_ON(ret);
+ }
+
+ /* register this memory as resource */
+ register_memory_resource(start, size);
+
+ return ret;
+error:
+ /* rollback pgdat allocation and others */
+ if (new_pgdat)
+ rollback_node_hotadd(nid, pgdat);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory);
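The interface exported above is meant to be driven by platform hot-add code. A hedged sketch of a caller, assuming a hypothetical driver and an invented hotadd_region structure (only add_memory(nid, start, size) itself comes from this patch):

    #include <linux/memory_hotplug.h>
    #include <linux/kernel.h>

    struct hotadd_region {		/* invented for this sketch */
    	int nid;
    	u64 start;			/* physical address, section aligned */
    	u64 size;			/* bytes */
    };

    static int hotadd_region_probe(struct hotadd_region *r)
    {
    	int ret;

    	/*
    	 * add_memory() allocates the pgdat and starts kswapd for a node that
    	 * is not yet online, calls the arch hook, registers the node in sysfs
    	 * and claims the range as "System RAM" in the iomem resource tree.
    	 */
    	ret = add_memory(r->nid, r->start, r->size);
    	if (ret)
    		printk(KERN_ERR "memory hot-add at %llx (%lluMB) failed: %d\n",
    		       (unsigned long long)r->start,
    		       (unsigned long long)(r->size >> 20), ret);
    	return ret;
    }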
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ec4a1a950df9..6b9740bbf4c0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -632,6 +632,10 @@ int do_migrate_pages(struct mm_struct *mm,
down_read(&mm->mmap_sem);
+ err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+ if (err)
+ goto out;
+
/*
* Find a 'source' bit set in 'tmp' whose corresponding 'dest'
* bit in 'to' is not also set in 'tmp'. Clear the found 'source'
@@ -691,7 +695,7 @@ int do_migrate_pages(struct mm_struct *mm,
if (err < 0)
break;
}
-
+out:
up_read(&mm->mmap_sem);
if (err < 0)
return err;
@@ -1817,7 +1821,7 @@ static inline void check_huge_range(struct vm_area_struct *vma,
int show_numa_map(struct seq_file *m, void *v)
{
- struct task_struct *task = m->private;
+ struct proc_maps_private *priv = m->private;
struct vm_area_struct *vma = v;
struct numa_maps *md;
struct file *file = vma->vm_file;
@@ -1833,7 +1837,7 @@ int show_numa_map(struct seq_file *m, void *v)
return 0;
mpol_to_str(buffer, sizeof(buffer),
- get_vma_policy(task, vma, vma->vm_start));
+ get_vma_policy(priv->task, vma, vma->vm_start));
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
@@ -1887,7 +1891,7 @@ out:
kfree(md);
if (m->count < m->size)
- m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+ m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
return 0;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 1c2a71aa05cd..3f1e0c2c942c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -616,15 +616,13 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
/*
* Establish migration ptes or remove ptes
*/
- if (try_to_unmap(page, 1) != SWAP_FAIL) {
- if (!page_mapped(page))
- rc = move_to_new_page(newpage, page);
- } else
- /* A vma has VM_LOCKED set -> permanent failure */
- rc = -EPERM;
+ try_to_unmap(page, 1);
+ if (!page_mapped(page))
+ rc = move_to_new_page(newpage, page);
if (rc)
remove_migration_ptes(page, page);
+
unlock:
unlock_page(page);
@@ -976,3 +974,23 @@ out2:
}
#endif
+/*
+ * Call migration functions in the vma_ops that may prepare
+ * memory in a vm for migration. migration functions may perform
+ * the migration for vmas that do not have an underlying page struct.
+ */
+int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
+ const nodemask_t *from, unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ int err = 0;
+
+ for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+ if (vma->vm_ops && vma->vm_ops->migrate) {
+ err = vma->vm_ops->migrate(vma, to, from, flags);
+ if (err)
+ break;
+ }
+ }
+ return err;
+}
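migrate_vmas() only matters to mappings whose vm_ops provide the new ->migrate hook. A skeleton of such a hook, with the example_* names invented and the signature inferred from the call site above:

    #include <linux/mm.h>
    #include <linux/nodemask.h>

    static int example_migrate(struct vm_area_struct *vma, const nodemask_t *to,
    			   const nodemask_t *from, unsigned long flags)
    {
    	/*
    	 * Re-home whatever backs this VMA (device memory, driver-private
    	 * buffers, ...) onto the nodes in 'to'; there are no struct pages
    	 * for the generic migration code to operate on.
    	 */
    	return 0;		/* or a -errno on failure */
    }

    static struct vm_operations_struct example_vm_ops = {
    	/* .open/.close/.nopage as usual for the mapping */
    	.migrate	= example_migrate,
    };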
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8ccf6f1b1473..4ec7026c7bab 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -516,14 +516,14 @@ static void set_ratelimit(void)
ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
}
-static int
+static int __cpuinit
ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
{
set_ratelimit();
return 0;
}
-static struct notifier_block ratelimit_nb = {
+static struct notifier_block __cpuinitdata ratelimit_nb = {
.notifier_call = ratelimit_handler,
.next = NULL,
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 423db0db7c02..084a2de7e52a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -266,7 +266,7 @@ static inline void rmv_page_order(struct page *page)
* satisfies the following equation:
* P = B & ~(1 << O)
*
- * Assumption: *_mem_map is contigious at least up to MAX_ORDER
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline struct page *
__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
@@ -446,8 +446,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
arch_free_page(page, order);
if (!PageHighMem(page))
- mutex_debug_check_no_locks_freed(page_address(page),
- PAGE_SIZE<<order);
+ debug_check_no_locks_freed(page_address(page),
+ PAGE_SIZE<<order);
for (i = 0 ; i < (1 << order) ; ++i)
reserved += free_pages_check(page + i);
@@ -957,8 +957,7 @@ restart:
goto got_pg;
do {
- if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
- wakeup_kswapd(*z, order);
+ wakeup_kswapd(*z, order);
} while (*(++z));
/*
@@ -2010,7 +2009,7 @@ static inline void free_zone_pagesets(int cpu)
}
}
-static int pageset_cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -2032,7 +2031,7 @@ static int pageset_cpuup_callback(struct notifier_block *nfb,
return ret;
}
-static struct notifier_block pageset_notifier =
+static struct notifier_block __cpuinitdata pageset_notifier =
{ &pageset_cpuup_callback, NULL, 0 };
void __init setup_per_cpu_pageset(void)
diff --git a/mm/pdflush.c b/mm/pdflush.c
index df7e50b8f70c..b02102feeb4b 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -104,21 +104,20 @@ static int __pdflush(struct pdflush_work *my_work)
list_move(&my_work->list, &pdflush_list);
my_work->when_i_went_to_sleep = jiffies;
spin_unlock_irq(&pdflush_lock);
-
schedule();
- if (try_to_freeze()) {
- spin_lock_irq(&pdflush_lock);
- continue;
- }
-
+ try_to_freeze();
spin_lock_irq(&pdflush_lock);
if (!list_empty(&my_work->list)) {
- printk("pdflush: bogus wakeup!\n");
+ /*
+ * Someone woke us up, but without removing our control
+ * structure from the global list. swsusp will do this
+ * in try_to_freeze()->refrigerator(). Handle it.
+ */
my_work->fn = NULL;
continue;
}
if (my_work->fn == NULL) {
- printk("pdflush: NULL work function\n");
+ printk("pdflush: bogus wakeup\n");
continue;
}
spin_unlock_irq(&pdflush_lock);
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f142a40984b..aa7ec424656a 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -118,8 +118,7 @@ static inline unsigned long get_next_ra_size(struct file_ra_state *ra)
#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
/**
- * read_cache_pages - populate an address space with some pages, and
- * start reads against them.
+ * read_cache_pages - populate an address space with some pages & start reads against them
* @mapping: the address_space
* @pages: The address of a list_head which contains the target pages. These
* pages have their ->index populated and are otherwise uninitialised.
@@ -182,14 +181,11 @@ static int read_pages(struct address_space *mapping, struct file *filp,
list_del(&page->lru);
if (!add_to_page_cache(page, mapping,
page->index, GFP_KERNEL)) {
- ret = mapping->a_ops->readpage(filp, page);
- if (ret != AOP_TRUNCATED_PAGE) {
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- continue;
- } /* else fall through to release */
- }
- page_cache_release(page);
+ mapping->a_ops->readpage(filp, page);
+ if (!pagevec_add(&lru_pvec, page))
+ __pagevec_lru_add(&lru_pvec);
+ } else
+ page_cache_release(page);
}
pagevec_lru_add(&lru_pvec);
ret = 0;
@@ -394,8 +390,8 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
* Read 'nr_to_read' pages starting at page 'offset'. If the flag 'block'
* is set wait till the read completes. Otherwise attempt to read without
* blocking.
- * Returns 1 meaning 'success' if read is succesfull without switching off
- * readhaead mode. Otherwise return failure.
+ * Returns 1 meaning 'success' if read is successful without switching off
+ * readahead mode. Otherwise return failure.
*/
static int
blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
diff --git a/mm/rmap.c b/mm/rmap.c
index 882a85826bb2..e76909e880ca 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -562,9 +562,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it.
*/
- if ((vma->vm_flags & VM_LOCKED) ||
- (ptep_clear_flush_young(vma, address, pte)
- && !migration)) {
+ if (!migration && ((vma->vm_flags & VM_LOCKED) ||
+ (ptep_clear_flush_young(vma, address, pte)))) {
ret = SWAP_FAIL;
goto out_unmap;
}
@@ -771,7 +770,7 @@ static int try_to_unmap_file(struct page *page, int migration)
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !migration)
continue;
cursor = (unsigned long) vma->vm_private_data;
if (cursor > max_nl_cursor)
@@ -805,7 +804,7 @@ static int try_to_unmap_file(struct page *page, int migration)
do {
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) {
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !migration)
continue;
cursor = (unsigned long) vma->vm_private_data;
while ( cursor < max_nl_cursor &&
diff --git a/mm/shmem.c b/mm/shmem.c
index 84b5cf9b63c5..38bc3334f263 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2255,7 +2255,7 @@ static int __init init_tmpfs(void)
#ifdef CONFIG_TMPFS
devfs_mk_dir("shm");
#endif
- shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
+ shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
tmpfs_fs_type.name, NULL);
if (IS_ERR(shm_mnt)) {
error = PTR_ERR(shm_mnt);
diff --git a/mm/slab.c b/mm/slab.c
index 98ac20bc0de9..233e39d14caf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,6 +89,7 @@
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
@@ -106,6 +107,7 @@
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
+#include <linux/rtmutex.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -492,17 +494,6 @@ struct kmem_cache {
#endif
#if DEBUG
-/*
- * Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */
-#define RED_ACTIVE 0x170FC2A5UL /* when obj is active */
-
-/* ...and for poisoning */
-#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
-#define POISON_FREE 0x6b /* for use-after-free poisoning */
-#define POISON_END 0xa5 /* end-byte of poisoning */
/*
* memory layout of objects:
@@ -1083,7 +1074,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
#endif
-static int cpuup_callback(struct notifier_block *nfb,
+static int __devinit cpuup_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1265,7 +1256,9 @@ bad:
return NOTIFY_BAD;
}
-static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata cpucache_notifier = {
+ &cpuup_callback, NULL, 0
+};
/*
* swap the static kmem_list3 with kmalloced memory
@@ -3405,7 +3398,7 @@ void kfree(const void *objp)
local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
- mutex_debug_check_no_locks_freed(objp, obj_size(c));
+ debug_check_no_locks_freed(objp, obj_size(c));
__cache_free(c, (void *)objp);
local_irq_restore(flags);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index e0a3fe48aa37..c7a2b3a0e46b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -45,7 +45,7 @@ static struct mem_section *sparse_index_alloc(int nid)
static int sparse_index_init(unsigned long section_nr, int nid)
{
- static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(index_init_lock);
unsigned long root = SECTION_NR_TO_ROOT(section_nr);
struct mem_section *section;
int ret = 0;
diff --git a/mm/swap.c b/mm/swap.c
index 03ae2076f92f..990868afc1c6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -86,8 +86,7 @@ int rotate_reclaimable_page(struct page *page)
zone = page_zone(page);
spin_lock_irqsave(&zone->lru_lock, flags);
if (PageLRU(page) && !PageActive(page)) {
- list_del(&page->lru);
- list_add_tail(&page->lru, &zone->inactive_list);
+ list_move_tail(&page->lru, &zone->inactive_list);
inc_page_state(pgrotated);
}
if (!test_clear_page_writeback(page))
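The swap.c change is purely a simplification: list_move_tail() is the delete-then-add-tail sequence it replaces, folded into one call. A minimal user-space rendition of the idea (an illustration, not the real <linux/list.h>):

    /* Minimal circular doubly-linked list with a sentinel head. */
    struct list_head {
    	struct list_head *next, *prev;
    };

    static void list_del(struct list_head *entry)
    {
    	entry->next->prev = entry->prev;
    	entry->prev->next = entry->next;
    }

    static void list_add_tail(struct list_head *new, struct list_head *head)
    {
    	new->prev = head->prev;
    	new->next = head;
    	head->prev->next = new;
    	head->prev = new;
    }

    /* What the hunk above switches to: one call that does both steps. */
    static void list_move_tail(struct list_head *entry, struct list_head *head)
    {
    	list_del(entry);
    	list_add_tail(entry, head);
    }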
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 72babac71dea..eeacb0d695c3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -34,6 +34,7 @@
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
+#include <linux/kthread.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -1223,7 +1224,6 @@ static int kswapd(void *p)
};
cpumask_t cpumask;
- daemonize("kswapd%d", pgdat->node_id);
cpumask = node_to_cpumask(pgdat->node_id);
if (!cpus_empty(cpumask))
set_cpus_allowed(tsk, cpumask);
@@ -1450,7 +1450,7 @@ out:
not required for correctness. So if the last cpu in a node goes
away, we get changed to run anywhere: as the first one comes back,
restore their cpu bindings. */
-static int cpu_callback(struct notifier_block *nfb,
+static int __devinit cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
pg_data_t *pgdat;
@@ -1468,20 +1468,35 @@ static int cpu_callback(struct notifier_block *nfb,
}
#endif /* CONFIG_HOTPLUG_CPU */
+/*
+ * This kswapd start function will be called by init and node-hot-add.
+ * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
+ */
+int kswapd_run(int nid)
+{
+ pg_data_t *pgdat = NODE_DATA(nid);
+ int ret = 0;
+
+ if (pgdat->kswapd)
+ return 0;
+
+ pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+ if (IS_ERR(pgdat->kswapd)) {
+ /* failure at boot is fatal */
+ BUG_ON(system_state == SYSTEM_BOOTING);
+ printk("Failed to start kswapd on node %d\n",nid);
+ ret = -1;
+ }
+ return ret;
+}
+
static int __init kswapd_init(void)
{
- pg_data_t *pgdat;
+ int nid;
swap_setup();
- for_each_online_pgdat(pgdat) {
- pid_t pid;
-
- pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
- BUG_ON(pid < 0);
- read_lock(&tasklist_lock);
- pgdat->kswapd = find_task_by_pid(pid);
- read_unlock(&tasklist_lock);
- }
+ for_each_online_node(nid)
+ kswapd_run(nid);
hotcpu_notifier(cpu_callback, 0);
return 0;
}