Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            4
-rw-r--r--  mm/Makefile           4
-rw-r--r--  mm/filemap.c          2
-rw-r--r--  mm/filemap_xip.c      2
-rw-r--r--  mm/hugetlb.c          2
-rw-r--r--  mm/mmap.c             2
-rw-r--r--  mm/nommu.c           44
-rw-r--r--  mm/page-writeback.c  30
-rw-r--r--  mm/percpu.c          83
-rw-r--r--  mm/shmem.c            9
-rw-r--r--  mm/vmscan.c           8
11 files changed, 119 insertions(+), 71 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 247760729593..edd300aca173 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -244,10 +244,12 @@ config DEFAULT_MMAP_MIN_ADDR
This value can be changed after boot using the
/proc/sys/vm/mmap_min_addr tunable.
+config ARCH_SUPPORTS_MEMORY_FAILURE
+ bool
config MEMORY_FAILURE
depends on MMU
- depends on X86_MCE
+ depends on ARCH_SUPPORTS_MEMORY_FAILURE
bool "Enable recovery from hardware memory errors"
help
Enables code to recover from some memory failures on systems
diff --git a/mm/Makefile b/mm/Makefile
index 515fd793c17f..ebf849042ed3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,14 +5,14 @@
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
- vmalloc.o
+ vmalloc.o pagewalk.o
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
page_isolation.o mm_init.o mmu_context.o \
- pagewalk.o $(mmu-y)
+ $(mmu-y)
obj-y += init-mm.o
obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/filemap.c b/mm/filemap.c
index 6c84e598b4a9..ef169f37156d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1611,7 +1611,7 @@ page_not_uptodate:
}
EXPORT_SYMBOL(filemap_fault);
-struct vm_operations_struct generic_file_vm_ops = {
+const struct vm_operations_struct generic_file_vm_ops = {
.fault = filemap_fault,
};
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 427dfe3ce78c..1888b2d71bb8 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -296,7 +296,7 @@ out:
}
}
-static struct vm_operations_struct xip_file_vm_ops = {
+static const struct vm_operations_struct xip_file_vm_ops = {
.fault = xip_file_fault,
};
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6f048fcc749c..5d7601b02874 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1721,7 +1721,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return 0;
}
-struct vm_operations_struct hugetlb_vm_ops = {
+const struct vm_operations_struct hugetlb_vm_ops = {
.fault = hugetlb_vm_op_fault,
.open = hugetlb_vm_op_open,
.close = hugetlb_vm_op_close,
diff --git a/mm/mmap.c b/mm/mmap.c
index 21d4029a07b3..73f5e4b64010 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2282,7 +2282,7 @@ static void special_mapping_close(struct vm_area_struct *vma)
{
}
-static struct vm_operations_struct special_mapping_vmops = {
+static const struct vm_operations_struct special_mapping_vmops = {
.close = special_mapping_close,
.fault = special_mapping_fault,
};
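The filemap.c, filemap_xip.c, hugetlb.c and mmap.c hunks above (and the nommu.c and shmem.c ones below) apply one and the same change: vm_operations_struct method tables become const, so they can be placed in read-only data and can no longer be patched at run time. A rough sketch of the pattern only -- a hypothetical driver-style user, not code from this series, assuming the 2.6.31-era fault() prototype visible in the hugetlb.c hunk and a vm_ops pointer that accepts const tables, as this constification implies:

#include <linux/fs.h>
#include <linux/mm.h>

/* Minimal fault handler: this sketch backs nothing, so any access fails. */
static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

/* Declared const: the table lands in .rodata and cannot be modified later. */
static const struct vm_operations_struct example_vm_ops = {
	.fault	= example_fault,
};

/* Wired up as a file_operations.mmap hook in a real driver. */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &example_vm_ops;
	return 0;
}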
diff --git a/mm/nommu.c b/mm/nommu.c
index 56a446f05971..5189b5aed8c0 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -79,7 +79,7 @@ static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);
-struct vm_operations_struct generic_file_vm_ops = {
+const struct vm_operations_struct generic_file_vm_ops = {
};
/*
@@ -826,7 +826,7 @@ static int validate_mmap_request(struct file *file,
int ret;
/* do the simple checks first */
- if (flags & MAP_FIXED || addr) {
+ if (flags & MAP_FIXED) {
printk(KERN_DEBUG
"%d: Can't do fixed-address/overlay mmap of RAM\n",
current->pid);
@@ -1034,7 +1034,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
if (ret == 0) {
vma->vm_region->vm_top = vma->vm_region->vm_end;
- return ret;
+ return 0;
}
if (ret != -ENOSYS)
return ret;
@@ -1051,7 +1051,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
*/
static int do_mmap_private(struct vm_area_struct *vma,
struct vm_region *region,
- unsigned long len)
+ unsigned long len,
+ unsigned long capabilities)
{
struct page *pages;
unsigned long total, point, n, rlen;
@@ -1062,13 +1063,13 @@ static int do_mmap_private(struct vm_area_struct *vma,
* shared mappings on devices or memory
* - VM_MAYSHARE will be set if it may attempt to share
*/
- if (vma->vm_file) {
+ if (capabilities & BDI_CAP_MAP_DIRECT) {
ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
if (ret == 0) {
/* shouldn't return success if we're not sharing */
BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
vma->vm_region->vm_top = vma->vm_region->vm_end;
- return ret;
+ return 0;
}
if (ret != -ENOSYS)
return ret;
@@ -1181,9 +1182,6 @@ unsigned long do_mmap_pgoff(struct file *file,
kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
- if (!(flags & MAP_FIXED))
- addr = round_hint_to_min(addr);
-
/* decide whether we should attempt the mapping, and if so what sort of
* mapping */
ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
@@ -1193,6 +1191,9 @@ unsigned long do_mmap_pgoff(struct file *file,
return ret;
}
+ /* we ignore the address hint */
+ addr = 0;
+
/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
vm_flags = determine_vm_flags(file, prot, flags, capabilities);
@@ -1306,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file,
* - this is the hook for quasi-memory character devices to
* tell us the location of a shared mapping
*/
- if (file && file->f_op->get_unmapped_area) {
+ if (capabilities & BDI_CAP_MAP_DIRECT) {
addr = file->f_op->get_unmapped_area(file, addr, len,
pgoff, flags);
if (IS_ERR((void *) addr)) {
@@ -1330,15 +1331,17 @@ unsigned long do_mmap_pgoff(struct file *file,
}
vma->vm_region = region;
- add_nommu_region(region);
- /* set up the mapping */
+ /* set up the mapping
+ * - the region is filled in if BDI_CAP_MAP_DIRECT is still set
+ */
if (file && vma->vm_flags & VM_SHARED)
ret = do_mmap_shared_file(vma);
else
- ret = do_mmap_private(vma, region, len);
+ ret = do_mmap_private(vma, region, len, capabilities);
if (ret < 0)
- goto error_put_region;
+ goto error_just_free;
+ add_nommu_region(region);
/* okay... we have a mapping; now we have to register it */
result = vma->vm_start;
@@ -1356,19 +1359,6 @@ share:
kleave(" = %lx", result);
return result;
-error_put_region:
- __put_nommu_region(region);
- if (vma) {
- if (vma->vm_file) {
- fput(vma->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(vma->vm_mm);
- }
- kmem_cache_free(vm_area_cachep, vma);
- }
- kleave(" = %d [pr]", ret);
- return ret;
-
error_just_free:
up_write(&nommu_region_sem);
error:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d99664e8607e..a3b14090b1fb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -44,18 +44,21 @@ static long ratelimit_pages = 32;
/*
* When balance_dirty_pages decides that the caller needs to perform some
* non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably
+ * It should be somewhat larger than dirtied pages to ensure that reasonably
* large amounts of I/O are submitted.
*/
-static inline long sync_writeback_pages(void)
+static inline long sync_writeback_pages(unsigned long dirtied)
{
- return ratelimit_pages + ratelimit_pages / 2;
+ if (dirtied < ratelimit_pages)
+ dirtied = ratelimit_pages;
+
+ return dirtied + dirtied / 2;
}
/* The following parameters are exported via /proc/sys/vm */
/*
- * Start background writeback (via pdflush) at this percentage
+ * Start background writeback (via writeback threads) at this percentage
*/
int dirty_background_ratio = 10;
@@ -474,10 +477,11 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
* balance_dirty_pages() must be called by processes which are generating dirty
* data. It looks at the number of dirty pages in the machine and will force
* the caller to perform writeback if the system is over `vm_dirty_ratio'.
- * If we're over `background_thresh' then pdflush is woken to perform some
- * writeout.
+ * If we're over `background_thresh' then the writeback threads are woken to
+ * perform some writeout.
*/
-static void balance_dirty_pages(struct address_space *mapping)
+static void balance_dirty_pages(struct address_space *mapping,
+ unsigned long write_chunk)
{
long nr_reclaimable, bdi_nr_reclaimable;
long nr_writeback, bdi_nr_writeback;
@@ -485,7 +489,6 @@ static void balance_dirty_pages(struct address_space *mapping)
unsigned long dirty_thresh;
unsigned long bdi_thresh;
unsigned long pages_written = 0;
- unsigned long write_chunk = sync_writeback_pages();
unsigned long pause = 1;
struct backing_dev_info *bdi = mapping->backing_dev_info;
@@ -579,7 +582,7 @@ static void balance_dirty_pages(struct address_space *mapping)
bdi->dirty_exceeded = 0;
if (writeback_in_progress(bdi))
- return; /* pdflush is already working this queue */
+ return;
/*
* In laptop mode, we wait until hitting the higher threshold before
@@ -590,10 +593,10 @@ static void balance_dirty_pages(struct address_space *mapping)
* background_thresh, to keep the amount of dirty memory low.
*/
if ((laptop_mode && pages_written) ||
- (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
- + global_page_state(NR_UNSTABLE_NFS))
+ (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
+ + global_page_state(NR_UNSTABLE_NFS))
> background_thresh)))
- bdi_start_writeback(bdi, nr_writeback);
+ bdi_start_writeback(bdi, NULL, 0);
}
void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -640,9 +643,10 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
p = &__get_cpu_var(bdp_ratelimits);
*p += nr_pages_dirtied;
if (unlikely(*p >= ratelimit)) {
+ ratelimit = sync_writeback_pages(*p);
*p = 0;
preempt_enable();
- balance_dirty_pages(mapping);
+ balance_dirty_pages(mapping, ratelimit);
return;
}
preempt_enable();
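To see what the page-writeback.c change buys: the per-call write chunk used to be a fixed ratelimit_pages * 1.5 and now scales with how many pages the caller actually dirtied, with ratelimit_pages as the floor. A stand-alone userspace model of the new heuristic (illustrative only; ratelimit_pages is pinned at 32 here, whereas the kernel tunes it at boot):

#include <stdio.h>

static long ratelimit_pages = 32;	/* boot-tuned in the kernel */

/* Mirrors the patched sync_writeback_pages(): 1.5x the dirtied count,
 * never less than 1.5x ratelimit_pages. */
static long sync_writeback_pages(unsigned long dirtied)
{
	if (dirtied < ratelimit_pages)
		dirtied = ratelimit_pages;
	return dirtied + dirtied / 2;
}

int main(void)
{
	printf("%ld\n", sync_writeback_pages(8));	/* 48: the floor applies */
	printf("%ld\n", sync_writeback_pages(1024));	/* 1536: scales with the dirtying */
	return 0;
}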
diff --git a/mm/percpu.c b/mm/percpu.c
index 43d8cacfdaa5..4a048abad043 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1043,7 +1043,9 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
*/
static void *pcpu_alloc(size_t size, size_t align, bool reserved)
{
+ static int warn_limit = 10;
struct pcpu_chunk *chunk;
+ const char *err;
int slot, off;
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
@@ -1059,11 +1061,14 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
if (reserved && pcpu_reserved_chunk) {
chunk = pcpu_reserved_chunk;
if (size > chunk->contig_hint ||
- pcpu_extend_area_map(chunk) < 0)
+ pcpu_extend_area_map(chunk) < 0) {
+ err = "failed to extend area map of reserved chunk";
goto fail_unlock;
+ }
off = pcpu_alloc_area(chunk, size, align);
if (off >= 0)
goto area_found;
+ err = "alloc from reserved chunk failed";
goto fail_unlock;
}
@@ -1080,6 +1085,7 @@ restart:
case 1:
goto restart; /* pcpu_lock dropped, restart */
default:
+ err = "failed to extend area map";
goto fail_unlock;
}
@@ -1093,8 +1099,10 @@ restart:
spin_unlock_irq(&pcpu_lock);
chunk = alloc_pcpu_chunk();
- if (!chunk)
+ if (!chunk) {
+ err = "failed to allocate new chunk";
goto fail_unlock_mutex;
+ }
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1);
@@ -1107,6 +1115,7 @@ area_found:
if (pcpu_populate_chunk(chunk, off, size)) {
spin_lock_irq(&pcpu_lock);
pcpu_free_area(chunk, off);
+ err = "failed to populate";
goto fail_unlock;
}
@@ -1119,6 +1128,13 @@ fail_unlock:
spin_unlock_irq(&pcpu_lock);
fail_unlock_mutex:
mutex_unlock(&pcpu_alloc_mutex);
+ if (warn_limit) {
+ pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
+ "%s\n", size, align, err);
+ dump_stack();
+ if (!--warn_limit)
+ pr_info("PERCPU: limit reached, disable warning\n");
+ }
return NULL;
}
@@ -1347,6 +1363,10 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
struct pcpu_alloc_info *ai;
unsigned int *cpu_map;
+ /* this function may be called multiple times */
+ memset(group_map, 0, sizeof(group_map));
+ memset(group_cnt, 0, sizeof(group_cnt));
+
/*
* Determine min_unit_size, alloc_size and max_upa such that
* alloc_size is multiple of atom_size and is the smallest
@@ -1574,6 +1594,7 @@ static void pcpu_dump_alloc_info(const char *lvl,
int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
void *base_addr)
{
+ static char cpus_buf[4096] __initdata;
static int smap[2], dmap[2];
size_t dyn_size = ai->dyn_size;
size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
@@ -1585,17 +1606,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
int *unit_map;
int group, unit, i;
+ cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);
+
+#define PCPU_SETUP_BUG_ON(cond) do { \
+ if (unlikely(cond)) { \
+ pr_emerg("PERCPU: failed to initialize, %s", #cond); \
+ pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf); \
+ pcpu_dump_alloc_info(KERN_EMERG, ai); \
+ BUG(); \
+ } \
+} while (0)
+
/* sanity checks */
BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
- BUG_ON(ai->nr_groups <= 0);
- BUG_ON(!ai->static_size);
- BUG_ON(!base_addr);
- BUG_ON(ai->unit_size < size_sum);
- BUG_ON(ai->unit_size & ~PAGE_MASK);
- BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
-
- pcpu_dump_alloc_info(KERN_DEBUG, ai);
+ PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
+ PCPU_SETUP_BUG_ON(!ai->static_size);
+ PCPU_SETUP_BUG_ON(!base_addr);
+ PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
+ PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
+ PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
/* process group information and build config tables accordingly */
group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
@@ -1604,7 +1634,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- unit_map[cpu] = NR_CPUS;
+ unit_map[cpu] = UINT_MAX;
pcpu_first_unit_cpu = NR_CPUS;
for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
@@ -1618,8 +1648,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
if (cpu == NR_CPUS)
continue;
- BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu));
- BUG_ON(unit_map[cpu] != NR_CPUS);
+ PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids);
+ PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
+ PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);
unit_map[cpu] = unit + i;
unit_off[cpu] = gi->base_offset + i * ai->unit_size;
@@ -1632,7 +1663,11 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
pcpu_nr_units = unit;
for_each_possible_cpu(cpu)
- BUG_ON(unit_map[cpu] == NR_CPUS);
+ PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);
+
+ /* we're done parsing the input, undefine BUG macro and dump config */
+#undef PCPU_SETUP_BUG_ON
+ pcpu_dump_alloc_info(KERN_INFO, ai);
pcpu_nr_groups = ai->nr_groups;
pcpu_group_offsets = group_offsets;
@@ -1782,7 +1817,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
void *base = (void *)ULONG_MAX;
void **areas = NULL;
struct pcpu_alloc_info *ai;
- size_t size_sum, areas_size;
+ size_t size_sum, areas_size, max_distance;
int group, i, rc;
ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
@@ -1832,8 +1867,24 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
}
/* base address is now known, determine group base offsets */
- for (group = 0; group < ai->nr_groups; group++)
+ max_distance = 0;
+ for (group = 0; group < ai->nr_groups; group++) {
ai->groups[group].base_offset = areas[group] - base;
+ max_distance = max(max_distance, ai->groups[group].base_offset);
+ }
+ max_distance += ai->unit_size;
+
+ /* warn if maximum distance is further than 75% of vmalloc space */
+ if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
+ pr_warning("PERCPU: max_distance=0x%lx too large for vmalloc "
+ "space 0x%lx\n",
+ max_distance, VMALLOC_END - VMALLOC_START);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+ /* and fail if we have fallback */
+ rc = -EINVAL;
+ goto out_free;
+#endif
+ }
pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
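The pcpu_alloc() hunks above thread a reason string through every failure path and report it, but only for the first ten failed allocations, so a leaking or misbehaving caller cannot flood the log. A minimal userspace sketch of that budgeted-warning idiom (the message text and the limit of 10 come from the hunks above; the surrounding function is made up):

#include <stdio.h>

/* Report the first few failures with their reason, then go quiet. */
static void report_pcpu_failure(const char *err, size_t size, size_t align)
{
	static int warn_limit = 10;

	if (!warn_limit)
		return;
	fprintf(stderr, "PERCPU: allocation failed, size=%zu align=%zu, %s\n",
		size, align, err);
	if (!--warn_limit)
		fprintf(stderr, "PERCPU: limit reached, disable warning\n");
}

int main(void)
{
	int i;

	for (i = 0; i < 12; i++)	/* only the first 10 are printed */
		report_pcpu_failure("failed to populate", 64, 8);
	return 0;
}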
diff --git a/mm/shmem.c b/mm/shmem.c
index 98631c26c200..356dd99566ec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -218,7 +218,7 @@ static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
-static struct vm_operations_struct shmem_vm_ops;
+static const struct vm_operations_struct shmem_vm_ops;
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.ra_pages = 0, /* No readahead */
@@ -1046,8 +1046,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
* sync from ever calling shmem_writepage; but a stacking filesystem
* may use the ->writepage of its underlying filesystem, in which case
* tmpfs should write out to swap only in response to memory pressure,
- * and not for pdflush or sync. However, in those cases, we do still
- * want to check if there's a redundant swappage to be discarded.
+ * and not for the writeback threads or sync. However, in those cases,
+ * we do still want to check if there's a redundant swappage to be
+ * discarded.
*/
if (wbc->for_reclaim)
swap = get_swap_page();
@@ -2497,7 +2498,7 @@ static const struct super_operations shmem_ops = {
.put_super = shmem_put_super,
};
-static struct vm_operations_struct shmem_vm_ops = {
+static const struct vm_operations_struct shmem_vm_ops = {
.fault = shmem_fault,
#ifdef CONFIG_NUMA
.set_policy = shmem_set_policy,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1219ceb8a9b2..64e438898832 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1709,10 +1709,10 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
*
* If the caller is !__GFP_FS then the probability of a failure is reasonably
* high - the zone may be full of dirty or under-writeback pages, which this
- * caller can't do much about. We kick pdflush and take explicit naps in the
- * hope that some of these pages can be written. But if the allocating task
- * holds filesystem locks which prevent writeout this might not work, and the
- * allocation attempt will fail.
+ * caller can't do much about. We kick the writeback threads and take explicit
+ * naps in the hope that some of these pages can be written. But if the
+ * allocating task holds filesystem locks which prevent writeout this might not
+ * work, and the allocation attempt will fail.
*
* returns: 0, if no pages reclaimed
* else, the number of pages reclaimed