diff options
-rw-r--r-- | Documentation/cgroups/memory.txt | 74 | ||||
-rw-r--r-- | MAINTAINERS | 3 | ||||
-rw-r--r-- | arch/s390/defconfig | 16 | ||||
-rw-r--r-- | arch/s390/include/asm/compat.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/elf.h | 9 | ||||
-rw-r--r-- | arch/s390/include/asm/system.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/thread_info.h | 6 | ||||
-rw-r--r-- | arch/s390/kernel/process.c | 38 | ||||
-rw-r--r-- | arch/s390/kernel/vdso.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 49 | ||||
-rw-r--r-- | drivers/gpio/timbgpio.c | 6 | ||||
-rw-r--r-- | drivers/s390/cio/device.c | 1 | ||||
-rw-r--r-- | include/linux/gpio.h | 4 | ||||
-rw-r--r-- | include/linux/page_cgroup.h | 23 | ||||
-rw-r--r-- | kernel/cgroup.c | 17 |
15 files changed, 124 insertions, 130 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index bac328c232f5..7781857dc940 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -385,10 +385,6 @@ mapped_file - # of bytes of mapped file (includes tmpfs/shmem) pgpgin - # of pages paged in (equivalent to # of charging events). pgpgout - # of pages paged out (equivalent to # of uncharging events). swap - # of bytes of swap usage -dirty - # of bytes that are waiting to get written back to the disk. -writeback - # of bytes that are actively being written back to the disk. -nfs_unstable - # of bytes sent to the NFS server, but not yet committed to - the actual storage. inactive_anon - # of bytes of anonymous memory and swap cache memory on LRU list. active_anon - # of bytes of anonymous and swap cache memory on active @@ -410,9 +406,6 @@ total_mapped_file - sum of all children's "cache" total_pgpgin - sum of all children's "pgpgin" total_pgpgout - sum of all children's "pgpgout" total_swap - sum of all children's "swap" -total_dirty - sum of all children's "dirty" -total_writeback - sum of all children's "writeback" -total_nfs_unstable - sum of all children's "nfs_unstable" total_inactive_anon - sum of all children's "inactive_anon" total_active_anon - sum of all children's "active_anon" total_inactive_file - sum of all children's "inactive_file" @@ -460,73 +453,6 @@ memory under it will be reclaimed. You can reset failcnt by writing 0 to failcnt file. # echo 0 > .../memory.failcnt -5.5 dirty memory - -Control the maximum amount of dirty pages a cgroup can have at any given time. - -Limiting dirty memory is like fixing the max amount of dirty (hard to reclaim) -page cache used by a cgroup. So, in case of multiple cgroup writers, they will -not be able to consume more than their designated share of dirty pages and will -be forced to perform write-out if they cross that limit. - -The interface is equivalent to the procfs interface: /proc/sys/vm/dirty_*. It -is possible to configure a limit to trigger both a direct writeback or a -background writeback performed by per-bdi flusher threads. The root cgroup -memory.dirty_* control files are read-only and match the contents of -the /proc/sys/vm/dirty_* files. - -Per-cgroup dirty limits can be set using the following files in the cgroupfs: - -- memory.dirty_ratio: the amount of dirty memory (expressed as a percentage of - cgroup memory) at which a process generating dirty pages will itself start - writing out dirty data. - -- memory.dirty_limit_in_bytes: the amount of dirty memory (expressed in bytes) - in the cgroup at which a process generating dirty pages will start itself - writing out dirty data. Suffix (k, K, m, M, g, or G) can be used to indicate - that value is kilo, mega or gigabytes. - - Note: memory.dirty_limit_in_bytes is the counterpart of memory.dirty_ratio. - Only one of them may be specified at a time. When one is written it is - immediately taken into account to evaluate the dirty memory limits and the - other appears as 0 when read. - -- memory.dirty_background_ratio: the amount of dirty memory of the cgroup - (expressed as a percentage of cgroup memory) at which background writeback - kernel threads will start writing out dirty data. - -- memory.dirty_background_limit_in_bytes: the amount of dirty memory (expressed - in bytes) in the cgroup at which background writeback kernel threads will - start writing out dirty data. Suffix (k, K, m, M, g, or G) can be used to - indicate that value is kilo, mega or gigabytes. - - Note: memory.dirty_background_limit_in_bytes is the counterpart of - memory.dirty_background_ratio. Only one of them may be specified at a time. - When one is written it is immediately taken into account to evaluate the dirty - memory limits and the other appears as 0 when read. - -A cgroup may contain more dirty memory than its dirty limit. This is possible -because of the principle that the first cgroup to touch a page is charged for -it. Subsequent page counting events (dirty, writeback, nfs_unstable) are also -counted to the originally charged cgroup. - -Example: If page is allocated by a cgroup A task, then the page is charged to -cgroup A. If the page is later dirtied by a task in cgroup B, then the cgroup A -dirty count will be incremented. If cgroup A is over its dirty limit but cgroup -B is not, then dirtying a cgroup A page from a cgroup B task may push cgroup A -over its dirty limit without throttling the dirtying cgroup B task. - -When use_hierarchy=0, each cgroup has dirty memory usage and limits. -System-wide dirty limits are also consulted. Dirty memory consumption is -checked against both system-wide and per-cgroup dirty limits. - -The current implementation does not enforce per-cgroup dirty limits when -use_hierarchy=1. System-wide dirty limits are used for processes in such -cgroups. Attempts to read memory.dirty_* files return the system-wide -values. Writes to the memory.dirty_* files return error. An enhanced -implementation is needed to check the chain of parents to ensure that no -dirty limit is exceeded. - 6. Hierarchy support The memory controller supports a deep hierarchy and hierarchical accounting. diff --git a/MAINTAINERS b/MAINTAINERS index af656ded404e..89e4d4b145bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5272,8 +5272,7 @@ S: Supported F: drivers/s390/net/ S390 ZCRYPT DRIVER -M: Felix Beck <felix.beck@de.ibm.com> -M: Ralph Wuerthner <ralph.wuerthner@de.ibm.com> +M: Holger Dengler <hd@linux.vnet.ibm.com> M: linux390@de.ibm.com L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ diff --git a/arch/s390/defconfig b/arch/s390/defconfig index d79697157ac0..29c82c640a88 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -5,10 +5,21 @@ CONFIG_AUDIT=y CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -19,7 +30,9 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y CONFIG_BINFMT_MISC=m +CONFIG_CMM=m CONFIG_HZ_100=y CONFIG_KEXEC=y CONFIG_PM=y @@ -105,6 +118,7 @@ CONFIG_DEBUG_LIST=y CONFIG_DEBUG_NOTIFIERS=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_KPROBES_SANITY_TEST=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index a875c2f542e1..da359ca6fe55 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -169,7 +169,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static inline int is_compat_task(void) { - return test_thread_flag(TIF_31BIT); + return is_32bit_task(); } #else diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 354d42616c7e..10c029cfcc7d 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -161,7 +161,9 @@ extern unsigned int vdso_enabled; use of this is to invoke "./ld.so someprog" to test out a new version of the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) + +extern unsigned long randomize_et_dyn(unsigned long base); +#define ELF_ET_DYN_BASE (randomize_et_dyn(STACK_TOP / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ @@ -206,6 +208,8 @@ do { \ current->mm->context.noexec == 0; \ }) +#define STACK_RND_MASK 0x7ffUL + #define ARCH_DLINFO \ do { \ if (vdso_enabled) \ @@ -218,4 +222,7 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 int arch_setup_additional_pages(struct linux_binprm *, int); +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + #endif diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 6710b0eac165..8f8d759f6a7b 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -449,7 +449,7 @@ extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); extern void (*_machine_power_off)(void); -#define arch_align_stack(x) (x) +extern unsigned long arch_align_stack(unsigned long sp); static inline int tprot(unsigned long addr) { diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ebc77091466f..ad1382f7932e 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -118,6 +118,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SINGLE_STEP (1<<TIF_FREEZE) #define _TIF_FREEZE (1<<TIF_FREEZE) +#ifdef CONFIG_64BIT +#define is_32bit_task() (test_thread_flag(TIF_31BIT)) +#else +#define is_32bit_task() (1) +#endif + #endif /* __KERNEL__ */ #define PREEMPT_ACTIVE 0x4000000 diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 6ba42222b542..a895e69379f7 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -30,9 +30,11 @@ #include <linux/tick.h> #include <linux/elfcore.h> #include <linux/kernel_stat.h> +#include <linux/personality.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/kprobes.h> +#include <linux/random.h> #include <asm/compat.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -332,3 +334,39 @@ unsigned long get_wchan(struct task_struct *p) } return 0; } + +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static inline unsigned long brk_rnd(void) +{ + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; + else + return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + + if (ret < mm->brk) + return mm->brk; + return ret; +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + + if (!(current->flags & PF_RANDOMIZE)) + return base; + if (ret < base) + return base; + return ret; +} diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index e3150dd2fe74..f438d74dedbd 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -203,7 +203,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!uses_interp) return 0; - vdso_base = mm->mmap_base; #ifdef CONFIG_64BIT vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; @@ -233,8 +232,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * fail and end up putting it elsewhere. */ down_write(&mm->mmap_sem); - vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_pages << PAGE_SHIFT, 0, 0); + vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; goto out_up; diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 869efbaed3ea..c9a9f7f18188 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -27,17 +27,44 @@ #include <linux/personality.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/random.h> #include <asm/pgalloc.h> #include <asm/compat.h> +static unsigned long stack_maxrandom_size(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + if (current->personality & ADDR_NO_RANDOMIZE) + return 0; + return STACK_RND_MASK << PAGE_SHIFT; +} + /* * Top of mmap area (just below the process stack). * - * Leave an at least ~128 MB hole. + * Leave at least a ~32 MB hole. */ -#define MIN_GAP (128*1024*1024) +#define MIN_GAP (32*1024*1024) #define MAX_GAP (STACK_TOP/6*5) +static inline int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_rnd(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + /* 8MB randomization for mmap_base */ + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; +} + static inline unsigned long mmap_base(void) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -46,22 +73,8 @@ static inline unsigned long mmap_base(void) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; - - return STACK_TOP - (gap & PAGE_MASK); -} - -static inline int mmap_is_legacy(void) -{ -#ifdef CONFIG_64BIT - /* - * Force standard allocation for 64 bit programs. - */ - if (!is_compat_task()) - return 1; -#endif - return sysctl_legacy_va_layout || - (current->personality & ADDR_COMPAT_LAYOUT) || - rlimit(RLIMIT_STACK) == RLIM_INFINITY; + gap &= PAGE_MASK; + return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; } #ifndef CONFIG_64BIT diff --git a/drivers/gpio/timbgpio.c b/drivers/gpio/timbgpio.c index 349131eb1ce0..58c8f30352dd 100644 --- a/drivers/gpio/timbgpio.c +++ b/drivers/gpio/timbgpio.c @@ -193,13 +193,13 @@ out: return ret; } -static void timbgpio_irq(struct irq_data *d, struct irq_desc *desc) +static void timbgpio_irq(unsigned int irq, struct irq_desc *desc) { - struct timbgpio *tgpio = irq_data_get_irq_data(d); + struct timbgpio *tgpio = get_irq_data(irq); unsigned long ipr; int offset; - desc->irq_data.chip->ack(irq_get_irq_data(d)); + desc->irq_data.chip->irq_ack(irq_get_irq_data(irq)); ipr = ioread32(tgpio->membase + TGPIO_IPR); iowrite32(ipr, tgpio->membase + TGPIO_ICR); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e8391b89eff4..b7eaff9ca19e 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1835,6 +1835,7 @@ static void __ccw_device_pm_restore(struct ccw_device *cdev) * available again. Kick re-detection. */ cdev->private->flags.resuming = 1; + cdev->private->path_new_mask = LPM_ANYPATH; css_schedule_eval(sch->schid); spin_unlock_irq(sch->lock); css_complete_work(); diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 4b47ed96f131..32720baf70f1 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -35,13 +35,13 @@ static inline int gpio_request(unsigned gpio, const char *label) return -ENOSYS; } -static inline int __must_check gpio_request_one(unsigned gpio, +static inline int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) { return -ENOSYS; } -static inline int __must_check gpio_request_array(struct gpio *array, size_t num) +static inline int gpio_request_array(struct gpio *array, size_t num) { return -ENOSYS; } diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 5b0c971d7cae..6d6cb7a57bb3 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -42,9 +42,6 @@ enum { /* flags for mem_cgroup and file and I/O status */ PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ PCG_FILE_MAPPED, /* page is accounted as "mapped" */ - PCG_FILE_DIRTY, /* page is dirty */ - PCG_FILE_WRITEBACK, /* page is under writeback */ - PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */ /* No lock in page_cgroup */ PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ }; @@ -65,10 +62,6 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ { return test_and_clear_bit(PCG_##lname, &pc->flags); } -#define TESTSETPCGFLAG(uname, lname) \ -static inline int TestSetPageCgroup##uname(struct page_cgroup *pc) \ - { return test_and_set_bit(PCG_##lname, &pc->flags); } - /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) CLEARPCGFLAG(Cache, CACHE) @@ -88,22 +81,6 @@ SETPCGFLAG(FileMapped, FILE_MAPPED) CLEARPCGFLAG(FileMapped, FILE_MAPPED) TESTPCGFLAG(FileMapped, FILE_MAPPED) -SETPCGFLAG(FileDirty, FILE_DIRTY) -CLEARPCGFLAG(FileDirty, FILE_DIRTY) -TESTPCGFLAG(FileDirty, FILE_DIRTY) -TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY) -TESTSETPCGFLAG(FileDirty, FILE_DIRTY) - -SETPCGFLAG(FileWriteback, FILE_WRITEBACK) -CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK) -TESTPCGFLAG(FileWriteback, FILE_WRITEBACK) - -SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) -CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) -TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) -TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) -TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) - SETPCGFLAG(Migration, MIGRATION) CLEARPCGFLAG(Migration, MIGRATION) TESTPCGFLAG(Migration, MIGRATION) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5c5f4cc2e99a..ffb7bbad0638 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); +static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); static const struct inode_operations cgroup_dir_inode_operations; @@ -860,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) iput(inode); } +static int cgroup_delete(const struct dentry *d) +{ + return 1; +} + static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -1451,6 +1457,7 @@ static int cgroup_get_rootdir(struct super_block *sb) { static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, + .d_delete = cgroup_delete, }; struct inode *inode = @@ -2195,12 +2202,20 @@ static const struct file_operations cgroup_file_operations = { }; static const struct inode_operations cgroup_dir_inode_operations = { - .lookup = simple_lookup, + .lookup = cgroup_lookup, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .rename = cgroup_rename, }; +static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + /* * Check if a file is a control file */ |