From af36f906c0f4c2ffa0482ecdf856a33dc88ae8c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 12:09:55 -0700 Subject: memcg: always create memsw files if CONFIG_CGROUP_MEM_RES_CTLR_SWAP Instead of conditioning creation of memsw files on do_swap_account, always create the files if compiled-in and fail read/write attempts with -EOPNOTSUPP if !do_swap_account. This is suggested by KAMEZAWA to simplify memcg file creation so that it can use cgroup->subsys_cftypes. Signed-off-by: Tejun Heo Acked-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan --- mm/memcontrol.c | 65 +++++++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 34 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7d698df4a067..97a45b392f77 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3879,14 +3879,21 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) return val << PAGE_SHIFT; } -static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) +static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, + struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + char str[64]; u64 val; - int type, name; + int type, name, len; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); + + if (!do_swap_account && type == _MEMSWAP) + return -EOPNOTSUPP; + switch (type) { case _MEM: if (name == RES_USAGE) @@ -3903,7 +3910,9 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) default: BUG(); } - return val; + + len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); + return simple_read_from_buffer(buf, nbytes, ppos, str, len); } /* * The user of this function is... @@ -3919,6 +3928,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); + + if (!do_swap_account && type == _MEMSWAP) + return -EOPNOTSUPP; + switch (name) { case RES_LIMIT: if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ @@ -3984,12 +3997,15 @@ out: static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); int type, name; - memcg = mem_cgroup_from_cont(cont); type = MEMFILE_TYPE(event); name = MEMFILE_ATTR(event); + + if (!do_swap_account && type == _MEMSWAP) + return -EOPNOTSUPP; + switch (name) { case RES_MAX_USAGE: if (type == _MEM) @@ -4655,7 +4671,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, .register_event = mem_cgroup_usage_register_event, .unregister_event = mem_cgroup_usage_unregister_event, }, @@ -4663,25 +4679,25 @@ static struct cftype mem_cgroup_files[] = { .name = "max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), .write_string = mem_cgroup_write, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "soft_limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), .write_string = mem_cgroup_write, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "stat", @@ -4721,14 +4737,11 @@ static struct cftype mem_cgroup_files[] = { .mode = S_IRUGO, }, #endif -}; - #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP -static struct cftype memsw_cgroup_files[] = { { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, .register_event = mem_cgroup_usage_register_event, .unregister_event = mem_cgroup_usage_unregister_event, }, @@ -4736,35 +4749,22 @@ static struct cftype memsw_cgroup_files[] = { .name = "memsw.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "memsw.limit_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), .write_string = mem_cgroup_write, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, { .name = "memsw.failcnt", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), .trigger = mem_cgroup_reset, - .read_u64 = mem_cgroup_read, + .read = mem_cgroup_read, }, -}; - -static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) -{ - if (!do_swap_account) - return 0; - return cgroup_add_files(cont, ss, memsw_cgroup_files, - ARRAY_SIZE(memsw_cgroup_files)); -}; -#else -static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) -{ - return 0; -} #endif +}; static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { @@ -5046,9 +5046,6 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss, ret = cgroup_add_files(cont, ss, mem_cgroup_files, ARRAY_SIZE(mem_cgroup_files)); - if (!ret) - ret = register_memsw_files(cont, ss); - if (!ret) ret = register_kmem_files(cont, ss); -- cgit v1.2.1 From 6bc103498f5fe512928496fc7802d639cc2d1d20 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 12:09:55 -0700 Subject: cgroup: convert memcg controller to the new cftype interface Convert memcg to use the new cftype based interface. kmem support abuses ->populate() for mem_cgroup_sockets_init() so it can't be removed at the moment. tcp_memcontrol is updated so that tcp_files[] is registered via a __initcall. This change also allows removing the forward declaration of tcp_files[]. Removed. Signed-off-by: Tejun Heo Acked-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Balbir Singh Cc: Glauber Costa Cc: Hugh Dickins Cc: Greg Thelen --- mm/memcontrol.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 97a45b392f77..bef114258bbd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4764,6 +4764,7 @@ static struct cftype mem_cgroup_files[] = { .read = mem_cgroup_read, }, #endif + { }, /* terminate */ }; static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) @@ -5041,15 +5042,7 @@ static void mem_cgroup_destroy(struct cgroup *cont) static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) { - int ret; - - ret = cgroup_add_files(cont, ss, mem_cgroup_files, - ARRAY_SIZE(mem_cgroup_files)); - - if (!ret) - ret = register_kmem_files(cont, ss); - - return ret; + return register_kmem_files(cont, ss); } #ifdef CONFIG_MMU @@ -5639,6 +5632,7 @@ struct cgroup_subsys mem_cgroup_subsys = { .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, + .base_cftypes = mem_cgroup_files, .early_init = 0, .use_id = 1, }; -- cgit v1.2.1 From 48ddbe194623ae089cc0576e60363f2d2e85662a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 12:09:56 -0700 Subject: cgroup: make css->refcnt clearing on cgroup removal optional Currently, cgroup removal tries to drain all css references. If there are active css references, the removal logic waits and retries ->pre_detroy() until either all refs drop to zero or removal is cancelled. This semantics is unusual and adds non-trivial complexity to cgroup core and IMHO is fundamentally misguided in that it couples internal implementation details (references to internal data structure) with externally visible operation (rmdir). To userland, this is a behavior peculiarity which is unnecessary and difficult to expect (css refs is otherwise invisible from userland), and, to policy implementations, this is an unnecessary restriction (e.g. blkcg wants to hold css refs for caching purposes but can't as that becomes visible as rmdir hang). Unfortunately, memcg currently depends on ->pre_destroy() retrials and cgroup removal vetoing and can't be immmediately switched to the new behavior. This patch introduces the new behavior of not waiting for css refs to drain and maintains the old behavior for subsystems which have __DEPRECATED_clear_css_refs set. Once, memcg is updated, we can drop the code paths for the old behavior as proposed in the following patch. Note that the following patch is incorrect in that dput work item is in cgroup and may lose some of dputs when multiples css's are released back-to-back, and __css_put() triggers check_for_release() when refcnt reaches 0 instead of 1; however, it shows what part can be removed. http://thread.gmane.org/gmane.linux.kernel.containers/22559/focus=75251 Note that, in not-too-distant future, cgroup core will start emitting warning messages for subsys which require the old behavior, so please get moving. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Vivek Goyal Cc: Johannes Weiner Cc: Michal Hocko Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- mm/memcontrol.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bef114258bbd..d28359cd6b55 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5635,6 +5635,7 @@ struct cgroup_subsys mem_cgroup_subsys = { .base_cftypes = mem_cgroup_files, .early_init = 0, .use_id = 1, + .__DEPRECATED_clear_css_refs = true, }; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP -- cgit v1.2.1 From 1d62e43657c63a858560c98069706c705d20505d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 9 Apr 2012 19:36:33 -0300 Subject: cgroup: pass struct mem_cgroup instead of struct cgroup to socket memcg The only reason cgroup was used, was to be consistent with the populate() interface. Now that we're getting rid of it, not only we no longer need it, but we also *can't* call it this way. Since we will no longer rely on populate(), this will be called from create(). During create, the association between struct mem_cgroup and struct cgroup does not yet exist, since cgroup internals hasn't yet initialized its bookkeeping. This means we would not be able to draw the memcg pointer from the cgroup pointer in these functions, which is highly undesirable. Signed-off-by: Glauber Costa Acked-by: Kamezawa Hiroyuki Signed-off-by: Tejun Heo CC: Li Zefan CC: Johannes Weiner CC: Michal Hocko --- mm/memcontrol.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d28359cd6b55..785c32367075 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4640,29 +4640,22 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) #endif /* CONFIG_NUMA */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM -static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) +static int register_kmem_files(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { - /* - * Part of this would be better living in a separate allocation - * function, leaving us with just the cgroup tree population work. - * We, however, depend on state such as network's proto_list that - * is only initialized after cgroup creation. I found the less - * cumbersome way to deal with it to defer it all to populate time - */ - return mem_cgroup_sockets_init(cont, ss); + return mem_cgroup_sockets_init(memcg, ss); }; -static void kmem_cgroup_destroy(struct cgroup *cont) +static void kmem_cgroup_destroy(struct mem_cgroup *memcg) { - mem_cgroup_sockets_destroy(cont); + mem_cgroup_sockets_destroy(memcg); } #else -static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) +static int register_kmem_files(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return 0; } -static void kmem_cgroup_destroy(struct cgroup *cont) +static void kmem_cgroup_destroy(struct mem_cgroup *memcg) { } #endif @@ -5034,7 +5027,7 @@ static void mem_cgroup_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - kmem_cgroup_destroy(cont); + kmem_cgroup_destroy(memcg); mem_cgroup_put(memcg); } @@ -5042,7 +5035,8 @@ static void mem_cgroup_destroy(struct cgroup *cont) static int mem_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) { - return register_kmem_files(cont, ss); + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + return register_kmem_files(memcg, ss); } #ifdef CONFIG_MMU -- cgit v1.2.1 From cbe128e348e5994516304f94865ff90c40c1c5ae Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 9 Apr 2012 19:36:34 -0300 Subject: cgroup: get rid of populate for memcg The last man standing justifying the need for populate() is the sock memcg initialization functions. Now that we are able to pass a struct mem_cgroup instead of a struct cgroup to the socket initialization, there is nothing that stops us from initializing everything in create(). Signed-off-by: Glauber Costa Acked-by: Kamezawa Hiroyuki Signed-off-by: Tejun Heo CC: Li Zefan CC: Johannes Weiner CC: Michal Hocko --- mm/memcontrol.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 785c32367075..901bb03f2ae7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4640,7 +4640,7 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) #endif /* CONFIG_NUMA */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM -static int register_kmem_files(struct mem_cgroup *memcg, struct cgroup_subsys *ss) +static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return mem_cgroup_sockets_init(memcg, ss); }; @@ -4650,7 +4650,7 @@ static void kmem_cgroup_destroy(struct mem_cgroup *memcg) mem_cgroup_sockets_destroy(memcg); } #else -static int register_kmem_files(struct mem_cgroup *memcg, struct cgroup_subsys *ss) +static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return 0; } @@ -5010,6 +5010,17 @@ mem_cgroup_create(struct cgroup *cont) memcg->move_charge_at_immigrate = 0; mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); + + error = memcg_init_kmem(memcg, &mem_cgroup_subsys); + if (error) { + /* + * We call put now because our (and parent's) refcnts + * are already in place. mem_cgroup_put() will internally + * call __mem_cgroup_free, so return directly + */ + mem_cgroup_put(memcg); + return ERR_PTR(error); + } return &memcg->css; free_out: __mem_cgroup_free(memcg); @@ -5032,13 +5043,6 @@ static void mem_cgroup_destroy(struct cgroup *cont) mem_cgroup_put(memcg); } -static int mem_cgroup_populate(struct cgroup_subsys *ss, - struct cgroup *cont) -{ - struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - return register_kmem_files(memcg, ss); -} - #ifdef CONFIG_MMU /* Handlers for move charge at task migration. */ #define PRECHARGE_COUNT_AT_ONCE 256 @@ -5622,7 +5626,6 @@ struct cgroup_subsys mem_cgroup_subsys = { .create = mem_cgroup_create, .pre_destroy = mem_cgroup_pre_destroy, .destroy = mem_cgroup_destroy, - .populate = mem_cgroup_populate, .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, -- cgit v1.2.1