From 5eb385cc5ae1b31fbcdd727854a00c5a083f6b9b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: Revert "cgroup: add cgroup_subsys->css_e_css_changed()" This reverts commit 56c807ba4e91f0980567b6a69de239677879b17f. cgroup_subsys->css_e_css_changed() was supposed to be used by cgroup writeback support; however, the change to per-inode cgroup association made it unnecessary and the callback doesn't have any user. Remove it. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 789471dba6fb..4f3c0dac26b5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -434,7 +434,6 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); -- cgit v1.2.3 From 8699b7762a623c46ced891b3cf490058b56cf99c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: cgroup: s/child_subsys_mask/subtree_ss_mask/ For consistency with cgroup->subtree_control. * cgroup->child_subsys_mask -> cgroup->subtree_ss_mask * cgroup_calc_child_subsys_mask() -> cgroup_calc_subtree_ss_mask() * cgroup_refresh_child_subsys_mask() -> cgroup_refresh_subtree_ss_mask() No functional changes. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 11 +++++------ kernel/cgroup.c | 48 ++++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4f3c0dac26b5..c68ae7f0fb5f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -253,13 +253,12 @@ struct cgroup { /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. + * "cgroup.subtree_control" while ->child_ss_mask is the effective + * one which may have more subsystems enabled. Controller knobs + * are made available iff it's enabled in ->subtree_control. */ - unsigned int subtree_control; - unsigned int child_subsys_mask; + unsigned long subtree_control; + unsigned long subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7727b6e43e10..f3cd67bfe6c0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -391,10 +391,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, /* * This function is used while updating css associations and thus - * can't test the csses directly. Use ->child_subsys_mask. + * can't test the csses directly. Use ->subtree_ss_mask. */ while (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) + !(cgroup_parent(cgrp)->subtree_ss_mask & (1 << ss->id))) cgrp = cgroup_parent(cgrp); return cgroup_css(cgrp, ss); @@ -1256,7 +1256,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) } /** - * cgroup_calc_child_subsys_mask - calculate child_subsys_mask + * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask * @cgrp: the target cgroup * @subtree_control: the new subtree_control mask to consider * @@ -1268,8 +1268,8 @@ static umode_t cgroup_file_mode(const struct cftype *cft) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, - unsigned long subtree_control) +static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, + unsigned long subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); unsigned long cur_ss_mask = subtree_control; @@ -1293,7 +1293,7 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, * to non-default hierarchies. */ if (parent) - new_ss_mask &= parent->child_subsys_mask; + new_ss_mask &= parent->subtree_ss_mask; else new_ss_mask &= cgrp->root->subsys_mask; @@ -1306,16 +1306,16 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, } /** - * cgroup_refresh_child_subsys_mask - update child_subsys_mask + * cgroup_refresh_subtree_ss_mask - update subtree_ss_mask * @cgrp: the target cgroup * - * Update @cgrp->child_subsys_mask according to the current - * @cgrp->subtree_control using cgroup_calc_child_subsys_mask(). + * Update @cgrp->subtree_ss_mask according to the current + * @cgrp->subtree_control using cgroup_calc_subtree_ss_mask(). */ -static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) +static void cgroup_refresh_subtree_ss_mask(struct cgroup *cgrp) { - cgrp->child_subsys_mask = - cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control); + cgrp->subtree_ss_mask = + cgroup_calc_subtree_ss_mask(cgrp, cgrp->subtree_control); } /** @@ -1542,7 +1542,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, src_root->subsys_mask &= ~(1 << ssid); scgrp->subtree_control &= ~(1 << ssid); - cgroup_refresh_child_subsys_mask(scgrp); + cgroup_refresh_subtree_ss_mask(scgrp); /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; @@ -1550,7 +1550,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, static_branch_enable(cgroup_subsys_on_dfl_key[ssid]); } else { dcgrp->subtree_control |= 1 << ssid; - cgroup_refresh_child_subsys_mask(dcgrp); + cgroup_refresh_subtree_ss_mask(dcgrp); static_branch_disable(cgroup_subsys_on_dfl_key[ssid]); } @@ -2523,11 +2523,11 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, lockdep_assert_held(&cgroup_mutex); /* - * Except for the root, child_subsys_mask must be zero for a cgroup + * Except for the root, subtree_ss_mask must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) && - dst_cgrp->child_subsys_mask) + dst_cgrp->subtree_ss_mask) return -EBUSY; /* look up the dst cset for each src cset and link it to src */ @@ -2880,7 +2880,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy * @cgrp: root of the subtree to update csses for * - * @cgrp's child_subsys_mask has changed and its subtree's (self excluded) + * @cgrp's subtree_ss_mask has changed and its subtree's (self excluded) * css associations need to be updated accordingly. This function looks up * all css_sets which are attached to the subtree, creates the matching * updated css_sets and migrates the tasks to the new ones. @@ -2902,7 +2902,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { struct cgrp_cset_link *link; - /* self is not affected by child_subsys_mask change */ + /* self is not affected by subtree_ss_mask change */ if (css->cgroup == cgrp) continue; @@ -3034,9 +3034,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * depending on subsystem dependencies. */ old_sc = cgrp->subtree_control; - old_ss = cgrp->child_subsys_mask; + old_ss = cgrp->subtree_ss_mask; new_sc = (old_sc | enable) & ~disable; - new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc); + new_ss = cgroup_calc_subtree_ss_mask(cgrp, new_sc); css_enable = ~old_ss & new_ss; css_disable = old_ss & ~new_ss; @@ -3069,7 +3069,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } cgrp->subtree_control = new_sc; - cgrp->child_subsys_mask = new_ss; + cgrp->subtree_ss_mask = new_ss; /* * Create new csses or make the existing ones visible. A css is @@ -3135,7 +3135,7 @@ out_unlock: err_undo_css: cgrp->subtree_control = old_sc; - cgrp->child_subsys_mask = old_ss; + cgrp->subtree_ss_mask = old_ss; for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) @@ -4969,7 +4969,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { - if (parent->child_subsys_mask & (1 << ssid)) { + if (parent->subtree_ss_mask & (1 << ssid)) { ret = create_css(cgrp, ss, parent->subtree_control & (1 << ssid)); if (ret) @@ -4983,7 +4983,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, */ if (!cgroup_on_dfl(cgrp)) { cgrp->subtree_control = parent->subtree_control; - cgroup_refresh_child_subsys_mask(cgrp); + cgroup_refresh_subtree_ss_mask(cgrp); } kernfs_activate(kn); -- cgit v1.2.3 From 6e5c830770f9045a17b1b931c3e11fbd5591e630 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:47 -0500 Subject: cgroup: make cgroup subsystem masks u16 After the recent do_each_subsys_mask() conversion, there's no reason to use ulong for subsystem masks. We'll be adding more subsystem masks to persistent data structures, let's reduce its size to u16 which should be enough for now and the foreseeable future. This doesn't create any noticeable behavior differences. v2: Johannes spotted that the initial patch missed cgroup_no_v1_mask. Converted. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 4 ++-- kernel/cgroup.c | 50 ++++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c68ae7f0fb5f..0abf6aa86c81 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -257,8 +257,8 @@ struct cgroup { * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. */ - unsigned long subtree_control; - unsigned long subtree_ss_mask; + u16 subtree_control; + u16 subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1e561bd990b9..7669f68077b8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -181,10 +181,10 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root); static bool cgrp_dfl_root_visible; /* Controllers blocked by the commandline in v1 */ -static unsigned long cgroup_no_v1_mask; +static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ -static unsigned long cgrp_dfl_root_inhibit_ss_mask; +static u16 cgrp_dfl_root_inhibit_ss_mask; /* The list of hierarchy roots */ @@ -208,19 +208,18 @@ static u64 css_serial_nr_next = 1; * fork/exit handlers to call. This avoids us having to do extra work in the * fork/exit path to check which subsystems have fork/exit callbacks. */ -static unsigned long have_fork_callback __read_mostly; -static unsigned long have_exit_callback __read_mostly; -static unsigned long have_free_callback __read_mostly; +static u16 have_fork_callback __read_mostly; +static u16 have_exit_callback __read_mostly; +static u16 have_free_callback __read_mostly; /* Ditto for the can_fork callback. */ -static unsigned long have_canfork_callback __read_mostly; +static u16 have_canfork_callback __read_mostly; static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask); +static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, @@ -1274,11 +1273,10 @@ static umode_t cgroup_file_mode(const struct cftype *cft) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, - unsigned long subtree_control) +static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); - unsigned long cur_ss_mask = subtree_control; + u16 cur_ss_mask = subtree_control; struct cgroup_subsys *ss; int ssid; @@ -1288,7 +1286,7 @@ static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, return cur_ss_mask; while (true) { - unsigned long new_ss_mask = cur_ss_mask; + u16 new_ss_mask = cur_ss_mask; do_each_subsys_mask(ss, ssid, cur_ss_mask) { new_ss_mask |= ss->depends_on; @@ -1466,12 +1464,11 @@ err: return ret; } -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask) +static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; - unsigned long tmp_ss_mask; + u16 tmp_ss_mask; int ssid, i, ret; lockdep_assert_held(&cgroup_mutex); @@ -1507,7 +1504,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, */ if (dst_root == &cgrp_dfl_root) { if (cgrp_dfl_root_visible) { - pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n", + pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", ret, ss_mask); pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); } @@ -1599,7 +1596,7 @@ static int cgroup_show_options(struct seq_file *seq, } struct cgroup_sb_opts { - unsigned long subsys_mask; + u16 subsys_mask; unsigned int flags; char *release_agent; bool cpuset_clone_children; @@ -1612,13 +1609,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { char *token, *o = data; bool all_ss = false, one_ss = false; - unsigned long mask = -1UL; + u16 mask = U16_MAX; struct cgroup_subsys *ss; int nr_opts = 0; int i; #ifdef CONFIG_CPUSETS - mask = ~(1U << cpuset_cgrp_id); + mask = ~((u16)1 << cpuset_cgrp_id); #endif memset(opts, 0, sizeof(*opts)); @@ -1745,7 +1742,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) int ret = 0; struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct cgroup_sb_opts opts; - unsigned long added_mask, removed_mask; + u16 added_mask, removed_mask; if (root == &cgrp_dfl_root) { pr_err("remount is not allowed\n"); @@ -1893,7 +1890,7 @@ static void init_cgroup_root(struct cgroup_root *root, set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) +static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -2839,7 +2836,7 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) return 0; } -static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask) +static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) { struct cgroup_subsys *ss; bool printed = false; @@ -2950,8 +2947,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - unsigned long enable = 0, disable = 0; - unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; + u16 enable = 0, disable = 0; + u16 css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -5255,7 +5252,7 @@ int __init cgroup_init_early(void) return 0; } -static unsigned long cgroup_disable_mask __initdata; +static u16 cgroup_disable_mask __initdata; /** * cgroup_init - cgroup initialization @@ -5269,6 +5266,7 @@ int __init cgroup_init(void) unsigned long key; int ssid; + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); @@ -5754,7 +5752,7 @@ static int __init cgroup_no_v1(char *str) continue; if (!strcmp(token, "all")) { - cgroup_no_v1_mask = ~0UL; + cgroup_no_v1_mask = U16_MAX; break; } -- cgit v1.2.3 From b38e42e962dbc2fbc3839ce70750881db7c9277e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Feb 2016 10:00:50 -0500 Subject: cgroup: convert cgroup_subsys flag fields to bool bitfields Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 6 +++--- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 0abf6aa86c81..8fc3f0452289 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -444,7 +444,7 @@ struct cgroup_subsys { void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int early_init; + bool early_init:1; /* * If %false, this subsystem is properly hierarchical - @@ -458,8 +458,8 @@ struct cgroup_subsys { * cases. Eventually, all subsystems will be made properly * hierarchical and this will go away. */ - bool broken_hierarchy; - bool warned_broken_hierarchy; + bool broken_hierarchy:1; + bool warned_broken_hierarchy:1; /* the following two fields are initialized automtically during boot */ int id; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 41989ab4db57..90899837ea78 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2089,7 +2089,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .bind = cpuset_bind, .legacy_cftypes = files, - .early_init = 1, + .early_init = true, }; /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 44253adb3c36..0f5abc6e4ff3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8706,7 +8706,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .legacy_cftypes = cpu_files, - .early_init = 1, + .early_init = true, }; #endif /* CONFIG_CGROUP_SCHED */ diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index dd7cbb55bbf2..2ddaebf7469a 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -279,5 +279,5 @@ struct cgroup_subsys cpuacct_cgrp_subsys = { .css_alloc = cpuacct_css_alloc, .css_free = cpuacct_css_free, .legacy_cftypes = files, - .early_init = 1, + .early_init = true, }; -- cgit v1.2.3 From 88cb04b96a1934ecbfd1d324e7cde55890c1a576 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: cgroup: explicitly track whether a cgroup_subsys_state is visible to userland Currently, whether a css (cgroup_subsys_state) has its interface files created is not tracked and assumed to change together with the owning cgroup's lifecycle. cgroup directory and interface creation is being separated out from internal object creation to help refactoring and eventually allow cgroups which are not visible through cgroupfs. This patch adds CSS_VISIBLE to track whether a css has its interface files created and perform management operations only when necessary which helps decoupling interface file handling from internal object lifecycle. After this patch, all css interface file management functions can be called regardless of the current state and will achieve the expected result. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 1 + kernel/cgroup.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8fc3f0452289..7593c1a46786 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -45,6 +45,7 @@ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ + CSS_VISIBLE = (1 << 3), /* css is visible to userland */ }; /* bits in struct cgroup flags field */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4178e45becb4..a9a53ca942f3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1421,6 +1421,11 @@ static void css_clear_dir(struct cgroup_subsys_state *css, struct cgroup *cgrp = cgrp_override ?: css->cgroup; struct cftype *cfts; + if (!(css->flags & CSS_VISIBLE)) + return; + + css->flags &= ~CSS_VISIBLE; + list_for_each_entry(cfts, &css->ss->cfts, node) cgroup_addrm_files(css, cgrp, cfts, false); } @@ -1439,6 +1444,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css, struct cftype *cfts, *failed_cfts; int ret; + if (css->flags & CSS_VISIBLE) + return 0; + if (!css->ss) { if (cgroup_on_dfl(cgrp)) cfts = cgroup_dfl_base_files; @@ -1455,6 +1463,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css, goto err; } } + + css->flags |= CSS_VISIBLE; + return 0; err: list_for_each_entry(cfts, &css->ss->cfts, node) { @@ -3403,7 +3414,7 @@ static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add) css_for_each_descendant_pre(css, cgroup_css(root, ss)) { struct cgroup *cgrp = css->cgroup; - if (cgroup_is_dead(cgrp)) + if (!(css->flags & CSS_VISIBLE)) continue; ret = cgroup_addrm_files(css, cgrp, cfts, is_add); -- cgit v1.2.3 From 15a27c362d54378f17ec078579b2f6af88495a3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:59 -0500 Subject: cgroup: introduce cgroup_{save|propagate|restore}_control() While controllers are being enabled and disabled in cgroup_subtree_control_write(), the original subsystem masks are stashed in local variables so that they can be restored if the operation fails in the middle. This patch adds dedicated fields to struct cgroup to be used instead of the local variables and implements functions to stash the current values, propagate the changes and restore them recursively. Combined with the previous changes, this makes subsystem management operations fully recursive and modularlized. This will be used to expand cgroup core functionalities. While at it, remove now unused @css_enable and @css_disable from cgroup_subtree_control_write(). Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 2 ++ kernel/cgroup.c | 82 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7593c1a46786..aae8c94de4b3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -260,6 +260,8 @@ struct cgroup { */ u16 subtree_control; u16 subtree_ss_mask; + u16 old_subtree_control; + u16 old_subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0398f2a6673b..452a90e455fa 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3031,6 +3031,62 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) return false; } +/** + * cgroup_save_control - save control masks of a subtree + * @cgrp: root of the target subtree + * + * Save ->subtree_control and ->subtree_ss_mask to the respective old_ + * prefixed fields for @cgrp's subtree including @cgrp itself. + */ +static void cgroup_save_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { + dsct->old_subtree_control = dsct->subtree_control; + dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; + } +} + +/** + * cgroup_propagate_control - refresh control masks of a subtree + * @cgrp: root of the target subtree + * + * For @cgrp and its subtree, ensure ->subtree_ss_mask matches + * ->subtree_control and propagate controller availability through the + * subtree so that descendants don't have unavailable controllers enabled. + */ +static void cgroup_propagate_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { + dsct->subtree_control &= cgroup_control(dsct); + dsct->subtree_ss_mask = cgroup_calc_subtree_ss_mask(dsct, + dsct->subtree_control); + } +} + +/** + * cgroup_restore_control - restore control masks of a subtree + * @cgrp: root of the target subtree + * + * Restore ->subtree_control and ->subtree_ss_mask from the respective old_ + * prefixed fields for @cgrp's subtree including @cgrp itself. + */ +static void cgroup_restore_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { + dsct->subtree_control = dsct->old_subtree_control; + dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; + } +} + /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree @@ -3119,7 +3175,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, loff_t off) { u16 enable = 0, disable = 0; - u16 css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -3203,25 +3258,14 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } - /* - * Update subsys masks and calculate what needs to be done. More - * subsystems than specified may need to be enabled or disabled - * depending on subsystem dependencies. - */ - old_sc = cgrp->subtree_control; - old_ss = cgrp->subtree_ss_mask; - new_sc = (old_sc | enable) & ~disable; - new_ss = cgroup_calc_subtree_ss_mask(cgrp, new_sc); + /* save and update control masks and prepare csses */ + cgroup_save_control(cgrp); - css_enable = ~old_ss & new_ss; - css_disable = old_ss & ~new_ss; - enable |= css_enable; - disable |= css_disable; + cgrp->subtree_control |= enable; + cgrp->subtree_control &= ~disable; - cgrp->subtree_control = new_sc; - cgrp->subtree_ss_mask = new_ss; + cgroup_propagate_control(cgrp); - /* prepare csses */ ret = cgroup_apply_control_enable(cgrp); if (ret) goto err_undo_css; @@ -3246,9 +3290,7 @@ out_unlock: err_undo_css: /* restore masks and shoot down new csses */ - cgrp->subtree_control = old_sc; - cgrp->subtree_ss_mask = old_ss; - + cgroup_restore_control(cgrp); cgroup_apply_control_disable(cgrp); goto out_unlock; -- cgit v1.2.3 From e4857982f49d21c05a84351b56724bf353022355 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: cgroup: use css_set->mg_dst_cgrp for the migration target cgroup Migration can be multi-target on the default hierarchy when a controller is enabled - processes belonging to each child cgroup have to be moved to the child cgroup itself to refresh css association. This isn't a problem for cgroup_migrate_add_src() as each source css_set still maps to single source and target cgroups; however, cgroup_migrate_prepare_dst() is called once after all source css_sets are added and thus might not have a single destination cgroup. This is currently worked around by specifying NULL for @dst_cgrp and using the source's default cgroup as destination as the only multi-target migration in use is self-targetting. While this works, it's subtle and clunky. As all taget cgroups are already specified while preparing the source css_sets, this clunkiness can easily be removed by recording the target cgroup in each source css_set. This patch adds css_set->mg_dst_cgrp which is recorded on cgroup_migrate_src() and used by cgroup_migrate_prepare_dst(). This also makes migration code ready for arbitrary multi-target migration. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 9 +++++---- kernel/cgroup.c | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index aae8c94de4b3..590291d56049 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -191,12 +191,13 @@ struct css_set { /* * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. + * two fields are set. mg_src_cgrp and mg_dst_cgrp are + * respectively the source and destination cgroups of the on-going + * migration. mg_dst_cset is the destination cset the target tasks + * on this cset should be migrated to. Protected by cgroup_mutex. */ struct cgroup *mg_src_cgrp; + struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5dd761355033..fbd3e99a4e98 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2473,6 +2473,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) spin_lock_bh(&css_set_lock); list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); put_css_set_locked(cset); @@ -2511,32 +2512,31 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, return; WARN_ON(src_cset->mg_src_cgrp); + WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); WARN_ON(!list_empty(&src_cset->mg_node)); src_cset->mg_src_cgrp = src_cgrp; + src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); list_add(&src_cset->mg_preload_node, preloaded_csets); } /** * cgroup_migrate_prepare_dst - prepare destination css_sets for migration - * @dst_cgrp: the destination cgroup (may be %NULL) * @preloaded_csets: list of preloaded source css_sets * - * Tasks are about to be moved to @dst_cgrp and all the source css_sets - * have been preloaded to @preloaded_csets. This function looks up and - * pins all destination css_sets, links each to its source, and append them - * to @preloaded_csets. If @dst_cgrp is %NULL, the destination of each - * source css_set is assumed to be its cgroup on the default hierarchy. + * Tasks are about to be moved and all the source css_sets have been + * preloaded to @preloaded_csets. This function looks up and pins all + * destination css_sets, links each to its source, and append them to + * @preloaded_csets. * * This function must be called after cgroup_migrate_add_src() has been * called on each migration source css_set. After migration is performed * using cgroup_migrate(), cgroup_migrate_finish() must be called on * @preloaded_csets. */ -static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, - struct list_head *preloaded_csets) +static int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets) { LIST_HEAD(csets); struct css_set *src_cset, *tmp_cset; @@ -2547,8 +2547,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) { struct css_set *dst_cset; - dst_cset = find_css_set(src_cset, - dst_cgrp ?: src_cset->dfl_cgrp); + dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) goto err; @@ -2561,6 +2560,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, */ if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; + src_cset->mg_dst_cgrp = NULL; list_del_init(&src_cset->mg_preload_node); put_css_set(src_cset); put_css_set(dst_cset); @@ -2657,7 +2657,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, spin_unlock_bh(&css_set_lock); /* prepare dst csets and commit */ - ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (!ret) ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); @@ -2916,7 +2916,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) spin_unlock_bh(&css_set_lock); /* NULL dst indicates self on default hierarchy */ - ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_finish; @@ -4156,7 +4156,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) cgroup_migrate_add_src(link->cset, to, &preloaded_csets); spin_unlock_bh(&css_set_lock); - ret = cgroup_migrate_prepare_dst(to, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_err; -- cgit v1.2.3 From f6d635ad341d5cc0b9c7ab46adfbf3bf5886cee4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: cgroup: implement cgroup_subsys->implicit_on_dfl Some controllers, perf_event for now and possibly freezer in the future, don't really make sense to control explicitly through "cgroup.subtree_control". For example, the primary role of perf_event is identifying the cgroups of tasks; however, because the controller also keeps a small amount of state per cgroup, it can't be replaced with simple cgroup membership tests. This patch implements cgroup_subsys->implicit_on_dfl flag. When set, the controller is implicitly enabled on all cgroups on the v2 hierarchy so that utility type controllers such as perf_event can be enabled and function transparently. An implicit controller doesn't show up in "cgroup.controllers" or "cgroup.subtree_control", is exempt from no internal process rule and can be stolen from the default hierarchy even if there are non-root csses. v2: Reimplemented on top of the recent updates to css handling and subsystem rebinding. Rebinding implicit subsystems is now a simple matter of exempting it from the busy subsystem check. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 13 +++++++++++++ kernel/cgroup.c | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 590291d56049..34b42f03fcd8 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -450,6 +450,19 @@ struct cgroup_subsys { bool early_init:1; + /* + * If %true, the controller, on the default hierarchy, doesn't show + * up in "cgroup.controllers" or "cgroup.subtree_control", is + * implicitly enabled on all cgroups on the default hierarchy, and + * bypasses the "no internal process" constraint. This is for + * utility type controllers which is transparent to userland. + * + * An implicit controller can be stolen from the default hierarchy + * anytime and thus must be okay with offline csses from previous + * hierarchies coexisting with csses for the current one. + */ + bool implicit_on_dfl:1; + /* * If %false, this subsystem is properly hierarchical - * configuration, resource accounting and restriction on a parent diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fbd3e99a4e98..e22df5d81e59 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,6 +186,9 @@ static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ static u16 cgrp_dfl_inhibit_ss_mask; +/* some controllers are implicitly enabled on the default hierarchy */ +static unsigned long cgrp_dfl_implicit_ss_mask; + /* The list of hierarchy roots */ static LIST_HEAD(cgroup_roots); @@ -359,8 +362,8 @@ static u16 cgroup_control(struct cgroup *cgrp) return parent->subtree_control; if (cgroup_on_dfl(cgrp)) - root_ss_mask &= ~cgrp_dfl_inhibit_ss_mask; - + root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | + cgrp_dfl_implicit_ss_mask); return root_ss_mask; } @@ -1327,6 +1330,8 @@ static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask) lockdep_assert_held(&cgroup_mutex); + cur_ss_mask |= cgrp_dfl_implicit_ss_mask; + while (true) { u16 new_ss_mask = cur_ss_mask; @@ -1512,8 +1517,13 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) lockdep_assert_held(&cgroup_mutex); do_each_subsys_mask(ss, ssid, ss_mask) { - /* if @ss has non-root csses attached to it, can't move */ - if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss))) + /* + * If @ss has non-root csses attached to it, can't move. + * If @ss is an implicit controller, it is exempt from this + * rule and can be stolen. + */ + if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) && + !ss->implicit_on_dfl) return -EBUSY; /* can't move between two non-dummy roots either */ @@ -3039,6 +3049,18 @@ static void cgroup_restore_control(struct cgroup *cgrp) } } +static bool css_visible(struct cgroup_subsys_state *css) +{ + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + + if (cgroup_control(cgrp) & (1 << ss->id)) + return true; + if (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) + return false; + return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl; +} + /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree @@ -3074,7 +3096,7 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } - if (cgroup_control(dsct) & (1 << ss->id)) { + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) return ret; @@ -3117,7 +3139,7 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); - } else if (!(cgroup_control(dsct) & (1 << ss->id))) { + } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) ss->css_reset(css); @@ -5455,7 +5477,9 @@ int __init cgroup_init(void) cgrp_dfl_root.subsys_mask |= 1 << ss->id; - if (!ss->dfl_cftypes) + if (ss->implicit_on_dfl) + cgrp_dfl_implicit_ss_mask |= 1 << ss->id; + else if (!ss->dfl_cftypes) cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; if (ss->dfl_cftypes == ss->legacy_cftypes) { -- cgit v1.2.3 From 2b021cbf3cb6208f0d40fd2f1869f237934340ed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Mar 2016 20:43:04 -0400 Subject: cgroup: ignore css_sets associated with dead cgroups during migration Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups"), all dead tasks were associated with init_css_set. If a zombie task is requested for migration, while migration prep operations would still be performed on init_css_set, the actual migration would ignore zombie tasks. As init_css_set is always valid, this worked fine. However, after 2e91fa7f6d45, zombie tasks stay with the css_set it was associated with at the time of death. Let's say a task T associated with cgroup A on hierarchy H-1 and cgroup B on hiearchy H-2. After T becomes a zombie, it would still remain associated with A and B. If A only contains zombie tasks, it can be removed. On removal, A gets marked offline but stays pinned until all zombies are drained. At this point, if migration is initiated on T to a cgroup C on hierarchy H-2, migration path would try to prepare T's css_set for migration and trigger the following. WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160() CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289 ... Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x78/0xb0 [] warn_slowpath_null+0x15/0x20 [] cgroup_get+0x121/0x160 [] link_css_set+0x7b/0x90 [] find_css_set+0x3bc/0x5e0 [] cgroup_migrate_prepare_dst+0x89/0x1f0 [] cgroup_attach_task+0x157/0x230 [] __cgroup_procs_write+0x2b7/0x470 [] cgroup_tasks_write+0xc/0x10 [] cgroup_file_write+0x30/0x1b0 [] kernfs_fop_write+0x13c/0x180 [] __vfs_write+0x23/0xe0 [] vfs_write+0xa4/0x1a0 [] SyS_write+0x44/0xa0 [] entry_SYSCALL_64_fastpath+0x12/0x6f It doesn't make sense to prepare migration for css_sets pointing to dead cgroups as they are guaranteed to contain only zombies which are ignored later during migration. This patch makes cgroup destruction path mark all affected css_sets as dead and updates the migration path to ignore them during preparation. Signed-off-by: Tejun Heo Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") Cc: stable@vger.kernel.org # v4.4+ --- include/linux/cgroup-defs.h | 3 +++ kernel/cgroup.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 34b42f03fcd8..3e39ae5bc799 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -212,6 +212,9 @@ struct css_set { /* all css_task_iters currently walking this cset */ struct list_head task_iters; + /* dead and being drained, ignore for migration */ + bool dead; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e22df5d81e59..d57318950076 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); + /* + * If ->dead, @src_set is associated with one or more dead cgroups + * and doesn't contain any migratable tasks. Ignore it early so + * that the rest of migration path doesn't get confused by it. + */ + if (src_cset->dead) + return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); if (!list_empty(&src_cset->mg_preload_node)) @@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup_subsys_state *css; + struct cgrp_cset_link *link; int ssid; lockdep_assert_held(&cgroup_mutex); @@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return -EBUSY; /* - * Mark @cgrp dead. This prevents further task migration and child - * creation by disabling cgroup_lock_live_group(). + * Mark @cgrp and the associated csets dead. The former prevents + * further task migration and child creation by disabling + * cgroup_lock_live_group(). The latter makes the csets ignored by + * the migration path. */ cgrp->self.flags &= ~CSS_ONLINE; + spin_lock_bh(&css_set_lock); + list_for_each_entry(link, &cgrp->cset_links, cset_link) + link->cset->dead = true; + spin_unlock_bh(&css_set_lock); + /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); -- cgit v1.2.3