diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 122 |
1 files changed, 57 insertions, 65 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f33c7153b6d7..7bb63eea6eb8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -302,10 +302,10 @@ static void guarantee_online_cpus(const struct cpuset *cs, * are online, with memory. If none are online with memory, walk * up the cpuset hierarchy until we find one that does have some * online mems. If we get all the way to the top and still haven't - * found any online mems, return node_states[N_HIGH_MEMORY]. + * found any online mems, return node_states[N_MEMORY]. * * One way or another, we guarantee to return some non-empty subset - * of node_states[N_HIGH_MEMORY]. + * of node_states[N_MEMORY]. * * Call with callback_mutex held. */ @@ -313,14 +313,14 @@ static void guarantee_online_cpus(const struct cpuset *cs, static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) { while (cs && !nodes_intersects(cs->mems_allowed, - node_states[N_HIGH_MEMORY])) + node_states[N_MEMORY])) cs = cs->parent; if (cs) nodes_and(*pmask, cs->mems_allowed, - node_states[N_HIGH_MEMORY]); + node_states[N_MEMORY]); else - *pmask = node_states[N_HIGH_MEMORY]; - BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY])); + *pmask = node_states[N_MEMORY]; + BUG_ON(!nodes_intersects(*pmask, node_states[N_MEMORY])); } /* @@ -1100,7 +1100,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, return -ENOMEM; /* - * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; + * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; * it's read-only */ if (cs == &top_cpuset) { @@ -1122,7 +1122,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, goto done; if (!nodes_subset(trialcs->mems_allowed, - node_states[N_HIGH_MEMORY])) { + node_states[N_MEMORY])) { retval = -EINVAL; goto done; } @@ -1784,56 +1784,20 @@ static struct cftype files[] = { }; /* - * post_clone() is called during cgroup_create() when the - * clone_children mount argument was specified. The cgroup - * can not yet have any tasks. - * - * Currently we refuse to set up the cgroup - thereby - * refusing the task to be entered, and as a result refusing - * the sys_unshare() or clone() which initiated it - if any - * sibling cpusets have exclusive cpus or mem. - * - * If this becomes a problem for some users who wish to - * allow that scenario, then cpuset_post_clone() could be - * changed to grant parent->cpus_allowed-sibling_cpus_exclusive - * (and likewise for mems) to the new cgroup. Called with cgroup_mutex - * held. - */ -static void cpuset_post_clone(struct cgroup *cgroup) -{ - struct cgroup *parent, *child; - struct cpuset *cs, *parent_cs; - - parent = cgroup->parent; - list_for_each_entry(child, &parent->children, sibling) { - cs = cgroup_cs(child); - if (is_mem_exclusive(cs) || is_cpu_exclusive(cs)) - return; - } - cs = cgroup_cs(cgroup); - parent_cs = cgroup_cs(parent); - - mutex_lock(&callback_mutex); - cs->mems_allowed = parent_cs->mems_allowed; - cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); - mutex_unlock(&callback_mutex); - return; -} - -/* - * cpuset_create - create a cpuset + * cpuset_css_alloc - allocate a cpuset css * cont: control group that the new cpuset will be part of */ -static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) +static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) { - struct cpuset *cs; - struct cpuset *parent; + struct cgroup *parent_cg = cont->parent; + struct cgroup *tmp_cg; + struct cpuset *parent, *cs; - if (!cont->parent) { + if (!parent_cg) return &top_cpuset.css; - } - parent = cgroup_cs(cont->parent); + parent = cgroup_cs(parent_cg); + cs = kmalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); @@ -1855,7 +1819,36 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) cs->parent = parent; number_of_cpusets++; - return &cs->css ; + + if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cont->flags)) + goto skip_clone; + + /* + * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is + * set. This flag handling is implemented in cgroup core for + * histrical reasons - the flag may be specified during mount. + * + * Currently, if any sibling cpusets have exclusive cpus or mem, we + * refuse to clone the configuration - thereby refusing the task to + * be entered, and as a result refusing the sys_unshare() or + * clone() which initiated it. If this becomes a problem for some + * users who wish to allow that scenario, then this could be + * changed to grant parent->cpus_allowed-sibling_cpus_exclusive + * (and likewise for mems) to the new cgroup. + */ + list_for_each_entry(tmp_cg, &parent_cg->children, sibling) { + struct cpuset *tmp_cs = cgroup_cs(tmp_cg); + + if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) + goto skip_clone; + } + + mutex_lock(&callback_mutex); + cs->mems_allowed = parent->mems_allowed; + cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + mutex_unlock(&callback_mutex); +skip_clone: + return &cs->css; } /* @@ -1864,7 +1857,7 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) * will call async_rebuild_sched_domains(). */ -static void cpuset_destroy(struct cgroup *cont) +static void cpuset_css_free(struct cgroup *cont) { struct cpuset *cs = cgroup_cs(cont); @@ -1878,11 +1871,10 @@ static void cpuset_destroy(struct cgroup *cont) struct cgroup_subsys cpuset_subsys = { .name = "cpuset", - .create = cpuset_create, - .destroy = cpuset_destroy, + .css_alloc = cpuset_css_alloc, + .css_free = cpuset_css_free, .can_attach = cpuset_can_attach, .attach = cpuset_attach, - .post_clone = cpuset_post_clone, .subsys_id = cpuset_subsys_id, .base_cftypes = files, .early_init = 1, @@ -2034,7 +2026,7 @@ static struct cpuset *cpuset_next(struct list_head *queue) * before dropping down to the next. It always processes a node before * any of its children. * - * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY + * In the case of memory hot-unplug, it will remove nodes from N_MEMORY * if all present pages from a node are offlined. */ static void @@ -2073,7 +2065,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event) /* Continue past cpusets with all mems online */ if (nodes_subset(cp->mems_allowed, - node_states[N_HIGH_MEMORY])) + node_states[N_MEMORY])) continue; oldmems = cp->mems_allowed; @@ -2081,7 +2073,7 @@ scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event) /* Remove offline mems from this cpuset. */ mutex_lock(&callback_mutex); nodes_and(cp->mems_allowed, cp->mems_allowed, - node_states[N_HIGH_MEMORY]); + node_states[N_MEMORY]); mutex_unlock(&callback_mutex); /* Move tasks from the empty cpuset to a parent */ @@ -2134,8 +2126,8 @@ void cpuset_update_active_cpus(bool cpu_online) #ifdef CONFIG_MEMORY_HOTPLUG /* - * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. - * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. + * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. + * Call this routine anytime after node_states[N_MEMORY] changes. * See cpuset_update_active_cpus() for CPU hotplug handling. */ static int cpuset_track_online_nodes(struct notifier_block *self, @@ -2148,7 +2140,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, case MEM_ONLINE: oldmems = top_cpuset.mems_allowed; mutex_lock(&callback_mutex); - top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + top_cpuset.mems_allowed = node_states[N_MEMORY]; mutex_unlock(&callback_mutex); update_tasks_nodemask(&top_cpuset, &oldmems, NULL); break; @@ -2177,7 +2169,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, void __init cpuset_init_smp(void) { cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); - top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + top_cpuset.mems_allowed = node_states[N_MEMORY]; hotplug_memory_notifier(cpuset_track_online_nodes, 10); @@ -2245,7 +2237,7 @@ void cpuset_init_current_mems_allowed(void) * * Description: Returns the nodemask_t mems_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty - * subset of node_states[N_HIGH_MEMORY], even if this means going outside the + * subset of node_states[N_MEMORY], even if this means going outside the * tasks cpuset. **/ |