diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/backing-dev.h | 5 | ||||
-rw-r--r-- | include/linux/cgroup-defs.h | 76 | ||||
-rw-r--r-- | include/linux/cgroup.h | 129 | ||||
-rw-r--r-- | include/linux/hugetlb_cgroup.h | 4 | ||||
-rw-r--r-- | include/linux/init_task.h | 8 | ||||
-rw-r--r-- | include/linux/jump_label.h | 18 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 8 | ||||
-rw-r--r-- | include/linux/sched.h | 12 |
8 files changed, 144 insertions, 116 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c85f74946a8b..c82794f20110 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,7 +13,6 @@ #include <linux/sched.h> #include <linux/blkdev.h> #include <linux/writeback.h> -#include <linux/memcontrol.h> #include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> @@ -267,8 +266,8 @@ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return cgroup_on_dfl(mem_cgroup_root_css->cgroup) && - cgroup_on_dfl(blkcg_root_css->cgroup) && + return cgroup_subsys_on_dfl(memory_cgrp_subsys) && + cgroup_subsys_on_dfl(io_cgrp_subsys) && bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8492721b39be..60d44b26276d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -76,6 +76,7 @@ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ @@ -83,6 +84,17 @@ enum { }; /* + * cgroup_file is the handle for a file instance created in a cgroup which + * is used, for example, to generate file changed notifications. This can + * be obtained by setting cftype->file_offset. + */ +struct cgroup_file { + /* do not access any fields from outside cgroup core */ + struct list_head node; /* anchored at css->files */ + struct kernfs_node *kn; +}; + +/* * Per-subsystem/per-cgroup state maintained by the system. This is the * fundamental structural building block that controllers deal with. 
* @@ -122,6 +134,9 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* all cgroup_files associated with this css */ + struct list_head files; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -196,6 +211,9 @@ struct css_set { */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + /* all css_task_iters currently walking this cset */ + struct list_head task_iters; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; @@ -217,16 +235,16 @@ struct cgroup { int id; /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero popuplated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. + * Each non-empty css_set associated with this cgroup contributes + * one to populated_cnt. All children with non-zero populated_cnt + * of their own contribute one. The count is zero iff there's no + * task in this cgroup or its subtree. */ int populated_cnt; struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + struct cgroup_file procs_file; /* handle for "cgroup.procs" */ + struct cgroup_file events_file; /* handle for "cgroup.events" */ /* * The bitmask of subsystems enabled on the child cgroups. @@ -324,11 +342,6 @@ struct cftype { */ char name[MAX_CFTYPE_NAME]; unsigned long private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; /* * The maximum length of string, excluding trailing nul, that can @@ -340,6 +353,14 @@ struct cftype { unsigned int flags; /* + * If non-zero, should contain the offset from the start of css to + * a struct cgroup_file field. cgroup will record the handle of + * the created file into it. The recorded handle can be used as + * long as the containing css remains accessible. 
+ */ + unsigned int file_offset; + + /* * Fields used for internal bookkeeping. Initialized automatically * during registration. */ @@ -414,12 +435,10 @@ struct cgroup_subsys { int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); + void (*exit)(struct task_struct *task); + void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int disabled; int early_init; /* @@ -473,8 +492,31 @@ struct cgroup_subsys { unsigned int depends_on; }; -void cgroup_threadgroup_change_begin(struct task_struct *tsk); -void cgroup_threadgroup_change_end(struct task_struct *tsk); +extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + +/** + * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_begin() and allows cgroup operations to + * synchronize against threadgroup changes using a percpu_rw_semaphore. + */ +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + percpu_down_read(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_end(). Counterpart of + * cgroup_threadgroup_change_begin(). 
+ */ +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + percpu_up_read(&cgroup_threadgroup_rwsem); +} #else /* CONFIG_CGROUPS */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index eb7ca55f72ef..22e3754f89c5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -13,10 +13,10 @@ #include <linux/nodemask.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> -#include <linux/rwsem.h> #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/kernfs.h> +#include <linux/jump_label.h> #include <linux/cgroup-defs.h> @@ -41,6 +41,10 @@ struct css_task_iter { struct list_head *task_pos; struct list_head *tasks_head; struct list_head *mg_tasks_head; + + struct css_set *cur_cset; + struct task_struct *cur_task; + struct list_head iters_node; /* css_set->task_iters */ }; extern struct cgroup_root cgrp_dfl_root; @@ -50,6 +54,26 @@ extern struct css_set init_css_set; #include <linux/cgroup_subsys.h> #undef SUBSYS +#define SUBSYS(_x) \ + extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ + extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; +#include <linux/cgroup_subsys.h> +#undef SUBSYS + +/** + * cgroup_subsys_enabled - fast test on whether a subsys is enabled + * @ss: subsystem in question + */ +#define cgroup_subsys_enabled(ss) \ + static_branch_likely(&ss ## _enabled_key) + +/** + * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy + * @ss: subsystem in question + */ +#define cgroup_subsys_on_dfl(ss) \ + static_branch_likely(&ss ## _on_dfl_key) + bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, @@ -78,6 +102,7 @@ extern void cgroup_cancel_fork(struct task_struct *p, extern void cgroup_post_fork(struct task_struct *p, void *old_ss_priv[CGROUP_CANFORK_COUNT]); void cgroup_exit(struct task_struct *p); 
+void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); @@ -211,11 +236,33 @@ void css_task_iter_end(struct css_task_iter *it); * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @tset: taskset to iterate + * + * @tset may contain multiple tasks and they may belong to multiple + * processes. When there are multiple tasks in @tset, if a task of a + * process is in @tset, all tasks of the process are in @tset. Also, all + * are guaranteed to share the same source and destination csses. + * + * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) +/** + * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset + * @leader: the loop cursor + * @tset: taskset to iterate + * + * Iterate threadgroup leaders of @tset. For single-task migrations, @tset + * may not contain any. + */ +#define cgroup_taskset_for_each_leader(leader, tset) \ + for ((leader) = cgroup_taskset_first((tset)); (leader); \ + (leader) = cgroup_taskset_next((tset))) \ + if ((leader) != (leader)->group_leader) \ + ; \ + else + /* * Inline functions. 
*/ @@ -320,11 +367,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; -extern struct rw_semaphore css_set_rwsem; +extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ lockdep_is_held(&cgroup_mutex) || \ - lockdep_is_held(&css_set_rwsem) || \ + lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ @@ -412,68 +459,10 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } -/** - * cgroup_on_dfl - test whether a cgroup is on the default hierarchy - * @cgrp: the cgroup of interest - * - * The default hierarchy is the v2 interface of cgroup and this function - * can be used to test whether a cgroup is on the default hierarchy for - * cases where a subsystem should behave differnetly depending on the - * interface version. - * - * The set of behaviors which change on the default hierarchy are still - * being determined and the mount option is prefixed with __DEVEL__. - * - * List of changed behaviors: - * - * - Mount options "noprefix", "xattr", "clone_children", "release_agent" - * and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process granularity. Use - * "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they got - * recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. Replacement - * notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup - * and its descendants contain no task; otherwise, 1. 
The file also - * generates kernfs notification which can be monitored through poll and - * [di]notify when the value of the file changes. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens and - * take masks of ancestors with non-empty cpus/mems, instead of being - * moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it takes - * masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for the flag - * is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. - */ -static inline bool cgroup_on_dfl(const struct cgroup *cgrp) -{ - return cgrp->root == &cgrp_dfl_root; -} - /* no synchronization, the result can only be used as a hint */ -static inline bool cgroup_has_tasks(struct cgroup *cgrp) +static inline bool cgroup_is_populated(struct cgroup *cgrp) { - return !list_empty(&cgrp->cset_links); + return cgrp->populated_cnt; } /* returns ino associated with a cgroup */ @@ -527,6 +516,19 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +/** + * cgroup_file_notify - generate a file modified event for a cgroup_file + * @cfile: target cgroup_file + * + * @cfile must have been obtained by setting cftype->file_offset. 
+ */ +static inline void cgroup_file_notify(struct cgroup_file *cfile) +{ + /* might not have been created due to one of the CFTYPE selector flags */ + if (cfile->kn) + kernfs_notify(cfile->kn); +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -546,6 +548,7 @@ static inline void cgroup_cancel_fork(struct task_struct *p, static inline void cgroup_post_fork(struct task_struct *p, void *ss_priv[CGROUP_CANFORK_COUNT]) {} static inline void cgroup_exit(struct task_struct *p) {} +static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index bcc853eccc85..7edd30515298 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -48,9 +48,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) static inline bool hugetlb_cgroup_disabled(void) { - if (hugetlb_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); } extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 810a34f60424..1c1ff7e4faa4 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,13 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CGROUPS -#define INIT_GROUP_RWSEM(sig) \ - .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), -#else -#define INIT_GROUP_RWSEM(sig) -#endif - #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -65,7 +58,6 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ - INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 
f1094238ab2a..8dde55974f18 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -214,11 +214,6 @@ static inline int jump_label_apply_nops(struct module *mod) #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled -static inline bool static_key_enabled(struct static_key *key) -{ - return static_key_count(key) > 0; -} - static inline void static_key_enable(struct static_key *key) { int count = static_key_count(key); @@ -265,6 +260,17 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +extern bool ____wrong_branch_error(void); + +#define static_key_enabled(x) \ +({ \ + if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ + !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ + !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + ____wrong_branch_error(); \ + static_key_count((struct static_key *)x) > 0; \ +}) + #ifdef HAVE_JUMP_LABEL /* @@ -323,8 +329,6 @@ struct static_key_false { * See jump_label_type() / jump_label_init_type(). 
*/ -extern bool ____wrong_branch_error(void); - #define static_branch_likely(x) \ ({ \ bool branch; \ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3e3318ddfc0e..27251ed428f7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -213,6 +213,9 @@ struct mem_cgroup { /* OOM-Killer disable */ int oom_kill_disable; + /* handle for "memory.events" */ + struct cgroup_file events_file; + /* protect arrays of thresholds */ struct mutex thresholds_lock; @@ -285,6 +288,7 @@ static inline void mem_cgroup_events(struct mem_cgroup *memcg, unsigned int nr) { this_cpu_add(memcg->stat->events[idx], nr); + cgroup_file_notify(&memcg->events_file); } bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); @@ -346,9 +350,7 @@ ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_disabled(void) { - if (memory_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(memory_cgrp_subsys); } /* diff --git a/include/linux/sched.h b/include/linux/sched.h index c115d617739d..4effb1025fbb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -771,18 +771,6 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif -#ifdef CONFIG_CGROUPS - /* - * group_rwsem prevents new tasks from entering the threadgroup and - * member tasks from exiting,a more specifically, setting of - * PF_EXITING. fork and exit paths are protected with this rwsem - * using threadgroup_change_begin/end(). Users which require - * threadgroup to remain stable should use threadgroup_[un]lock() - * which also takes care of exec path. Currently, cgroup is the - * only user. - */ - struct rw_semaphore group_rwsem; -#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ |