From 1cce1eea0aff51201753fcaca421df825b0813b6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 14 Dec 2016 15:56:33 +0200 Subject: inotify: Convert to using per-namespace limits This patchset converts inotify to using the newly introduced per-userns sysctl infrastructure. Currently the inotify instances/watches are being accounted in the user_struct structure. This means that in setups where multiple users in unprivileged containers map to the same underlying real user (i.e. pointing to the same user_struct) the inotify limits are going to be shared as well, allowing one user(or application) to exhaust all others limits. Fix this by switching the inotify sysctls to using the per-namespace/per-user limits. This will allow the server admin to set sensible global limits, which can further be tuned inside every individual user namespace. Additionally, in order to preserve the sysctl ABI make the existing inotify instances/watches sysctls modify the values of the initial user namespace. Signed-off-by: Nikolay Borisov Acked-by: Jan Kara Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/linux/sched.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d1905245c7a..d2334229167f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -868,10 +868,6 @@ struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_INOTIFY_USER - atomic_t inotify_watches; /* How many inotify watches does this user have? */ - atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ -#endif #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif -- cgit v1.2.3 From 0f1b92cbdd0309afae0af1963e8cccddb3d2eaff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 30 Jan 2017 18:06:11 +0300 Subject: introduce the walk_process_tree() helper Add the new helper to walk the process tree, the next patch adds a user. Note that it visits the group leaders only, proc_visitor can do for_each_thread itself or we can trivially extend walk_process_tree() to do this. Signed-off-by: Oleg Nesterov Signed-off-by: Pavel Tikhomirov Signed-off-by: Eric W. Biederman --- include/linux/sched.h | 3 +++ kernel/fork.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d2334229167f..6261bfc12853 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3053,6 +3053,9 @@ extern bool current_is_single_threaded(void); #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; diff --git a/kernel/fork.c b/kernel/fork.c index 11c5c8ab827c..135b7a49ad59 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,6 +2053,38 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, } #endif +void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) +{ + struct task_struct *leader, *parent, *child; + int res; + + read_lock(&tasklist_lock); + leader = top = top->group_leader; +down: + for_each_thread(leader, parent) { + list_for_each_entry(child, &parent->children, sibling) { + res = visitor(child, data); + if (res) { + if (res < 0) + goto out; + leader = child; + goto down; + } +up: + ; + } + } + + if (leader != top) { + child = leader; + parent = child->real_parent; + leader = parent->group_leader; + goto up; + } +out: + read_unlock(&tasklist_lock); +} + #ifndef ARCH_MIN_MMSTRUCT_ALIGN #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -- cgit v1.2.3