From 1fb53567a3633740aac8761eb7023dc5671f0edb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 May 2017 13:45:14 -0500 Subject: pids: Move task_pid_type into sched/signal.h The function is general and inline so there is no need to hide it inside of exit.c Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 113d1ad1ced7..d8ef0a3d2e7e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -556,6 +556,14 @@ extern bool current_is_single_threaded(void); typedef int (*proc_visitor)(struct task_struct *p, void *data); void walk_process_tree(struct task_struct *top, proc_visitor, void *); +static inline +struct pid *task_pid_type(struct task_struct *task, enum pid_type type) +{ + if (type != PIDTYPE_PID) + task = task->group_leader; + return task->pids[type].pid; +} + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; -- cgit v1.2.1 From 7a36094d61bfe9843de5484ff0140227983ac5d5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Sep 2017 12:45:33 -0500 Subject: pids: Compute task_tgid using signal->leader_pid The cost is the the same and this removes the need to worry about complications that come from de_thread and group_leader changing. __task_pid_nr_ns has been updated to take advantage of this change. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index d8ef0a3d2e7e..b95a272c1ab5 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -564,6 +564,11 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type) return task->pids[type].pid; } +static inline struct pid *task_tgid(struct task_struct *task) +{ + return task->signal->leader_pid; +} + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; -- cgit v1.2.1 From 2c4704756cab7cfa031ada4dab361562f0e357c0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Sep 2017 13:06:43 -0500 Subject: pids: Move the pgrp and session pid pointers from task_struct to signal_struct To access these fields the code always has to go to group leader so going to signal struct is no loss and is actually a fundamental simplification. This saves a little bit of memory by only allocating the pid pointer array once instead of once for every thread, and even better this removes a few potential races caused by the fact that group_leader can be changed by de_thread, while signal_struct can not. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b95a272c1ab5..2dcded16eb1e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -146,7 +146,9 @@ struct signal_struct { #endif + /* PID/PID hash table linkage. */ struct pid *leader_pid; + struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; @@ -559,9 +561,12 @@ void walk_process_tree(struct task_struct *top, proc_visitor, void *); static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { - if (type != PIDTYPE_PID) - task = task->group_leader; - return task->pids[type].pid; + struct pid *pid; + if (type == PIDTYPE_PID) + pid = task_pid(task); + else + pid = task->signal->pids[type]; + return pid; } static inline struct pid *task_tgid(struct task_struct *task) @@ -569,6 +574,21 @@ static inline struct pid *task_tgid(struct task_struct *task) return task->signal->leader_pid; } +/* + * Without tasklist or RCU lock it is not safe to dereference + * the result of task_pgrp/task_session even if task == current, + * we can race with another thread doing sys_setsid/sys_setpgid. + */ +static inline struct pid *task_pgrp(struct task_struct *task) +{ + return task->signal->pids[PIDTYPE_PGID]; +} + +static inline struct pid *task_session(struct task_struct *task) +{ + return task->signal->pids[PIDTYPE_SID]; +} + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; -- cgit v1.2.1 From 6883f81aac6f44e7df70a6af189b3689ff52cbfb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 4 Jun 2017 04:32:13 -0500 Subject: pid: Implement PIDTYPE_TGID Everywhere except in the pid array we distinguish between a tasks pid and a tasks tgid (thread group id). Even in the enumeration we want that distinction sometimes so we have added __PIDTYPE_TGID. With leader_pid we almost have an implementation of PIDTYPE_TGID in struct signal_struct. Add PIDTYPE_TGID as a first class member of the pid_type enumeration and into the pids array. Then remove the __PIDTYPE_TGID special case and the leader_pid in signal_struct. The net size increase is just an extra pointer added to struct pid and an extra pair of pointers of an hlist_node added to task_struct. The effect on code maintenance is the removal of a number of special cases today and the potential to remove many more special cases as PIDTYPE_TGID gets used to it's fullest. The long term potential is allowing zombie thread group leaders to exit, which will remove a lot more special cases in the code. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 2dcded16eb1e..ee30a5ba475f 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -147,7 +147,6 @@ struct signal_struct { #endif /* PID/PID hash table linkage. */ - struct pid *leader_pid; struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL @@ -571,7 +570,7 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type) static inline struct pid *task_tgid(struct task_struct *task) { - return task->signal->leader_pid; + return task->signal->pids[PIDTYPE_TGID]; } /* @@ -607,7 +606,7 @@ static inline bool thread_group_leader(struct task_struct *p) */ static inline bool has_group_leader_pid(struct task_struct *p) { - return task_pid(p) == p->signal->leader_pid; + return task_pid(p) == task_tgid(p); } static inline -- cgit v1.2.1 From 24122c7f4969adeeaeca3fb1656a31569e9aa59b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Jul 2018 14:30:23 -0500 Subject: signal: Pass pid and pid type into send_sigqueue Make the code more maintainable by performing more of the signal related work in send_sigqueue. A quick inspection of do_timer_create will show that this code path does not lookup a thread group by a thread's pid. Making it safe to find the task pointed to by it_pid with "pid_task(it_pid, type)"; This supports the changes needed in fork to tell if a signal was sent to a single process or a group of processes. Having the pid to task transition in signal.c will also make it easier to sort out races with de_thread and and the thread group leader exiting when it comes time to address that. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ee30a5ba475f..94558ffa82ab 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -330,7 +330,7 @@ extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline int restart_syscall(void) -- cgit v1.2.1 From 088fe47ce952542389e604e83f533811750aaf7c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Jul 2018 17:26:49 -0500 Subject: signal: Add calculate_sigpending() Add a function calculate_sigpending to test to see if any signals are pending for a new task immediately following fork. Signals have to happen either before or after fork. Today our practice is to push all of the signals to before the fork, but that has the downside that frequent or periodic signals can make fork take much much longer than normal or prevent fork from completing entirely. So we need move signals that we can after the fork to prevent that. This updates the code to set TIF_SIGPENDING on a new task if there are signals or other activities that have moved so that they appear to happen after the fork. As the code today restarts if it sees any such activity this won't immediately have an effect, as there will be no reason for it to set TIF_SIGPENDING immediately after the fork. Adding calculate_sigpending means the code in fork can safely be changed to not always restart if a signal is pending. The new calculate_sigpending function sets sigpending if there are pending bits in jobctl, pending signals, the freezer needs to freeze the new task or the live kernel patching framework need the new thread to take the slow path to userspace. I have verified that setting TIF_SIGPENDING does make a new process take the slow path to userspace before it executes it's first userspace instruction. I have looked at the callers of signal_wake_up and the code paths setting TIF_SIGPENDING and I don't see anything else that needs to be handled. The code probably doesn't need to set TIF_SIGPENDING for the kernel live patching as it uses a separate thread flag as well. But at this point it seems safer reuse the recalc_sigpending logic and get the kernel live patching folks to sort out their story later. V2: I have moved the test into schedule_tail where siglock can be grabbed and recalc_sigpending can be reused directly. Further as the last action of setting up a new task this guarantees that TIF_SIGPENDING will be properly set in the new process. The helper calculate_sigpending takes the siglock and uncontitionally sets TIF_SIGPENDING and let's recalc_sigpending clear TIF_SIGPENDING if it is unnecessary. This allows reusing the existing code and keeps maintenance of the conditions simple. Oleg Nesterov suggested the movement and pointed out the need to take siglock if this code was going to be called while the new task is discoverable. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 94558ffa82ab..b55fd293c1e5 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -372,6 +372,7 @@ static inline int signal_pending_state(long state, struct task_struct *p) */ extern void recalc_sigpending_and_wake(struct task_struct *t); extern void recalc_sigpending(void); +extern void calculate_sigpending(void); extern void signal_wake_up_state(struct task_struct *t, unsigned int state); -- cgit v1.2.1 From 924de3b8c9410c404c6eda7abffd282b97b3ff7f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Jul 2018 13:38:00 -0500 Subject: fork: Have new threads join on-going signal group stops There are only two signals that are delivered to every member of a signal group: SIGSTOP and SIGKILL. Signal delivery requires every signal appear to be delivered either before or after a clone syscall. SIGKILL terminates the clone so does not need to be considered. Which leaves only SIGSTOP that needs to be considered when creating new threads. Today in the event of a group stop TIF_SIGPENDING will get set and the fork will restart ensuring the fork syscall participates in the group stop. A fork (especially of a process with a lot of memory) is one of the most expensive system so we really only want to restart a fork when necessary. It is easy so check to see if a SIGSTOP is ongoing and have the new thread join it immediate after the clone completes. Making it appear the clone completed happened just before the SIGSTOP. The calculate_sigpending function will see the bits set in jobctl and set TIF_SIGPENDING to ensure the new task takes the slow path to userspace. V2: The call to task_join_group_stop was moved before the new task is added to the thread group list. This should not matter as sighand->siglock is held over both the addition of the threads, the call to task_join_group_stop and do_signal_stop. But the change is trivial and it is one less thing to worry about when reading the code. Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b55fd293c1e5..ae2b0b81be25 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -385,6 +385,8 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) signal_wake_up_state(t, resume ? __TASK_TRACED : 0); } +void task_join_group_stop(struct task_struct *task); + #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on -- cgit v1.2.1 From c3ad2c3b02e953ead2b8d52a0c9e70312930c3d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Jul 2018 15:20:37 -0500 Subject: signal: Don't restart fork when signals come in. Wen Yang and majiang report that a periodic signal received during fork can cause fork to continually restart preventing an application from making progress. The code was being overly pessimistic. Fork needs to guarantee that a signal sent to multiple processes is logically delivered before the fork and just to the forking process or logically delivered after the fork to both the forking process and it's newly spawned child. For signals like periodic timers that are always delivered to a single process fork can safely complete and let them appear to logically delivered after the fork(). While examining this issue I also discovered that fork today will miss signals delivered to multiple processes during the fork and handled by another thread. Similarly the current code will also miss blocked signals that are delivered to multiple process, as those signals will not appear pending during fork. Add a list of each thread that is currently forking, and keep on that list a signal set that records all of the signals sent to multiple processes. When fork completes initialize the new processes shared_pending signal set with it. The calculate_sigpending function will see those signals and set TIF_SIGPENDING causing the new task to take the slow path to userspace to handle those signals. Making it appear as if those signals were received immediately after the fork. It is not possible to send real time signals to multiple processes and exceptions don't go to multiple processes, which means that that are no signals sent to multiple processes that require siginfo. This means it is safe to not bother collecting siginfo on signals sent during fork. The sigaction of a child of fork is initially the same as the sigaction of the parent process. So a signal the parent ignores the child will also initially ignore. Therefore it is safe to ignore signals sent to multiple processes and ignored by the forking process. Signals sent to only a single process or only a single thread and delivered during fork are treated as if they are received after the fork, and generally not dealt with. They won't cause any problems. V2: Added removal from the multiprocess list on failure. V3: Use -ERESTARTNOINTR directly V4: - Don't queue both SIGCONT and SIGSTOP - Initialize signal_struct.multiprocess in init_task - Move setting of shared_pending to before the new task is visible to signals. This prevents signals from comming in before shared_pending.signal is set to delayed.signal and being lost. V5: - rework list add and delete to account for idle threads v6: - Use sigdelsetmask when removing stop signals Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200447 Reported-by: Wen Yang and Reported-by: majiang Fixes: 4a2c7a7837da ("[PATCH] make fork() atomic wrt pgrp/session signals") Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/sched/signal.h') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ae2b0b81be25..4e9b77fb702d 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -69,6 +69,11 @@ struct thread_group_cputimer { bool checking_timer; }; +struct multiprocess_signals { + sigset_t signal; + struct hlist_node node; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always @@ -90,6 +95,9 @@ struct signal_struct { /* shared signal handling: */ struct sigpending shared_pending; + /* For collecting multiprocess signals during fork */ + struct hlist_head multiprocess; + /* thread group exit support */ int group_exit_code; /* overloaded: -- cgit v1.2.1