From fc37191272a972d449bab3d8247cf52cadf5d4e6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 21 Aug 2018 21:55:38 -0700 Subject: userns: use refcount_t for reference counting instead atomic_t refcount_t type and corresponding API should be used instead of atomic_t wh en the variable is used as a reference counter. This avoids accidental refcounter overflows that might lead to use-after-free situations. Link: http://lkml.kernel.org/r/20180703200141.28415-6-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andrew Morton Cc: "Eric W. Biederman" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/user.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/sched') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 96fe289c4c6e..39ad98c09c58 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -4,6 +4,7 @@ #include #include +#include #include struct key; @@ -12,7 +13,7 @@ struct key; * Some day this will be a full-fledged user tracking system.. */ struct user_struct { - atomic_t __count; /* reference count */ + refcount_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY @@ -59,7 +60,7 @@ extern struct user_struct root_user; extern struct user_struct * alloc_uid(kuid_t); static inline struct user_struct *get_uid(struct user_struct *u) { - atomic_inc(&u->__count); + refcount_inc(&u->__count); return u; } extern void free_uid(struct user_struct *); -- cgit v1.2.3 From a2e514453861dd39b53b7a50b6771bd3f9852078 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 21 Aug 2018 21:55:52 -0700 Subject: kernel/hung_task.c: allow to set checking interval separately from timeout Currently task hung checking interval is equal to timeout, as the result hung is detected anywhere between timeout and 2*timeout. This is fine for most interactive environments, but this hurts automated testing setups (syzbot). In an automated setup we need to strictly order CPU lockup < RCU stall < workqueue lockup < task hung < silent loss, so that RCU stall is not detected as task hung and task hung is not detected as silent machine loss. The large variance in task hung detection timeout requires setting silent machine loss timeout to a very large value (e.g. if task hung is 3 mins, then silent loss need to be set to ~7 mins). The additional 3 minutes significantly reduce testing efficiency because usually we crash kernel within a minute, and this can add hours to bug localization process as it needs to do dozens of tests. Allow setting checking interval separately from timeout. This allows to set timeout to, say, 3 minutes, but checking interval to 10 secs. The interval is controlled via a new hung_task_check_interval_secs sysctl, similar to the existing hung_task_timeout_secs sysctl. The default value of 0 results in the current behavior: checking interval is equal to timeout. [akpm@linux-foundation.org: update hung_task_timeout_max's comment] Link: http://lkml.kernel.org/r/20180611111004.203513-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Paul E. McKenney Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 15 ++++++++++++++- include/linux/sched.h | 1 + include/linux/sched/sysctl.h | 1 + kernel/fork.c | 1 + kernel/hung_task.c | 15 ++++++++++++++- kernel/sysctl.c | 13 ++++++++++++- 6 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include/linux/sched') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 59585030cbaf..9d38e75b19a0 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -38,6 +38,7 @@ show up in /proc/sys/kernel: - hung_task_panic - hung_task_check_count - hung_task_timeout_secs +- hung_task_check_interval_secs - hung_task_warnings - hyperv_record_panic_msg - kexec_load_disabled @@ -355,7 +356,7 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. hung_task_timeout_secs: -Check interval. When a task in D state did not get scheduled +When a task in D state did not get scheduled for more than this value report a warning. This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. @@ -364,6 +365,18 @@ Possible values to set are in range {0..LONG_MAX/HZ}. ============================================================== +hung_task_check_interval_secs: + +Hung task check interval. If hung task checking is enabled +(see hung_task_timeout_secs), the check is done every +hung_task_check_interval_secs seconds. +This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. + +0 (default): means use hung_task_timeout_secs as checking interval. +Possible values to set are in range {0..LONG_MAX/HZ}. + +============================================================== + hung_task_warnings: The maximum number of warnings to report. During a check interval diff --git a/include/linux/sched.h b/include/linux/sched.h index 789923fbee3a..58eb3a2bc695 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -853,6 +853,7 @@ struct task_struct { #endif #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; + unsigned long last_switch_time; #endif /* Filesystem information: */ struct fs_struct *fs; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 913488d828cb..a9c32daeb9d8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -10,6 +10,7 @@ struct ctl_table; extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_check_interval_secs; extern int sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void __user *buffer, diff --git a/kernel/fork.c b/kernel/fork.c index 8c760effa42e..8bcef2413f1b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1302,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) tsk->nvcsw = tsk->nivcsw = 0; #ifdef CONFIG_DETECT_HUNG_TASK tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; + tsk->last_switch_time = 0; #endif tsk->mm = NULL; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 32b479468e4d..b9132d1269ef 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; */ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; +/* + * Zero (default value) means use sysctl_hung_task_timeout_secs: + */ +unsigned long __read_mostly sysctl_hung_task_check_interval_secs; + int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; @@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (switch_count != t->last_switch_count) { t->last_switch_count = switch_count; + t->last_switch_time = jiffies; return; } + if (time_is_after_jiffies(t->last_switch_time + timeout * HZ)) + return; trace_sched_process_hang(t); @@ -245,8 +253,13 @@ static int watchdog(void *dummy) for ( ; ; ) { unsigned long timeout = sysctl_hung_task_timeout_secs; - long t = hung_timeout_jiffies(hung_last_checked, timeout); + unsigned long interval = sysctl_hung_task_check_interval_secs; + long t; + if (interval == 0) + interval = timeout; + interval = min_t(unsigned long, interval, timeout); + t = hung_timeout_jiffies(hung_last_checked, interval); if (t <= 0) { if (!atomic_xchg(&reset_hung_task, 0)) check_hung_uninterruptible_tasks(timeout); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f22f76b7a138..dbdcb6673b27 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -145,7 +145,10 @@ static int minolduid; static int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; -/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */ +/* + * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs + * and hung_task_check_interval_secs + */ #ifdef CONFIG_DETECT_HUNG_TASK static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #endif @@ -1090,6 +1093,14 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dohung_task_timeout_secs, .extra2 = &hung_task_timeout_max, }, + { + .procname = "hung_task_check_interval_secs", + .data = &sysctl_hung_task_check_interval_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, { .procname = "hung_task_warnings", .data = &sysctl_hung_task_warnings, -- cgit v1.2.3 From 52cba1a274818c3ee6299c1a1b476e7bc5037a2f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 21:59:51 -0700 Subject: signal: make force_sigsegv() void Patch series "signal: refactor some functions", v3. This series refactors a bunch of functions in signal.c to simplify parts of the code. The greatest single change is declaring the static do_sigpending() helper as void which makes it possible to remove a bunch of unnecessary checks in the syscalls later on. This patch (of 17): force_sigsegv() returned 0 unconditionally so it doesn't make sense to have it return at all. In addition, there are no callers that check force_sigsegv()'s return value. Link: http://lkml.kernel.org/r/20180602103653.18181-2-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Al Viro Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/signal.h | 2 +- kernel/signal.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux/sched') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 113d1ad1ced7..e138ac16c650 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -314,7 +314,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey); int force_sig_ptrace_errno_trap(int errno, void __user *addr); extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); +extern void force_sigsegv(int sig, struct task_struct *p); extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); diff --git a/kernel/signal.c b/kernel/signal.c index 8d8a940422a8..8a828baa0f93 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1458,8 +1458,7 @@ send_sig(int sig, struct task_struct *p, int priv) return send_sig_info(sig, __si_special(priv), p); } -void -force_sig(int sig, struct task_struct *p) +void force_sig(int sig, struct task_struct *p) { force_sig_info(sig, SEND_SIG_PRIV, p); } @@ -1470,8 +1469,7 @@ force_sig(int sig, struct task_struct *p) * the problem was already a SIGSEGV, we'll want to * make sure we don't even try to deliver the signal.. */ -int -force_sigsegv(int sig, struct task_struct *p) +void force_sigsegv(int sig, struct task_struct *p) { if (sig == SIGSEGV) { unsigned long flags; @@ -1480,7 +1478,6 @@ force_sigsegv(int sig, struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } force_sig(SIGSEGV, p); - return 0; } int force_sig_fault(int sig, int code, void __user *addr -- cgit v1.2.3