summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/hashtab.c23
-rw-r--r--kernel/bpf/sockmap.c11
-rw-r--r--kernel/cgroup/cgroup-internal.h26
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/cgroup/cgroup.c12
-rw-r--r--kernel/crash_core.c8
-rw-r--r--kernel/events/callchain.c2
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c81
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/hung_task.c15
-rw-r--r--kernel/kallsyms.c51
-rw-r--r--kernel/memremap.c3
-rw-r--r--kernel/module.c32
-rw-r--r--kernel/pid.c42
-rw-r--r--kernel/power/Kconfig1
-rw-r--r--kernel/printk/printk.c19
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/idle.c2
-rw-r--r--kernel/sched/wait.c2
-rw-r--r--kernel/signal.c268
-rw-r--r--kernel/sys.c95
-rw-r--r--kernel/sysctl.c37
-rw-r--r--kernel/time/itimer.c5
-rw-r--r--kernel/time/posix-cpu-timers.c2
-rw-r--r--kernel/time/posix-timers.c21
-rw-r--r--kernel/trace/Kconfig12
-rw-r--r--kernel/trace/Makefile5
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/trace_kprobe.c9
-rw-r--r--kernel/tracepoint.c49
-rw-r--r--kernel/user.c11
-rw-r--r--kernel/user_namespace.c24
-rw-r--r--kernel/utsname_sysctl.c41
-rw-r--r--kernel/workqueue.c45
36 files changed, 613 insertions, 367 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 04b8eda94e7d..03cc59ee9c95 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -15,6 +15,7 @@
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
+#include <linux/random.h>
#include <uapi/linux/btf.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
@@ -41,6 +42,7 @@ struct bpf_htab {
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
+ u32 hashrnd;
};
/* each htab element is struct htab_elem + key + value */
@@ -371,6 +373,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (!htab->buckets)
goto free_htab;
+ htab->hashrnd = get_random_int();
for (i = 0; i < htab->n_buckets; i++) {
INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
@@ -402,9 +405,9 @@ free_htab:
return ERR_PTR(err);
}
-static inline u32 htab_map_hash(const void *key, u32 key_len)
+static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
- return jhash(key, key_len, 0);
+ return jhash(key, key_len, hashrnd);
}
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
@@ -470,7 +473,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
head = select_bucket(htab, hash);
@@ -597,7 +600,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
if (!key)
goto find_first_elem;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
head = select_bucket(htab, hash);
@@ -824,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -880,7 +883,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -945,7 +948,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -998,7 +1001,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -1071,7 +1074,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
@@ -1103,7 +1106,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
key_size = map->key_size;
- hash = htab_map_hash(key, key_size);
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
b = __select_bucket(htab, hash);
head = &b->head;
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 98e621a29e8e..cf5195c7c331 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1427,12 +1427,15 @@ out:
static void smap_write_space(struct sock *sk)
{
struct smap_psock *psock;
+ void (*write_space)(struct sock *sk);
rcu_read_lock();
psock = smap_psock_sk(sk);
if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state)))
schedule_work(&psock->tx_work);
+ write_space = psock->save_write_space;
rcu_read_unlock();
+ write_space(sk);
}
static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
@@ -2140,7 +2143,9 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return ERR_PTR(-EPERM);
/* check sanity of attributes */
- if (attr->max_entries == 0 || attr->value_size != 4 ||
+ if (attr->max_entries == 0 ||
+ attr->key_size == 0 ||
+ attr->value_size != 4 ||
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
@@ -2267,8 +2272,10 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
}
l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
htab->map.numa_node);
- if (!l_new)
+ if (!l_new) {
+ atomic_dec(&htab->count);
return ERR_PTR(-ENOMEM);
+ }
memcpy(l_new->key, key, key_size);
l_new->sk = sk;
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 77ff1cd6a252..75568fcf2180 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -8,6 +8,32 @@
#include <linux/list.h>
#include <linux/refcount.h>
+#define TRACE_CGROUP_PATH_LEN 1024
+extern spinlock_t trace_cgroup_path_lock;
+extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
+/*
+ * cgroup_path() takes a spin lock. It is good practice not to take
+ * spin locks within trace point handlers, as they are mostly hidden
+ * from normal view. As cgroup_path() can take the kernfs_rename_lock
+ * spin lock, it is best to not call that function from the trace event
+ * handler.
+ *
+ * Note: trace_cgroup_##type##_enabled() is a static branch that will only
+ * be set when the trace event is enabled.
+ */
+#define TRACE_CGROUP_PATH(type, cgrp, ...) \
+ do { \
+ if (trace_cgroup_##type##_enabled()) { \
+ spin_lock(&trace_cgroup_path_lock); \
+ cgroup_path(cgrp, trace_cgroup_path, \
+ TRACE_CGROUP_PATH_LEN); \
+ trace_cgroup_##type(cgrp, trace_cgroup_path, \
+ ##__VA_ARGS__); \
+ spin_unlock(&trace_cgroup_path_lock); \
+ } \
+ } while (0)
+
/*
* A cgroup can be associated with multiple css_sets as different tasks may
* belong to different cgroups on different hierarchies. In the other
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8b4f0768efd6..51063e7a93c2 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (task) {
ret = cgroup_migrate(task, false, &mgctx);
if (!ret)
- trace_cgroup_transfer_tasks(to, task, false);
+ TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
put_task_struct(task);
}
} while (task && !ret);
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
- trace_cgroup_rename(cgrp);
+ TRACE_CGROUP_PATH(rename, cgrp);
mutex_unlock(&cgroup_mutex);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 35cf3d71f8aa..aae10baf1902 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif
+DEFINE_SPINLOCK(trace_cgroup_path_lock);
+char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
/*
* Protects cgroup_idr and css_idr so that IDs can be released without
* grabbing cgroup_mutex.
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
cgroup_migrate_finish(&mgctx);
if (!ret)
- trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+ TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
return ret;
}
@@ -4636,7 +4639,7 @@ static void css_release_work_fn(struct work_struct *work)
struct cgroup *tcgrp;
/* cgroup release path */
- trace_cgroup_release(cgrp);
+ TRACE_CGROUP_PATH(release, cgrp);
if (cgroup_on_dfl(cgrp))
cgroup_rstat_flush(cgrp);
@@ -4979,7 +4982,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
if (ret)
goto out_destroy;
- trace_cgroup_mkdir(cgrp);
+ TRACE_CGROUP_PATH(mkdir, cgrp);
/* let's create and online css's */
kernfs_activate(kn);
@@ -5167,9 +5170,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
return 0;
ret = cgroup_destroy_locked(cgrp);
-
if (!ret)
- trace_cgroup_rmdir(cgrp);
+ TRACE_CGROUP_PATH(rmdir, cgrp);
cgroup_kn_unlock(kn);
return ret;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b66aced5e8c2..933cb3e45b98 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,8 +14,8 @@
#include <asm/sections.h>
/* vmcoreinfo stuff */
-static unsigned char *vmcoreinfo_data;
-static size_t vmcoreinfo_size;
+unsigned char *vmcoreinfo_data;
+size_t vmcoreinfo_size;
u32 *vmcoreinfo_note;
/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
@@ -344,7 +344,7 @@ void crash_save_vmcoreinfo(void)
if (vmcoreinfo_data_safecopy)
vmcoreinfo_data = vmcoreinfo_data_safecopy;
- vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
+ vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
update_vmcoreinfo_note();
}
@@ -401,7 +401,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(init_uts_ns);
VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
- VMCOREINFO_SYMBOL(swapper_pg_dir);
+ VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
VMCOREINFO_SYMBOL(_stext);
VMCOREINFO_SYMBOL(vmap_area_list);
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index c187aa3df3c8..24a77c34e9ad 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -4,7 +4,7 @@
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
- * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
*/
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80f456ec5d89..2a62b96600ad 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1334,7 +1334,7 @@ static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
{
- return perf_event_pid_type(event, p, __PIDTYPE_TGID);
+ return perf_event_pid_type(event, p, PIDTYPE_TGID);
}
static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
diff --git a/kernel/exit.c b/kernel/exit.c
index c3c7ac560114..0e21e6d21f35 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -73,6 +73,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
nr_threads--;
detach_pid(p, PIDTYPE_PID);
if (group_dead) {
+ detach_pid(p, PIDTYPE_TGID);
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);
@@ -680,7 +681,8 @@ static void forget_original_parent(struct task_struct *father,
t->parent = t->real_parent;
if (t->pdeath_signal)
group_send_sig_info(t->pdeath_signal,
- SEND_SIG_NOINFO, t);
+ SEND_SIG_NOINFO, t,
+ PIDTYPE_TGID);
}
/*
* If this is a threaded reparent there is no need to
@@ -1001,14 +1003,6 @@ struct wait_opts {
int notask_error;
};
-static inline
-struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
-{
- if (type != PIDTYPE_PID)
- task = task->group_leader;
- return task->pids[type].pid;
-}
-
static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
{
return wo->wo_type == PIDTYPE_MAX ||
diff --git a/kernel/fork.c b/kernel/fork.c
index 5ee74c113381..d896e9ca38b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -310,8 +310,9 @@ static struct kmem_cache *mm_cachep;
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
- struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ struct vm_area_struct *vma;
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (vma)
vma_init(vma, mm);
return vma;
@@ -1301,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ tsk->last_switch_time = 0;
#endif
tsk->mm = NULL;
@@ -1425,7 +1427,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM;
atomic_set(&sig->count, 1);
+ spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(&current->sighand->siglock);
return 0;
}
@@ -1487,6 +1491,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
init_waitqueue_head(&sig->wait_chldexit);
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
+ INIT_HLIST_HEAD(&sig->multiprocess);
seqlock_init(&sig->stats_lock);
prev_cputime_init(&sig->prev_cputime);
@@ -1580,10 +1585,22 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
#endif
+static inline void init_task_pid_links(struct task_struct *task)
+{
+ enum pid_type type;
+
+ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+ INIT_HLIST_NODE(&task->pid_links[type]);
+ }
+}
+
static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
{
- task->pids[type].pid = pid;
+ if (type == PIDTYPE_PID)
+ task->thread_pid = pid;
+ else
+ task->signal->pids[type] = pid;
}
static inline void rcu_copy_process(struct task_struct *p)
@@ -1621,6 +1638,7 @@ static __latent_entropy struct task_struct *copy_process(
{
int retval;
struct task_struct *p;
+ struct multiprocess_signals delayed;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1668,6 +1686,24 @@ static __latent_entropy struct task_struct *copy_process(
return ERR_PTR(-EINVAL);
}
+ /*
+ * Force any signals received before this point to be delivered
+ * before the fork happens. Collect up signals sent to multiple
+ * processes that happen during the fork and delay them so that
+ * they appear to happen after the fork.
+ */
+ sigemptyset(&delayed.signal);
+ INIT_HLIST_NODE(&delayed.node);
+
+ spin_lock_irq(&current->sighand->siglock);
+ if (!(clone_flags & CLONE_THREAD))
+ hlist_add_head(&delayed.node, &current->signal->multiprocess);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ retval = -ERESTARTNOINTR;
+ if (signal_pending(current))
+ goto fork_out;
+
retval = -ENOMEM;
p = dup_task_struct(current, node);
if (!p)
@@ -1941,29 +1977,26 @@ static __latent_entropy struct task_struct *copy_process(
rseq_fork(p, clone_flags);
- /*
- * Process group and session signals need to be delivered to just the
- * parent before the fork or both the parent and the child after the
- * fork. Restart if a signal comes in before we add the new process to
- * it's process group.
- * A fatal signal pending means that current will exit, so the new
- * thread can't slip out of an OOM kill (or normal SIGKILL).
- */
- recalc_sigpending();
- if (signal_pending(current)) {
- retval = -ERESTARTNOINTR;
- goto bad_fork_cancel_cgroup;
- }
+ /* Don't start children in a dying pid namespace */
if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
retval = -ENOMEM;
goto bad_fork_cancel_cgroup;
}
+ /* Let kill terminate clone/fork in the middle */
+ if (fatal_signal_pending(current)) {
+ retval = -EINTR;
+ goto bad_fork_cancel_cgroup;
+ }
+
+
+ init_task_pid_links(p);
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
init_task_pid(p, PIDTYPE_PID, pid);
if (thread_group_leader(p)) {
+ init_task_pid(p, PIDTYPE_TGID, pid);
init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
init_task_pid(p, PIDTYPE_SID, task_session(current));
@@ -1971,8 +2004,7 @@ static __latent_entropy struct task_struct *copy_process(
ns_of_pid(pid)->child_reaper = p;
p->signal->flags |= SIGNAL_UNKILLABLE;
}
-
- p->signal->leader_pid = pid;
+ p->signal->shared_pending.signal = delayed.signal;
p->signal->tty = tty_kref_get(current->signal->tty);
/*
* Inherit has_child_subreaper flag under the same
@@ -1983,6 +2015,7 @@ static __latent_entropy struct task_struct *copy_process(
p->real_parent->signal->is_child_subreaper;
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
+ attach_pid(p, PIDTYPE_TGID);
attach_pid(p, PIDTYPE_PGID);
attach_pid(p, PIDTYPE_SID);
__this_cpu_inc(process_counts);
@@ -1990,6 +2023,7 @@ static __latent_entropy struct task_struct *copy_process(
current->signal->nr_threads++;
atomic_inc(&current->signal->live);
atomic_inc(&current->signal->sigcnt);
+ task_join_group_stop(p);
list_add_tail_rcu(&p->thread_group,
&p->group_leader->thread_group);
list_add_tail_rcu(&p->thread_node,
@@ -1998,8 +2032,8 @@ static __latent_entropy struct task_struct *copy_process(
attach_pid(p, PIDTYPE_PID);
nr_threads++;
}
-
total_forks++;
+ hlist_del_init(&delayed.node);
spin_unlock(&current->sighand->siglock);
syscall_tracepoint_update(p);
write_unlock_irq(&tasklist_lock);
@@ -2064,16 +2098,19 @@ bad_fork_free:
put_task_stack(p);
free_task(p);
fork_out:
+ spin_lock_irq(&current->sighand->siglock);
+ hlist_del_init(&delayed.node);
+ spin_unlock_irq(&current->sighand->siglock);
return ERR_PTR(retval);
}
-static inline void init_idle_pids(struct pid_link *links)
+static inline void init_idle_pids(struct task_struct *idle)
{
enum pid_type type;
for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
- INIT_HLIST_NODE(&links[type].node); /* not really needed */
- links[type].pid = &init_struct_pid;
+ INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
+ init_task_pid(idle, type, &init_struct_pid);
}
}
@@ -2083,7 +2120,7 @@ struct task_struct *fork_idle(int cpu)
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
- init_idle_pids(task->pids);
+ init_idle_pids(task);
init_idle(task, cpu);
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 1f450e092c74..11fc3bb456d6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3523,10 +3523,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
switch (cmd) {
case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY;
+ /* fall through */
case FUTEX_WAIT_BITSET:
return futex_wait(uaddr, flags, val, timeout, val3);
case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY;
+ /* fall through */
case FUTEX_WAKE_BITSET:
return futex_wake(uaddr, flags, val, val3);
case FUTEX_REQUEUE:
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 32b479468e4d..b9132d1269ef 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
+/*
+ * Zero (default value) means use sysctl_hung_task_timeout_secs:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+
int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
@@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
+ t->last_switch_time = jiffies;
return;
}
+ if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+ return;
trace_sched_process_hang(t);
@@ -245,8 +253,13 @@ static int watchdog(void *dummy)
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
- long t = hung_timeout_jiffies(hung_last_checked, timeout);
+ unsigned long interval = sysctl_hung_task_check_interval_secs;
+ long t;
+ if (interval == 0)
+ interval = timeout;
+ interval = min_t(unsigned long, interval, timeout);
+ t = hung_timeout_jiffies(hung_last_checked, interval);
if (t <= 0) {
if (!atomic_xchg(&reset_hung_task, 0))
check_hung_uninterruptible_tasks(timeout);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index a23e21ada81b..02a0b01380d8 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -432,6 +432,7 @@ int sprint_backtrace(char *buffer, unsigned long address)
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter {
loff_t pos;
+ loff_t pos_arch_end;
loff_t pos_mod_end;
loff_t pos_ftrace_mod_end;
unsigned long value;
@@ -443,9 +444,29 @@ struct kallsym_iter {
int show_value;
};
+int __weak arch_get_kallsym(unsigned int symnum, unsigned long *value,
+ char *type, char *name)
+{
+ return -EINVAL;
+}
+
+static int get_ksymbol_arch(struct kallsym_iter *iter)
+{
+ int ret = arch_get_kallsym(iter->pos - kallsyms_num_syms,
+ &iter->value, &iter->type,
+ iter->name);
+
+ if (ret < 0) {
+ iter->pos_arch_end = iter->pos;
+ return 0;
+ }
+
+ return 1;
+}
+
static int get_ksymbol_mod(struct kallsym_iter *iter)
{
- int ret = module_get_kallsym(iter->pos - kallsyms_num_syms,
+ int ret = module_get_kallsym(iter->pos - iter->pos_arch_end,
&iter->value, &iter->type,
iter->name, iter->module_name,
&iter->exported);
@@ -501,32 +522,34 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
iter->nameoff = get_symbol_offset(new_pos);
iter->pos = new_pos;
if (new_pos == 0) {
+ iter->pos_arch_end = 0;
iter->pos_mod_end = 0;
iter->pos_ftrace_mod_end = 0;
}
}
+/*
+ * The end position (last + 1) of each additional kallsyms section is recorded
+ * in iter->pos_..._end as each section is added, and so can be used to
+ * determine which get_ksymbol_...() function to call next.
+ */
static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
{
iter->pos = pos;
- if (iter->pos_ftrace_mod_end > 0 &&
- iter->pos_ftrace_mod_end < iter->pos)
- return get_ksymbol_bpf(iter);
+ if ((!iter->pos_arch_end || iter->pos_arch_end > pos) &&
+ get_ksymbol_arch(iter))
+ return 1;
- if (iter->pos_mod_end > 0 &&
- iter->pos_mod_end < iter->pos) {
- if (!get_ksymbol_ftrace_mod(iter))
- return get_ksymbol_bpf(iter);
+ if ((!iter->pos_mod_end || iter->pos_mod_end > pos) &&
+ get_ksymbol_mod(iter))
return 1;
- }
- if (!get_ksymbol_mod(iter)) {
- if (!get_ksymbol_ftrace_mod(iter))
- return get_ksymbol_bpf(iter);
- }
+ if ((!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > pos) &&
+ get_ksymbol_ftrace_mod(iter))
+ return 1;
- return 1;
+ return get_ksymbol_bpf(iter);
}
/* Returns false if pos at or past end of file. */
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1f87ea6b6545..5b8600d39931 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -43,7 +43,7 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff)
pgoff += 1UL << order, order = order_at((res), pgoff))
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-int device_private_entry_fault(struct vm_area_struct *vma,
+vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
unsigned long addr,
swp_entry_t entry,
unsigned int flags,
@@ -365,7 +365,6 @@ void __put_devmap_managed_page(struct page *page)
__ClearPageActive(page);
__ClearPageWaiters(page);
- page->mapping = NULL;
mem_cgroup_uncharge(page);
page->pgmap->page_free(page, page->pgmap->data);
diff --git a/kernel/module.c b/kernel/module.c
index b046a32520d8..6746c85511fe 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -529,12 +529,30 @@ static bool check_symbol(const struct symsearch *syms,
return true;
}
+static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ return (unsigned long)offset_to_ptr(&sym->value_offset);
+#else
+ return sym->value;
+#endif
+}
+
+static const char *kernel_symbol_name(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ return offset_to_ptr(&sym->name_offset);
+#else
+ return sym->name;
+#endif
+}
+
static int cmp_name(const void *va, const void *vb)
{
const char *a;
const struct kernel_symbol *b;
a = va; b = vb;
- return strcmp(a, b->name);
+ return strcmp(a, kernel_symbol_name(b));
}
static bool find_symbol_in_section(const struct symsearch *syms,
@@ -2170,7 +2188,7 @@ void *__symbol_get(const char *symbol)
sym = NULL;
preempt_enable();
- return sym ? (void *)sym->value : NULL;
+ return sym ? (void *)kernel_symbol_value(sym) : NULL;
}
EXPORT_SYMBOL_GPL(__symbol_get);
@@ -2200,10 +2218,12 @@ static int verify_export_symbols(struct module *mod)
for (i = 0; i < ARRAY_SIZE(arr); i++) {
for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
- if (find_symbol(s->name, &owner, NULL, true, false)) {
+ if (find_symbol(kernel_symbol_name(s), &owner, NULL,
+ true, false)) {
pr_err("%s: exports duplicate symbol %s"
" (owned by %s)\n",
- mod->name, s->name, module_name(owner));
+ mod->name, kernel_symbol_name(s),
+ module_name(owner));
return -ENOEXEC;
}
}
@@ -2252,7 +2272,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
ksym = resolve_symbol_wait(mod, info, name);
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
- sym[i].st_value = ksym->value;
+ sym[i].st_value = kernel_symbol_value(ksym);
break;
}
@@ -2516,7 +2536,7 @@ static int is_exported(const char *name, unsigned long value,
ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
else
ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
- return ks != NULL && ks->value == value;
+ return ks != NULL && kernel_symbol_value(ks) == value;
}
/* As per nm */
diff --git a/kernel/pid.c b/kernel/pid.c
index 157fe4b19971..de1cfc4f75a2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -265,27 +265,33 @@ struct pid *find_vpid(int nr)
}
EXPORT_SYMBOL_GPL(find_vpid);
+static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
+{
+ return (type == PIDTYPE_PID) ?
+ &task->thread_pid :
+ &task->signal->pids[type];
+}
+
/*
* attach_pid() must be called with the tasklist_lock write-held.
*/
void attach_pid(struct task_struct *task, enum pid_type type)
{
- struct pid_link *link = &task->pids[type];
- hlist_add_head_rcu(&link->node, &link->pid->tasks[type]);
+ struct pid *pid = *task_pid_ptr(task, type);
+ hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
}
static void __change_pid(struct task_struct *task, enum pid_type type,
struct pid *new)
{
- struct pid_link *link;
+ struct pid **pid_ptr = task_pid_ptr(task, type);
struct pid *pid;
int tmp;
- link = &task->pids[type];
- pid = link->pid;
+ pid = *pid_ptr;
- hlist_del_rcu(&link->node);
- link->pid = new;
+ hlist_del_rcu(&task->pid_links[type]);
+ *pid_ptr = new;
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
if (!hlist_empty(&pid->tasks[tmp]))
@@ -310,8 +316,9 @@ void change_pid(struct task_struct *task, enum pid_type type,
void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type type)
{
- new->pids[type].pid = old->pids[type].pid;
- hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
+ if (type == PIDTYPE_PID)
+ new->thread_pid = old->thread_pid;
+ hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
}
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
@@ -322,7 +329,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
lockdep_tasklist_lock_is_held());
if (first)
- result = hlist_entry(first, struct task_struct, pids[(type)].node);
+ result = hlist_entry(first, struct task_struct, pid_links[(type)]);
}
return result;
}
@@ -360,9 +367,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
struct pid *pid;
rcu_read_lock();
- if (type != PIDTYPE_PID)
- task = task->group_leader;
- pid = get_pid(rcu_dereference(task->pids[type].pid));
+ pid = get_pid(rcu_dereference(*task_pid_ptr(task, type)));
rcu_read_unlock();
return pid;
}
@@ -420,15 +425,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
rcu_read_lock();
if (!ns)
ns = task_active_pid_ns(current);
- if (likely(pid_alive(task))) {
- if (type != PIDTYPE_PID) {
- if (type == __PIDTYPE_TGID)
- type = PIDTYPE_PID;
-
- task = task->group_leader;
- }
- nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
- }
+ if (likely(pid_alive(task)))
+ nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
rcu_read_unlock();
return nr;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index e880ca22c5a5..3a6c2f87699e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -105,6 +105,7 @@ config PM_SLEEP
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
select PM
+ select SRCU
config PM_SLEEP_SMP
def_bool y
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 90b6ab01db59..924e37fb1620 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -66,6 +66,9 @@ int console_printk[4] = {
CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */
};
+atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
+EXPORT_SYMBOL(ignore_console_lock_warning);
+
/*
* Low level drivers may need that to know if they can schedule in
* their unblank() callback or not. So let's export it.
@@ -2788,7 +2791,8 @@ EXPORT_SYMBOL(unregister_console);
void __init console_init(void)
{
int ret;
- initcall_t *call;
+ initcall_t call;
+ initcall_entry_t *ce;
/* Setup the default TTY line discipline. */
n_tty_init();
@@ -2797,13 +2801,14 @@ void __init console_init(void)
* set up the console device so that later boot sequences can
* inform about problems etc..
*/
- call = __con_initcall_start;
+ ce = __con_initcall_start;
trace_initcall_level("console");
- while (call < __con_initcall_end) {
- trace_initcall_start((*call));
- ret = (*call)();
- trace_initcall_finish((*call), ret);
- call++;
+ while (ce < __con_initcall_end) {
+ call = initcall_from_entry(ce);
+ trace_initcall_start(call);
+ ret = call();
+ trace_initcall_finish(call, ret);
+ ce++;
}
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c89302ddf807..625bc9897f62 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2774,6 +2774,8 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
if (current->set_child_tid)
put_user(task_pid_vnr(current), current->set_child_tid);
+
+ calculate_sigpending();
}
/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 1a3e9bddd17b..16f84142f2f4 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -190,7 +190,7 @@ static void cpuidle_idle_call(void)
*/
next_state = cpuidle_select(drv, dev, &stop_tick);
- if (stop_tick)
+ if (stop_tick || tick_nohz_tick_stopped())
tick_nohz_idle_stop_tick();
else
tick_nohz_idle_retain_tick();
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 870f97b313e3..5dd47f1103d1 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -69,6 +69,8 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
wait_queue_entry_t *curr, *next;
int cnt = 0;
+ lockdep_assert_held(&wq_head->lock);
+
if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
curr = list_next_entry(bookmark, entry);
diff --git a/kernel/signal.c b/kernel/signal.c
index 8d8a940422a8..5843c541fda9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -65,14 +65,14 @@ static void __user *sig_handler(struct task_struct *t, int sig)
return t->sighand->action[sig - 1].sa.sa_handler;
}
-static int sig_handler_ignored(void __user *handler, int sig)
+static inline bool sig_handler_ignored(void __user *handler, int sig)
{
/* Is it explicitly or implicitly ignored? */
return handler == SIG_IGN ||
- (handler == SIG_DFL && sig_kernel_ignore(sig));
+ (handler == SIG_DFL && sig_kernel_ignore(sig));
}
-static int sig_task_ignored(struct task_struct *t, int sig, bool force)
+static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
{
void __user *handler;
@@ -80,12 +80,12 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force)
if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
handler == SIG_DFL && !(force && sig_kernel_only(sig)))
- return 1;
+ return true;
return sig_handler_ignored(handler, sig);
}
-static int sig_ignored(struct task_struct *t, int sig, bool force)
+static bool sig_ignored(struct task_struct *t, int sig, bool force)
{
/*
* Blocked signals are never ignored, since the
@@ -93,7 +93,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
* unblocked.
*/
if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
- return 0;
+ return false;
/*
* Tracers may want to know about even ignored signal unless it
@@ -101,7 +101,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
* by SIGNAL_UNKILLABLE task.
*/
if (t->ptrace && sig != SIGKILL)
- return 0;
+ return false;
return sig_task_ignored(t, sig, force);
}
@@ -110,7 +110,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
* Re-calculate pending state from the set of locally pending
* signals, globally pending signals, and blocked signals.
*/
-static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
{
unsigned long ready;
long i;
@@ -138,20 +138,21 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
-static int recalc_sigpending_tsk(struct task_struct *t)
+static bool recalc_sigpending_tsk(struct task_struct *t)
{
if ((t->jobctl & JOBCTL_PENDING_MASK) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
- return 1;
+ return true;
}
+
/*
* We must never clear the flag in another thread, or in current
* when it's possible the current syscall is returning -ERESTART*.
* So we don't clear it here, and only callers who know they should do.
*/
- return 0;
+ return false;
}
/*
@@ -172,6 +173,17 @@ void recalc_sigpending(void)
}
+void calculate_sigpending(void)
+{
+ /* Have any signals or users of TIF_SIGPENDING been delayed
+ * until after fork?
+ */
+ spin_lock_irq(&current->sighand->siglock);
+ set_tsk_thread_flag(current, TIF_SIGPENDING);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+}
+
/* Given the mask, find the first available signal that should be serviced. */
#define SYNCHRONOUS_MASK \
@@ -362,6 +374,20 @@ static bool task_participate_group_stop(struct task_struct *task)
return false;
}
+void task_join_group_stop(struct task_struct *task)
+{
+ /* Have the new thread join an on-going signal group stop */
+ unsigned long jobctl = current->jobctl;
+ if (jobctl & JOBCTL_STOP_PENDING) {
+ struct signal_struct *sig = current->signal;
+ unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
+ unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
+ if (task_set_jobctl_pending(task, signr | gstop)) {
+ sig->group_stop_count++;
+ }
+ }
+}
+
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
@@ -504,13 +530,15 @@ flush_signal_handlers(struct task_struct *t, int force_default)
}
}
-int unhandled_signal(struct task_struct *tsk, int sig)
+bool unhandled_signal(struct task_struct *tsk, int sig)
{
void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
if (is_global_init(tsk))
- return 1;
+ return true;
+
if (handler != SIG_IGN && handler != SIG_DFL)
- return 0;
+ return false;
+
/* if ptraced, let the tracer determine */
return !tsk->ptrace;
}
@@ -684,14 +712,14 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state)
*
* All callers must be holding the siglock.
*/
-static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
+static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
{
struct sigqueue *q, *n;
sigset_t m;
sigandsets(&m, mask, &s->signal);
if (sigisemptyset(&m))
- return 0;
+ return;
sigandnsets(&s->signal, &s->signal, mask);
list_for_each_entry_safe(q, n, &s->list, list) {
@@ -700,7 +728,6 @@ static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
__sigqueue_free(q);
}
}
- return 1;
}
static inline int is_si_special(const struct siginfo *info)
@@ -717,21 +744,16 @@ static inline bool si_fromuser(const struct siginfo *info)
/*
* called with RCU read lock from check_kill_permission()
*/
-static int kill_ok_by_cred(struct task_struct *t)
+static bool kill_ok_by_cred(struct task_struct *t)
{
const struct cred *cred = current_cred();
const struct cred *tcred = __task_cred(t);
- if (uid_eq(cred->euid, tcred->suid) ||
- uid_eq(cred->euid, tcred->uid) ||
- uid_eq(cred->uid, tcred->suid) ||
- uid_eq(cred->uid, tcred->uid))
- return 1;
-
- if (ns_capable(tcred->user_ns, CAP_KILL))
- return 1;
-
- return 0;
+ return uid_eq(cred->euid, tcred->suid) ||
+ uid_eq(cred->euid, tcred->uid) ||
+ uid_eq(cred->uid, tcred->suid) ||
+ uid_eq(cred->uid, tcred->uid) ||
+ ns_capable(tcred->user_ns, CAP_KILL);
}
/*
@@ -882,20 +904,24 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
* as soon as they're available, so putting the signal on the shared queue
* will be equivalent to sending it to one such thread.
*/
-static inline int wants_signal(int sig, struct task_struct *p)
+static inline bool wants_signal(int sig, struct task_struct *p)
{
if (sigismember(&p->blocked, sig))
- return 0;
+ return false;
+
if (p->flags & PF_EXITING)
- return 0;
+ return false;
+
if (sig == SIGKILL)
- return 1;
+ return true;
+
if (task_is_stopped_or_traced(p))
- return 0;
+ return false;
+
return task_curr(p) || !signal_pending(p);
}
-static void complete_signal(int sig, struct task_struct *p, int group)
+static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
{
struct signal_struct *signal = p->signal;
struct task_struct *t;
@@ -908,7 +934,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
*/
if (wants_signal(sig, p))
t = p;
- else if (!group || thread_group_empty(p))
+ else if ((type == PIDTYPE_PID) || thread_group_empty(p))
/*
* There is just one thread and it does not need to be woken.
* It will dequeue unblocked signals before it runs again.
@@ -971,7 +997,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
return;
}
-static inline int legacy_queue(struct sigpending *signals, int sig)
+static inline bool legacy_queue(struct sigpending *signals, int sig)
{
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
@@ -998,7 +1024,7 @@ static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_str
#endif
static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
- int group, int from_ancestor_ns)
+ enum pid_type type, int from_ancestor_ns)
{
struct sigpending *pending;
struct sigqueue *q;
@@ -1012,7 +1038,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
from_ancestor_ns || (info == SEND_SIG_FORCED)))
goto ret;
- pending = group ? &t->signal->shared_pending : &t->pending;
+ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
/*
* Short-circuit ignored signals and support queuing
* exactly one non-rt signal, so that we can get more
@@ -1096,14 +1122,29 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
out_set:
signalfd_notify(t, sig);
sigaddset(&pending->signal, sig);
- complete_signal(sig, t, group);
+
+ /* Let multiprocess signals appear after on-going forks */
+ if (type > PIDTYPE_TGID) {
+ struct multiprocess_signals *delayed;
+ hlist_for_each_entry(delayed, &t->signal->multiprocess, node) {
+ sigset_t *signal = &delayed->signal;
+ /* Can't queue both a stop and a continue signal */
+ if (sig == SIGCONT)
+ sigdelsetmask(signal, SIG_KERNEL_STOP_MASK);
+ else if (sig_kernel_stop(sig))
+ sigdelset(signal, SIGCONT);
+ sigaddset(signal, sig);
+ }
+ }
+
+ complete_signal(sig, t, type);
ret:
- trace_signal_generate(sig, info, t, group, result);
+ trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result);
return ret;
}
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
- int group)
+ enum pid_type type)
{
int from_ancestor_ns = 0;
@@ -1112,7 +1153,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
!task_pid_nr_ns(current, task_active_pid_ns(t));
#endif
- return __send_signal(sig, info, t, group, from_ancestor_ns);
+ return __send_signal(sig, info, t, type, from_ancestor_ns);
}
static void print_fatal_signal(int signr)
@@ -1151,23 +1192,23 @@ __setup("print-fatal-signals=", setup_print_fatal_signals);
int
__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- return send_signal(sig, info, p, 1);
+ return send_signal(sig, info, p, PIDTYPE_TGID);
}
static int
specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
- return send_signal(sig, info, t, 0);
+ return send_signal(sig, info, t, PIDTYPE_PID);
}
int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
- bool group)
+ enum pid_type type)
{
unsigned long flags;
int ret = -ESRCH;
if (lock_task_sighand(p, &flags)) {
- ret = send_signal(sig, info, p, group);
+ ret = send_signal(sig, info, p, type);
unlock_task_sighand(p, &flags);
}
@@ -1274,7 +1315,8 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
/*
* send signal info to all the members of a group
*/
-int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+ enum pid_type type)
{
int ret;
@@ -1283,7 +1325,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
rcu_read_unlock();
if (!ret && sig)
- ret = do_send_sig_info(sig, info, p, true);
+ ret = do_send_sig_info(sig, info, p, type);
return ret;
}
@@ -1301,7 +1343,7 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
success = 0;
retval = -ESRCH;
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
- int err = group_send_sig_info(sig, info, p);
+ int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID);
success |= !err;
retval = err;
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
@@ -1317,7 +1359,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
rcu_read_lock();
p = pid_task(pid, PIDTYPE_PID);
if (p)
- error = group_send_sig_info(sig, info, p);
+ error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
rcu_read_unlock();
if (likely(!p || error != -ESRCH))
return error;
@@ -1339,14 +1381,15 @@ static int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
return error;
}
-static int kill_as_cred_perm(const struct cred *cred,
- struct task_struct *target)
+static inline bool kill_as_cred_perm(const struct cred *cred,
+ struct task_struct *target)
{
const struct cred *pcred = __task_cred(target);
- if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) &&
- !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid))
- return 0;
- return 1;
+
+ return uid_eq(cred->euid, pcred->suid) ||
+ uid_eq(cred->euid, pcred->uid) ||
+ uid_eq(cred->uid, pcred->suid) ||
+ uid_eq(cred->uid, pcred->uid);
}
/* like kill_pid_info(), but doesn't use uid/euid of "current" */
@@ -1376,7 +1419,7 @@ int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid,
if (sig) {
if (lock_task_sighand(p, &flags)) {
- ret = __send_signal(sig, info, p, 1, 0);
+ ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0);
unlock_task_sighand(p, &flags);
} else
ret = -ESRCH;
@@ -1420,7 +1463,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
for_each_process(p) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
- int err = group_send_sig_info(sig, info, p);
+ int err = group_send_sig_info(sig, info, p,
+ PIDTYPE_MAX);
++count;
if (err != -EPERM)
retval = err;
@@ -1446,7 +1490,7 @@ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
if (!valid_signal(sig))
return -EINVAL;
- return do_send_sig_info(sig, info, p, false);
+ return do_send_sig_info(sig, info, p, PIDTYPE_PID);
}
#define __si_special(priv) \
@@ -1458,8 +1502,7 @@ send_sig(int sig, struct task_struct *p, int priv)
return send_sig_info(sig, __si_special(priv), p);
}
-void
-force_sig(int sig, struct task_struct *p)
+void force_sig(int sig, struct task_struct *p)
{
force_sig_info(sig, SEND_SIG_PRIV, p);
}
@@ -1470,8 +1513,7 @@ force_sig(int sig, struct task_struct *p)
* the problem was already a SIGSEGV, we'll want to
* make sure we don't even try to deliver the signal..
*/
-int
-force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig, struct task_struct *p)
{
if (sig == SIGSEGV) {
unsigned long flags;
@@ -1480,7 +1522,6 @@ force_sigsegv(int sig, struct task_struct *p)
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
force_sig(SIGSEGV, p);
- return 0;
}
int force_sig_fault(int sig, int code, void __user *addr
@@ -1664,17 +1705,20 @@ void sigqueue_free(struct sigqueue *q)
__sigqueue_free(q);
}
-int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
+int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
{
int sig = q->info.si_signo;
struct sigpending *pending;
+ struct task_struct *t;
unsigned long flags;
int ret, result;
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
ret = -1;
- if (!likely(lock_task_sighand(t, &flags)))
+ rcu_read_lock();
+ t = pid_task(pid, type);
+ if (!t || !likely(lock_task_sighand(t, &flags)))
goto ret;
ret = 1; /* the signal is ignored */
@@ -1696,15 +1740,16 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
q->info.si_overrun = 0;
signalfd_notify(t, sig);
- pending = group ? &t->signal->shared_pending : &t->pending;
+ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
list_add_tail(&q->list, &pending->list);
sigaddset(&pending->signal, sig);
- complete_signal(sig, t, group);
+ complete_signal(sig, t, type);
result = TRACE_SIGNAL_DELIVERED;
out:
- trace_signal_generate(sig, &q->info, t, group, result);
+ trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result);
unlock_task_sighand(t, &flags);
ret:
+ rcu_read_unlock();
return ret;
}
@@ -1877,10 +1922,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
spin_unlock_irqrestore(&sighand->siglock, flags);
}
-static inline int may_ptrace_stop(void)
+static inline bool may_ptrace_stop(void)
{
if (!likely(current->ptrace))
- return 0;
+ return false;
/*
* Are we in the middle of do_coredump?
* If so and our tracer is also part of the coredump stopping
@@ -1896,19 +1941,19 @@ static inline int may_ptrace_stop(void)
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
* Return non-zero if there is a SIGKILL that should be waking us up.
* Called with the siglock held.
*/
-static int sigkill_pending(struct task_struct *tsk)
+static bool sigkill_pending(struct task_struct *tsk)
{
- return sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
+ return sigismember(&tsk->pending.signal, SIGKILL) ||
+ sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
}
/*
@@ -2288,7 +2333,7 @@ static int ptrace_signal(int signr, siginfo_t *info)
return signr;
}
-int get_signal(struct ksignal *ksig)
+bool get_signal(struct ksignal *ksig)
{
struct sighand_struct *sighand = current->sighand;
struct signal_struct *signal = current->signal;
@@ -2298,7 +2343,7 @@ int get_signal(struct ksignal *ksig)
task_work_run();
if (unlikely(uprobe_deny_signal()))
- return 0;
+ return false;
/*
* Do this once, we can't return to user-mode if freezing() == T.
@@ -2755,7 +2800,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
}
#endif
-static int do_sigpending(sigset_t *set)
+static void do_sigpending(sigset_t *set)
{
spin_lock_irq(&current->sighand->siglock);
sigorsets(set, &current->pending.signal,
@@ -2764,7 +2809,6 @@ static int do_sigpending(sigset_t *set)
/* Outside the lock because only this thread touches it. */
sigandsets(set, &current->blocked, set);
- return 0;
}
/**
@@ -2776,15 +2820,16 @@ static int do_sigpending(sigset_t *set)
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sigsetsize))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sigsetsize))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
@@ -2792,15 +2837,13 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
compat_size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err)
- err = put_compat_sigset(uset, &set, sigsetsize);
- return err;
+ do_sigpending(&set);
+
+ return put_compat_sigset(uset, &set, sigsetsize);
}
#endif
@@ -3193,7 +3236,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
* probe. No signal is actually delivered.
*/
if (!error && sig) {
- error = do_send_sig_info(sig, info, p, false);
+ error = do_send_sig_info(sig, info, p, PIDTYPE_PID);
/*
* If lock_task_sighand() failed we pretend the task
* dies after receiving the signal. The window is tiny,
@@ -3562,25 +3605,26 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset)
{
sigset_t set;
- int err;
if (sizeof(old_sigset_t) > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t)))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sizeof(old_sigset_t)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
{
sigset_t set;
- int err = do_sigpending(&set);
- if (!err)
- err = put_user(set.sig[0], set32);
- return err;
+
+ do_sigpending(&set);
+
+ return put_user(set.sig[0], set32);
}
#endif
@@ -3651,25 +3695,23 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig,
size_t, sigsetsize)
{
struct k_sigaction new_sa, old_sa;
- int ret = -EINVAL;
+ int ret;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(sigset_t))
- goto out;
+ return -EINVAL;
- if (act) {
- if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
- return -EFAULT;
- }
+ if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
+ return -EFAULT;
ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+ if (ret)
+ return ret;
- if (!ret && oact) {
- if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
- return -EFAULT;
- }
-out:
- return ret;
+ if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
@@ -3960,7 +4002,7 @@ void kdb_send_sig(struct task_struct *t, int sig)
"the deadlock.\n");
return;
}
- ret = send_signal(sig, SEND_SIG_PRIV, t, false);
+ ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
spin_unlock(&t->sighand->siglock);
if (ret)
kdb_printf("Fail to deliver Signal %d to process %d.\n",
diff --git a/kernel/sys.c b/kernel/sys.c
index e27b51d3facd..cf5c67533ff1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1237,18 +1237,19 @@ static int override_release(char __user *release, size_t len)
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
- int errno = 0;
+ struct new_utsname tmp;
down_read(&uts_sem);
- if (copy_to_user(name, utsname(), sizeof *name))
- errno = -EFAULT;
+ memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!errno && override_release(name->release, sizeof(name->release)))
- errno = -EFAULT;
- if (!errno && override_architecture(name))
- errno = -EFAULT;
- return errno;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ if (override_architecture(name))
+ return -EFAULT;
+ return 0;
}
#ifdef __ARCH_WANT_SYS_OLD_UNAME
@@ -1257,55 +1258,46 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
*/
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
- int error = 0;
+ struct old_utsname tmp;
if (!name)
return -EFAULT;
down_read(&uts_sem);
- if (copy_to_user(name, utsname(), sizeof(*name)))
- error = -EFAULT;
+ memcpy(&tmp, utsname(), sizeof(tmp));
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!error && override_release(name->release, sizeof(name->release)))
- error = -EFAULT;
- if (!error && override_architecture(name))
- error = -EFAULT;
- return error;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ if (override_architecture(name))
+ return -EFAULT;
+ return 0;
}
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
- int error;
+ struct oldold_utsname tmp = {};
if (!name)
return -EFAULT;
- if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
- return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname, &utsname()->sysname,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename, &utsname()->nodename,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->release, &utsname()->release,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->release + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->version, &utsname()->version,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->version + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine, &utsname()->machine,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->machine + __OLD_UTS_LEN);
+ memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
+ memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
+ memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN);
+ memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN);
+ memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN);
up_read(&uts_sem);
+ if (copy_to_user(name, &tmp, sizeof(tmp)))
+ return -EFAULT;
- if (!error && override_architecture(name))
- error = -EFAULT;
- if (!error && override_release(name->release, sizeof(name->release)))
- error = -EFAULT;
- return error ? -EFAULT : 0;
+ if (override_architecture(name))
+ return -EFAULT;
+ if (override_release(name->release, sizeof(name->release)))
+ return -EFAULT;
+ return 0;
}
#endif
@@ -1319,17 +1311,18 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- struct new_utsname *u = utsname();
+ struct new_utsname *u;
+ down_write(&uts_sem);
+ u = utsname();
memcpy(u->nodename, tmp, len);
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
errno = 0;
uts_proc_notify(UTS_PROC_HOSTNAME);
+ up_write(&uts_sem);
}
- up_write(&uts_sem);
return errno;
}
@@ -1337,8 +1330,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
- int i, errno;
+ int i;
struct new_utsname *u;
+ char tmp[__NEW_UTS_LEN + 1];
if (len < 0)
return -EINVAL;
@@ -1347,11 +1341,11 @@ SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
i = 1 + strlen(u->nodename);
if (i > len)
i = len;
- errno = 0;
- if (copy_to_user(name, u->nodename, i))
- errno = -EFAULT;
+ memcpy(tmp, u->nodename, i);
up_read(&uts_sem);
- return errno;
+ if (copy_to_user(name, tmp, i))
+ return -EFAULT;
+ return 0;
}
#endif
@@ -1370,17 +1364,18 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- struct new_utsname *u = utsname();
+ struct new_utsname *u;
+ down_write(&uts_sem);
+ u = utsname();
memcpy(u->domainname, tmp, len);
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
errno = 0;
uts_proc_notify(UTS_PROC_DOMAINNAME);
+ up_write(&uts_sem);
}
- up_write(&uts_sem);
return errno;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f22f76b7a138..cc02050fd0c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -145,7 +145,10 @@ static int minolduid;
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
-/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
#ifdef CONFIG_DETECT_HUNG_TASK
static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#endif
@@ -222,7 +225,7 @@ static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_MAGIC_SYSRQ
-/* Note: sysrq code uses it's own private copy */
+/* Note: sysrq code uses its own private copy */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
@@ -1091,6 +1094,14 @@ static struct ctl_table kern_table[] = {
.extra2 = &hung_task_timeout_max,
},
{
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
.procname = "hung_task_warnings",
.data = &sysctl_hung_task_warnings,
.maxlen = sizeof(int),
@@ -1797,6 +1808,24 @@ static struct ctl_table fs_table[] = {
.extra2 = &one,
},
{
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
.procname = "suid_dumpable",
.data = &suid_dumpable,
.maxlen = sizeof(int),
@@ -1965,13 +1994,13 @@ static void warn_sysctl_write(struct ctl_table *table)
}
/**
- * proc_first_pos_non_zero_ignore - check if firs position is allowed
+ * proc_first_pos_non_zero_ignore - check if first position is allowed
* @ppos: file position
* @table: the sysctl table
*
* Returns true if the first position is non-zero and the sysctl_writes_strict
* mode indicates this is not allowed for numeric input types. String proc
- * hadlers can ignore the return value.
+ * handlers can ignore the return value.
*/
static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
struct ctl_table *table)
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index f26acef5d7b4..9a65713c8309 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -139,9 +139,10 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
{
struct signal_struct *sig =
container_of(timer, struct signal_struct, real_timer);
+ struct pid *leader_pid = sig->pids[PIDTYPE_TGID];
- trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
- kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
+ trace_itimer_expire(ITIMER_REAL, leader_pid, 0);
+ kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid);
return HRTIMER_NORESTART;
}
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 294d7b65af33..ce32cf741b25 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -894,7 +894,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
trace_itimer_expire(signo == SIGPROF ?
ITIMER_PROF : ITIMER_VIRTUAL,
- tsk->signal->leader_pid, cur_time);
+ task_tgid(tsk), cur_time);
__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f23cc46ecf3e..4b9127e95430 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -333,8 +333,8 @@ void posixtimer_rearm(struct siginfo *info)
int posix_timer_event(struct k_itimer *timr, int si_private)
{
- struct task_struct *task;
- int shared, ret = -1;
+ enum pid_type type;
+ int ret = -1;
/*
* FIXME: if ->sigq is queued we can race with
* dequeue_signal()->posixtimer_rearm().
@@ -348,13 +348,8 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
*/
timr->sigq->info.si_sys_private = si_private;
- rcu_read_lock();
- task = pid_task(timr->it_pid, PIDTYPE_PID);
- if (task) {
- shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
- ret = send_sigqueue(timr->sigq, task, shared);
- }
- rcu_read_unlock();
+ type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
+ ret = send_sigqueue(timr->sigq, timr->it_pid, type);
/* If we failed to send the signal the timer stops. */
return ret > 0;
}
@@ -433,11 +428,13 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
static struct pid *good_sigevent(sigevent_t * event)
{
- struct task_struct *rtn = current->group_leader;
+ struct pid *pid = task_tgid(current);
+ struct task_struct *rtn;
switch (event->sigev_notify) {
case SIGEV_SIGNAL | SIGEV_THREAD_ID:
- rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+ pid = find_vpid(event->sigev_notify_thread_id);
+ rtn = pid_task(pid, PIDTYPE_PID);
if (!rtn || !same_thread_group(rtn, current))
return NULL;
/* FALLTHRU */
@@ -447,7 +444,7 @@ static struct pid *good_sigevent(sigevent_t * event)
return NULL;
/* FALLTHRU */
case SIGEV_NONE:
- return task_pid(rtn);
+ return pid;
default:
return NULL;
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index c042a455afc6..5e3de28c7677 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -774,6 +774,18 @@ config TRACING_EVENTS_GPIO
help
Enable tracing events for gpio subsystem
+config GCOV_PROFILE_FTRACE
+ bool "Enable GCOV profiling on ftrace subsystem"
+ depends on GCOV_KERNEL
+ help
+ Enable GCOV profiling on ftrace subsystem for checking
+ which functions/lines are tested.
+
+ If unsure, say N.
+
+ Note that on a kernel compiled with this config, ftrace will
+ run significantly slower.
+
endif # FTRACE
endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 98d53b39a8ee..f81dadbc7c4a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -23,6 +23,11 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
+# for GCOV coverage profiling
+ifdef CONFIG_GCOV_PROFILE_FTRACE
+GCOV_PROFILE := y
+endif
+
CFLAGS_trace_benchmark.o := -I$(src)
CFLAGS_trace_events_filter.o := -I$(src)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2a24a59e99c5..2868d85f1fb1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1829,6 +1829,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
mutex_lock(&q->blk_trace_mutex);
if (attr == &dev_attr_enable) {
+ if (!!value == !!q->blk_trace) {
+ ret = 0;
+ goto out_unlock_bdev;
+ }
if (value)
ret = blk_trace_setup_queue(q, bdev);
else
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2bfb2bbeb3d2..c30032367aab 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -513,7 +513,14 @@ static bool within_notrace_func(struct trace_kprobe *tk)
if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
return false;
- return !ftrace_location_range(addr - offset, addr - offset + size);
+ /* Get the entry address of the target function */
+ addr -= offset;
+
+ /*
+ * Since ftrace_location_range() does inclusive range check, we need
+ * to subtract 1 byte from the end address.
+ */
+ return !ftrace_location_range(addr, addr + size - 1);
}
#else
#define within_notrace_func(tk) (false)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 96db841bf0fc..bf2c06ef9afc 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -371,6 +371,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+ struct tracepoint * const *end,
+ void (*fct)(struct tracepoint *tp, void *priv),
+ void *priv)
+{
+ if (!begin)
+ return;
+
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+ const int *iter;
+
+ for (iter = (const int *)begin; iter < (const int *)end; iter++)
+ fct(offset_to_ptr(iter), priv);
+ } else {
+ struct tracepoint * const *iter;
+
+ for (iter = begin; iter < end; iter++)
+ fct(*iter, priv);
+ }
+}
+
#ifdef CONFIG_MODULES
bool trace_module_has_bad_taint(struct module *mod)
{
@@ -435,15 +456,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
* Ensure the tracer unregistered the module's probes before the module
* teardown is performed. Prevents leaks of probe and data pointers.
*/
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
- struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
{
- struct tracepoint * const *iter;
-
- if (!begin)
- return;
- for (iter = begin; iter < end; iter++)
- WARN_ON_ONCE((*iter)->funcs);
+ WARN_ON_ONCE(tp->funcs);
}
static int tracepoint_module_coming(struct module *mod)
@@ -494,8 +509,9 @@ static void tracepoint_module_going(struct module *mod)
* Called the going notifier before checking for
* quiescence.
*/
- tp_module_going_check_quiescent(mod->tracepoints_ptrs,
- mod->tracepoints_ptrs + mod->num_tracepoints);
+ for_each_tracepoint_range(mod->tracepoints_ptrs,
+ mod->tracepoints_ptrs + mod->num_tracepoints,
+ tp_module_going_check_quiescent, NULL);
break;
}
}
@@ -547,19 +563,6 @@ static __init int init_tracepoints(void)
__initcall(init_tracepoints);
#endif /* CONFIG_MODULES */
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
- struct tracepoint * const *end,
- void (*fct)(struct tracepoint *tp, void *priv),
- void *priv)
-{
- struct tracepoint * const *iter;
-
- if (!begin)
- return;
- for (iter = begin; iter < end; iter++)
- fct(*iter, priv);
-}
-
/**
* for_each_kernel_tracepoint - iteration on all kernel tracepoints
* @fct: callback
diff --git a/kernel/user.c b/kernel/user.c
index 36288d840675..0df9b1640b2a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
/* root_user.__count is 1, for init task cred */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = REFCOUNT_INIT(1),
.processes = ATOMIC_INIT(1),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
hlist_for_each_entry(user, hashent, uidhash_node) {
if (uid_eq(user->uid, uid)) {
- atomic_inc(&user->__count);
+ refcount_inc(&user->__count);
return user;
}
}
@@ -169,11 +169,8 @@ void free_uid(struct user_struct *up)
if (!up)
return;
- local_irq_save(flags);
- if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
free_user(up, flags);
- else
- local_irq_restore(flags);
}
struct user_struct *alloc_uid(kuid_t uid)
@@ -191,7 +188,7 @@ struct user_struct *alloc_uid(kuid_t uid)
goto out_unlock;
new->uid = uid;
- atomic_set(&new->__count, 1);
+ refcount_set(&new->__count, 1);
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c3d7583fcd21..e5222b5fb4fe 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -859,7 +859,16 @@ static ssize_t map_write(struct file *file, const char __user *buf,
unsigned idx;
struct uid_gid_extent extent;
char *kbuf = NULL, *pos, *next_line;
- ssize_t ret = -EINVAL;
+ ssize_t ret;
+
+ /* Only allow < page size writes at the beginning of the file */
+ if ((*ppos != 0) || (count >= PAGE_SIZE))
+ return -EINVAL;
+
+ /* Slurp in the user data */
+ kbuf = memdup_user_nul(buf, count);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
/*
* The userns_state_mutex serializes all writes to any given map.
@@ -895,19 +904,6 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
goto out;
- /* Only allow < page size writes at the beginning of the file */
- ret = -EINVAL;
- if ((*ppos != 0) || (count >= PAGE_SIZE))
- goto out;
-
- /* Slurp in the user data */
- kbuf = memdup_user_nul(buf, count);
- if (IS_ERR(kbuf)) {
- ret = PTR_ERR(kbuf);
- kbuf = NULL;
- goto out;
- }
-
/* Parse the user data */
ret = -EINVAL;
pos = kbuf;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 233cd8fc6910..258033d62cb3 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -18,7 +18,7 @@
#ifdef CONFIG_PROC_SYSCTL
-static void *get_uts(struct ctl_table *table, int write)
+static void *get_uts(struct ctl_table *table)
{
char *which = table->data;
struct uts_namespace *uts_ns;
@@ -26,21 +26,9 @@ static void *get_uts(struct ctl_table *table, int write)
uts_ns = current->nsproxy->uts_ns;
which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
- if (!write)
- down_read(&uts_sem);
- else
- down_write(&uts_sem);
return which;
}
-static void put_uts(struct ctl_table *table, int write, void *which)
-{
- if (!write)
- up_read(&uts_sem);
- else
- up_write(&uts_sem);
-}
-
/*
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
@@ -50,13 +38,34 @@ static int proc_do_uts_string(struct ctl_table *table, int write,
{
struct ctl_table uts_table;
int r;
+ char tmp_data[__NEW_UTS_LEN + 1];
+
memcpy(&uts_table, table, sizeof(uts_table));
- uts_table.data = get_uts(table, write);
+ uts_table.data = tmp_data;
+
+ /*
+ * Buffer the value in tmp_data so that proc_dostring() can be called
+ * without holding any locks.
+ * We also need to read the original value in the write==1 case to
+ * support partial writes.
+ */
+ down_read(&uts_sem);
+ memcpy(tmp_data, get_uts(table), sizeof(tmp_data));
+ up_read(&uts_sem);
r = proc_dostring(&uts_table, write, buffer, lenp, ppos);
- put_uts(table, write, uts_table.data);
- if (write)
+ if (write) {
+ /*
+ * Write back the new value.
+ * Note that, since we dropped uts_sem, the result can
+ * theoretically be incorrect if there are two parallel writes
+ * at non-zero offsets to the same sysctl.
+ */
+ down_write(&uts_sem);
+ memcpy(get_uts(table), tmp_data, sizeof(tmp_data));
+ up_write(&uts_sem);
proc_sys_poll_notify(table->poll);
+ }
return r;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 78b192071ef7..60e80198c3df 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2652,6 +2652,9 @@ void flush_workqueue(struct workqueue_struct *wq)
if (WARN_ON(!wq_online))
return;
+ lock_map_acquire(&wq->lockdep_map);
+ lock_map_release(&wq->lockdep_map);
+
mutex_lock(&wq->mutex);
/*
@@ -2843,7 +2846,8 @@ reflush:
}
EXPORT_SYMBOL_GPL(drain_workqueue);
-static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
+static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
+ bool from_cancel)
{
struct worker *worker = NULL;
struct worker_pool *pool;
@@ -2885,7 +2889,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
* workqueues the deadlock happens when the rescuer stalls, blocking
* forward progress.
*/
- if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
+ if (!from_cancel &&
+ (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
lock_map_acquire(&pwq->wq->lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);
}
@@ -2896,6 +2901,27 @@ already_gone:
return false;
}
+static bool __flush_work(struct work_struct *work, bool from_cancel)
+{
+ struct wq_barrier barr;
+
+ if (WARN_ON(!wq_online))
+ return false;
+
+ if (!from_cancel) {
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);
+ }
+
+ if (start_flush_work(work, &barr, from_cancel)) {
+ wait_for_completion(&barr.done);
+ destroy_work_on_stack(&barr.work);
+ return true;
+ } else {
+ return false;
+ }
+}
+
/**
* flush_work - wait for a work to finish executing the last queueing instance
* @work: the work to flush
@@ -2909,18 +2935,7 @@ already_gone:
*/
bool flush_work(struct work_struct *work)
{
- struct wq_barrier barr;
-
- if (WARN_ON(!wq_online))
- return false;
-
- if (start_flush_work(work, &barr)) {
- wait_for_completion(&barr.done);
- destroy_work_on_stack(&barr.work);
- return true;
- } else {
- return false;
- }
+ return __flush_work(work, false);
}
EXPORT_SYMBOL_GPL(flush_work);
@@ -2986,7 +3001,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
* isn't executing.
*/
if (wq_online)
- flush_work(work);
+ __flush_work(work, true);
clear_work_data(work);
OpenPOWER on IntegriCloud