Diffstat (limited to 'kernel')
36 files changed, 613 insertions, 367 deletions
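Note: the first hunks below (kernel/bpf/hashtab.c) give each BPF hash table a per-instance jhash seed, htab->hashrnd, drawn from get_random_int() at map creation, so the key-to-bucket mapping is no longer predictable across maps or boots. A minimal userspace sketch of the same idea; the FNV-1a hash and table layout are stand-ins, not the kernel's:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <time.h>

	#define N_BUCKETS 64

	struct htab {
		uint32_t hashrnd;	/* per-table seed, like htab->hashrnd */
	};

	static uint32_t hash_key(const void *key, size_t len, uint32_t seed)
	{
		const unsigned char *p = key;
		uint32_t h = 2166136261u ^ seed;	/* fold the seed into the initial state */

		while (len--)
			h = (h ^ *p++) * 16777619u;
		return h;
	}

	int main(void)
	{
		struct htab a, b;
		const char *key = "example-key";

		srandom((unsigned)time(NULL));
		a.hashrnd = (uint32_t)random();	/* stands in for get_random_int() */
		b.hashrnd = (uint32_t)random();

		/* The same key usually lands in different buckets in different tables. */
		printf("table a: bucket %u\n", hash_key(key, strlen(key), a.hashrnd) % N_BUCKETS);
		printf("table b: bucket %u\n", hash_key(key, strlen(key), b.hashrnd) % N_BUCKETS);
		return 0;
	}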
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 04b8eda94e7d..03cc59ee9c95 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -15,6 +15,7 @@ #include <linux/jhash.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> +#include <linux/random.h> #include <uapi/linux/btf.h> #include "percpu_freelist.h" #include "bpf_lru_list.h" @@ -41,6 +42,7 @@ struct bpf_htab { atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ + u32 hashrnd; }; /* each htab element is struct htab_elem + key + value */ @@ -371,6 +373,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (!htab->buckets) goto free_htab; + htab->hashrnd = get_random_int(); for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); @@ -402,9 +405,9 @@ free_htab: return ERR_PTR(err); } -static inline u32 htab_map_hash(const void *key, u32 key_len) +static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) { - return jhash(key, key_len, 0); + return jhash(key, key_len, hashrnd); } static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) @@ -470,7 +473,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); head = select_bucket(htab, hash); @@ -597,7 +600,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) if (!key) goto find_first_elem; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); head = select_bucket(htab, hash); @@ -824,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -880,7 +883,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -945,7 +948,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -998,7 +1001,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -1071,7 +1074,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; @@ -1103,7 +1106,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) key_size = map->key_size; - hash = htab_map_hash(key, key_size); + hash = htab_map_hash(key, key_size, htab->hashrnd); b = __select_bucket(htab, hash); head = &b->head; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 98e621a29e8e..cf5195c7c331 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1427,12 +1427,15 @@ out: static void 
smap_write_space(struct sock *sk) { struct smap_psock *psock; + void (*write_space)(struct sock *sk); rcu_read_lock(); psock = smap_psock_sk(sk); if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state))) schedule_work(&psock->tx_work); + write_space = psock->save_write_space; rcu_read_unlock(); + write_space(sk); } static void smap_stop_sock(struct smap_psock *psock, struct sock *sk) @@ -2140,7 +2143,9 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return ERR_PTR(-EPERM); /* check sanity of attributes */ - if (attr->max_entries == 0 || attr->value_size != 4 || + if (attr->max_entries == 0 || + attr->key_size == 0 || + attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); @@ -2267,8 +2272,10 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, } l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, htab->map.numa_node); - if (!l_new) + if (!l_new) { + atomic_dec(&htab->count); return ERR_PTR(-ENOMEM); + } memcpy(l_new->key, key, key_size); l_new->sk = sk; diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 77ff1cd6a252..75568fcf2180 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -8,6 +8,32 @@ #include <linux/list.h> #include <linux/refcount.h> +#define TRACE_CGROUP_PATH_LEN 1024 +extern spinlock_t trace_cgroup_path_lock; +extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; + +/* + * cgroup_path() takes a spin lock. It is good practice not to take + * spin locks within trace point handlers, as they are mostly hidden + * from normal view. As cgroup_path() can take the kernfs_rename_lock + * spin lock, it is best to not call that function from the trace event + * handler. + * + * Note: trace_cgroup_##type##_enabled() is a static branch that will only + * be set when the trace event is enabled. + */ +#define TRACE_CGROUP_PATH(type, cgrp, ...) \ + do { \ + if (trace_cgroup_##type##_enabled()) { \ + spin_lock(&trace_cgroup_path_lock); \ + cgroup_path(cgrp, trace_cgroup_path, \ + TRACE_CGROUP_PATH_LEN); \ + trace_cgroup_##type(cgrp, trace_cgroup_path, \ + ##__VA_ARGS__); \ + spin_unlock(&trace_cgroup_path_lock); \ + } \ + } while (0) + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 8b4f0768efd6..51063e7a93c2 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) if (task) { ret = cgroup_migrate(task, false, &mgctx); if (!ret) - trace_cgroup_transfer_tasks(to, task, false); + TRACE_CGROUP_PATH(transfer_tasks, to, task, false); put_task_struct(task); } } while (task && !ret); @@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent ret = kernfs_rename(kn, new_parent, new_name_str); if (!ret) - trace_cgroup_rename(cgrp); + TRACE_CGROUP_PATH(rename, cgrp); mutex_unlock(&cgroup_mutex); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 35cf3d71f8aa..aae10baf1902 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif +DEFINE_SPINLOCK(trace_cgroup_path_lock); +char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; + /* * Protects cgroup_idr and css_idr so that IDs can be released without * grabbing cgroup_mutex. 
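Note: the TRACE_CGROUP_PATH() macro introduced above formats the cgroup path into one shared, lock-protected buffer, and only after the event's static branch says the tracepoint is enabled, so the disabled path pays nothing and cgroup_path()'s internal lock is never taken inside the handler. A userspace model of that pattern; the bool flag and mutex stand in for the static key and spinlock:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define PATH_LEN 1024

	static bool trace_enabled;	/* stands in for trace_cgroup_..._enabled() */
	static pthread_mutex_t path_lock = PTHREAD_MUTEX_INITIALIZER;
	static char path_buf[PATH_LEN];	/* shared scratch, like trace_cgroup_path */

	static void emit_event(int id)
	{
		if (!trace_enabled)	/* cheap test first: no locking, no formatting */
			return;

		pthread_mutex_lock(&path_lock);	/* one writer in the shared buffer at a time */
		snprintf(path_buf, sizeof(path_buf), "/sys/fs/cgroup/demo-%d", id);
		printf("event: %s\n", path_buf);
		pthread_mutex_unlock(&path_lock);
	}

	int main(void)
	{
		emit_event(1);		/* disabled: no work at all */
		trace_enabled = true;
		emit_event(2);		/* enabled: format under the lock */
		return 0;
	}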
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, cgroup_migrate_finish(&mgctx); if (!ret) - trace_cgroup_attach_task(dst_cgrp, leader, threadgroup); + TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup); return ret; } @@ -4636,7 +4639,7 @@ static void css_release_work_fn(struct work_struct *work) struct cgroup *tcgrp; /* cgroup release path */ - trace_cgroup_release(cgrp); + TRACE_CGROUP_PATH(release, cgrp); if (cgroup_on_dfl(cgrp)) cgroup_rstat_flush(cgrp); @@ -4979,7 +4982,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) if (ret) goto out_destroy; - trace_cgroup_mkdir(cgrp); + TRACE_CGROUP_PATH(mkdir, cgrp); /* let's create and online css's */ kernfs_activate(kn); @@ -5167,9 +5170,8 @@ int cgroup_rmdir(struct kernfs_node *kn) return 0; ret = cgroup_destroy_locked(cgrp); - if (!ret) - trace_cgroup_rmdir(cgrp); + TRACE_CGROUP_PATH(rmdir, cgrp); cgroup_kn_unlock(kn); return ret; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index b66aced5e8c2..933cb3e45b98 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -14,8 +14,8 @@ #include <asm/sections.h> /* vmcoreinfo stuff */ -static unsigned char *vmcoreinfo_data; -static size_t vmcoreinfo_size; +unsigned char *vmcoreinfo_data; +size_t vmcoreinfo_size; u32 *vmcoreinfo_note; /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ @@ -344,7 +344,7 @@ void crash_save_vmcoreinfo(void) if (vmcoreinfo_data_safecopy) vmcoreinfo_data = vmcoreinfo_data_safecopy; - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); + vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); update_vmcoreinfo_note(); } @@ -401,7 +401,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(init_uts_ns); VMCOREINFO_SYMBOL(node_online_map); #ifdef CONFIG_MMU - VMCOREINFO_SYMBOL(swapper_pg_dir); + VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); #endif VMCOREINFO_SYMBOL(_stext); VMCOREINFO_SYMBOL(vmap_area_list); diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index c187aa3df3c8..24a77c34e9ad 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra - * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * Copyright © 2009 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> * * For licensing details see kernel-base/COPYING */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 80f456ec5d89..2a62b96600ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1334,7 +1334,7 @@ static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p, static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) { - return perf_event_pid_type(event, p, __PIDTYPE_TGID); + return perf_event_pid_type(event, p, PIDTYPE_TGID); } static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) diff --git a/kernel/exit.c b/kernel/exit.c index c3c7ac560114..0e21e6d21f35 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -73,6 +73,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) nr_threads--; detach_pid(p, PIDTYPE_PID); if (group_dead) { + detach_pid(p, PIDTYPE_TGID); detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); @@ -680,7 +681,8 @@ static void forget_original_parent(struct task_struct *father, t->parent = t->real_parent; if (t->pdeath_signal) group_send_sig_info(t->pdeath_signal, - SEND_SIG_NOINFO, t); + SEND_SIG_NOINFO, t, + PIDTYPE_TGID); } /* * If this is a threaded reparent there is no need to @@ -1001,14 +1003,6 @@ struct wait_opts { int notask_error; }; -static inline -struct pid *task_pid_type(struct task_struct *task, enum pid_type type) -{ - if (type != PIDTYPE_PID) - task = task->group_leader; - return task->pids[type].pid; -} - static int eligible_pid(struct wait_opts *wo, struct task_struct *p) { return wo->wo_type == PIDTYPE_MAX || diff --git a/kernel/fork.c b/kernel/fork.c index 5ee74c113381..d896e9ca38b0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -310,8 +310,9 @@ static struct kmem_cache *mm_cachep; struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { - struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + struct vm_area_struct *vma; + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) vma_init(vma, mm); return vma; @@ -1301,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) tsk->nvcsw = tsk->nivcsw = 0; #ifdef CONFIG_DETECT_HUNG_TASK tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; + tsk->last_switch_time = 0; #endif tsk->mm = NULL; @@ -1425,7 +1427,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; atomic_set(&sig->count, 1); + spin_lock_irq(¤t->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); + spin_unlock_irq(¤t->sighand->siglock); return 0; } @@ -1487,6 +1491,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); + INIT_HLIST_HEAD(&sig->multiprocess); seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); @@ -1580,10 +1585,22 @@ static void posix_cpu_timers_init(struct task_struct *tsk) static inline void posix_cpu_timers_init(struct task_struct *tsk) { } #endif +static inline void init_task_pid_links(struct task_struct *task) +{ + enum pid_type type; + + for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { + INIT_HLIST_NODE(&task->pid_links[type]); + } +} + static inline void init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { - task->pids[type].pid = pid; + if (type == PIDTYPE_PID) + task->thread_pid = pid; + else + task->signal->pids[type] = pid; } static inline void rcu_copy_process(struct task_struct *p) 
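Note: the init_task_pid() change above, together with the kernel/pid.c hunks further down, splits the old per-task pids[] array: only the thread's own PID stays on task_struct (thread_pid), while the TGID/PGID/SID entries move to the signal_struct shared by the whole thread group. A simplified model of the resulting accessor, using stand-in types:

	#include <stdio.h>

	enum pid_type { PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX };

	struct pid { int nr; };

	struct signal {			/* shared by every thread in the group */
		struct pid *pids[PIDTYPE_MAX];
	};

	struct task {
		struct pid *thread_pid;	/* per-thread */
		struct signal *signal;	/* shared */
	};

	/* Mirrors task_pid_ptr(): PIDTYPE_PID is per-task, the rest are group-wide. */
	static struct pid **task_pid_ptr(struct task *t, enum pid_type type)
	{
		return type == PIDTYPE_PID ? &t->thread_pid : &t->signal->pids[type];
	}

	int main(void)
	{
		struct pid p1 = { 101 }, p2 = { 102 }, tg = { 101 };
		struct signal sig = { .pids = { NULL, &tg, NULL, NULL } };
		struct task t1 = { &p1, &sig }, t2 = { &p2, &sig };

		/* Two threads, distinct PIDs, one shared TGID. */
		printf("t1 pid=%d tgid=%d\n", (*task_pid_ptr(&t1, PIDTYPE_PID))->nr,
		       (*task_pid_ptr(&t1, PIDTYPE_TGID))->nr);
		printf("t2 pid=%d tgid=%d\n", (*task_pid_ptr(&t2, PIDTYPE_PID))->nr,
		       (*task_pid_ptr(&t2, PIDTYPE_TGID))->nr);
		return 0;
	}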
@@ -1621,6 +1638,7 @@ static __latent_entropy struct task_struct *copy_process( { int retval; struct task_struct *p; + struct multiprocess_signals delayed; /* * Don't allow sharing the root directory with processes in a different @@ -1668,6 +1686,24 @@ static __latent_entropy struct task_struct *copy_process( return ERR_PTR(-EINVAL); } + /* + * Force any signals received before this point to be delivered + * before the fork happens. Collect up signals sent to multiple + * processes that happen during the fork and delay them so that + * they appear to happen after the fork. + */ + sigemptyset(&delayed.signal); + INIT_HLIST_NODE(&delayed.node); + + spin_lock_irq(¤t->sighand->siglock); + if (!(clone_flags & CLONE_THREAD)) + hlist_add_head(&delayed.node, ¤t->signal->multiprocess); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + retval = -ERESTARTNOINTR; + if (signal_pending(current)) + goto fork_out; + retval = -ENOMEM; p = dup_task_struct(current, node); if (!p) @@ -1941,29 +1977,26 @@ static __latent_entropy struct task_struct *copy_process( rseq_fork(p, clone_flags); - /* - * Process group and session signals need to be delivered to just the - * parent before the fork or both the parent and the child after the - * fork. Restart if a signal comes in before we add the new process to - * it's process group. - * A fatal signal pending means that current will exit, so the new - * thread can't slip out of an OOM kill (or normal SIGKILL). - */ - recalc_sigpending(); - if (signal_pending(current)) { - retval = -ERESTARTNOINTR; - goto bad_fork_cancel_cgroup; - } + /* Don't start children in a dying pid namespace */ if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { retval = -ENOMEM; goto bad_fork_cancel_cgroup; } + /* Let kill terminate clone/fork in the middle */ + if (fatal_signal_pending(current)) { + retval = -EINTR; + goto bad_fork_cancel_cgroup; + } + + + init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { + init_task_pid(p, PIDTYPE_TGID, pid); init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); init_task_pid(p, PIDTYPE_SID, task_session(current)); @@ -1971,8 +2004,7 @@ static __latent_entropy struct task_struct *copy_process( ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } - - p->signal->leader_pid = pid; + p->signal->shared_pending.signal = delayed.signal; p->signal->tty = tty_kref_get(current->signal->tty); /* * Inherit has_child_subreaper flag under the same @@ -1983,6 +2015,7 @@ static __latent_entropy struct task_struct *copy_process( p->real_parent->signal->is_child_subreaper; list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); + attach_pid(p, PIDTYPE_TGID); attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); @@ -1990,6 +2023,7 @@ static __latent_entropy struct task_struct *copy_process( current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); + task_join_group_stop(p); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); list_add_tail_rcu(&p->thread_node, @@ -1998,8 +2032,8 @@ static __latent_entropy struct task_struct *copy_process( attach_pid(p, PIDTYPE_PID); nr_threads++; } - total_forks++; + hlist_del_init(&delayed.node); spin_unlock(¤t->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); @@ -2064,16 +2098,19 @@ bad_fork_free: 
put_task_stack(p); free_task(p); fork_out: + spin_lock_irq(¤t->sighand->siglock); + hlist_del_init(&delayed.node); + spin_unlock_irq(¤t->sighand->siglock); return ERR_PTR(retval); } -static inline void init_idle_pids(struct pid_link *links) +static inline void init_idle_pids(struct task_struct *idle) { enum pid_type type; for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { - INIT_HLIST_NODE(&links[type].node); /* not really needed */ - links[type].pid = &init_struct_pid; + INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ + init_task_pid(idle, type, &init_struct_pid); } } @@ -2083,7 +2120,7 @@ struct task_struct *fork_idle(int cpu) task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, cpu_to_node(cpu)); if (!IS_ERR(task)) { - init_idle_pids(task->pids); + init_idle_pids(task); init_idle(task, cpu); } diff --git a/kernel/futex.c b/kernel/futex.c index 1f450e092c74..11fc3bb456d6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3523,10 +3523,12 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; + /* fall through */ case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; + /* fall through */ case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 32b479468e4d..b9132d1269ef 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; */ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; +/* + * Zero (default value) means use sysctl_hung_task_timeout_secs: + */ +unsigned long __read_mostly sysctl_hung_task_check_interval_secs; + int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; @@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (switch_count != t->last_switch_count) { t->last_switch_count = switch_count; + t->last_switch_time = jiffies; return; } + if (time_is_after_jiffies(t->last_switch_time + timeout * HZ)) + return; trace_sched_process_hang(t); @@ -245,8 +253,13 @@ static int watchdog(void *dummy) for ( ; ; ) { unsigned long timeout = sysctl_hung_task_timeout_secs; - long t = hung_timeout_jiffies(hung_last_checked, timeout); + unsigned long interval = sysctl_hung_task_check_interval_secs; + long t; + if (interval == 0) + interval = timeout; + interval = min_t(unsigned long, interval, timeout); + t = hung_timeout_jiffies(hung_last_checked, interval); if (t <= 0) { if (!atomic_xchg(&reset_hung_task, 0)) check_hung_uninterruptible_tasks(timeout); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index a23e21ada81b..02a0b01380d8 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -432,6 +432,7 @@ int sprint_backtrace(char *buffer, unsigned long address) /* To avoid using get_symbol_offset for every symbol, we carry prefix along. 
*/ struct kallsym_iter { loff_t pos; + loff_t pos_arch_end; loff_t pos_mod_end; loff_t pos_ftrace_mod_end; unsigned long value; @@ -443,9 +444,29 @@ struct kallsym_iter { int show_value; }; +int __weak arch_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name) +{ + return -EINVAL; +} + +static int get_ksymbol_arch(struct kallsym_iter *iter) +{ + int ret = arch_get_kallsym(iter->pos - kallsyms_num_syms, + &iter->value, &iter->type, + iter->name); + + if (ret < 0) { + iter->pos_arch_end = iter->pos; + return 0; + } + + return 1; +} + static int get_ksymbol_mod(struct kallsym_iter *iter) { - int ret = module_get_kallsym(iter->pos - kallsyms_num_syms, + int ret = module_get_kallsym(iter->pos - iter->pos_arch_end, &iter->value, &iter->type, iter->name, iter->module_name, &iter->exported); @@ -501,32 +522,34 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; if (new_pos == 0) { + iter->pos_arch_end = 0; iter->pos_mod_end = 0; iter->pos_ftrace_mod_end = 0; } } +/* + * The end position (last + 1) of each additional kallsyms section is recorded + * in iter->pos_..._end as each section is added, and so can be used to + * determine which get_ksymbol_...() function to call next. + */ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) { iter->pos = pos; - if (iter->pos_ftrace_mod_end > 0 && - iter->pos_ftrace_mod_end < iter->pos) - return get_ksymbol_bpf(iter); + if ((!iter->pos_arch_end || iter->pos_arch_end > pos) && + get_ksymbol_arch(iter)) + return 1; - if (iter->pos_mod_end > 0 && - iter->pos_mod_end < iter->pos) { - if (!get_ksymbol_ftrace_mod(iter)) - return get_ksymbol_bpf(iter); + if ((!iter->pos_mod_end || iter->pos_mod_end > pos) && + get_ksymbol_mod(iter)) return 1; - } - if (!get_ksymbol_mod(iter)) { - if (!get_ksymbol_ftrace_mod(iter)) - return get_ksymbol_bpf(iter); - } + if ((!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > pos) && + get_ksymbol_ftrace_mod(iter)) + return 1; - return 1; + return get_ksymbol_bpf(iter); } /* Returns false if pos at or past end of file. 
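Note: the kallsyms rework above chains extra symbol sources (arch, modules, ftrace trampolines, bpf) behind the core table by recording each section's end position (iter->pos_..._end) the first time that section is exhausted, then offsetting later lookups by it. A userspace sketch of that cursor scheme over two stand-in sections:

	#include <stdio.h>

	static const char *core[] = { "core_a", "core_b" };
	static const char *extra[] = { "extra_a", "extra_b", "extra_c" };
	#define LEN(a) (sizeof(a) / sizeof((a)[0]))

	struct iter {
		long pos;
		long pos_core_end;	/* last core position + 1, learned on first miss */
	};

	/* Returns the symbol at it->pos, trying sections in order like update_iter_mod(). */
	static const char *iter_next(struct iter *it)
	{
		/* Still (possibly) in the core section? */
		if (!it->pos_core_end || it->pos < it->pos_core_end) {
			if (it->pos < (long)LEN(core))
				return core[it->pos++];
			it->pos_core_end = it->pos;	/* record the boundary once */
		}
		/* Fall through to the next section, offset by the recorded end. */
		if (it->pos - it->pos_core_end < (long)LEN(extra))
			return extra[it->pos++ - it->pos_core_end];
		return NULL;
	}

	int main(void)
	{
		struct iter it = { 0, 0 };
		const char *s;

		while ((s = iter_next(&it)))
			printf("%s\n", s);
		return 0;
	}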
*/ diff --git a/kernel/memremap.c b/kernel/memremap.c index 1f87ea6b6545..5b8600d39931 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -43,7 +43,7 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff) pgoff += 1UL << order, order = order_at((res), pgoff)) #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) -int device_private_entry_fault(struct vm_area_struct *vma, +vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, unsigned long addr, swp_entry_t entry, unsigned int flags, @@ -365,7 +365,6 @@ void __put_devmap_managed_page(struct page *page) __ClearPageActive(page); __ClearPageWaiters(page); - page->mapping = NULL; mem_cgroup_uncharge(page); page->pgmap->page_free(page, page->pgmap->data); diff --git a/kernel/module.c b/kernel/module.c index b046a32520d8..6746c85511fe 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -529,12 +529,30 @@ static bool check_symbol(const struct symsearch *syms, return true; } +static unsigned long kernel_symbol_value(const struct kernel_symbol *sym) +{ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + return (unsigned long)offset_to_ptr(&sym->value_offset); +#else + return sym->value; +#endif +} + +static const char *kernel_symbol_name(const struct kernel_symbol *sym) +{ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + return offset_to_ptr(&sym->name_offset); +#else + return sym->name; +#endif +} + static int cmp_name(const void *va, const void *vb) { const char *a; const struct kernel_symbol *b; a = va; b = vb; - return strcmp(a, b->name); + return strcmp(a, kernel_symbol_name(b)); } static bool find_symbol_in_section(const struct symsearch *syms, @@ -2170,7 +2188,7 @@ void *__symbol_get(const char *symbol) sym = NULL; preempt_enable(); - return sym ? (void *)sym->value : NULL; + return sym ? (void *)kernel_symbol_value(sym) : NULL; } EXPORT_SYMBOL_GPL(__symbol_get); @@ -2200,10 +2218,12 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - if (find_symbol(s->name, &owner, NULL, true, false)) { + if (find_symbol(kernel_symbol_name(s), &owner, NULL, + true, false)) { pr_err("%s: exports duplicate symbol %s" " (owned by %s)\n", - mod->name, s->name, module_name(owner)); + mod->name, kernel_symbol_name(s), + module_name(owner)); return -ENOEXEC; } } @@ -2252,7 +2272,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { - sym[i].st_value = ksym->value; + sym[i].st_value = kernel_symbol_value(ksym); break; } @@ -2516,7 +2536,7 @@ static int is_exported(const char *name, unsigned long value, ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); else ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); - return ks != NULL && ks->value == value; + return ks != NULL && kernel_symbol_value(ks) == value; } /* As per nm */ diff --git a/kernel/pid.c b/kernel/pid.c index 157fe4b19971..de1cfc4f75a2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -265,27 +265,33 @@ struct pid *find_vpid(int nr) } EXPORT_SYMBOL_GPL(find_vpid); +static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type) +{ + return (type == PIDTYPE_PID) ? + &task->thread_pid : + &task->signal->pids[type]; +} + /* * attach_pid() must be called with the tasklist_lock write-held. 
*/ void attach_pid(struct task_struct *task, enum pid_type type) { - struct pid_link *link = &task->pids[type]; - hlist_add_head_rcu(&link->node, &link->pid->tasks[type]); + struct pid *pid = *task_pid_ptr(task, type); + hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]); } static void __change_pid(struct task_struct *task, enum pid_type type, struct pid *new) { - struct pid_link *link; + struct pid **pid_ptr = task_pid_ptr(task, type); struct pid *pid; int tmp; - link = &task->pids[type]; - pid = link->pid; + pid = *pid_ptr; - hlist_del_rcu(&link->node); - link->pid = new; + hlist_del_rcu(&task->pid_links[type]); + *pid_ptr = new; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (!hlist_empty(&pid->tasks[tmp])) @@ -310,8 +316,9 @@ void change_pid(struct task_struct *task, enum pid_type type, void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { - new->pids[type].pid = old->pids[type].pid; - hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); + if (type == PIDTYPE_PID) + new->thread_pid = old->thread_pid; + hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]); } struct task_struct *pid_task(struct pid *pid, enum pid_type type) @@ -322,7 +329,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), lockdep_tasklist_lock_is_held()); if (first) - result = hlist_entry(first, struct task_struct, pids[(type)].node); + result = hlist_entry(first, struct task_struct, pid_links[(type)]); } return result; } @@ -360,9 +367,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; rcu_read_lock(); - if (type != PIDTYPE_PID) - task = task->group_leader; - pid = get_pid(rcu_dereference(task->pids[type].pid)); + pid = get_pid(rcu_dereference(*task_pid_ptr(task, type))); rcu_read_unlock(); return pid; } @@ -420,15 +425,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, rcu_read_lock(); if (!ns) ns = task_active_pid_ns(current); - if (likely(pid_alive(task))) { - if (type != PIDTYPE_PID) { - if (type == __PIDTYPE_TGID) - type = PIDTYPE_PID; - - task = task->group_leader; - } - nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); - } + if (likely(pid_alive(task))) + nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns); rcu_read_unlock(); return nr; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index e880ca22c5a5..3a6c2f87699e 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -105,6 +105,7 @@ config PM_SLEEP def_bool y depends on SUSPEND || HIBERNATE_CALLBACKS select PM + select SRCU config PM_SLEEP_SMP def_bool y diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 90b6ab01db59..924e37fb1620 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -66,6 +66,9 @@ int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ }; +atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); +EXPORT_SYMBOL(ignore_console_lock_warning); + /* * Low level drivers may need that to know if they can schedule in * their unblank() callback or not. So let's export it. @@ -2788,7 +2791,8 @@ EXPORT_SYMBOL(unregister_console); void __init console_init(void) { int ret; - initcall_t *call; + initcall_t call; + initcall_entry_t *ce; /* Setup the default TTY line discipline. */ n_tty_init(); @@ -2797,13 +2801,14 @@ void __init console_init(void) * set up the console device so that later boot sequences can * inform about problems etc.. 
*/ - call = __con_initcall_start; + ce = __con_initcall_start; trace_initcall_level("console"); - while (call < __con_initcall_end) { - trace_initcall_start((*call)); - ret = (*call)(); - trace_initcall_finish((*call), ret); - call++; + while (ce < __con_initcall_end) { + call = initcall_from_entry(ce); + trace_initcall_start(call); + ret = call(); + trace_initcall_finish(call, ret); + ce++; } } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c89302ddf807..625bc9897f62 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2774,6 +2774,8 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) if (current->set_child_tid) put_user(task_pid_vnr(current), current->set_child_tid); + + calculate_sigpending(); } /* diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 1a3e9bddd17b..16f84142f2f4 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -190,7 +190,7 @@ static void cpuidle_idle_call(void) */ next_state = cpuidle_select(drv, dev, &stop_tick); - if (stop_tick) + if (stop_tick || tick_nohz_tick_stopped()) tick_nohz_idle_stop_tick(); else tick_nohz_idle_retain_tick(); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 870f97b313e3..5dd47f1103d1 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -69,6 +69,8 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, wait_queue_entry_t *curr, *next; int cnt = 0; + lockdep_assert_held(&wq_head->lock); + if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) { curr = list_next_entry(bookmark, entry); diff --git a/kernel/signal.c b/kernel/signal.c index 8d8a940422a8..5843c541fda9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -65,14 +65,14 @@ static void __user *sig_handler(struct task_struct *t, int sig) return t->sighand->action[sig - 1].sa.sa_handler; } -static int sig_handler_ignored(void __user *handler, int sig) +static inline bool sig_handler_ignored(void __user *handler, int sig) { /* Is it explicitly or implicitly ignored? */ return handler == SIG_IGN || - (handler == SIG_DFL && sig_kernel_ignore(sig)); + (handler == SIG_DFL && sig_kernel_ignore(sig)); } -static int sig_task_ignored(struct task_struct *t, int sig, bool force) +static bool sig_task_ignored(struct task_struct *t, int sig, bool force) { void __user *handler; @@ -80,12 +80,12 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && handler == SIG_DFL && !(force && sig_kernel_only(sig))) - return 1; + return true; return sig_handler_ignored(handler, sig); } -static int sig_ignored(struct task_struct *t, int sig, bool force) +static bool sig_ignored(struct task_struct *t, int sig, bool force) { /* * Blocked signals are never ignored, since the @@ -93,7 +93,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) * unblocked. */ if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) - return 0; + return false; /* * Tracers may want to know about even ignored signal unless it @@ -101,7 +101,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) * by SIGNAL_UNKILLABLE task. */ if (t->ptrace && sig != SIGKILL) - return 0; + return false; return sig_task_ignored(t, sig, force); } @@ -110,7 +110,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) * Re-calculate pending state from the set of locally pending * signals, globally pending signals, and blocked signals. 
*/ -static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) +static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) { unsigned long ready; long i; @@ -138,20 +138,21 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) #define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) -static int recalc_sigpending_tsk(struct task_struct *t) +static bool recalc_sigpending_tsk(struct task_struct *t) { if ((t->jobctl & JOBCTL_PENDING_MASK) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); - return 1; + return true; } + /* * We must never clear the flag in another thread, or in current * when it's possible the current syscall is returning -ERESTART*. * So we don't clear it here, and only callers who know they should do. */ - return 0; + return false; } /* @@ -172,6 +173,17 @@ void recalc_sigpending(void) } +void calculate_sigpending(void) +{ + /* Have any signals or users of TIF_SIGPENDING been delayed + * until after fork? + */ + spin_lock_irq(¤t->sighand->siglock); + set_tsk_thread_flag(current, TIF_SIGPENDING); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + /* Given the mask, find the first available signal that should be serviced. */ #define SYNCHRONOUS_MASK \ @@ -362,6 +374,20 @@ static bool task_participate_group_stop(struct task_struct *task) return false; } +void task_join_group_stop(struct task_struct *task) +{ + /* Have the new thread join an on-going signal group stop */ + unsigned long jobctl = current->jobctl; + if (jobctl & JOBCTL_STOP_PENDING) { + struct signal_struct *sig = current->signal; + unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; + unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; + if (task_set_jobctl_pending(task, signr | gstop)) { + sig->group_stop_count++; + } + } +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an @@ -504,13 +530,15 @@ flush_signal_handlers(struct task_struct *t, int force_default) } } -int unhandled_signal(struct task_struct *tsk, int sig) +bool unhandled_signal(struct task_struct *tsk, int sig) { void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; if (is_global_init(tsk)) - return 1; + return true; + if (handler != SIG_IGN && handler != SIG_DFL) - return 0; + return false; + /* if ptraced, let the tracer determine */ return !tsk->ptrace; } @@ -684,14 +712,14 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state) * * All callers must be holding the siglock. 
*/ -static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) +static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) { struct sigqueue *q, *n; sigset_t m; sigandsets(&m, mask, &s->signal); if (sigisemptyset(&m)) - return 0; + return; sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { @@ -700,7 +728,6 @@ static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) __sigqueue_free(q); } } - return 1; } static inline int is_si_special(const struct siginfo *info) @@ -717,21 +744,16 @@ static inline bool si_fromuser(const struct siginfo *info) /* * called with RCU read lock from check_kill_permission() */ -static int kill_ok_by_cred(struct task_struct *t) +static bool kill_ok_by_cred(struct task_struct *t) { const struct cred *cred = current_cred(); const struct cred *tcred = __task_cred(t); - if (uid_eq(cred->euid, tcred->suid) || - uid_eq(cred->euid, tcred->uid) || - uid_eq(cred->uid, tcred->suid) || - uid_eq(cred->uid, tcred->uid)) - return 1; - - if (ns_capable(tcred->user_ns, CAP_KILL)) - return 1; - - return 0; + return uid_eq(cred->euid, tcred->suid) || + uid_eq(cred->euid, tcred->uid) || + uid_eq(cred->uid, tcred->suid) || + uid_eq(cred->uid, tcred->uid) || + ns_capable(tcred->user_ns, CAP_KILL); } /* @@ -882,20 +904,24 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ -static inline int wants_signal(int sig, struct task_struct *p) +static inline bool wants_signal(int sig, struct task_struct *p) { if (sigismember(&p->blocked, sig)) - return 0; + return false; + if (p->flags & PF_EXITING) - return 0; + return false; + if (sig == SIGKILL) - return 1; + return true; + if (task_is_stopped_or_traced(p)) - return 0; + return false; + return task_curr(p) || !signal_pending(p); } -static void complete_signal(int sig, struct task_struct *p, int group) +static void complete_signal(int sig, struct task_struct *p, enum pid_type type) { struct signal_struct *signal = p->signal; struct task_struct *t; @@ -908,7 +934,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) */ if (wants_signal(sig, p)) t = p; - else if (!group || thread_group_empty(p)) + else if ((type == PIDTYPE_PID) || thread_group_empty(p)) /* * There is just one thread and it does not need to be woken. * It will dequeue unblocked signals before it runs again. @@ -971,7 +997,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) return; } -static inline int legacy_queue(struct sigpending *signals, int sig) +static inline bool legacy_queue(struct sigpending *signals, int sig) { return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } @@ -998,7 +1024,7 @@ static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_str #endif static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, - int group, int from_ancestor_ns) + enum pid_type type, int from_ancestor_ns) { struct sigpending *pending; struct sigqueue *q; @@ -1012,7 +1038,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, from_ancestor_ns || (info == SEND_SIG_FORCED))) goto ret; - pending = group ? &t->signal->shared_pending : &t->pending; + pending = (type != PIDTYPE_PID) ? 
&t->signal->shared_pending : &t->pending; /* * Short-circuit ignored signals and support queuing * exactly one non-rt signal, so that we can get more @@ -1096,14 +1122,29 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, out_set: signalfd_notify(t, sig); sigaddset(&pending->signal, sig); - complete_signal(sig, t, group); + + /* Let multiprocess signals appear after on-going forks */ + if (type > PIDTYPE_TGID) { + struct multiprocess_signals *delayed; + hlist_for_each_entry(delayed, &t->signal->multiprocess, node) { + sigset_t *signal = &delayed->signal; + /* Can't queue both a stop and a continue signal */ + if (sig == SIGCONT) + sigdelsetmask(signal, SIG_KERNEL_STOP_MASK); + else if (sig_kernel_stop(sig)) + sigdelset(signal, SIGCONT); + sigaddset(signal, sig); + } + } + + complete_signal(sig, t, type); ret: - trace_signal_generate(sig, info, t, group, result); + trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); return ret; } static int send_signal(int sig, struct siginfo *info, struct task_struct *t, - int group) + enum pid_type type) { int from_ancestor_ns = 0; @@ -1112,7 +1153,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, !task_pid_nr_ns(current, task_active_pid_ns(t)); #endif - return __send_signal(sig, info, t, group, from_ancestor_ns); + return __send_signal(sig, info, t, type, from_ancestor_ns); } static void print_fatal_signal(int signr) @@ -1151,23 +1192,23 @@ __setup("print-fatal-signals=", setup_print_fatal_signals); int __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { - return send_signal(sig, info, p, 1); + return send_signal(sig, info, p, PIDTYPE_TGID); } static int specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) { - return send_signal(sig, info, t, 0); + return send_signal(sig, info, t, PIDTYPE_PID); } int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, - bool group) + enum pid_type type) { unsigned long flags; int ret = -ESRCH; if (lock_task_sighand(p, &flags)) { - ret = send_signal(sig, info, p, group); + ret = send_signal(sig, info, p, type); unlock_task_sighand(p, &flags); } @@ -1274,7 +1315,8 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, /* * send signal info to all the members of a group */ -int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) +int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, + enum pid_type type) { int ret; @@ -1283,7 +1325,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) rcu_read_unlock(); if (!ret && sig) - ret = do_send_sig_info(sig, info, p, true); + ret = do_send_sig_info(sig, info, p, type); return ret; } @@ -1301,7 +1343,7 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) success = 0; retval = -ESRCH; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { - int err = group_send_sig_info(sig, info, p); + int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID); success |= !err; retval = err; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); @@ -1317,7 +1359,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) - error = group_send_sig_info(sig, info, p); + error = group_send_sig_info(sig, info, p, PIDTYPE_TGID); rcu_read_unlock(); if (likely(!p || error != -ESRCH)) return error; @@ -1339,14 +1381,15 @@ static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) return error; } -static 
int kill_as_cred_perm(const struct cred *cred, - struct task_struct *target) +static inline bool kill_as_cred_perm(const struct cred *cred, + struct task_struct *target) { const struct cred *pcred = __task_cred(target); - if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) && - !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid)) - return 0; - return 1; + + return uid_eq(cred->euid, pcred->suid) || + uid_eq(cred->euid, pcred->uid) || + uid_eq(cred->uid, pcred->suid) || + uid_eq(cred->uid, pcred->uid); } /* like kill_pid_info(), but doesn't use uid/euid of "current" */ @@ -1376,7 +1419,7 @@ int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid, if (sig) { if (lock_task_sighand(p, &flags)) { - ret = __send_signal(sig, info, p, 1, 0); + ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0); unlock_task_sighand(p, &flags); } else ret = -ESRCH; @@ -1420,7 +1463,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) for_each_process(p) { if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { - int err = group_send_sig_info(sig, info, p); + int err = group_send_sig_info(sig, info, p, + PIDTYPE_MAX); ++count; if (err != -EPERM) retval = err; @@ -1446,7 +1490,7 @@ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) if (!valid_signal(sig)) return -EINVAL; - return do_send_sig_info(sig, info, p, false); + return do_send_sig_info(sig, info, p, PIDTYPE_PID); } #define __si_special(priv) \ @@ -1458,8 +1502,7 @@ send_sig(int sig, struct task_struct *p, int priv) return send_sig_info(sig, __si_special(priv), p); } -void -force_sig(int sig, struct task_struct *p) +void force_sig(int sig, struct task_struct *p) { force_sig_info(sig, SEND_SIG_PRIV, p); } @@ -1470,8 +1513,7 @@ force_sig(int sig, struct task_struct *p) * the problem was already a SIGSEGV, we'll want to * make sure we don't even try to deliver the signal.. */ -int -force_sigsegv(int sig, struct task_struct *p) +void force_sigsegv(int sig, struct task_struct *p) { if (sig == SIGSEGV) { unsigned long flags; @@ -1480,7 +1522,6 @@ force_sigsegv(int sig, struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } force_sig(SIGSEGV, p); - return 0; } int force_sig_fault(int sig, int code, void __user *addr @@ -1664,17 +1705,20 @@ void sigqueue_free(struct sigqueue *q) __sigqueue_free(q); } -int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) +int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) { int sig = q->info.si_signo; struct sigpending *pending; + struct task_struct *t; unsigned long flags; int ret, result; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); ret = -1; - if (!likely(lock_task_sighand(t, &flags))) + rcu_read_lock(); + t = pid_task(pid, type); + if (!t || !likely(lock_task_sighand(t, &flags))) goto ret; ret = 1; /* the signal is ignored */ @@ -1696,15 +1740,16 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) q->info.si_overrun = 0; signalfd_notify(t, sig); - pending = group ? &t->signal->shared_pending : &t->pending; + pending = (type != PIDTYPE_PID) ? 
&t->signal->shared_pending : &t->pending; list_add_tail(&q->list, &pending->list); sigaddset(&pending->signal, sig); - complete_signal(sig, t, group); + complete_signal(sig, t, type); result = TRACE_SIGNAL_DELIVERED; out: - trace_signal_generate(sig, &q->info, t, group, result); + trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result); unlock_task_sighand(t, &flags); ret: + rcu_read_unlock(); return ret; } @@ -1877,10 +1922,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, spin_unlock_irqrestore(&sighand->siglock, flags); } -static inline int may_ptrace_stop(void) +static inline bool may_ptrace_stop(void) { if (!likely(current->ptrace)) - return 0; + return false; /* * Are we in the middle of do_coredump? * If so and our tracer is also part of the coredump stopping @@ -1896,19 +1941,19 @@ static inline int may_ptrace_stop(void) */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) - return 0; + return false; - return 1; + return true; } /* * Return non-zero if there is a SIGKILL that should be waking us up. * Called with the siglock held. */ -static int sigkill_pending(struct task_struct *tsk) +static bool sigkill_pending(struct task_struct *tsk) { - return sigismember(&tsk->pending.signal, SIGKILL) || - sigismember(&tsk->signal->shared_pending.signal, SIGKILL); + return sigismember(&tsk->pending.signal, SIGKILL) || + sigismember(&tsk->signal->shared_pending.signal, SIGKILL); } /* @@ -2288,7 +2333,7 @@ static int ptrace_signal(int signr, siginfo_t *info) return signr; } -int get_signal(struct ksignal *ksig) +bool get_signal(struct ksignal *ksig) { struct sighand_struct *sighand = current->sighand; struct signal_struct *signal = current->signal; @@ -2298,7 +2343,7 @@ int get_signal(struct ksignal *ksig) task_work_run(); if (unlikely(uprobe_deny_signal())) - return 0; + return false; /* * Do this once, we can't return to user-mode if freezing() == T. @@ -2755,7 +2800,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, } #endif -static int do_sigpending(sigset_t *set) +static void do_sigpending(sigset_t *set) { spin_lock_irq(¤t->sighand->siglock); sigorsets(set, ¤t->pending.signal, @@ -2764,7 +2809,6 @@ static int do_sigpending(sigset_t *set) /* Outside the lock because only this thread touches it. */ sigandsets(set, ¤t->blocked, set); - return 0; } /** @@ -2776,15 +2820,16 @@ static int do_sigpending(sigset_t *set) SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) { sigset_t set; - int err; if (sigsetsize > sizeof(*uset)) return -EINVAL; - err = do_sigpending(&set); - if (!err && copy_to_user(uset, &set, sigsetsize)) - err = -EFAULT; - return err; + do_sigpending(&set); + + if (copy_to_user(uset, &set, sigsetsize)) + return -EFAULT; + + return 0; } #ifdef CONFIG_COMPAT @@ -2792,15 +2837,13 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, compat_size_t, sigsetsize) { sigset_t set; - int err; if (sigsetsize > sizeof(*uset)) return -EINVAL; - err = do_sigpending(&set); - if (!err) - err = put_compat_sigset(uset, &set, sigsetsize); - return err; + do_sigpending(&set); + + return put_compat_sigset(uset, &set, sigsetsize); } #endif @@ -3193,7 +3236,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) * probe. No signal is actually delivered. 
*/ if (!error && sig) { - error = do_send_sig_info(sig, info, p, false); + error = do_send_sig_info(sig, info, p, PIDTYPE_PID); /* * If lock_task_sighand() failed we pretend the task * dies after receiving the signal. The window is tiny, @@ -3562,25 +3605,26 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) { sigset_t set; - int err; if (sizeof(old_sigset_t) > sizeof(*uset)) return -EINVAL; - err = do_sigpending(&set); - if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t))) - err = -EFAULT; - return err; + do_sigpending(&set); + + if (copy_to_user(uset, &set, sizeof(old_sigset_t))) + return -EFAULT; + + return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32) { sigset_t set; - int err = do_sigpending(&set); - if (!err) - err = put_user(set.sig[0], set32); - return err; + + do_sigpending(&set); + + return put_user(set.sig[0], set32); } #endif @@ -3651,25 +3695,23 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig, size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; - int ret = -EINVAL; + int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) - goto out; + return -EINVAL; - if (act) { - if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) - return -EFAULT; - } + if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) + return -EFAULT; ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); + if (ret) + return ret; - if (!ret && oact) { - if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) - return -EFAULT; - } -out: - return ret; + if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) + return -EFAULT; + + return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, @@ -3960,7 +4002,7 @@ void kdb_send_sig(struct task_struct *t, int sig) "the deadlock.\n"); return; } - ret = send_signal(sig, SEND_SIG_PRIV, t, false); + ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); spin_unlock(&t->sighand->siglock); if (ret) kdb_printf("Fail to deliver Signal %d to process %d.\n", diff --git a/kernel/sys.c b/kernel/sys.c index e27b51d3facd..cf5c67533ff1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1237,18 +1237,19 @@ static int override_release(char __user *release, size_t len) SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { - int errno = 0; + struct new_utsname tmp; down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof *name)) - errno = -EFAULT; + memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!errno && override_release(name->release, sizeof(name->release))) - errno = -EFAULT; - if (!errno && override_architecture(name)) - errno = -EFAULT; - return errno; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + if (override_architecture(name)) + return -EFAULT; + return 0; } #ifdef __ARCH_WANT_SYS_OLD_UNAME @@ -1257,55 +1258,46 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) */ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) { - int error = 0; + struct old_utsname tmp; if (!name) return -EFAULT; down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - error = -EFAULT; + memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!error && override_release(name->release, sizeof(name->release))) - error = -EFAULT; - if (!error 
&& override_architecture(name)) - error = -EFAULT; - return error; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + if (override_architecture(name)) + return -EFAULT; + return 0; } SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { - int error; + struct oldold_utsname tmp = {}; if (!name) return -EFAULT; - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= __put_user(0, name->machine + __OLD_UTS_LEN); + memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); + memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); + memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN); + memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN); + memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN); up_read(&uts_sem); + if (copy_to_user(name, &tmp, sizeof(tmp))) + return -EFAULT; - if (!error && override_architecture(name)) - error = -EFAULT; - if (!error && override_release(name->release, sizeof(name->release))) - error = -EFAULT; - return error ? -EFAULT : 0; + if (override_architecture(name)) + return -EFAULT; + if (override_release(name->release, sizeof(name->release))) + return -EFAULT; + return 0; } #endif @@ -1319,17 +1311,18 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - struct new_utsname *u = utsname(); + struct new_utsname *u; + down_write(&uts_sem); + u = utsname(); memcpy(u->nodename, tmp, len); memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; uts_proc_notify(UTS_PROC_HOSTNAME); + up_write(&uts_sem); } - up_write(&uts_sem); return errno; } @@ -1337,8 +1330,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { - int i, errno; + int i; struct new_utsname *u; + char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; @@ -1347,11 +1341,11 @@ SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) i = 1 + strlen(u->nodename); if (i > len) i = len; - errno = 0; - if (copy_to_user(name, u->nodename, i)) - errno = -EFAULT; + memcpy(tmp, u->nodename, i); up_read(&uts_sem); - return errno; + if (copy_to_user(name, tmp, i)) + return -EFAULT; + return 0; } #endif @@ -1370,17 +1364,18 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - struct new_utsname *u = utsname(); + struct new_utsname *u; + down_write(&uts_sem); + u = utsname(); memcpy(u->domainname, tmp, len); memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; uts_proc_notify(UTS_PROC_DOMAINNAME); + up_write(&uts_sem); } - up_write(&uts_sem); return errno; } diff --git a/kernel/sysctl.c 
b/kernel/sysctl.c index f22f76b7a138..cc02050fd0c4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -145,7 +145,10 @@ static int minolduid; static int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; -/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */ +/* + * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs + * and hung_task_check_interval_secs + */ #ifdef CONFIG_DETECT_HUNG_TASK static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #endif @@ -222,7 +225,7 @@ static int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_MAGIC_SYSRQ -/* Note: sysrq code uses it's own private copy */ +/* Note: sysrq code uses its own private copy */ static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static int sysrq_sysctl_handler(struct ctl_table *table, int write, @@ -1091,6 +1094,14 @@ static struct ctl_table kern_table[] = { .extra2 = &hung_task_timeout_max, }, { + .procname = "hung_task_check_interval_secs", + .data = &sysctl_hung_task_check_interval_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, + { .procname = "hung_task_warnings", .data = &sysctl_hung_task_warnings, .maxlen = sizeof(int), @@ -1797,6 +1808,24 @@ static struct ctl_table fs_table[] = { .extra2 = &one, }, { + .procname = "protected_fifos", + .data = &sysctl_protected_fifos, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, + { + .procname = "protected_regular", + .data = &sysctl_protected_regular, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, + { .procname = "suid_dumpable", .data = &suid_dumpable, .maxlen = sizeof(int), @@ -1965,13 +1994,13 @@ static void warn_sysctl_write(struct ctl_table *table) } /** - * proc_first_pos_non_zero_ignore - check if firs position is allowed + * proc_first_pos_non_zero_ignore - check if first position is allowed * @ppos: file position * @table: the sysctl table * * Returns true if the first position is non-zero and the sysctl_writes_strict * mode indicates this is not allowed for numeric input types. String proc - * hadlers can ignore the return value. + * handlers can ignore the return value. */ static bool proc_first_pos_non_zero_ignore(loff_t *ppos, struct ctl_table *table) diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index f26acef5d7b4..9a65713c8309 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -139,9 +139,10 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) { struct signal_struct *sig = container_of(timer, struct signal_struct, real_timer); + struct pid *leader_pid = sig->pids[PIDTYPE_TGID]; - trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0); - kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid); + trace_itimer_expire(ITIMER_REAL, leader_pid, 0); + kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid); return HRTIMER_NORESTART; } diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 294d7b65af33..ce32cf741b25 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -894,7 +894,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, trace_itimer_expire(signo == SIGPROF ? 
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 294d7b65af33..ce32cf741b25 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -894,7 +894,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 			trace_itimer_expire(signo == SIGPROF ?
 					    ITIMER_PROF : ITIMER_VIRTUAL,
-					    tsk->signal->leader_pid, cur_time);
+					    task_tgid(tsk), cur_time);
 			__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 		}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f23cc46ecf3e..4b9127e95430 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -333,8 +333,8 @@ void posixtimer_rearm(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
-	struct task_struct *task;
-	int shared, ret = -1;
+	enum pid_type type;
+	int ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->posixtimer_rearm().
@@ -348,13 +348,8 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	rcu_read_lock();
-	task = pid_task(timr->it_pid, PIDTYPE_PID);
-	if (task) {
-		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-		ret = send_sigqueue(timr->sigq, task, shared);
-	}
-	rcu_read_unlock();
+	type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
+	ret = send_sigqueue(timr->sigq, timr->it_pid, type);
 	/* If we failed to send the signal the timer stops. */
 	return ret > 0;
 }
@@ -433,11 +428,13 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 
 static struct pid *good_sigevent(sigevent_t * event)
 {
-	struct task_struct *rtn = current->group_leader;
+	struct pid *pid = task_tgid(current);
+	struct task_struct *rtn;
 
 	switch (event->sigev_notify) {
 	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
-		rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+		pid = find_vpid(event->sigev_notify_thread_id);
+		rtn = pid_task(pid, PIDTYPE_PID);
 		if (!rtn || !same_thread_group(rtn, current))
 			return NULL;
 		/* FALLTHRU */
@@ -447,7 +444,7 @@ static struct pid *good_sigevent(sigevent_t * event)
 			return NULL;
 		/* FALLTHRU */
 	case SIGEV_NONE:
-		return task_pid(rtn);
+		return pid;
 	default:
 		return NULL;
 	}
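In posix_timer_event(), "deliver to the whole thread group or to one thread" is now expressed as a pid type handed to send_sigqueue() together with the stable struct pid, replacing the RCU lookup of a task_struct plus a shared flag. A hedged sketch of just the flag-to-type mapping; the constants are defined locally so the example compiles on its own.

/* Sketch: SIGEV_THREAD_ID targets one thread (PIDTYPE_PID), everything
 * else signals the whole thread group (PIDTYPE_TGID), which is the same
 * decision the patched ternary makes.
 */
#include <stdio.h>

#define SIGEV_SIGNAL	0
#define SIGEV_THREAD_ID	4

enum pid_type { PIDTYPE_PID, PIDTYPE_TGID };

static enum pid_type delivery_type(int sigev_notify)
{
	return (sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_PID : PIDTYPE_TGID;
}

int main(void)
{
	printf("%d %d\n",
	       delivery_type(SIGEV_SIGNAL),
	       delivery_type(SIGEV_SIGNAL | SIGEV_THREAD_ID));
	return 0;
}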
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index c042a455afc6..5e3de28c7677 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -774,6 +774,18 @@ config TRACING_EVENTS_GPIO
 	help
 	  Enable tracing events for gpio subsystem
 
+config GCOV_PROFILE_FTRACE
+	bool "Enable GCOV profiling on ftrace subsystem"
+	depends on GCOV_KERNEL
+	help
+	  Enable GCOV profiling on ftrace subsystem for checking
+	  which functions/lines are tested.
+
+	  If unsure, say N.
+
+	  Note that on a kernel compiled with this config, ftrace will
+	  run significantly slower.
+
 endif # FTRACE
 
 endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 98d53b39a8ee..f81dadbc7c4a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -23,6 +23,11 @@ ifdef CONFIG_TRACING_BRANCHES
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+# for GCOV coverage profiling
+ifdef CONFIG_GCOV_PROFILE_FTRACE
+GCOV_PROFILE := y
+endif
+
 CFLAGS_trace_benchmark.o := -I$(src)
 CFLAGS_trace_events_filter.o := -I$(src)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2a24a59e99c5..2868d85f1fb1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1829,6 +1829,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	mutex_lock(&q->blk_trace_mutex);
 
 	if (attr == &dev_attr_enable) {
+		if (!!value == !!q->blk_trace) {
+			ret = 0;
+			goto out_unlock_bdev;
+		}
 		if (value)
 			ret = blk_trace_setup_queue(q, bdev);
 		else
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2bfb2bbeb3d2..c30032367aab 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -513,7 +513,14 @@ static bool within_notrace_func(struct trace_kprobe *tk)
 	if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
 		return false;
 
-	return !ftrace_location_range(addr - offset, addr - offset + size);
+	/* Get the entry address of the target function */
+	addr -= offset;
+
+	/*
+	 * Since ftrace_location_range() does inclusive range check, we need
+	 * to subtract 1 byte from the end address.
+	 */
+	return !ftrace_location_range(addr, addr + size - 1);
 }
 #else
 #define within_notrace_func(tk)	(false)
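The trace_kprobe.c fix is a classic range-convention off-by-one: ftrace_location_range() checks an inclusive [start, end] interval, so a function of size bytes starting at addr ends at addr + size - 1, not addr + size. A tiny standalone illustration; in_range_inclusive() is a stand-in written for this example, not the ftrace implementation.

/* Sketch: with an inclusive checker, passing addr + size as the end
 * would wrongly claim the first byte of the *next* function.
 */
#include <stdbool.h>
#include <stdio.h>

static bool in_range_inclusive(unsigned long x, unsigned long start,
			       unsigned long end)
{
	return start <= x && x <= end;
}

int main(void)
{
	unsigned long func = 0x1000, size = 0x40;

	/* 0x1040 belongs to the next function; the corrected end 0x103f
	 * excludes it, while func + size would have included it */
	printf("%d\n", in_range_inclusive(0x1040, func, func + size - 1));
	return 0;
}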
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 96db841bf0fc..bf2c06ef9afc 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -371,6 +371,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+		struct tracepoint * const *end,
+		void (*fct)(struct tracepoint *tp, void *priv),
+		void *priv)
+{
+	if (!begin)
+		return;
+
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+		const int *iter;
+
+		for (iter = (const int *)begin; iter < (const int *)end; iter++)
+			fct(offset_to_ptr(iter), priv);
+	} else {
+		struct tracepoint * const *iter;
+
+		for (iter = begin; iter < end; iter++)
+			fct(*iter, priv);
+	}
+}
+
 #ifdef CONFIG_MODULES
 bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -435,15 +456,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
  * Ensure the tracer unregistered the module's probes before the module
  * teardown is performed. Prevents leaks of probe and data pointers.
  */
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
-		struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 {
-	struct tracepoint * const *iter;
-
-	if (!begin)
-		return;
-	for (iter = begin; iter < end; iter++)
-		WARN_ON_ONCE((*iter)->funcs);
+	WARN_ON_ONCE(tp->funcs);
 }
 
 static int tracepoint_module_coming(struct module *mod)
@@ -494,8 +509,9 @@ static void tracepoint_module_going(struct module *mod)
 		 * Called the going notifier before checking for
 		 * quiescence.
 		 */
-		tp_module_going_check_quiescent(mod->tracepoints_ptrs,
-			mod->tracepoints_ptrs + mod->num_tracepoints);
+		for_each_tracepoint_range(mod->tracepoints_ptrs,
+			mod->tracepoints_ptrs + mod->num_tracepoints,
+			tp_module_going_check_quiescent, NULL);
 		break;
 	}
 }
@@ -547,19 +563,6 @@ static __init int init_tracepoints(void)
 __initcall(init_tracepoints);
 #endif /* CONFIG_MODULES */
 
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
-		struct tracepoint * const *end,
-		void (*fct)(struct tracepoint *tp, void *priv),
-		void *priv)
-{
-	struct tracepoint * const *iter;
-
-	if (!begin)
-		return;
-	for (iter = begin; iter < end; iter++)
-		fct(*iter, priv);
-}
-
 /**
  * for_each_kernel_tracepoint - iteration on all kernel tracepoints
  * @fct: callback
diff --git a/kernel/user.c b/kernel/user.c
index 36288d840675..0df9b1640b2a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
 
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
-	.__count	= ATOMIC_INIT(1),
+	.__count	= REFCOUNT_INIT(1),
 	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
 
 	hlist_for_each_entry(user, hashent, uidhash_node) {
 		if (uid_eq(user->uid, uid)) {
-			atomic_inc(&user->__count);
+			refcount_inc(&user->__count);
 			return user;
 		}
 	}
@@ -169,11 +169,8 @@ void free_uid(struct user_struct *up)
 	if (!up)
 		return;
 
-	local_irq_save(flags);
-	if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+	if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
 		free_user(up, flags);
-	else
-		local_irq_restore(flags);
 }
 
 struct user_struct *alloc_uid(kuid_t uid)
@@ -191,7 +188,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 		goto out_unlock;
 
 	new->uid = uid;
-	atomic_set(&new->__count, 1);
+	refcount_set(&new->__count, 1);
 	ratelimit_state_init(&new->ratelimit, HZ, 100);
 	ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
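The kernel/user.c conversion swaps an open-coded local_irq_save()/atomic_dec_and_lock() sequence for refcount_dec_and_lock_irqsave(), which takes the hash lock only when the count may reach zero, so the final put and the hash-table removal happen atomically with respect to lookups. A userspace analogue of that dec-and-lock idiom, using C11 atomics and a pthread mutex in place of the kernel primitives; it is a sketch of the idiom, not of the kernel's implementation.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj { atomic_int refs; };

/* returns true with table_lock held iff the count reached zero */
static bool dec_and_lock(struct obj *o)
{
	int old = atomic_load(&o->refs);

	/* fast path: drop a reference that is provably not the last one,
	 * entirely lock-free */
	while (old > 1)
		if (atomic_compare_exchange_weak(&o->refs, &old, old - 1))
			return false;

	/* possibly the last reference: decide under the lock */
	pthread_mutex_lock(&table_lock);
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		return true;	/* caller unhashes and frees, then unlocks */
	pthread_mutex_unlock(&table_lock);
	return false;
}

int main(void)
{
	struct obj o = { .refs = 1 };

	if (dec_and_lock(&o)) {
		puts("last reference: freeing under the lock");
		pthread_mutex_unlock(&table_lock);
	}
	return 0;
}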
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c3d7583fcd21..e5222b5fb4fe 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -859,7 +859,16 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	unsigned idx;
 	struct uid_gid_extent extent;
 	char *kbuf = NULL, *pos, *next_line;
-	ssize_t ret = -EINVAL;
+	ssize_t ret;
+
+	/* Only allow < page size writes at the beginning of the file */
+	if ((*ppos != 0) || (count >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* Slurp in the user data */
+	kbuf = memdup_user_nul(buf, count);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
 
 	/*
 	 * The userns_state_mutex serializes all writes to any given map.
@@ -895,19 +904,6 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
 		goto out;
 
-	/* Only allow < page size writes at the beginning of the file */
-	ret = -EINVAL;
-	if ((*ppos != 0) || (count >= PAGE_SIZE))
-		goto out;
-
-	/* Slurp in the user data */
-	kbuf = memdup_user_nul(buf, count);
-	if (IS_ERR(kbuf)) {
-		ret = PTR_ERR(kbuf);
-		kbuf = NULL;
-		goto out;
-	}
-
 	/* Parse the user data */
 	ret = -EINVAL;
 	pos = kbuf;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 233cd8fc6910..258033d62cb3 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -18,7 +18,7 @@
 
 #ifdef CONFIG_PROC_SYSCTL
 
-static void *get_uts(struct ctl_table *table, int write)
+static void *get_uts(struct ctl_table *table)
 {
 	char *which = table->data;
 	struct uts_namespace *uts_ns;
@@ -26,21 +26,9 @@ static void *get_uts(struct ctl_table *table, int write)
 	uts_ns = current->nsproxy->uts_ns;
 	which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
 
-	if (!write)
-		down_read(&uts_sem);
-	else
-		down_write(&uts_sem);
 	return which;
 }
 
-static void put_uts(struct ctl_table *table, int write, void *which)
-{
-	if (!write)
-		up_read(&uts_sem);
-	else
-		up_write(&uts_sem);
-}
-
 /*
  *	Special case of dostring for the UTS structure. This has locks
  *	to observe. Should this be in kernel/sys.c ????
@@ -50,13 +38,34 @@ static int proc_do_uts_string(struct ctl_table *table, int write,
 {
 	struct ctl_table uts_table;
 	int r;
+	char tmp_data[__NEW_UTS_LEN + 1];
+
 	memcpy(&uts_table, table, sizeof(uts_table));
-	uts_table.data = get_uts(table, write);
+	uts_table.data = tmp_data;
+
+	/*
+	 * Buffer the value in tmp_data so that proc_dostring() can be called
+	 * without holding any locks.
+	 * We also need to read the original value in the write==1 case to
+	 * support partial writes.
+	 */
+	down_read(&uts_sem);
+	memcpy(tmp_data, get_uts(table), sizeof(tmp_data));
+	up_read(&uts_sem);
 	r = proc_dostring(&uts_table, write, buffer, lenp, ppos);
-	put_uts(table, write, uts_table.data);
 
-	if (write)
+	if (write) {
+		/*
+		 * Write back the new value.
+		 * Note that, since we dropped uts_sem, the result can
+		 * theoretically be incorrect if there are two parallel writes
+		 * at non-zero offsets to the same sysctl.
+		 */
+		down_write(&uts_sem);
+		memcpy(get_uts(table), tmp_data, sizeof(tmp_data));
+		up_write(&uts_sem);
 		proc_sys_poll_notify(table->poll);
+	}
 
 	return r;
 }
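map_write() above now validates the file position and size and duplicates the user buffer before any lock is taken, so a slow or faulting copy can no longer stall other lock holders, and every early failure becomes a plain return with nothing to unwind. A sketch of the same ordering in userspace; all names are invented, and strndup stands in for memdup_user_nul.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#define MAP_MAX 4096

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static char id_map[MAP_MAX];

static ssize_t map_store(const char *buf, size_t count, long pos)
{
	char *kbuf;

	/* validate and duplicate the input with no lock held */
	if (pos != 0 || count >= MAP_MAX)
		return -EINVAL;
	kbuf = strndup(buf, count);
	if (!kbuf)
		return -ENOMEM;

	/* only the actual state update runs under the lock */
	pthread_mutex_lock(&state_lock);
	memcpy(id_map, kbuf, count + 1);
	pthread_mutex_unlock(&state_lock);

	free(kbuf);
	return (ssize_t)count;
}

int main(void)
{
	printf("%zd\n", map_store("0 0 1", 5, 0));
	printf("%s\n", id_map);
	return 0;
}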
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 78b192071ef7..60e80198c3df 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2652,6 +2652,9 @@ void flush_workqueue(struct workqueue_struct *wq)
 	if (WARN_ON(!wq_online))
 		return;
 
+	lock_map_acquire(&wq->lockdep_map);
+	lock_map_release(&wq->lockdep_map);
+
 	mutex_lock(&wq->mutex);
 
 	/*
@@ -2843,7 +2846,8 @@ reflush:
 }
 EXPORT_SYMBOL_GPL(drain_workqueue);
 
-static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
+static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
+			     bool from_cancel)
 {
 	struct worker *worker = NULL;
 	struct worker_pool *pool;
@@ -2885,7 +2889,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 	 * workqueues the deadlock happens when the rescuer stalls, blocking
 	 * forward progress.
 	 */
-	if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
+	if (!from_cancel &&
+	    (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
 		lock_map_acquire(&pwq->wq->lockdep_map);
 		lock_map_release(&pwq->wq->lockdep_map);
 	}
@@ -2896,6 +2901,27 @@ already_gone:
 	return false;
 }
 
+static bool __flush_work(struct work_struct *work, bool from_cancel)
+{
+	struct wq_barrier barr;
+
+	if (WARN_ON(!wq_online))
+		return false;
+
+	if (!from_cancel) {
+		lock_map_acquire(&work->lockdep_map);
+		lock_map_release(&work->lockdep_map);
+	}
+
+	if (start_flush_work(work, &barr, from_cancel)) {
+		wait_for_completion(&barr.done);
+		destroy_work_on_stack(&barr.work);
+		return true;
+	} else {
+		return false;
+	}
+}
+
 /**
  * flush_work - wait for a work to finish executing the last queueing instance
  * @work: the work to flush
@@ -2909,18 +2935,7 @@ already_gone:
  */
 bool flush_work(struct work_struct *work)
 {
-	struct wq_barrier barr;
-
-	if (WARN_ON(!wq_online))
-		return false;
-
-	if (start_flush_work(work, &barr)) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
-		return true;
-	} else {
-		return false;
-	}
+	return __flush_work(work, false);
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
@@ -2986,7 +3001,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 	 * isn't executing.
 	 */
 	if (wq_online)
-		flush_work(work);
+		__flush_work(work, true);
 
 	clear_work_data(work);
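The workqueue refactor funnels flush_work() and the cancel path into one __flush_work() helper, with a from_cancel flag suppressing the lockdep self-annotations that would be false positives when flushing from cancellation. A toy sketch of the shape of that refactor, not of the workqueue internals; every name below is invented.

#include <stdbool.h>
#include <stdio.h>

struct work { const char *name; };

/* single implementation; the flag records which caller we serve */
static bool do_flush(struct work *w, bool from_cancel)
{
	if (!from_cancel) {
		/* only the plain flush path may assert "this work's lock
		 * is acquirable here"; the cancel path already holds the
		 * canceling state and would trip the checker */
		printf("annotate %s\n", w->name);
	}
	printf("flush %s (from_cancel=%d)\n", w->name, from_cancel);
	return true;
}

static bool flush(struct work *w)       { return do_flush(w, false); }
static bool cancel_sync(struct work *w) { return do_flush(w, true); }

int main(void)
{
	struct work w = { "demo" };

	flush(&w);
	cancel_sync(&w);
	return 0;
}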