diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/async.c | 3 | ||||
-rw-r--r-- | kernel/audit.c | 40 | ||||
-rw-r--r-- | kernel/audit_tree.c | 26 | ||||
-rw-r--r-- | kernel/audit_watch.c | 2 | ||||
-rw-r--r-- | kernel/auditfilter.c | 1 | ||||
-rw-r--r-- | kernel/auditsc.c | 20 | ||||
-rw-r--r-- | kernel/fork.c | 8 | ||||
-rw-r--r-- | kernel/module.c | 27 | ||||
-rw-r--r-- | kernel/pid.c | 15 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 4 | ||||
-rw-r--r-- | kernel/printk.c | 5 | ||||
-rw-r--r-- | kernel/ptrace.c | 2 | ||||
-rw-r--r-- | kernel/rwsem.c | 10 | ||||
-rw-r--r-- | kernel/signal.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace.c | 17 |
15 files changed, 143 insertions, 46 deletions
diff --git a/kernel/async.c b/kernel/async.c index 9d3118384858..a1d585c351d6 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); + /* mark that this task has queued an async job, used by module init */ + current->flags |= PF_USED_ASYNC; + /* schedule for execution */ queue_work(system_unbound_wq, &entry->work); diff --git a/kernel/audit.c b/kernel/audit.c index 40414e9143db..d596e5355f15 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -272,6 +272,8 @@ static int audit_log_config_change(char *function_name, int new, int old, int rc = 0; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); + if (unlikely(!ab)) + return rc; audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new, old, from_kuid(&init_user_ns, loginuid), sessionid); if (sid) { @@ -619,6 +621,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, } *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); + if (unlikely(!*ab)) + return rc; audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u", task_tgid_vnr(current), from_kuid(&init_user_ns, current_uid()), @@ -1097,6 +1101,23 @@ static inline void audit_get_stamp(struct audit_context *ctx, } } +/* + * Wait for auditd to drain the queue a little + */ +static void wait_for_auditd(unsigned long sleep_time) +{ + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&audit_backlog_wait, &wait); + + if (audit_backlog_limit && + skb_queue_len(&audit_skb_queue) > audit_backlog_limit) + schedule_timeout(sleep_time); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&audit_backlog_wait, &wait); +} + /* Obtain an audit buffer. This routine does locking to obtain the * audit buffer, but then no locking is required for calls to * audit_log_*format. If the tsk is a task that is currently in a @@ -1142,20 +1163,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, while (audit_backlog_limit && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { - if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time - && time_before(jiffies, timeout_start + audit_backlog_wait_time)) { + if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) { + unsigned long sleep_time; - /* Wait for auditd to drain the queue a little */ - DECLARE_WAITQUEUE(wait, current); - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&audit_backlog_wait, &wait); - - if (audit_backlog_limit && - skb_queue_len(&audit_skb_queue) > audit_backlog_limit) - schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies); - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&audit_backlog_wait, &wait); + sleep_time = timeout_start + audit_backlog_wait_time - + jiffies; + if ((long)sleep_time > 0) + wait_for_auditd(sleep_time); continue; } if (audit_rate_check() && printk_ratelimit()) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index e81175ef25f8..642a89c4f3d6 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -449,11 +449,26 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) return 0; } +static void audit_log_remove_rule(struct audit_krule *rule) +{ + struct audit_buffer *ab; + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); + if (unlikely(!ab)) + return; + audit_log_format(ab, "op="); + audit_log_string(ab, "remove rule"); + audit_log_format(ab, " dir="); + audit_log_untrustedstring(ab, rule->tree->pathname); + audit_log_key(ab, rule->filterkey); + audit_log_format(ab, " list=%d res=1", rule->listnr); + audit_log_end(ab); +} + static void kill_rules(struct audit_tree *tree) { struct audit_krule *rule, *next; struct audit_entry *entry; - struct audit_buffer *ab; list_for_each_entry_safe(rule, next, &tree->rules, rlist) { entry = container_of(rule, struct audit_entry, rule); @@ -461,14 +476,7 @@ static void kill_rules(struct audit_tree *tree) list_del_init(&rule->rlist); if (rule->tree) { /* not a half-baked one */ - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); - audit_log_format(ab, "op="); - audit_log_string(ab, "remove rule"); - audit_log_format(ab, " dir="); - audit_log_untrustedstring(ab, rule->tree->pathname); - audit_log_key(ab, rule->filterkey); - audit_log_format(ab, " list=%d res=1", rule->listnr); - audit_log_end(ab); + audit_log_remove_rule(rule); rule->tree = NULL; list_del_rcu(&entry->list); list_del(&entry->rule.list); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 4a599f699adc..22831c4d369c 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -240,6 +240,8 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc if (audit_enabled) { struct audit_buffer *ab; ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); + if (unlikely(!ab)) + return; audit_log_format(ab, "auid=%u ses=%u op=", from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 7f19f23d38a3..f9fc54bbe06f 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1144,7 +1144,6 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid, * audit_receive_filter - apply all rules to the specified message type * @type: audit message type * @pid: target pid for netlink audit messages - * @uid: target uid for netlink audit messages * @seq: netlink audit message sequence (serial) number * @data: payload data * @datasz: size of payload data diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e37e6a12c5e3..a371f857a0a9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1464,14 +1464,14 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_end(ab); ab = audit_log_start(context, GFP_KERNEL, AUDIT_IPC_SET_PERM); + if (unlikely(!ab)) + return; audit_log_format(ab, "qbytes=%lx ouid=%u ogid=%u mode=%#ho", context->ipc.qbytes, context->ipc.perm_uid, context->ipc.perm_gid, context->ipc.perm_mode); - if (!ab) - return; } break; } case AUDIT_MQ_OPEN: { @@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags) context->type = AUDIT_MMAP; } -static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; kgid_t gid; @@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", current->pid); audit_log_untrustedstring(ab, current->comm); +} + +static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +{ + audit_log_task(ab); audit_log_format(ab, " reason="); audit_log_string(ab, reason); audit_log_format(ab, " sig=%ld", signr); @@ -2715,6 +2720,8 @@ void audit_core_dumps(long signr) return; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); + if (unlikely(!ab)) + return; audit_log_abend(ab, "memory violation", signr); audit_log_end(ab); } @@ -2723,8 +2730,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); - audit_log_abend(ab, "seccomp", signr); + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP); + if (unlikely(!ab)) + return; + audit_log_task(ab); + audit_log_format(ab, " sig=%ld", signr); audit_log_format(ab, " syscall=%ld", syscall); audit_log_format(ab, " compat=%d", is_compat_task()); audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current)); diff --git a/kernel/fork.c b/kernel/fork.c index e05cff2429b5..c535f33bbb9c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1166,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags, current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); + /* + * If the new process will be in a different pid namespace + * don't allow the creation of threads. + */ + if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && + (task_active_pid_ns(current) != current->nsproxy->pid_ns)) + return ERR_PTR(-EINVAL); + retval = security_task_create(clone_flags); if (retval) goto fork_out; diff --git a/kernel/module.c b/kernel/module.c index 250092c1d57d..b10b048367e1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod) { int ret = 0; + /* + * We want to find out whether @mod uses async during init. Clear + * PF_USED_ASYNC. async_schedule*() will set it. + */ + current->flags &= ~PF_USED_ASYNC; + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); @@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod) blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); - /* We need to finish all async code before the module init sequence is done */ - async_synchronize_full(); + /* + * We need to finish all async code before the module init sequence + * is done. This has potential to deadlock. For example, a newly + * detected block device can trigger request_module() of the + * default iosched from async probing task. Once userland helper + * reaches here, async_synchronize_full() will wait on the async + * task waiting on request_module() and deadlock. + * + * This deadlock is avoided by perfomring async_synchronize_full() + * iff module init queued any async jobs. This isn't a full + * solution as it will deadlock the same if module loading from + * async jobs nests more than once; however, due to the various + * constraints, this hack seems to be the best option for now. + * Please refer to the following thread for details. + * + * http://thread.gmane.org/gmane.linux.kernel/1420814 + */ + if (current->flags & PF_USED_ASYNC) + async_synchronize_full(); mutex_lock(&module_mutex); /* Drop initial reference. */ diff --git a/kernel/pid.c b/kernel/pid.c index 36aa02ff17d6..de9af600006f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -270,7 +270,6 @@ void free_pid(struct pid *pid) wake_up_process(ns->child_reaper); break; case 0: - ns->nr_hashed = -1; schedule_work(&ns->proc_work); break; } @@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); - if (ns->nr_hashed < 0) + if (!(ns->nr_hashed & PIDNS_HASH_ADDING)) goto out_unlock; for ( ; upid >= pid->numbers; --upid) { hlist_add_head_rcu(&upid->pid_chain, @@ -342,6 +341,13 @@ out_free: goto out; } +void disable_pid_allocation(struct pid_namespace *ns) +{ + spin_lock_irq(&pidmap_lock); + ns->nr_hashed &= ~PIDNS_HASH_ADDING; + spin_unlock_irq(&pidmap_lock); +} + struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { struct hlist_node *elem; @@ -573,6 +579,9 @@ void __init pidhash_init(void) void __init pidmap_init(void) { + /* Veryify no one has done anything silly */ + BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING); + /* bump default and minimum pid_max based on number of cpus */ pid_max = min(pid_max_max, max_t(int, pid_max, PIDS_PER_CPU_DEFAULT * num_possible_cpus())); @@ -584,7 +593,7 @@ void __init pidmap_init(void) /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); - init_pid_ns.nr_hashed = 1; + init_pid_ns.nr_hashed = PIDNS_HASH_ADDING; init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index fdbd0cdf271a..c1c3dc1c6023 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); + ns->nr_hashed = PIDNS_HASH_ADDING; INIT_WORK(&ns->proc_work, proc_cleanup_work); set_bit(0, ns->pidmap[0].page); @@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) int rc; struct task_struct *task, *me = current; + /* Don't allow any more processes into the pid namespace */ + disable_pid_allocation(pid_ns); + /* Ignore SIGCHLD causing any terminated children to autoreap */ spin_lock_irq(&me->sighand->siglock); me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; diff --git a/kernel/printk.c b/kernel/printk.c index 19c0d7bcf24a..357f714ddd49 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -870,10 +870,11 @@ static size_t print_time(u64 ts, char *buf) if (!printk_time) return 0; + rem_nsec = do_div(ts, 1000000000); + if (!buf) - return 15; + return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts); - rem_nsec = do_div(ts, 1000000000); return sprintf(buf, "[%5lu.%06lu] ", (unsigned long)ts, rem_nsec / 1000); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1599157336a6..612a56126851 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -139,7 +139,7 @@ void __ptrace_unlink(struct task_struct *child) * RETURNS: * 0 on success, -ESRCH if %child is not ready. */ -int ptrace_check_attach(struct task_struct *child, bool ignore_state) +static int ptrace_check_attach(struct task_struct *child, bool ignore_state) { int ret = -ESRCH; diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 6850f53e02d8..b3c6c3fcd847 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_read_nested); +void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) +{ + might_sleep(); + rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); + + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); +} + +EXPORT_SYMBOL(_down_write_nest_lock); + void down_write_nested(struct rw_semaphore *sem, int subclass) { might_sleep(); diff --git a/kernel/signal.c b/kernel/signal.c index 00b4a6d4449d..53cd5c4d1172 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2528,11 +2528,8 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) */ void set_current_blocked(sigset_t *newset) { - struct task_struct *tsk = current; sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); - spin_lock_irq(&tsk->sighand->siglock); - __set_task_blocked(tsk, newset); - spin_unlock_irq(&tsk->sighand->siglock); + __set_current_blocked(newset); } void __set_current_blocked(const sigset_t *newset) @@ -3205,7 +3202,6 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, if (nset) { if (copy_from_user(&new_set, nset, sizeof(*nset))) return -EFAULT; - new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); new_blocked = current->blocked; @@ -3223,7 +3219,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, return -EINVAL; } - __set_current_blocked(&new_blocked); + set_current_blocked(&new_blocked); } if (oset) { @@ -3287,6 +3283,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask) int old = current->blocked.sig[0]; sigset_t newset; + siginitset(&newset, newmask); set_current_blocked(&newset); return old; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e5125677efa0..3c13e46d7d24 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = 0; + trace_set_options(buf); *ppos += cnt; @@ -3452,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp) return -EINTR; /* - * We block until we read something and tracing is enabled. + * We block until we read something and tracing is disabled. * We still block if tracing is disabled, but we have never * read anything. This allows a user to cat this file, and * then enable tracing. But after we have read something, @@ -3460,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (tracing_is_enabled() && iter->pos) + if (!tracing_is_enabled() && iter->pos) break; } @@ -4815,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf, return ret; if (buffer) { - if (val) + mutex_lock(&trace_types_lock); + if (val) { ring_buffer_record_on(buffer); - else + if (current_trace->start) + current_trace->start(tr); + } else { ring_buffer_record_off(buffer); + if (current_trace->stop) + current_trace->stop(tr); + } + mutex_unlock(&trace_types_lock); } (*ppos)++; |