summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c13
-rw-r--r--kernel/events/core.c10
-rw-r--r--kernel/exit.c19
-rw-r--r--kernel/pid_namespace.c20
-rw-r--r--kernel/printk.c241
-rw-r--r--kernel/sys.c6
6 files changed, 261 insertions, 48 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 72fcd3069a90..2097684cf194 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -255,12 +255,17 @@ int cgroup_lock_is_held(void)
EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
+static int css_unbias_refcnt(int refcnt)
+{
+ return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
+}
+
/* the current nr of refs, always >= 0 whether @css is deactivated or not */
static int css_refcnt(struct cgroup_subsys_state *css)
{
int v = atomic_read(&css->refcnt);
- return v >= 0 ? v : v - CSS_DEACT_BIAS;
+ return css_unbias_refcnt(v);
}
/* convenient tests for these bits */
@@ -4982,10 +4987,12 @@ EXPORT_SYMBOL_GPL(__css_tryget);
void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
+ int v;
rcu_read_lock();
- atomic_dec(&css->refcnt);
- switch (css_refcnt(css)) {
+ v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
+
+ switch (v) {
case 1:
if (notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f85c0154b333..d7d71d6ec972 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -253,9 +253,9 @@ perf_cgroup_match(struct perf_event *event)
return !event->cgrp || event->cgrp == cpuctx->cgrp;
}
-static inline void perf_get_cgroup(struct perf_event *event)
+static inline bool perf_tryget_cgroup(struct perf_event *event)
{
- css_get(&event->cgrp->css);
+ return css_tryget(&event->cgrp->css);
}
static inline void perf_put_cgroup(struct perf_event *event)
@@ -484,7 +484,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
event->cgrp = cgrp;
/* must be done before we fput() the file */
- perf_get_cgroup(event);
+ if (!perf_tryget_cgroup(event)) {
+ event->cgrp = NULL;
+ ret = -ENOENT;
+ goto out;
+ }
/*
* all events in a group must monitor
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..2f59cc334516 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,18 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
list_del_rcu(&p->tasks);
list_del_init(&p->sibling);
__this_cpu_dec(process_counts);
+ /*
+ * If we are the last child process in a pid namespace to be
+ * reaped, notify the reaper sleeping zap_pid_ns_processes().
+ */
+ if (IS_ENABLED(CONFIG_PID_NS)) {
+ struct task_struct *parent = p->real_parent;
+
+ if ((task_active_pid_ns(parent)->child_reaper == parent) &&
+ list_empty(&parent->children) &&
+ (parent->flags & PF_EXITING))
+ wake_up_process(parent);
+ }
}
list_del_rcu(&p->thread_group);
}
@@ -643,6 +655,7 @@ static void exit_mm(struct task_struct * tsk)
mm_release(tsk, mm);
if (!mm)
return;
+ sync_mm_rss(mm);
/*
* Serialize with any possible pending coredump.
* We must hold mmap_sem around checking core_state
@@ -719,12 +732,6 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
zap_pid_ns_processes(pid_ns);
write_lock_irq(&tasklist_lock);
- /*
- * We can not clear ->child_reaper or leave it alone.
- * There may by stealth EXIT_DEAD tasks on ->children,
- * forget_original_parent() must move them somewhere.
- */
- pid_ns->child_reaper = init_pid_ns.child_reaper;
} else if (father->signal->has_child_subreaper) {
struct task_struct *reaper;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 16b20e38c4a1..b3c7fd554250 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
}
read_unlock(&tasklist_lock);
+ /* Firstly reap the EXIT_ZOMBIE children we may have. */
do {
clear_thread_flag(TIF_SIGPENDING);
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
+ /*
+ * sys_wait4() above can't reap the TASK_DEAD children.
+ * Make sure they all go away, see __unhash_process().
+ */
+ for (;;) {
+ bool need_wait = false;
+
+ read_lock(&tasklist_lock);
+ if (!list_empty(&current->children)) {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ need_wait = true;
+ }
+ read_unlock(&tasklist_lock);
+
+ if (!need_wait)
+ break;
+ schedule();
+ }
+
if (pid_ns->reboot)
current->signal->group_exit_code = pid_ns->reboot;
diff --git a/kernel/printk.c b/kernel/printk.c
index 32462d2b364a..a2276b916769 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -227,10 +227,10 @@ static u32 clear_idx;
#define LOG_LINE_MAX 1024
/* record buffer */
-#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define LOG_ALIGN 4
#else
-#define LOG_ALIGN 8
+#define LOG_ALIGN __alignof__(struct log)
#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -414,7 +414,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (!user)
return -EBADF;
- mutex_lock(&user->lock);
+ ret = mutex_lock_interruptible(&user->lock);
+ if (ret)
+ return ret;
raw_spin_lock(&logbuf_lock);
while (user->seq == log_next_seq) {
if (file->f_flags & O_NONBLOCK) {
@@ -878,7 +880,9 @@ static int syslog_print(char __user *buf, int size)
syslog_seq++;
raw_spin_unlock_irq(&logbuf_lock);
- if (len > 0 && copy_to_user(buf, text, len))
+ if (len > size)
+ len = -EINVAL;
+ else if (len > 0 && copy_to_user(buf, text, len))
len = -EFAULT;
kfree(text);
@@ -909,7 +913,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
- */
+ */
seq = clear_seq;
idx = clear_idx;
while (seq < log_next_seq) {
@@ -919,6 +923,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
idx = log_next(idx);
seq++;
}
+
+ /* move first record forward until length fits into the buffer */
seq = clear_seq;
idx = clear_idx;
while (len > size && seq < log_next_seq) {
@@ -929,7 +935,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
seq++;
}
- /* last message in this dump */
+ /* last message fitting into this dump */
next_seq = log_next_seq;
len = 0;
@@ -974,6 +980,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
{
bool clear = false;
static int saved_console_loglevel = -1;
+ static DEFINE_MUTEX(syslog_mutex);
int error;
error = check_syslog_permissions(type, from_file);
@@ -1000,11 +1007,17 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
error = -EFAULT;
goto out;
}
+ error = mutex_lock_interruptible(&syslog_mutex);
+ if (error)
+ goto out;
error = wait_event_interruptible(log_wait,
syslog_seq != log_next_seq);
- if (error)
+ if (error) {
+ mutex_unlock(&syslog_mutex);
goto out;
+ }
error = syslog_print(buf, len);
+ mutex_unlock(&syslog_mutex);
break;
/* Read/clear last kernel messages */
case SYSLOG_ACTION_READ_CLEAR:
@@ -2300,48 +2313,210 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
* kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
*
- * Iterate through each of the dump devices and call the oops/panic
- * callbacks with the log buffer.
+ * Call each of the registered dumper's dump() callback, which can
+ * retrieve the kmsg records with kmsg_dump_get_line() or
+ * kmsg_dump_get_buffer().
*/
void kmsg_dump(enum kmsg_dump_reason reason)
{
- u64 idx;
struct kmsg_dumper *dumper;
- const char *s1, *s2;
- unsigned long l1, l2;
unsigned long flags;
if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
return;
- /* Theoretically, the log could move on after we do this, but
- there's not a lot we can do about that. The new messages
- will overwrite the start of what we dump. */
+ rcu_read_lock();
+ list_for_each_entry_rcu(dumper, &dump_list, list) {
+ if (dumper->max_reason && reason > dumper->max_reason)
+ continue;
+
+ /* initialize iterator with data about the stored records */
+ dumper->active = true;
+
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ dumper->cur_seq = clear_seq;
+ dumper->cur_idx = clear_idx;
+ dumper->next_seq = log_next_seq;
+ dumper->next_idx = log_next_idx;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
+ /* invoke dumper which will iterate over records */
+ dumper->dump(dumper, reason);
+
+ /* reset iterator */
+ dumper->active = false;
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * kmsg_dump_get_line - retrieve one kmsg log line
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @line: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the beginning of the kmsg buffer, with the oldest kmsg
+ * record, and copy one record into the provided buffer.
+ *
+ * Consecutive calls will return the next available record moving
+ * towards the end of the buffer with the youngest messages.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+ char *line, size_t size, size_t *len)
+{
+ unsigned long flags;
+ struct log *msg;
+ size_t l = 0;
+ bool ret = false;
+
+ if (!dumper->active)
+ goto out;
raw_spin_lock_irqsave(&logbuf_lock, flags);
- if (syslog_seq < log_first_seq)
- idx = syslog_idx;
- else
- idx = log_first_idx;
+ if (dumper->cur_seq < log_first_seq) {
+ /* messages are gone, move to first available one */
+ dumper->cur_seq = log_first_seq;
+ dumper->cur_idx = log_first_idx;
+ }
+
+ /* last entry */
+ if (dumper->cur_seq >= log_next_seq) {
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ goto out;
+ }
- if (idx > log_next_idx) {
- s1 = log_buf;
- l1 = log_next_idx;
+ msg = log_from_idx(dumper->cur_idx);
+ l = msg_print_text(msg, syslog,
+ line, size);
- s2 = log_buf + idx;
- l2 = log_buf_len - idx;
- } else {
- s1 = "";
- l1 = 0;
+ dumper->cur_idx = log_next(dumper->cur_idx);
+ dumper->cur_seq++;
+ ret = true;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+out:
+ if (len)
+ *len = l;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
+
+/**
+ * kmsg_dump_get_buffer - copy kmsg log lines
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @line: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the end of the kmsg buffer and fill the provided buffer
+ * with as many of the the *youngest* kmsg records that fit into it.
+ * If the buffer is large enough, all available kmsg records will be
+ * copied with a single call.
+ *
+ * Consecutive calls will fill the buffer with the next block of
+ * available older records, not including the earlier retrieved ones.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+ char *buf, size_t size, size_t *len)
+{
+ unsigned long flags;
+ u64 seq;
+ u32 idx;
+ u64 next_seq;
+ u32 next_idx;
+ size_t l = 0;
+ bool ret = false;
+
+ if (!dumper->active)
+ goto out;
+
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ if (dumper->cur_seq < log_first_seq) {
+ /* messages are gone, move to first available one */
+ dumper->cur_seq = log_first_seq;
+ dumper->cur_idx = log_first_idx;
+ }
+
+ /* last entry */
+ if (dumper->cur_seq >= dumper->next_seq) {
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ goto out;
+ }
+
+ /* calculate length of entire buffer */
+ seq = dumper->cur_seq;
+ idx = dumper->cur_idx;
+ while (seq < dumper->next_seq) {
+ struct log *msg = log_from_idx(idx);
+
+ l += msg_print_text(msg, true, NULL, 0);
+ idx = log_next(idx);
+ seq++;
+ }
- s2 = log_buf + idx;
- l2 = log_next_idx - idx;
+ /* move first record forward until length fits into the buffer */
+ seq = dumper->cur_seq;
+ idx = dumper->cur_idx;
+ while (l > size && seq < dumper->next_seq) {
+ struct log *msg = log_from_idx(idx);
+
+ l -= msg_print_text(msg, true, NULL, 0);
+ idx = log_next(idx);
+ seq++;
+ }
+
+ /* last message in next interation */
+ next_seq = seq;
+ next_idx = idx;
+
+ l = 0;
+ while (seq < dumper->next_seq) {
+ struct log *msg = log_from_idx(idx);
+
+ l += msg_print_text(msg, syslog,
+ buf + l, size - l);
+
+ idx = log_next(idx);
+ seq++;
}
+
+ dumper->next_seq = next_seq;
+ dumper->next_idx = next_idx;
+ ret = true;
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+out:
+ if (len)
+ *len = l;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
- rcu_read_lock();
- list_for_each_entry_rcu(dumper, &dump_list, list)
- dumper->dump(dumper, reason, s1, l1, s2, l2);
- rcu_read_unlock();
+/**
+ * kmsg_dump_rewind - reset the interator
+ * @dumper: registered kmsg dumper
+ *
+ * Reset the dumper's iterator so that kmsg_dump_get_line() and
+ * kmsg_dump_get_buffer() can be called again and used multiple
+ * times within the same dumper.dump() callback.
+ */
+void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ dumper->cur_seq = clear_seq;
+ dumper->cur_idx = clear_idx;
+ dumper->next_seq = log_next_seq;
+ dumper->next_idx = log_next_idx;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
}
+EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
#endif
diff --git a/kernel/sys.c b/kernel/sys.c
index f0ec44dcd415..e0c8ffc50d7f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2127,9 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
return -EINVAL;
break;
- case PR_GET_TID_ADDRESS:
- error = prctl_get_tid_address(me, (int __user **)arg2);
- break;
default:
return -EINVAL;
}
@@ -2147,6 +2144,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_MM:
error = prctl_set_mm(arg2, arg3, arg4, arg5);
break;
+ case PR_GET_TID_ADDRESS:
+ error = prctl_get_tid_address(me, (int __user **)arg2);
+ break;
case PR_SET_CHILD_SUBREAPER:
me->signal->is_child_subreaper = !!arg2;
error = 0;
OpenPOWER on IntegriCloud