summaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig22
-rw-r--r--kernel/trace/blktrace.c20
-rw-r--r--kernel/trace/ftrace.c144
-rw-r--r--kernel/trace/power-traces.c5
-rw-r--r--kernel/trace/ring_buffer.c368
-rw-r--r--kernel/trace/trace.c40
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_event_perf.c62
-rw-r--r--kernel/trace/trace_events.c67
-rw-r--r--kernel/trace/trace_export.c14
-rw-r--r--kernel/trace/trace_functions_graph.c209
-rw-r--r--kernel/trace/trace_irqsoff.c152
-rw-r--r--kernel/trace/trace_kdb.c1
-rw-r--r--kernel/trace/trace_kprobe.c46
-rw-r--r--kernel/trace/trace_sched_wakeup.c256
-rw-r--r--kernel/trace/trace_selftest.c2
-rw-r--r--kernel/trace/trace_stack.c1
-rw-r--r--kernel/trace/trace_workqueue.c10
18 files changed, 967 insertions, 456 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 538501c6ea50..14674dce77a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
help
See Documentation/trace/ftrace-design.txt
+config HAVE_C_RECORDMCOUNT
+ bool
+ help
+ C version of recordmcount available?
+
config TRACER_MAX_TRACE
bool
@@ -64,6 +69,21 @@ config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
bool
+config EVENT_POWER_TRACING_DEPRECATED
+ depends on EVENT_TRACING
+ bool "Deprecated power event trace API, to be removed"
+ default y
+ help
+ Provides old power event types:
+ C-state/idle accounting events:
+ power:power_start
+ power:power_end
+ and old cpufreq accounting event:
+ power:power_frequency
+ This is for userspace compatibility
+ and will vanish after 5 kernel iterations,
+ namely 2.6.41.
+
config CONTEXT_SWITCH_TRACER
bool
@@ -121,7 +141,7 @@ if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
- select FRAME_POINTER
+ select FRAME_POINTER if !ARM_UNWIND && !S390
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 959f8d6c8cc1..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,7 +23,6 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
-#include <linux/smp_lock.h>
#include <linux/time.h>
#include <linux/uaccess.h>
@@ -169,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
BLK_TC_ACT(BLK_TC_WRITE) };
-#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
#define BLK_TC_RAHEAD BLK_TC_AHEAD
/* The ilog2() calls fall out because they're constant */
@@ -197,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
what |= ddir_act[rw & WRITE];
- what |= MASK_TC_BIT(rw, HARDBARRIER);
what |= MASK_TC_BIT(rw, SYNC);
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
@@ -326,6 +323,7 @@ static const struct file_operations blk_dropped_fops = {
.owner = THIS_MODULE,
.open = blk_dropped_open,
.read = blk_dropped_read,
+ .llseek = default_llseek,
};
static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -365,6 +363,7 @@ static const struct file_operations blk_msg_fops = {
.owner = THIS_MODULE,
.open = blk_msg_open,
.write = blk_msg_write,
+ .llseek = noop_llseek,
};
/*
@@ -639,7 +638,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
if (!q)
return -ENXIO;
- lock_kernel();
mutex_lock(&bdev->bd_mutex);
switch (cmd) {
@@ -667,7 +665,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
}
mutex_unlock(&bdev->bd_mutex);
- unlock_kernel();
return ret;
}
@@ -1652,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct block_device *bdev;
ssize_t ret = -ENXIO;
- lock_kernel();
bdev = bdget(part_devt(p));
if (bdev == NULL)
- goto out_unlock_kernel;
+ goto out;
q = blk_trace_get_queue(bdev);
if (q == NULL)
@@ -1683,8 +1679,7 @@ out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
+out:
return ret;
}
@@ -1714,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
ret = -ENXIO;
- lock_kernel();
p = dev_to_part(dev);
bdev = bdget(part_devt(p));
if (bdev == NULL)
- goto out_unlock_kernel;
+ goto out;
q = blk_trace_get_queue(bdev);
if (q == NULL)
@@ -1753,8 +1747,6 @@ out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
out:
return ret ? ret : count;
}
@@ -1813,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
- if (rw & REQ_HARDBARRIER)
- rwbs[i++] = 'B';
if (rw & REQ_SYNC)
rwbs[i++] = 'S';
if (rw & REQ_META)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0d88ce9b9fb8..f3dadae83883 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -381,12 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
+ int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- static DEFINE_MUTEX(mutex);
static struct trace_seq s;
unsigned long long avg;
unsigned long long stddev;
#endif
+ mutex_lock(&ftrace_profile_lock);
+
+ /* we raced with function_profile_reset() */
+ if (unlikely(rec->counter == 0)) {
+ ret = -EBUSY;
+ goto out;
+ }
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -408,7 +415,6 @@ static int function_stat_show(struct seq_file *m, void *v)
do_div(stddev, (rec->counter - 1) * 1000);
}
- mutex_lock(&mutex);
trace_seq_init(&s);
trace_print_graph_duration(rec->time, &s);
trace_seq_puts(&s, " ");
@@ -416,11 +422,12 @@ static int function_stat_show(struct seq_file *m, void *v)
trace_seq_puts(&s, " ");
trace_print_graph_duration(stddev, &s);
trace_print_seq(m, &s);
- mutex_unlock(&mutex);
#endif
seq_putc(m, '\n');
+out:
+ mutex_unlock(&ftrace_profile_lock);
- return 0;
+ return ret;
}
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -793,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = {
.open = tracing_open_generic,
.read = ftrace_profile_read,
.write = ftrace_profile_write,
+ .llseek = default_llseek,
};
/* used to initialize the real stat files */
@@ -877,10 +885,8 @@ enum {
FTRACE_ENABLE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
- FTRACE_ENABLE_MCOUNT = (1 << 3),
- FTRACE_DISABLE_MCOUNT = (1 << 4),
- FTRACE_START_FUNC_RET = (1 << 5),
- FTRACE_STOP_FUNC_RET = (1 << 6),
+ FTRACE_START_FUNC_RET = (1 << 3),
+ FTRACE_STOP_FUNC_RET = (1 << 4),
};
static int ftrace_filtered;
@@ -1219,8 +1225,6 @@ static void ftrace_shutdown(int command)
static void ftrace_startup_sysctl(void)
{
- int command = FTRACE_ENABLE_MCOUNT;
-
if (unlikely(ftrace_disabled))
return;
@@ -1228,23 +1232,17 @@ static void ftrace_startup_sysctl(void)
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
if (ftrace_start_up)
- command |= FTRACE_ENABLE_CALLS;
-
- ftrace_run_update_code(command);
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
}
static void ftrace_shutdown_sysctl(void)
{
- int command = FTRACE_DISABLE_MCOUNT;
-
if (unlikely(ftrace_disabled))
return;
/* ftrace_start_up is true if ftrace is running */
if (ftrace_start_up)
- command |= FTRACE_DISABLE_CALLS;
-
- ftrace_run_update_code(command);
+ ftrace_run_update_code(FTRACE_DISABLE_CALLS);
}
static cycle_t ftrace_update_time;
@@ -1361,24 +1359,29 @@ enum {
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
struct ftrace_iterator {
- struct ftrace_page *pg;
- int hidx;
- int idx;
- unsigned flags;
- struct trace_parser parser;
+ loff_t pos;
+ loff_t func_pos;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *func;
+ struct ftrace_func_probe *probe;
+ struct trace_parser parser;
+ int hidx;
+ int idx;
+ unsigned flags;
};
static void *
-t_hash_next(struct seq_file *m, void *v, loff_t *pos)
+t_hash_next(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
- struct hlist_node *hnd = v;
+ struct hlist_node *hnd = NULL;
struct hlist_head *hhd;
- WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
-
(*pos)++;
+ iter->pos = *pos;
+ if (iter->probe)
+ hnd = &iter->probe->node;
retry:
if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
return NULL;
@@ -1401,7 +1404,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
}
}
- return hnd;
+ if (WARN_ON_ONCE(!hnd))
+ return NULL;
+
+ iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
+
+ return iter;
}
static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1410,26 +1418,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l;
- if (!(iter->flags & FTRACE_ITER_HASH))
- *pos = 0;
-
- iter->flags |= FTRACE_ITER_HASH;
+ if (iter->func_pos > *pos)
+ return NULL;
iter->hidx = 0;
- for (l = 0; l <= *pos; ) {
- p = t_hash_next(m, p, &l);
+ for (l = 0; l <= (*pos - iter->func_pos); ) {
+ p = t_hash_next(m, &l);
if (!p)
break;
}
- return p;
+ if (!p)
+ return NULL;
+
+ /* Only set this if we have an item */
+ iter->flags |= FTRACE_ITER_HASH;
+
+ return iter;
}
-static int t_hash_show(struct seq_file *m, void *v)
+static int
+t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
{
struct ftrace_func_probe *rec;
- struct hlist_node *hnd = v;
- rec = hlist_entry(hnd, struct ftrace_func_probe, node);
+ rec = iter->probe;
+ if (WARN_ON_ONCE(!rec))
+ return -EIO;
if (rec->ops->print)
return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1450,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
struct dyn_ftrace *rec = NULL;
if (iter->flags & FTRACE_ITER_HASH)
- return t_hash_next(m, v, pos);
+ return t_hash_next(m, pos);
(*pos)++;
+ iter->pos = *pos;
if (iter->flags & FTRACE_ITER_PRINTALL)
- return NULL;
+ return t_hash_start(m, pos);
retry:
if (iter->idx >= iter->pg->index) {
@@ -1484,7 +1499,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
}
}
- return rec;
+ if (!rec)
+ return t_hash_start(m, pos);
+
+ iter->func_pos = *pos;
+ iter->func = rec;
+
+ return iter;
+}
+
+static void reset_iter_read(struct ftrace_iterator *iter)
+{
+ iter->pos = 0;
+ iter->func_pos = 0;
+ iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
}
static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1495,6 +1523,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
mutex_lock(&ftrace_lock);
/*
+ * If an lseek was done, then reset and start from beginning.
+ */
+ if (*pos < iter->pos)
+ reset_iter_read(iter);
+
+ /*
* For set_ftrace_filter reading, if we have the filter
* off, we can short cut and just print out that all
* functions are enabled.
@@ -1503,12 +1537,19 @@ static void *t_start(struct seq_file *m, loff_t *pos)
if (*pos > 0)
return t_hash_start(m, pos);
iter->flags |= FTRACE_ITER_PRINTALL;
+ /* reset in case of seek/pread */
+ iter->flags &= ~FTRACE_ITER_HASH;
return iter;
}
if (iter->flags & FTRACE_ITER_HASH)
return t_hash_start(m, pos);
+ /*
+ * Unfortunately, we need to restart at ftrace_pages_start
+ * every time we let go of the ftrace_mutex. This is because
+ * those pointers can change without the lock.
+ */
iter->pg = ftrace_pages_start;
iter->idx = 0;
for (l = 0; l <= *pos; ) {
@@ -1517,10 +1558,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
break;
}
- if (!p && iter->flags & FTRACE_ITER_FILTER)
- return t_hash_start(m, pos);
+ if (!p) {
+ if (iter->flags & FTRACE_ITER_FILTER)
+ return t_hash_start(m, pos);
- return p;
+ return NULL;
+ }
+
+ return iter;
}
static void t_stop(struct seq_file *m, void *p)
@@ -1531,16 +1576,18 @@ static void t_stop(struct seq_file *m, void *p)
static int t_show(struct seq_file *m, void *v)
{
struct ftrace_iterator *iter = m->private;
- struct dyn_ftrace *rec = v;
+ struct dyn_ftrace *rec;
if (iter->flags & FTRACE_ITER_HASH)
- return t_hash_show(m, v);
+ return t_hash_show(m, iter);
if (iter->flags & FTRACE_ITER_PRINTALL) {
seq_printf(m, "#### all functions enabled ####\n");
return 0;
}
+ rec = iter->func;
+
if (!rec)
return 0;
@@ -1592,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
ret = ftrace_avail_open(inode, file);
if (!ret) {
- m = (struct seq_file *)file->private_data;
- iter = (struct ftrace_iterator *)m->private;
+ m = file->private_data;
+ iter = m->private;
iter->flags = FTRACE_ITER_FAILURES;
}
@@ -2623,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = {
.read = seq_read,
.write = ftrace_graph_write,
.release = ftrace_graph_release,
+ .llseek = seq_lseek,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index a22582a06161..f55fcf61b223 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -13,5 +13,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/power.h>
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
+#ifdef EVENT_POWER_TRACING_DEPRECATED
+EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
+#endif
+EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 19cccc3c3028..bd1c35a4fbcc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
RB_LEN_TIME_STAMP = 16,
};
+#define skip_time_extend(event) \
+ ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
return length + RB_EVNT_HDR_SIZE;
}
-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
return 0;
}
+/*
+ * Return total length of time extend and data,
+ * or just the event length for all other events.
+ */
+static inline unsigned
+rb_event_ts_length(struct ring_buffer_event *event)
+{
+ unsigned len = 0;
+
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ /* time extends include the data event after it */
+ len = RB_LEN_TIME_EXTEND;
+ event = skip_time_extend(event);
+ }
+ return len + rb_event_length(event);
+}
+
/**
* ring_buffer_event_length - return the length of the event
* @event: the event to get the length of
+ *
+ * Returns the size of the data load of a data event.
+ * If the event is something other than a data event, it
+ * returns the size of the event itself. With the exception
+ * of a TIME EXTEND, where it still returns the size of the
+ * data load of the data event after it.
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
- unsigned length = rb_event_length(event);
+ unsigned length;
+
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
+
+ length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length;
length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
static void *
rb_event_data(struct ring_buffer_event *event)
{
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
-/* Max number of timestamps that can fit on a page */
-#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
-
int ring_buffer_print_page_header(struct trace_seq *s)
{
struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0;
}
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+{
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+
+ /* Not the first event on the page? */
+ if (rb_event_index(event)) {
+ event->time_delta = delta & TS_MASK;
+ event->array[0] = delta >> TS_SHIFT;
+ } else {
+ /* nope, just zero it */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
+
+ return skip_time_extend(event);
+}
+
/**
* ring_buffer_update_event - update event type and data
* @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
* data field.
*/
static void
-rb_update_event(struct ring_buffer_event *event,
- unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event, unsigned length,
+ int add_timestamp, u64 delta)
{
- event->type_len = type;
-
- switch (type) {
-
- case RINGBUF_TYPE_PADDING:
- case RINGBUF_TYPE_TIME_EXTEND:
- case RINGBUF_TYPE_TIME_STAMP:
- break;
+ /* Only a commit updates the timestamp */
+ if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+ delta = 0;
- case 0:
- length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
- event->array[0] = length;
- else
- event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
- break;
- default:
- BUG();
+ /*
+ * If we need to add a timestamp, then we
+ * add it to the start of the resevered space.
+ */
+ if (unlikely(add_timestamp)) {
+ event = rb_add_time_stamp(event, delta);
+ length -= RB_LEN_TIME_EXTEND;
+ delta = 0;
}
+
+ event->time_delta = delta;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ event->type_len = 0;
+ event->array[0] = length;
+ } else
+ event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}
/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(length, &tail_page->write);
}
-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail,
- struct buffer_page *tail_page, u64 *ts)
+ struct buffer_page *tail_page, u64 ts)
{
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
* Nested commits always have zero deltas, so
* just reread the time stamp
*/
- *ts = rb_time_stamp(buffer);
- next_page->page->time_stamp = *ts;
+ ts = rb_time_stamp(buffer);
+ next_page->page->time_stamp = ts;
}
out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned type, unsigned long length, u64 *ts)
+ unsigned long length, u64 ts,
+ u64 delta, int add_timestamp)
{
struct buffer_page *tail_page;
struct ring_buffer_event *event;
unsigned long tail, write;
+ /*
+ * If the time delta since the last event is too big to
+ * hold in the time field of the event, then we append a
+ * TIME EXTEND event ahead of the data event.
+ */
+ if (unlikely(add_timestamp))
+ length += RB_LEN_TIME_EXTEND;
+
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
tail = write - length;
/* See if we shot pass the end of this buffer page */
- if (write > BUF_PAGE_SIZE)
+ if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
tail_page, ts);
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
- rb_update_event(event, type, length);
+ rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
- /* The passed in type is zero for DATA */
- if (likely(!type))
- local_inc(&tail_page->entries);
+ local_inc(&tail_page->entries);
/*
* If this is the first commit on the page, then update
* its timestamp.
*/
if (!tail)
- tail_page->page->time_stamp = *ts;
+ tail_page->page->time_stamp = ts;
return event;
}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long addr;
new_index = rb_event_index(event);
- old_index = new_index + rb_event_length(event);
+ old_index = new_index + rb_event_ts_length(event);
addr = (unsigned long)event;
addr &= PAGE_MASK;
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
}
-static int
-rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
- u64 *ts, u64 *delta)
-{
- struct ring_buffer_event *event;
- int ret;
-
- WARN_ONCE(*delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
- (unsigned long long)*delta,
- (unsigned long long)*ts,
- (unsigned long long)cpu_buffer->write_stamp);
-
- /*
- * The delta is too big, we to add a
- * new timestamp.
- */
- event = __rb_reserve_next(cpu_buffer,
- RINGBUF_TYPE_TIME_EXTEND,
- RB_LEN_TIME_EXTEND,
- ts);
- if (!event)
- return -EBUSY;
-
- if (PTR_ERR(event) == -EAGAIN)
- return -EAGAIN;
-
- /* Only a commited time event can update the write stamp */
- if (rb_event_is_commit(cpu_buffer, event)) {
- /*
- * If this is the first on the page, then it was
- * updated with the page itself. Try to discard it
- * and if we can't just make it zero.
- */
- if (rb_event_index(event)) {
- event->time_delta = *delta & TS_MASK;
- event->array[0] = *delta >> TS_SHIFT;
- } else {
- /* try to discard, since we do not need this */
- if (!rb_try_to_discard(cpu_buffer, event)) {
- /* nope, just zero it */
- event->time_delta = 0;
- event->array[0] = 0;
- }
- }
- cpu_buffer->write_stamp = *ts;
- /* let the caller know this was the commit */
- ret = 1;
- } else {
- /* Try to discard the event */
- if (!rb_try_to_discard(cpu_buffer, event)) {
- /* Darn, this is just wasted space */
- event->time_delta = 0;
- event->array[0] = 0;
- }
- ret = 0;
- }
-
- *delta = 0;
-
- return ret;
-}
-
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
local_inc(&cpu_buffer->committing);
local_inc(&cpu_buffer->commits);
}
-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
unsigned long length)
{
struct ring_buffer_event *event;
- u64 ts, delta = 0;
- int commit = 0;
+ u64 ts, delta;
int nr_loops = 0;
+ int add_timestamp;
+ u64 diff;
rb_start_commit(cpu_buffer);
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
length = rb_calculate_event_length(length);
again:
+ add_timestamp = 0;
+ delta = 0;
+
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
goto out_fail;
ts = rb_time_stamp(cpu_buffer->buffer);
+ diff = ts - cpu_buffer->write_stamp;
- /*
- * Only the first commit can update the timestamp.
- * Yes there is a race here. If an interrupt comes in
- * just after the conditional and it traces too, then it
- * will also check the deltas. More than one timestamp may
- * also be made. But only the entry that did the actual
- * commit will be something other than zero.
- */
- if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
- rb_page_write(cpu_buffer->tail_page) ==
- rb_commit_index(cpu_buffer))) {
- u64 diff;
-
- diff = ts - cpu_buffer->write_stamp;
-
- /* make sure this diff is calculated here */
- barrier();
-
- /* Did the write stamp get updated already? */
- if (unlikely(ts < cpu_buffer->write_stamp))
- goto get_event;
+ /* make sure this diff is calculated here */
+ barrier();
+ /* Did the write stamp get updated already? */
+ if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {
-
- commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
- if (commit == -EBUSY)
- goto out_fail;
-
- if (commit == -EAGAIN)
- goto again;
-
- RB_WARN_ON(cpu_buffer, commit < 0);
+ WARN_ONCE(delta > (1ULL << 59),
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+ (unsigned long long)delta,
+ (unsigned long long)ts,
+ (unsigned long long)cpu_buffer->write_stamp);
+ add_timestamp = 1;
}
}
- get_event:
- event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+ event = __rb_reserve_next(cpu_buffer, length, ts,
+ delta, add_timestamp);
if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again;
if (!event)
goto out_fail;
- if (!rb_event_is_commit(cpu_buffer, event))
- delta = 0;
-
- event->time_delta = delta;
-
return event;
out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
#define TRACE_RECURSIVE_DEPTH 16
-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
{
- current->trace_recursion++;
-
- if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
- return 0;
-
/* Disable all tracing before we do anything else */
tracing_off_permanent();
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
in_nmi());
WARN_ON_ONCE(1);
+}
+
+static inline int trace_recursive_lock(void)
+{
+ current->trace_recursion++;
+
+ if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+ return 0;
+
+ trace_recursive_fail();
+
return -1;
}
-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
{
WARN_ON_ONCE(!current->trace_recursion);
@@ -2308,12 +2298,28 @@ static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
+ u64 delta;
+
/*
* The event first in the commit queue updates the
* time stamp.
*/
- if (rb_event_is_commit(cpu_buffer, event))
- cpu_buffer->write_stamp += event->time_delta;
+ if (rb_event_is_commit(cpu_buffer, event)) {
+ /*
+ * A commit event that is first on a page
+ * updates the write timestamp with the page stamp
+ */
+ if (!rb_event_index(event))
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->write_stamp += delta;
+ } else
+ cpu_buffer->write_stamp += event->time_delta;
+ }
}
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static inline void rb_event_discard(struct ring_buffer_event *event)
{
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
+
/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
@@ -2606,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
+/*
+ * The total entries in the ring buffer is the running counter
+ * of entries entered into the ring buffer, minus the sum of
+ * the entries read from the ring buffer and the number of
+ * entries that were overwritten.
+ */
+static inline unsigned long
+rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return local_read(&cpu_buffer->entries) -
+ (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
+}
+
/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
* @buffer: The ring buffer
@@ -2614,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- unsigned long ret;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 0;
cpu_buffer = buffer->buffers[cpu];
- ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
- - cpu_buffer->read;
- return ret;
+ return rb_num_of_entries(cpu_buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
@@ -2684,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
/* if you care about this being correct, lock the buffer */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
- entries += (local_read(&cpu_buffer->entries) -
- local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
+ entries += rb_num_of_entries(cpu_buffer);
}
return entries;
@@ -2985,13 +3003,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
static void rb_advance_iter(struct ring_buffer_iter *iter)
{
- struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
unsigned length;
cpu_buffer = iter->cpu_buffer;
- buffer = cpu_buffer->buffer;
/*
* Check if we are at the end of the buffer.
@@ -3042,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
again:
/*
- * We repeat when a timestamp is encountered. It is possible
- * to get multiple timestamps from an interrupt entering just
- * as one timestamp is about to be written, or from discarded
- * commits. The most that we can have is the number on a single page.
+ * We repeat when a time extend is encountered.
+ * Since the time extend is always attached to a data event,
+ * we should never loop more than once.
+ * (We never hit the following condition more than twice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
reader = rb_get_reader_page(cpu_buffer);
@@ -3123,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL;
/*
- * We repeat when a timestamp is encountered.
- * We can get multiple timestamps by nested interrupts or also
- * if filtering is on (discarding commits). Since discarding
- * commits can be frequent we can get a lot of timestamps.
- * But we limit them by not adding timestamps if they begin
- * at the start of a page.
+ * We repeat when a time extend is encountered.
+ * Since the time extend is always attached to a data event,
+ * we should never loop more than once.
+ * (We never hit the following condition more than twice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
if (rb_per_cpu_empty(cpu_buffer))
@@ -3828,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (len > (commit - read))
len = (commit - read);
- size = rb_event_length(event);
+ /* Always keep the time extend and data together */
+ size = rb_event_ts_length(event);
if (len < size)
goto out_unlock;
@@ -3838,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
/* Need to copy one event at a time */
do {
+ /* We need the size of one event, because
+ * rb_advance_reader only advances by one event,
+ * whereas rb_event_ts_length may include the size of
+ * one or two events.
+ * We have already ensured there's enough space if this
+ * is a time extend. */
+ size = rb_event_length(event);
memcpy(bpage->data + pos, rpage->data + rpos, size);
len -= size;
@@ -3850,8 +3872,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
break;
event = rb_reader_event(cpu_buffer);
- size = rb_event_length(event);
- } while (len > size);
+ /* Always keep the time extend and data together */
+ size = rb_event_ts_length(event);
+ } while (len >= size);
/* update bpage */
local_set(&bpage->commit, pos);
@@ -3967,6 +3990,7 @@ static const struct file_operations rb_simple_fops = {
.open = tracing_open_generic,
.read = rb_simple_read,
.write = rb_simple_write,
+ .llseek = default_llseek,
};
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ec59f541156..f8cf959bad45 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void)
__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
}
+static DEFINE_PER_CPU(int, user_stack_count);
+
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (unlikely(in_nmi()))
return;
+ /*
+ * prevent recursion, since the user stack tracing may
+ * trigger other kernel events.
+ */
+ preempt_disable();
+ if (__this_cpu_read(user_stack_count))
+ goto out;
+
+ __this_cpu_inc(user_stack_count);
+
+
+
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
save_stack_trace_user(&trace);
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+
+ __this_cpu_dec(user_stack_count);
+
+ out:
+ preempt_enable();
}
#ifdef UNUSED
@@ -2196,7 +2214,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
static int tracing_release(struct inode *inode, struct file *file)
{
- struct seq_file *m = (struct seq_file *)file->private_data;
+ struct seq_file *m = file->private_data;
struct trace_iterator *iter;
int cpu;
@@ -2320,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
return count;
}
+static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
+{
+ if (file->f_mode & FMODE_READ)
+ return seq_lseek(file, offset, origin);
+ else
+ return 0;
+}
+
static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
.write = tracing_write_stub,
- .llseek = seq_lseek,
+ .llseek = tracing_seek,
.release = tracing_release,
};
@@ -3996,13 +4022,9 @@ static void tracing_init_debugfs_percpu(long cpu)
{
struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *d_cpu;
- /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
- char cpu_dir[7];
-
- if (cpu > 999 || cpu < 0)
- return;
+ char cpu_dir[30]; /* 30 characters should be more than enough */
- sprintf(cpu_dir, "cpu%ld", cpu);
+ snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d39b3c5454a5..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
+void trace_graph_function(struct trace_array *tr,
+ unsigned long ip,
+ unsigned long parent_ip,
+ unsigned long flags, int pc);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 000e6e85b445..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
#include <linux/kprobes.h>
#include "trace.h"
-static char *perf_trace_buf[4];
+static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+ struct perf_event *p_event)
+{
+ /* No tracing, just counting, so no obvious leak */
+ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+ return 0;
+
+ /* Some events are ok to be traced by non-root users... */
+ if (p_event->attach_state == PERF_ATTACH_TASK) {
+ if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+ return 0;
+ }
+
+ /*
+ * ...otherwise raw tracepoint data can be a severe data leak,
+ * only allow root to have these.
+ */
+ if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return 0;
+}
+
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
- struct hlist_head *list;
- int ret = -ENOMEM;
+ struct hlist_head __percpu *list;
+ int ret;
int cpu;
+ ret = perf_trace_event_perm(tp_event, p_event);
+ if (ret)
+ return ret;
+
p_event->tp_event = tp_event;
if (tp_event->perf_refcount++ > 0)
return 0;
+ ret = -ENOMEM;
+
list = alloc_percpu(struct hlist_head);
if (!list)
goto fail;
@@ -42,11 +71,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
tp_event->perf_events = list;
if (!total_ref_count) {
- char *buf;
+ char __percpu *buf;
int i;
- for (i = 0; i < 4; i++) {
- buf = (char *)alloc_percpu(perf_trace_t);
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
+ buf = (char __percpu *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail;
@@ -65,7 +94,7 @@ fail:
if (!total_ref_count) {
int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
@@ -91,6 +120,8 @@ int perf_trace_init(struct perf_event *p_event)
tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
+ if (ret)
+ module_put(tp_event->mod);
break;
}
}
@@ -99,22 +130,26 @@ int perf_trace_init(struct perf_event *p_event)
return ret;
}
-int perf_trace_enable(struct perf_event *p_event)
+int perf_trace_add(struct perf_event *p_event, int flags)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct hlist_head __percpu *pcpu_list;
struct hlist_head *list;
- list = tp_event->perf_events;
- if (WARN_ON_ONCE(!list))
+ pcpu_list = tp_event->perf_events;
+ if (WARN_ON_ONCE(!pcpu_list))
return -EINVAL;
- list = this_cpu_ptr(list);
+ if (!(flags & PERF_EF_START))
+ p_event->hw.state = PERF_HES_STOPPED;
+
+ list = this_cpu_ptr(pcpu_list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return 0;
}
-void perf_trace_disable(struct perf_event *p_event)
+void perf_trace_del(struct perf_event *p_event, int flags)
{
hlist_del_rcu(&p_event->hlist_entry);
}
@@ -140,12 +175,13 @@ void perf_trace_destroy(struct perf_event *p_event)
tp_event->perf_events = NULL;
if (!--total_ref_count) {
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
}
out:
+ module_put(tp_event->mod);
mutex_unlock(&event_mutex);
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..35fde09b81de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,6 +27,12 @@
DEFINE_MUTEX(event_mutex);
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
LIST_HEAD(ftrace_events);
LIST_HEAD(ftrace_common_fields);
@@ -600,21 +606,29 @@ out:
enum {
FORMAT_HEADER = 1,
- FORMAT_PRINTFMT = 2,
+ FORMAT_FIELD_SEPERATOR = 2,
+ FORMAT_PRINTFMT = 3,
};
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
- struct list_head *head;
+ struct list_head *common_head = &ftrace_common_fields;
+ struct list_head *head = trace_get_fields(call);
(*pos)++;
switch ((unsigned long)v) {
case FORMAT_HEADER:
- head = &ftrace_common_fields;
+ if (unlikely(list_empty(common_head)))
+ return NULL;
+ field = list_entry(common_head->prev,
+ struct ftrace_event_field, link);
+ return field;
+
+ case FORMAT_FIELD_SEPERATOR:
if (unlikely(list_empty(head)))
return NULL;
@@ -626,31 +640,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
return NULL;
}
- head = trace_get_fields(call);
-
- /*
- * To separate common fields from event fields, the
- * LSB is set on the first event field. Clear it in case.
- */
- v = (void *)((unsigned long)v & ~1L);
-
field = v;
- /*
- * If this is a common field, and at the end of the list, then
- * continue with main list.
- */
- if (field->link.prev == &ftrace_common_fields) {
- if (unlikely(list_empty(head)))
- return NULL;
- field = list_entry(head->prev, struct ftrace_event_field, link);
- /* Set the LSB to notify f_show to print an extra newline */
- field = (struct ftrace_event_field *)
- ((unsigned long)field | 1);
- return field;
- }
-
- /* If we are done tell f_show to print the format */
- if (field->link.prev == head)
+ if (field->link.prev == common_head)
+ return (void *)FORMAT_FIELD_SEPERATOR;
+ else if (field->link.prev == head)
return (void *)FORMAT_PRINTFMT;
field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +681,16 @@ static int f_show(struct seq_file *m, void *v)
seq_printf(m, "format:\n");
return 0;
+ case FORMAT_FIELD_SEPERATOR:
+ seq_putc(m, '\n');
+ return 0;
+
case FORMAT_PRINTFMT:
seq_printf(m, "\nprint fmt: %s\n",
call->print_fmt);
return 0;
}
- /*
- * To separate common fields from event fields, the
- * LSB is set on the first event field. Clear it and
- * print a newline if it is set.
- */
- if ((unsigned long)v & 1) {
- seq_putc(m, '\n');
- v = (void *)((unsigned long)v & ~1L);
- }
-
field = v;
/*
@@ -951,6 +938,7 @@ static const struct file_operations ftrace_enable_fops = {
.open = tracing_open_generic,
.read = event_enable_read,
.write = event_enable_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_event_format_fops = {
@@ -963,29 +951,34 @@ static const struct file_operations ftrace_event_format_fops = {
static const struct file_operations ftrace_event_id_fops = {
.open = tracing_open_generic,
.read = event_id_read,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_generic,
.read = event_filter_read,
.write = event_filter_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_subsystem_filter_fops = {
.open = tracing_open_generic,
.read = subsystem_filter_read,
.write = subsystem_filter_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_system_enable_fops = {
.open = tracing_open_generic,
.read = system_enable_read,
.write = system_enable_write,
+ .llseek = default_llseek,
};
static const struct file_operations ftrace_show_header_fops = {
.open = tracing_open_generic,
.read = show_header,
+ .llseek = default_llseek,
};
static struct dentry *event_trace_events_dir(void)
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4ba44deaac25..4b74d71705c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __array
#define __array(type, item, len) \
- BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
- ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+ do { \
+ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
+ mutex_lock(&event_storage_mutex); \
+ snprintf(event_storage, sizeof(event_storage), \
+ "%s[%d]", #type, len); \
+ ret = trace_define_field(event_call, event_storage, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), FILTER_OTHER); \
- if (ret) \
- return ret;
+ mutex_unlock(&event_storage_mutex); \
+ if (ret) \
+ return ret; \
+ } while (0);
#undef __array_desc
#define __array_desc(type, container, item, len) \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
#include "trace.h"
#include "trace_output.h"
+/* When set, irq functions will be ignored */
+static int ftrace_graph_skip_irqs;
+
struct fgraph_cpu_data {
pid_t last_pid;
int depth;
+ int depth_irq;
int ignore;
unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
};
struct fgraph_data {
- struct fgraph_cpu_data *cpu_data;
+ struct fgraph_cpu_data __percpu *cpu_data;
/* Place to preserve last processed entry. */
struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_PROC 0x8
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
+#define TRACE_GRAPH_PRINT_IRQS 0x40
static struct tracer_opt trace_opts[] = {
/* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
/* Display absolute time of an entry */
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
+ /* Display interrupts */
+ { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
/* Don't display overruns and proc by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
- TRACE_GRAPH_PRINT_DURATION,
+ TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
};
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
return 1;
}
+static inline int ftrace_graph_ignore_irqs(void)
+{
+ if (!ftrace_graph_skip_irqs)
+ return 0;
+
+ return in_irq();
+}
+
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
return 0;
/* trace it when it is-nested-in or is a function enabled. */
- if (!(trace->depth || ftrace_graph_addr(trace->func)))
+ if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
+ ftrace_graph_ignore_irqs())
return 0;
local_irq_save(flags);
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
return trace_graph_entry(trace);
}
+static void
+__trace_graph_function(struct trace_array *tr,
+ unsigned long ip, unsigned long flags, int pc)
+{
+ u64 time = trace_clock_local();
+ struct ftrace_graph_ent ent = {
+ .func = ip,
+ .depth = 0,
+ };
+ struct ftrace_graph_ret ret = {
+ .func = ip,
+ .depth = 0,
+ .calltime = time,
+ .rettime = time,
+ };
+
+ __trace_graph_entry(tr, &ent, flags, pc);
+ __trace_graph_return(tr, &ret, flags, pc);
+}
+
+void
+trace_graph_function(struct trace_array *tr,
+ unsigned long ip, unsigned long parent_ip,
+ unsigned long flags, int pc)
+{
+ __trace_graph_function(tr, ip, flags, pc);
+}
+
void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned long flags,
@@ -649,8 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
/* Print nsecs (we don't want to exceed 7 numbers) */
if (len < 7) {
- snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu",
- nsecs_rem);
+ size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
+
+ snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
ret = trace_seq_printf(s, ".%s", nsecs_str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -855,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
return 0;
}
+/*
+ * Entry check for irq code
+ *
+ * returns 1 if
+ * - we are inside irq code
+ * - we just extered irq code
+ *
+ * retunns 0 if
+ * - funcgraph-interrupts option is set
+ * - we are not inside irq code
+ */
+static int
+check_irq_entry(struct trace_iterator *iter, u32 flags,
+ unsigned long addr, int depth)
+{
+ int cpu = iter->cpu;
+ int *depth_irq;
+ struct fgraph_data *data = iter->private;
+
+ /*
+ * If we are either displaying irqs, or we got called as
+ * a graph event and private data does not exist,
+ * then we bypass the irq check.
+ */
+ if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
+ (!data))
+ return 0;
+
+ depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
+
+ /*
+ * We are inside the irq code
+ */
+ if (*depth_irq >= 0)
+ return 1;
+
+ if ((addr < (unsigned long)__irqentry_text_start) ||
+ (addr >= (unsigned long)__irqentry_text_end))
+ return 0;
+
+ /*
+ * We are entering irq code.
+ */
+ *depth_irq = depth;
+ return 1;
+}
+
+/*
+ * Return check for irq code
+ *
+ * returns 1 if
+ * - we are inside irq code
+ * - we just left irq code
+ *
+ * returns 0 if
+ * - funcgraph-interrupts option is set
+ * - we are not inside irq code
+ */
+static int
+check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
+{
+ int cpu = iter->cpu;
+ int *depth_irq;
+ struct fgraph_data *data = iter->private;
+
+ /*
+ * If we are either displaying irqs, or we got called as
+ * a graph event and private data does not exist,
+ * then we bypass the irq check.
+ */
+ if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
+ (!data))
+ return 0;
+
+ depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
+
+ /*
+ * We are not inside the irq code.
+ */
+ if (*depth_irq == -1)
+ return 0;
+
+ /*
+ * We are inside the irq code, and this is returning entry.
+ * Let's not trace it and clear the entry depth, since
+ * we are out of irq code.
+ *
+ * This condition ensures that we 'leave the irq code' once
+ * we are out of the entry depth. Thus protecting us from
+ * the RETURN entry loss.
+ */
+ if (*depth_irq >= depth) {
+ *depth_irq = -1;
+ return 1;
+ }
+
+ /*
+ * We are inside the irq code, and this is not the entry.
+ */
+ return 1;
+}
+
static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
struct trace_iterator *iter, u32 flags)
@@ -865,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
static enum print_line_t ret;
int cpu = iter->cpu;
+ if (check_irq_entry(iter, flags, call->func, call->depth))
+ return TRACE_TYPE_HANDLED;
+
if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
return TRACE_TYPE_PARTIAL_LINE;
@@ -902,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
int ret;
int i;
+ if (check_irq_return(iter, flags, trace->depth))
+ return TRACE_TYPE_HANDLED;
+
if (data) {
struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
@@ -1054,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
enum print_line_t
-print_graph_function_flags(struct trace_iterator *iter, u32 flags)
+__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
struct ftrace_graph_ent_entry *field;
struct fgraph_data *data = iter->private;
@@ -1117,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
static enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
- return print_graph_function_flags(iter, tracer_flags.val);
+ return __print_graph_function_flags(iter, tracer_flags.val);
+}
+
+enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
+ u32 flags)
+{
+ if (trace_flags & TRACE_ITER_LATENCY_FMT)
+ flags |= TRACE_GRAPH_PRINT_DURATION;
+ else
+ flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+
+ return __print_graph_function_flags(iter, flags);
}
static enum print_line_t
@@ -1149,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
seq_printf(s, "#%.*s|||| / \n", size, spaces);
}
-void print_graph_headers_flags(struct seq_file *s, u32 flags)
+static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
{
int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
@@ -1190,6 +1354,23 @@ void print_graph_headers(struct seq_file *s)
print_graph_headers_flags(s, tracer_flags.val);
}
+void print_graph_headers_flags(struct seq_file *s, u32 flags)
+{
+ struct trace_iterator *iter = s->private;
+
+ if (trace_flags & TRACE_ITER_LATENCY_FMT) {
+ /* print nothing if the buffers are empty */
+ if (trace_empty(iter))
+ return;
+
+ print_trace_header(s, iter);
+ flags |= TRACE_GRAPH_PRINT_DURATION;
+ } else
+ flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+
+ __print_graph_headers_flags(s, flags);
+}
+
void graph_trace_open(struct trace_iterator *iter)
{
/* pid and depth on the last trace processed */
@@ -1210,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter)
pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+ int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
+
*pid = -1;
*depth = 0;
*ignore = 0;
+ *depth_irq = -1;
}
iter->private = data;
@@ -1235,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter)
}
}
+static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
+{
+ if (bit == TRACE_GRAPH_PRINT_IRQS)
+ ftrace_graph_skip_irqs = !set;
+
+ return 0;
+}
+
static struct trace_event_functions graph_functions = {
.trace = print_graph_function_event,
};
@@ -1261,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
.print_line = print_graph_function,
.print_header = print_graph_headers,
.flags = &tracer_flags,
+ .set_flag = func_graph_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function_graph,
#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 73a6b0601f2e..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
#ifdef CONFIG_FUNCTION_TRACER
/*
- * irqsoff uses its own tracer function to keep the overhead down:
+ * Prologue for the preempt and irqs off function tracers.
+ *
+ * Returns 1 if it is OK to continue, and data->disabled is
+ * incremented.
+ * 0 if the trace is to be ignored, and data->disabled
+ * is kept the same.
+ *
+ * Note, this function is also used outside this ifdef but
+ * inside the #ifdef of the function graph tracer below.
+ * This is OK, since the function graph tracer is
+ * dependent on the function tracer.
*/
-static void
-irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+static int func_prolog_dec(struct trace_array *tr,
+ struct trace_array_cpu **data,
+ unsigned long *flags)
{
- struct trace_array *tr = irqsoff_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
long disabled;
int cpu;
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
*/
cpu = raw_smp_processor_id();
if (likely(!per_cpu(tracing_cpu, cpu)))
- return;
+ return 0;
- local_save_flags(flags);
+ local_save_flags(*flags);
/* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(flags))
- return;
+ if (!irqs_disabled_flags(*flags))
+ return 0;
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ *data = tr->data[cpu];
+ disabled = atomic_inc_return(&(*data)->disabled);
if (likely(disabled == 1))
- trace_function(tr, ip, parent_ip, flags, preempt_count());
+ return 1;
+
+ atomic_dec(&(*data)->disabled);
+
+ return 0;
+}
+
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+
+ if (!func_prolog_dec(tr, &data, &flags))
+ return;
+
+ trace_function(tr, ip, parent_ip, flags, preempt_count());
atomic_dec(&data->disabled);
}
@@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
int ret;
- int cpu;
int pc;
- cpu = raw_smp_processor_id();
- if (likely(!per_cpu(tracing_cpu, cpu)))
+ if (!func_prolog_dec(tr, &data, &flags))
return 0;
- local_save_flags(flags);
- /* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(flags))
- return 0;
-
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1)) {
- pc = preempt_count();
- ret = __trace_graph_entry(tr, trace, flags, pc);
- } else
- ret = 0;
-
+ pc = preempt_count();
+ ret = __trace_graph_entry(tr, trace, flags, pc);
atomic_dec(&data->disabled);
+
return ret;
}
@@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
- long disabled;
- int cpu;
int pc;
- cpu = raw_smp_processor_id();
- if (likely(!per_cpu(tracing_cpu, cpu)))
+ if (!func_prolog_dec(tr, &data, &flags))
return;
- local_save_flags(flags);
- /* slight chance to get a false positive on tracing_cpu */
- if (!irqs_disabled_flags(flags))
- return;
-
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1)) {
- pc = preempt_count();
- __trace_graph_return(tr, trace, flags, pc);
- }
-
+ pc = preempt_count();
+ __trace_graph_return(tr, trace, flags, pc);
atomic_dec(&data->disabled);
}
@@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
- u32 flags = GRAPH_TRACER_FLAGS;
-
- if (trace_flags & TRACE_ITER_LATENCY_FMT)
- flags |= TRACE_GRAPH_PRINT_DURATION;
- else
- flags |= TRACE_GRAPH_PRINT_ABS_TIME;
-
/*
* In graph mode call the graph tracer output function,
* otherwise go with the TRACE_FN event handler
*/
if (is_graph())
- return print_graph_function_flags(iter, flags);
+ return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
return TRACE_TYPE_UNHANDLED;
}
static void irqsoff_print_header(struct seq_file *s)
{
- if (is_graph()) {
- struct trace_iterator *iter = s->private;
- u32 flags = GRAPH_TRACER_FLAGS;
-
- if (trace_flags & TRACE_ITER_LATENCY_FMT) {
- /* print nothing if the buffers are empty */
- if (trace_empty(iter))
- return;
-
- print_trace_header(s, iter);
- flags |= TRACE_GRAPH_PRINT_DURATION;
- } else
- flags |= TRACE_GRAPH_PRINT_ABS_TIME;
-
- print_graph_headers_flags(s, flags);
- } else
+ if (is_graph())
+ print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
+ else
trace_default_header(s);
}
static void
-trace_graph_function(struct trace_array *tr,
- unsigned long ip, unsigned long flags, int pc)
-{
- u64 time = trace_clock_local();
- struct ftrace_graph_ent ent = {
- .func = ip,
- .depth = 0,
- };
- struct ftrace_graph_ret ret = {
- .func = ip,
- .depth = 0,
- .calltime = time,
- .rettime = time,
- };
-
- __trace_graph_entry(tr, &ent, flags, pc);
- __trace_graph_return(tr, &ret, flags, pc);
-}
-
-static void
__trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip,
unsigned long flags, int pc)
{
- if (!is_graph())
+ if (is_graph())
+ trace_graph_function(tr, ip, parent_ip, flags, pc);
+ else
trace_function(tr, ip, parent_ip, flags, pc);
- else {
- trace_graph_function(tr, parent_ip, flags, pc);
- trace_graph_function(tr, ip, flags, pc);
- }
}
#else
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 7b8ecd751d93..3c5c5dfea0b3 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -13,7 +13,6 @@
#include <linux/kdb.h>
#include <linux/ftrace.h>
-#include "../debug/kdb/kdb_private.h"
#include "trace.h"
#include "trace_output.h"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8b27c9849b42..2dec9bcde8b4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -31,7 +31,6 @@
#include <linux/perf_event.h>
#include <linux/stringify.h>
#include <linux/limits.h>
-#include <linux/uaccess.h>
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -514,8 +513,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
-/* Check the name is good for event/group */
-static int check_event_name(const char *name)
+/* Check the name is good for event/group/fields */
+static int is_good_name(const char *name)
{
if (!isalpha(*name) && *name != '_')
return 0;
@@ -557,7 +556,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
else
tp->rp.kp.pre_handler = kprobe_dispatcher;
- if (!event || !check_event_name(event)) {
+ if (!event || !is_good_name(event)) {
ret = -EINVAL;
goto error;
}
@@ -567,7 +566,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
if (!tp->call.name)
goto error;
- if (!group || !check_event_name(group)) {
+ if (!group || !is_good_name(group)) {
ret = -EINVAL;
goto error;
}
@@ -648,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp)
}
ret = register_probe_event(tp);
if (ret) {
- pr_warning("Faild to register probe event(%d)\n", ret);
+ pr_warning("Failed to register probe event(%d)\n", ret);
goto end;
}
@@ -883,7 +882,7 @@ static int create_trace_probe(int argc, char **argv)
int i, ret = 0;
int is_return = 0, is_delete = 0;
char *symbol = NULL, *event = NULL, *group = NULL;
- char *arg, *tmp;
+ char *arg;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -992,26 +991,36 @@ static int create_trace_probe(int argc, char **argv)
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+ /* Increment count for freeing args in error case */
+ tp->nr_args++;
+
/* Parse argument name */
arg = strchr(argv[i], '=');
- if (arg)
+ if (arg) {
*arg++ = '\0';
- else
+ tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+ } else {
arg = argv[i];
+ /* If argument name is omitted, set "argN" */
+ snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
+ tp->args[i].name = kstrdup(buf, GFP_KERNEL);
+ }
- tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
if (!tp->args[i].name) {
- pr_info("Failed to allocate argument%d name '%s'.\n",
- i, argv[i]);
+ pr_info("Failed to allocate argument[%d] name.\n", i);
ret = -ENOMEM;
goto error;
}
- tmp = strchr(tp->args[i].name, ':');
- if (tmp)
- *tmp = '_'; /* convert : to _ */
+
+ if (!is_good_name(tp->args[i].name)) {
+ pr_info("Invalid argument[%d] name: %s\n",
+ i, tp->args[i].name);
+ ret = -EINVAL;
+ goto error;
+ }
if (conflict_field_name(tp->args[i].name, tp->args, i)) {
- pr_info("Argument%d name '%s' conflicts with "
+ pr_info("Argument[%d] name '%s' conflicts with "
"another field.\n", i, argv[i]);
ret = -EINVAL;
goto error;
@@ -1020,12 +1029,9 @@ static int create_trace_probe(int argc, char **argv)
/* Parse fetch argument */
ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
if (ret) {
- pr_info("Parse error at argument%d. (%d)\n", i, ret);
- kfree(tp->args[i].name);
+ pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
goto error;
}
-
- tp->nr_args++;
}
ret = register_trace_probe(tp);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 4086eae6e81b..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,48 +31,98 @@ static int wakeup_rt;
static arch_spinlock_t wakeup_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
+static void wakeup_graph_return(struct ftrace_graph_ret *trace);
static int save_lat_flag;
+#define TRACE_DISPLAY_GRAPH 1
+
+static struct tracer_opt trace_opts[] = {
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* display latency trace as call graph */
+ { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
+#endif
+ { } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+ .val = 0,
+ .opts = trace_opts,
+};
+
+#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
+
#ifdef CONFIG_FUNCTION_TRACER
+
/*
- * irqsoff uses its own tracer function to keep the overhead down:
+ * Prologue for the wakeup function tracers.
+ *
+ * Returns 1 if it is OK to continue, and preemption
+ * is disabled and data->disabled is incremented.
+ * 0 if the trace is to be ignored, and preemption
+ * is not disabled and data->disabled is
+ * kept the same.
+ *
+ * Note, this function is also used outside this ifdef but
+ * inside the #ifdef of the function graph tracer below.
+ * This is OK, since the function graph tracer is
+ * dependent on the function tracer.
*/
-static void
-wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+static int
+func_prolog_preempt_disable(struct trace_array *tr,
+ struct trace_array_cpu **data,
+ int *pc)
{
- struct trace_array *tr = wakeup_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
long disabled;
int cpu;
- int pc;
if (likely(!wakeup_task))
- return;
+ return 0;
- pc = preempt_count();
+ *pc = preempt_count();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
if (cpu != wakeup_current_cpu)
goto out_enable;
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
+ *data = tr->data[cpu];
+ disabled = atomic_inc_return(&(*data)->disabled);
if (unlikely(disabled != 1))
goto out;
- local_irq_save(flags);
+ return 1;
- trace_function(tr, ip, parent_ip, flags, pc);
+out:
+ atomic_dec(&(*data)->disabled);
+
+out_enable:
+ preempt_enable_notrace();
+ return 0;
+}
+/*
+ * wakeup uses its own tracer function to keep the overhead down:
+ */
+static void
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return;
+
+ local_irq_save(flags);
+ trace_function(tr, ip, parent_ip, flags, pc);
local_irq_restore(flags);
- out:
atomic_dec(&data->disabled);
- out_enable:
preempt_enable_notrace();
}
@@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
};
#endif /* CONFIG_FUNCTION_TRACER */
+static int start_func_tracer(int graph)
+{
+ int ret;
+
+ if (!graph)
+ ret = register_ftrace_function(&trace_ops);
+ else
+ ret = register_ftrace_graph(&wakeup_graph_return,
+ &wakeup_graph_entry);
+
+ if (!ret && tracing_is_enabled())
+ tracer_enabled = 1;
+ else
+ tracer_enabled = 0;
+
+ return ret;
+}
+
+static void stop_func_tracer(int graph)
+{
+ tracer_enabled = 0;
+
+ if (!graph)
+ unregister_ftrace_function(&trace_ops);
+ else
+ unregister_ftrace_graph();
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
+{
+
+ if (!(bit & TRACE_DISPLAY_GRAPH))
+ return -EINVAL;
+
+ if (!(is_graph() ^ set))
+ return 0;
+
+ stop_func_tracer(!set);
+
+ wakeup_reset(wakeup_trace);
+ tracing_max_latency = 0;
+
+ return start_func_tracer(set);
+}
+
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc, ret = 0;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return 0;
+
+ local_save_flags(flags);
+ ret = __trace_graph_entry(tr, trace, flags, pc);
+ atomic_dec(&data->disabled);
+ preempt_enable_notrace();
+
+ return ret;
+}
+
+static void wakeup_graph_return(struct ftrace_graph_ret *trace)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return;
+
+ local_save_flags(flags);
+ __trace_graph_return(tr, trace, flags, pc);
+ atomic_dec(&data->disabled);
+
+ preempt_enable_notrace();
+ return;
+}
+
+static void wakeup_trace_open(struct trace_iterator *iter)
+{
+ if (is_graph())
+ graph_trace_open(iter);
+}
+
+static void wakeup_trace_close(struct trace_iterator *iter)
+{
+ if (iter->private)
+ graph_trace_close(iter);
+}
+
+#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
+
+static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
+{
+ /*
+ * In graph mode call the graph tracer output function,
+ * otherwise go with the TRACE_FN event handler
+ */
+ if (is_graph())
+ return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
+
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static void wakeup_print_header(struct seq_file *s)
+{
+ if (is_graph())
+ print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
+ else
+ trace_default_header(s);
+}
+
+static void
+__trace_function(struct trace_array *tr,
+ unsigned long ip, unsigned long parent_ip,
+ unsigned long flags, int pc)
+{
+ if (is_graph())
+ trace_graph_function(tr, ip, parent_ip, flags, pc);
+ else
+ trace_function(tr, ip, parent_ip, flags, pc);
+}
+#else
+#define __trace_function trace_function
+
+static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
+{
+ return -EINVAL;
+}
+
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
+{
+ return -1;
+}
+
+static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
+{
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
+static void wakeup_print_header(struct seq_file *s) { }
+static void wakeup_trace_open(struct trace_iterator *iter) { }
+static void wakeup_trace_close(struct trace_iterator *iter) { }
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
/*
* Should this new latency be reported/recorded?
*/
@@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore,
/* The task we are waiting for is waking up */
data = wakeup_trace->data[wakeup_cpu];
- trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
+ __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
T0 = data->preempt_timestamp;
@@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
* is not called by an assembly function (where as schedule is)
* it should be safe to use it here.
*/
- trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
arch_spin_unlock(&wakeup_lock);
@@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
*/
smp_wmb();
- register_ftrace_function(&trace_ops);
-
- if (tracing_is_enabled())
- tracer_enabled = 1;
- else
- tracer_enabled = 0;
+ if (start_func_tracer(is_graph()))
+ printk(KERN_ERR "failed to start wakeup tracer\n");
return;
fail_deprobe_wake_new:
@@ -320,7 +516,7 @@ fail_deprobe:
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
- unregister_ftrace_function(&trace_ops);
+ stop_func_tracer(is_graph());
unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly =
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
.print_max = 1,
+ .print_header = wakeup_print_header,
+ .print_line = wakeup_print_line,
+ .flags = &tracer_flags,
+ .set_flag = wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
+ .open = wakeup_trace_open,
+ .close = wakeup_trace_close,
.use_max_tr = 1,
};
@@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly =
.stop = wakeup_tracer_stop,
.wait_pipe = poll_wait_pipe,
.print_max = 1,
+ .print_header = wakeup_print_header,
+ .print_line = wakeup_print_line,
+ .flags = &tracer_flags,
+ .set_flag = wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
+ .open = wakeup_trace_open,
+ .close = wakeup_trace_close,
.use_max_tr = 1,
};
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 155a415b3209..562c56e048fd 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
static int trace_wakeup_test_thread(void *data)
{
/* Make this a RT thread, doesn't need to be too high */
- struct sched_param param = { .sched_priority = 5 };
+ static struct sched_param param = { .sched_priority = 5 };
struct completion *x = data;
sched_setscheduler(current, SCHED_FIFO, &param);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a6b7e0e0f3eb..4c5dead0c239 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = {
.open = tracing_open_generic,
.read = stack_max_size_read,
.write = stack_max_size_write,
+ .llseek = default_llseek,
};
static void *
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
{
int ret, cpu;
+ for_each_possible_cpu(cpu) {
+ spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
+ INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
+ }
+
ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
if (ret)
goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
if (ret)
goto no_creation;
- for_each_possible_cpu(cpu) {
- spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
- INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
- }
-
return 0;
no_creation:
OpenPOWER on IntegriCloud