From 8cbd9cc6254065c97c4bac42daa55ba1abe73a8e Mon Sep 17 00:00:00 2001 From: David Sharp Date: Tue, 13 Nov 2012 12:18:21 -0800 Subject: tracing,x86: Add a TSC trace_clock In order to promote interoperability between userspace tracers and ftrace, add a trace_clock that reports raw TSC values which will then be recorded in the ring buffer. Userspace tracers that also record TSCs are then on exactly the same time base as the kernel and events can be unambiguously interlaced. Tested: Enabled a tracepoint and the "tsc" trace_clock and saw very large timestamp values. v2: Move arch-specific bits out of generic code. v3: Rename "x86-tsc", cleanups v7: Generic arch bits in Kbuild. Google-Bug-Id: 6980623 Link: http://lkml.kernel.org/r/1352837903-32191-1-git-send-email-dhsharp@google.com Acked-by: Ingo Molnar Cc: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c1434b5ce4d1..0d20620c0d27 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -488,6 +488,7 @@ static struct { { trace_clock_local, "local" }, { trace_clock_global, "global" }, { trace_clock_counter, "counter" }, + ARCH_TRACE_CLOCKS }; int trace_clock_id; -- cgit v1.2.1 From 8be0709f10e3dd5d7d07933ad61a9f18c4b93ca5 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Tue, 13 Nov 2012 12:18:22 -0800 Subject: tracing: Format non-nanosec times from tsc clock without a decimal point. With the addition of the "tsc" clock, formatting timestamps to look like fractional seconds is misleading. Mark clocks as either in nanoseconds or not, and format non-nanosecond timestamps as decimal integers. Tested: $ cd /sys/kernel/debug/tracing/ $ cat trace_clock [local] global tsc $ echo sched_switch > set_event $ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on $ cat trace -0 [000] 6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120 sleep-29964 [000] 6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... $ echo 1 > options/latency-format $ cat trace -0 0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120 sleep-29964 0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... $ echo tsc > trace_clock $ cat trace $ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on $ echo 0 > options/latency-format $ cat trace -0 [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120 sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... echo 1 > options/latency-format $ cat trace -0 0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120 sleep-31128 0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... v2: Move arch-specific bits out of generic code. v4: Fix x86_32 build due to 64-bit division. Google-Bug-Id: 6980623 Link: http://lkml.kernel.org/r/1352837903-32191-2-git-send-email-dhsharp@google.com Cc: Masami Hiramatsu Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 15 +++++++-- kernel/trace/trace.h | 4 --- kernel/trace/trace_output.c | 78 ++++++++++++++++++++++++++++++--------------- 3 files changed, 65 insertions(+), 32 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0d20620c0d27..d943e69569cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -484,10 +484,11 @@ static const char *trace_options[] = { static struct { u64 (*func)(void); const char *name; + int in_ns; /* is this clock in nanoseconds? */ } trace_clocks[] = { - { trace_clock_local, "local" }, - { trace_clock_global, "global" }, - { trace_clock_counter, "counter" }, + { trace_clock_local, "local", 1 }, + { trace_clock_global, "global", 1 }, + { trace_clock_counter, "counter", 0 }, ARCH_TRACE_CLOCKS }; @@ -2478,6 +2479,10 @@ __tracing_open(struct inode *inode, struct file *file) if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[trace_clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + /* stop the trace while dumping */ tracing_stop(); @@ -3339,6 +3344,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[trace_clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + iter->cpu_file = cpu_file; iter->tr = &global_trace; mutex_init(&iter->mutex); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55010ed175f0..c75d7988902c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -406,10 +406,6 @@ void tracing_stop_sched_switch_record(void); void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); -enum trace_file_type { - TRACE_FILE_LAT_FMT = 1, - TRACE_FILE_ANNOTATE = 2, -}; extern cpumask_var_t __read_mostly tracing_buffer_mask; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 123b189c732c..194d79602dc7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -610,24 +610,54 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) return trace_print_lat_fmt(s, entry); } -static unsigned long preempt_mark_thresh = 100; +static unsigned long preempt_mark_thresh_us = 100; static int -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, - unsigned long rel_usecs) +lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) { - return trace_seq_printf(s, " %4lldus%c: ", abs_usecs, - rel_usecs > preempt_mark_thresh ? '!' : - rel_usecs > 1 ? '+' : ' '); + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; + unsigned long long abs_ts = iter->ts - iter->tr->time_start; + unsigned long long rel_ts = next_ts - iter->ts; + struct trace_seq *s = &iter->seq; + + if (in_ns) { + abs_ts = ns2usecs(abs_ts); + rel_ts = ns2usecs(rel_ts); + } + + if (verbose && in_ns) { + unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC); + unsigned long abs_msec = (unsigned long)abs_ts; + unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC); + unsigned long rel_msec = (unsigned long)rel_ts; + + return trace_seq_printf( + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", + ns2usecs(iter->ts), + abs_msec, abs_usec, + rel_msec, rel_usec); + } else if (verbose && !in_ns) { + return trace_seq_printf( + s, "[%016llx] %lld (+%lld): ", + iter->ts, abs_ts, rel_ts); + } else if (!verbose && in_ns) { + return trace_seq_printf( + s, " %4lldus%c: ", + abs_ts, + rel_ts > preempt_mark_thresh_us ? '!' : + rel_ts > 1 ? '+' : ' '); + } else { /* !verbose && !in_ns */ + return trace_seq_printf(s, " %4lld: ", abs_ts); + } } int trace_print_context(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; - unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, USEC_PER_SEC); - unsigned long secs = (unsigned long)t; + unsigned long long t; + unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; int ret; @@ -644,8 +674,13 @@ int trace_print_context(struct trace_iterator *iter) return 0; } - return trace_seq_printf(s, " %5lu.%06lu: ", - secs, usec_rem); + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { + t = ns2usecs(iter->ts); + usec_rem = do_div(t, USEC_PER_SEC); + secs = (unsigned long)t; + return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); + } else + return trace_seq_printf(s, " %12llu: ", iter->ts); } int trace_print_lat_context(struct trace_iterator *iter) @@ -659,36 +694,29 @@ int trace_print_lat_context(struct trace_iterator *iter) *next_entry = trace_find_next_entry(iter, NULL, &next_ts); unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); - unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); - unsigned long rel_usecs; /* Restore the original ent_size */ iter->ent_size = ent_size; if (!next_entry) next_ts = iter->ts; - rel_usecs = ns2usecs(next_ts - iter->ts); if (verbose) { char comm[TASK_COMM_LEN]; trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" - " %ld.%03ldms (+%ld.%03ldms): ", comm, - entry->pid, iter->cpu, entry->flags, - entry->preempt_count, iter->idx, - ns2usecs(iter->ts), - abs_usecs / USEC_PER_MSEC, - abs_usecs % USEC_PER_MSEC, - rel_usecs / USEC_PER_MSEC, - rel_usecs % USEC_PER_MSEC); + ret = trace_seq_printf( + s, "%16s %5d %3d %d %08x %08lx ", + comm, entry->pid, iter->cpu, entry->flags, + entry->preempt_count, iter->idx); } else { ret = lat_print_generic(s, entry, iter->cpu); - if (ret) - ret = lat_print_timestamp(s, abs_usecs, rel_usecs); } + if (ret) + ret = lat_print_timestamp(iter, next_ts); + return ret; } -- cgit v1.2.1 From 11043d8b125671a32253cddb0b05177be0e976f6 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Tue, 13 Nov 2012 12:18:23 -0800 Subject: tracing: Show raw time stamp on stats per cpu using counter or tsc mode for trace_clock Show raw time stamp values for stats per cpu if you choose counter or tsc mode for trace_clock. Although a unit of tracing time stamp is nsec in local or global mode, the units in counter and TSC mode are tracing counter and cycles respectively. Link: http://lkml.kernel.org/r/1352837903-32191-3-git-send-email-dhsharp@google.com Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d943e69569cd..b69cc380322d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4388,13 +4388,24 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); trace_seq_printf(s, "bytes: %ld\n", cnt); - t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); - usec_rem = do_div(t, USEC_PER_SEC); - trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); + if (trace_clocks[trace_clock_id].in_ns) { + /* local or global for trace_clock */ + t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); + usec_rem = do_div(t, USEC_PER_SEC); + trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", + t, usec_rem); + + t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); + usec_rem = do_div(t, USEC_PER_SEC); + trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); + } else { + /* counter or tsc mode for trace_clock */ + trace_seq_printf(s, "oldest event ts: %llu\n", + ring_buffer_oldest_event_ts(tr->buffer, cpu)); - t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); - usec_rem = do_div(t, USEC_PER_SEC); - trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); + trace_seq_printf(s, "now ts: %llu\n", + ring_buffer_time_stamp(tr->buffer, cpu)); + } cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); trace_seq_printf(s, "dropped events: %ld\n", cnt); -- cgit v1.2.1