From 944ac4259e39801c843a915c3da8194ac9af0440 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 19:26:08 -0400 Subject: ftrace: ftrace dump on oops control Impact: add (default-off) dump-trace-on-oops flag Currently, ftrace is set up to dump its contents to the console if the kernel panics or oops. This can be annoying if you have trace data in the buffers and you experience an oops, but the trace data is old or static. Usually when you want ftrace to dump its contents is when you are debugging your system and you have set up ftrace to trace the events leading to an oops. This patch adds a control variable called "ftrace_dump_on_oops" that will enable the ftrace dump to console on oops. This variable is default off but a developer can enable it either through the kernel command line by adding "ftrace_dump_on_oops" or at run time by setting (or disabling) /proc/sys/kernel/ftrace_dump_on_oops. v2: Replaced /** with /* as Randy explained that kernel-doc does not yet handle variables. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d345d649d073..47f46cbdd860 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -63,6 +63,28 @@ static cpumask_t __read_mostly tracing_buffer_mask; static int tracing_disabled = 1; +/* + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops + * + * If there is an oops (or kernel panic) and the ftrace_dump_on_oops + * is set, then ftrace_dump is called. This will output the contents + * of the ftrace buffers to the console. This is very useful for + * capturing traces that lead to crashes and outputing it to a + * serial console. + * + * It is default off, but you can enable it with either specifying + * "ftrace_dump_on_oops" in the kernel command line, or setting + * /proc/sys/kernel/ftrace_dump_on_oops to true. + */ +int ftrace_dump_on_oops; + +static int __init set_ftrace_dump_on_oops(char *str) +{ + ftrace_dump_on_oops = 1; + return 1; +} +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); + long ns2usecs(cycle_t nsec) { @@ -3021,7 +3043,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); return NOTIFY_OK; } @@ -3037,7 +3060,8 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); break; default: break; @@ -3078,7 +3102,6 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } - void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); -- cgit v1.2.3 From b807c3d0f8e39ed7cbbbe6da162650e305e8de15 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 30 Oct 2008 16:08:33 -0400 Subject: ftrace: nmi update statistics Impact: add more debug info to /debugfs/tracing/dyn_ftrace_total_info This patch adds dynamic ftrace NMI update statistics to the /debugfs/tracing/dyn_ftrace_total_info stat file. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 26 ++++++++++++++++++++++++-- kernel/trace/trace.c | 31 ++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index fe5f859130b5..6685b0fc1b44 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -91,6 +91,19 @@ static int mod_code_write; static void *mod_code_ip; static void *mod_code_newcode; +static int nmi_wait_count; +static atomic_t nmi_update_count; + +int ftrace_arch_read_dyn_info(char *buf, int size) +{ + int r; + + r = snprintf(buf, size, "%u %u", + nmi_wait_count, + atomic_read(&nmi_update_count)); + return r; +} + static void ftrace_mod_code(void) { /* @@ -109,8 +122,10 @@ void ftrace_nmi_enter(void) atomic_inc(&in_nmi); /* Must have in_nmi seen before reading write flag */ smp_mb(); - if (mod_code_write) + if (mod_code_write) { ftrace_mod_code(); + atomic_inc(&nmi_update_count); + } } void ftrace_nmi_exit(void) @@ -122,8 +137,15 @@ void ftrace_nmi_exit(void) static void wait_for_nmi(void) { - while (atomic_read(&in_nmi)) + int waited = 0; + + while (atomic_read(&in_nmi)) { + waited = 1; cpu_relax(); + } + + if (waited) + nmi_wait_count++; } static int diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a610ca771558..bc36febc0771 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2815,22 +2815,39 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE +#define DYN_INFO_BUF_SIZE 1023 +static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1]; +static DEFINE_MUTEX(dyn_info_mutex); + +int __weak ftrace_arch_read_dyn_info(char *buf, int size) +{ + return 0; +} + static ssize_t -tracing_read_long(struct file *filp, char __user *ubuf, +tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long *p = filp->private_data; - char buf[64]; + char *buf = ftrace_dyn_info_buffer; int r; - r = sprintf(buf, "%ld\n", *p); + mutex_lock(&dyn_info_mutex); + r = sprintf(buf, "%ld ", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r); + buf[r++] = '\n'; + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + mutex_unlock(&dyn_info_mutex); + + return r; } -static struct file_operations tracing_read_long_fops = { +static struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, - .read = tracing_read_long, + .read = tracing_read_dyn_info, }; #endif @@ -2939,7 +2956,7 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, - &tracing_read_long_fops); + &tracing_dyn_info_fops); if (!entry) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); -- cgit v1.2.3 From a26a2a27396c0a0877aa701f8f92d08ba550a6c9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 00:03:22 -0400 Subject: ftrace: nmi safe code clean ups Impact: cleanup This patch cleans up the NMI safe code for dynamic ftrace as suggested by Andrew Morton. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/include/asm/ftrace.h | 4 ++-- arch/powerpc/include/asm/ftrace.h | 4 ++-- arch/sh/include/asm/ftrace.h | 4 ++-- arch/sparc/include/asm/ftrace.h | 4 ++-- arch/x86/include/asm/ftrace.h | 10 +++++----- arch/x86/kernel/ftrace.c | 16 ++++++++-------- include/linux/ftrace.h | 3 +++ kernel/trace/trace.c | 9 ++++----- 8 files changed, 28 insertions(+), 26 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index d4c24a7a9280..3f3a1d1508ea 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define _ASM_ARM_FTRACE #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 7652755dc000..1cd72700fbc0 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define _ASM_POWERPC_FTRACE #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index cdf2cb0b9ffe..31ada0370cb6 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define __ASM_SH_FTRACE_H #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 33a95feeb137..62055ac0496e 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define _ASM_SPARC64_FTRACE #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifdef CONFIG_MCOUNT diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f2ed6b704a75..a23468194b8c 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -22,16 +22,16 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) -#endif +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif +#endif /* __ASSEMBLY__ */ #else /* CONFIG_FUNCTION_TRACER */ #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 6685b0fc1b44..69149337f2fe 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -67,7 +67,7 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * * Two buffers are added: An IP buffer and a "code" buffer. * - * 1) Put in the instruction pointer into the IP buffer + * 1) Put the instruction pointer into the IP buffer * and the new code into the "code" buffer. * 2) Set a flag that says we are modifying code * 3) Wait for any running NMIs to finish. @@ -85,14 +85,14 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * are the same as what exists. */ -static atomic_t in_nmi; -static int mod_code_status; -static int mod_code_write; -static void *mod_code_ip; -static void *mod_code_newcode; +static atomic_t in_nmi = ATOMIC_INIT(0); +static int mod_code_status; /* holds return value of text write */ +static int mod_code_write; /* set when NMI should do the write */ +static void *mod_code_ip; /* holds the IP to write to */ +static void *mod_code_newcode; /* holds the text to write to the IP */ -static int nmi_wait_count; -static atomic_t nmi_update_count; +static unsigned nmi_wait_count; +static atomic_t nmi_update_count = ATOMIC_INIT(0); int ftrace_arch_read_dyn_info(char *buf, int size) { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..22240dfe912e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); + /** * ftrace_modify_code - modify code segment * @ip: the address of the code segment diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bc36febc0771..7f86067d760c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2815,10 +2815,6 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE -#define DYN_INFO_BUF_SIZE 1023 -static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1]; -static DEFINE_MUTEX(dyn_info_mutex); - int __weak ftrace_arch_read_dyn_info(char *buf, int size) { return 0; @@ -2828,14 +2824,17 @@ static ssize_t tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + static char ftrace_dyn_info_buffer[1024]; + static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; char *buf = ftrace_dyn_info_buffer; + int size = ARRAY_SIZE(ftrace_dyn_info_buffer); int r; mutex_lock(&dyn_info_mutex); r = sprintf(buf, "%ld ", *p); - r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r); + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); buf[r++] = '\n'; r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -- cgit v1.2.3 From d9e540762f5cdd89f24e518ad1fd31142d0b9726 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Nov 2008 19:57:37 +0100 Subject: ftrace: ftrace_dump_on_oops=[tracer] Impact: add new (optional) debug boot option In order to facilitate early boot trouble, allow one to specify a tracer on the kernel boot line. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 8 +++++ kernel/trace/trace.c | 58 ++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 20 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..4862284d3119 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + ftrace=[tracer] + [ftrace] will set and start the specified tracer + as early as possible in order to facilitate early + boot debugging. + + ftrace_dump_on_oops + [ftrace] will dump the trace buffers on oops. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bdb1df00fb10..482583eb8001 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -79,6 +79,15 @@ static int tracing_disabled = 1; */ int ftrace_dump_on_oops; +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + static int __init set_ftrace_dump_on_oops(char *str) { ftrace_dump_on_oops = 1; @@ -2394,29 +2403,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_set_tracer(char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. */ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + int ret = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2440,6 +2431,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); + return ret; +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + ret = tracing_set_tracer(buf); + if (!ret) + ret = cnt; + if (ret > 0) filp->f_pos += ret; -- cgit v1.2.3 From 182e9f5f704ed6b9175142fe8da33c9ce0c52b52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 3 Nov 2008 23:15:56 -0500 Subject: ftrace: insert in the ftrace_preempt_disable()/enable() functions Impact: use new, consolidated APIs in ftrace plugins This patch replaces the schedule safe preempt disable code with the ftrace_preempt_disable() and ftrace_preempt_enable() safe functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 27 +++++++++------------------ kernel/trace/trace.c | 8 ++------ kernel/trace/trace_sched_wakeup.c | 13 ++----------- kernel/trace/trace_stack.c | 8 ++------ 4 files changed, 15 insertions(+), 41 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cedf4e268285..151f6a748676 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -16,6 +16,8 @@ #include #include +#include "trace.h" + /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 @@ -1122,8 +1124,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return NULL; /* If we are tracing schedule, we don't want to recurse */ - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); @@ -1154,10 +1155,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, return event; out: - if (resched) - preempt_enable_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); return NULL; } @@ -1199,12 +1197,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, /* * Only the last preempt count needs to restore preemption. */ - if (preempt_count() == 1) { - if (per_cpu(rb_need_resched, cpu)) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); - } else + if (preempt_count() == 1) + ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); + else preempt_enable_no_resched_notrace(); return 0; @@ -1237,8 +1232,7 @@ int ring_buffer_write(struct ring_buffer *buffer, if (atomic_read(&buffer->record_disabled)) return -EBUSY; - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); @@ -1264,10 +1258,7 @@ int ring_buffer_write(struct ring_buffer *buffer, ret = 0; out: - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); return ret; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e4c40c868d67..3e7bf5eb9007 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -904,8 +904,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); local_save_flags(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -915,10 +914,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) trace_function(tr, data, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); } static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3ae93f16b565..7bc4abf6fca8 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) out: atomic_dec(&data->disabled); - /* - * To prevent recursion from the scheduler, if the - * resched flag was set before we entered, then - * don't reschedule. - */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); } static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index be682b62fe58..d39e8b7de6a2 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_enabled || stack_trace_disabled)) return; - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); cpu = raw_smp_processor_id(); /* no atomic needed, we only modify this variable by this cpu */ @@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) out: per_cpu(trace_active, cpu)--; /* prevent recursion in schedule */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); } static struct ftrace_ops trace_ops __read_mostly = -- cgit v1.2.3 From b2a866f9344cb30d7ddf5d67b5b8393daf8bef07 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 3 Nov 2008 23:15:57 -0500 Subject: ftrace: function tracer with irqs disabled Impact: disable interrupts during trace entry creation (as opposed to preempt) To help with performance, I set the ftracer to not disable interrupts, and only to disable preemption. If an interrupt occurred, it would not be traced, because the function tracer protects itself from recursion. This may be faster, but the trace output might miss some traces. This patch makes the fuction trace disable interrupts, but it also adds a runtime feature to disable preemption instead. It does this by having two different tracer functions. When the function tracer is enabled, it will check to see which version is requested (irqs disabled or preemption disabled). Then it will use the corresponding function as the tracer. Irq disabling is the default behaviour, but if the user wants better performance, with the chance of missing traces, then they can choose the preempt disabled version. Running hackbench 3 times with the irqs disabled and 3 times with the preempt disabled function tracer yielded: tracing type times entries recorded ------------ -------- ---------------- irq disabled 43.393 166433066 43.282 166172618 43.298 166256704 preempt disabled 38.969 159871710 38.943 159972935 39.325 161056510 Average: irqs disabled: 43.324 166287462 preempt disabled: 39.079 160300385 preempt is 10.8 percent faster than irqs disabled. I wrote a patch to count function trace recursion and reran hackbench. With irq disabled: 1,150 times the function tracer did not trace due to recursion. with preempt disabled: 5,117,718 times. The thousand times with irq disabled could be due to NMIs, or simply a case where it called a function that was not protected by notrace. But we also see that a large amount of the trace is lost with the preempt version. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 40 +++++++++++++++++++++++++++++++++++++++- kernel/trace/trace.h | 1 + 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3e7bf5eb9007..d576dbd6defe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -244,6 +244,7 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", + "ftrace_preempt", NULL }; @@ -891,7 +892,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) #ifdef CONFIG_FUNCTION_TRACER static void -function_trace_call(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -917,6 +918,37 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) ftrace_preempt_enable(resched); } +static void +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + if (unlikely(!ftrace_function_enabled)) + return; + + /* + * Need to use raw, since this must be called before the + * recursive protection is performed. + */ + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + trace_function(tr, data, ip, parent_ip, flags, pc); + } + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, @@ -925,6 +957,12 @@ static struct ftrace_ops trace_ops __read_mostly = void tracing_start_function_trace(void) { ftrace_function_enabled = 0; + + if (trace_flags & TRACE_ITER_PREEMPTONLY) + trace_ops.func = function_trace_call_preempt_only; + else + trace_ops.func = function_trace_call; + register_ftrace_function(&trace_ops); if (tracer_enabled) ftrace_function_enabled = 1; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 10c6dae76894..bb547e933af7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -415,6 +415,7 @@ enum trace_iterator_flags { TRACE_ITER_STACKTRACE = 0x100, TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, + TRACE_ITER_PREEMPTONLY = 0x800, }; extern struct tracer nop_trace; -- cgit v1.2.3 From d7ad44b697c9d13e445ddc7d16f736fbac333249 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:20:08 +0100 Subject: tracing/fastboot: use sched switch tracer from boot tracer Impact: enhance boot trace output with scheduling events Use the sched_switch tracer from the boot tracer. We also can trace schedule events inside the initcalls. Sched tracing is disabled after the initcall has finished and then reenabled before the next one is started. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 ++ kernel/trace/trace.h | 1 + kernel/trace/trace_boot.c | 6 ++++++ kernel/trace/trace_sched_switch.c | 6 +++--- 4 files changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e4c40c868d67..50d7018163f6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3251,6 +3251,8 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); #ifdef CONFIG_BOOT_TRACER + /* We don't want to launch sched_switch tracer yet */ + global_trace.ctrl = 0; register_tracer(&boot_tracer); current_trace = &boot_tracer; current_trace->init(&global_trace); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8465ad052707..9911277b268b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -49,6 +49,7 @@ struct ftrace_entry { unsigned long parent_ip; }; extern struct tracer boot_tracer; +extern struct tracer sched_switch_trace; /* Used by the boot tracer */ /* * Context switch trace entry - which task (and prio) we switched from/to: diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d104d5b46413..6bbc8794a6df 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -27,10 +27,14 @@ void start_boot_trace(void) void enable_boot_trace(void) { + if (pre_initcalls_finished) + tracing_start_cmdline_record(); } void disable_boot_trace(void) { + if (pre_initcalls_finished) + tracing_stop_cmdline_record(); } void reset_boot_trace(struct trace_array *tr) @@ -45,6 +49,8 @@ static void boot_trace_init(struct trace_array *tr) for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); + + sched_switch_trace.init(tr); } static void boot_trace_ctrl_update(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 96620c714300..9d7bdac331dd 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -127,6 +127,7 @@ static void tracing_start_sched_switch(void) long ref; mutex_lock(&tracepoint_mutex); + tracer_enabled = 1; ref = atomic_inc_return(&sched_ref); if (ref == 1) tracing_sched_register(); @@ -138,6 +139,7 @@ static void tracing_stop_sched_switch(void) long ref; mutex_lock(&tracepoint_mutex); + tracer_enabled = 0; ref = atomic_dec_and_test(&sched_ref); if (ref) tracing_sched_unregister(); @@ -158,12 +160,10 @@ static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); tracing_start_cmdline_record(); - tracer_enabled = 1; } static void stop_sched_trace(struct trace_array *tr) { - tracer_enabled = 0; tracing_stop_cmdline_record(); } @@ -190,7 +190,7 @@ static void sched_switch_trace_ctrl_update(struct trace_array *tr) stop_sched_trace(tr); } -static struct tracer sched_switch_trace __read_mostly = +struct tracer sched_switch_trace __read_mostly = { .name = "sched_switch", .init = sched_switch_trace_init, -- cgit v1.2.3 From 0f04870148ecb825133bc2733f473b1c5773ac0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: soft tracing stop and start Impact: add way to quickly start stop tracing from the kernel This patch adds a soft stop and start to the trace. This simply disables function tracing via the ftrace_disabled flag, and disables the trace buffers to prevent recording. The tracing code may still be executed, but the trace will not be recorded. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++ kernel/trace/trace.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 794ab907dbfe..7a75fc6d41f4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -216,6 +216,9 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern int ftrace_dump_on_oops; +extern void tracing_start(void); +extern void tracing_stop(void); + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -246,6 +249,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } static inline int ftrace_printk(const char *fmt, ...) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 29ab40a764c8..113aea9447ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,6 +43,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; + +/* + * Kill all tracing for good (never come back). + * It is initialized to 1 but will turn to zero if the initialization + * of the tracer is successful. But that is the only place that sets + * this back to zero. + */ +int tracing_disabled = 1; + static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) @@ -62,8 +71,6 @@ static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -static int tracing_disabled = 1; - /* * ftrace_dump_on_oops - variable to dump ftrace buffer on oops * @@ -613,6 +620,76 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +static int trace_stop_count; +static DEFINE_SPINLOCK(tracing_start_lock); + +/** + * tracing_start - quick start of the tracer + * + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. + */ +void tracing_start(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + if (tracing_disabled) + return; + + spin_lock_irqsave(&tracing_start_lock, flags); + if (--trace_stop_count) + goto out; + + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ + WARN_ON_ONCE(1); + trace_stop_count = 0; + goto out; + } + + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + ftrace_start(); + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + ftrace_stop(); + spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) -- cgit v1.2.3 From 9036990d462e09366f7297a2d1da6582c3e6b1d3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: restructure tracing start/stop infrastructure Impact: change where tracing is started up and stopped Currently, when a new tracer is selected via echo'ing a tracer name into the current_tracer file, the startup is only done if tracing_enabled is set to one. If tracing_enabled is changed to zero (by echo'ing 0 into the tracing_enabled file) a full shutdown is performed. The full startup and shutdown of a tracer can be expensive and the user can lose out traces when echo'ing in 0 to the tracing_enabled file, because the process takes too long. There can also be places that the user would like to start and stop the tracer several times and doing the full startup and shutdown of a tracer might be too expensive. This patch performs the full startup and shutdown when a tracer is selected. It also adds a way to do a quick start or stop of a tracer. The quick version is just a flag that prevents the tracing from taking place, but the overhead of the code is still there. For example, the startup of a tracer may enable tracepoints, or enable the function tracer. The stop and start will just set a flag to have the tracer ignore the calls when the tracepoint or function trace is called. The overhead of the tracer may still be present when the tracer is stopped, but no tracing will occur. Setting the tracer to the 'nop' tracer (or any other tracer) will perform the shutdown of the tracer which will disable the tracepoint or disable the function tracer. The tracing_enabled file will simply start or stop tracing. This change is all internal. The end result for the user should be the same as before. If tracing_enabled is not set, no trace will happen. If tracing_enabled is set, then the trace will happen. The tracing_enabled variable is static between tracers. Enabling tracing_enabled and going to another tracer will keep tracing_enabled enabled. Same is true with disabling tracing_enabled. This patch will now provide a fast start/stop method to the users for enabling or disabling tracing. Note: There were two methods to the struct tracer that were never used: The methods start and stop. These were to be used as a hook to the reading of the trace output, but ended up not being necessary. These two methods are now used to enable the start and stop of each tracer, in case the tracer needs to do more than just not write into the buffer. For example, the irqsoff tracer must stop recording max latencies when tracing is stopped. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 64 +++++++++++++++++---------------------- kernel/trace/trace.h | 5 +-- kernel/trace/trace_functions.c | 6 ++++ kernel/trace/trace_irqsoff.c | 41 ++++++++++++++++++++++--- kernel/trace/trace_sched_switch.c | 13 ++++++++ kernel/trace/trace_sched_wakeup.c | 39 +++++++++++++++++++++--- 6 files changed, 120 insertions(+), 48 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 113aea9447ec..ff1e9ed9b587 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -150,6 +150,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data); /* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; +/** + * tracing_is_enabled - return tracer_enabled status + * + * This function is used by other tracers to know the status + * of the tracer_enabled flag. Tracers may use this function + * to know if it should enable their features when starting + * up. See irqsoff tracer for an example (start_irqsoff_tracer). + */ +int tracing_is_enabled(void) +{ + return tracer_enabled; +} + /* function tracing enabled */ int ftrace_function_enabled; @@ -1041,8 +1054,7 @@ void tracing_start_function_trace(void) trace_ops.func = function_trace_call; register_ftrace_function(&trace_ops); - if (tracer_enabled) - ftrace_function_enabled = 1; + ftrace_function_enabled = 1; } void tracing_stop_function_trace(void) @@ -1189,10 +1201,6 @@ static void *s_start(struct seq_file *m, loff_t *pos) atomic_inc(&trace_record_cmdline_disabled); - /* let the tracer grab locks here if needed */ - if (current_trace->start) - current_trace->start(iter); - if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; @@ -1219,14 +1227,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { - struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); - - /* let the tracer release locks here if needed */ - if (current_trace && current_trace == iter->trace && iter->trace->stop) - iter->trace->stop(iter); - mutex_unlock(&trace_types_lock); } @@ -2056,10 +2057,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) m->private = iter; /* stop the trace while dumping */ - if (iter->tr->ctrl) { - tracer_enabled = 0; - ftrace_function_enabled = 0; - } + tracing_stop(); if (iter->trace && iter->trace->open) iter->trace->open(iter); @@ -2104,14 +2102,7 @@ int tracing_release(struct inode *inode, struct file *file) iter->trace->close(iter); /* reenable tracing if it was previously enabled */ - if (iter->tr->ctrl) { - tracer_enabled = 1; - /* - * It is safe to enable function tracing even if it - * isn't used - */ - ftrace_function_enabled = 1; - } + tracing_start(); mutex_unlock(&trace_types_lock); seq_release(inode, file); @@ -2449,11 +2440,10 @@ static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_array *tr = filp->private_data; char buf[64]; int r; - r = sprintf(buf, "%ld\n", tr->ctrl); + r = sprintf(buf, "%u\n", tracer_enabled); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2481,16 +2471,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, val = !!val; mutex_lock(&trace_types_lock); - if (tr->ctrl ^ val) { - if (val) + if (tracer_enabled ^ val) { + if (val) { tracer_enabled = 1; - else + if (current_trace->start) + current_trace->start(tr); + tracing_start(); + } else { tracer_enabled = 0; - - tr->ctrl = val; - - if (current_trace && current_trace->ctrl_update) - current_trace->ctrl_update(tr); + tracing_stop(); + if (current_trace->stop) + current_trace->stop(tr); + } } mutex_unlock(&trace_types_lock); @@ -3372,7 +3364,7 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ - global_trace.ctrl = tracer_enabled; + global_trace.ctrl = 1; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc14a6bc1094..3422489fad5e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -237,11 +237,11 @@ struct tracer { const char *name; void (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); + void (*start)(struct trace_array *tr); + void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); - void (*start)(struct trace_iterator *iter); - void (*stop)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); @@ -282,6 +282,7 @@ struct trace_iterator { long idx; }; +int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); int tracing_open_generic(struct inode *inode, struct file *filp); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 0f85a64003d3..9f1b0de71284 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -62,11 +62,17 @@ static void function_trace_ctrl_update(struct trace_array *tr) stop_function_trace(tr); } +static void function_trace_start(struct trace_array *tr) +{ + function_reset(tr); +} + static struct tracer function_trace __read_mostly = { .name = "function", .init = function_trace_init, .reset = function_trace_reset, + .start = function_trace_start, .ctrl_update = function_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 9c74071c10e0..a87a20fa3fc6 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ +/* + * save_tracer_enabled is used to save the state of the tracer_enabled + * variable when we disable it when we open a trace output file. + */ +static int save_tracer_enabled; + static void start_irqsoff_tracer(struct trace_array *tr) { register_ftrace_function(&trace_ops); - tracer_enabled = 1; + if (tracing_is_enabled()) { + tracer_enabled = 1; + save_tracer_enabled = 1; + } else { + tracer_enabled = 0; + save_tracer_enabled = 0; + } } static void stop_irqsoff_tracer(struct trace_array *tr) { tracer_enabled = 0; + save_tracer_enabled = 0; unregister_ftrace_function(&trace_ops); } @@ -389,17 +402,29 @@ static void irqsoff_tracer_ctrl_update(struct trace_array *tr) stop_irqsoff_tracer(tr); } +static void irqsoff_tracer_start(struct trace_array *tr) +{ + irqsoff_tracer_reset(tr); + tracer_enabled = 1; + save_tracer_enabled = 1; +} + +static void irqsoff_tracer_stop(struct trace_array *tr) +{ + tracer_enabled = 0; + save_tracer_enabled = 0; +} + static void irqsoff_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ - if (iter->tr->ctrl) - stop_irqsoff_tracer(iter->tr); + tracer_enabled = 0; } static void irqsoff_tracer_close(struct trace_iterator *iter) { - if (iter->tr->ctrl) - start_irqsoff_tracer(iter->tr); + /* restart tracing */ + tracer_enabled = save_tracer_enabled; } #ifdef CONFIG_IRQSOFF_TRACER @@ -414,6 +439,8 @@ static struct tracer irqsoff_tracer __read_mostly = .name = "irqsoff", .init = irqsoff_tracer_init, .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, @@ -440,6 +467,8 @@ static struct tracer preemptoff_tracer __read_mostly = .name = "preemptoff", .init = preemptoff_tracer_init, .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, @@ -468,6 +497,8 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .name = "preemptirqsoff", .init = preemptirqsoff_tracer_init, .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 888944d3409d..91c699be8c87 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -186,11 +186,24 @@ static void sched_switch_trace_ctrl_update(struct trace_array *tr) stop_sched_trace(tr); } +static void sched_switch_trace_start(struct trace_array *tr) +{ + sched_switch_reset(tr); + tracing_start_sched_switch(); +} + +static void sched_switch_trace_stop(struct trace_array *tr) +{ + tracing_stop_sched_switch(); +} + struct tracer sched_switch_trace __read_mostly = { .name = "sched_switch", .init = sched_switch_trace_init, .reset = sched_switch_trace_reset, + .start = sched_switch_trace_start, + .stop = sched_switch_trace_stop, .ctrl_update = sched_switch_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sched_switch, diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7bc4abf6fca8..240577bc8ba5 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -262,6 +262,12 @@ out: atomic_dec(&wakeup_trace->data[cpu]->disabled); } +/* + * save_tracer_enabled is used to save the state of the tracer_enabled + * variable when we disable it when we open a trace output file. + */ +static int save_tracer_enabled; + static void start_wakeup_tracer(struct trace_array *tr) { int ret; @@ -300,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr) register_ftrace_function(&trace_ops); - tracer_enabled = 1; + if (tracing_is_enabled()) { + tracer_enabled = 1; + save_tracer_enabled = 1; + } else { + tracer_enabled = 0; + save_tracer_enabled = 0; + } return; fail_deprobe_wake_new: @@ -312,6 +324,7 @@ fail_deprobe: static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; + save_tracer_enabled = 0; unregister_ftrace_function(&trace_ops); unregister_trace_sched_switch(probe_wakeup_sched_switch); unregister_trace_sched_wakeup_new(probe_wakeup); @@ -343,18 +356,32 @@ static void wakeup_tracer_ctrl_update(struct trace_array *tr) stop_wakeup_tracer(tr); } +static void wakeup_tracer_start(struct trace_array *tr) +{ + wakeup_reset(tr); + tracer_enabled = 1; + save_tracer_enabled = 1; +} + +static void wakeup_tracer_stop(struct trace_array *tr) +{ + tracer_enabled = 0; + save_tracer_enabled = 0; +} + static void wakeup_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ - if (iter->tr->ctrl) - stop_wakeup_tracer(iter->tr); + tracer_enabled = 0; } static void wakeup_tracer_close(struct trace_iterator *iter) { /* forget about any processes we were recording */ - if (iter->tr->ctrl) - start_wakeup_tracer(iter->tr); + if (save_tracer_enabled) { + wakeup_reset(iter->tr); + tracer_enabled = 1; + } } static struct tracer wakeup_tracer __read_mostly = @@ -362,6 +389,8 @@ static struct tracer wakeup_tracer __read_mostly = .name = "wakeup", .init = wakeup_tracer_init, .reset = wakeup_tracer_reset, + .start = wakeup_tracer_start, + .stop = wakeup_tracer_stop, .open = wakeup_tracer_open, .close = wakeup_tracer_close, .ctrl_update = wakeup_tracer_ctrl_update, -- cgit v1.2.3 From 49833fc232bd6a5076496994d855f601354501d7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: enable trace_printk by default Impact: have the ftrace_printk enabled on startup It is confusing to have to "echo trace_printk > /debug/tracing/iter_ctrl" after adding ftrace_printk in the kernel. Currently the trace_printk is set to off by default. ftrace_printk should only be in open kernel code when used for debugging, and thus it should be enabled by default. It may also be used to record data within a tracer, but those ftrace_printks should be within wrappers that are either enabled by trace_points or have a variable protecting the code path from being entered when the tracer is disabled. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d55ccfc8d674..dbbdacfaaf9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -205,7 +205,7 @@ static DEFINE_MUTEX(trace_types_lock); static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds iter_ctrl options */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; /** * trace_wake_up - wake up tasks waiting for trace input -- cgit v1.2.3 From bbf5b1a0cecb56de6236db8b01c5bfb7ab8ba8b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: remove ctrl_update method Impact: Remove the ctrl_update tracer method With the new quick start/stop method of tracing, the ctrl_update method is out of date. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 -- kernel/trace/trace.h | 1 - kernel/trace/trace_boot.c | 9 -------- kernel/trace/trace_functions.c | 9 -------- kernel/trace/trace_irqsoff.c | 11 --------- kernel/trace/trace_mmiotrace.c | 11 +++------ kernel/trace/trace_nop.c | 10 -------- kernel/trace/trace_sched_switch.c | 10 -------- kernel/trace/trace_sched_wakeup.c | 9 -------- kernel/trace/trace_selftest.c | 48 +++++++++++++++++++++------------------ kernel/trace/trace_sysprof.c | 10 -------- 11 files changed, 29 insertions(+), 101 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dbbdacfaaf9e..9e83188172a1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3357,8 +3357,6 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); #ifdef CONFIG_BOOT_TRACER - /* We don't want to launch sched_switch tracer yet */ - global_trace.ctrl = 0; register_tracer(&boot_tracer); current_trace = &boot_tracer; current_trace->init(&global_trace); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 25abfc45f081..e481edaae1c4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -244,7 +244,6 @@ struct tracer { ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); - void (*ctrl_update)(struct trace_array *tr); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 0203c1054012..8f71915e8bb4 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -58,14 +58,6 @@ static void boot_trace_init(struct trace_array *tr) tracing_sched_switch_assign_trace(tr); } -static void boot_trace_ctrl_update(struct trace_array *tr) -{ - if (tr->ctrl) - enable_boot_trace(); - else - disable_boot_trace(); -} - static enum print_line_t initcall_print_line(struct trace_iterator *iter) { int ret; @@ -102,7 +94,6 @@ struct tracer boot_tracer __read_mostly = .name = "initcall", .init = boot_trace_init, .reset = reset_boot_trace, - .ctrl_update = boot_trace_ctrl_update, .print_line = initcall_print_line, }; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 9f1b0de71284..e980b872bef5 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -54,14 +54,6 @@ static void function_trace_reset(struct trace_array *tr) stop_function_trace(tr); } -static void function_trace_ctrl_update(struct trace_array *tr) -{ - if (tr->ctrl) - start_function_trace(tr); - else - stop_function_trace(tr); -} - static void function_trace_start(struct trace_array *tr) { function_reset(tr); @@ -73,7 +65,6 @@ static struct tracer function_trace __read_mostly = .init = function_trace_init, .reset = function_trace_reset, .start = function_trace_start, - .ctrl_update = function_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, #endif diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 3509086cdc4c..ffdf592a54e3 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -394,14 +394,6 @@ static void irqsoff_tracer_reset(struct trace_array *tr) stop_irqsoff_tracer(tr); } -static void irqsoff_tracer_ctrl_update(struct trace_array *tr) -{ - if (tr->ctrl) - start_irqsoff_tracer(tr); - else - stop_irqsoff_tracer(tr); -} - static void irqsoff_tracer_start(struct trace_array *tr) { tracer_enabled = 1; @@ -442,7 +434,6 @@ static struct tracer irqsoff_tracer __read_mostly = .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, - .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, @@ -470,7 +461,6 @@ static struct tracer preemptoff_tracer __read_mostly = .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, - .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, @@ -500,7 +490,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .stop = irqsoff_tracer_stop, .open = irqsoff_tracer_open, .close = irqsoff_tracer_close, - .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index f28484618ff0..fa9354e78b57 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -49,15 +49,10 @@ static void mmio_trace_reset(struct trace_array *tr) mmio_trace_array = NULL; } -static void mmio_trace_ctrl_update(struct trace_array *tr) +static void mmio_trace_start(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) { - mmio_reset_data(tr); - enable_mmiotrace(); - } else { - disable_mmiotrace(); - } + mmio_reset_data(tr); } static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) @@ -298,10 +293,10 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, + .start = mmio_trace_start, .pipe_open = mmio_pipe_open, .close = mmio_close, .read = mmio_read, - .ctrl_update = mmio_trace_ctrl_update, .print_line = mmio_print_line, }; diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 4592b4862515..e3c5fbfcdd36 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -42,21 +42,11 @@ static void nop_trace_reset(struct trace_array *tr) stop_nop_trace(tr); } -static void nop_trace_ctrl_update(struct trace_array *tr) -{ - /* When starting a new trace, reset the buffers */ - if (tr->ctrl) - start_nop_trace(tr); - else - stop_nop_trace(tr); -} - struct tracer nop_trace __read_mostly = { .name = "nop", .init = nop_trace_init, .reset = nop_trace_reset, - .ctrl_update = nop_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_nop, #endif diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 79410db64d6f..7b73fd11e4aa 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -220,15 +220,6 @@ static void sched_switch_trace_reset(struct trace_array *tr) stop_sched_trace(tr); } -static void sched_switch_trace_ctrl_update(struct trace_array *tr) -{ - /* When starting a new trace, reset the buffers */ - if (tr->ctrl) - start_sched_trace(tr); - else - stop_sched_trace(tr); -} - static void sched_switch_trace_start(struct trace_array *tr) { sched_switch_reset(tr); @@ -247,7 +238,6 @@ static struct tracer sched_switch_trace __read_mostly = .reset = sched_switch_trace_reset, .start = sched_switch_trace_start, .stop = sched_switch_trace_stop, - .ctrl_update = sched_switch_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sched_switch, #endif diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 240577bc8ba5..23e54d4e4d92 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -348,14 +348,6 @@ static void wakeup_tracer_reset(struct trace_array *tr) } } -static void wakeup_tracer_ctrl_update(struct trace_array *tr) -{ - if (tr->ctrl) - start_wakeup_tracer(tr); - else - stop_wakeup_tracer(tr); -} - static void wakeup_tracer_start(struct trace_array *tr) { wakeup_reset(tr); @@ -393,7 +385,6 @@ static struct tracer wakeup_tracer __read_mostly = .stop = wakeup_tracer_stop, .open = wakeup_tracer_open, .close = wakeup_tracer_close, - .ctrl_update = wakeup_tracer_ctrl_update, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 90bc752a7580..746934340474 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -134,13 +134,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); ftrace_enabled = 0; /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); /* we should only have one item */ if (!ret && count != 1) { @@ -148,6 +148,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ret = -1; goto out; } + out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; @@ -185,13 +186,13 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); ftrace_enabled = 0; /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -232,13 +233,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) udelay(100); local_irq_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) ret = trace_test_buffer(&max_tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -269,13 +270,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) udelay(100); preempt_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) ret = trace_test_buffer(&max_tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -312,27 +313,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * local_irq_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); - if (ret) + if (ret) { + tracing_start(); goto out; + } ret = trace_test_buffer(&max_tr, &count); - if (ret) + if (ret) { + tracing_start(); goto out; + } if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; + tracing_start(); goto out; } /* do the test by disabling interrupts first this time */ tracing_max_latency = 0; - tr->ctrl = 1; - trace->ctrl_update(tr); + tracing_start(); preempt_disable(); local_irq_disable(); udelay(100); @@ -341,8 +345,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * local_irq_enable(); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (ret) @@ -358,6 +361,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * out: trace->reset(tr); + tracing_start(); tracing_max_latency = save_max; return ret; @@ -448,8 +452,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (!ret) @@ -457,6 +460,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) trace->reset(tr); + tracing_start(); tracing_max_latency = save_max; @@ -485,11 +489,11 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); @@ -513,11 +517,11 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ - tr->ctrl = 0; - trace->ctrl_update(tr); + tracing_stop(); /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); + tracing_start(); return ret; } diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 9587d3bcba55..8097430edff9 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -275,21 +275,11 @@ static void stack_trace_reset(struct trace_array *tr) stop_stack_trace(tr); } -static void stack_trace_ctrl_update(struct trace_array *tr) -{ - /* When starting a new trace, reset the buffers */ - if (tr->ctrl) - start_stack_trace(tr); - else - stop_stack_trace(tr); -} - static struct tracer stack_trace __read_mostly = { .name = "sysprof", .init = stack_trace_init, .reset = stack_trace_reset, - .ctrl_update = stack_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_sysprof, #endif -- cgit v1.2.3 From c76f06945be50564f925799ddfb6235ee4c26aa0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: remove trace array ctrl Impact: remove obsolete variable in trace_array structure With the new start / stop method of ftrace, the ctrl variable in the trace_array structure is now obsolete. Remove it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 20 +++++--------------- kernel/trace/trace.h | 1 - kernel/trace/trace_functions.c | 6 ++---- kernel/trace/trace_irqsoff.c | 7 ++----- kernel/trace/trace_mmiotrace.c | 11 +++++------ kernel/trace/trace_nop.c | 6 ++---- kernel/trace/trace_sched_switch.c | 6 ++---- kernel/trace/trace_sched_wakeup.c | 12 ++++-------- kernel/trace/trace_selftest.c | 8 -------- kernel/trace/trace_sysprof.c | 6 ++---- 10 files changed, 24 insertions(+), 59 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9e83188172a1..58435415b366 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -537,7 +537,6 @@ int register_tracer(struct tracer *type) if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; - int saved_ctrl = tr->ctrl; int i; /* * Run a selftest on this tracer. @@ -550,13 +549,11 @@ int register_tracer(struct tracer *type) tracing_reset(tr, i); } current_trace = type; - tr->ctrl = 0; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); /* the test is responsible for resetting too */ current_trace = saved_tracer; - tr->ctrl = saved_ctrl; if (ret) { printk(KERN_CONT "FAILED!\n"); goto out; @@ -966,7 +963,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) int cpu; int pc; - if (tracing_disabled || !tr->ctrl) + if (tracing_disabled) return; pc = preempt_count(); @@ -2820,7 +2817,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; int ret; - struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2840,12 +2836,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); - if (tr->ctrl) { - cnt = -EBUSY; - pr_info("ftrace: please disable tracing" - " before modifying buffer size\n"); - goto out; - } + tracing_stop(); if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); @@ -2878,6 +2869,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) cnt = -ENOMEM; out: + tracing_start(); max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); @@ -2900,9 +2892,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, { char *buf; char *end; - struct trace_array *tr = &global_trace; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -3131,7 +3122,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return 0; pc = preempt_count(); @@ -3365,7 +3356,6 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ - global_trace.ctrl = 1; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e481edaae1c4..cfda9d219e66 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -172,7 +172,6 @@ struct trace_iterator; struct trace_array { struct ring_buffer *buffer; unsigned long entries; - long ctrl; int cpu; cycle_t time_start; struct task_struct *waiter; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index e980b872bef5..8693b7a0a5b2 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -44,14 +44,12 @@ static void stop_function_trace(struct trace_array *tr) static void function_trace_init(struct trace_array *tr) { - if (tr->ctrl) - start_function_trace(tr); + start_function_trace(tr); } static void function_trace_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_function_trace(tr); + stop_function_trace(tr); } static void function_trace_start(struct trace_array *tr) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index ffdf592a54e3..d919d4eaa7cc 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -383,15 +383,12 @@ static void __irqsoff_tracer_init(struct trace_array *tr) irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); - - if (tr->ctrl) - start_irqsoff_tracer(tr); + start_irqsoff_tracer(tr); } static void irqsoff_tracer_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_irqsoff_tracer(tr); + stop_irqsoff_tracer(tr); } static void irqsoff_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index fa9354e78b57..51bcf370215e 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -34,17 +34,16 @@ static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - if (tr->ctrl) { - mmio_reset_data(tr); - enable_mmiotrace(); - } + + mmio_reset_data(tr); + enable_mmiotrace(); } static void mmio_trace_reset(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) - disable_mmiotrace(); + + disable_mmiotrace(); mmio_reset_data(tr); mmio_trace_array = NULL; } diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index e3c5fbfcdd36..2ef1d227e7d8 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -32,14 +32,12 @@ static void nop_trace_init(struct trace_array *tr) for_each_online_cpu(cpu) tracing_reset(tr, cpu); - if (tr->ctrl) - start_nop_trace(tr); + start_nop_trace(tr); } static void nop_trace_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_nop_trace(tr); + stop_nop_trace(tr); } struct tracer nop_trace __read_mostly = diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 7b73fd11e4aa..be35bdfe2e38 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -209,14 +209,12 @@ static void stop_sched_trace(struct trace_array *tr) static void sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; - - if (tr->ctrl) - start_sched_trace(tr); + start_sched_trace(tr); } static void sched_switch_trace_reset(struct trace_array *tr) { - if (tr->ctrl && sched_ref) + if (sched_ref) stop_sched_trace(tr); } diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 23e54d4e4d92..983f2b1478c9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -334,18 +334,14 @@ static void stop_wakeup_tracer(struct trace_array *tr) static void wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; - - if (tr->ctrl) - start_wakeup_tracer(tr); + start_wakeup_tracer(tr); } static void wakeup_tracer_reset(struct trace_array *tr) { - if (tr->ctrl) { - stop_wakeup_tracer(tr); - /* make sure we put back any tasks we are tracing */ - wakeup_reset(tr); - } + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); } static void wakeup_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 746934340474..ea4e5d3b15df 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -110,7 +110,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ @@ -181,7 +180,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); @@ -224,7 +222,6 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -261,7 +258,6 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -298,7 +294,6 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ @@ -427,7 +422,6 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* reset the max latency */ tracing_max_latency = 0; @@ -484,7 +478,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); @@ -512,7 +505,6 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ msleep(100); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 8097430edff9..05f753422aea 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -265,14 +265,12 @@ static void stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; - if (tr->ctrl) - start_stack_trace(tr); + start_stack_trace(tr); } static void stack_trace_reset(struct trace_array *tr) { - if (tr->ctrl) - stop_stack_trace(tr); + stop_stack_trace(tr); } static struct tracer stack_trace __read_mostly = -- cgit v1.2.3 From a309720c876d7ad2e224bfd1982c92ae4364c82e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: display start of CPU buffer in trace output Impact: change in trace output Because the trace buffers are per cpu ring buffers, the start of the trace can be confusing. If one CPU is very active at the end of the trace, its history will not go as far back as the other CPU traces. This means that output for a particular CPU may not appear for the first part of a trace. To help annotate what is happening, and to prevent any more confusion, this patch adds a line that annotates the start of a CPU buffer output. For example: automount-3495 [001] 184.596443: dnotify_parent <-vfs_write [...] automount-3495 [001] 184.596449: dput <-path_put automount-3496 [002] 184.596450: down_read_trylock <-do_page_fault [...] sshd-3497 [001] 184.597069: up_read <-do_page_fault -0 [000] 184.597074: __exit_idle <-exit_idle [...] automount-3496 [002] 184.597257: filemap_fault <-__do_fault -0 [003] 184.597261: exit_idle <-smp_apic_timer_interrupt Note, parsers of a trace output should always ignore any lines that start with a '#'. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 19 +++++++++++++++++++ kernel/trace/trace.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 58435415b366..f147f198b9a6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1478,6 +1478,17 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) trace_seq_putc(s, '\n'); } +static void test_cpu_buff_start(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + + if (cpu_isset(iter->cpu, iter->started)) + return; + + cpu_set(iter->cpu, iter->started); + trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); +} + static enum print_line_t print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -1497,6 +1508,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + next_entry = find_next_entry(iter, NULL, &next_ts); if (!next_entry) next_ts = iter->ts; @@ -1612,6 +1625,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + comm = trace_find_cmdline(iter->ent->pid); t = ns2usecs(iter->ts); @@ -2631,6 +2646,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; mutex_lock(&trace_types_lock); + + /* trace pipe does not show start of buffer */ + cpus_setall(iter->started); + iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cfda9d219e66..978145088fb8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -277,6 +277,8 @@ struct trace_iterator { unsigned long iter_flags; loff_t pos; long idx; + + cpumask_t started; }; int tracing_is_enabled(void); -- cgit v1.2.3 From 5aa1ba6a6c710e747838a22d798ac97a8b248745 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Nov 2008 23:07:30 -0500 Subject: ftrace: prevent ftrace_special from recursion Impact: stop ftrace_special from recursion The ftrace_special is used to help debug areas of the kernel. Because of this, if it is put in certain locations, the fact that it allows recursion can become a problem if the kernel developer using does not realize that. This patch changes ftrace_special to not allow recursion into itself to make it more robust. It also changes from preempt disable interrupts disable to prevent any loss of trace entries. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c22fe2d43a7..216bbe7547a4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -960,6 +960,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; + unsigned long flags; int cpu; int pc; @@ -967,14 +968,15 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) return; pc = preempt_count(); - preempt_disable_notrace(); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (likely(!atomic_read(&data->disabled))) + if (likely(atomic_inc_return(&data->disabled) == 1)) ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); - preempt_enable_notrace(); + atomic_dec(&data->disabled); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_TRACER -- cgit v1.2.3 From 15e6cb3673ea6277999642802406a764b49391b0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 07:14:25 +0100 Subject: tracing: add a tracer to catch execution time of kernel functions Impact: add new tracing plugin which can trace full (entry+exit) function calls This tracer uses the low level function return ftrace plugin to measure the execution time of the kernel functions. The first field is the caller of the function, the second is the measured function, and the last one is the execution time in nanoseconds. - v3: - HAVE_FUNCTION_RET_TRACER have been added. Each arch that support ftrace return should enable it. - ftrace_return_stub becomes ftrace_stub. - CONFIG_FUNCTION_RET_TRACER depends now on CONFIG_FUNCTION_TRACER - Return traces printing can be used for other tracers on trace.c - Adapt to the new tracing API (no more ctrl_update callback) - Correct the check of "disabled" during insertion. - Minor changes... Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 14 ++++++ kernel/trace/Makefile | 1 + kernel/trace/ftrace.c | 16 +++++++ kernel/trace/trace.c | 65 +++++++++++++++++++++++---- kernel/trace/trace.h | 35 +++++++++++++++ kernel/trace/trace_functions_return.c | 82 +++++++++++++++++++++++++++++++++++ 6 files changed, 205 insertions(+), 8 deletions(-) create mode 100644 kernel/trace/trace_functions_return.c (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index fc4febc3334a..d986216c8327 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -9,6 +9,9 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool +config HAVE_FUNCTION_RET_TRACER + bool + config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help @@ -54,6 +57,17 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. +config FUNCTION_RET_TRACER + bool "Kernel Function return Tracer" + depends on !DYNAMIC_FTRACE + depends on HAVE_FUNCTION_RET_TRACER + depends on FUNCTION_TRACER + help + Enable the kernel to trace a function at its return. + It's first purpose is to trace the duration of functions. + This is done by setting the current return address on the thread + info structure of the current task. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c8228b1a49e9..3e1f361bbc17 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o +obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4d2e751bfb11..f03fe74ecd67 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1484,3 +1484,19 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } +#ifdef CONFIG_FUNCTION_RET_TRACER +trace_function_return_t ftrace_function_return = + (trace_function_return_t)ftrace_stub; +void register_ftrace_return(trace_function_return_t func) +{ + ftrace_function_return = func; +} + +void unregister_ftrace_return(void) +{ + ftrace_function_return = (trace_function_return_t)ftrace_stub; +} +#endif + + + diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 216bbe7547a4..a3f7ae9cd8e1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -244,13 +244,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -/* - * TRACE_ITER_SYM_MASK masks the options in trace_flags that - * control the output of kernel symbols. - */ -#define TRACE_ITER_SYM_MASK \ - (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) - /* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", @@ -810,6 +803,35 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } +#ifdef CONFIG_FUNCTION_RET_TRACER +static void __trace_function_return(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_retfunc *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_ret_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_FN_RET; + entry->ip = trace->func; + entry->parent_ip = trace->ret; + entry->rettime = trace->rettime; + entry->calltime = trace->calltime; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} +#endif + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -1038,6 +1060,29 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) raw_local_irq_restore(flags); } +#ifdef CONFIG_FUNCTION_RET_TRACER +void trace_function_return(struct ftrace_retfunc *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_function_return(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} +#endif /* CONFIG_FUNCTION_RET_TRACER */ + static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, @@ -1285,7 +1330,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static int +int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -1738,6 +1783,10 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } + case TRACE_FN_RET: { + return print_return_function(iter); + break; + } } return TRACE_TYPE_HANDLED; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 978145088fb8..e40ce0c14690 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,6 +22,7 @@ enum trace_type { TRACE_MMIO_RW, TRACE_MMIO_MAP, TRACE_BOOT, + TRACE_FN_RET, __TRACE_LAST_TYPE }; @@ -48,6 +49,15 @@ struct ftrace_entry { unsigned long ip; unsigned long parent_ip; }; + +/* Function return entry */ +struct ftrace_ret_entry { + struct trace_entry ent; + unsigned long ip; + unsigned long parent_ip; + unsigned long long calltime; + unsigned long long rettime; +}; extern struct tracer boot_tracer; /* @@ -218,6 +228,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ + IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET); \ __ftrace_bad_type(); \ } while (0) @@ -321,6 +332,8 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); +void +trace_function_return(struct ftrace_retfunc *trace); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -393,6 +406,10 @@ extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter); + +extern int +seq_print_ip_sym(struct trace_seq *s, unsigned long ip, + unsigned long sym_flags); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); @@ -400,6 +417,17 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; +/* Standard output formatting function used for function return traces */ +#ifdef CONFIG_FUNCTION_RET_TRACER +extern enum print_line_t print_return_function(struct trace_iterator *iter); +#else +static inline enum print_line_t +print_return_function(struct trace_iterator *iter) +{ + return TRACE_TYPE_UNHANDLED; +} +#endif + /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. @@ -422,6 +450,13 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, }; +/* + * TRACE_ITER_SYM_MASK masks the options in trace_flags that + * control the output of kernel symbols. + */ +#define TRACE_ITER_SYM_MASK \ + (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) + extern struct tracer nop_trace; /** diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c new file mode 100644 index 000000000000..7680b21537dd --- /dev/null +++ b/kernel/trace/trace_functions_return.c @@ -0,0 +1,82 @@ +/* + * + * Function return tracer. + * Copyright (c) 2008 Frederic Weisbecker + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt + * + */ +#include +#include +#include +#include + +#include "trace.h" + + +static void start_return_trace(struct trace_array *tr) +{ + register_ftrace_return(&trace_function_return); +} + +static void stop_return_trace(struct trace_array *tr) +{ + unregister_ftrace_return(); +} + +static void return_trace_init(struct trace_array *tr) +{ + int cpu; + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + start_return_trace(tr); +} + +static void return_trace_reset(struct trace_array *tr) +{ + stop_return_trace(tr); +} + + +enum print_line_t +print_return_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_ret_entry *field; + int ret; + + if (entry->type == TRACE_FN_RET) { + trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = seq_print_ip_sym(s, field->ip, + trace_flags & TRACE_ITER_SYM_MASK); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_printf(s, " (%llu ns)\n", + field->rettime - field->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + else + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer return_trace __read_mostly = +{ + .name = "return", + .init = return_trace_init, + .reset = return_trace_reset, + .print_line = print_return_function +}; + +static __init int init_return_trace(void) +{ + return register_tracer(&return_trace); +} + +device_initcall(init_return_trace); -- cgit v1.2.3 From 52f232cb720a7babb752849cbc2cab2d24021209 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 00:14:40 -0500 Subject: tracing: likely/unlikely branch annotation tracer Impact: new likely/unlikely branch tracer This patch adds a way to record the instances of the likely() and unlikely() branch condition annotations. When "unlikely" is set in /debugfs/tracing/iter_ctrl the unlikely conditions will be added to any of the ftrace tracers. The change takes effect when a new tracer is passed into the current_tracer file. For example: bash-3471 [003] 357.014755: [INCORRECT] sched_info_dequeued:sched_stats.h:177 bash-3471 [003] 357.014756: [correct] update_curr:sched_fair.c:489 bash-3471 [003] 357.014758: [correct] calc_delta_fair:sched_fair.c:411 bash-3471 [003] 357.014759: [correct] account_group_exec_runtime:sched_stats.h:356 bash-3471 [003] 357.014761: [correct] update_curr:sched_fair.c:489 bash-3471 [003] 357.014763: [INCORRECT] calc_delta_fair:sched_fair.c:411 bash-3471 [003] 357.014765: [correct] calc_delta_mine:sched.c:1279 Which shows the normal tracer heading, as well as whether the condition was correct "[correct]" or was mistaken "[INCORRECT]", followed by the function, file name and line number. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 22 ++++++++ kernel/trace/Makefile | 6 +++ kernel/trace/trace.c | 29 +++++++++++ kernel/trace/trace.h | 39 +++++++++++++++ kernel/trace/trace_unlikely.c | 114 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a604f24c755f..8abcaf821beb 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -175,6 +175,28 @@ config TRACE_UNLIKELY_PROFILE Say N if unsure. +config TRACING_UNLIKELY + bool + help + Selected by tracers that will trace the likely and unlikely + conditions. This prevents the tracers themselves from being + profiled. Profiling the tracing infrastructure can only happen + when the likelys and unlikelys are not being traced. + +config UNLIKELY_TRACER + bool "Trace likely/unlikely instances" + depends on TRACE_UNLIKELY_PROFILE + select TRACING_UNLIKELY + help + This traces the events of likely and unlikely condition + calls in the kernel. The difference between this and the + "Trace likely/unlikely profiler" is that this is not a + histogram of the callers, but actually places the calling + events into a running trace buffer to see when and where the + events happened, as well as their results. + + Say N if unsure. + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 98e70ee27986..c938d03516c0 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -10,6 +10,12 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +# If unlikely tracing is enabled, do not trace these files +ifdef CONFIG_TRACING_UNLIKELY +KBUILD_CFLAGS += '-Dlikely(x)=likely_notrace(x)' +KBUILD_CFLAGS += '-Dunlikely(x)=unlikely_notrace(x)' +endif + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3f7ae9cd8e1..83d38634bc90 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -258,6 +258,9 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", +#ifdef CONFIG_UNLIKELY_TRACER + "unlikely", +#endif NULL }; @@ -1648,6 +1651,18 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? "correct" : "INCORRECT", + field->func, + field->file, + field->line); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1787,6 +1802,18 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) return print_return_function(iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? "correct" : "INCORRECT", + field->func, + field->file, + field->line); + break; + } } return TRACE_TYPE_HANDLED; } @@ -2592,6 +2619,7 @@ static int tracing_set_tracer(char *buf) if (t == current_trace) goto out; + trace_unlikely_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2599,6 +2627,7 @@ static int tracing_set_tracer(char *buf) if (t->init) t->init(tr); + trace_unlikely_enable(tr); out: mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b5f91f198fd4..9635aa2c4fc1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,6 +22,7 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, + TRACE_UNLIKELY, TRACE_BOOT_CALL, TRACE_BOOT_RET, TRACE_FN_RET, @@ -134,6 +135,16 @@ struct trace_boot_ret { struct boot_trace_ret boot_ret; }; +#define TRACE_FUNC_SIZE 30 +#define TRACE_FILE_SIZE 20 +struct trace_unlikely { + struct trace_entry ent; + unsigned line; + char func[TRACE_FUNC_SIZE+1]; + char file[TRACE_FILE_SIZE+1]; + char correct; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -236,6 +247,7 @@ extern void __ftrace_bad_type(void); TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ + IF_ASSIGN(var, ent, struct trace_unlikely, TRACE_UNLIKELY); \ IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ __ftrace_bad_type(); \ } while (0) @@ -456,6 +468,9 @@ enum trace_iterator_flags { TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, TRACE_ITER_PREEMPTONLY = 0x800, +#ifdef CONFIG_UNLIKELY_TRACER + TRACE_ITER_UNLIKELY = 0x1000, +#endif }; /* @@ -515,4 +530,28 @@ static inline void ftrace_preempt_enable(int resched) preempt_enable_notrace(); } +#ifdef CONFIG_UNLIKELY_TRACER +extern int enable_unlikely_tracing(struct trace_array *tr); +extern void disable_unlikely_tracing(void); +static inline int trace_unlikely_enable(struct trace_array *tr) +{ + if (trace_flags & TRACE_ITER_UNLIKELY) + return enable_unlikely_tracing(tr); + return 0; +} +static inline void trace_unlikely_disable(void) +{ + /* due to races, always disable */ + disable_unlikely_tracing(); +} +#else +static inline int trace_unlikely_enable(struct trace_array *tr) +{ + return 0; +} +static inline void trace_unlikely_disable(void) +{ +} +#endif /* CONFIG_UNLIKELY_TRACER */ + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c index 94932696069f..7290e0e7b4e3 100644 --- a/kernel/trace/trace_unlikely.c +++ b/kernel/trace/trace_unlikely.c @@ -15,8 +15,122 @@ #include #include "trace.h" +#ifdef CONFIG_UNLIKELY_TRACER + +static int unlikely_tracing_enabled __read_mostly; +static DEFINE_MUTEX(unlikely_tracing_mutex); +static struct trace_array *unlikely_tracer; + +static void +probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ + struct trace_array *tr = unlikely_tracer; + struct ring_buffer_event *event; + struct trace_unlikely *entry; + unsigned long flags, irq_flags; + int cpu, pc; + const char *p; + + /* + * I would love to save just the ftrace_likely_data pointer, but + * this code can also be used by modules. Ugly things can happen + * if the module is unloaded, and then we go and read the + * pointer. This is slower, but much safer. + */ + + if (unlikely(!tr)) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) + goto out; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + + pc = preempt_count(); + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_UNLIKELY; + + /* Strip off the path, only save the file */ + p = f->file + strlen(f->file); + while (p >= f->file && *p != '/') + p--; + p++; + + strncpy(entry->func, f->func, TRACE_FUNC_SIZE); + strncpy(entry->file, p, TRACE_FILE_SIZE); + entry->func[TRACE_FUNC_SIZE] = 0; + entry->file[TRACE_FILE_SIZE] = 0; + entry->line = f->line; + entry->correct = val == expect; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + out: + atomic_dec(&tr->data[cpu]->disabled); + local_irq_restore(flags); +} + +static inline +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ + if (!unlikely_tracing_enabled) + return; + + probe_likely_condition(f, val, expect); +} + +int enable_unlikely_tracing(struct trace_array *tr) +{ + int ret = 0; + + mutex_lock(&unlikely_tracing_mutex); + unlikely_tracer = tr; + /* + * Must be seen before enabling. The reader is a condition + * where we do not need a matching rmb() + */ + smp_wmb(); + unlikely_tracing_enabled++; + mutex_unlock(&unlikely_tracing_mutex); + + return ret; +} + +void disable_unlikely_tracing(void) +{ + mutex_lock(&unlikely_tracing_mutex); + + if (!unlikely_tracing_enabled) + goto out_unlock; + + unlikely_tracing_enabled--; + + out_unlock: + mutex_unlock(&unlikely_tracing_mutex); +} +#else +static inline +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +{ +} +#endif /* CONFIG_UNLIKELY_TRACER */ + void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) { + /* + * I would love to have a trace point here instead, but the + * trace point code is so inundated with unlikely and likely + * conditions that the recursive nightmare that exists is too + * much to try to get working. At least for now. + */ + trace_likely_condition(f, val, expect); + /* FIXME: Make this atomic! */ if (val == expect) f->correct++; -- cgit v1.2.3 From f88c4ae9f8c3939bee4337c75c7a673b5de7a8a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 11:55:41 +0100 Subject: tracing: branch tracer, tweak output Impact: modify the tracer output, to make it a bit easier to read Change the output from: > bash-3471 [003] 357.014755: [INCORRECT] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [correct] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [correct] calc_delta_fair:sched_fair.c:411 to: > bash-3471 [003] 357.014755: [ MISS ] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [ .... ] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [ .... ] calc_delta_fair:sched_fair.c:411 it's good to have fields aligned vertically, and the only important information is a prediction miss, so display only that information. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83d38634bc90..728a46ec6b06 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1657,7 +1657,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? "correct" : "INCORRECT", + field->correct ? " .... " : " MISS ", field->func, field->file, field->line); @@ -1808,7 +1808,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? "correct" : "INCORRECT", + field->correct ? " .... " : " MISS ", field->func, field->file, field->line); -- cgit v1.2.3 From 68d119f0a66f7e3663304343b072e56a2693446b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 14:09:30 +0100 Subject: tracing: finetune branch-tracer output Steve suggested the to change the output from this: > bash-3471 [003] 357.014755: [ MISS ] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [ .... ] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [ .... ] calc_delta_fair:sched_fair.c:411 to this: > bash-3471 [003] 357.014755: [ MISS ] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [ ok ] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [ ok ] calc_delta_fair:sched_fair.c:411 as it makes it clearer to the user what it means exactly. Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 728a46ec6b06..d842db14a59b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1657,7 +1657,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " .... " : " MISS ", + field->correct ? " ok " : " MISS ", field->func, field->file, field->line); @@ -1808,7 +1808,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " .... " : " MISS ", + field->correct ? " ok " : " MISS ", field->func, field->file, field->line); -- cgit v1.2.3 From 2ed84eeb8808cf3c9f039213ca137ffd7d753f0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: trace: rename unlikely profiler to branch profiler Impact: name change of unlikely tracer and profiler Ingo Molnar suggested changing the config from UNLIKELY_PROFILE to BRANCH_PROFILING. I never did like the "unlikely" name so I went one step farther, and renamed all the unlikely configurations to a "BRANCH" variant. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsyscall_64.c | 2 +- arch/x86/vdso/vclock_gettime.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/compiler.h | 19 ++++++++++--------- kernel/trace/Kconfig | 10 +++++----- kernel/trace/Makefile | 7 +++---- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 6 +++--- kernel/trace/trace_unlikely.c | 4 ++-- 9 files changed, 27 insertions(+), 27 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index ece02932ea57..6f3d3d4cd973 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,7 +18,7 @@ */ /* Disable profiling for userspace code: */ -#define DISABLE_UNLIKELY_PROFILE +#define DISABLE_BRANCH_PROFILING #include #include diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6e667631e7dc..d9d35824c56f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -10,7 +10,7 @@ */ /* Disable profiling for userspace code: */ -#define DISABLE_UNLIKELY_PROFILE +#define DISABLE_BRANCH_PROFILING #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e10beb5335c9..a5e4ed9baec8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -45,7 +45,7 @@ #define MCOUNT_REC() #endif -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#ifdef CONFIG_TRACE_BRANCH_PROFILING #define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ *(_ftrace_likely) \ VMLINUX_SYMBOL(__stop_likely_profile) = .; \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 63b7d9089d6e..c7d804a7a4d6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,26 +59,27 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -/* - * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code - * to disable branch tracing on a per file basis. - */ -#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) -struct ftrace_likely_data { +struct ftrace_branch_data { const char *func; const char *file; unsigned line; unsigned long correct; unsigned long incorrect; }; -void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +/* + * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) #define likely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_likely"))) \ ______f = { \ @@ -93,7 +94,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); }) #define unlikely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_unlikely"))) \ ______f = { \ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8abcaf821beb..9c89526b6b7c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -159,7 +159,7 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) -config TRACE_UNLIKELY_PROFILE +config TRACE_BRANCH_PROFILING bool "Trace likely/unlikely profiler" depends on DEBUG_KERNEL select TRACING @@ -175,7 +175,7 @@ config TRACE_UNLIKELY_PROFILE Say N if unsure. -config TRACING_UNLIKELY +config TRACING_BRANCHES bool help Selected by tracers that will trace the likely and unlikely @@ -183,10 +183,10 @@ config TRACING_UNLIKELY profiled. Profiling the tracing infrastructure can only happen when the likelys and unlikelys are not being traced. -config UNLIKELY_TRACER +config BRANCH_TRACER bool "Trace likely/unlikely instances" - depends on TRACE_UNLIKELY_PROFILE - select TRACING_UNLIKELY + depends on TRACE_BRANCH_PROFILING + select TRACING_BRANCHES help This traces the events of likely and unlikely condition calls in the kernel. The difference between this and the diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c938d03516c0..0087df7ba44e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -11,9 +11,8 @@ obj-y += trace_selftest_dynamic.o endif # If unlikely tracing is enabled, do not trace these files -ifdef CONFIG_TRACING_UNLIKELY -KBUILD_CFLAGS += '-Dlikely(x)=likely_notrace(x)' -KBUILD_CFLAGS += '-Dunlikely(x)=unlikely_notrace(x)' +ifdef CONFIG_TRACING_BRANCHES +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING endif obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o @@ -31,6 +30,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o -obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o +obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_unlikely.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d842db14a59b..bad59d32a4a9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -258,7 +258,7 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER "unlikely", #endif NULL diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9635aa2c4fc1..dccae6312941 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -468,7 +468,7 @@ enum trace_iterator_flags { TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, TRACE_ITER_PREEMPTONLY = 0x800, -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER TRACE_ITER_UNLIKELY = 0x1000, #endif }; @@ -530,7 +530,7 @@ static inline void ftrace_preempt_enable(int resched) preempt_enable_notrace(); } -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER extern int enable_unlikely_tracing(struct trace_array *tr); extern void disable_unlikely_tracing(void); static inline int trace_unlikely_enable(struct trace_array *tr) @@ -552,6 +552,6 @@ static inline int trace_unlikely_enable(struct trace_array *tr) static inline void trace_unlikely_disable(void) { } -#endif /* CONFIG_UNLIKELY_TRACER */ +#endif /* CONFIG_BRANCH_TRACER */ #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c index 7290e0e7b4e3..856eb3b7f694 100644 --- a/kernel/trace/trace_unlikely.c +++ b/kernel/trace/trace_unlikely.c @@ -15,7 +15,7 @@ #include #include "trace.h" -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER static int unlikely_tracing_enabled __read_mostly; static DEFINE_MUTEX(unlikely_tracing_mutex); @@ -119,7 +119,7 @@ static inline void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) { } -#endif /* CONFIG_UNLIKELY_TRACER */ +#endif /* CONFIG_BRANCH_TRACER */ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) { -- cgit v1.2.3 From 9f029e83e968e5661d7be045bbcb620dbb909938 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: ftrace: rename unlikely iter_ctrl to branch Impact: rename of iter_ctrl unlikely to branch The unlikely name is ugly. This patch converts the iter_ctrl command "unlikely" and "nounlikely" to "branch" and "nobranch" respectively. It also renames a lot of internal functions to use "branch" instead of "unlikely". Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 14 ++++++------ kernel/trace/trace.h | 26 +++++++++++----------- kernel/trace/trace_unlikely.c | 50 +++++++++++++++++++++---------------------- 3 files changed, 45 insertions(+), 45 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bad59d32a4a9..4bf070bb5272 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -259,7 +259,7 @@ static const char *trace_options[] = { "ftrace_printk", "ftrace_preempt", #ifdef CONFIG_BRANCH_TRACER - "unlikely", + "branch", #endif NULL }; @@ -1651,8 +1651,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } - case TRACE_UNLIKELY: { - struct trace_unlikely *field; + case TRACE_BRANCH: { + struct trace_branch *field; trace_assign_type(field, entry); @@ -1802,8 +1802,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) return print_return_function(iter); break; } - case TRACE_UNLIKELY: { - struct trace_unlikely *field; + case TRACE_BRANCH: { + struct trace_branch *field; trace_assign_type(field, entry); @@ -2619,7 +2619,7 @@ static int tracing_set_tracer(char *buf) if (t == current_trace) goto out; - trace_unlikely_disable(); + trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2627,7 +2627,7 @@ static int tracing_set_tracer(char *buf) if (t->init) t->init(tr); - trace_unlikely_enable(tr); + trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dccae6312941..7fbf37b27453 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,7 +22,7 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, - TRACE_UNLIKELY, + TRACE_BRANCH, TRACE_BOOT_CALL, TRACE_BOOT_RET, TRACE_FN_RET, @@ -137,7 +137,7 @@ struct trace_boot_ret { #define TRACE_FUNC_SIZE 30 #define TRACE_FILE_SIZE 20 -struct trace_unlikely { +struct trace_branch { struct trace_entry ent; unsigned line; char func[TRACE_FUNC_SIZE+1]; @@ -247,7 +247,7 @@ extern void __ftrace_bad_type(void); TRACE_MMIO_MAP); \ IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ - IF_ASSIGN(var, ent, struct trace_unlikely, TRACE_UNLIKELY); \ + IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ __ftrace_bad_type(); \ } while (0) @@ -469,7 +469,7 @@ enum trace_iterator_flags { TRACE_ITER_PRINTK = 0x400, TRACE_ITER_PREEMPTONLY = 0x800, #ifdef CONFIG_BRANCH_TRACER - TRACE_ITER_UNLIKELY = 0x1000, + TRACE_ITER_BRANCH = 0x1000, #endif }; @@ -531,25 +531,25 @@ static inline void ftrace_preempt_enable(int resched) } #ifdef CONFIG_BRANCH_TRACER -extern int enable_unlikely_tracing(struct trace_array *tr); -extern void disable_unlikely_tracing(void); -static inline int trace_unlikely_enable(struct trace_array *tr) +extern int enable_branch_tracing(struct trace_array *tr); +extern void disable_branch_tracing(void); +static inline int trace_branch_enable(struct trace_array *tr) { - if (trace_flags & TRACE_ITER_UNLIKELY) - return enable_unlikely_tracing(tr); + if (trace_flags & TRACE_ITER_BRANCH) + return enable_branch_tracing(tr); return 0; } -static inline void trace_unlikely_disable(void) +static inline void trace_branch_disable(void) { /* due to races, always disable */ - disable_unlikely_tracing(); + disable_branch_tracing(); } #else -static inline int trace_unlikely_enable(struct trace_array *tr) +static inline int trace_branch_enable(struct trace_array *tr) { return 0; } -static inline void trace_unlikely_disable(void) +static inline void trace_branch_disable(void) { } #endif /* CONFIG_BRANCH_TRACER */ diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c index 856eb3b7f694..e5d5969853a3 100644 --- a/kernel/trace/trace_unlikely.c +++ b/kernel/trace/trace_unlikely.c @@ -17,16 +17,16 @@ #ifdef CONFIG_BRANCH_TRACER -static int unlikely_tracing_enabled __read_mostly; -static DEFINE_MUTEX(unlikely_tracing_mutex); -static struct trace_array *unlikely_tracer; +static int branch_tracing_enabled __read_mostly; +static DEFINE_MUTEX(branch_tracing_mutex); +static struct trace_array *branch_tracer; static void -probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) +probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) { - struct trace_array *tr = unlikely_tracer; + struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; - struct trace_unlikely *entry; + struct trace_branch *entry; unsigned long flags, irq_flags; int cpu, pc; const char *p; @@ -54,7 +54,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) pc = preempt_count(); entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_UNLIKELY; + entry->ent.type = TRACE_BRANCH; /* Strip off the path, only save the file */ p = f->file + strlen(f->file); @@ -77,51 +77,51 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) } static inline -void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) { - if (!unlikely_tracing_enabled) + if (!branch_tracing_enabled) return; probe_likely_condition(f, val, expect); } -int enable_unlikely_tracing(struct trace_array *tr) +int enable_branch_tracing(struct trace_array *tr) { int ret = 0; - mutex_lock(&unlikely_tracing_mutex); - unlikely_tracer = tr; + mutex_lock(&branch_tracing_mutex); + branch_tracer = tr; /* * Must be seen before enabling. The reader is a condition * where we do not need a matching rmb() */ smp_wmb(); - unlikely_tracing_enabled++; - mutex_unlock(&unlikely_tracing_mutex); + branch_tracing_enabled++; + mutex_unlock(&branch_tracing_mutex); return ret; } -void disable_unlikely_tracing(void) +void disable_branch_tracing(void) { - mutex_lock(&unlikely_tracing_mutex); + mutex_lock(&branch_tracing_mutex); - if (!unlikely_tracing_enabled) + if (!branch_tracing_enabled) goto out_unlock; - unlikely_tracing_enabled--; + branch_tracing_enabled--; out_unlock: - mutex_unlock(&unlikely_tracing_mutex); + mutex_unlock(&branch_tracing_mutex); } #else static inline -void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) +void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) { } #endif /* CONFIG_BRANCH_TRACER */ -void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect) { /* * I would love to have a trace point here instead, but the @@ -148,7 +148,7 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_pointer *f = m->private; - struct ftrace_likely_data *p = v; + struct ftrace_branch_data *p = v; (*pos)++; @@ -180,7 +180,7 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { - struct ftrace_likely_data *p = v; + struct ftrace_branch_data *p = v; const char *f; unsigned long percent; @@ -252,7 +252,7 @@ static struct ftrace_pointer ftrace_unlikely_pos = { .stop = __stop_unlikely_profile, }; -static __init int ftrace_unlikely_init(void) +static __init int ftrace_branch_init(void) { struct dentry *d_tracer; struct dentry *entry; @@ -275,4 +275,4 @@ static __init int ftrace_unlikely_init(void) return 0; } -device_initcall(ftrace_unlikely_init); +device_initcall(ftrace_branch_init); -- cgit v1.2.3 From a94c80e78bc9f4493ffc25a02d5d7bcd93c399d0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:36 -0500 Subject: ftrace: rename trace_entries to buffer_size_kb Impact: rename of debugfs file trace_entries to buffer_size_kb The original ftrace had fixed size entries, and the number of entries was shown and modified via the file called trace_entries. By converting to the unified trace buffer, we now allow for variable size entries which makes the meaning of trace_entries pointless. Since trace_size might be confused to the size of the trace, this patch names it "buffer_size_kb" (thanks to Arjan van de Ven for this idea). [ mingo@elte.hu: changed from buffer_size to buffer_size_kb ] ( Note, the units are still bytes - the next patch changes that, to keep the wide rename patch separate from the unit-change patch. ) Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 18 +++++++++--------- kernel/trace/trace.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 9cc4d685dde5..a1b58777839b 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -94,7 +94,7 @@ of ftrace. Here is a list of some of the key files: only be recorded if the latency is greater than the value in this file. (in microseconds) - trace_entries: This sets or displays the number of bytes each CPU + buffer_size_kb: This sets or displays the number of bytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the CPU buffer and not total size of all buffers. The @@ -1299,13 +1299,13 @@ trace entries ------------- Having too much or not enough data can be troublesome in diagnosing -an issue in the kernel. The file trace_entries is used to modify +an issue in the kernel. The file buffer_size_kb is used to modify the size of the internal trace buffers. The number listed is the number of entries that can be recorded per CPU. To know the full size, multiply the number of possible CPUS with the number of entries. - # cat /debug/tracing/trace_entries + # cat /debug/tracing/buffer_size_kb 65620 Note, to modify this, you must have tracing completely disabled. To do that, @@ -1313,8 +1313,8 @@ echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. # echo nop > /debug/tracing/current_tracer - # echo 100000 > /debug/tracing/trace_entries - # cat /debug/tracing/trace_entries + # echo 100000 > /debug/tracing/buffer_size_kb + # cat /debug/tracing/buffer_size_kb 100045 @@ -1323,8 +1323,8 @@ are held in individual pages. It allocates the number of pages it takes to fulfill the request. If more entries may fit on the last page then they will be added. - # echo 1 > /debug/tracing/trace_entries - # cat /debug/tracing/trace_entries + # echo 1 > /debug/tracing/buffer_size_kb + # cat /debug/tracing/buffer_size_kb 85 This shows us that 85 entries can fit in a single page. @@ -1332,8 +1332,8 @@ This shows us that 85 entries can fit in a single page. The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. - # echo 1000000000000 > /debug/tracing/trace_entries + # echo 1000000000000 > /debug/tracing/buffer_size_kb -bash: echo: write error: Cannot allocate memory - # cat /debug/tracing/trace_entries + # cat /debug/tracing/buffer_size_kb 85 diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4bf070bb5272..b42d42056fa4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3198,11 +3198,11 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); - entry = debugfs_create_file("trace_entries", 0644, d_tracer, + entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); if (!entry) pr_warning("Could not create debugfs " - "'trace_entries' entry\n"); + "'buffer_size_kb' entry\n"); entry = debugfs_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); -- cgit v1.2.3 From 1696b2b0f44a8d42f3e6b1ea90c21790871c04d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 13 Nov 2008 00:09:35 -0500 Subject: ftrace: show buffer size in kilobytes Impact: change the units of buffer_size_kb to kilobytes This patch changes the units of the buffer_size_kb file to kilobytes. Reading and writing to the file uses kilobytes as units. To help users to know what units are used, the output of the file now looks like: # cat /debug/tracing/buffer_size_kb 1408 Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 22 +++++----------------- kernel/trace/trace.c | 5 ++++- 2 files changed, 9 insertions(+), 18 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index a1b58777839b..6d3fe4cdf921 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -94,10 +94,10 @@ of ftrace. Here is a list of some of the key files: only be recorded if the latency is greater than the value in this file. (in microseconds) - buffer_size_kb: This sets or displays the number of bytes each CPU + buffer_size_kb: This sets or displays the number of kilobytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the - CPU buffer and not total size of all buffers. The + CPU buffer and not total size of all buffers. The trace buffers are allocated in pages (blocks of memory that the kernel uses for allocation, usually 4 KB in size). If the last page allocated has room for more bytes @@ -1306,28 +1306,16 @@ the full size, multiply the number of possible CPUS with the number of entries. # cat /debug/tracing/buffer_size_kb -65620 +1408 (units kilobytes) Note, to modify this, you must have tracing completely disabled. To do that, echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. # echo nop > /debug/tracing/current_tracer - # echo 100000 > /debug/tracing/buffer_size_kb + # echo 10000 > /debug/tracing/buffer_size_kb # cat /debug/tracing/buffer_size_kb -100045 - - -Notice that we echoed in 100,000 but the size is 100,045. The entries -are held in individual pages. It allocates the number of pages it takes -to fulfill the request. If more entries may fit on the last page -then they will be added. - - # echo 1 > /debug/tracing/buffer_size_kb - # cat /debug/tracing/buffer_size_kb -85 - -This shows us that 85 entries can fit in a single page. +10000 (units kilobytes) The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b42d42056fa4..d664aae2e10a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2905,7 +2905,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = sprintf(buf, "%lu\n", tr->entries); + r = sprintf(buf, "%lu\n", tr->entries >> 10); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2945,6 +2945,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_inc(&max_tr.data[cpu]->disabled); } + /* value is in KB */ + val <<= 10; + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { -- cgit v1.2.3 From ee6bce52276c0717ed3e63296e5d9465d339e923 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:37 -0500 Subject: ftrace: rename iter_ctrl to trace_options Impact: rename file /debug/tracing/iter_ctrl to /debug/tracing/trace_options The original ftrace had a file called "iter_ctrl" that would control the way the output was iterated. But this file grew into a catch all for different trace options. This patch renames the file from iter_ctrl to trace_options to reflect this change. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 14 +++++++------- kernel/trace/trace.c | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 6d3fe4cdf921..753f4de4b175 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -82,7 +82,7 @@ of ftrace. Here is a list of some of the key files: tracer is not adding more data, they will display the same information every time they are read. - iter_ctrl: This file lets the user control the amount of data + trace_options: This file lets the user control the amount of data that is displayed in one of the above output files. @@ -316,23 +316,23 @@ The above is mostly meaningful for kernel developers. The rest is the same as the 'trace' file. -iter_ctrl ---------- +trace_options +------------- -The iter_ctrl file is used to control what gets printed in the trace +The trace_options file is used to control what gets printed in the trace output. To see what is available, simply cat the file: - cat /debug/tracing/iter_ctrl + cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ noblock nostacktrace nosched-tree To disable one of the options, echo in the option prepended with "no". - echo noprint-parent > /debug/tracing/iter_ctrl + echo noprint-parent > /debug/tracing/trace_options To enable an option, leave off the "no". - echo sym-offset > /debug/tracing/iter_ctrl + echo sym-offset > /debug/tracing/trace_options Here are the available options: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d664aae2e10a..240423a9d1af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -204,7 +204,7 @@ static DEFINE_MUTEX(trace_types_lock); /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); -/* trace_flags holds iter_ctrl options */ +/* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; /** @@ -2411,7 +2411,7 @@ static struct file_operations tracing_cpumask_fops = { }; static ssize_t -tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, +tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { char *buf; @@ -2448,7 +2448,7 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, } static ssize_t -tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, +tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; @@ -2493,8 +2493,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, static struct file_operations tracing_iter_fops = { .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .read = tracing_trace_options_read, + .write = tracing_trace_options_write, }; static const char readme_msg[] = @@ -2508,9 +2508,9 @@ static const char readme_msg[] = "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/iter_ctrl\n" + "# cat /debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" "echo 0 > /debug/tracing/tracing_enabled\n" @@ -3148,10 +3148,10 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); - entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + entry = debugfs_create_file("trace_options", 0644, d_tracer, NULL, &tracing_iter_fops); if (!entry) - pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + pr_warning("Could not create debugfs 'trace_options' entry\n"); entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); -- cgit v1.2.3 From 12ef7d448613ead2babd41c3ebfa1fe03c20edef Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:38 -0500 Subject: ftrace: CPU buffer start annotation clean ups Impact: better handling of CPU buffer start annotation Because of the confusion with the per CPU buffers wrapping where one CPU might be more active at the end of the trace than the other CPUs causing that one CPU to have a shorter history. Kernel developers were confused by the "missing" data of that one CPU at the beginning of the trace output. An annotation was added to the trace output to show that the buffer had started: # tracer: function # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | ##### CPU 3 buffer started #### -0 [003] 158.192959: smp_apic_timer_interrupt [...] -0 [003] 161.556520: default_idle ##### CPU 1 buffer started #### -0 [001] 161.592494: hrtimer_force_reprogram [etc] But this annotation gets a bit messy when tracers do not fill the buffers. This patch does a couple of things: One) it adds a flag to trace_options to disable these annotations Two) it does not annotate if the tracer did not overflow its buffer. This makes the output much cleaner. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 16 +++++++++++++++- kernel/trace/trace.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 240423a9d1af..4a904623e05d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -205,7 +205,8 @@ static DEFINE_MUTEX(trace_types_lock); static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | + TRACE_ITER_ANNOTATE; /** * trace_wake_up - wake up tasks waiting for trace input @@ -261,6 +262,7 @@ static const char *trace_options[] = { #ifdef CONFIG_BRANCH_TRACER "branch", #endif + "annotate", NULL }; @@ -1113,6 +1115,7 @@ void tracing_stop_function_trace(void) enum trace_file_type { TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, }; static void trace_iterator_increment(struct trace_iterator *iter, int cpu) @@ -1532,6 +1535,12 @@ static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; + if (!(trace_flags & TRACE_ITER_ANNOTATE)) + return; + + if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) + return; + if (cpu_isset(iter->cpu, iter->started)) return; @@ -2132,6 +2141,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Annotate start of buffers if we had overruns */ + if (ring_buffer_overruns(iter->tr->buffer)) + iter->iter_flags |= TRACE_FILE_ANNOTATE; + + for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9e015f5bea1d..790ea8c0e1f3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -473,6 +473,7 @@ enum trace_iterator_flags { #ifdef CONFIG_BRANCH_TRACER TRACE_ITER_BRANCH = 0x1000, #endif + TRACE_ITER_ANNOTATE = 0x2000, }; /* -- cgit v1.2.3 From d51ad7ac48f991c4a8834485727efa99a691cb87 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 15 Nov 2008 15:48:29 -0500 Subject: ftrace: replace raw_local_irq_save with local_irq_save Impact: fix lockdep disabling itself when function tracing is enabled The raw_local_irq_saves used in ftrace is causing problems with lockdep. (it thinks the irq flags are out of sync and disables itself with a warning) The raw ops here are not needed, and the normal local_irq_save is fine. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- kernel/trace/trace_selftest.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a904623e05d..dff4bee591b9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1051,7 +1051,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) * Need to use raw, since this must be called before the * recursive protection is performed. */ - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1062,7 +1062,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_RET_TRACER diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 24e6e075e6d6..5cb64ea061b5 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -52,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int cpu, ret = 0; /* Don't allow flipping of max traces now */ - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&ftrace_max_lock); cnt = ring_buffer_entries(tr->buffer); @@ -63,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) break; } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); if (count) *count = cnt; -- cgit v1.2.3 From e6e7a65aabdb696cf05a56cfd495c49a11fd4cde Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 05:53:19 +0100 Subject: tracing/ftrace: fix unexpected -EINVAL when longest tracer name is set Impact: fix confusing write() -EINVAL when changing the tracer The following commit d9e540762f5cdd89f24e518ad1fd31142d0b9726 remade alive the bug which made the set of a new tracer returning -EINVAL if this is the longest name of tracer. This patch corrects it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dff4bee591b9..80898f4870cc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2655,6 +2655,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, char buf[max_tracer_type_len+1]; int i; size_t ret; + int err; + + ret = cnt; if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; @@ -2668,12 +2671,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - ret = tracing_set_tracer(buf); - if (!ret) - ret = cnt; + err = tracing_set_tracer(buf); + if (err) + return err; - if (ret > 0) - filp->f_pos += ret; + filp->f_pos += ret; return ret; } -- cgit v1.2.3 From 1c80025a49855b12fa09bb6db71820e3367b1369 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 05:57:26 +0100 Subject: tracing/ftrace: change the type of the init() callback Impact: extend the ->init() method with the ability to fail This bring a way to know if the initialization of a tracer successed. A tracer must return 0 on success and a traditional error (ie: -ENOMEM) if it fails. If a tracer fails to init, it is free to print a detailed warn. The tracing api will not and switch to a new tracer will just return the error from the init callback. Note: this will be used for the return tracer. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 ++-- kernel/trace/trace.h | 3 +- kernel/trace/trace_boot.c | 3 +- kernel/trace/trace_branch.c | 3 +- kernel/trace/trace_functions.c | 3 +- kernel/trace/trace_functions_return.c | 3 +- kernel/trace/trace_irqsoff.c | 9 +++-- kernel/trace/trace_mmiotrace.c | 3 +- kernel/trace/trace_nop.c | 3 +- kernel/trace/trace_sched_switch.c | 3 +- kernel/trace/trace_sched_wakeup.c | 3 +- kernel/trace/trace_selftest.c | 66 ++++++++++++++++++++++++++++++----- kernel/trace/trace_sysprof.c | 3 +- 13 files changed, 88 insertions(+), 24 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 80898f4870cc..396fda034e3f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2638,8 +2638,11 @@ static int tracing_set_tracer(char *buf) current_trace->reset(tr); current_trace = t; - if (t->init) - t->init(tr); + if (t->init) { + ret = t->init(tr); + if (ret) + goto out; + } trace_branch_enable(tr); out: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 790ea8c0e1f3..cdbd5cc22be8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -264,7 +264,8 @@ enum print_line_t { */ struct tracer { const char *name; - void (*init)(struct trace_array *tr); + /* Your tracer should raise a warning if init fails */ + int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index cb333b7fd113..a4fa2c57e34e 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -47,7 +47,7 @@ static void reset_boot_trace(struct trace_array *tr) tracing_reset(tr, cpu); } -static void boot_trace_init(struct trace_array *tr) +static int boot_trace_init(struct trace_array *tr) { int cpu; boot_trace = tr; @@ -56,6 +56,7 @@ static void boot_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); tracing_sched_switch_assign_trace(tr); + return 0; } static enum print_line_t diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 85265553918f..44bd39539d61 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -125,7 +125,7 @@ static void stop_branch_trace(struct trace_array *tr) disable_branch_tracing(); } -static void branch_trace_init(struct trace_array *tr) +static int branch_trace_init(struct trace_array *tr) { int cpu; @@ -133,6 +133,7 @@ static void branch_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); start_branch_trace(tr); + return 0; } static void branch_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8693b7a0a5b2..e74f6d0a3216 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -42,9 +42,10 @@ static void stop_function_trace(struct trace_array *tr) tracing_stop_cmdline_record(); } -static void function_trace_init(struct trace_array *tr) +static int function_trace_init(struct trace_array *tr) { start_function_trace(tr); + return 0; } static void function_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c index 7680b21537dd..61185f756a13 100644 --- a/kernel/trace/trace_functions_return.c +++ b/kernel/trace/trace_functions_return.c @@ -24,13 +24,14 @@ static void stop_return_trace(struct trace_array *tr) unregister_ftrace_return(); } -static void return_trace_init(struct trace_array *tr) +static int return_trace_init(struct trace_array *tr) { int cpu; for_each_online_cpu(cpu) tracing_reset(tr, cpu); start_return_trace(tr); + return 0; } static void return_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d919d4eaa7cc..7c2e326bbc8b 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -416,11 +416,12 @@ static void irqsoff_tracer_close(struct trace_iterator *iter) } #ifdef CONFIG_IRQSOFF_TRACER -static void irqsoff_tracer_init(struct trace_array *tr) +static int irqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer irqsoff_tracer __read_mostly = { @@ -442,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly = #endif #ifdef CONFIG_PREEMPT_TRACER -static void preemptoff_tracer_init(struct trace_array *tr) +static int preemptoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer preemptoff_tracer __read_mostly = @@ -471,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly = #if defined(CONFIG_IRQSOFF_TRACER) && \ defined(CONFIG_PREEMPT_TRACER) -static void preemptirqsoff_tracer_init(struct trace_array *tr) +static int preemptirqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer preemptirqsoff_tracer __read_mostly = diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 51bcf370215e..433d650eda9f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -30,13 +30,14 @@ static void mmio_reset_data(struct trace_array *tr) tracing_reset(tr, cpu); } -static void mmio_trace_init(struct trace_array *tr) +static int mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; mmio_reset_data(tr); enable_mmiotrace(); + return 0; } static void mmio_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 2ef1d227e7d8..0e77415caed3 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -24,7 +24,7 @@ static void stop_nop_trace(struct trace_array *tr) /* Nothing to do! */ } -static void nop_trace_init(struct trace_array *tr) +static int nop_trace_init(struct trace_array *tr) { int cpu; ctx_trace = tr; @@ -33,6 +33,7 @@ static void nop_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); start_nop_trace(tr); + return 0; } static void nop_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index be35bdfe2e38..863390557b44 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -206,10 +206,11 @@ static void stop_sched_trace(struct trace_array *tr) tracing_stop_sched_switch_record(); } -static void sched_switch_trace_init(struct trace_array *tr) +static int sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; start_sched_trace(tr); + return 0; } static void sched_switch_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 983f2b1478c9..0067b49746c1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -331,10 +331,11 @@ static void stop_wakeup_tracer(struct trace_array *tr) unregister_trace_sched_wakeup(probe_wakeup); } -static void wakeup_tracer_init(struct trace_array *tr) +static int wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; start_wakeup_tracer(tr); + return 0; } static void wakeup_tracer_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 5cb64ea061b5..88c8eb70f54a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -71,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) return ret; } +static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) +{ + printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n", + trace->name, init_ret); +} #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -111,7 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } /* Sleep for a 1/10 of a second */ msleep(100); @@ -181,7 +190,12 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -223,7 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; /* disable interrupts for a bit */ @@ -272,7 +291,12 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; /* disable preemption for a bit */ @@ -321,7 +345,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } /* reset the max latency */ tracing_max_latency = 0; @@ -449,7 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; @@ -505,7 +538,12 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -532,7 +570,12 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return 0; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -554,7 +597,12 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 05f753422aea..54960edb96d0 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -261,11 +261,12 @@ static void stop_stack_trace(struct trace_array *tr) mutex_unlock(&sample_timer_lock); } -static void stack_trace_init(struct trace_array *tr) +static int stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; start_stack_trace(tr); + return 0; } static void stack_trace_reset(struct trace_array *tr) -- cgit v1.2.3 From 0c726da983de0704254250ef6495ca152e7abcca Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 16 Nov 2008 16:07:58 +0530 Subject: tracing: branch tracer, fix writing to trace/trace_options Impact: fix trace_options behavior writing to trace/trace_options use the index of the array to find the value of the flag. With branch tracer flag defined conditionally, this breaks writing to trace_options with branch tracer disabled. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 -- kernel/trace/trace.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a904623e05d..b04923b72ce0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -259,9 +259,7 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", -#ifdef CONFIG_BRANCH_TRACER "branch", -#endif "annotate", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 790ea8c0e1f3..b41d7b4c2cae 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -470,9 +470,7 @@ enum trace_iterator_flags { TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, TRACE_ITER_PREEMPTONLY = 0x800, -#ifdef CONFIG_BRANCH_TRACER TRACE_ITER_BRANCH = 0x1000, -#endif TRACE_ITER_ANNOTATE = 0x2000, }; -- cgit v1.2.3 From adf9f19574334c9a29a2bc956009fcac7edf1a6b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 19:23:42 +0100 Subject: tracing/ftrace: implement a set_flag callback for tracers Impact: give a way to send specific messages to tracers The current implementation of tracing uses some flags to control the output of general tracers. But we have no way to implement custom flags handling for a specific tracer. This patch proposes a new callback for the struct tracer which called set_flag and a structure that represents a 32 bits variable flag. A tracer can implement a struct tracer_flags on which it puts the initial value of the flag integer. Than it can place a range of flags with their name and their flag mask on the flag integer. The structure that implement a single flag is called struct tracer_opt. These custom flags will be available through the trace_options file like the general tracing flags. Changing their value is done like the other general flags. For example if you have a flag that calls "foo", you can activate it by writing "foo" or "nofoo" on trace_options. Note that the set_flag callback is optional and is only needed if you want the flags changing to be signaled to your tracer and let it to accept or refuse their assignment. V2: Some arrangements in coding style.... Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 26 +++++++++++++++ 2 files changed, 112 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2596b5a968c4..9531fddcfb8d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,6 +43,20 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* For tracers that don't implement custom flags */ +static struct tracer_opt dummy_tracer_opt[] = { + { } +}; + +static struct tracer_flags dummy_tracer_flags = { + .val = 0, + .opts = dummy_tracer_opt +}; + +static int dummy_set_flag(u32 old_flags, u32 bit, int set) +{ + return 0; +} /* * Kill all tracing for good (never come back). @@ -529,6 +543,14 @@ int register_tracer(struct tracer *type) } } + if (!type->set_flag) + type->set_flag = &dummy_set_flag; + if (!type->flags) + type->flags = &dummy_tracer_flags; + else + if (!type->flags->opts) + type->flags->opts = dummy_tracer_opt; + #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest) { struct tracer *saved_tracer = current_trace; @@ -2426,10 +2448,13 @@ static ssize_t tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + int i; char *buf; int r = 0; int len = 0; - int i; + u32 tracer_flags = current_trace->flags->val; + struct tracer_opt *trace_opts = current_trace->flags->opts; + /* calulate max size */ for (i = 0; trace_options[i]; i++) { @@ -2437,6 +2462,15 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, len += 3; /* "no" and space */ } + /* + * Increase the size with names of options specific + * of the current tracer. + */ + for (i = 0; trace_opts[i].name; i++) { + len += strlen(trace_opts[i].name); + len += 3; /* "no" and space */ + } + /* +2 for \n and \0 */ buf = kmalloc(len + 2, GFP_KERNEL); if (!buf) @@ -2449,6 +2483,15 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "no%s ", trace_options[i]); } + for (i = 0; trace_opts[i].name; i++) { + if (tracer_flags & trace_opts[i].bit) + r += sprintf(buf + r, "%s ", + trace_opts[i].name); + else + r += sprintf(buf + r, "no%s ", + trace_opts[i].name); + } + r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); @@ -2459,6 +2502,40 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, return r; } +/* Try to assign a tracer specific option */ +static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +{ + struct tracer_flags *trace_flags = trace->flags; + struct tracer_opt *opts = NULL; + int ret = 0, i = 0; + int len; + + for (i = 0; trace_flags->opts[i].name; i++) { + opts = &trace_flags->opts[i]; + len = strlen(opts->name); + + if (strncmp(cmp, opts->name, len) == 0) { + ret = trace->set_flag(trace_flags->val, + opts->bit, !neg); + break; + } + } + /* Not found */ + if (!trace_flags->opts[i].name) + return -EINVAL; + + /* Refused to handle */ + if (ret) + return ret; + + if (neg) + trace_flags->val &= ~opts->bit; + else + trace_flags->val |= opts->bit; + + return 0; +} + static ssize_t tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -2466,6 +2543,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, char buf[64]; char *cmp = buf; int neg = 0; + int ret; int i; if (cnt >= sizeof(buf)) @@ -2492,11 +2570,13 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, break; } } - /* - * If no option could be set, return an error: - */ - if (!trace_options[i]) - return -EINVAL; + + /* If no option could be set, test the specific tracer options */ + if (!trace_options[i]) { + ret = set_tracer_option(current_trace, cmp, neg); + if (ret) + return ret; + } filp->f_pos += cnt; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 37947f6b92bf..9d22618bf99f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -259,6 +259,29 @@ enum print_line_t { TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ }; + +/* + * An option specific to a tracer. This is a boolean value. + * The bit is the bit index that sets its value on the + * flags value in struct tracer_flags. + */ +struct tracer_opt { + const char *name; /* Will appear on the trace_options file */ + u32 bit; /* Mask assigned in val field in tracer_flags */ +}; + +/* + * The set of specific options for a tracer. Your tracer + * have to set the initial value of the flags val. + */ +struct tracer_flags { + u32 val; + struct tracer_opt *opts; +}; + +/* Makes more easy to define a tracer opt */ +#define TRACER_OPT(s, b) .name = #s, .bit = b + /* * A specific tracer, represented by methods that operate on a trace array: */ @@ -280,8 +303,11 @@ struct tracer { struct trace_array *tr); #endif enum print_line_t (*print_line)(struct trace_iterator *iter); + /* If you handled the flag setting, return 0 */ + int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; int print_max; + struct tracer_flags *flags; }; struct trace_seq { -- cgit v1.2.3 From 0231022cc32d5f2e7f3c06b75691dda0ad6aec33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 03:22:41 +0100 Subject: tracing/function-return-tracer: add the overrun field Impact: help to find the better depth of trace We decided to arbitrary define the depth of function return trace as "20". Perhaps this is not enough. To help finding an optimal depth, we measure now the overrun: the number of functions that have been missed for the current thread. By default this is not displayed, we have to do set a particular flag on the return tracer: echo overrun > /debug/tracing/trace_options And the overrun will be printed on the right. As the trace shows below, the current 20 depth is not enough. update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838) update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838) do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838) tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838) tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838) vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274) vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274) set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274) con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274) release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274) release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274) con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274) con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274) n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274) irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274) ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274) ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274) ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274) hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 7 +++++++ arch/x86/kernel/ftrace.c | 10 ++++++--- include/linux/ftrace.h | 2 ++ include/linux/sched.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace.h | 1 + kernel/trace/trace_functions_return.c | 38 +++++++++++++++++++++++++++++------ 7 files changed, 51 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a71158369fd4..e90e81ef6ab9 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -21,6 +21,7 @@ struct task_struct; struct exec_domain; #include #include +#include struct thread_info { struct task_struct *task; /* main task structure */ @@ -45,6 +46,11 @@ struct thread_info { int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; #endif }; @@ -61,6 +67,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ .curr_ret_stack = -1,\ + .trace_overrun = ATOMIC_INIT(0) \ } #else #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 924153edd973..356bb1eb6e9a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -353,8 +353,10 @@ static int push_return_trace(unsigned long ret, unsigned long long time, struct thread_info *ti = current_thread_info(); /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) + if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { + atomic_inc(&ti->trace_overrun); return -EBUSY; + } index = ++ti->curr_ret_stack; barrier(); @@ -367,7 +369,7 @@ static int push_return_trace(unsigned long ret, unsigned long long time, /* Retrieve a function return address to the trace stack on thread info.*/ static void pop_return_trace(unsigned long *ret, unsigned long long *time, - unsigned long *func) + unsigned long *func, unsigned long *overrun) { int index; @@ -376,6 +378,7 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, *ret = ti->ret_stack[index].ret; *func = ti->ret_stack[index].func; *time = ti->ret_stack[index].calltime; + *overrun = atomic_read(&ti->trace_overrun); ti->curr_ret_stack--; } @@ -386,7 +389,8 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, unsigned long ftrace_return_to_handler(void) { struct ftrace_retfunc trace; - pop_return_trace(&trace.ret, &trace.calltime, &trace.func); + pop_return_trace(&trace.ret, &trace.calltime, &trace.func, + &trace.overrun); trace.rettime = cpu_clock(raw_smp_processor_id()); ftrace_function_return(&trace); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f1af1aab00e6..f7ba4ea5e128 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -318,6 +318,8 @@ struct ftrace_retfunc { unsigned long func; /* Current function */ unsigned long long calltime; unsigned long long rettime; + /* Number of functions that overran the depth limit for current task */ + unsigned long overrun; }; #ifdef CONFIG_FUNCTION_RET_TRACER diff --git a/include/linux/sched.h b/include/linux/sched.h index 61c8cc36028a..c8e0db464206 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2016,6 +2016,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct * used. */ task_thread_info(p)->curr_ret_stack = -1; + atomic_set(&task_thread_info(p)->trace_overrun, 0); #endif } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9531fddcfb8d..e97c29a6e7b0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -853,6 +853,7 @@ static void __trace_function_return(struct trace_array *tr, entry->parent_ip = trace->ret; entry->rettime = trace->rettime; entry->calltime = trace->calltime; + entry->overrun = trace->overrun; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9d22618bf99f..2cb12fd98f6b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -60,6 +60,7 @@ struct ftrace_ret_entry { unsigned long parent_ip; unsigned long long calltime; unsigned long long rettime; + unsigned long overrun; }; extern struct tracer boot_tracer; diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c index a68564af022b..e00d64509c9c 100644 --- a/kernel/trace/trace_functions_return.c +++ b/kernel/trace/trace_functions_return.c @@ -14,6 +14,19 @@ #include "trace.h" +#define TRACE_RETURN_PRINT_OVERRUN 0x1 +static struct tracer_opt trace_opts[] = { + /* Display overruns or not */ + { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, /* Don't display overruns by default */ + .opts = trace_opts +}; + + static int return_trace_init(struct trace_array *tr) { int cpu; @@ -42,26 +55,39 @@ print_return_function(struct trace_iterator *iter) ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + ret = seq_print_ip_sym(s, field->ip, trace_flags & TRACE_ITER_SYM_MASK); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " (%llu ns)\n", + + ret = trace_seq_printf(s, " (%llu ns)", field->rettime - field->calltime); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - else - return TRACE_TYPE_HANDLED; + + if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)", + field->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; } return TRACE_TYPE_UNHANDLED; } -static struct tracer return_trace __read_mostly = -{ +static struct tracer return_trace __read_mostly = { .name = "return", .init = return_trace_init, .reset = return_trace_reset, - .print_line = print_return_function + .print_line = print_return_function, + .flags = &tracer_flags, }; static __init int init_return_trace(void) -- cgit v1.2.3 From a22506347d038a66506c6f57e9b97104128e280d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 18 Nov 2008 18:06:35 +0100 Subject: ftrace: preemptoff selftest not working Impact: fix preemptoff and preemptirqsoff tracer self-tests I was wondering why the preemptoff and preemptirqsoff tracer selftests don't work on s390. After all its just that they get called from non-preemptible context: kernel_init() will execute all initcalls, however the first line in kernel_init() is lock_kernel(), which causes the preempt_count to be increased. Any later calls to add_preempt_count() (especially those from the selftests) will therefore not result in a call to trace_preempt_off() since the check below in add_preempt_count() will be false: if (preempt_count() == val) trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); Hence the trace buffer will be empty. Fix this by releasing the BKL during the self-tests. Signed-off-by: Heiko Carstens Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 396fda034e3f..16892121cb7c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -532,6 +532,13 @@ int register_tracer(struct tracer *type) } #ifdef CONFIG_FTRACE_STARTUP_TEST + /* + * When this gets called we hold the BKL which means that preemption + * is disabled. Various trace selftests however need to disable + * and enable preemption for successful tests. So we drop the BKL here + * and grab it after the tests again. + */ + unlock_kernel(); if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; @@ -562,6 +569,7 @@ int register_tracer(struct tracer *type) } printk(KERN_CONT "PASSED\n"); } + lock_kernel(); #endif type->next = trace_types; -- cgit v1.2.3 From 86fa2f60674540df0b34f5c547ed0c1cf3a8f212 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 19 Nov 2008 10:00:15 +0100 Subject: ftrace: fix selftest locking Impact: fix self-test boot crash Self-test failure forgot to re-lock the BKL - crashing the next initcall: Testing tracer irqsoff: .. no entries found ..FAILED! initcall init_irqsoff_tracer+0x0/0x11 returned 0 after 3906 usecs calling init_mmio_trace+0x0/0xf @ 1 ------------[ cut here ]------------ Kernel BUG at c0c0a915 [verbose debug info unavailable] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC last sysfs file: Pid: 1, comm: swapper Not tainted (2.6.28-rc5-tip #53704) EIP: 0060:[] EFLAGS: 00010286 CPU: 1 EIP is at unlock_kernel+0x10/0x2b EAX: ffffffff EBX: 00000000 ECX: 00000000 EDX: f7030000 ESI: c12da19c EDI: 00000000 EBP: f7039f54 ESP: f7039f54 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 1, ti=f7038000 task=f7030000 task.ti=f7038000) Stack: f7039f6c c0164d30 c013fed8 a7d8d7b4 00000000 00000000 f7039f74 c12fb78a f7039fd0 c0101132 c12fb77d 00000000 6f727200 6f632072 2d206564 c1002031 0000000f f7039fa2 f7039fb0 3531b171 00000000 00000000 0000002f c12ca480 Call Trace: [] ? register_tracer+0x66/0x13f [] ? ktime_get+0x19/0x1b [] ? init_mmio_trace+0xd/0xf [] ? do_one_initcall+0x4a/0x111 [] ? init_mmio_trace+0x0/0xf [] ? init_irq_proc+0x46/0x59 [] ? kernel_init+0x104/0x152 [] ? kernel_init+0x0/0x152 [] ? kernel_thread_helper+0x7/0x10 Code: 58 14 43 75 0a b8 00 9b 2d c1 e8 51 43 7a ff 64 a1 00 a0 37 c1 89 58 14 5b 5d c3 55 64 8b 15 00 a0 37 c1 83 7a 14 00 89 e5 79 04 <0f> 0b eb fe 8b 42 14 48 85 c0 89 42 14 79 0a b8 00 9b 2d c1 e8 EIP: [] unlock_kernel+0x10/0x2b SS:ESP 0068:f7039f54 ---[ end trace a7919e7f17c0a725 ]--- Kernel panic - not syncing: Attempted to kill init! So clean up the flow a bit. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 16892121cb7c..24b6238884f0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -520,7 +520,15 @@ int register_tracer(struct tracer *type) return -1; } + /* + * When this gets called we hold the BKL which means that + * preemption is disabled. Various trace selftests however + * need to disable and enable preemption for successful tests. + * So we drop the BKL here and grab it after the tests again. + */ + unlock_kernel(); mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -532,13 +540,6 @@ int register_tracer(struct tracer *type) } #ifdef CONFIG_FTRACE_STARTUP_TEST - /* - * When this gets called we hold the BKL which means that preemption - * is disabled. Various trace selftests however need to disable - * and enable preemption for successful tests. So we drop the BKL here - * and grab it after the tests again. - */ - unlock_kernel(); if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; @@ -550,9 +551,9 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - for_each_tracing_cpu(i) { + for_each_tracing_cpu(i) tracing_reset(tr, i); - } + current_trace = type; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -564,12 +565,11 @@ int register_tracer(struct tracer *type) goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - for_each_tracing_cpu(i) { + for_each_tracing_cpu(i) tracing_reset(tr, i); - } + printk(KERN_CONT "PASSED\n"); } - lock_kernel(); #endif type->next = trace_types; @@ -580,6 +580,7 @@ int register_tracer(struct tracer *type) out: mutex_unlock(&trace_types_lock); + lock_kernel(); return ret; } -- cgit v1.2.3 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 5 ++- arch/x86/kernel/stacktrace.c | 57 +++++++++++++++++++++++++++ include/linux/stacktrace.h | 8 ++++ kernel/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 9 +++++ 5 files changed, 171 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 753f4de4b175..79a80f79c062 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -324,7 +324,7 @@ output. To see what is available, simply cat the file: cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree + noblock nostacktrace nosched-tree nouserstacktrace To disable one of the options, echo in the option prepended with "no". @@ -378,6 +378,9 @@ Here are the available options: When a trace is recorded, so is the stack of functions. This allows for back traces of trace sites. + userstacktrace - This option changes the trace. + It records a stacktrace of the current userspace thread. + sched-tree - TBD (any users??) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..b15153060417 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -6,6 +6,7 @@ #include #include #include +#include #include static void save_stack_warning(void *data, char *msg) @@ -83,3 +84,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ + +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm) { + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + frame.next_fp = NULL; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.return_address) + trace->entries[trace->nr_entries++] = + frame.return_address; + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b106fd8e0d5c..68de51468f5d 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); + +#ifdef CONFIG_X86 +extern void save_stack_trace_user(struct stack_trace *trace); +#else +# define save_stack_trace_user(trace) do { } while (0) +#endif + #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..ced8b4fa9f51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -275,6 +275,7 @@ static const char *trace_options[] = { "ftrace_preempt", "branch", "annotate", + "userstacktrace", NULL }; @@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr, ftrace_trace_stack(tr, data, flags, skip, preempt_count()); } +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, int pc) +{ + struct userstack_entry *entry; + struct stack_trace trace; + struct ring_buffer_event *event; + unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_USER_STACK; + + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = 0; + trace.entries = entry->caller; + + save_stack_trace_user(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +} + +void __trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags) +{ + ftrace_trace_userstack(tr, data, flags, preempt_count()); +} + static void ftrace_trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, irq_flags, 4, pc); + ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } @@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 5, pc); + ftrace_trace_userstack(tr, data, flags, pc); } void @@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 6, pc); + ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + int ret = 1; + unsigned i; + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + ret = trace_seq_puts(s, "??"); + continue; + } + if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + } + + return ret; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + seq_print_userip_objs(field, s, sym_flags); + if (entry->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + ret = seq_print_userip_objs(field, s, sym_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_putc(s, '\n'); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + break; + } } return TRACE_TYPE_HANDLED; } @@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2cb12fd98f6b..17bb4c830b01 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -26,6 +26,7 @@ enum trace_type { TRACE_BOOT_CALL, TRACE_BOOT_RET, TRACE_FN_RET, + TRACE_USER_STACK, __TRACE_LAST_TYPE }; @@ -42,6 +43,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int tgid; }; /* @@ -99,6 +101,11 @@ struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; }; +struct userstack_entry { + struct trace_entry ent; + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + /* * ftrace_printk entry: */ @@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ @@ -500,6 +508,7 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, TRACE_ITER_BRANCH = 0x1000, TRACE_ITER_ANNOTATE = 0x2000, + TRACE_ITER_USERSTACKTRACE = 0x4000 }; /* -- cgit v1.2.3 From b54d3de9f3b8956653b06f1a32e9f9321c6d9027 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:48 +0200 Subject: tracing: identify which executable object the userspace address belongs to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: modify+improve the userstacktrace tracing visualization feature Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely. Example usage: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sym-userobj >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled cat trace_pipe >/tmp/trace& .... run application ... echo 0 >tracing_enabled cat /tmp/trace You'll see stack entries like: /lib/libpthread-2.7.so[+0xd370] You can convert them to function/line using: addr2line -fie /lib/libpthread-2.7.so 0xd370 Or: addr2line -fie /usr/lib/debug/libpthread-2.7.so 0xd370 For non-PIC/PIE executables this won't work: a.out[+0x73b] You need to run the following: addr2line -fie a.out 0x40073b (where 0x400000 is the default load address of a.out) Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 13 +++++++- kernel/trace/trace.c | 86 ++++++++++++++++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 3 +- 3 files changed, 93 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 79a80f79c062..35a78bc6651d 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -324,7 +324,7 @@ output. To see what is available, simply cat the file: cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree nouserstacktrace + noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj To disable one of the options, echo in the option prepended with "no". @@ -381,6 +381,17 @@ Here are the available options: userstacktrace - This option changes the trace. It records a stacktrace of the current userspace thread. + sym-userobj - when user stacktrace are enabled, look up which object the + address belongs to, and print a relative address + This is especially useful when ASLR is on, otherwise you don't + get a chance to resolve the address to object/file/line after the app is no + longer running + + The lookup is performed when you read trace,trace_pipe,latency_trace. Example: + + a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 +x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] + sched-tree - TBD (any users??) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ced8b4fa9f51..62776b71b1c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ static const char *trace_options[] = { "branch", "annotate", "userstacktrace", + "sym-userobj", NULL }; @@ -422,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } +static int +trace_seq_path(struct trace_seq *s, struct path *path) +{ + unsigned char *p; + + if (s->len >= (PAGE_SIZE - 1)) + return 0; + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = '?'; + return 1; + } + + return 0; +} + static void trace_seq_reset(struct trace_seq *s) { @@ -802,6 +826,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1429,28 +1454,73 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags) +{ + struct file *file = NULL; + unsigned long vmstart = 0; + int ret = 1; + + if (mm) { + const struct vm_area_struct *vma = find_vma(mm, ip); + if (vma) { + file = vma->vm_file; + vmstart = vma->vm_start; + } + } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", + ip - vmstart); + } + if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; +} + static int seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) + unsigned long sym_flags) { + struct mm_struct *mm = NULL; int ret = 1; unsigned i; + if (trace_flags & TRACE_ITER_SYM_USEROBJ) { + struct task_struct *task; + /* + * we do the lookup on the thread group leader, + * since individual threads might have already quit! + */ + rcu_read_lock(); + task = find_task_by_vpid(entry->ent.tgid); + rcu_read_unlock(); + + if (task) + mm = get_task_mm(task); + } + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { unsigned long ip = entry->caller[i]; if (ip == ULONG_MAX || !ret) break; - if (i) + if (i && ret) ret = trace_seq_puts(s, " <- "); if (!ip) { - ret = trace_seq_puts(s, "??"); + if (ret) + ret = trace_seq_puts(s, "??"); continue; } - if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + if (!ret) + break; + if (ret) + ret = seq_print_user_ip(s, mm, ip, sym_flags); } + if (mm) + mmput(mm); return ret; } @@ -1775,8 +1845,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); seq_print_userip_objs(field, s, sym_flags); - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); + trace_seq_putc(s, '\n'); break; } default: @@ -3581,6 +3650,9 @@ void ftrace_dump(void) atomic_inc(&global_trace.data[cpu]->disabled); } + /* don't look at user memory in panic mode */ + trace_flags &= ~TRACE_ITER_SYM_USEROBJ; + printk(KERN_TRACE "Dumping ftrace buffer:\n"); iter.tr = &global_trace; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 17bb4c830b01..28c15c2ebc22 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -508,7 +508,8 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, TRACE_ITER_BRANCH = 0x1000, TRACE_ITER_ANNOTATE = 0x2000, - TRACE_ITER_USERSTACKTRACE = 0x4000 + TRACE_ITER_USERSTACKTRACE = 0x4000, + TRACE_ITER_SYM_USEROBJ = 0x8000 }; /* -- cgit v1.2.3 From 69bb54ec05f57da7f6fac2cec0820cbc970df20f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:59:38 -0500 Subject: ftrace: add ftrace_off_permanent Impact: add new API to disable all of ftrace on anomalies It case of a serious anomaly being detected (like something caught by lockdep) it is a good idea to disable all tracing immediately, without grabing any locks. This patch adds ftrace_off_permanent that disables the tracers, function tracing and ring buffers without a way to enable them again. This should only be used when something serious has been detected. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ kernel/trace/trace.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e128..13e9cfc09928 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops; extern void tracing_start(void); extern void tracing_stop(void); +extern void ftrace_off_permanent(void); extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } static inline int ftrace_printk(const char *fmt, ...) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..0dbfb23ced97 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -660,6 +660,21 @@ static void trace_init_cmdlines(void) static int trace_stop_count; static DEFINE_SPINLOCK(tracing_start_lock); +/** + * ftrace_off_permanent - disable all ftrace code permanently + * + * This should only be called when a serious anomally has + * been detected. This will turn off the function tracing, + * ring buffers, and other tracing utilites. It takes no + * locks and can be called from any context. + */ +void ftrace_off_permanent(void) +{ + tracing_disabled = 1; + ftrace_stop(); + tracing_off_permanent(); +} + /** * tracing_start - quick start of the tracer * -- cgit v1.2.3 From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 12:39:06 +0200 Subject: tracing/stack-tracer: fix style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- arch/x86/kernel/stacktrace.c | 51 +++++++++++++++++++++++++------------------- include/linux/stacktrace.h | 2 +- kernel/trace/trace.c | 7 +++--- 3 files changed, 33 insertions(+), 27 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b15153060417..10786af95545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); struct stack_frame { const void __user *next_fp; - unsigned long return_address; + unsigned long ret_addr; }; static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) @@ -108,33 +108,40 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) return ret; } +static inline void __save_stack_trace_user(struct stack_trace *trace) +{ + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + + frame.next_fp = NULL; + frame.ret_addr = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.ret_addr) { + trace->entries[trace->nr_entries++] = + frame.ret_addr; + } + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } +} + void save_stack_trace_user(struct stack_trace *trace) { /* * Trace user stack if we are not a kernel thread */ if (current->mm) { - const struct pt_regs *regs = task_pt_regs(current); - const void __user *fp = (const void __user *)regs->bp; - - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = regs->ip; - - while (trace->nr_entries < trace->max_entries) { - struct stack_frame frame; - frame.next_fp = NULL; - frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; - if (frame.return_address) - trace->entries[trace->nr_entries++] = - frame.return_address; - if (fp == frame.next_fp) - break; - fp = frame.next_fp; - } + __save_stack_trace_user(trace); } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 68de51468f5d..fd42d6851109 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -25,7 +25,7 @@ extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62776b71b1c5..dedf35f36971 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -948,9 +948,9 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { + struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - struct ring_buffer_event *event; unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) @@ -1471,8 +1471,7 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, if (file) { ret = trace_seq_path(s, &file->f_path); if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", - ip - vmstart); + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ret = trace_seq_printf(s, " <" IP_FMT ">", ip); @@ -1485,7 +1484,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, { struct mm_struct *mm = NULL; int ret = 1; - unsigned i; + unsigned int i; if (trace_flags & TRACE_ITER_SYM_USEROBJ) { struct task_struct *task; -- cgit v1.2.3 From cffa10aecb6891f090a4d53a075bc40c082c45fc Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 12:39:07 +0200 Subject: tracing/stack-tracer: fix locking and refcounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: fix refcounting/object-access bug Hold mmap_sem while looking up/accessing vma. Hold the RCU lock while using the task we looked up. Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dedf35f36971..4c3bd82cec49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1462,11 +1462,15 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, int ret = 1; if (mm) { - const struct vm_area_struct *vma = find_vma(mm, ip); + const struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ip); if (vma) { file = vma->vm_file; vmstart = vma->vm_start; } + up_read(&mm->mmap_sem); } if (file) { ret = trace_seq_path(s, &file->f_path); @@ -1494,10 +1498,9 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, */ rcu_read_lock(); task = find_task_by_vpid(entry->ent.tgid); - rcu_read_unlock(); - if (task) mm = get_task_mm(task); + rcu_read_unlock(); } for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { -- cgit v1.2.3 From e38da59269be8c0196d16dff1be5bb26076afc6a Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 13:08:10 +0200 Subject: tracing/stack-tracer: avoid races accessing file Impact: fix race vma->vm_file reference is only stable while holding the mmap_sem, so move usage of it to within the critical section. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4c3bd82cec49..48d1536f1ca4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1470,13 +1470,13 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, file = vma->vm_file; vmstart = vma->vm_start; } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); + } up_read(&mm->mmap_sem); } - if (file) { - ret = trace_seq_path(s, &file->f_path); - if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); - } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ret = trace_seq_printf(s, " <" IP_FMT ">", ip); return ret; -- cgit v1.2.3 From 8bba1bf5e2434c83f2fe8b1422604ace9bbe4cb8 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 09:12:31 +0100 Subject: x86, ftrace: call trace->open() before stopping tracing; add trace->print_header() Add a callback to allow an ftrace plug-in to write its own header. Move the call to trace->open() up a few lines. The changes are required by the BTS ftrace plug-in. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 11 +++++++---- kernel/trace/trace.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a45b59e53fbc..8df8fdd69c95 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2298,7 +2298,9 @@ static int s_show(struct seq_file *m, void *v) seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); } - if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (iter->trace && iter->trace->print_header) + iter->trace->print_header(m); + else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return 0; @@ -2350,6 +2352,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Notify the tracer early; before we stop tracing. */ + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + /* Annotate start of buffers if we had overruns */ if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; @@ -2375,9 +2381,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) /* stop the trace while dumping */ tracing_stop(); - if (iter->trace && iter->trace->open) - iter->trace->open(iter); - mutex_unlock(&trace_types_lock); out: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 28c15c2ebc22..717f9f045c6f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -311,6 +311,7 @@ struct tracer { int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif + void (*print_header)(struct seq_file *m); enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(u32 old_flags, u32 bit, int set); -- cgit v1.2.3 From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 25 Nov 2008 21:07:04 +0100 Subject: tracing/function-return-tracer: change the name into function-graph-tracer Impact: cleanup This patch changes the name of the "return function tracer" into function-graph-tracer which is a more suitable name for a tracing which makes one able to retrieve the ordered call stack during the code flow. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/ftrace.h | 4 +- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/entry_32.S | 12 ++--- arch/x86/kernel/ftrace.c | 12 ++--- include/linux/ftrace.h | 24 ++++----- include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 2 +- kernel/Makefile | 2 +- kernel/fork.c | 4 +- kernel/sched.c | 2 +- kernel/trace/Kconfig | 19 ++++--- kernel/trace/Makefile | 2 +- kernel/trace/ftrace.c | 26 +++++----- kernel/trace/trace.c | 18 +++---- kernel/trace/trace.h | 12 ++--- kernel/trace/trace_functions_graph.c | 98 ++++++++++++++++++++++++++++++++++++ 17 files changed, 173 insertions(+), 72 deletions(-) create mode 100644 kernel/trace/trace_functions_graph.c (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e49a4fd718fe..0842b1127684 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,7 +29,7 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_RET_TRACER if X86_32 + select HAVE_FUNCTION_GRAPH_TRACER if X86_32 select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 754a3e082f94..7e61b4ceb9a4 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,7 +28,7 @@ struct dyn_arch_ftrace { #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef __ASSEMBLY__ @@ -51,6 +51,6 @@ struct ftrace_ret_stack { extern void return_to_handler(void); #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index af2bc36ca1c4..64939a0c3986 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,7 +14,7 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_ftrace.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER +ifdef CONFIG_FUNCTION_GRAPH_TRACER # Don't trace __switch_to() but let it for function tracer CFLAGS_REMOVE_process_32.o = -pg endif @@ -70,7 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 74defe21ba42..2b1f0f081a6b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1188,9 +1188,9 @@ ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace -#ifdef CONFIG_FUNCTION_RET_TRACER - cmpl $ftrace_stub, ftrace_function_return - jnz ftrace_return_caller +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + cmpl $ftrace_stub, ftrace_graph_function + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: @@ -1215,8 +1215,8 @@ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_RET_TRACER -ENTRY(ftrace_return_caller) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) cmpl $0, function_trace_stop jne ftrace_stub @@ -1230,7 +1230,7 @@ ENTRY(ftrace_return_caller) popl %ecx popl %eax ret -END(ftrace_return_caller) +END(ftrace_graph_caller) .globl return_to_handler return_to_handler: diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bb137f7297ed..3595a4c14aba 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -323,7 +323,7 @@ int __init ftrace_dyn_arch_init(void *data) } #endif -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef CONFIG_DYNAMIC_FTRACE @@ -389,11 +389,11 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, */ unsigned long ftrace_return_to_handler(void) { - struct ftrace_retfunc trace; + struct ftrace_graph_ret trace; pop_return_trace(&trace.ret, &trace.calltime, &trace.func, &trace.overrun); trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_function_return(&trace); + ftrace_graph_function(&trace); return trace.ret; } @@ -440,12 +440,12 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) ); if (WARN_ON(faulted)) { - unregister_ftrace_return(); + unregister_ftrace_graph(); return; } if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_return(); + unregister_ftrace_graph(); *parent = old; return; } @@ -456,4 +456,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) *parent = old; } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b2..b4ac734ad8d6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -#ifdef CONFIG_FUNCTION_RET_TRACER -extern void ftrace_return_caller(void); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern void ftrace_graph_caller(void); #endif /** @@ -315,7 +315,7 @@ ftrace_init_module(struct module *mod, /* * Structure that defines a return function trace. */ -struct ftrace_retfunc { +struct ftrace_graph_ret { unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; @@ -324,22 +324,22 @@ struct ftrace_retfunc { unsigned long overrun; }; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ -typedef void (*trace_function_return_t)(struct ftrace_retfunc *); +typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); -extern int register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_graph(trace_function_graph_t func); /* The current handler in use */ -extern trace_function_return_t ftrace_function_return; -extern void unregister_ftrace_return(void); +extern trace_function_graph_t ftrace_graph_function; +extern void unregister_ftrace_graph(void); -extern void ftrace_retfunc_init_task(struct task_struct *t); -extern void ftrace_retfunc_exit_task(struct task_struct *t); +extern void ftrace_graph_init_task(struct task_struct *t); +extern void ftrace_graph_exit_task(struct task_struct *t); #else -static inline void ftrace_retfunc_init_task(struct task_struct *t) { } -static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } +static inline void ftrace_graph_init_task(struct task_struct *t) { } +static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 0b4df55d7a74..366a054d0b05 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h b/include/linux/sched.h index d02a0ca70ee9..7ad48f2a2758 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1365,7 +1365,7 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored adress in ret_stack */ int curr_ret_stack; /* Stack of return addresses for return function tracing */ diff --git a/kernel/Makefile b/kernel/Makefile index 03a45e7e87b7..703cf3b7389c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,7 +21,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER +ifdef CONFIG_FUNCTION_GRAPH_TRACER CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() CFLAGS_REMOVE_module.o = -pg # For __module_text_address() endif diff --git a/kernel/fork.c b/kernel/fork.c index d6e1a3205f62..5f82a999c032 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -140,7 +140,7 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); - ftrace_retfunc_exit_task(tsk); + ftrace_graph_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1271,7 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); - ftrace_retfunc_init_task(p); + ftrace_graph_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 388d9db044ab..52490bf6b884 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,7 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; - ftrace_retfunc_init_task(idle); + ftrace_graph_init_task(idle); } /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67a..eb9b901e0777 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,7 +12,7 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool -config HAVE_FUNCTION_RET_TRACER +config HAVE_FUNCTION_GRAPH_TRACER bool config HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -63,15 +63,18 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. -config FUNCTION_RET_TRACER - bool "Kernel Function return Tracer" - depends on HAVE_FUNCTION_RET_TRACER +config FUNCTION_GRAPH_TRACER + bool "Kernel Function Graph Tracer" + depends on HAVE_FUNCTION_GRAPH_TRACER depends on FUNCTION_TRACER help - Enable the kernel to trace a function at its return. - It's first purpose is to trace the duration of functions. - This is done by setting the current return address on the thread - info structure of the current task. + Enable the kernel to trace a function at both its return + and its entry. + It's first purpose is to trace the duration of functions and + draw a call graph for each thread with some informations like + the return value. + This is done by setting the current return address on the current + task structure into a stack of calls. config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e822..08c5fe6ddc09 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53042f118f23..9e19976af727 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -395,11 +395,11 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_tracing_type == FTRACE_TYPE_ENTER) ftrace_addr = (unsigned long)ftrace_caller; else - ftrace_addr = (unsigned long)ftrace_return_caller; + ftrace_addr = (unsigned long)ftrace_graph_caller; #else ftrace_addr = (unsigned long)ftrace_caller; #endif @@ -1496,13 +1496,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_retfunc_active; /* The callback that hooks the return of a function */ -trace_function_return_t ftrace_function_return = - (trace_function_return_t)ftrace_stub; +trace_function_graph_t ftrace_graph_function = + (trace_function_graph_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ @@ -1549,7 +1549,7 @@ free: } /* Allocate a return stack for each task */ -static int start_return_tracing(void) +static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; int ret; @@ -1569,7 +1569,7 @@ static int start_return_tracing(void) return ret; } -int register_ftrace_return(trace_function_return_t func) +int register_ftrace_graph(trace_function_graph_t func) { int ret = 0; @@ -1584,13 +1584,13 @@ int register_ftrace_return(trace_function_return_t func) goto out; } atomic_inc(&ftrace_retfunc_active); - ret = start_return_tracing(); + ret = start_graph_tracing(); if (ret) { atomic_dec(&ftrace_retfunc_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_function_return = func; + ftrace_graph_function = func; ftrace_startup(); out: @@ -1598,12 +1598,12 @@ out: return ret; } -void unregister_ftrace_return(void) +void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); atomic_dec(&ftrace_retfunc_active); - ftrace_function_return = (trace_function_return_t)ftrace_stub; + ftrace_graph_function = (trace_function_graph_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1612,7 +1612,7 @@ void unregister_ftrace_return(void) } /* Allocate a return stack for newly created task */ -void ftrace_retfunc_init_task(struct task_struct *t) +void ftrace_graph_init_task(struct task_struct *t) { if (atomic_read(&ftrace_retfunc_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH @@ -1626,7 +1626,7 @@ void ftrace_retfunc_init_task(struct task_struct *t) t->ret_stack = NULL; } -void ftrace_retfunc_exit_task(struct task_struct *t) +void ftrace_graph_exit_task(struct task_struct *t) { struct ftrace_ret_stack *ret_stack = t->ret_stack; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8df8fdd69c95..f21ab2c68fd4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -878,15 +878,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -static void __trace_function_return(struct trace_array *tr, +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_function_graph(struct trace_array *tr, struct trace_array_cpu *data, - struct ftrace_retfunc *trace, + struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_ret_entry *entry; + struct ftrace_graph_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -1177,8 +1177,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -void trace_function_return(struct ftrace_retfunc *trace) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void trace_function_graph(struct ftrace_graph_ret *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,12 +1193,12 @@ void trace_function_return(struct ftrace_retfunc *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_return(tr, data, trace, flags, pc); + __trace_function_graph(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -2001,7 +2001,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_FN_RET: { - return print_return_function(iter); + return print_graph_function(iter); break; } case TRACE_BRANCH: { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af2..72b5ef868765 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -57,7 +57,7 @@ struct ftrace_entry { }; /* Function return entry */ -struct ftrace_ret_entry { +struct ftrace_graph_entry { struct trace_entry ent; unsigned long ip; unsigned long parent_ip; @@ -264,7 +264,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -398,7 +398,7 @@ void trace_function(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void -trace_function_return(struct ftrace_retfunc *trace); +trace_function_graph(struct ftrace_graph_ret *trace); void trace_bts(struct trace_array *tr, unsigned long from, @@ -489,11 +489,11 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ -#ifdef CONFIG_FUNCTION_RET_TRACER -extern enum print_line_t print_return_function(struct trace_iterator *iter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern enum print_line_t print_graph_function(struct trace_iterator *iter); #else static inline enum print_line_t -print_return_function(struct trace_iterator *iter) +print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c new file mode 100644 index 000000000000..f5bad4624d2b --- /dev/null +++ b/kernel/trace/trace_functions_graph.c @@ -0,0 +1,98 @@ +/* + * + * Function graph tracer. + * Copyright (c) 2008 Frederic Weisbecker + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt + * + */ +#include +#include +#include +#include + +#include "trace.h" + + +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +static struct tracer_opt trace_opts[] = { + /* Display overruns or not */ + { TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, /* Don't display overruns by default */ + .opts = trace_opts +}; + + +static int graph_trace_init(struct trace_array *tr) +{ + int cpu; + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + return register_ftrace_graph(&trace_function_graph); +} + +static void graph_trace_reset(struct trace_array *tr) +{ + unregister_ftrace_graph(); +} + + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_entry *field; + int ret; + + if (entry->type == TRACE_FN_RET) { + trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = seq_print_ip_sym(s, field->ip, + trace_flags & TRACE_ITER_SYM_MASK); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " (%llu ns)", + field->rettime - field->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)", + field->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer graph_trace __read_mostly = { + .name = "function-graph", + .init = graph_trace_init, + .reset = graph_trace_reset, + .print_line = print_graph_function, + .flags = &tracer_flags, +}; + +static __init int init_graph_trace(void) +{ + return register_tracer(&graph_trace); +} + +device_initcall(init_graph_trace); -- cgit v1.2.3 From 287b6e68ca7209caec40b2f44f837c580a413bae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Nov 2008 00:57:25 +0100 Subject: tracing/function-return-tracer: set a more human readable output Impact: feature This patch sets a C-like output for the function graph tracing. For this aim, we now call two handler for each function: one on the entry and one other on return. This way we can draw a well-ordered call stack. The pid of the previous trace is loosely stored to be compared against the one of the current trace to see if there were a context switch. Without this little feature, the call tree would seem broken at some locations. We could use the sched_tracer to capture these sched_events but this way of processing is much more simpler. 2 spaces have been chosen for indentation to fit the screen while deep calls. The time of execution in nanosecs is printed just after closed braces, it seems more easy this way to find the corresponding function. If the time was printed as a first column, it would be not so easy to find the corresponding function if it is called on a deep depth. I plan to output the return value but on 32 bits CPU, the return value can be 32 or 64, and its difficult to guess on which case we are. I don't know what would be the better solution on X86-32: only print eax (low-part) or even edx (high-part). Actually it's thee same problem when a function return a 8 bits value, the high part of eax could contain junk values... Here is an example of trace: sys_read() { fget_light() { } 526 vfs_read() { rw_verify_area() { security_file_permission() { cap_file_permission() { } 519 } 1564 } 2640 do_sync_read() { pipe_read() { __might_sleep() { } 511 pipe_wait() { prepare_to_wait() { } 760 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { update_min_vruntime() { } 504 } 1587 clear_buddies() { } 512 add_cfs_task_weight() { } 519 update_min_vruntime() { } 511 } 5602 dequeue_entity() { update_curr() { update_min_vruntime() { } 496 } 1631 clear_buddies() { } 496 update_min_vruntime() { } 527 } 4580 hrtick_update() { hrtick_start_fair() { } 488 } 1489 } 13700 } 14949 } 16016 msecs_to_jiffies() { } 496 put_prev_task_fair() { } 504 pick_next_task_fair() { } 489 pick_next_task_rt() { } 496 pick_next_task_fair() { } 489 pick_next_task_idle() { } 489 ------------8<---------- thread 4 ------------8<---------- finish_task_switch() { } 1203 do_softirq() { __do_softirq() { __local_bh_disable() { } 669 rcu_process_callbacks() { __rcu_process_callbacks() { cpu_quiet() { rcu_start_batch() { } 503 } 1647 } 3128 __rcu_process_callbacks() { } 542 } 5362 _local_bh_enable() { } 587 } 8880 } 9986 kthread_should_stop() { } 669 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { calc_delta_mine() { } 511 update_min_vruntime() { } 511 } 2813 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 32 +++++++---- include/linux/ftrace.h | 25 ++++++-- kernel/trace/ftrace.c | 30 +++++----- kernel/trace/trace.c | 67 ++++++++++++++++++---- kernel/trace/trace.h | 28 +++++---- kernel/trace/trace_functions_graph.c | 104 ++++++++++++++++++++++++++-------- kernel/trace/trace_functions_return.c | 98 -------------------------------- 7 files changed, 208 insertions(+), 176 deletions(-) delete mode 100644 kernel/trace/trace_functions_return.c (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3595a4c14aba..26b2d92d48b3 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -347,7 +347,7 @@ void ftrace_nmi_exit(void) /* Add a function return address to the trace stack on thread info.*/ static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func) + unsigned long func, int *depth) { int index; @@ -365,21 +365,22 @@ static int push_return_trace(unsigned long ret, unsigned long long time, current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = time; + *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(unsigned long *ret, unsigned long long *time, - unsigned long *func, unsigned long *overrun) +static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; index = current->curr_ret_stack; *ret = current->ret_stack[index].ret; - *func = current->ret_stack[index].func; - *time = current->ret_stack[index].calltime; - *overrun = atomic_read(¤t->trace_overrun); + trace->func = current->ret_stack[index].func; + trace->calltime = current->ret_stack[index].calltime; + trace->overrun = atomic_read(¤t->trace_overrun); + trace->depth = index; current->curr_ret_stack--; } @@ -390,12 +391,13 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, unsigned long ftrace_return_to_handler(void) { struct ftrace_graph_ret trace; - pop_return_trace(&trace.ret, &trace.calltime, &trace.func, - &trace.overrun); + unsigned long ret; + + pop_return_trace(&trace, &ret); trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_graph_function(&trace); + ftrace_graph_return(&trace); - return trace.ret; + return ret; } /* @@ -407,6 +409,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long old; unsigned long long calltime; int faulted; + struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -452,8 +455,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) calltime = cpu_clock(raw_smp_processor_id()); - if (push_return_trace(old, calltime, self_addr) == -EBUSY) + if (push_return_trace(old, calltime, + self_addr, &trace.depth) == -EBUSY) { *parent = old; + return; + } + + trace.func = self_addr; + ftrace_graph_entry(&trace); + } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b4ac734ad8d6..fc2d54987198 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -312,27 +312,40 @@ ftrace_init_module(struct module *mod, #endif +/* + * Structure that defines an entry function trace. + */ +struct ftrace_graph_ent { + unsigned long func; /* Current function */ + int depth; +}; + /* * Structure that defines a return function trace. */ struct ftrace_graph_ret { - unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; unsigned long long rettime; /* Number of functions that overran the depth limit for current task */ unsigned long overrun; + int depth; }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of a callback handler of tracing return function */ -typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + +extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc); + +/* The current handlers in use */ +extern trace_func_graph_ret_t ftrace_graph_return; +extern trace_func_graph_ent_t ftrace_graph_entry; -extern int register_ftrace_graph(trace_function_graph_t func); -/* The current handler in use */ -extern trace_function_graph_t ftrace_graph_function; extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9e19976af727..7e2d3b91692d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,12 +1498,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_retfunc_active; - -/* The callback that hooks the return of a function */ -trace_function_graph_t ftrace_graph_function = - (trace_function_graph_t)ftrace_stub; +static atomic_t ftrace_graph_active; +/* The callbacks that hook a function */ +trace_func_graph_ret_t ftrace_graph_return = + (trace_func_graph_ret_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = + (trace_func_graph_ent_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1569,7 +1570,8 @@ static int start_graph_tracing(void) return ret; } -int register_ftrace_graph(trace_function_graph_t func) +int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) { int ret = 0; @@ -1583,14 +1585,15 @@ int register_ftrace_graph(trace_function_graph_t func) ret = -EBUSY; goto out; } - atomic_inc(&ftrace_retfunc_active); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_retfunc_active); + atomic_dec(&ftrace_graph_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_graph_function = func; + ftrace_graph_return = retfunc; + ftrace_graph_entry = entryfunc; ftrace_startup(); out: @@ -1602,8 +1605,9 @@ void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); - atomic_dec(&ftrace_retfunc_active); - ftrace_graph_function = (trace_function_graph_t)ftrace_stub; + atomic_dec(&ftrace_graph_active); + ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; + ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1614,7 +1618,7 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_retfunc_active)) { + if (atomic_read(&ftrace_graph_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); @@ -1638,5 +1642,3 @@ void ftrace_graph_exit_task(struct task_struct *t) } #endif - - diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f21ab2c68fd4..9d5f7c94f251 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -879,14 +879,38 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void __trace_function_graph(struct trace_array *tr, +static void __trace_graph_entry(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} + +static void __trace_graph_return(struct trace_array *tr, struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_graph_entry *entry; + struct ftrace_graph_ret_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -898,12 +922,8 @@ static void __trace_function_graph(struct trace_array *tr, return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN_RET; - entry->ip = trace->func; - entry->parent_ip = trace->ret; - entry->rettime = trace->rettime; - entry->calltime = trace->calltime; - entry->overrun = trace->overrun; + entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif @@ -1178,7 +1198,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_function_graph(struct ftrace_graph_ret *trace) +void trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,7 +1213,28 @@ void trace_function_graph(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_graph(tr, data, trace, flags, pc); + __trace_graph_entry(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); @@ -2000,9 +2041,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } - case TRACE_FN_RET: { + case TRACE_GRAPH_RET: { + return print_graph_function(iter); + } + case TRACE_GRAPH_ENT: { return print_graph_function(iter); - break; } case TRACE_BRANCH: { struct trace_branch *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 72b5ef868765..ffe1bb1eb620 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -25,7 +25,8 @@ enum trace_type { TRACE_BRANCH, TRACE_BOOT_CALL, TRACE_BOOT_RET, - TRACE_FN_RET, + TRACE_GRAPH_RET, + TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BTS, @@ -56,14 +57,16 @@ struct ftrace_entry { unsigned long parent_ip; }; +/* Function call entry */ +struct ftrace_graph_ent_entry { + struct trace_entry ent; + struct ftrace_graph_ent graph_ent; +}; + /* Function return entry */ -struct ftrace_graph_entry { - struct trace_entry ent; - unsigned long ip; - unsigned long parent_ip; - unsigned long long calltime; - unsigned long long rettime; - unsigned long overrun; +struct ftrace_graph_ret_entry { + struct trace_entry ent; + struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -264,7 +267,10 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ + TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ + TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -397,9 +403,9 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); -void -trace_function_graph(struct ftrace_graph_ret *trace); +void trace_graph_return(struct ftrace_graph_ret *trace); +void trace_graph_entry(struct ftrace_graph_ent *trace); void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f5bad4624d2b..b6f0cc2a00cb 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -13,6 +13,7 @@ #include "trace.h" +#define TRACE_GRAPH_INDENT 2 #define TRACE_GRAPH_PRINT_OVERRUN 0x1 static struct tracer_opt trace_opts[] = { @@ -26,6 +27,8 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; +/* pid on the last trace processed */ +static pid_t last_pid = -1; static int graph_trace_init(struct trace_array *tr) { @@ -33,7 +36,8 @@ static int graph_trace_init(struct trace_array *tr) for_each_online_cpu(cpu) tracing_reset(tr, cpu); - return register_ftrace_graph(&trace_function_graph); + return register_ftrace_graph(&trace_graph_return, + &trace_graph_entry); } static void graph_trace_reset(struct trace_array *tr) @@ -41,45 +45,97 @@ static void graph_trace_reset(struct trace_array *tr) unregister_ftrace_graph(); } +/* If the pid changed since the last trace, output this event */ +static int verif_pid(struct trace_seq *s, pid_t pid) +{ + if (last_pid != -1 && last_pid == pid) + return 1; -enum print_line_t -print_graph_function(struct trace_iterator *iter) + last_pid = pid; + return trace_seq_printf(s, "\n------------8<---------- thread %d" + " ------------8<----------\n\n", + pid); +} + +static enum print_line_t +print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s, + struct trace_entry *ent) { - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_graph_entry *field; + int i; int ret; - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (!verif_pid(s, ent->pid)) + return TRACE_TYPE_PARTIAL_LINE; - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + + ret = seq_print_ip_sym(s, call->func, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "() {\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, + struct trace_entry *ent) +{ + int i; + int ret; + + if (!verif_pid(s, ent->pid)) + return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); + for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "} "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + ret = trace_seq_printf(s, "%llu\n", trace->rettime - trace->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "\n"); + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)\n", + trace->overrun); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + return TRACE_TYPE_HANDLED; +} + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; - return TRACE_TYPE_HANDLED; + switch (entry->type) { + case TRACE_GRAPH_ENT: { + struct ftrace_graph_ent_entry *field; + trace_assign_type(field, entry); + return print_graph_entry(&field->graph_ent, s, entry); + } + case TRACE_GRAPH_RET: { + struct ftrace_graph_ret_entry *field; + trace_assign_type(field, entry); + return print_graph_return(&field->ret, s, entry); + } + default: + return TRACE_TYPE_UNHANDLED; } - return TRACE_TYPE_UNHANDLED; } static struct tracer graph_trace __read_mostly = { diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c deleted file mode 100644 index e00d64509c9c..000000000000 --- a/kernel/trace/trace_functions_return.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * - * Function return tracer. - * Copyright (c) 2008 Frederic Weisbecker - * Mostly borrowed from function tracer which - * is Copyright (c) Steven Rostedt - * - */ -#include -#include -#include -#include - -#include "trace.h" - - -#define TRACE_RETURN_PRINT_OVERRUN 0x1 -static struct tracer_opt trace_opts[] = { - /* Display overruns or not */ - { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) }, - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - .val = 0, /* Don't display overruns by default */ - .opts = trace_opts -}; - - -static int return_trace_init(struct trace_array *tr) -{ - int cpu; - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - - return register_ftrace_return(&trace_function_return); -} - -static void return_trace_reset(struct trace_array *tr) -{ - unregister_ftrace_return(); -} - - -enum print_line_t -print_return_function(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_ret_entry *field; - int ret; - - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -static struct tracer return_trace __read_mostly = { - .name = "return", - .init = return_trace_init, - .reset = return_trace_reset, - .print_line = print_return_function, - .flags = &tracer_flags, -}; - -static __init int init_return_trace(void) -{ - return register_tracer(&return_trace); -} - -device_initcall(init_return_trace); -- cgit v1.2.3 From 660c7f9be96321fc80026d76411bd15e6f418a72 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:26 -0500 Subject: ftrace: add thread comm to function graph tracer Impact: enhancement to function graph tracer Export the trace_find_cmdline so the function graph tracer can use it to print the comms of the threads. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 21 ++++++++++++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d5f7c94f251..5811e0a5f732 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -804,7 +804,7 @@ static void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static char *trace_find_cmdline(int pid) +char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ffe1bb1eb620..7adacf349ef7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -450,6 +450,7 @@ struct tracer_switch_ops { struct tracer_switch_ops *next; }; +char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b6f0cc2a00cb..bbb81e7b6c40 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -32,29 +32,40 @@ static pid_t last_pid = -1; static int graph_trace_init(struct trace_array *tr) { - int cpu; + int cpu, ret; + for_each_online_cpu(cpu) tracing_reset(tr, cpu); - return register_ftrace_graph(&trace_graph_return, + ret = register_ftrace_graph(&trace_graph_return, &trace_graph_entry); + if (ret) + return ret; + tracing_start_cmdline_record(); + + return 0; } static void graph_trace_reset(struct trace_array *tr) { - unregister_ftrace_graph(); + tracing_stop_cmdline_record(); + unregister_ftrace_graph(); } /* If the pid changed since the last trace, output this event */ static int verif_pid(struct trace_seq *s, pid_t pid) { + char *comm; + if (last_pid != -1 && last_pid == pid) return 1; last_pid = pid; - return trace_seq_printf(s, "\n------------8<---------- thread %d" + comm = trace_find_cmdline(pid); + + return trace_seq_printf(s, "\n------------8<---------- thread %s-%d" " ------------8<----------\n\n", - pid); + comm, pid); } static enum print_line_t -- cgit v1.2.3 From c7425acb42fff1e723b05fbf4ea11e9a455d95dc Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Fri, 28 Nov 2008 11:17:56 +0200 Subject: tracing, alpha: fix build: add missing #ifdef CONFIG_STACKTRACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are architectures that still have no stacktrace support. Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5811e0a5f732..91887a280ab9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -983,6 +983,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { +#ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -1008,6 +1009,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, save_stack_trace_user(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif } void __trace_userstack(struct trace_array *tr, -- cgit v1.2.3 From a5e25883a445dce94a087ca479b21a5959cd5c18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 15:34:05 -0500 Subject: ftrace: replace raw_local_irq_save with local_irq_save Impact: fix for lockdep and ftrace The raw_local_irq_save/restore confuses lockdep. This patch converts them to the local_irq_save/restore variants. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 1 + kernel/trace/trace.c | 12 ++++++------ kernel/trace/trace_branch.c | 4 ++-- kernel/trace/trace_stack.c | 8 ++++---- 4 files changed, 13 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 46a404173db2..74b1878b8bb8 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -25,6 +25,7 @@ * Thanks to Arjan van de Ven for coming up with the initial idea of * mapping lock dependencies runtime. */ +#define DISABLE_BRANCH_PROFILING #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 91887a280ab9..380de630ebce 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,7 +1209,7 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1218,7 +1218,7 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) __trace_graph_entry(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void trace_graph_return(struct ftrace_graph_ret *trace) @@ -1230,7 +1230,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) int cpu; int pc; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1239,7 +1239,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -2645,7 +2645,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - raw_local_irq_disable(); + local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* @@ -2662,7 +2662,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, } } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_enable(); + local_irq_enable(); tracing_cpumask = tracing_cpumask_new; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index bc972753568d..6c00feb3bac7 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -42,7 +42,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; @@ -74,7 +74,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) out: atomic_dec(&tr->data[cpu]->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static inline diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index fde3be15c642..06a16115be0f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -48,7 +48,7 @@ static inline void check_stack(void) if (!object_is_on_stack(&this_size)) return; - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&max_stack_lock); /* a race could have already updated it */ @@ -96,7 +96,7 @@ static inline void check_stack(void) out: __raw_spin_unlock(&max_stack_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static void @@ -162,11 +162,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&max_stack_lock); *ptr = val; __raw_spin_unlock(&max_stack_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); return count; } -- cgit v1.2.3 From e49dc19c6a19ea112fcb94b7c62ec62cdd5c08aa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 23:50:05 -0500 Subject: ftrace: function graph return for function entry Impact: feature, let entry function decide to trace or not This patch lets the graph tracer entry function decide if the tracing should be done at the end as well. This requires all function graph entry functions return 1 if it should trace, or 0 if the return should not be traced. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 3 +++ arch/x86/kernel/entry_64.S | 3 +++ arch/x86/kernel/ftrace.c | 7 ++++++- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 10 +++++++--- kernel/trace/trace.c | 4 +++- kernel/trace/trace.h | 2 +- 7 files changed, 24 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 826682abed1d..43ceb3f454bf 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1196,6 +1196,9 @@ ENTRY(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpl $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller + + cmpl $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9060ba6497e2..54e0bbdccb99 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -120,6 +120,9 @@ ENTRY(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpq $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller + + cmpq $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index adba8e9a427c..d278ad2ebda2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -425,6 +425,7 @@ static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; + barrier(); current->curr_ret_stack--; } @@ -506,7 +507,11 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } trace.func = self_addr; - ftrace_graph_entry(&trace); + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 58ca1c3a3f4d..469ceb3e85ba 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -371,7 +371,7 @@ struct ftrace_graph_ret { #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a44af05ae2d0..65b9e863056b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1636,11 +1636,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, static atomic_t ftrace_graph_active; +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +{ + return 0; +} + /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; -trace_func_graph_ent_t ftrace_graph_entry = - (trace_func_graph_ent_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1738,7 +1742,7 @@ void unregister_ftrace_graph(void) atomic_dec(&ftrace_graph_active); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; - ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; + ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); mutex_unlock(&ftrace_sysctl_lock); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 380de630ebce..8b6409a62b54 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1200,7 +1200,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_graph_entry(struct ftrace_graph_ent *trace) +int trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1219,6 +1219,8 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) } atomic_dec(&data->disabled); local_irq_restore(flags); + + return 1; } void trace_graph_return(struct ftrace_graph_ret *trace) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f96f4e787ff3..0565ae9a2210 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -412,7 +412,7 @@ void trace_function(struct trace_array *tr, unsigned long flags, int pc); void trace_graph_return(struct ftrace_graph_ret *trace); -void trace_graph_entry(struct ftrace_graph_ent *trace); +int trace_graph_entry(struct ftrace_graph_ent *trace); void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to); -- cgit v1.2.3 From ea4e2bc4d9f7370e57a343ccb5e7c0ad3222ec3c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 15:36:57 -0500 Subject: ftrace: graph of a single function This patch adds the file: /debugfs/tracing/set_graph_function which can be used along with the function graph tracer. When this file is empty, the function graph tracer will act as usual. When the file has a function in it, the function graph tracer will only trace that function. For example: # echo blk_unplug > /debugfs/tracing/set_graph_function # cat /debugfs/tracing/trace [...] ------------------------------------------ | 2) make-19003 => kjournald-2219 ------------------------------------------ 2) | blk_unplug() { 2) | dm_unplug_all() { 2) | dm_get_table() { 2) 1.381 us | _read_lock(); 2) 0.911 us | dm_table_get(); 2) 1. 76 us | _read_unlock(); 2) + 12.912 us | } 2) | dm_table_unplug_all() { 2) | blk_unplug() { 2) 0.778 us | generic_unplug_device(); 2) 2.409 us | } 2) 5.992 us | } 2) 0.813 us | dm_table_put(); 2) + 29. 90 us | } 2) + 34.532 us | } You can add up to 32 functions into this file. Currently we limit it to 32, but this may change with later improvements. To add another function, use the append '>>': # echo sys_read >> /debugfs/tracing/set_graph_function # cat /debugfs/tracing/set_graph_function blk_unplug sys_read Using the '>' will clear out the function and write anew: # echo sys_write > /debug/tracing/set_graph_function # cat /debug/tracing/set_graph_function sys_write Note, if you have function graph running while doing this, the small time between clearing it and updating it will cause the graph to record all functions. This should not be an issue because after it sets the filter, only those functions will be recorded from then on. If you need to only record a particular function then set this file first before starting the function graph tracer. In the future this side effect may be corrected. The set_graph_function file is similar to the set_ftrace_filter but it does not take wild cards nor does it allow for more than one function to be set with a single write. There is no technical reason why this is the case, I just do not have the time yet to implement that. Note, dynamic ftrace must be enabled for this to appear because it uses the dynamic ftrace records to match the name to the mcount call sites. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 46 ++++++++++ include/linux/sched.h | 4 + kernel/trace/ftrace.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.c | 8 ++ kernel/trace/trace.h | 30 ++++++- 5 files changed, 314 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 469ceb3e85ba..b295d3106bfe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -7,6 +7,7 @@ #include #include #include +#include #ifdef CONFIG_FUNCTION_TRACER @@ -391,4 +392,49 @@ static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif +#ifdef CONFIG_TRACING +#include + +/* flags for current->trace */ +enum { + TSK_TRACE_FL_TRACE_BIT = 0, + TSK_TRACE_FL_GRAPH_BIT = 1, +}; +enum { + TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT, + TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT, +}; + +static inline void set_tsk_trace_trace(struct task_struct *tsk) +{ + set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); +} + +static inline void clear_tsk_trace_trace(struct task_struct *tsk) +{ + clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); +} + +static inline int test_tsk_trace_trace(struct task_struct *tsk) +{ + return tsk->trace & TSK_TRACE_FL_TRACE; +} + +static inline void set_tsk_trace_graph(struct task_struct *tsk) +{ + set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); +} + +static inline void clear_tsk_trace_graph(struct task_struct *tsk) +{ + clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); +} + +static inline int test_tsk_trace_graph(struct task_struct *tsk) +{ + return tsk->trace & TSK_TRACE_FL_GRAPH; +} + +#endif /* CONFIG_TRACING */ + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0a93c31228..4c152e0acc9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1380,6 +1380,10 @@ struct task_struct { */ atomic_t trace_overrun; #endif +#ifdef CONFIG_TRACING + /* state flags for use by tracers */ + unsigned long trace; +#endif }; /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65b9e863056b..b17a30350f06 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1320,6 +1320,224 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +static DEFINE_MUTEX(graph_lock); + +int ftrace_graph_count; +unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; + +static void * +g_next(struct seq_file *m, void *v, loff_t *pos) +{ + unsigned long *array = m->private; + int index = *pos; + + (*pos)++; + + if (index >= ftrace_graph_count) + return NULL; + + return &array[index]; +} + +static void *g_start(struct seq_file *m, loff_t *pos) +{ + void *p = NULL; + + mutex_lock(&graph_lock); + + p = g_next(m, p, pos); + + return p; +} + +static void g_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&graph_lock); +} + +static int g_show(struct seq_file *m, void *v) +{ + unsigned long *ptr = v; + char str[KSYM_SYMBOL_LEN]; + + if (!ptr) + return 0; + + kallsyms_lookup(*ptr, NULL, NULL, NULL, str); + + seq_printf(m, "%s\n", str); + + return 0; +} + +static struct seq_operations ftrace_graph_seq_ops = { + .start = g_start, + .next = g_next, + .stop = g_stop, + .show = g_show, +}; + +static int +ftrace_graph_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + mutex_lock(&graph_lock); + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) { + ftrace_graph_count = 0; + memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); + } + + if (file->f_mode & FMODE_READ) { + ret = seq_open(file, &ftrace_graph_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ftrace_graph_funcs; + } + } else + file->private_data = ftrace_graph_funcs; + mutex_unlock(&graph_lock); + + return ret; +} + +static ssize_t +ftrace_graph_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + if (file->f_mode & FMODE_READ) + return seq_read(file, ubuf, cnt, ppos); + else + return -EPERM; +} + +static int +ftrace_set_func(unsigned long *array, int idx, char *buffer) +{ + char str[KSYM_SYMBOL_LEN]; + struct dyn_ftrace *rec; + struct ftrace_page *pg; + int found = 0; + int i; + + if (ftrace_disabled) + return -ENODEV; + + /* should not be called from interrupt context */ + spin_lock(&ftrace_lock); + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + + if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) + continue; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + if (strcmp(str, buffer) == 0) { + found = 1; + array[idx] = rec->ip; + break; + } + } + } + spin_unlock(&ftrace_lock); + + return found ? 0 : -EINVAL; +} + +static ssize_t +ftrace_graph_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned char buffer[FTRACE_BUFF_MAX+1]; + unsigned long *array; + size_t read = 0; + ssize_t ret; + int index = 0; + char ch; + + if (!cnt || cnt < 0) + return 0; + + mutex_lock(&graph_lock); + + if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { + ret = -EBUSY; + goto out; + } + + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + array = m->private; + } else + array = file->private_data; + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + if (isspace(ch)) { + *ppos += read; + ret = read; + goto out; + } + + while (cnt && !isspace(ch)) { + if (index < FTRACE_BUFF_MAX) + buffer[index++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + buffer[index] = 0; + + /* we allow only one at a time */ + ret = ftrace_set_func(array, ftrace_graph_count, buffer); + if (ret) + goto out; + + ftrace_graph_count++; + + file->f_pos += read; + + ret = read; + out: + mutex_unlock(&graph_lock); + + return ret; +} + +static const struct file_operations ftrace_graph_fops = { + .open = ftrace_graph_open, + .read = ftrace_graph_read, + .write = ftrace_graph_write, +}; +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { struct dentry *entry; @@ -1347,6 +1565,15 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + NULL, + &ftrace_graph_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_graph_function' entry\n"); +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b6409a62b54..710b39acd81b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,6 +1209,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; + if (!ftrace_graph_addr(trace->func)) + return 0; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -1217,6 +1220,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) pc = preempt_count(); __trace_graph_entry(tr, data, trace, flags, pc); } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -1240,6 +1246,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) pc = preempt_count(); __trace_graph_return(tr, data, trace, flags, pc); } + if (!trace->depth) + clear_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0565ae9a2210..41f026bfc9ed 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -505,13 +505,41 @@ extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern enum print_line_t print_graph_function(struct trace_iterator *iter); + +#ifdef CONFIG_DYNAMIC_FTRACE +/* TODO: make this variable */ +#define FTRACE_GRAPH_MAX_FUNCS 32 +extern int ftrace_graph_count; +extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; + +static inline int ftrace_graph_addr(unsigned long addr) +{ + int i; + + if (!ftrace_graph_count || test_tsk_trace_graph(current)) + return 1; + + for (i = 0; i < ftrace_graph_count; i++) { + if (addr == ftrace_graph_funcs[i]) + return 1; + } + + return 0; +} #else +static inline int ftrace_trace_addr(unsigned long addr) +{ + return 1 +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } -#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* * trace_iterator_flags is an enumeration that defines bit -- cgit v1.2.3 From 804a685162a7080386714166776f57255a75238e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 15:36:59 -0500 Subject: ftrace: trace single pid for function graph tracer Impact: New feature This patch makes the changes to set_ftrace_pid apply to the function graph tracer. # echo $$ > /debugfs/tracing/set_ftrace_pid # echo function_graph > /debugfs/tracing/current_tracer Will cause only the current task to be traced. Note, the trace flags are also inherited by child processes, so the children of the shell will also be traced. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.c | 3 +++ kernel/trace/trace.h | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c5049f54a275..57592a9dd630 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -48,7 +48,7 @@ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; /* set when tracing only a pid */ -static int ftrace_pid_trace = -1; +int ftrace_pid_trace = -1; /* Quick disabling of function tracer. */ int function_trace_stop; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 710b39acd81b..1bd9574404e5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,6 +1209,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; + if (!ftrace_trace_task(current)) + return 0; + if (!ftrace_graph_addr(trace->func)) return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 41f026bfc9ed..95fff37ed970 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -541,6 +541,16 @@ print_graph_function(struct trace_iterator *iter) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +extern int ftrace_pid_trace; + +static inline int ftrace_trace_task(struct task_struct *task) +{ + if (ftrace_pid_trace < 0) + return 1; + + return test_tsk_trace_trace(task); +} + /* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. -- cgit v1.2.3 From 1fd8f2a3f9a91b287a876cef830b21baafc8a799 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Dec 2008 23:45:11 +0100 Subject: tracing/function-graph-tracer: handle ftrace_printk entries Handle the TRACE_PRINT entries from the function grapg tracer and output them as a C comment just below the function that called it, as if it was a comment inside this function. Example with an ftrace_printk inside might_sleep() function: void __might_sleep(char *file, int line) { static unsigned long prev_jiffy; /* ratelimiting */ ftrace_printk("Hi I'm a comment in might_sleep() :-)"); A chunk of a resulting trace: 0) | _reiserfs_free_block() { 0) | reiserfs_read_bitmap_block() { 0) | __bread() { 0) | __getblk() { 0) | __find_get_block() { 0) 0.698 us | mark_page_accessed(); 0) 2.267 us | } 0) | __might_sleep() { 0) | /* Hi I'm a comment in might_sleep() :-) */ 0) 1.321 us | } 0) 5.872 us | } 0) 7.313 us | } 0) 8.718 us | } And this patch brings two minor fixes: - The newline after a switch-out task has disappeared - The "|" sign just before the cpu number on task-switch has been deleted. 0) 0.616 us | pick_next_task_rt(); 0) 1.457 us | _spin_trylock(); 0) 0.653 us | _spin_unlock(); 0) 0.728 us | _spin_trylock(); 0) 0.631 us | _spin_unlock(); 0) 0.729 us | native_load_sp0(); 0) 0.593 us | native_load_tls(); ------------------------------------------ 0) cat-2834 => migrati-3 ------------------------------------------ 0) | finish_task_switch() { 0) 0.841 us | _spin_unlock_irq(); 0) 0.616 us | post_schedule_rt(); 0) 3.882 us | } Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 28 +++++++++++--- kernel/trace/trace.h | 4 +- kernel/trace/trace_functions_graph.c | 72 +++++++++++++++++++++++++++++++++++- kernel/trace/trace_mmiotrace.c | 2 +- 4 files changed, 97 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b6409a62b54..1ca74c0cee6a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3335,7 +3335,7 @@ static int mark_printk(const char *fmt, ...) int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); + ret = trace_vprintk(0, -1, fmt, args); va_end(args); return ret; } @@ -3564,9 +3564,16 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - static DEFINE_SPINLOCK(trace_buf_lock); + /* + * Raw Spinlock because a normal spinlock would be traced here + * and append an irrelevant couple spin_lock_irqsave/ + * spin_unlock_irqrestore traced by ftrace around this + * TRACE_PRINTK trace. + */ + static raw_spinlock_t trace_buf_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -3587,7 +3594,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - spin_lock_irqsave(&trace_buf_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&trace_buf_lock); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3601,13 +3609,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; + entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); + __raw_spin_unlock(&trace_buf_lock); + local_irq_restore(flags); out: preempt_enable_notrace(); @@ -3625,7 +3635,13 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, fmt, ap); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ret = trace_vprintk(ip, current->curr_ret_stack, fmt, ap); +#else + ret = trace_vprintk(ip, -1, fmt, ap); +#endif + va_end(ap); return ret; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0565ae9a2210..fce98898205a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -117,6 +117,7 @@ struct userstack_entry { struct print_entry { struct trace_entry ent; unsigned long ip; + int depth; char buf[]; }; @@ -498,7 +499,8 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); extern long ns2usecs(cycle_t nsec); -extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); +extern int +trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); extern unsigned long trace_flags; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index c66578f2fdc2..32b7fb9a19df 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -173,7 +173,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu) */ ret = trace_seq_printf(s, - "\n ------------------------------------------\n |"); + " ------------------------------------------\n"); if (!ret) TRACE_TYPE_PARTIAL_LINE; @@ -477,6 +477,71 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, return TRACE_TYPE_HANDLED; } +static enum print_line_t +print_graph_comment(struct print_entry *trace, struct trace_seq *s, + struct trace_entry *ent, struct trace_iterator *iter) +{ + int i; + int ret; + + /* Pid */ + if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + /* Cpu */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + ret = print_graph_cpu(s, iter->cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* Proc */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, ent->pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* No overhead */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* No time */ + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Indentation */ + if (trace->depth > 0) + for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* The comment */ + ret = trace_seq_printf(s, "/* %s", trace->buf); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (ent->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + + ret = trace_seq_printf(s, " */\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + + enum print_line_t print_graph_function(struct trace_iterator *iter) { @@ -495,6 +560,11 @@ print_graph_function(struct trace_iterator *iter) trace_assign_type(field, entry); return print_graph_return(&field->ret, s, entry, iter->cpu); } + case TRACE_PRINT: { + struct print_entry *field; + trace_assign_type(field, entry); + return print_graph_comment(field, s, entry, iter); + } default: return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 2a98a206acc2..2fb6da6523b3 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -366,5 +366,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map) int mmio_trace_printk(const char *fmt, va_list args) { - return trace_vprintk(0, fmt, args); + return trace_vprintk(0, -1, fmt, args); } -- cgit v1.2.3 From ff32504fdc56407654584ef187b20022c94a3486 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Dec 2008 23:47:35 +0100 Subject: tracing/ftrace: don't insert TRACE_PRINT during selftests Impact: fix tracer selfstests false results After setting a ftrace_printk somewhere in th kernel, I saw the Function tracer selftest failing. When a selftest occurs, the ring buffer is lurked to see if some entries were inserted. But concurrent insertion such as ftrace_printk could occured at the same time and could give false positive or negative results. This patch prevent prevent from TRACE_PRINT entries insertion during selftests. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea38652d631c..5dca6ef1fbeb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -44,6 +44,14 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* We need to change this state when a selftest is running. + * A selftest will lurk into the ring-buffer to count the + * entries inserted during the selftest although some concurrent + * insertions into the ring-buffer such as ftrace_printk could occurred + * at the same time, giving false positive or negative results. + */ +static atomic_t tracing_selftest_running = ATOMIC_INIT(0); + /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { { } @@ -589,6 +597,8 @@ int register_tracer(struct tracer *type) struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; int i; + + atomic_set(&tracing_selftest_running, 1); /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -603,6 +613,7 @@ int register_tracer(struct tracer *type) /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); + atomic_set(&tracing_selftest_running, 0); /* the test is responsible for resetting too */ current_trace = saved_tracer; if (ret) { @@ -3594,7 +3605,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (tracing_disabled) + if (tracing_disabled || atomic_read(&tracing_selftest_running)) return 0; pc = preempt_count(); -- cgit v1.2.3 From 21a8c466f99063eeb8567318b4e305eda9015408 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Dec 2008 23:51:23 +0100 Subject: tracing/ftrace: provide the macro task_curr_ret_stack() Impact: cleanup As suggested by Steven Rostedt, this patch provide a new macro task_curr_ret_stack() to move the cpp conditionnal CONFIG into the linux/ftrace.h headers. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 11 +++++++++++ kernel/trace/trace.c | 8 +------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b295d3106bfe..b9b4d0a22d10 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CONFIG_FUNCTION_TRACER @@ -387,9 +388,19 @@ extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); + +static inline int task_curr_ret_stack(struct task_struct *t) +{ + return t->curr_ret_stack; +} #else static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } + +static inline int task_curr_ret_stack(struct task_struct *tsk) +{ + return -1; +} #endif #ifdef CONFIG_TRACING diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5dca6ef1fbeb..7a93c663e52a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3657,13 +3657,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ret = trace_vprintk(ip, current->curr_ret_stack, fmt, ap); -#else - ret = trace_vprintk(ip, -1, fmt, ap); -#endif - + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } -- cgit v1.2.3 From 8e1b82e0866befaa0b2920be296c6e4c3fc7f422 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:41:33 +0100 Subject: tracing/function-graph-tracer: turn tracing_selftest_running into an int Impact: cleanup Apply some suggestions of Steven Rostedt: _turn tracing_selftest_running into a simple int (no need of an atomic_t) _set it __read_mostly _fix a comment style Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7a93c663e52a..33549537f30f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -44,13 +44,14 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; -/* We need to change this state when a selftest is running. +/* + * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the * entries inserted during the selftest although some concurrent * insertions into the ring-buffer such as ftrace_printk could occurred * at the same time, giving false positive or negative results. */ -static atomic_t tracing_selftest_running = ATOMIC_INIT(0); +static bool __read_mostly tracing_selftest_running; /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { @@ -574,6 +575,8 @@ int register_tracer(struct tracer *type) unlock_kernel(); mutex_lock(&trace_types_lock); + tracing_selftest_running = true; + for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -598,7 +601,6 @@ int register_tracer(struct tracer *type) struct trace_array *tr = &global_trace; int i; - atomic_set(&tracing_selftest_running, 1); /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -613,7 +615,6 @@ int register_tracer(struct tracer *type) /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); - atomic_set(&tracing_selftest_running, 0); /* the test is responsible for resetting too */ current_trace = saved_tracer; if (ret) { @@ -635,6 +636,7 @@ int register_tracer(struct tracer *type) max_tracer_type_len = len; out: + tracing_selftest_running = false; mutex_unlock(&trace_types_lock); lock_kernel(); @@ -3605,7 +3607,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (tracing_disabled || atomic_read(&tracing_selftest_running)) + if (tracing_disabled || tracing_selftest_running) return 0; pc = preempt_count(); -- cgit v1.2.3 From 380c4b1411ccd6885f92b2c8ceb08433a720f44e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:43:41 +0100 Subject: tracing/function-graph-tracer: append the tracing_graph_flag Impact: Provide a way to pause the function graph tracer As suggested by Steven Rostedt, the previous patch that prevented from spinlock function tracing shouldn't use the raw_spinlock to fix it. It's much better to follow lockdep with normal spinlock, so this patch adds a new flag for each task to make the function graph tracer able to be paused. We also can send an ftrace_printk whithout worrying of the irrelevant traced spinlock during insertion. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 5 ++++- include/linux/ftrace.h | 13 +++++++++++++ include/linux/sched.h | 2 ++ kernel/trace/ftrace.c | 2 ++ kernel/trace/trace.c | 18 +++++------------- 5 files changed, 26 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index f98c4076a170..1b43086b097a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -476,7 +476,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) &return_to_handler; /* Nmi's are currently unsupported */ - if (atomic_read(&in_nmi)) + if (unlikely(atomic_read(&in_nmi))) + return; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 449fa8e9e34f..11cac81eed08 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -401,6 +401,16 @@ static inline int task_curr_ret_stack(struct task_struct *t) { return t->curr_ret_stack; } + +static inline void pause_graph_tracing(void) +{ + atomic_inc(¤t->tracing_graph_pause); +} + +static inline void unpause_graph_tracing(void) +{ + atomic_dec(¤t->tracing_graph_pause); +} #else #define __notrace_funcgraph @@ -412,6 +422,9 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; } + +static inline void pause_graph_tracing(void) { } +static inline void unpause_graph_tracing(void) { } #endif #ifdef CONFIG_TRACING diff --git a/include/linux/sched.h b/include/linux/sched.h index 4c152e0acc9e..4b81fc5f7731 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1379,6 +1379,8 @@ struct task_struct { * because of depth overrun. */ atomic_t trace_overrun; + /* Pause for the tracing */ + atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* state flags for use by tracers */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2971fe48f55e..a12f80efceaa 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1998,6 +1998,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) /* Make sure IRQs see the -1 first: */ barrier(); t->ret_stack = ret_stack_list[start++]; + atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); } } while_each_thread(g, t); @@ -2077,6 +2078,7 @@ void ftrace_graph_init_task(struct task_struct *t) if (!t->ret_stack) return; t->curr_ret_stack = -1; + atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); } else t->ret_stack = NULL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 33549537f30f..0b8659bd5ad2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3590,14 +3590,7 @@ static __init int tracer_init_debugfs(void) int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - /* - * Raw Spinlock because a normal spinlock would be traced here - * and append an irrelevant couple spin_lock_irqsave/ - * spin_unlock_irqrestore traced by ftrace around this - * TRACE_PRINTK trace. - */ - static raw_spinlock_t trace_buf_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -3618,8 +3611,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - local_irq_save(flags); - __raw_spin_lock(&trace_buf_lock); + pause_graph_tracing(); + spin_lock_irqsave(&trace_buf_lock, irq_flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3640,9 +3633,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - __raw_spin_unlock(&trace_buf_lock); - local_irq_restore(flags); - + spin_unlock_irqrestore(&trace_buf_lock, irq_flags); + unpause_graph_tracing(); out: preempt_enable_notrace(); -- cgit v1.2.3 From e726f5f91effd8944c76475a2688093a03ba0d10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Dec 2008 16:55:53 +0100 Subject: tracing/function-graph-tracer: fix 'flags' variable mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: kernel/trace/trace.c: In function ‘trace_vprintk’: kernel/trace/trace.c:3626: warning: ‘flags’ may be used uninitialized in this function shows some confusion about irq_flags / flags use here. We already have irq_flags so remove the extra flags variable. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0b8659bd5ad2..8ebe0070c47a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3596,9 +3596,9 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct print_entry *entry; - unsigned long flags, irq_flags; int cpu, len = 0, size, pc; + struct print_entry *entry; + unsigned long irq_flags; if (tracing_disabled || tracing_selftest_running) return 0; @@ -3623,7 +3623,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); + tracing_generic_entry_update(&entry->ent, irq_flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; entry->depth = depth; -- cgit v1.2.3