diff options
40 files changed, 842 insertions, 430 deletions
diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 089f6138fcd9..6d275e4ef385 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -70,6 +70,20 @@ a printk warning which identifies the inconsistency: "Format mismatch for probe probe_name (format), marker (format)" +Another way to use markers is to simply define the marker without generating any +function call to actually call into the marker. This is useful in combination +with tracepoint probes in a scheme like this : + +void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); + +DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, + "arg1 %u pid %d"); + +notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) +{ + struct marker *marker = &GET_MARKER(kernel_irq_entry); + /* write data to trace buffers ... */ +} * Probe / marker example diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 5d354e167494..2d42241a25c3 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -3,28 +3,30 @@ Mathieu Desnoyers -This document introduces Linux Kernel Tracepoints and their use. It provides -examples of how to insert tracepoints in the kernel and connect probe functions -to them and provides some examples of probe functions. +This document introduces Linux Kernel Tracepoints and their use. It +provides examples of how to insert tracepoints in the kernel and +connect probe functions to them and provides some examples of probe +functions. * Purpose of tracepoints -A tracepoint placed in code provides a hook to call a function (probe) that you -can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or -"off" (no probe is attached). When a tracepoint is "off" it has no effect, -except for adding a tiny time penalty (checking a condition for a branch) and -space penalty (adding a few bytes for the function call at the end of the -instrumented function and adds a data structure in a separate section). When a -tracepoint is "on", the function you provide is called each time the tracepoint -is executed, in the execution context of the caller. When the function provided -ends its execution, it returns to the caller (continuing from the tracepoint -site). +A tracepoint placed in code provides a hook to call a function (probe) +that you can provide at runtime. A tracepoint can be "on" (a probe is +connected to it) or "off" (no probe is attached). When a tracepoint is +"off" it has no effect, except for adding a tiny time penalty +(checking a condition for a branch) and space penalty (adding a few +bytes for the function call at the end of the instrumented function +and adds a data structure in a separate section). When a tracepoint +is "on", the function you provide is called each time the tracepoint +is executed, in the execution context of the caller. When the function +provided ends its execution, it returns to the caller (continuing from +the tracepoint site). You can put tracepoints at important locations in the code. They are lightweight hooks that can pass an arbitrary number of parameters, -which prototypes are described in a tracepoint declaration placed in a header -file. +which prototypes are described in a tracepoint declaration placed in a +header file. They can be used for tracing and performance accounting. @@ -42,7 +44,7 @@ In include/trace/subsys.h : #include <linux/tracepoint.h> -DEFINE_TRACE(subsys_eventname, +DECLARE_TRACE(subsys_eventname, TPPTOTO(int firstarg, struct task_struct *p), TPARGS(firstarg, p)); @@ -50,6 +52,8 @@ In subsys/file.c (where the tracing statement must be added) : #include <trace/subsys.h> +DEFINE_TRACE(subsys_eventname); + void somefct(void) { ... @@ -61,31 +65,41 @@ Where : - subsys_eventname is an identifier unique to your event - subsys is the name of your subsystem. - eventname is the name of the event to trace. -- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function - called by this tracepoint. -- TPARGS(firstarg, p) are the parameters names, same as found in the prototype. -Connecting a function (probe) to a tracepoint is done by providing a probe -(function to call) for the specific tracepoint through -register_trace_subsys_eventname(). Removing a probe is done through -unregister_trace_subsys_eventname(); it will remove the probe sure there is no -caller left using the probe when it returns. Probe removal is preempt-safe -because preemption is disabled around the probe call. See the "Probe example" -section below for a sample probe module. - -The tracepoint mechanism supports inserting multiple instances of the same -tracepoint, but a single definition must be made of a given tracepoint name over -all the kernel to make sure no type conflict will occur. Name mangling of the -tracepoints is done using the prototypes to make sure typing is correct. -Verification of probe type correctness is done at the registration site by the -compiler. Tracepoints can be put in inline functions, inlined static functions, -and unrolled loops as well as regular functions. - -The naming scheme "subsys_event" is suggested here as a convention intended -to limit collisions. Tracepoint names are global to the kernel: they are -considered as being the same whether they are in the core kernel image or in -modules. +- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the + function called by this tracepoint. +- TPARGS(firstarg, p) are the parameters names, same as found in the + prototype. + +Connecting a function (probe) to a tracepoint is done by providing a +probe (function to call) for the specific tracepoint through +register_trace_subsys_eventname(). Removing a probe is done through +unregister_trace_subsys_eventname(); it will remove the probe. + +tracepoint_synchronize_unregister() must be called before the end of +the module exit function to make sure there is no caller left using +the probe. This, and the fact that preemption is disabled around the +probe call, make sure that probe removal and module unload are safe. +See the "Probe example" section below for a sample probe module. + +The tracepoint mechanism supports inserting multiple instances of the +same tracepoint, but a single definition must be made of a given +tracepoint name over all the kernel to make sure no type conflict will +occur. Name mangling of the tracepoints is done using the prototypes +to make sure typing is correct. Verification of probe type correctness +is done at the registration site by the compiler. Tracepoints can be +put in inline functions, inlined static functions, and unrolled loops +as well as regular functions. + +The naming scheme "subsys_event" is suggested here as a convention +intended to limit collisions. Tracepoint names are global to the +kernel: they are considered as being the same whether they are in the +core kernel image or in modules. + +If the tracepoint has to be used in kernel modules, an +EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be +used to export the defined tracepoints. * Probe / tracepoint example diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9b6a1fa19e70..2bb43b433e07 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,6 +17,14 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } + +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f97621149839..74defe21ba42 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1190,7 +1190,7 @@ ENTRY(mcount) jnz trace #ifdef CONFIG_FUNCTION_RET_TRACER cmpl $ftrace_stub, ftrace_function_return - jnz trace_return + jnz ftrace_return_caller #endif .globl ftrace_stub ftrace_stub: @@ -1211,9 +1211,15 @@ trace: popl %ecx popl %eax jmp ftrace_stub +END(mcount) +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_RET_TRACER -trace_return: +ENTRY(ftrace_return_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + pushl %eax pushl %ecx pushl %edx @@ -1223,7 +1229,8 @@ trace_return: popl %edx popl %ecx popl %eax - jmp ftrace_stub + ret +END(ftrace_return_caller) .globl return_to_handler return_to_handler: @@ -1237,10 +1244,7 @@ return_to_handler: popl %ecx popl %eax ret -#endif /* CONFIG_FUNCTION_RET_TRACER */ -END(mcount) -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FUNCTION_TRACER */ +#endif .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index fe832738e1e2..924153edd973 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -24,133 +24,6 @@ #include <asm/nmi.h> - -#ifdef CONFIG_FUNCTION_RET_TRACER - -/* - * These functions are picked from those used on - * this page for dynamic ftrace. They have been - * simplified to ignore all traces in NMI context. - */ -static atomic_t in_nmi; - -void ftrace_nmi_enter(void) -{ - atomic_inc(&in_nmi); -} - -void ftrace_nmi_exit(void) -{ - atomic_dec(&in_nmi); -} - -/* Add a function return address to the trace stack on thread info.*/ -static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func) -{ - int index; - struct thread_info *ti = current_thread_info(); - - /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) - return -EBUSY; - - index = ++ti->curr_ret_stack; - ti->ret_stack[index].ret = ret; - ti->ret_stack[index].func = func; - ti->ret_stack[index].calltime = time; - - return 0; -} - -/* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(unsigned long *ret, unsigned long long *time, - unsigned long *func) -{ - int index; - - struct thread_info *ti = current_thread_info(); - index = ti->curr_ret_stack; - *ret = ti->ret_stack[index].ret; - *func = ti->ret_stack[index].func; - *time = ti->ret_stack[index].calltime; - ti->curr_ret_stack--; -} - -/* - * Send the trace to the ring-buffer. - * @return the original return address. - */ -unsigned long ftrace_return_to_handler(void) -{ - struct ftrace_retfunc trace; - pop_return_trace(&trace.ret, &trace.calltime, &trace.func); - trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_function_return(&trace); - - return trace.ret; -} - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. - */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) -{ - unsigned long old; - unsigned long long calltime; - int faulted; - unsigned long return_hooker = (unsigned long) - &return_to_handler; - - /* Nmi's are currently unsupported */ - if (atomic_read(&in_nmi)) - return; - - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: movl (%[parent_old]), %[old]\n" - "2: movl %[return_hooker], (%[parent_replaced])\n" - " movl $0, %[faulted]\n" - - ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" - ".previous\n" - - ".section __ex_table, \"a\"\n" - " .long 1b, 3b\n" - " .long 2b, 3b\n" - ".previous\n" - - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (WARN_ON(faulted)) { - unregister_ftrace_return(); - return; - } - - if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_return(); - *parent = old; - return; - } - - calltime = cpu_clock(raw_smp_processor_id()); - - if (push_return_trace(old, calltime, self_addr) == -EBUSY) - *parent = old; -} - -#endif - #ifdef CONFIG_DYNAMIC_FTRACE union ftrace_code_union { @@ -166,7 +39,7 @@ static int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -311,12 +184,12 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; -unsigned char *ftrace_nop_replace(void) +static unsigned char *ftrace_nop_replace(void) { return ftrace_nop; } -int +static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -349,6 +222,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(rec->ip, old, new); +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -426,3 +322,133 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } #endif + +#ifdef CONFIG_FUNCTION_RET_TRACER + +#ifndef CONFIG_DYNAMIC_FTRACE + +/* + * These functions are picked from those used on + * this page for dynamic ftrace. They have been + * simplified to ignore all traces in NMI context. + */ +static atomic_t in_nmi; + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); +} + +void ftrace_nmi_exit(void) +{ + atomic_dec(&in_nmi); +} +#endif /* !CONFIG_DYNAMIC_FTRACE */ + +/* Add a function return address to the trace stack on thread info.*/ +static int push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func) +{ + int index; + struct thread_info *ti = current_thread_info(); + + /* The return trace stack is full */ + if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) + return -EBUSY; + + index = ++ti->curr_ret_stack; + barrier(); + ti->ret_stack[index].ret = ret; + ti->ret_stack[index].func = func; + ti->ret_stack[index].calltime = time; + + return 0; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +static void pop_return_trace(unsigned long *ret, unsigned long long *time, + unsigned long *func) +{ + int index; + + struct thread_info *ti = current_thread_info(); + index = ti->curr_ret_stack; + *ret = ti->ret_stack[index].ret; + *func = ti->ret_stack[index].func; + *time = ti->ret_stack[index].calltime; + ti->curr_ret_stack--; +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_retfunc trace; + pop_return_trace(&trace.ret, &trace.calltime, &trace.func); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_function_return(&trace); + + return trace.ret; +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +{ + unsigned long old; + unsigned long long calltime; + int faulted; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + + /* Nmi's are currently unsupported */ + if (atomic_read(&in_nmi)) + return; + + /* + * Protect against fault, even if it shouldn't + * happen. This tool is too much intrusive to + * ignore such a protection. + */ + asm volatile( + "1: movl (%[parent_old]), %[old]\n" + "2: movl %[return_hooker], (%[parent_replaced])\n" + " movl $0, %[faulted]\n" + + ".section .fixup, \"ax\"\n" + "3: movl $1, %[faulted]\n" + ".previous\n" + + ".section __ex_table, \"a\"\n" + " .long 1b, 3b\n" + " .long 2b, 3b\n" + ".previous\n" + + : [parent_replaced] "=r" (parent), [old] "=r" (old), + [faulted] "=r" (faulted) + : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : "memory" + ); + + if (WARN_ON(faulted)) { + unregister_ftrace_return(); + return; + } + + if (WARN_ON(!__kernel_text_address(old))) { + unregister_ftrace_return(); + *parent = old; + return; + } + + calltime = cpu_clock(raw_smp_processor_id()); + + if (push_return_trace(old, calltime, self_addr) == -EBUSY) + *parent = old; +} + +#endif /* CONFIG_FUNCTION_RET_TRACER */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a5e4ed9baec8..3b46ae464933 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -71,6 +71,7 @@ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ VMLINUX_SYMBOL(__stop___markers) = .; \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4fbc4a8b86a5..f1af1aab00e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -25,6 +25,17 @@ struct ftrace_ops { extern int function_trace_stop; +/* + * Type of the current tracing. + */ +enum ftrace_tracing_type_t { + FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */ + FTRACE_TYPE_RETURN, /* Hook the return of the function */ +}; + +/* Current tracing type, default is FTRACE_TYPE_ENTER */ +extern enum ftrace_tracing_type_t ftrace_tracing_type; + /** * ftrace_stop - stop function tracer. * @@ -74,6 +85,9 @@ static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE +/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ +#include <asm/ftrace.h> + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -88,6 +102,7 @@ struct dyn_ftrace { struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); @@ -95,22 +110,43 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); -extern unsigned char *ftrace_nop_replace(void); -extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#ifdef CONFIG_FUNCTION_RET_TRACER +extern void ftrace_return_caller(void); +#endif -/* May be defined in arch */ -extern int ftrace_arch_read_dyn_info(char *buf, int size); +/** + * ftrace_make_nop - convert code into top + * @mod: module structure if called by module load initialization + * @rec: the mcount call site record + * @addr: the address that the call site should be calling + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr); /** - * ftrace_modify_code - modify code segment - * @ip: the address of the code segment - * @old_code: the contents of what is expected to be there - * @new_code: the code to patch in + * ftrace_make_call - convert a nop call site into a call to addr + * @rec: the mcount call site record + * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs * to be taken by the arch. The operation should carefully @@ -118,6 +154,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * The code segment at @rec->ip should be a nop + * * Return must be: * 0 on success * -EFAULT on error reading the location @@ -125,8 +163,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * -EPERM on error writing to the location * Any other value will be considered a failure. */ -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); +extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); + + +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); @@ -259,11 +300,13 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(unsigned long *start, unsigned long *end); +extern void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } static inline void -ftrace_init_module(unsigned long *start, unsigned long *end) { } +ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { } #endif @@ -281,7 +324,7 @@ struct ftrace_retfunc { /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); -extern void register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); diff --git a/include/linux/marker.h b/include/linux/marker.h index 4cf45472d9f5..34c14bc957f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -12,6 +12,7 @@ * See the file COPYING for more details. */ +#include <stdarg.h> #include <linux/types.h> struct module; @@ -48,10 +49,28 @@ struct marker { void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; + const char *tp_name; /* Optional tracepoint name */ + void *tp_cb; /* Optional tracepoint callback */ } __attribute__((aligned(8))); #ifdef CONFIG_MARKERS +#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ + NULL, tp_name_str, tp_cb } + +#define DEFINE_MARKER(name, format) \ + _DEFINE_MARKER(name, NULL, NULL, format) + +#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ + _DEFINE_MARKER(name, #tp_name, tp_cb, format) + /* * Note : the empty asm volatile with read constraint is used here instead of a * "used" attribute to fix a gcc 4.1.x bug. @@ -65,14 +84,7 @@ struct marker { */ #define __trace_mark(generic, name, call_private, format, args...) \ do { \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL }; \ + DEFINE_MARKER(name, format); \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -80,14 +92,39 @@ struct marker { } \ } while (0) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ + __mark_check_format(format, ## args); \ + (*__mark_##name.call)(&__mark_##name, call_private, \ + ## args); \ + } while (0) + extern void marker_update_probe_range(struct marker *begin, struct marker *end); + +#define GET_MARKER(name) (__mark_##name) + #else /* !CONFIG_MARKERS */ +#define DEFINE_MARKER(name, tp_name, tp_cb, format) #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + __mark_check_format(format, ## args); \ + } while (0) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } +#define GET_MARKER(name) #endif /* CONFIG_MARKERS */ /** @@ -117,6 +154,20 @@ static inline void marker_update_probe_range(struct marker *begin, __trace_mark(1, name, NULL, format, ## args) /** + * trace_mark_tp - Marker in a tracepoint callback + * @name: marker name, not quoted. + * @tp_name: tracepoint name, not quoted. + * @tp_cb: tracepoint callback. Should have an associated global symbol so it + * is not optimized away by the compiler (should not be static). + * @format: format string + * @args...: variable argument list + * + * Places a marker in a tracepoint callback. + */ +#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ + __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) + +/** * MARK_NOARGS - Format string for a marker with no argument. */ #define MARK_NOARGS " " diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e33..895dc9c1088c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -142,6 +142,7 @@ struct rcu_head { * on the write-side to insure proper synchronization. */ #define rcu_read_lock_sched() preempt_disable() +#define rcu_read_lock_sched_notrace() preempt_disable_notrace() /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section @@ -149,6 +150,7 @@ struct rcu_head { * See rcu_read_lock_sched for more information. */ #define rcu_read_unlock_sched() preempt_enable() +#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63064e9403f2..757005458366 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -24,8 +24,12 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(8))); - +} __attribute__((aligned(32))); /* + * Aligned on 32 bytes because it is + * globally visible and gcc happily + * align these on the structure size. + * Keep in sync with vmlinux.lds.h. + */ #define TPPROTO(args...) args #define TPARGS(args...) args @@ -40,14 +44,14 @@ struct tracepoint { do { \ void **it_func; \ \ - rcu_read_lock_sched(); \ + rcu_read_lock_sched_notrace(); \ it_func = rcu_dereference((tp)->funcs); \ if (it_func) { \ do { \ ((void(*)(proto))(*it_func))(args); \ } while (*(++it_func)); \ } \ - rcu_read_unlock_sched(); \ + rcu_read_unlock_sched_notrace(); \ } while (0) /* @@ -55,35 +59,40 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ + extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) \ - = #name ":" #proto; \ - static struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(8))) = \ - { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ + static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - tracepoint_probe_unregister(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } +#define DEFINE_TRACE(name) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(32))) = \ + { __tpstrtab_##name, 0, NULL } + +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + EXPORT_SYMBOL_GPL(__tracepoint_##name) +#define EXPORT_TRACEPOINT_SYMBOL(name) \ + EXPORT_SYMBOL(__tracepoint_##name) + extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -92,8 +101,14 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, { \ return -ENOSYS; \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ - { } + static inline int unregister_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } + +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) diff --git a/include/trace/sched.h b/include/trace/sched.h index ad47369d01b5..9b2854abf7e2 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -4,52 +4,52 @@ #include <linux/sched.h> #include <linux/tracepoint.h> -DEFINE_TRACE(sched_kthread_stop, +DECLARE_TRACE(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t)); -DEFINE_TRACE(sched_kthread_stop_ret, +DECLARE_TRACE(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret)); -DEFINE_TRACE(sched_wait_task, +DECLARE_TRACE(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup, +DECLARE_TRACE(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup_new, +DECLARE_TRACE(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_switch, +DECLARE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next)); -DEFINE_TRACE(sched_migrate_task, +DECLARE_TRACE(sched_migrate_task, TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), TPARGS(rq, p, dest_cpu)); -DEFINE_TRACE(sched_process_free, +DECLARE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_exit, +DECLARE_TRACE(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_wait, +DECLARE_TRACE(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid)); -DEFINE_TRACE(sched_process_fork, +DECLARE_TRACE(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child)); -DEFINE_TRACE(sched_signal_send, +DECLARE_TRACE(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p)); diff --git a/init/Kconfig b/init/Kconfig index 86b00c53fade..f5bacb438711 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -808,6 +808,7 @@ config TRACEPOINTS config MARKERS bool "Activate markers" + depends on TRACEPOINTS help Place an empty function call at each marker site. Can be dynamically changed for a probe function. diff --git a/kernel/exit.c b/kernel/exit.c index ae2b92be5fae..f995d2418668 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,10 @@ #include <asm/pgtable.h> #include <asm/mmu_context.h> +DEFINE_TRACE(sched_process_free); +DEFINE_TRACE(sched_process_exit); +DEFINE_TRACE(sched_process_wait); + static void exit_mm(struct task_struct * tsk); static inline int task_detached(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..0837d0deee5f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -79,6 +79,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +DEFINE_TRACE(sched_process_fork); + int nr_processes(void) { int cpu; diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e7a7ce3ed0a..4fbc456f393d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; +DEFINE_TRACE(sched_kthread_stop); +DEFINE_TRACE(sched_kthread_stop_ret); + struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/kernel/marker.c b/kernel/marker.c index 2898b647d415..ea54f2647868 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -81,7 +81,7 @@ struct marker_entry { * though the function pointer change and the marker enabling are two distinct * operations that modifies the execution flow of preemptible code. */ -void __mark_empty_function(void *probe_private, void *call_private, +notrace void __mark_empty_function(void *probe_private, void *call_private, const char *fmt, va_list *args) { } @@ -97,7 +97,8 @@ EXPORT_SYMBOL_GPL(__mark_empty_function); * need to put a full smp_rmb() in this branch. This is why we do not use * rcu_dereference() for the pointer read. */ -void marker_probe_cb(const struct marker *mdata, void *call_private, ...) +notrace void marker_probe_cb(const struct marker *mdata, + void *call_private, ...) { va_list args; char ptype; @@ -107,7 +108,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...) * sure the teardown of the callbacks can be done correctly when they * are in modules and they insure RCU read coherency. */ - rcu_read_lock_sched(); + rcu_read_lock_sched_notrace(); ptype = mdata->ptype; if (likely(!ptype)) { marker_probe_func *func; @@ -145,7 +146,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...) va_end(args); } } - rcu_read_unlock_sched(); + rcu_read_unlock_sched_notrace(); } EXPORT_SYMBOL_GPL(marker_probe_cb); @@ -157,12 +158,13 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * * Should be connected to markers "MARK_NOARGS". */ -static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) +static notrace void marker_probe_cb_noarg(const struct marker *mdata, + void *call_private, ...) { va_list args; /* not initialized */ char ptype; - rcu_read_lock_sched(); + rcu_read_lock_sched_notrace(); ptype = mdata->ptype; if (likely(!ptype)) { marker_probe_func *func; @@ -195,7 +197,7 @@ static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private multi[i].func(multi[i].probe_private, call_private, mdata->format, &args); } - rcu_read_unlock_sched(); + rcu_read_unlock_sched_notrace(); } static void free_old_closure(struct rcu_head *head) @@ -477,7 +479,7 @@ static int marker_set_format(struct marker_entry *entry, const char *format) static int set_marker(struct marker_entry *entry, struct marker *elem, int active) { - int ret; + int ret = 0; WARN_ON(strcmp(entry->name, elem->name) != 0); if (entry->format) { @@ -529,9 +531,40 @@ static int set_marker(struct marker_entry *entry, struct marker *elem, */ smp_wmb(); elem->ptype = entry->ptype; + + if (elem->tp_name && (active ^ elem->state)) { + WARN_ON(!elem->tp_cb); + /* + * It is ok to directly call the probe registration because type + * checking has been done in the __trace_mark_tp() macro. + */ + + if (active) { + /* + * try_module_get should always succeed because we hold + * lock_module() to get the tp_cb address. + */ + ret = try_module_get(__module_text_address( + (unsigned long)elem->tp_cb)); + BUG_ON(!ret); + ret = tracepoint_probe_register_noupdate( + elem->tp_name, + elem->tp_cb); + } else { + ret = tracepoint_probe_unregister_noupdate( + elem->tp_name, + elem->tp_cb); + /* + * tracepoint_probe_update_all() must be called + * before the module containing tp_cb is unloaded. + */ + module_put(__module_text_address( + (unsigned long)elem->tp_cb)); + } + } elem->state = active; - return 0; + return ret; } /* @@ -542,7 +575,24 @@ static int set_marker(struct marker_entry *entry, struct marker *elem, */ static void disable_marker(struct marker *elem) { + int ret; + /* leave "call" as is. It is known statically. */ + if (elem->tp_name && elem->state) { + WARN_ON(!elem->tp_cb); + /* + * It is ok to directly call the probe registration because type + * checking has been done in the __trace_mark_tp() macro. + */ + ret = tracepoint_probe_unregister_noupdate(elem->tp_name, + elem->tp_cb); + WARN_ON(ret); + /* + * tracepoint_probe_update_all() must be called + * before the module containing tp_cb is unloaded. + */ + module_put(__module_text_address((unsigned long)elem->tp_cb)); + } elem->state = 0; elem->single.func = __mark_empty_function; /* Update the function before setting the ptype */ @@ -606,6 +656,7 @@ static void marker_update_probes(void) marker_update_probe_range(__start___markers, __stop___markers); /* Markers in modules. */ module_update_markers(); + tracepoint_probe_update_all(); } /** @@ -653,10 +704,11 @@ int marker_probe_register(const char *name, const char *format, goto end; } mutex_unlock(&markers_mutex); - marker_update_probes(); /* may update entry */ + marker_update_probes(); mutex_lock(&markers_mutex); entry = get_marker(name); - WARN_ON(!entry); + if (!entry) + goto end; if (entry->rcu_pending) rcu_barrier_sched(); entry->oldptr = old; @@ -697,7 +749,7 @@ int marker_probe_unregister(const char *name, rcu_barrier_sched(); old = marker_entry_remove_probe(entry, probe, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(); /* may update entry */ + marker_update_probes(); mutex_lock(&markers_mutex); entry = get_marker(name); if (!entry) @@ -778,10 +830,11 @@ int marker_probe_unregister_private_data(marker_probe_func *probe, rcu_barrier_sched(); old = marker_entry_remove_probe(entry, NULL, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(); /* may update entry */ + marker_update_probes(); mutex_lock(&markers_mutex); entry = get_marker_from_private_data(probe, probe_private); - WARN_ON(!entry); + if (!entry) + goto end; if (entry->rcu_pending) rcu_barrier_sched(); entry->oldptr = old; @@ -842,3 +895,36 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(marker_get_private_data); + +#ifdef CONFIG_MODULES + +int marker_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + marker_update_probe_range(mod->markers, + mod->markers + mod->num_markers); + break; + case MODULE_STATE_GOING: + marker_update_probe_range(mod->markers, + mod->markers + mod->num_markers); + break; + } + return 0; +} + +struct notifier_block marker_module_nb = { + .notifier_call = marker_module_notify, + .priority = 0, +}; + +static int init_markers(void) +{ + return register_module_notifier(&marker_module_nb); +} +__initcall(init_markers); + +#endif /* CONFIG_MODULES */ diff --git a/kernel/module.c b/kernel/module.c index 1f4cc00e0c20..89bcf7c1327d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2184,24 +2184,15 @@ static noinline struct module *load_module(void __user *umod, struct mod_debug *debug; unsigned int num_debug; -#ifdef CONFIG_MARKERS - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); -#endif debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); dynamic_printk_setup(debug, num_debug); - -#ifdef CONFIG_TRACEPOINTS - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); -#endif } /* sechdrs[0].sh_size is always zero */ mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", sizeof(*mseg), &num_mcount); - ftrace_init_module(mseg, mseg + num_mcount); + ftrace_init_module(mod, mseg, mseg + num_mcount); err = module_finalize(hdr, sechdrs, mod); if (err < 0) diff --git a/kernel/profile.c b/kernel/profile.c index 9830a037d8db..5b7d1ac7124c 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = { }; #ifdef CONFIG_SMP -static void __init profile_nop(void *unused) +static inline void profile_nop(void *unused) { } diff --git a/kernel/sched.c b/kernel/sched.c index 50a21f964679..327f91c63c99 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -118,6 +118,12 @@ */ #define RUNTIME_INF ((u64)~0ULL) +DEFINE_TRACE(sched_wait_task); +DEFINE_TRACE(sched_wakeup); +DEFINE_TRACE(sched_wakeup_new); +DEFINE_TRACE(sched_switch); +DEFINE_TRACE(sched_migrate_task); + #ifdef CONFIG_SMP /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) diff --git a/kernel/signal.c b/kernel/signal.c index 4530fc654455..e9afe63da24b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,6 +41,8 @@ static struct kmem_cache *sigqueue_cachep; +DEFINE_TRACE(sched_signal_send); + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9c89526b6b7c..b8378fad29a3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -59,7 +59,6 @@ config FUNCTION_TRACER config FUNCTION_RET_TRACER bool "Kernel Function return Tracer" - depends on !DYNAMIC_FTRACE depends on HAVE_FUNCTION_RET_TRACER depends on FUNCTION_TRACER help diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 54cb9a7d15e5..f212da486689 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -50,6 +50,9 @@ static int last_ftrace_enabled; /* Quick disabling of function tracer. */ int function_trace_stop; +/* By default, current tracing type is normal tracing. */ +enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -334,7 +337,7 @@ ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *rec; - if (!ftrace_enabled || ftrace_disabled) + if (ftrace_disabled) return NULL; rec = ftrace_alloc_dyn_node(ip); @@ -348,107 +351,138 @@ ftrace_record_ip(unsigned long ip) return rec; } -#define FTRACE_ADDR ((long)(ftrace_caller)) +static void print_ip_ins(const char *fmt, unsigned char *p) +{ + int i; + + printk(KERN_CONT "%s", fmt); + + for (i = 0; i < MCOUNT_INSN_SIZE; i++) + printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); +} + +static void ftrace_bug(int failed, unsigned long ip) +{ + switch (failed) { + case -EFAULT: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on modifying "); + print_ip_sym(ip); + break; + case -EINVAL: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace failed to modify "); + print_ip_sym(ip); + print_ip_ins(" actual: ", (unsigned char *)ip); + printk(KERN_CONT "\n"); + break; + case -EPERM: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); + } +} + static int -__ftrace_replace_code(struct dyn_ftrace *rec, - unsigned char *old, unsigned char *new, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, int enable) { unsigned long ip, fl; + unsigned long ftrace_addr; + +#ifdef CONFIG_FUNCTION_RET_TRACER + if (ftrace_tracing_type == FTRACE_TYPE_ENTER) + ftrace_addr = (unsigned long)ftrace_caller; + else + ftrace_addr = (unsigned long)ftrace_return_caller; +#else + ftrace_addr = (unsigned long)ftrace_caller; +#endif ip = rec->ip; - if (ftrace_filtered && enable) { + /* + * If this record is not to be traced and + * it is not enabled then do nothing. + * + * If this record is not to be traced and + * it is enabled then disabled it. + * + */ + if (rec->flags & FTRACE_FL_NOTRACE) { + if (rec->flags & FTRACE_FL_ENABLED) + rec->flags &= ~FTRACE_FL_ENABLED; + else + return 0; + + } else if (ftrace_filtered && enable) { /* - * If filtering is on: - * - * If this record is set to be filtered and - * is enabled then do nothing. - * - * If this record is set to be filtered and - * it is not enabled, enable it. - * - * If this record is not set to be filtered - * and it is not enabled do nothing. - * - * If this record is set not to trace then - * do nothing. - * - * If this record is set not to trace and - * it is enabled then disable it. - * - * If this record is not set to be filtered and - * it is enabled, disable it. + * Filtering is on: */ - fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | - FTRACE_FL_ENABLED); + fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); - if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || - (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || - !fl || (fl == FTRACE_FL_NOTRACE)) + /* Record is filtered and enabled, do nothing */ + if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) return 0; - /* - * If it is enabled disable it, - * otherwise enable it! - */ - if (fl & FTRACE_FL_ENABLED) { - /* swap new and old */ - new = old; - old = ftrace_call_replace(ip, FTRACE_ADDR); + /* Record is not filtered and is not enabled do nothing */ + if (!fl) + return 0; + + /* Record is not filtered but enabled, disable it */ + if (fl == FTRACE_FL_ENABLED) rec->flags &= ~FTRACE_FL_ENABLED; - } else { - new = ftrace_call_replace(ip, FTRACE_ADDR); + else + /* Otherwise record is filtered but not enabled, enable it */ rec->flags |= FTRACE_FL_ENABLED; - } } else { + /* Disable or not filtered */ if (enable) { - /* - * If this record is set not to trace and is - * not enabled, do nothing. - */ - fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); - if (fl == FTRACE_FL_NOTRACE) - return 0; - - new = ftrace_call_replace(ip, FTRACE_ADDR); - } else - old = ftrace_call_replace(ip, FTRACE_ADDR); - - if (enable) { + /* if record is enabled, do nothing */ if (rec->flags & FTRACE_FL_ENABLED) return 0; + rec->flags |= FTRACE_FL_ENABLED; + } else { + + /* if record is not enabled do nothing */ if (!(rec->flags & FTRACE_FL_ENABLED)) return 0; + rec->flags &= ~FTRACE_FL_ENABLED; } } - return ftrace_modify_code(ip, old, new); + if (rec->flags & FTRACE_FL_ENABLED) + return ftrace_make_call(rec, ftrace_addr); + else + return ftrace_make_nop(NULL, rec, ftrace_addr); } static void ftrace_replace_code(int enable) { int i, failed; - unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - if (enable) - old = ftrace_nop_replace(); - else - new = ftrace_nop_replace(); - for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; - /* don't modify code that has already faulted */ - if (rec->flags & FTRACE_FL_FAILED) + /* + * Skip over free records and records that have + * failed. + */ + if (rec->flags & FTRACE_FL_FREE || + rec->flags & FTRACE_FL_FAILED) continue; /* ignore updates to this record's mcount site */ @@ -459,68 +493,30 @@ static void ftrace_replace_code(int enable) unfreeze_record(rec); } - failed = __ftrace_replace_code(rec, old, new, enable); + failed = __ftrace_replace_code(rec, enable); if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { ftrace_free_rec(rec); - } + } else + ftrace_bug(failed, rec->ip); } } } } -static void print_ip_ins(const char *fmt, unsigned char *p) -{ - int i; - - printk(KERN_CONT "%s", fmt); - - for (i = 0; i < MCOUNT_INSN_SIZE; i++) - printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); -} - static int -ftrace_code_disable(struct dyn_ftrace *rec) +ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) { unsigned long ip; - unsigned char *nop, *call; int ret; ip = rec->ip; - nop = ftrace_nop_replace(); - call = ftrace_call_replace(ip, mcount_addr); - - ret = ftrace_modify_code(ip, call, nop); + ret = ftrace_make_nop(mod, rec, mcount_addr); if (ret) { - switch (ret) { - case -EFAULT: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); - break; - case -EINVAL: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace failed to modify "); - print_ip_sym(ip); - print_ip_ins(" expected: ", call); - print_ip_ins(" actual: ", (unsigned char *)ip); - print_ip_ins(" replace: ", nop); - printk(KERN_CONT "\n"); - break; - case -EPERM: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on writing "); - print_ip_sym(ip); - break; - default: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); - } - + ftrace_bug(ret, ip); rec->flags |= FTRACE_FL_FAILED; return 0; } @@ -560,8 +556,7 @@ static void ftrace_startup(void) mutex_lock(&ftrace_start_lock); ftrace_start_up++; - if (ftrace_start_up == 1) - command |= FTRACE_ENABLE_CALLS; + command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -639,7 +634,7 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void) +static int ftrace_update_code(struct module *mod) { struct dyn_ftrace *p, *t; cycle_t start, stop; @@ -656,7 +651,7 @@ static int ftrace_update_code(void) list_del_init(&p->list); /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { + if (ftrace_code_disable(mod, p)) { p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; } else @@ -699,7 +694,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) cnt = num_to_init / ENTRIES_PER_PAGE; pr_info("ftrace: allocating %ld entries in %d pages\n", - num_to_init, cnt); + num_to_init, cnt + 1); for (i = 0; i < cnt; i++) { pg->next = (void *)get_zeroed_page(GFP_KERNEL); @@ -782,13 +777,11 @@ static void *t_start(struct seq_file *m, loff_t *pos) void *p = NULL; loff_t l = -1; - if (*pos != iter->pos) { - for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) - ; - } else { - l = *pos; - p = t_next(m, p, &l); - } + if (*pos > iter->pos) + *pos = iter->pos; + + l = *pos; + p = t_next(m, p, &l); return p; } @@ -799,15 +792,21 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { + struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = v; char str[KSYM_SYMBOL_LEN]; + int ret = 0; if (!rec) return 0; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); - seq_printf(m, "%s\n", str); + ret = seq_printf(m, "%s\n", str); + if (ret < 0) { + iter->pos--; + iter->idx--; + } return 0; } @@ -833,7 +832,7 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENOMEM; iter->pg = ftrace_pages_start; - iter->pos = -1; + iter->pos = 0; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -920,7 +919,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable) if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; - iter->pos = -1; + iter->pos = 0; iter->flags = enable ? FTRACE_ITER_FILTER : FTRACE_ITER_NOTRACE; @@ -1211,7 +1210,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (iter->filtered && ftrace_start_up && ftrace_enabled) + if (ftrace_start_up && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); @@ -1298,7 +1297,8 @@ static __init int ftrace_init_debugfs(void) fs_initcall(ftrace_init_debugfs); -static int ftrace_convert_nops(unsigned long *start, +static int ftrace_convert_nops(struct module *mod, + unsigned long *start, unsigned long *end) { unsigned long *p; @@ -1309,23 +1309,32 @@ static int ftrace_convert_nops(unsigned long *start, p = start; while (p < end) { addr = ftrace_call_adjust(*p++); + /* + * Some architecture linkers will pad between + * the different mcount_loc sections of different + * object files to satisfy alignments. + * Skip any NULL pointers. + */ + if (!addr) + continue; ftrace_record_ip(addr); } /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(); + ftrace_update_code(mod); local_irq_restore(flags); mutex_unlock(&ftrace_start_lock); return 0; } -void ftrace_init_module(unsigned long *start, unsigned long *end) +void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(start, end); + ftrace_convert_nops(mod, start, end); } extern unsigned long __start_mcount_loc[]; @@ -1355,7 +1364,8 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(__start_mcount_loc, + ret = ftrace_convert_nops(NULL, + __start_mcount_loc, __stop_mcount_loc); return; @@ -1411,10 +1421,17 @@ int register_ftrace_function(struct ftrace_ops *ops) return -1; mutex_lock(&ftrace_sysctl_lock); + + if (ftrace_tracing_type == FTRACE_TYPE_RETURN) { + ret = -EBUSY; + goto out; + } + ret = __register_ftrace_function(ops); ftrace_startup(); - mutex_unlock(&ftrace_sysctl_lock); +out: + mutex_unlock(&ftrace_sysctl_lock); return ret; } @@ -1480,16 +1497,45 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, } #ifdef CONFIG_FUNCTION_RET_TRACER + +/* The callback that hooks the return of a function */ trace_function_return_t ftrace_function_return = (trace_function_return_t)ftrace_stub; -void register_ftrace_return(trace_function_return_t func) + +int register_ftrace_return(trace_function_return_t func) { + int ret = 0; + + mutex_lock(&ftrace_sysctl_lock); + + /* + * Don't launch return tracing if normal function + * tracing is already running. + */ + if (ftrace_trace_function != ftrace_stub) { + ret = -EBUSY; + goto out; + } + + ftrace_tracing_type = FTRACE_TYPE_RETURN; ftrace_function_return = func; + ftrace_startup(); + +out: + mutex_unlock(&ftrace_sysctl_lock); + return ret; } void unregister_ftrace_return(void) { + mutex_lock(&ftrace_sysctl_lock); + ftrace_function_return = (trace_function_return_t)ftrace_stub; + ftrace_shutdown(); + /* Restore normal tracing type */ + ftrace_tracing_type = FTRACE_TYPE_ENTER; + + mutex_unlock(&ftrace_sysctl_lock); } #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b04923b72ce0..2596b5a968c4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1049,7 +1049,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) * Need to use raw, since this must be called before the * recursive protection is performed. */ - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1060,7 +1060,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_RET_TRACER @@ -2636,8 +2636,11 @@ static int tracing_set_tracer(char *buf) current_trace->reset(tr); current_trace = t; - if (t->init) - t->init(tr); + if (t->init) { + ret = t->init(tr); + if (ret) + goto out; + } trace_branch_enable(tr); out: @@ -2653,6 +2656,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, char buf[max_tracer_type_len+1]; int i; size_t ret; + int err; + + ret = cnt; if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; @@ -2666,12 +2672,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - ret = tracing_set_tracer(buf); - if (!ret) - ret = cnt; + err = tracing_set_tracer(buf); + if (err) + return err; - if (ret > 0) - filp->f_pos += ret; + filp->f_pos += ret; return ret; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b41d7b4c2cae..37947f6b92bf 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -264,7 +264,8 @@ enum print_line_t { */ struct tracer { const char *name; - void (*init)(struct trace_array *tr); + /* Your tracer should raise a warning if init fails */ + int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index cb333b7fd113..a4fa2c57e34e 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -47,7 +47,7 @@ static void reset_boot_trace(struct trace_array *tr) tracing_reset(tr, cpu); } -static void boot_trace_init(struct trace_array *tr) +static int boot_trace_init(struct trace_array *tr) { int cpu; boot_trace = tr; @@ -56,6 +56,7 @@ static void boot_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); tracing_sched_switch_assign_trace(tr); + return 0; } static enum print_line_t diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 2511e32572ca..23f9b02ce967 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -125,7 +125,7 @@ static void stop_branch_trace(struct trace_array *tr) disable_branch_tracing(); } -static void branch_trace_init(struct trace_array *tr) +static int branch_trace_init(struct trace_array *tr) { int cpu; @@ -133,6 +133,7 @@ static void branch_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); start_branch_trace(tr); + return 0; } static void branch_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8693b7a0a5b2..e74f6d0a3216 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -42,9 +42,10 @@ static void stop_function_trace(struct trace_array *tr) tracing_stop_cmdline_record(); } -static void function_trace_init(struct trace_array *tr) +static int function_trace_init(struct trace_array *tr) { start_function_trace(tr); + return 0; } static void function_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c index 7680b21537dd..a68564af022b 100644 --- a/kernel/trace/trace_functions_return.c +++ b/kernel/trace/trace_functions_return.c @@ -14,28 +14,18 @@ #include "trace.h" -static void start_return_trace(struct trace_array *tr) -{ - register_ftrace_return(&trace_function_return); -} - -static void stop_return_trace(struct trace_array *tr) -{ - unregister_ftrace_return(); -} - -static void return_trace_init(struct trace_array *tr) +static int return_trace_init(struct trace_array *tr) { int cpu; for_each_online_cpu(cpu) tracing_reset(tr, cpu); - start_return_trace(tr); + return register_ftrace_return(&trace_function_return); } static void return_trace_reset(struct trace_array *tr) { - stop_return_trace(tr); + unregister_ftrace_return(); } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d919d4eaa7cc..7c2e326bbc8b 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -416,11 +416,12 @@ static void irqsoff_tracer_close(struct trace_iterator *iter) } #ifdef CONFIG_IRQSOFF_TRACER -static void irqsoff_tracer_init(struct trace_array *tr) +static int irqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer irqsoff_tracer __read_mostly = { @@ -442,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly = #endif #ifdef CONFIG_PREEMPT_TRACER -static void preemptoff_tracer_init(struct trace_array *tr) +static int preemptoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer preemptoff_tracer __read_mostly = @@ -471,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly = #if defined(CONFIG_IRQSOFF_TRACER) && \ defined(CONFIG_PREEMPT_TRACER) -static void preemptirqsoff_tracer_init(struct trace_array *tr) +static int preemptirqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer preemptirqsoff_tracer __read_mostly = diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 51bcf370215e..433d650eda9f 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -30,13 +30,14 @@ static void mmio_reset_data(struct trace_array *tr) tracing_reset(tr, cpu); } -static void mmio_trace_init(struct trace_array *tr) +static int mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; mmio_reset_data(tr); enable_mmiotrace(); + return 0; } static void mmio_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 2ef1d227e7d8..0e77415caed3 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -24,7 +24,7 @@ static void stop_nop_trace(struct trace_array *tr) /* Nothing to do! */ } -static void nop_trace_init(struct trace_array *tr) +static int nop_trace_init(struct trace_array *tr) { int cpu; ctx_trace = tr; @@ -33,6 +33,7 @@ static void nop_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); start_nop_trace(tr); + return 0; } static void nop_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index be35bdfe2e38..863390557b44 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -206,10 +206,11 @@ static void stop_sched_trace(struct trace_array *tr) tracing_stop_sched_switch_record(); } -static void sched_switch_trace_init(struct trace_array *tr) +static int sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; start_sched_trace(tr); + return 0; } static void sched_switch_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 983f2b1478c9..0067b49746c1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -331,10 +331,11 @@ static void stop_wakeup_tracer(struct trace_array *tr) unregister_trace_sched_wakeup(probe_wakeup); } -static void wakeup_tracer_init(struct trace_array *tr) +static int wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; start_wakeup_tracer(tr); + return 0; } static void wakeup_tracer_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 24e6e075e6d6..88c8eb70f54a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -52,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int cpu, ret = 0; /* Don't allow flipping of max traces now */ - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&ftrace_max_lock); cnt = ring_buffer_entries(tr->buffer); @@ -63,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) break; } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); if (count) *count = cnt; @@ -71,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) return ret; } +static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) +{ + printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n", + trace->name, init_ret); +} #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -111,7 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } /* Sleep for a 1/10 of a second */ msleep(100); @@ -181,7 +190,12 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -223,7 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; /* disable interrupts for a bit */ @@ -272,7 +291,12 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; /* disable preemption for a bit */ @@ -321,7 +345,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } /* reset the max latency */ tracing_max_latency = 0; @@ -449,7 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; @@ -505,7 +538,12 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -532,7 +570,12 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return 0; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -554,7 +597,12 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 05f753422aea..54960edb96d0 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -261,11 +261,12 @@ static void stop_stack_trace(struct trace_array *tr) mutex_unlock(&sample_timer_lock); } -static void stack_trace_init(struct trace_array *tr) +static int stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; start_stack_trace(tr); + return 0; } static void stack_trace_reset(struct trace_array *tr) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e96590f17de1..79602740bbb5 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -262,6 +262,7 @@ static void set_tracepoint(struct tracepoint_entry **entry, static void disable_tracepoint(struct tracepoint *elem) { elem->state = 0; + rcu_assign_pointer(elem->funcs, NULL); } /** @@ -540,3 +541,36 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter) iter->tracepoint = NULL; } EXPORT_SYMBOL_GPL(tracepoint_iter_reset); + +#ifdef CONFIG_MODULES + +int tracepoint_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + tracepoint_update_probe_range(mod->tracepoints, + mod->tracepoints + mod->num_tracepoints); + break; + case MODULE_STATE_GOING: + tracepoint_update_probe_range(mod->tracepoints, + mod->tracepoints + mod->num_tracepoints); + break; + } + return 0; +} + +struct notifier_block tracepoint_module_nb = { + .notifier_call = tracepoint_module_notify, + .priority = 0, +}; + +static int init_tracepoints(void) +{ + return register_module_notifier(&tracepoint_module_nb); +} +__initcall(init_tracepoints); + +#endif /* CONFIG_MODULES */ diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h index 0216b55bd640..01724e04c556 100644 --- a/samples/tracepoints/tp-samples-trace.h +++ b/samples/tracepoints/tp-samples-trace.h @@ -4,10 +4,10 @@ #include <linux/proc_fs.h> /* for struct inode and struct file */ #include <linux/tracepoint.h> -DEFINE_TRACE(subsys_event, +DECLARE_TRACE(subsys_event, TPPROTO(struct inode *inode, struct file *file), TPARGS(inode, file)); -DEFINE_TRACE(subsys_eventb, +DECLARE_TRACE(subsys_eventb, TPPROTO(void), TPARGS()); #endif diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c index 55abfdda4bd4..e3a964889dc7 100644 --- a/samples/tracepoints/tracepoint-probe-sample.c +++ b/samples/tracepoints/tracepoint-probe-sample.c @@ -46,6 +46,7 @@ void __exit tp_sample_trace_exit(void) { unregister_trace_subsys_eventb(probe_subsys_eventb); unregister_trace_subsys_event(probe_subsys_event); + tracepoint_synchronize_unregister(); } module_exit(tp_sample_trace_exit); diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c index 5e9fcf4afffe..685a5acb4562 100644 --- a/samples/tracepoints/tracepoint-probe-sample2.c +++ b/samples/tracepoints/tracepoint-probe-sample2.c @@ -33,6 +33,7 @@ module_init(tp_sample_trace_init); void __exit tp_sample_trace_exit(void) { unregister_trace_subsys_event(probe_subsys_event); + tracepoint_synchronize_unregister(); } module_exit(tp_sample_trace_exit); diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c index 4ae4b7fcc043..00d169792a3e 100644 --- a/samples/tracepoints/tracepoint-sample.c +++ b/samples/tracepoints/tracepoint-sample.c @@ -13,6 +13,9 @@ #include <linux/proc_fs.h> #include "tp-samples-trace.h" +DEFINE_TRACE(subsys_event); +DEFINE_TRACE(subsys_eventb); + struct proc_dir_entry *pentry_example; static int my_open(struct inode *inode, struct file *file) |