From 5168ae50a66e3ff7184c2b16d661bd6d70367e50 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Jun 2010 09:36:50 -0400 Subject: tracing: Remove ftrace_preempt_disable/enable The ftrace_preempt_disable/enable functions were to address a recursive race caused by the function tracer. The function tracer traces all functions which makes it easily susceptible to recursion. One area was preempt_enable(). This would call the scheduler and the schedulre would call the function tracer and loop. (So was it thought). The ftrace_preempt_disable/enable was made to protect against recursion inside the scheduler by storing the NEED_RESCHED flag. If it was set before the ftrace_preempt_disable() it would not call schedule on ftrace_preempt_enable(), thinking that if it was set before then it would have already scheduled unless it was already in the scheduler. This worked fine except in the case of SMP, where another task would set the NEED_RESCHED flag for a task on another CPU, and then kick off an IPI to trigger it. This could cause the NEED_RESCHED to be saved at ftrace_preempt_disable() but the IPI to arrive in the the preempt disabled section. The ftrace_preempt_enable() would not call the scheduler because the flag was already set before entring the section. This bug would cause a missed preemption check and cause lower latencies. Investigating further, I found that the recusion caused by the function tracer was not due to schedule(), but due to preempt_schedule(). Now that preempt_schedule is completely annotated with notrace, the recusion no longer is an issue. Reported-by: Thomas Gleixner Signed-off-by: Steven Rostedt --- kernel/trace/trace_sched_wakeup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace_sched_wakeup.c') diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0e73bc2ef8c5..c9fd5bd02036 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; long disabled; - int resched; int cpu; int pc; @@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = ftrace_preempt_disable(); + preempt_disable_notrace(); cpu = raw_smp_processor_id(); if (cpu != wakeup_current_cpu) @@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) out: atomic_dec(&data->disabled); out_enable: - ftrace_preempt_enable(resched); + preempt_enable_notrace(); } static struct ftrace_ops trace_ops __read_mostly = -- cgit v1.2.3 From ef710e100c1068d3dd5774d2b34c5485219e06ce Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 1 Jul 2010 14:34:35 +0900 Subject: tracing: Shrink max latency ringbuffer if unnecessary Documentation/trace/ftrace.txt says buffer_size_kb: This sets or displays the number of kilobytes each CPU buffer can hold. The tracer buffers are the same size for each CPU. The displayed number is the size of the CPU buffer and not total size of all buffers. The trace buffers are allocated in pages (blocks of memory that the kernel uses for allocation, usually 4 KB in size). If the last page allocated has room for more bytes than requested, the rest of the page will be used, making the actual allocation bigger than requested. ( Note, the size may not be a multiple of the page size due to buffer management overhead. ) This can only be updated when the current_tracer is set to "nop". But it's incorrect. currently total memory consumption is 'buffer_size_kb x CPUs x 2'. Why two times difference is there? because ftrace implicitly allocate the buffer for max latency too. That makes sad result when admin want to use large buffer. (If admin want full logging and makes detail analysis). example, If admin have 24 CPUs machine and write 200MB to buffer_size_kb, the system consume ~10GB memory (200MB x 24 x 2). umm.. 5GB memory waste is usually unacceptable. Fortunatelly, almost all users don't use max latency feature. The max latency buffer can be disabled easily. This patch shrink buffer size of the max latency buffer if unnecessary. Signed-off-by: KOSAKI Motohiro LKML-Reference: <20100701104554.DA2D.A69D9226@jp.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 38 ++++++++++++++++++++++++++++++++------ kernel/trace/trace.h | 1 + kernel/trace/trace_irqsoff.c | 3 +++ kernel/trace/trace_sched_wakeup.c | 2 ++ 4 files changed, 38 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_sched_wakeup.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index af9042977c08..f7488f44d26b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -660,6 +660,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); + if (!current_trace->use_max_tr) { + WARN_ON_ONCE(1); + return; + } arch_spin_lock(&ftrace_max_lock); tr->buffer = max_tr.buffer; @@ -686,6 +690,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); + if (!current_trace->use_max_tr) { + WARN_ON_ONCE(1); + return; + } + arch_spin_lock(&ftrace_max_lock); ftrace_disable_cpu(); @@ -2801,6 +2810,9 @@ static int tracing_resize_ring_buffer(unsigned long size) if (ret < 0) return ret; + if (!current_trace->use_max_tr) + goto out; + ret = ring_buffer_resize(max_tr.buffer, size); if (ret < 0) { int r; @@ -2828,11 +2840,14 @@ static int tracing_resize_ring_buffer(unsigned long size) return ret; } + max_tr.entries = size; + out: global_trace.entries = size; return ret; } + /** * tracing_update_buffers - used by tracing facility to expand ring buffers * @@ -2893,12 +2908,26 @@ static int tracing_set_tracer(const char *buf) trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); - + if (current_trace && current_trace->use_max_tr) { + /* + * We don't free the ring buffer. instead, resize it because + * The max_tr ring buffer has some state (e.g. ring->clock) and + * we want preserve it. + */ + ring_buffer_resize(max_tr.buffer, 1); + max_tr.entries = 1; + } destroy_trace_option_files(topts); current_trace = t; topts = create_trace_option_files(current_trace); + if (current_trace->use_max_tr) { + ret = ring_buffer_resize(max_tr.buffer, global_trace.entries); + if (ret < 0) + goto out; + max_tr.entries = global_trace.entries; + } if (t->init) { ret = tracer_init(t, tr); @@ -3480,7 +3509,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, } tracing_start(); - max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); return cnt; @@ -4578,16 +4606,14 @@ __init static int tracer_alloc_buffers(void) #ifdef CONFIG_TRACER_MAX_TRACE - max_tr.buffer = ring_buffer_alloc(ring_buf_size, - TRACE_BUFFER_FLAGS); + max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); if (!max_tr.buffer) { printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); WARN_ON(1); ring_buffer_free(global_trace.buffer); goto out_free_cpumask; } - max_tr.entries = ring_buffer_size(max_tr.buffer); - WARN_ON(max_tr.entries != global_trace.entries); + max_tr.entries = 1; #endif /* Allocate the first page for all buffers */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7778f067fc8b..cb629b3b108c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -276,6 +276,7 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; + int use_max_tr; }; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 6fd486e0cef4..73a6b0601f2e 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -649,6 +649,7 @@ static struct tracer irqsoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, + .use_max_tr = 1, }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -681,6 +682,7 @@ static struct tracer preemptoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, + .use_max_tr = 1, }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -715,6 +717,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = #endif .open = irqsoff_trace_open, .close = irqsoff_trace_close, + .use_max_tr = 1, }; # define register_preemptirqsoff(trace) register_tracer(&trace) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index c9fd5bd02036..4086eae6e81b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -382,6 +382,7 @@ static struct tracer wakeup_tracer __read_mostly = #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif + .use_max_tr = 1, }; static struct tracer wakeup_rt_tracer __read_mostly = @@ -396,6 +397,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif + .use_max_tr = 1, }; __init static int init_wakeup_tracer(void) -- cgit v1.2.3