From d1e7e02f30be672c6f6ee40908be83877a0d49d1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 2 Apr 2009 15:16:56 +0800 Subject: tracing: disable seeking for trace_pipe_raw Impact: disable pread() We set tracing_buffers_fops.llseek to no_llseek, but we can still perform pread() to read this file. That is not expected. This fix uses nonseekable_open() to disable it. tracing_buffers_fops.llseek is still set to no_llseek, it mark this file is a "non-seekable device" and is used by sys_splice(). See also do_splice() or manual of splice(2): ERRORS EINVAL Target file system doesn't support splicing; neither of the descriptors refers to a pipe; or offset given for non-seekable device. Signed-off-by: Lai Jiangshan Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49D46668.8030806@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d28476a9851..24b0168f1a29 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3285,7 +3285,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) filp->private_data = info; - return 0; + return nonseekable_open(inode, filp); out: kfree(info); -- cgit v1.2.1 From ddd538f3e6a1a4bec2f6942f83a753263e6577b4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 2 Apr 2009 15:16:59 +0800 Subject: tracing: allocate page when needed Impact: Cleanup Sometimes, we open trace_pipe_raw, but we don't read(2) it, we just splice(2) it, thus, the page is not used. Signed-off-by: Lai Jiangshan Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49D4666B.4010608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 24b0168f1a29..8e189ffb899a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3277,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) info->tr = &global_trace; info->cpu = cpu; - info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; - if (!info->spare) - goto out; filp->private_data = info; return nonseekable_open(inode, filp); - - out: - kfree(info); - return -ENOMEM; } static ssize_t @@ -3304,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!count) return 0; + if (!info->spare) + info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + if (!info->spare) + return -ENOMEM; + /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) goto read; @@ -3342,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; - ring_buffer_free_read_page(info->tr->buffer, info->spare); + if (info->spare) + ring_buffer_free_read_page(info->tr->buffer, info->spare); kfree(info); return 0; -- cgit v1.2.1 From c7625a555f55d7ae49236cde551786c88f5a5ce1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 2 Apr 2009 15:17:04 +0800 Subject: tracing: update file->f_pos when splice(2) it Impact: Cleanup These two lines: if (unlikely(*ppos)) return -ESPIPE; in tracing_buffers_splice_read() are not needed, VFS layer has disabled seek(2). We remove these two lines, and then we can update file->f_pos. And tracing_buffers_read() updates file->f_pos, this fix make tracing_buffers_splice_read() updates file->f_pos too. Signed-off-by: Lai Jiangshan Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49D46670.4010503@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8e189ffb899a..94629760dabf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3428,13 +3428,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int size, i; size_t ret; - /* - * We can't seek on a buffer input - */ - if (unlikely(*ppos)) - return -ESPIPE; - - for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { struct page *page; int r; @@ -3474,6 +3467,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.partial[i].offset = 0; spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; + *ppos += size; } spd.nr_pages = i; -- cgit v1.2.1 From 93cfb3c9fd83d877a8f1ffad9ff862b617b32828 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 2 Apr 2009 15:17:08 +0800 Subject: tracing: fix splice return too large I got these from strace: splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 12288 splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 12288 splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 12288 splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 16384 splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 8192 splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 8192 splice(0x3, 0, 0x5, 0, 0x1000, 0x1) = 8192 I wanted to splice_read 4096 bytes, but it returns 8192 or larger. It is because the return value of tracing_buffers_splice_read() does not include "zero out any left over data" bytes. But tracing_buffers_read() includes these bytes, we make them consistent. Signed-off-by: Lai Jiangshan Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49D46674.9030804@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 94629760dabf..1ce5dc6372b8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3428,7 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int size, i; size_t ret; - for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { + if (*ppos & (PAGE_SIZE - 1)) { + WARN_ONCE(1, "Ftrace: previous read must page-align\n"); + return -EINVAL; + } + + if (len & (PAGE_SIZE - 1)) { + WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); + if (len < PAGE_SIZE) + return -EINVAL; + len &= PAGE_MASK; + } + + for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -3467,7 +3479,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.partial[i].offset = 0; spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; - *ppos += size; + *ppos += PAGE_SIZE; } spd.nr_pages = i; -- cgit v1.2.1 From 4d1f4372dbea068ba4ee3d98231133a4a4ee15bd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 10 Apr 2009 08:48:36 +0800 Subject: tracing: fix document references When moving documents to Documentation/trace/, I forgot to grep Kconfig to find out those references. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Pekka Enberg Cc: Pekka Paalanen Cc: eduard.munteanu@linux360.ro LKML-Reference: <49DE97EF.7080208@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2246141bda4d..417d1985e299 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -312,7 +312,7 @@ config KMEMTRACE and profile kernel code. This requires an userspace application to use. See - Documentation/vm/kmemtrace.txt for more information. + Documentation/trace/kmemtrace.txt for more information. Saying Y will make the kernel somewhat larger and slower. However, if you disable kmemtrace at run-time or boot-time, the performance @@ -403,7 +403,7 @@ config MMIOTRACE implementation and works via page faults. Tracing is disabled by default and can be enabled at run-time. - See Documentation/tracers/mmiotrace.txt. + See Documentation/trace/mmiotrace.txt. If you are not helping to develop drivers, say N. config MMIOTRACE_TEST -- cgit v1.2.1 From 0462b5664b2bda5a18fef7efb5bb32ce36590c1a Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Wed, 8 Apr 2009 17:00:13 +0800 Subject: ftrace: Output REC->var instead of __entry->var for trace format print fmt: "irq=%d return=%s", __entry->irq, __entry->ret ? \"handled\" : \"unhandled\" "__entry" should be convert to "REC" by __stringify() macro. Signed-off-by: Zhao Lei Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49DC679D.2090901@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events_stage_2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 30743f7d4110..d363c6672c6c 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ return 0; #undef __entry -#define __entry "REC" +#define __entry REC #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) args -- cgit v1.2.1 From 8433a40eb7f2c4883ad57f9900f63e4d59240eb7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 11 Apr 2009 15:52:18 +0800 Subject: tracing/filters: NIL-terminate user input filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure messages from user space are NIL-terminated strings, otherwise we could dump random memory while reading filter file. Try this: # echo 'parent_comm ==' > events/sched/sched_process_fork/filter # cat events/sched/sched_process_fork/filter parent_comm == � Signed-off-by: Li Zefan Acked-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E04C32.6060508@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64ec4d278ffb..054bc1802bcd 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = '\0'; pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) @@ -569,6 +570,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = '\0'; pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) -- cgit v1.2.1 From bcabd91c271e50eebc0cb9220ac92700332b452e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 11 Apr 2009 15:52:35 +0800 Subject: tracing/filters: fix NULL pointer dereference Try this, and you'll see NULL pointer dereference bug: # echo -n 'parent_comm ==' > sched/sched_process_fork/filter Because we passed NULL ptr to simple_strtoull(). Signed-off-by: Li Zefan Acked-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E04C43.1050504@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events_filter.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 026be412f356..9d2162fd2305 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -410,6 +410,11 @@ int filter_parse(char **pbuf, struct filter_pred *pred) } } + if (!val_str) { + pred->field_name = NULL; + return -EINVAL; + } + pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); if (!pred->field_name) return -ENOMEM; -- cgit v1.2.1 From a3e0ab050774117d4a6173087c8bf3888662a83f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 11 Apr 2009 15:52:51 +0800 Subject: tracing/filters: allow user input integer to be oct or hex Before patch: # echo 'parent_pid == 0x10' > events/sched/sched_process_fork/filter # cat sched/sched_process_fork/filter parent_pid == 0 After patch: # cat sched/sched_process_fork/filter parent_pid == 16 Also check the input more strictly. Signed-off-by: Li Zefan Acked-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E04C53.4010600@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events_filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9d2162fd2305..49b3ef54ec46 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -419,12 +419,13 @@ int filter_parse(char **pbuf, struct filter_pred *pred) if (!pred->field_name) return -ENOMEM; - pred->val = simple_strtoull(val_str, &tmp, 10); + pred->val = simple_strtoull(val_str, &tmp, 0); if (tmp == val_str) { pred->str_val = kstrdup(val_str, GFP_KERNEL); if (!pred->str_val) return -ENOMEM; - } + } else if (*tmp != '\0') + return -EINVAL; return 0; } -- cgit v1.2.1 From 44e9c8b7adc52079f0535f9de0c2c2477831389b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 11 Apr 2009 15:55:28 +0800 Subject: tracing/filters: return proper error code when writing filter file - propagate return value of filter_add_pred() to the user - return -ENOSPC but not -ENOMEM or -EINVAL when the filter array is full Signed-off-by: Li Zefan Acked-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E04CF0.3010105@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 10 ++++++---- kernel/trace/trace_events_filter.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 054bc1802bcd..576f4fa2af0d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -521,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - if (filter_add_pred(call, pred)) { + err = filter_add_pred(call, pred); + if (err < 0) { filter_free_pred(pred); - return -EINVAL; + return err; } *ppos += cnt; @@ -588,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - if (filter_add_subsystem_pred(system, pred)) { + err = filter_add_subsystem_pred(system, pred); + if (err < 0) { filter_free_subsystem_preds(system); filter_free_pred(pred); - return -EINVAL; + return err; } *ppos += cnt; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 49b3ef54ec46..e03cbf1e38f3 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call, } } - return -ENOMEM; + return -ENOSPC; } static int is_string_field(const char *type) @@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, } if (i == MAX_FILTER_PRED) - return -EINVAL; + return -ENOSPC; events_for_each(call) { int err; -- cgit v1.2.1