diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/extable.c | 15 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 6 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 473 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 68 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace.h | 1 | ||||
-rw-r--r-- | kernel/trace/trace_clock.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 19 | ||||
-rw-r--r-- | kernel/trace/trace_output.h | 33 | ||||
-rw-r--r-- | kernel/trace/trace_stat.c | 47 | ||||
-rw-r--r-- | kernel/trace/trace_workqueue.c | 12 |
13 files changed, 399 insertions, 300 deletions
diff --git a/kernel/extable.c b/kernel/extable.c index 0df6253730be..b54a6017b6b5 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -15,11 +15,22 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/ftrace.h> +#include <linux/memory.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/init.h> -#include <linux/ftrace.h> -#include <asm/uaccess.h> + #include <asm/sections.h> +#include <asm/uaccess.h> + +/* + * mutex protecting text section modification (dynamic code patching). + * some users need to sleep (allocating memory...) while they hold this lock. + * + * NOT exported to modules - patching kernel text is a really delicate matter. + */ +DEFINE_MUTEX(text_mutex); extern struct exception_table_entry __start___ex_table[]; extern struct exception_table_entry __stop___ex_table[]; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b0a46f889659..8a4d72931042 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -63,7 +63,11 @@ config TRACING # config TRACING_SUPPORT bool - depends on TRACE_IRQFLAGS_SUPPORT + # PPC32 has no irqflags tracing support, but it can use most of the + # tracers anyway, they were tested to build and work. Note that new + # exceptions to this list aren't welcomed, better implement the + # irqflags tracing for your architecture. + depends on TRACE_IRQFLAGS_SUPPORT || PPC32 depends on STACKTRACE_SUPPORT default y diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b171778e3863..947c5b3f90c4 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -30,7 +30,7 @@ static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; -static int __read_mostly blk_tracer_enabled; +static bool blk_tracer_enabled __read_mostly; /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 @@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = { }; /* Global reference count of probes */ -static DEFINE_MUTEX(blk_probe_mutex); static atomic_t blk_probes_ref = ATOMIC_INIT(0); -static int blk_register_tracepoints(void); +static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); /* @@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, const void *data, size_t len) { struct blk_io_trace *t; + struct ring_buffer_event *event = NULL; + int pc = 0; + int cpu = smp_processor_id(); + bool blk_tracer = blk_tracer_enabled; + + if (blk_tracer) { + pc = preempt_count(); + event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + sizeof(*t) + len, + 0, pc); + if (!event) + return; + t = ring_buffer_event_data(event); + goto record_it; + } if (!bt->rchan) return; t = relay_reserve(bt->rchan, sizeof(*t) + len); if (t) { - const int cpu = smp_processor_id(); - t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; t->time = ktime_to_ns(ktime_get()); +record_it: t->device = bt->dev; t->action = action; t->pid = pid; t->cpu = cpu; t->pdu_len = len; memcpy((void *) t + sizeof(*t), data, len); + + if (blk_tracer) + trace_buffer_unlock_commit(blk_tr, event, 0, pc); } } @@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) unsigned long flags; char *buf; - if (blk_tr) { - va_start(args, fmt); - ftrace_vprintk(fmt, args); - va_end(args); - return; - } - - if (!bt->msg_data) + if (unlikely(bt->trace_state != Blktrace_running && + !blk_tracer_enabled)) return; local_irq_save(flags); @@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, /* * Data direction bit lookup */ -static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), - BLK_TC_ACT(BLK_TC_WRITE) }; +static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), + BLK_TC_ACT(BLK_TC_WRITE) }; /* The ilog2() calls fall out because they're constant */ #define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ @@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, unsigned long *sequence; pid_t pid; int cpu, pc = 0; + bool blk_tracer = blk_tracer_enabled; - if (unlikely(bt->trace_state != Blktrace_running || - !blk_tracer_enabled)) + if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) return; what |= ddir_act[rw & WRITE]; @@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; cpu = raw_smp_processor_id(); - if (blk_tr) { + if (blk_tracer) { tracing_record_cmdline(current); pc = preempt_count(); @@ -236,7 +246,7 @@ record_it: if (pdu_len) memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); - if (blk_tr) { + if (blk_tracer) { trace_buffer_unlock_commit(blk_tr, event, 0, pc); return; } @@ -248,7 +258,7 @@ record_it: static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); -static void blk_trace_cleanup(struct blk_trace *bt) +static void blk_trace_free(struct blk_trace *bt) { debugfs_remove(bt->msg_file); debugfs_remove(bt->dropped_file); @@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); - mutex_lock(&blk_probe_mutex); +} + +static void blk_trace_cleanup(struct blk_trace *bt) +{ + blk_trace_free(bt); if (atomic_dec_and_test(&blk_probes_ref)) blk_unregister_tracepoints(); - mutex_unlock(&blk_probe_mutex); } int blk_trace_remove(struct request_queue *q) @@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q) if (!bt) return -EINVAL; - if (bt->trace_state == Blktrace_setup || - bt->trace_state == Blktrace_stopped) + if (bt->trace_state != Blktrace_running) blk_trace_cleanup(bt); return 0; @@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (buts->name[i] == '/') buts->name[i] = '_'; - ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) - goto err; + return -ENOMEM; + ret = -ENOMEM; bt->sequence = alloc_percpu(unsigned long); if (!bt->sequence) goto err; @@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = -ENOENT; + mutex_lock(&blk_tree_mutex); if (!blk_tree_root) { blk_tree_root = debugfs_create_dir("block", NULL); - if (!blk_tree_root) - return -ENOMEM; + if (!blk_tree_root) { + mutex_unlock(&blk_tree_mutex); + goto err; + } } + mutex_unlock(&blk_tree_mutex); dir = debugfs_create_dir(buts->name, blk_tree_root); @@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - mutex_lock(&blk_probe_mutex); - if (atomic_add_return(1, &blk_probes_ref) == 1) { - ret = blk_register_tracepoints(); - if (ret) - goto probe_err; - } - mutex_unlock(&blk_probe_mutex); - ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); if (old_bt) { @@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, goto err; } + if (atomic_inc_return(&blk_probes_ref) == 1) + blk_register_tracepoints(); + return 0; -probe_err: - atomic_dec(&blk_probes_ref); - mutex_unlock(&blk_probe_mutex); err: - if (bt) { - if (bt->msg_file) - debugfs_remove(bt->msg_file); - if (bt->dropped_file) - debugfs_remove(bt->dropped_file); - free_percpu(bt->sequence); - free_percpu(bt->msg_data); - if (bt->rchan) - relay_close(bt->rchan); - kfree(bt); - } + blk_trace_free(bt); return ret; } @@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_add_driver_data); -static int blk_register_tracepoints(void) +static void blk_register_tracepoints(void) { int ret; @@ -901,7 +899,6 @@ static int blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); - return 0; } static void blk_unregister_tracepoints(void) @@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void) static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) { int i = 0; + int tc = t->action >> BLK_TC_SHIFT; + + if (t->action == BLK_TN_MESSAGE) { + rwbs[i++] = 'N'; + goto out; + } - if (t->action & BLK_TC_DISCARD) + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; - else if (t->action & BLK_TC_WRITE) + else if (tc & BLK_TC_WRITE) rwbs[i++] = 'W'; else if (t->bytes) rwbs[i++] = 'R'; else rwbs[i++] = 'N'; - if (t->action & BLK_TC_AHEAD) + if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (t->action & BLK_TC_BARRIER) + if (tc & BLK_TC_BARRIER) rwbs[i++] = 'B'; - if (t->action & BLK_TC_SYNC) + if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; - if (t->action & BLK_TC_META) + if (tc & BLK_TC_META) rwbs[i++] = 'M'; - +out: rwbs[i] = '\0'; } @@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent) static inline __u16 t_error(const struct trace_entry *ent) { - return te_blk_io_trace(ent)->sector; + return te_blk_io_trace(ent)->error; } static __u64 get_pdu_int(const struct trace_entry *ent) @@ -999,29 +1002,31 @@ static void get_pdu_remap(const struct trace_entry *ent, r->sector = be64_to_cpu(sector); } -static int blk_log_action_iter(struct trace_iterator *iter, const char *act) +typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); + +static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { char rwbs[6]; - unsigned long long ts = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(ts, USEC_PER_SEC); + unsigned long long ts = iter->ts; + unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; - const struct trace_entry *ent = iter->ent; - const struct blk_io_trace *t = (const struct blk_io_trace *)ent; + const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); return trace_seq_printf(&iter->seq, - "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", + "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", MAJOR(t->device), MINOR(t->device), iter->cpu, - secs, usec_rem, ent->pid, act, rwbs); + secs, nsec_rem, iter->ent->pid, act, rwbs); } -static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, - const char *act) +static int blk_log_action(struct trace_iterator *iter, const char *act) { char rwbs[6]; + const struct blk_io_trace *t = te_blk_io_trace(iter->ent); + fill_rwbs(rwbs, t); - return trace_seq_printf(s, "%3d,%-3d %2s %3s ", + return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", MAJOR(t->device), MINOR(t->device), act, rwbs); } @@ -1085,6 +1090,17 @@ static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) get_pdu_int(ent), cmd); } +static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) +{ + int ret; + const struct blk_io_trace *t = te_blk_io_trace(ent); + + ret = trace_seq_putmem(s, t + 1, t->pdu_len); + if (ret) + return trace_seq_putc(s, '\n'); + return ret; +} + /* * struct tracer operations */ @@ -1099,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { - mutex_lock(&blk_probe_mutex); - if (atomic_add_return(1, &blk_probes_ref) == 1) - if (blk_register_tracepoints()) - atomic_dec(&blk_probes_ref); - mutex_unlock(&blk_probe_mutex); + blk_tracer_enabled = true; trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } @@ -1111,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr) { blk_tr = tr; blk_tracer_start(tr); - mutex_lock(&blk_probe_mutex); - blk_tracer_enabled++; - mutex_unlock(&blk_probe_mutex); return 0; } static void blk_tracer_stop(struct trace_array *tr) { + blk_tracer_enabled = false; trace_flags |= TRACE_ITER_CONTEXT_INFO; - mutex_lock(&blk_probe_mutex); - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); - mutex_unlock(&blk_probe_mutex); } static void blk_tracer_reset(struct trace_array *tr) { - if (!atomic_read(&blk_probes_ref)) - return; - - mutex_lock(&blk_probe_mutex); - blk_tracer_enabled--; - WARN_ON(blk_tracer_enabled < 0); - mutex_unlock(&blk_probe_mutex); - blk_tracer_stop(tr); } -static struct { +static const struct { const char *act[2]; int (*print)(struct trace_seq *s, const struct trace_entry *ent); -} what2act[] __read_mostly = { +} what2act[] = { [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, @@ -1160,29 +1158,48 @@ static struct { [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, }; -static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, - int flags) +static enum print_line_t print_one_line(struct trace_iterator *iter, + bool classic) { struct trace_seq *s = &iter->seq; - const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; - const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); + const struct blk_io_trace *t; + u16 what; int ret; + bool long_act; + blk_log_action_t *log_action; - if (!trace_print_context(iter)) - return TRACE_TYPE_PARTIAL_LINE; + t = te_blk_io_trace(iter->ent); + what = t->action & ((1 << BLK_TC_SHIFT) - 1); + long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + log_action = classic ? &blk_log_action_classic : &blk_log_action; - if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + if (t->action == BLK_TN_MESSAGE) { + ret = log_action(iter, long_act ? "message" : "m"); + if (ret) + ret = blk_log_msg(s, iter->ent); + goto out; + } + + if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ret = trace_seq_printf(s, "Bad pc action %x\n", what); else { - const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); - ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); + ret = log_action(iter, what2act[what].act[long_act]); if (ret) ret = what2act[what].print(s, iter->ent); } - +out: return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; } +static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, + int flags) +{ + if (!trace_print_context(iter)) + return TRACE_TYPE_PARTIAL_LINE; + + return print_one_line(iter, false); +} + static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1190,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) const int offset = offsetof(struct blk_io_trace, sector); struct blk_io_trace old = { .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, - .time = ns2usecs(iter->ts), + .time = iter->ts, }; if (!trace_seq_putmem(s, &old, offset)) @@ -1208,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags) static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) { - const struct blk_io_trace *t; - u16 what; - int ret; - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return TRACE_TYPE_UNHANDLED; - t = (const struct blk_io_trace *)iter->ent; - what = t->action & ((1 << BLK_TC_SHIFT) - 1); - - if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) - ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); - else { - const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); - ret = blk_log_action_iter(iter, what2act[what].act[long_act]); - if (ret) - ret = what2act[what].print(&iter->seq, iter->ent); - } - - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; + return print_one_line(iter, true); } static struct tracer blk_tracer __read_mostly = { @@ -1273,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; - kfree(bt); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + + blk_trace_free(bt); return 0; } @@ -1283,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q) static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) { struct blk_trace *old_bt, *bt = NULL; - int ret; + int ret = -ENOMEM; - ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) - goto err; + return -ENOMEM; + + bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); + if (!bt->msg_data) + goto free_bt; bt->dev = dev; bt->act_mask = (u16)-1; bt->end_lba = -1ULL; - bt->trace_state = Blktrace_running; old_bt = xchg(&q->blk_trace, bt); if (old_bt != NULL) { (void)xchg(&q->blk_trace, old_bt); - kfree(bt); ret = -EBUSY; + goto free_bt; } + + if (atomic_inc_return(&blk_probes_ref) == 1) + blk_register_tracepoints(); return 0; -err: + +free_bt: + blk_trace_free(bt); return ret; } @@ -1310,72 +1321,6 @@ err: * sysfs interface to enable and configure tracing */ -static ssize_t sysfs_blk_trace_enable_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - struct block_device *bdev; - ssize_t ret = -ENXIO; - - lock_kernel(); - bdev = bdget(part_devt(p)); - if (bdev != NULL) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q != NULL) { - mutex_lock(&bdev->bd_mutex); - ret = sprintf(buf, "%u\n", !!q->blk_trace); - mutex_unlock(&bdev->bd_mutex); - } - - bdput(bdev); - } - - unlock_kernel(); - return ret; -} - -static ssize_t sysfs_blk_trace_enable_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct block_device *bdev; - struct request_queue *q; - struct hd_struct *p; - int value; - ssize_t ret = -ENXIO; - - if (count == 0 || sscanf(buf, "%d", &value) != 1) - goto out; - - lock_kernel(); - p = dev_to_part(dev); - bdev = bdget(part_devt(p)); - if (bdev == NULL) - goto out_unlock_kernel; - - q = bdev_get_queue(bdev); - if (q == NULL) - goto out_bdput; - - mutex_lock(&bdev->bd_mutex); - if (value) - ret = blk_trace_setup_queue(q, bdev->bd_dev); - else - ret = blk_trace_remove_queue(q); - mutex_unlock(&bdev->bd_mutex); - - if (ret == 0) - ret = count; -out_bdput: - bdput(bdev); -out_unlock_kernel: - unlock_kernel(); -out: - return ret; -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -1387,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, sysfs_blk_trace_attr_show, \ sysfs_blk_trace_attr_store) -static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, - sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store); +static BLK_TRACE_DEVICE_ATTR(enable); static BLK_TRACE_DEVICE_ATTR(act_mask); static BLK_TRACE_DEVICE_ATTR(pid); static BLK_TRACE_DEVICE_ATTR(start_lba); @@ -1408,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = { .attrs = blk_trace_attrs, }; -static int blk_str2act_mask(const char *str) +static const struct { + int mask; + const char *str; +} mask_maps[] = { + { BLK_TC_READ, "read" }, + { BLK_TC_WRITE, "write" }, + { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_SYNC, "sync" }, + { BLK_TC_QUEUE, "queue" }, + { BLK_TC_REQUEUE, "requeue" }, + { BLK_TC_ISSUE, "issue" }, + { BLK_TC_COMPLETE, "complete" }, + { BLK_TC_FS, "fs" }, + { BLK_TC_PC, "pc" }, + { BLK_TC_AHEAD, "ahead" }, + { BLK_TC_META, "meta" }, + { BLK_TC_DISCARD, "discard" }, + { BLK_TC_DRV_DATA, "drv_data" }, +}; + +static int blk_trace_str2mask(const char *str) { + int i; int mask = 0; - char *copy = kstrdup(str, GFP_KERNEL), *s; + char *s, *token; - if (copy == NULL) + s = kstrdup(str, GFP_KERNEL); + if (s == NULL) return -ENOMEM; - - s = strstrip(copy); + s = strstrip(s); while (1) { - char *sep = strchr(s, ','); - - if (sep != NULL) - *sep = '\0'; - - if (strcasecmp(s, "barrier") == 0) - mask |= BLK_TC_BARRIER; - else if (strcasecmp(s, "complete") == 0) - mask |= BLK_TC_COMPLETE; - else if (strcasecmp(s, "fs") == 0) - mask |= BLK_TC_FS; - else if (strcasecmp(s, "issue") == 0) - mask |= BLK_TC_ISSUE; - else if (strcasecmp(s, "pc") == 0) - mask |= BLK_TC_PC; - else if (strcasecmp(s, "queue") == 0) - mask |= BLK_TC_QUEUE; - else if (strcasecmp(s, "read") == 0) - mask |= BLK_TC_READ; - else if (strcasecmp(s, "requeue") == 0) - mask |= BLK_TC_REQUEUE; - else if (strcasecmp(s, "sync") == 0) - mask |= BLK_TC_SYNC; - else if (strcasecmp(s, "write") == 0) - mask |= BLK_TC_WRITE; - - if (sep == NULL) + token = strsep(&s, ","); + if (token == NULL) break; - s = sep + 1; + if (*token == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { + if (strcasecmp(token, mask_maps[i].str) == 0) { + mask |= mask_maps[i].mask; + break; + } + } + if (i == ARRAY_SIZE(mask_maps)) { + mask = -EINVAL; + break; + } } - kfree(copy); + kfree(s); return mask; } +static ssize_t blk_trace_mask2str(char *buf, int mask) +{ + int i; + char *p = buf; + + for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { + if (mask & mask_maps[i].mask) { + p += sprintf(p, "%s%s", + (p == buf) ? "" : ",", mask_maps[i].str); + } + } + *p++ = '\n'; + + return p - buf; +} + +static struct request_queue *blk_trace_get_queue(struct block_device *bdev) +{ + if (bdev->bd_disk == NULL) + return NULL; + + return bdev_get_queue(bdev); +} + static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1469,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (bdev == NULL) goto out_unlock_kernel; - q = bdev_get_queue(bdev); + q = blk_trace_get_queue(bdev); if (q == NULL) goto out_bdput; + mutex_lock(&bdev->bd_mutex); + + if (attr == &dev_attr_enable) { + ret = sprintf(buf, "%u\n", !!q->blk_trace); + goto out_unlock_bdev; + } + if (q->blk_trace == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); else if (attr == &dev_attr_pid) ret = sprintf(buf, "%u\n", q->blk_trace->pid); else if (attr == &dev_attr_start_lba) ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); else if (attr == &dev_attr_end_lba) ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + +out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_bdput: bdput(bdev); @@ -1499,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct request_queue *q; struct hd_struct *p; u64 value; - ssize_t ret = -ENXIO; + ssize_t ret = -EINVAL; if (count == 0) goto out; @@ -1507,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (attr == &dev_attr_act_mask) { if (sscanf(buf, "%llx", &value) != 1) { /* Assume it is a list of trace category names */ - value = blk_str2act_mask(buf); - if (value < 0) + ret = blk_trace_str2mask(buf); + if (ret < 0) goto out; + value = ret; } } else if (sscanf(buf, "%llu", &value) != 1) goto out; + ret = -ENXIO; + lock_kernel(); p = dev_to_part(dev); bdev = bdget(part_devt(p)); if (bdev == NULL) goto out_unlock_kernel; - q = bdev_get_queue(bdev); + q = blk_trace_get_queue(bdev); if (q == NULL) goto out_bdput; mutex_lock(&bdev->bd_mutex); + + if (attr == &dev_attr_enable) { + if (value) + ret = blk_trace_setup_queue(q, bdev->bd_dev); + else + ret = blk_trace_remove_queue(q); + goto out_unlock_bdev; + } + ret = 0; if (q->blk_trace == NULL) ret = blk_trace_setup_queue(q, bdev->bd_dev); @@ -1538,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, q->blk_trace->start_lba = value; else if (attr == &dev_attr_end_lba) q->blk_trace->end_lba = value; - ret = count; } + +out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_bdput: bdput(bdev); out_unlock_kernel: unlock_kernel(); out: - return ret; + return ret ? ret : count; } + diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7847806eefef..1752a63f37c0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,6 +29,8 @@ #include <linux/list.h> #include <linux/hash.h> +#include <trace/sched.h> + #include <asm/ftrace.h> #include "trace.h" @@ -339,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec) static void ftrace_free_rec(struct dyn_ftrace *rec) { - rec->ip = (unsigned long)ftrace_free_records; + rec->freelist = ftrace_free_records; ftrace_free_records = rec; rec->flags |= FTRACE_FL_FREE; } @@ -356,9 +358,14 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) + if ((rec->ip >= s) && (rec->ip < e)) { + /* + * rec->ip is changed in ftrace_free_rec() + * It should not between s and e if record was freed. + */ + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); ftrace_free_rec(rec); + } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -377,7 +384,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) return NULL; } - ftrace_free_records = (void *)rec->ip; + ftrace_free_records = rec->freelist; memset(rec, 0, sizeof(*rec)); return rec; } @@ -409,7 +416,7 @@ ftrace_record_ip(unsigned long ip) return NULL; rec->ip = ip; - rec->flags = (unsigned long)ftrace_new_addrs; + rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; return rec; @@ -729,7 +736,7 @@ static int ftrace_update_code(struct module *mod) return -1; p = ftrace_new_addrs; - ftrace_new_addrs = (struct dyn_ftrace *)p->flags; + ftrace_new_addrs = p->newlist; p->flags = 0L; /* convert record (i.e, patch mcount-call with NOP) */ @@ -2262,7 +2269,7 @@ ftrace_pid_read(struct file *file, char __user *ubuf, if (ftrace_pid_trace == ftrace_swapper_pid) r = sprintf(buf, "swapper tasks\n"); else if (ftrace_pid_trace) - r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); + r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); else r = sprintf(buf, "no pid\n"); @@ -2590,6 +2597,38 @@ free: return ret; } +static void +ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, + struct task_struct *next) +{ + unsigned long long timestamp; + int index; + + /* + * Does the user want to count the time a function was asleep. + * If so, do not update the time stamps. + */ + if (trace_flags & TRACE_ITER_SLEEP_TIME) + return; + + timestamp = trace_clock_local(); + + prev->ftrace_timestamp = timestamp; + + /* only process tasks that we timestamped */ + if (!next->ftrace_timestamp) + return; + + /* + * Update all the counters in next to make up for the + * time next was sleeping. + */ + timestamp -= next->ftrace_timestamp; + + for (index = next->curr_ret_stack; index >= 0; index--) + next->ret_stack[index].calltime += timestamp; +} + /* Allocate a return stack for each task */ static int start_graph_tracing(void) { @@ -2611,6 +2650,13 @@ static int start_graph_tracing(void) ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); + if (!ret) { + ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); + if (ret) + pr_info("ftrace_graph: Couldn't activate tracepoint" + " probe to kernel_sched_switch\n"); + } + kfree(ret_stack_list); return ret; } @@ -2643,6 +2689,12 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_lock); + /* we currently allow only one tracer registered at a time */ + if (atomic_read(&ftrace_graph_active)) { + ret = -EBUSY; + goto out; + } + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; register_pm_notifier(&ftrace_suspend_notifier); @@ -2668,6 +2720,7 @@ void unregister_ftrace_graph(void) mutex_lock(&ftrace_lock); atomic_dec(&ftrace_graph_active); + unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); @@ -2688,6 +2741,7 @@ void ftrace_graph_init_task(struct task_struct *t) t->curr_ret_stack = -1; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); + t->ftrace_timestamp = 0; } else t->ret_stack = NULL; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a09027ec1714..edce2ff38944 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -580,8 +580,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) extern int ring_buffer_page_too_big(void); #ifdef CONFIG_HOTPLUG_CPU -static int __cpuinit rb_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); +static int rb_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); #endif /** @@ -2861,8 +2861,8 @@ static __init int rb_init_debugfs(void) fs_initcall(rb_init_debugfs); #ifdef CONFIG_HOTPLUG_CPU -static int __cpuinit rb_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int rb_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { struct ring_buffer *buffer = container_of(self, struct ring_buffer, cpu_notify); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 89f0c2544ad0..a0174a40c563 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -316,6 +316,7 @@ static const char *trace_options[] = { "context-info", "latency-format", "global-clock", + "sleep-time", NULL }; @@ -382,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) return cnt; } -ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) +static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; void *ret; @@ -3535,6 +3536,9 @@ struct dentry *tracing_init_dentry(void) if (d_tracer) return d_tracer; + if (!debugfs_initialized()) + return NULL; + d_tracer = debugfs_create_dir("tracing", NULL); if (!d_tracer && !once) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 90a848debcba..cb0ce3fc36d3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -685,6 +685,7 @@ enum trace_iterator_flags { TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, + TRACE_ITER_SLEEP_TIME = 0x100000, }; /* diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 05b176abfd30..b588fd81f7f9 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -18,6 +18,7 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/ktime.h> +#include <linux/trace_clock.h> /* * trace_clock_local(): the simplest and least coherent tracing clock. diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e876816fa8e7..d28687e7b3a7 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -57,9 +57,9 @@ static struct tracer_flags tracer_flags = { /* Add a function return address to the trace stack on thread info.*/ int -ftrace_push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func, int *depth) +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) { + unsigned long long calltime; int index; if (!current->ret_stack) @@ -71,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time, return -EBUSY; } + calltime = trace_clock_local(); + index = ++current->curr_ret_stack; barrier(); current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; - current->ret_stack[index].calltime = time; + current->ret_stack[index].calltime = calltime; *depth = index; return 0; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 19261fdd2455..d72b9a63b247 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -137,7 +137,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } -int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) +int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) { if (len > ((PAGE_SIZE - 1) - s->len)) return 0; @@ -148,10 +148,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) return len; } -int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) +int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) { unsigned char hex[HEX_CHARS]; - unsigned char *data = mem; + const unsigned char *data = mem; int i, j; #ifdef __BIG_ENDIAN @@ -167,6 +167,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } +void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + void *ret; + + if (len > ((PAGE_SIZE - 1) - s->len)) + return NULL; + + ret = s->buffer + s->len; + s->len += len; + + return ret; +} + int trace_seq_path(struct trace_seq *s, struct path *path) { unsigned char *p; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 35c422fb51a9..e0bde39c2dd9 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -29,24 +29,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt); -int trace_seq_puts(struct trace_seq *s, const char *str); -int trace_seq_putc(struct trace_seq *s, unsigned char c); -int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); -int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); -int trace_seq_path(struct trace_seq *s, struct path *path); -int seq_print_userip_objs(const struct userstack_entry *entry, - struct trace_seq *s, unsigned long sym_flags); -int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, - unsigned long ip, unsigned long sym_flags); +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len); +extern void *trace_seq_reserve(struct trace_seq *s, size_t len); +extern int trace_seq_path(struct trace_seq *s, struct path *path); +extern int seq_print_userip_objs(const struct userstack_entry *entry, + struct trace_seq *s, unsigned long sym_flags); +extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags); -int trace_print_context(struct trace_iterator *iter); -int trace_print_lat_context(struct trace_iterator *iter); +extern int trace_print_context(struct trace_iterator *iter); +extern int trace_print_lat_context(struct trace_iterator *iter); -struct trace_event *ftrace_find_event(int type); -int register_ftrace_event(struct trace_event *event); -int unregister_ftrace_event(struct trace_event *event); +extern struct trace_event *ftrace_find_event(int type); +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); -enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); +extern enum print_line_t trace_nop_print(struct trace_iterator *iter, + int flags); #define MAX_MEMHEX_BYTES 8 #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 39310e3434ee..acdebd771a93 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -75,7 +75,7 @@ static int stat_seq_init(struct tracer_stat_session *session) { struct trace_stat_list *iter_entry, *new_entry; struct tracer_stat *ts = session->ts; - void *prev_stat; + void *stat; int ret = 0; int i; @@ -85,6 +85,10 @@ static int stat_seq_init(struct tracer_stat_session *session) if (!ts->stat_cmp) ts->stat_cmp = dummy_cmp; + stat = ts->stat_start(); + if (!stat) + goto exit; + /* * The first entry. Actually this is the second, but the first * one (the stat_list head) is pointless. @@ -99,14 +103,19 @@ static int stat_seq_init(struct tracer_stat_session *session) list_add(&new_entry->list, &session->stat_list); - new_entry->stat = ts->stat_start(); - prev_stat = new_entry->stat; + new_entry->stat = stat; /* * Iterate over the tracer stat entries and store them in a sorted * list. */ for (i = 1; ; i++) { + stat = ts->stat_next(stat, i); + + /* End of insertion */ + if (!stat) + break; + new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); if (!new_entry) { ret = -ENOMEM; @@ -114,31 +123,23 @@ static int stat_seq_init(struct tracer_stat_session *session) } INIT_LIST_HEAD(&new_entry->list); - new_entry->stat = ts->stat_next(prev_stat, i); + new_entry->stat = stat; - /* End of insertion */ - if (!new_entry->stat) - break; - - list_for_each_entry(iter_entry, &session->stat_list, list) { + list_for_each_entry_reverse(iter_entry, &session->stat_list, + list) { /* Insertion with a descendent sorting */ - if (ts->stat_cmp(new_entry->stat, - iter_entry->stat) > 0) { - - list_add_tail(&new_entry->list, - &iter_entry->list); - break; + if (ts->stat_cmp(iter_entry->stat, + new_entry->stat) >= 0) { - /* The current smaller value */ - } else if (list_is_last(&iter_entry->list, - &session->stat_list)) { list_add(&new_entry->list, &iter_entry->list); break; } } - prev_stat = new_entry->stat; + /* The current larger value */ + if (list_empty(&new_entry->list)) + list_add(&new_entry->list, &session->stat_list); } exit: mutex_unlock(&session->stat_mutex); @@ -160,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos) /* If we are in the beginning of the file, print the headers */ if (!*pos && session->ts->stat_headers) - session->ts->stat_headers(s); + return SEQ_START_TOKEN; return seq_list_start(&session->stat_list, *pos); } @@ -169,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) { struct tracer_stat_session *session = s->private; + if (p == SEQ_START_TOKEN) + return seq_list_start(&session->stat_list, *pos); + return seq_list_next(p, &session->stat_list, pos); } @@ -183,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v) struct tracer_stat_session *session = s->private; struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); + if (v == SEQ_START_TOKEN) + return session->ts->stat_headers(s); + return session->ts->stat_show(s, l->stat); } diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 9ab035b58cf1..797201e4a137 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p) struct pid *pid; struct task_struct *tsk; + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (&cws->list == workqueue_cpu_stat(cpu)->list.next) + seq_printf(s, "\n"); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); + pid = find_get_pid(cws->pid); if (pid) { tsk = get_pid_task(pid, PIDTYPE_PID); @@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p) put_pid(pid); } - spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - if (&cws->list == workqueue_cpu_stat(cpu)->list.next) - seq_printf(s, "\n"); - spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); - return 0; } static int workqueue_stat_headers(struct seq_file *s) { seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); - seq_printf(s, "# | | | |\n\n"); + seq_printf(s, "# | | | |\n"); return 0; } |