summaryrefslogtreecommitdiffstats
path: root/include/trace/events/block.h
diff options
context:
space:
mode:
authorLi Zefan <lizf@cn.fujitsu.com>2009-06-09 13:43:05 +0800
committerSteven Rostedt <rostedt@goodmis.org>2009-06-09 12:34:23 -0400
commit55782138e47d9baf2f7d3a7af9e7cf42adf72c56 (patch)
treec7ccabae20e27bbeb08b69a358e8b86c98d1d9f3 /include/trace/events/block.h
parentf57a8a1911342265e7acdc190333c4e9235a6632 (diff)
downloadtalos-op-linux-55782138e47d9baf2f7d3a7af9e7cf42adf72c56.tar.gz
talos-op-linux-55782138e47d9baf2f7d3a7af9e7cf42adf72c56.zip
tracing/events: convert block trace points to TRACE_EVENT()
TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions ... Cons: - no dev_t info for the output of plug, unplug_timer and unplug_io events. no dev_t info for getrq and sleeprq events if bio == NULL. no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL. This is mainly because we can't get the deivce from a request queue. But this may change in the future. - A packet command is converted to a string in TP_assign, not TP_print. While blktrace do the convertion just before output. Since pc requests should be rather rare, this is not a big issue. - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT has a unique format, which means we have some unused data in a trace entry. The overhead is minimized by using __dynamic_array() instead of __array(). I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing: dd dd + ioctl blktrace dd + TRACE_EVENT (splice) 1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s 2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s 3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s So the overhead of tracing is very small, and no regression when using those trace events vs blktrace. And the binary output of TRACE_EVENT is much smaller than blktrace: # ls -l -h -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out Following are some comparisons between TRACE_EVENT and blktrace: plug: kjournald-480 [000] 303.084981: block_plug: [kjournald] kjournald-480 [000] 303.084981: 8,0 P N [kjournald] unplug_io: kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1 kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1 remap: kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384 kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384 bio_backmerge: kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald] kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald] getrq: kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald] kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald] bash-2066 [001] 1072.953770: 8,0 G N [bash] bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash] rq_complete: konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0] konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0] ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0] ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0] rq_insert: kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald] kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald] Changelog from v2 -> v3: - use the newly introduced __dynamic_array(). Changelog from v1 -> v2: - use __string() instead of __array() to minimize the memory required to store hex dump of rq->cmd(). - support large pc requests. - add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT. - some cleanups. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'include/trace/events/block.h')
-rw-r--r--include/trace/events/block.h483
1 files changed, 483 insertions, 0 deletions
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644
index 000000000000..a99d1e565bb0
--- /dev/null
+++ b/include/trace/events/block.h
@@ -0,0 +1,483 @@
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+TRACE_EVENT(block_rq_abort,
+
+ TP_PROTO(struct request_queue *q, struct request *rq),
+
+ TP_ARGS(q, rq),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( int, errors )
+ __array( char, rwbs, 6 )
+ __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+ __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
+ __entry->nr_sector = blk_pc_request(rq) ?
+ 0 : rq->hard_nr_sectors;
+ __entry->errors = rq->errors;
+
+ blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_dump_cmd(__get_str(cmd), rq);
+ ),
+
+ TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, __get_str(cmd),
+ __entry->sector, __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_insert,
+
+ TP_PROTO(struct request_queue *q, struct request *rq),
+
+ TP_ARGS(q, rq),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( unsigned int, bytes )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+ __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
+ __entry->nr_sector = blk_pc_request(rq) ?
+ 0 : rq->hard_nr_sectors;
+ __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0;
+
+ blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_dump_cmd(__get_str(cmd), rq);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, __entry->bytes, __get_str(cmd),
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_issue,
+
+ TP_PROTO(struct request_queue *q, struct request *rq),
+
+ TP_ARGS(q, rq),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( unsigned int, bytes )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+ __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
+ __entry->nr_sector = blk_pc_request(rq) ?
+ 0 : rq->hard_nr_sectors;
+ __entry->bytes = blk_pc_request(rq) ? rq->data_len : 0;
+
+ blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_dump_cmd(__get_str(cmd), rq);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, __entry->bytes, __get_str(cmd),
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_requeue,
+
+ TP_PROTO(struct request_queue *q, struct request *rq),
+
+ TP_ARGS(q, rq),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( int, errors )
+ __array( char, rwbs, 6 )
+ __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+ __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
+ __entry->nr_sector = blk_pc_request(rq) ?
+ 0 : rq->hard_nr_sectors;
+ __entry->errors = rq->errors;
+
+ blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_dump_cmd(__get_str(cmd), rq);
+ ),
+
+ TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, __get_str(cmd),
+ __entry->sector, __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_complete,
+
+ TP_PROTO(struct request_queue *q, struct request *rq),
+
+ TP_ARGS(q, rq),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( int, errors )
+ __array( char, rwbs, 6 )
+ __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+ __entry->sector = blk_pc_request(rq) ? 0 : rq->hard_sector;
+ __entry->nr_sector = blk_pc_request(rq) ?
+ 0 : rq->hard_nr_sectors;
+ __entry->errors = rq->errors;
+
+ blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_dump_cmd(__get_str(cmd), rq);
+ ),
+
+ TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, __get_str(cmd),
+ __entry->sector, __entry->nr_sector, __entry->errors)
+);
+TRACE_EVENT(block_bio_bounce,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio),
+
+ TP_ARGS(q, bio),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_complete,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio),
+
+ TP_ARGS(q, bio),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned, nr_sector )
+ __field( int, error )
+ __array( char, rwbs, 6 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%d]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->error)
+);
+
+TRACE_EVENT(block_bio_backmerge,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio),
+
+ TP_ARGS(q, bio),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_frontmerge,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio),
+
+ TP_ARGS(q, bio),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned, nr_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_queue,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio),
+
+ TP_ARGS(q, bio),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_getrq,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+ TP_ARGS(q, bio, rw),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
+ __entry->sector = bio ? bio->bi_sector : 0;
+ __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
+ blk_fill_rwbs(__entry->rwbs,
+ bio ? bio->bi_rw : 0, __entry->nr_sector);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_sleeprq,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+ TP_ARGS(q, bio, rw),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
+ __entry->sector = bio ? bio->bi_sector : 0;
+ __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
+ blk_fill_rwbs(__entry->rwbs,
+ bio ? bio->bi_rw : 0, __entry->nr_sector);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_plug,
+
+ TP_PROTO(struct request_queue *q),
+
+ TP_ARGS(q),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("[%s]", __entry->comm)
+);
+
+TRACE_EVENT(block_unplug_timer,
+
+ TP_PROTO(struct request_queue *q),
+
+ TP_ARGS(q),
+
+ TP_STRUCT__entry(
+ __field( int, nr_rq )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE];
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_unplug_io,
+
+ TP_PROTO(struct request_queue *q),
+
+ TP_ARGS(q),
+
+ TP_STRUCT__entry(
+ __field( int, nr_rq )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE];
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_split,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio,
+ unsigned int new_sector),
+
+ TP_ARGS(q, bio, new_sector),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( sector_t, new_sector )
+ __array( char, rwbs, 6 )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->new_sector = new_sector;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ ),
+
+ TP_printk("%d,%d %s %llu / %llu [%s]",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->new_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_remap,
+
+ TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+ sector_t from),
+
+ TP_ARGS(q, bio, dev, from),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, sector )
+ __field( unsigned int, nr_sector )
+ __field( dev_t, old_dev )
+ __field( sector_t, old_sector )
+ __array( char, rwbs, 6 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ __entry->old_dev = dev;
+ __entry->old_sector = from;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ __entry->sector, __entry->nr_sector,
+ MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+ __entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
OpenPOWER on IntegriCloud