summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStewart Smith <stewart@linux.ibm.com>2018-11-19 13:49:46 +1100
committerAndrew Jeffery <andrew@aj.id.au>2019-03-29 17:10:08 +1030
commitef0c8360065eba7f353f4ac866a1f6f86bf18cf4 (patch)
treebe6dda92f007a49ff12cc38c337a65b593d9fa32
parent4519bb82624e8a80905b2914334edd27fbe61029 (diff)
downloadphosphor-mboxbridge-ef0c8360065eba7f353f4ac866a1f6f86bf18cf4.tar.gz
phosphor-mboxbridge-ef0c8360065eba7f353f4ac866a1f6f86bf18cf4.zip
Add --trace support (in blktrace format)
In an effort understand what PNOR requests come from the host, it'd be good to be able to trace what requests come in and visualise them. blktrace is some Linux infrastructure for tracing block device activity all the way through the linux block layer, for which there is a variety of existing tooling. These tools process the (typically) kernel produced blktrace output. We can produce this same output programatically from mboxd though. This patch gives us the (option) to start mboxd in a mode where it will write a blktrace file out, which can be fed into tools like blkparse(1) or tools like iowatcher[1] to generate charts (and video). A quirk of the blktrace format is that it's very geared towards a full IO subsystem, so we can't directly map window operations (what we know in mboxd) to specific IO ops (i.e. we don't get "firmware read one page out of this window before closing it"). So, for each Window opening (or reusing a cached one), we write THREE blktrace events: a Queue, Dispatch, and Complete. We can usk tools like blkparse to do everything from get a detailed list of what windows were opened and for how long: 0,0 0 1 0.000000000 0 Q R 0 + 8 [(null)] 0,0 0 2 0.000000000 0 D R 0 + 8 [(null)] 0,0 0 3 0.000182022 0 C R 0 + 8 [0] 0,0 0 4 0.042416351 0 Q R 4144 + 2040 [(null)] 0,0 0 5 0.042416351 0 D R 4144 + 2040 [(null)] 0,0 0 6 0.060802662 0 C R 4144 + 2040 [0] 0,0 0 7 0.084775813 0 Q R 64 + 288 [(null)] 0,0 0 8 0.084775813 0 D R 64 + 288 [(null)] 0,0 0 9 0.087835720 0 C R 64 + 288 [0] 0,0 0 10 1.429234244 0 Q R 8488 + 2048 [(null)] to getting a simple summary at the end of how many windows were opened read and read/write: CPU0 (0,0): Reads Queued: 90, 74,040KiB Writes Queued: 6, 2,664KiB Read Dispatches: 90, 74,040KiB Write Dispatches: 6, 2,664KiB Reads Requeued: 0 Writes Requeued: 0 Reads Completed: 90, 74,040KiB Writes Completed: 6, 2,664KiB Read Merges: 0, 0KiB Write Merges: 0, 0KiB Read depth: 1 Write depth: 1 IO unplugs: 0 Timer unplugs: 0 If you change the window size to something tiny, like 4096 bytes, you can get detailed paging information for hostboot at the expense of IPL time. Pretty graphs and animations: https://www.flamingspork.com/blog/?p=4419 [1] iowatcher: http://masoncoding.com/iowatcher/ Change-Id: I5dd02b6bc616c441abf54d87a5d67c972cbaf228 Signed-off-by: Stewart Smith <stewart@linux.ibm.com> [AJ: Resolve merge conflicts, some tidy ups] Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
-rw-r--r--mboxd.c20
-rw-r--r--mboxd.h6
-rw-r--r--protocol.c109
3 files changed, 133 insertions, 2 deletions
diff --git a/mboxd.c b/mboxd.c
index 2026023..637f416 100644
--- a/mboxd.c
+++ b/mboxd.c
@@ -54,7 +54,8 @@ const char* USAGE =
"\t\t\t\t(default: fill the reserved memory region)\n"
"\t-w | --window-size\tThe window size (power of 2) in MB\n"
"\t\t\t\t(default: 1MB)\n"
- "\t-f | --flash\t\tSize of flash in [K|M] bytes\n\n";
+ "\t-f | --flash\t\tSize of flash in [K|M] bytes\n\n"
+ "\t-t | --trace\t\tFile to write trace data to (in blktrace format)\n\n";
static int dbus_init(struct mbox_context *context,
const struct transport_ops **ops)
@@ -236,6 +237,7 @@ static bool parse_cmdline(int argc, char **argv,
{ "window-num", optional_argument, 0, 'n' },
{ "verbose", no_argument, 0, 'v' },
{ "syslog", no_argument, 0, 's' },
+ { "trace", optional_argument, 0, 't' },
{ "version", no_argument, 0, 'V' },
{ "help", no_argument, 0, 'h' },
{ 0, 0, 0, 0 }
@@ -246,7 +248,7 @@ static bool parse_cmdline(int argc, char **argv,
context->current = NULL; /* No current window */
- while ((opt = getopt_long(argc, argv, "f:b:w::n::vsVh", long_options, NULL))
+ while ((opt = getopt_long(argc, argv, "f:b:w::n::vst::Vh", long_options, NULL))
!= -1) {
switch (opt) {
case 0:
@@ -309,6 +311,17 @@ static bool parse_cmdline(int argc, char **argv,
case 'V':
printf("%s V%s\n", THIS_NAME, PACKAGE_VERSION);
exit(0);
+ case 't':
+ context->blktracefd = open(argv[optind],
+ O_CREAT|O_TRUNC|O_WRONLY,
+ 0666);
+ printf("Recording blktrace output to %s\n",
+ argv[optind]);
+ if (context->blktracefd == -1) {
+ perror("Couldn't open blktrace file for writing");
+ exit(2);
+ }
+ break;
case 'h':
return false; /* This will print the usage message */
default:
@@ -480,6 +493,9 @@ cleanup_protocol:
cleanup_backend:
backend_free(&context->backend);
cleanup_context:
+ if (context->blktracefd)
+ close(context->blktracefd);
+
free(context);
return rc;
diff --git a/mboxd.h b/mboxd.h
index 96235d7..6b14a24 100644
--- a/mboxd.h
+++ b/mboxd.h
@@ -5,6 +5,7 @@
#define MBOX_H
#include <assert.h>
+#include <linux/blktrace_api.h>
#include <mtd/mtd-abi.h>
#include <systemd/sd-bus.h>
#include <poll.h>
@@ -101,6 +102,11 @@ struct mbox_context {
uint32_t mem_size;
/* LPC Bus Base Address (bytes) */
uint32_t lpc_base;
+
+ /* Tracing */
+ int blktracefd;
+ struct blk_io_trace trace;
+ int64_t blktrace_start;
};
#endif /* MBOX_H */
diff --git a/protocol.c b/protocol.c
index 7158bfc..ab1c332 100644
--- a/protocol.c
+++ b/protocol.c
@@ -5,6 +5,7 @@
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
+#include <unistd.h>
#include "backend.h"
#include "common.h"
@@ -13,6 +14,7 @@
#include "protocol.h"
#include "windows.h"
+
#define BLOCK_SIZE_SHIFT_V1 12 /* 4K */
static inline uint8_t protocol_get_bmc_event_mask(struct mbox_context *context)
@@ -155,6 +157,109 @@ static inline uint16_t get_lpc_addr_shifted(struct mbox_context *context)
return lpc_addr >> context->backend.block_size_shift;
}
+static inline int64_t blktrace_gettime(void)
+{
+ struct timespec ts;
+ int64_t n;
+
+ clock_gettime(CLOCK_REALTIME, &ts);
+ n = (int64_t)(ts.tv_sec) * (int64_t)1000000000 + (int64_t)(ts.tv_nsec);
+
+ return n;
+}
+
+static void blktrace_flush_start(struct mbox_context *context)
+{
+ struct blk_io_trace *trace = &context->trace;
+ struct timespec now;
+
+ if (!context->blktracefd)
+ return;
+
+ if (!context->blktrace_start) {
+ clock_gettime(CLOCK_REALTIME, &now);
+ context->blktrace_start = blktrace_gettime();
+ }
+
+ trace->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+ trace->sequence++;
+ trace->time = blktrace_gettime() - context->blktrace_start;
+ trace->sector = context->current->flash_offset / 512;
+ trace->bytes = context->current->size;
+ if (context->current_is_write)
+ trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_WRITE);
+ else
+ trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_READ);
+ trace->pid = 0;
+ trace->device = 0;
+ trace->cpu = 0;
+ trace->error = 0;
+ trace->pdu_len = 0;
+ write(context->blktracefd, trace, sizeof(*trace));
+ trace->sequence++;
+ trace->time = blktrace_gettime() - context->blktrace_start;
+ trace->action &= ~BLK_TA_QUEUE;
+ trace->action |= BLK_TA_ISSUE;
+ write(context->blktracefd, trace, sizeof(*trace));
+}
+
+static void blktrace_flush_done(struct mbox_context *context)
+{
+ struct blk_io_trace *trace = &context->trace;
+
+ if (!context->blktracefd)
+ return;
+
+ trace->sequence++;
+ trace->time = blktrace_gettime() - context->blktrace_start;
+ trace->action &= ~BLK_TA_ISSUE;
+ trace->action |= BLK_TA_COMPLETE;
+ write(context->blktracefd, trace, sizeof(*trace));
+}
+
+static void blktrace_window_start(struct mbox_context *context)
+{
+ struct blk_io_trace *trace = &context->trace;
+
+ if (!context->blktracefd)
+ return;
+
+ if (!context->blktrace_start)
+ context->blktrace_start = blktrace_gettime();
+
+ trace->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+ trace->sequence++;
+ trace->time = blktrace_gettime() - context->blktrace_start;
+ trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_READ);
+ trace->pid = 0;
+ trace->device = 0;
+ trace->cpu = 0;
+ trace->error = 0;
+ trace->pdu_len = 0;
+}
+
+static void blktrace_window_done(struct mbox_context *context)
+{
+ struct blk_io_trace *trace = &context->trace;
+
+ if (!context->blktracefd)
+ return;
+
+ trace->sector = context->current->flash_offset / 512;
+ trace->bytes = context->current->size;
+ write(context->blktracefd, trace, sizeof(*trace));
+ trace->sequence++;
+ trace->action &= ~BLK_TA_QUEUE;
+ trace->action |= BLK_TA_ISSUE;
+ write(context->blktracefd, trace, sizeof(*trace));
+
+ trace->sequence++;
+ trace->time = blktrace_gettime() - context->blktrace_start;
+ trace->action &= ~BLK_TA_ISSUE;
+ trace->action |= BLK_TA_COMPLETE;
+ write(context->blktracefd, trace, sizeof(*trace));
+}
+
static int protocol_v1_create_window(struct mbox_context *context,
struct protocol_create_window *io)
{
@@ -180,7 +285,9 @@ static int protocol_v1_create_window(struct mbox_context *context,
* write_flush() to make sure we pick the right one.
*/
if (context->current_is_write) {
+ blktrace_flush_start(context);
rc = context->protocol->flush(context, NULL);
+ blktrace_flush_done(context);
if (rc < 0) {
MSG_ERR("Couldn't Flush Write Window\n");
return rc;
@@ -192,6 +299,7 @@ static int protocol_v1_create_window(struct mbox_context *context,
/* Offset the host has requested */
MSG_INFO("Host requested flash @ 0x%.8x\n", offset);
/* Check if we have an existing window */
+ blktrace_window_start(context);
context->current = windows_search(context, offset,
context->version == API_VERSION_1);
@@ -206,6 +314,7 @@ static int protocol_v1_create_window(struct mbox_context *context,
return rc;
}
}
+ blktrace_window_done(context);
context->current_is_write = !io->req.ro;
OpenPOWER on IntegriCloud