summaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-record.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r--tools/perf/builtin-record.c366
1 files changed, 338 insertions, 28 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0980dfe3396b..f3f7f3100336 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -23,7 +23,6 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
-#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
@@ -39,8 +38,10 @@
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
+#include "util/cpu-set-sched.h"
#include "util/time-utils.h"
#include "util/units.h"
+#include "util/bpf-event.h"
#include "asm/bug.h"
#include <errno.h>
@@ -81,12 +82,17 @@ struct record {
bool timestamp_boundary;
struct switch_output switch_output;
unsigned long long samples;
+ cpu_set_t affinity_mask;
};
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
+static const char *affinity_tags[PERF_AFFINITY_MAX] = {
+ "SYS", "NODE", "CPU"
+};
+
static bool switch_output_signal(struct record *rec)
{
return rec->switch_output.signal &&
@@ -124,6 +130,210 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
return 0;
}
+#ifdef HAVE_AIO_SUPPORT
+static int record__aio_write(struct aiocb *cblock, int trace_fd,
+ void *buf, size_t size, off_t off)
+{
+ int rc;
+
+ cblock->aio_fildes = trace_fd;
+ cblock->aio_buf = buf;
+ cblock->aio_nbytes = size;
+ cblock->aio_offset = off;
+ cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
+
+ do {
+ rc = aio_write(cblock);
+ if (rc == 0) {
+ break;
+ } else if (errno != EAGAIN) {
+ cblock->aio_fildes = -1;
+ pr_err("failed to queue perf data, error: %m\n");
+ break;
+ }
+ } while (1);
+
+ return rc;
+}
+
+static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+{
+ void *rem_buf;
+ off_t rem_off;
+ size_t rem_size;
+ int rc, aio_errno;
+ ssize_t aio_ret, written;
+
+ aio_errno = aio_error(cblock);
+ if (aio_errno == EINPROGRESS)
+ return 0;
+
+ written = aio_ret = aio_return(cblock);
+ if (aio_ret < 0) {
+ if (aio_errno != EINTR)
+ pr_err("failed to write perf data, error: %m\n");
+ written = 0;
+ }
+
+ rem_size = cblock->aio_nbytes - written;
+
+ if (rem_size == 0) {
+ cblock->aio_fildes = -1;
+ /*
+ * md->refcount is incremented in perf_mmap__push() for
+ * every enqueued aio write request so decrement it because
+ * the request is now complete.
+ */
+ perf_mmap__put(md);
+ rc = 1;
+ } else {
+ /*
+ * aio write request may require restart with the
+ * reminder if the kernel didn't write whole
+ * chunk at once.
+ */
+ rem_off = cblock->aio_offset + written;
+ rem_buf = (void *)(cblock->aio_buf + written);
+ record__aio_write(cblock, cblock->aio_fildes,
+ rem_buf, rem_size, rem_off);
+ rc = 0;
+ }
+
+ return rc;
+}
+
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
+{
+ struct aiocb **aiocb = md->aio.aiocb;
+ struct aiocb *cblocks = md->aio.cblocks;
+ struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
+ int i, do_suspend;
+
+ do {
+ do_suspend = 0;
+ for (i = 0; i < md->aio.nr_cblocks; ++i) {
+ if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
+ if (sync_all)
+ aiocb[i] = NULL;
+ else
+ return i;
+ } else {
+ /*
+ * Started aio write is not complete yet
+ * so it has to be waited before the
+ * next allocation.
+ */
+ aiocb[i] = &cblocks[i];
+ do_suspend = 1;
+ }
+ }
+ if (!do_suspend)
+ return -1;
+
+ while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
+ if (!(errno == EAGAIN || errno == EINTR))
+ pr_err("failed to sync perf data, error: %m\n");
+ }
+ } while (1);
+}
+
+static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+{
+ struct record *rec = to;
+ int ret, trace_fd = rec->session->data->file.fd;
+
+ rec->samples++;
+
+ ret = record__aio_write(cblock, trace_fd, bf, size, off);
+ if (!ret) {
+ rec->bytes_written += size;
+ if (switch_output_size(rec))
+ trigger_hit(&switch_output_trigger);
+ }
+
+ return ret;
+}
+
+static off_t record__aio_get_pos(int trace_fd)
+{
+ return lseek(trace_fd, 0, SEEK_CUR);
+}
+
+static void record__aio_set_pos(int trace_fd, off_t pos)
+{
+ lseek(trace_fd, pos, SEEK_SET);
+}
+
+static void record__aio_mmap_read_sync(struct record *rec)
+{
+ int i;
+ struct perf_evlist *evlist = rec->evlist;
+ struct perf_mmap *maps = evlist->mmap;
+
+ if (!rec->opts.nr_cblocks)
+ return;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct perf_mmap *map = &maps[i];
+
+ if (map->base)
+ record__aio_sync(map, true);
+ }
+}
+
+static int nr_cblocks_default = 1;
+static int nr_cblocks_max = 4;
+
+static int record__aio_parse(const struct option *opt,
+ const char *str,
+ int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+
+ if (unset) {
+ opts->nr_cblocks = 0;
+ } else {
+ if (str)
+ opts->nr_cblocks = strtol(str, NULL, 0);
+ if (!opts->nr_cblocks)
+ opts->nr_cblocks = nr_cblocks_default;
+ }
+
+ return 0;
+}
+#else /* HAVE_AIO_SUPPORT */
+static int nr_cblocks_max = 0;
+
+static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
+{
+ return -1;
+}
+
+static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
+ void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+{
+ return -1;
+}
+
+static off_t record__aio_get_pos(int trace_fd __maybe_unused)
+{
+ return -1;
+}
+
+static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
+{
+}
+
+static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
+{
+}
+#endif
+
+static int record__aio_enabled(struct record *rec)
+{
+ return rec->opts.nr_cblocks > 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -327,9 +537,13 @@ static int record__mmap_evlist(struct record *rec,
struct record_opts *opts = &rec->opts;
char msg[512];
+ if (opts->affinity != PERF_AFFINITY_SYS)
+ cpu__setup_cpunode_map();
+
if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
- opts->auxtrace_snapshot_mode) < 0) {
+ opts->auxtrace_snapshot_mode,
+ opts->nr_cblocks, opts->affinity) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -362,7 +576,6 @@ static int record__open(struct record *rec)
struct perf_evlist *evlist = rec->evlist;
struct perf_session *session = rec->session;
struct record_opts *opts = &rec->opts;
- struct perf_evsel_config_term *err_term;
int rc = 0;
/*
@@ -391,7 +604,12 @@ try_again:
ui__warning("%s\n", msg);
goto try_again;
}
-
+ if ((errno == EINVAL || errno == EBADF) &&
+ pos->leader != pos &&
+ pos->weak_group) {
+ pos = perf_evlist__reset_weak_group(evlist, pos);
+ goto try_again;
+ }
rc = -errno;
perf_evsel__open_strerror(pos, &opts->target,
errno, msg, sizeof(msg));
@@ -410,14 +628,6 @@ try_again:
goto out;
}
- if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
- pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
- err_term->val.drv_cfg, perf_evsel__name(pos), errno,
- str_error_r(errno, msg, sizeof(msg)));
- rc = -1;
- goto out;
- }
-
rc = record__mmap(rec);
if (rc)
goto out;
@@ -450,10 +660,9 @@ static int process_sample_event(struct perf_tool *tool,
static int process_buildids(struct record *rec)
{
- struct perf_data *data = &rec->data;
struct perf_session *session = rec->session;
- if (data->size == 0)
+ if (perf_data__size(&rec->data) == 0)
return 0;
/*
@@ -513,6 +722,16 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
};
+static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
+{
+ if (rec->opts.affinity != PERF_AFFINITY_SYS &&
+ !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
+ CPU_ZERO(&rec->affinity_mask);
+ CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
+ sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
+ }
+}
+
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
bool overwrite)
{
@@ -520,6 +739,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
int i;
int rc = 0;
struct perf_mmap *maps;
+ int trace_fd = rec->data.file.fd;
+ off_t off;
if (!evlist)
return 0;
@@ -531,13 +752,31 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;
+ if (record__aio_enabled(rec))
+ off = record__aio_get_pos(trace_fd);
+
for (i = 0; i < evlist->nr_mmaps; i++) {
struct perf_mmap *map = &maps[i];
if (map->base) {
- if (perf_mmap__push(map, rec, record__pushfn) != 0) {
- rc = -1;
- goto out;
+ record__adjust_affinity(rec, map);
+ if (!record__aio_enabled(rec)) {
+ if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ rc = -1;
+ goto out;
+ }
+ } else {
+ int idx;
+ /*
+ * Call record__aio_sync() to wait till map->data buffer
+ * becomes available after previous aio write request.
+ */
+ idx = record__aio_sync(map, false);
+ if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
+ record__aio_set_pos(trace_fd, off);
+ rc = -1;
+ goto out;
+ }
}
}
@@ -548,6 +787,9 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
}
}
+ if (record__aio_enabled(rec))
+ record__aio_set_pos(trace_fd, off);
+
/*
* Mark the round finished in case we wrote
* at least one event.
@@ -592,6 +834,9 @@ static void record__init_features(struct record *rec)
if (!rec->opts.full_auxtrace)
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+ if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+ perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -605,7 +850,7 @@ record__finish_output(struct record *rec)
return;
rec->session->header.data_size += rec->bytes_written;
- data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
+ data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
if (!rec->no_buildid) {
process_buildids(rec);
@@ -633,8 +878,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
process_synthesized_event,
&rec->session->machines.host,
- rec->opts.sample_address,
- rec->opts.proc_map_timeout);
+ rec->opts.sample_address);
thread_map__put(thread_map);
return err;
}
@@ -650,6 +894,8 @@ record__switch_output(struct record *rec, bool at_exit)
/* Same Size: "2015122520103046"*/
char timestamp[] = "InvalidTimestamp";
+ record__aio_mmap_read_sync(rec);
+
record__synthesize(rec, true);
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);
@@ -672,7 +918,7 @@ record__switch_output(struct record *rec, bool at_exit)
if (!quiet)
fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
- data->file.path, timestamp);
+ data->path, timestamp);
/* Output tracking events */
if (!at_exit) {
@@ -847,9 +1093,14 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
+ err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
+ machine, opts);
+ if (err < 0)
+ pr_warning("Couldn't synthesize bpf events.\n");
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address,
- opts->proc_map_timeout, 1);
+ 1);
out:
return err;
}
@@ -897,6 +1148,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__init_features(rec);
+ if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+ session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
argv, data->is_pipe,
@@ -1157,6 +1411,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ record__aio_mmap_read_sync(rec);
+
if (forks) {
int exit_status;
@@ -1205,7 +1461,7 @@ out_child:
fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
perf_data__size(data) / 1024.0 / 1024.0,
- data->file.path, postfix, samples);
+ data->path, postfix, samples);
}
out_delete_session:
@@ -1290,6 +1546,13 @@ static int perf_record_config(const char *var, const char *value, void *cb)
var = "call-graph.record-mode";
return perf_default_config(var, value, cb);
}
+#ifdef HAVE_AIO_SUPPORT
+ if (!strcmp(var, "record.aio")) {
+ rec->opts.nr_cblocks = strtol(value, NULL, 0);
+ if (!rec->opts.nr_cblocks)
+ rec->opts.nr_cblocks = nr_cblocks_default;
+ }
+#endif
return 0;
}
@@ -1337,6 +1600,19 @@ static const struct clockid_map clockids[] = {
CLOCKID_END,
};
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+ struct timespec res;
+
+ *res_ns = 0;
+ if (!clock_getres(clk_id, &res))
+ *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+ else
+ pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+ return 0;
+}
+
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1360,7 +1636,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
/* if its a number, we're done */
if (sscanf(str, "%d", &opts->clockid) == 1)
- return 0;
+ return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
/* allow a "CLOCK_" prefix to the name */
if (!strncasecmp(str, "CLOCK_", 6))
@@ -1369,7 +1645,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
for (cm = clockids; cm->name; cm++) {
if (!strcasecmp(str, cm->name)) {
opts->clockid = cm->clockid;
- return 0;
+ return get_clockid_res(opts->clockid,
+ &opts->clockid_res_ns);
}
}
@@ -1378,6 +1655,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
return -1;
}
+static int record__parse_affinity(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+
+ if (unset || !str)
+ return 0;
+
+ if (!strcasecmp(str, "node"))
+ opts->affinity = PERF_AFFINITY_NODE;
+ else if (!strcasecmp(str, "cpu"))
+ opts->affinity = PERF_AFFINITY_CPU;
+
+ return 0;
+}
+
static int record__parse_mmap_pages(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -1521,7 +1813,6 @@ static struct record record = {
.uses_mmap = true,
.default_per_cpu = true,
},
- .proc_map_timeout = 500,
},
.tool = {
.sample = process_sample_event,
@@ -1571,7 +1862,7 @@ static struct option __record_options[] = {
OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
"list of cpus to monitor"),
OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
- OPT_STRING('o', "output", &record.data.file.path, "file",
+ OPT_STRING('o', "output", &record.data.path, "file",
"output file name"),
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
&record.opts.no_inherit_set,
@@ -1579,6 +1870,7 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
"synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
+ OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"),
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
"Fail if the specified frequency can't be used"),
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
@@ -1651,7 +1943,7 @@ static struct option __record_options[] = {
parse_clockid),
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
"opts", "AUX area tracing Snapshot Mode", ""),
- OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+ OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
"Record namespaces events"),
@@ -1681,6 +1973,14 @@ static struct option __record_options[] = {
"signal"),
OPT_BOOLEAN(0, "dry-run", &dry_run,
"Parse options then exit"),
+#ifdef HAVE_AIO_SUPPORT
+ OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
+ &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
+ record__aio_parse),
+#endif
+ OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
+ "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
+ record__parse_affinity),
OPT_END()
};
@@ -1715,6 +2015,9 @@ int cmd_record(int argc, const char **argv)
# undef REASON
#endif
+ CPU_ZERO(&rec->affinity_mask);
+ rec->opts.affinity = PERF_AFFINITY_SYS;
+
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
@@ -1873,6 +2176,13 @@ int cmd_record(int argc, const char **argv)
goto out;
}
+ if (rec->opts.nr_cblocks > nr_cblocks_max)
+ rec->opts.nr_cblocks = nr_cblocks_max;
+ if (verbose > 0)
+ pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+
+ pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
+
err = __cmd_record(&record, argc, argv);
out:
perf_evlist__delete(rec->evlist);
OpenPOWER on IntegriCloud