diff options
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r-- | tools/perf/builtin-record.c | 366 |
1 files changed, 338 insertions, 28 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0980dfe3396b..f3f7f3100336 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -23,7 +23,6 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" -#include "util/drv_configs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -39,8 +38,10 @@ #include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" +#include "util/cpu-set-sched.h" #include "util/time-utils.h" #include "util/units.h" +#include "util/bpf-event.h" #include "asm/bug.h" #include <errno.h> @@ -81,12 +82,17 @@ struct record { bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; + cpu_set_t affinity_mask; }; static volatile int auxtrace_record__snapshot_started; static DEFINE_TRIGGER(auxtrace_snapshot_trigger); static DEFINE_TRIGGER(switch_output_trigger); +static const char *affinity_tags[PERF_AFFINITY_MAX] = { + "SYS", "NODE", "CPU" +}; + static bool switch_output_signal(struct record *rec) { return rec->switch_output.signal && @@ -124,6 +130,210 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse return 0; } +#ifdef HAVE_AIO_SUPPORT +static int record__aio_write(struct aiocb *cblock, int trace_fd, + void *buf, size_t size, off_t off) +{ + int rc; + + cblock->aio_fildes = trace_fd; + cblock->aio_buf = buf; + cblock->aio_nbytes = size; + cblock->aio_offset = off; + cblock->aio_sigevent.sigev_notify = SIGEV_NONE; + + do { + rc = aio_write(cblock); + if (rc == 0) { + break; + } else if (errno != EAGAIN) { + cblock->aio_fildes = -1; + pr_err("failed to queue perf data, error: %m\n"); + break; + } + } while (1); + + return rc; +} + +static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) +{ + void *rem_buf; + off_t rem_off; + size_t rem_size; + int rc, aio_errno; + ssize_t aio_ret, written; + + aio_errno = aio_error(cblock); + if (aio_errno == EINPROGRESS) + return 0; + + written = aio_ret = aio_return(cblock); + if (aio_ret < 0) { + if (aio_errno != EINTR) + pr_err("failed to write perf data, error: %m\n"); + written = 0; + } + + rem_size = cblock->aio_nbytes - written; + + if (rem_size == 0) { + cblock->aio_fildes = -1; + /* + * md->refcount is incremented in perf_mmap__push() for + * every enqueued aio write request so decrement it because + * the request is now complete. + */ + perf_mmap__put(md); + rc = 1; + } else { + /* + * aio write request may require restart with the + * reminder if the kernel didn't write whole + * chunk at once. + */ + rem_off = cblock->aio_offset + written; + rem_buf = (void *)(cblock->aio_buf + written); + record__aio_write(cblock, cblock->aio_fildes, + rem_buf, rem_size, rem_off); + rc = 0; + } + + return rc; +} + +static int record__aio_sync(struct perf_mmap *md, bool sync_all) +{ + struct aiocb **aiocb = md->aio.aiocb; + struct aiocb *cblocks = md->aio.cblocks; + struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ + int i, do_suspend; + + do { + do_suspend = 0; + for (i = 0; i < md->aio.nr_cblocks; ++i) { + if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { + if (sync_all) + aiocb[i] = NULL; + else + return i; + } else { + /* + * Started aio write is not complete yet + * so it has to be waited before the + * next allocation. + */ + aiocb[i] = &cblocks[i]; + do_suspend = 1; + } + } + if (!do_suspend) + return -1; + + while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { + if (!(errno == EAGAIN || errno == EINTR)) + pr_err("failed to sync perf data, error: %m\n"); + } + } while (1); +} + +static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) +{ + struct record *rec = to; + int ret, trace_fd = rec->session->data->file.fd; + + rec->samples++; + + ret = record__aio_write(cblock, trace_fd, bf, size, off); + if (!ret) { + rec->bytes_written += size; + if (switch_output_size(rec)) + trigger_hit(&switch_output_trigger); + } + + return ret; +} + +static off_t record__aio_get_pos(int trace_fd) +{ + return lseek(trace_fd, 0, SEEK_CUR); +} + +static void record__aio_set_pos(int trace_fd, off_t pos) +{ + lseek(trace_fd, pos, SEEK_SET); +} + +static void record__aio_mmap_read_sync(struct record *rec) +{ + int i; + struct perf_evlist *evlist = rec->evlist; + struct perf_mmap *maps = evlist->mmap; + + if (!rec->opts.nr_cblocks) + return; + + for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *map = &maps[i]; + + if (map->base) + record__aio_sync(map, true); + } +} + +static int nr_cblocks_default = 1; +static int nr_cblocks_max = 4; + +static int record__aio_parse(const struct option *opt, + const char *str, + int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + + if (unset) { + opts->nr_cblocks = 0; + } else { + if (str) + opts->nr_cblocks = strtol(str, NULL, 0); + if (!opts->nr_cblocks) + opts->nr_cblocks = nr_cblocks_default; + } + + return 0; +} +#else /* HAVE_AIO_SUPPORT */ +static int nr_cblocks_max = 0; + +static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused) +{ + return -1; +} + +static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused, + void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused) +{ + return -1; +} + +static off_t record__aio_get_pos(int trace_fd __maybe_unused) +{ + return -1; +} + +static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) +{ +} + +static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) +{ +} +#endif + +static int record__aio_enabled(struct record *rec) +{ + return rec->opts.nr_cblocks > 0; +} + static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -327,9 +537,13 @@ static int record__mmap_evlist(struct record *rec, struct record_opts *opts = &rec->opts; char msg[512]; + if (opts->affinity != PERF_AFFINITY_SYS) + cpu__setup_cpunode_map(); + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode) < 0) { + opts->auxtrace_snapshot_mode, + opts->nr_cblocks, opts->affinity) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -362,7 +576,6 @@ static int record__open(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct record_opts *opts = &rec->opts; - struct perf_evsel_config_term *err_term; int rc = 0; /* @@ -391,7 +604,12 @@ try_again: ui__warning("%s\n", msg); goto try_again; } - + if ((errno == EINVAL || errno == EBADF) && + pos->leader != pos && + pos->weak_group) { + pos = perf_evlist__reset_weak_group(evlist, pos); + goto try_again; + } rc = -errno; perf_evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); @@ -410,14 +628,6 @@ try_again: goto out; } - if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(pos), errno, - str_error_r(errno, msg, sizeof(msg))); - rc = -1; - goto out; - } - rc = record__mmap(rec); if (rc) goto out; @@ -450,10 +660,9 @@ static int process_sample_event(struct perf_tool *tool, static int process_buildids(struct record *rec) { - struct perf_data *data = &rec->data; struct perf_session *session = rec->session; - if (data->size == 0) + if (perf_data__size(&rec->data) == 0) return 0; /* @@ -513,6 +722,16 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; +static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) +{ + if (rec->opts.affinity != PERF_AFFINITY_SYS && + !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { + CPU_ZERO(&rec->affinity_mask); + CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); + sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); + } +} + static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, bool overwrite) { @@ -520,6 +739,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli int i; int rc = 0; struct perf_mmap *maps; + int trace_fd = rec->data.file.fd; + off_t off; if (!evlist) return 0; @@ -531,13 +752,31 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) return 0; + if (record__aio_enabled(rec)) + off = record__aio_get_pos(trace_fd); + for (i = 0; i < evlist->nr_mmaps; i++) { struct perf_mmap *map = &maps[i]; if (map->base) { - if (perf_mmap__push(map, rec, record__pushfn) != 0) { - rc = -1; - goto out; + record__adjust_affinity(rec, map); + if (!record__aio_enabled(rec)) { + if (perf_mmap__push(map, rec, record__pushfn) != 0) { + rc = -1; + goto out; + } + } else { + int idx; + /* + * Call record__aio_sync() to wait till map->data buffer + * becomes available after previous aio write request. + */ + idx = record__aio_sync(map, false); + if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) { + record__aio_set_pos(trace_fd, off); + rc = -1; + goto out; + } } } @@ -548,6 +787,9 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli } } + if (record__aio_enabled(rec)) + record__aio_set_pos(trace_fd, off); + /* * Mark the round finished in case we wrote * at least one event. @@ -592,6 +834,9 @@ static void record__init_features(struct record *rec) if (!rec->opts.full_auxtrace) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) + perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -605,7 +850,7 @@ record__finish_output(struct record *rec) return; rec->session->header.data_size += rec->bytes_written; - data->size = lseek(perf_data__fd(data), 0, SEEK_CUR); + data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); if (!rec->no_buildid) { process_buildids(rec); @@ -633,8 +878,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) err = perf_event__synthesize_thread_map(&rec->tool, thread_map, process_synthesized_event, &rec->session->machines.host, - rec->opts.sample_address, - rec->opts.proc_map_timeout); + rec->opts.sample_address); thread_map__put(thread_map); return err; } @@ -650,6 +894,8 @@ record__switch_output(struct record *rec, bool at_exit) /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; + record__aio_mmap_read_sync(rec); + record__synthesize(rec, true); if (target__none(&rec->opts.target)) record__synthesize_workload(rec, true); @@ -672,7 +918,7 @@ record__switch_output(struct record *rec, bool at_exit) if (!quiet) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", - data->file.path, timestamp); + data->path, timestamp); /* Output tracking events */ if (!at_exit) { @@ -847,9 +1093,14 @@ static int record__synthesize(struct record *rec, bool tail) return err; } + err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, + machine, opts); + if (err < 0) + pr_warning("Couldn't synthesize bpf events.\n"); + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, - opts->proc_map_timeout, 1); + 1); out: return err; } @@ -897,6 +1148,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__init_features(rec); + if (rec->opts.use_clockid && rec->opts.clockid_res_ns) + session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; + if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, @@ -1157,6 +1411,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__synthesize_workload(rec, true); out_child: + record__aio_mmap_read_sync(rec); + if (forks) { int exit_status; @@ -1205,7 +1461,7 @@ out_child: fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", perf_data__size(data) / 1024.0 / 1024.0, - data->file.path, postfix, samples); + data->path, postfix, samples); } out_delete_session: @@ -1290,6 +1546,13 @@ static int perf_record_config(const char *var, const char *value, void *cb) var = "call-graph.record-mode"; return perf_default_config(var, value, cb); } +#ifdef HAVE_AIO_SUPPORT + if (!strcmp(var, "record.aio")) { + rec->opts.nr_cblocks = strtol(value, NULL, 0); + if (!rec->opts.nr_cblocks) + rec->opts.nr_cblocks = nr_cblocks_default; + } +#endif return 0; } @@ -1337,6 +1600,19 @@ static const struct clockid_map clockids[] = { CLOCKID_END, }; +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + static int parse_clockid(const struct option *opt, const char *str, int unset) { struct record_opts *opts = (struct record_opts *)opt->value; @@ -1360,7 +1636,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) /* if its a number, we're done */ if (sscanf(str, "%d", &opts->clockid) == 1) - return 0; + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); /* allow a "CLOCK_" prefix to the name */ if (!strncasecmp(str, "CLOCK_", 6)) @@ -1369,7 +1645,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) for (cm = clockids; cm->name; cm++) { if (!strcasecmp(str, cm->name)) { opts->clockid = cm->clockid; - return 0; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); } } @@ -1378,6 +1655,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) return -1; } +static int record__parse_affinity(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + + if (unset || !str) + return 0; + + if (!strcasecmp(str, "node")) + opts->affinity = PERF_AFFINITY_NODE; + else if (!strcasecmp(str, "cpu")) + opts->affinity = PERF_AFFINITY_CPU; + + return 0; +} + static int record__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) @@ -1521,7 +1813,6 @@ static struct record record = { .uses_mmap = true, .default_per_cpu = true, }, - .proc_map_timeout = 500, }, .tool = { .sample = process_sample_event, @@ -1571,7 +1862,7 @@ static struct option __record_options[] = { OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), - OPT_STRING('o', "output", &record.data.file.path, "file", + OPT_STRING('o', "output", &record.data.path, "file", "output file name"), OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, &record.opts.no_inherit_set, @@ -1579,6 +1870,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), + OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", @@ -1651,7 +1943,7 @@ static struct option __record_options[] = { parse_clockid), OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, "opts", "AUX area tracing Snapshot Mode", ""), - OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, + OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, "Record namespaces events"), @@ -1681,6 +1973,14 @@ static struct option __record_options[] = { "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), +#ifdef HAVE_AIO_SUPPORT + OPT_CALLBACK_OPTARG(0, "aio", &record.opts, + &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", + record__aio_parse), +#endif + OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", + "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", + record__parse_affinity), OPT_END() }; @@ -1715,6 +2015,9 @@ int cmd_record(int argc, const char **argv) # undef REASON #endif + CPU_ZERO(&rec->affinity_mask); + rec->opts.affinity = PERF_AFFINITY_SYS; + rec->evlist = perf_evlist__new(); if (rec->evlist == NULL) return -ENOMEM; @@ -1873,6 +2176,13 @@ int cmd_record(int argc, const char **argv) goto out; } + if (rec->opts.nr_cblocks > nr_cblocks_max) + rec->opts.nr_cblocks = nr_cblocks_max; + if (verbose > 0) + pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); + + pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); + err = __cmd_record(&record, argc, argv); out: perf_evlist__delete(rec->evlist); |