Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r--  tools/perf/builtin-record.c  638
1 file changed, 512 insertions(+), 126 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4e2d953d4bc5..b5063d3b6fd0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -8,10 +8,7 @@
*/
#include "builtin.h"
-#include "perf.h"
-
#include "util/build-id.h"
-#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"
@@ -23,9 +20,12 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
+#include "util/mmap.h"
+#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
+#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
@@ -39,10 +39,12 @@
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
+#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "asm/bug.h"
+#include "perf.h"
#include <errno.h>
#include <inttypes.h>
@@ -53,7 +55,13 @@
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/err.h>
+#include <linux/string.h>
#include <linux/time64.h>
+#include <linux/zalloc.h>
struct switch_output {
bool enabled;
@@ -73,7 +81,7 @@ struct record {
u64 bytes_written;
struct perf_data data;
struct auxtrace_record *itr;
- struct perf_evlist *evlist;
+ struct evlist *evlist;
struct perf_session *session;
int realtime_prio;
bool no_buildid;
@@ -86,8 +94,11 @@ struct record {
struct switch_output switch_output;
unsigned long long samples;
cpu_set_t affinity_mask;
+ unsigned long output_max_size; /* = 0: unlimited */
};
+static volatile int done;
+
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
@@ -115,7 +126,13 @@ static bool switch_output_time(struct record *rec)
trigger_is_ready(&switch_output_trigger);
}
-static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
+static bool record__output_max_size_exceeded(struct record *rec)
+{
+ return rec->output_max_size &&
+ (rec->bytes_written >= rec->output_max_size);
+}
+
+static int record__write(struct record *rec, struct mmap *map __maybe_unused,
void *bf, size_t size)
{
struct perf_data_file *file = &rec->session->data->file;
@@ -127,12 +144,24 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
rec->bytes_written += size;
+ if (record__output_max_size_exceeded(rec) && !done) {
+ fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
+ " stopping session ]\n",
+ rec->bytes_written >> 10);
+ done = 1;
+ }
+
if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
return 0;
}
+static int record__aio_enabled(struct record *rec);
+static int record__comp_enabled(struct record *rec);
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+ void *src, size_t src_size);
+
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
void *buf, size_t size, off_t off)
@@ -159,7 +188,7 @@ static int record__aio_write(struct aiocb *cblock, int trace_fd,
return rc;
}
-static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
void *rem_buf;
off_t rem_off;
@@ -183,11 +212,11 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
if (rem_size == 0) {
cblock->aio_fildes = -1;
/*
- * md->refcount is incremented in perf_mmap__push() for
- * every enqueued aio write request so decrement it because
- * the request is now complete.
+ * md->refcount is incremented in record__aio_pushfn() for
+ * every aio write request started in record__aio_push() so
+ * decrement it because the request is now complete.
*/
- perf_mmap__put(md);
+ perf_mmap__put(&md->core);
rc = 1;
} else {
/*
@@ -205,7 +234,7 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
return rc;
}
-static int record__aio_sync(struct perf_mmap *md, bool sync_all)
+static int record__aio_sync(struct mmap *md, bool sync_all)
{
struct aiocb **aiocb = md->aio.aiocb;
struct aiocb *cblocks = md->aio.cblocks;
@@ -240,18 +269,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all)
} while (1);
}
-static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+struct record_aio {
+ struct record *rec;
+ void *data;
+ size_t size;
+};
+
+static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
- struct record *rec = to;
- int ret, trace_fd = rec->session->data->file.fd;
+ struct record_aio *aio = to;
- rec->samples++;
+ /*
+ * map->core.base data pointed to by buf is copied into a free map->aio.data[] buffer
+ * to release space in the kernel buffer as fast as possible, calling
+ * perf_mmap__consume() from the perf_mmap__push() function.
+ *
+ * That lets the kernel proceed with storing more profiling data into
+ * the kernel buffer earlier than other per-cpu kernel buffers are handled.
+ *
+ * Copying can be done in two steps in case the chunk of profiling data
+ * crosses the upper bound of the kernel buffer. In this case we first move
+ * part of the data from map->start till the upper bound and then the remainder
+ * from the beginning of the kernel buffer till the end of the data chunk.
+ */
+
+ if (record__comp_enabled(aio->rec)) {
+ size = zstd_compress(aio->rec->session, aio->data + aio->size,
+ mmap__mmap_len(map) - aio->size,
+ buf, size);
+ } else {
+ memcpy(aio->data + aio->size, buf, size);
+ }
+
+ if (!aio->size) {
+ /*
+ * Increment map->refcount to guard the map->aio.data[] buffer
+ * from premature deallocation, because the map object can be
+ * released earlier than the aio write request started on the
+ * map->aio.data[] buffer completes.
+ *
+ * perf_mmap__put() is done at record__aio_complete() once the
+ * started aio request completes, or at record__aio_push() if
+ * the request failed to start.
+ */
+ perf_mmap__get(&map->core);
+ }
- ret = record__aio_write(cblock, trace_fd, bf, size, off);
+ aio->size += size;
+
+ return size;
+}
+
+static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
+{
+ int ret, idx;
+ int trace_fd = rec->session->data->file.fd;
+ struct record_aio aio = { .rec = rec, .size = 0 };
+
+ /*
+ * Call record__aio_sync() to wait till the map->aio.data[] buffer
+ * becomes available after the previous aio write operation.
+ */
+
+ idx = record__aio_sync(map, false);
+ aio.data = map->aio.data[idx];
+ ret = perf_mmap__push(map, &aio, record__aio_pushfn);
+ if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
+ return ret;
+
+ rec->samples++;
+ ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
if (!ret) {
- rec->bytes_written += size;
+ *off += aio.size;
+ rec->bytes_written += aio.size;
if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
+ } else {
+ /*
+ * Decrement map->refcount (incremented in record__aio_pushfn())
+ * if the record__aio_write() operation failed to start; otherwise
+ * map->refcount is decremented in record__aio_complete() after
+ * the aio write operation finishes successfully.
+ */
+ perf_mmap__put(&map->core);
}
return ret;
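
For readers unfamiliar with the POSIX AIO flow that record__aio_push()/record__aio_complete() build on, here is a minimal standalone sketch (not part of the patch; the file name, buffer contents and polling loop are illustrative assumptions) of the same pattern with plain libc calls: stage the data in a buffer that stays valid, start an asynchronous write at an explicit file offset, then poll for completion.

/*
 * Illustrative sketch only - not perf code. Shows the stage/aio_write/poll
 * pattern that record__aio_push() and record__aio_sync() wrap for perf.data.
 */
#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	static char staging[4096] = "profiling data chunk\n"; /* like map->aio.data[idx] */
	struct aiocb cb;
	int fd = open("trace.out", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0)
		return 1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf    = staging;        /* must stay valid until the request completes */
	cb.aio_nbytes = strlen(staging);
	cb.aio_offset = 0;              /* explicit offset, like *off in record__aio_push() */

	if (aio_write(&cb)) {           /* failed to start: caller drops its reference */
		close(fd);
		return 1;
	}

	while (aio_error(&cb) == EINPROGRESS)   /* record__aio_sync() polls similarly */
		usleep(1000);

	printf("wrote %zd bytes\n", aio_return(&cb));
	close(fd);
	return 0;
}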
@@ -270,16 +370,16 @@ static void record__aio_set_pos(int trace_fd, off_t pos)
static void record__aio_mmap_read_sync(struct record *rec)
{
int i;
- struct perf_evlist *evlist = rec->evlist;
- struct perf_mmap *maps = evlist->mmap;
+ struct evlist *evlist = rec->evlist;
+ struct mmap *maps = evlist->mmap;
- if (!rec->opts.nr_cblocks)
+ if (!record__aio_enabled(rec))
return;
- for (i = 0; i < evlist->nr_mmaps; i++) {
- struct perf_mmap *map = &maps[i];
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct mmap *map = &maps[i];
- if (map->base)
+ if (map->core.base)
record__aio_sync(map, true);
}
}
@@ -307,13 +407,8 @@ static int record__aio_parse(const struct option *opt,
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;
-static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
-{
- return -1;
-}
-
-static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
- void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
+ off_t *off __maybe_unused)
{
return -1;
}
@@ -337,6 +432,67 @@ static int record__aio_enabled(struct record *rec)
return rec->opts.nr_cblocks > 0;
}
+#define MMAP_FLUSH_DEFAULT 1
+static int record__mmap_flush_parse(const struct option *opt,
+ const char *str,
+ int unset)
+{
+ int flush_max;
+ struct record_opts *opts = (struct record_opts *)opt->value;
+ static struct parse_tag tags[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+
+ if (unset)
+ return 0;
+
+ if (str) {
+ opts->mmap_flush = parse_tag_value(str, tags);
+ if (opts->mmap_flush == (int)-1)
+ opts->mmap_flush = strtol(str, NULL, 0);
+ }
+
+ if (!opts->mmap_flush)
+ opts->mmap_flush = MMAP_FLUSH_DEFAULT;
+
+ flush_max = evlist__mmap_size(opts->mmap_pages);
+ flush_max /= 4;
+ if (opts->mmap_flush > flush_max)
+ opts->mmap_flush = flush_max;
+
+ return 0;
+}
+
+#ifdef HAVE_ZSTD_SUPPORT
+static unsigned int comp_level_default = 1;
+
+static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = opt->value;
+
+ if (unset) {
+ opts->comp_level = 0;
+ } else {
+ if (str)
+ opts->comp_level = strtol(str, NULL, 0);
+ if (!opts->comp_level)
+ opts->comp_level = comp_level_default;
+ }
+
+ return 0;
+}
+#endif
+static unsigned int comp_level_max = 22;
+
+static int record__comp_enabled(struct record *rec)
+{
+ return rec->opts.comp_level > 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -346,15 +502,19 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, NULL, event, event->header.size);
}
-static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
+static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
struct record *rec = to;
+ if (record__comp_enabled(rec)) {
+ size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
+ bf = map->data;
+ }
+
rec->samples++;
return record__write(rec, map, bf, size);
}
-static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;
@@ -386,7 +546,7 @@ static void record__sig_exit(void)
#ifdef HAVE_AUXTRACE_SUPPORT
static int record__process_auxtrace(struct perf_tool *tool,
- struct perf_mmap *map,
+ struct mmap *map,
union perf_event *event, void *data1,
size_t len1, void *data2, size_t len2)
{
@@ -395,7 +555,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
size_t padding;
u8 pad[8] = {0};
- if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
+ if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
off_t file_offset;
int fd = perf_data__fd(data);
int err;
@@ -424,7 +584,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
}
static int record__auxtrace_mmap_read(struct record *rec,
- struct perf_mmap *map)
+ struct mmap *map)
{
int ret;
@@ -440,7 +600,7 @@ static int record__auxtrace_mmap_read(struct record *rec,
}
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
- struct perf_mmap *map)
+ struct mmap *map)
{
int ret;
@@ -461,8 +621,8 @@ static int record__auxtrace_read_snapshot_all(struct record *rec)
int i;
int rc = 0;
- for (i = 0; i < rec->evlist->nr_mmaps; i++) {
- struct perf_mmap *map = &rec->evlist->mmap[i];
+ for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
+ struct mmap *map = &rec->evlist->mmap[i];
if (!map->auxtrace_mmap.base)
continue;
@@ -476,19 +636,35 @@ out:
return rc;
}
-static void record__read_auxtrace_snapshot(struct record *rec)
+static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
pr_debug("Recording AUX area tracing snapshot\n");
if (record__auxtrace_read_snapshot_all(rec) < 0) {
trigger_error(&auxtrace_snapshot_trigger);
} else {
- if (auxtrace_record__snapshot_finish(rec->itr))
+ if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
trigger_error(&auxtrace_snapshot_trigger);
else
trigger_ready(&auxtrace_snapshot_trigger);
}
}
+static int record__auxtrace_snapshot_exit(struct record *rec)
+{
+ if (trigger_is_error(&auxtrace_snapshot_trigger))
+ return 0;
+
+ if (!auxtrace_record__snapshot_started &&
+ auxtrace_record__snapshot_start(rec->itr))
+ return -1;
+
+ record__read_auxtrace_snapshot(rec, true);
+ if (trigger_is_error(&auxtrace_snapshot_trigger))
+ return -1;
+
+ return 0;
+}
+
static int record__auxtrace_init(struct record *rec)
{
int err;
@@ -504,6 +680,11 @@ static int record__auxtrace_init(struct record *rec)
if (err)
return err;
+ err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
+ rec->opts.auxtrace_sample_opts);
+ if (err)
+ return err;
+
return auxtrace_parse_filters(rec->evlist);
}
@@ -511,13 +692,14 @@ static int record__auxtrace_init(struct record *rec)
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
- struct perf_mmap *map __maybe_unused)
+ struct mmap *map __maybe_unused)
{
return 0;
}
static inline
-void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
+void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
+ bool on_exit __maybe_unused)
{
}
@@ -527,6 +709,12 @@ int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
return 0;
}
+static inline
+int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
+{
+ return 0;
+}
+
static int record__auxtrace_init(struct record *rec __maybe_unused)
{
return 0;
@@ -534,19 +722,53 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
#endif
+static bool record__kcore_readable(struct machine *machine)
+{
+ char kcore[PATH_MAX];
+ int fd;
+
+ scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
+
+ fd = open(kcore, O_RDONLY);
+ if (fd < 0)
+ return false;
+
+ close(fd);
+
+ return true;
+}
+
+static int record__kcore_copy(struct machine *machine, struct perf_data *data)
+{
+ char from_dir[PATH_MAX];
+ char kcore_dir[PATH_MAX];
+ int ret;
+
+ snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
+
+ ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
+ if (ret)
+ return ret;
+
+ return kcore_copy(from_dir, kcore_dir);
+}
+
static int record__mmap_evlist(struct record *rec,
- struct perf_evlist *evlist)
+ struct evlist *evlist)
{
struct record_opts *opts = &rec->opts;
+ bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
+ opts->auxtrace_sample_mode;
char msg[512];
if (opts->affinity != PERF_AFFINITY_SYS)
cpu__setup_cpunode_map();
- if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
+ if (evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
- opts->auxtrace_snapshot_mode,
- opts->nr_cblocks, opts->affinity) < 0) {
+ auxtrace_overwrite,
+ opts->nr_cblocks, opts->affinity,
+ opts->mmap_flush, opts->comp_level) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -575,8 +797,8 @@ static int record__mmap(struct record *rec)
static int record__open(struct record *rec)
{
char msg[BUFSIZ];
- struct perf_evsel *pos;
- struct perf_evlist *evlist = rec->evlist;
+ struct evsel *pos;
+ struct evlist *evlist = rec->evlist;
struct perf_session *session = rec->session;
struct record_opts *opts = &rec->opts;
int rc = 0;
@@ -590,18 +812,18 @@ static int record__open(struct record *rec)
if (perf_evlist__add_dummy(evlist))
return -ENOMEM;
- pos = perf_evlist__first(evlist);
+ pos = evlist__first(evlist);
pos->tracking = 0;
- pos = perf_evlist__last(evlist);
+ pos = evlist__last(evlist);
pos->tracking = 1;
- pos->attr.enable_on_exec = 1;
+ pos->core.attr.enable_on_exec = 1;
}
perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each_entry(evlist, pos) {
try_again:
- if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
+ if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
@@ -623,6 +845,17 @@ try_again:
pos->supported = true;
}
+ if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
+ pr_warning(
+"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
+"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
+"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
+"file is not found in the buildid cache or in the vmlinux path.\n\n"
+"Samples in kernel modules won't be resolved at all.\n\n"
+"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
+"even with a suitable vmlinux or kallsyms file.\n\n");
+ }
+
if (perf_evlist__apply_filters(evlist, &pos)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
pos->filter, perf_evsel__name(pos), errno,
@@ -644,7 +877,7 @@ out:
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
- struct perf_evsel *evsel,
+ struct evsel *evsel,
struct machine *machine)
{
struct record *rec = container_of(tool, struct record, tool);
@@ -725,7 +958,7 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
};
-static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
+static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
if (rec->opts.affinity != PERF_AFFINITY_SYS &&
!CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
@@ -735,15 +968,46 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
}
}
-static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool overwrite)
+static size_t process_comp_header(void *record, size_t increment)
+{
+ struct perf_record_compressed *event = record;
+ size_t size = sizeof(*event);
+
+ if (increment) {
+ event->header.size += increment;
+ return increment;
+ }
+
+ event->header.type = PERF_RECORD_COMPRESSED;
+ event->header.size = size;
+
+ return size;
+}
+
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+ void *src, size_t src_size)
+{
+ size_t compressed;
+ size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
+
+ compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
+ max_record_size, process_comp_header);
+
+ session->bytes_transferred += src_size;
+ session->bytes_compressed += compressed;
+
+ return compressed;
+}
+
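
As context for the new -z/--compression-level path, a minimal standalone libzstd example follows (illustrative only: it uses the one-shot ZSTD_compress() rather than perf's streaming zstd_compress_stream_to_records() wrapper, and the input string is made up). It shows compressing a buffer at a chosen level and computing the kind of ratio printed at the end of a session.

/* Illustrative sketch only - not perf code. Build with -lzstd. */
#include <stdio.h>
#include <string.h>
#include <zstd.h>

int main(void)
{
	const char src[] = "PERF_RECORD_SAMPLE PERF_RECORD_SAMPLE PERF_RECORD_SAMPLE";
	char dst[256];
	int level = 1;          /* comp_level_default; -z accepts 1..22 */
	size_t n = ZSTD_compress(dst, sizeof(dst), src, strlen(src), level);

	if (ZSTD_isError(n)) {
		fprintf(stderr, "zstd: %s\n", ZSTD_getErrorName(n));
		return 1;
	}
	printf("compressed %zu -> %zu bytes (ratio %.2f)\n",
	       strlen(src), n, (double)strlen(src) / (double)n);
	return 0;
}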
+static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
+ bool overwrite, bool synch)
{
u64 bytes_written = rec->bytes_written;
int i;
int rc = 0;
- struct perf_mmap *maps;
+ struct mmap *maps;
int trace_fd = rec->data.file.fd;
- off_t off;
+ off_t off = 0;
if (!evlist)
return 0;
@@ -758,32 +1022,38 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (record__aio_enabled(rec))
off = record__aio_get_pos(trace_fd);
- for (i = 0; i < evlist->nr_mmaps; i++) {
- struct perf_mmap *map = &maps[i];
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ u64 flush = 0;
+ struct mmap *map = &maps[i];
- if (map->base) {
+ if (map->core.base) {
record__adjust_affinity(rec, map);
+ if (synch) {
+ flush = map->core.flush;
+ map->core.flush = 1;
+ }
if (!record__aio_enabled(rec)) {
- if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(map, rec, record__pushfn) < 0) {
+ if (synch)
+ map->core.flush = flush;
rc = -1;
goto out;
}
} else {
- int idx;
- /*
- * Call record__aio_sync() to wait till map->data buffer
- * becomes available after previous aio write request.
- */
- idx = record__aio_sync(map, false);
- if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
+ if (record__aio_push(rec, map, &off) < 0) {
record__aio_set_pos(trace_fd, off);
+ if (synch)
+ map->core.flush = flush;
rc = -1;
goto out;
}
}
+ if (synch)
+ map->core.flush = flush;
}
if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
+ !rec->opts.auxtrace_sample_mode &&
record__auxtrace_mmap_read(rec, map) != 0) {
rc = -1;
goto out;
@@ -806,15 +1076,15 @@ out:
return rc;
}
-static int record__mmap_read_all(struct record *rec)
+static int record__mmap_read_all(struct record *rec, bool synch)
{
int err;
- err = record__mmap_read_evlist(rec, rec->evlist, false);
+ err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
if (err)
return err;
- return record__mmap_read_evlist(rec, rec->evlist, true);
+ return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}
static void record__init_features(struct record *rec)
@@ -828,7 +1098,7 @@ static void record__init_features(struct record *rec)
if (rec->no_buildid)
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
- if (!have_tracepoints(&rec->evlist->entries))
+ if (!have_tracepoints(&rec->evlist->core.entries))
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
if (!rec->opts.branch_stack)
@@ -841,6 +1111,8 @@ static void record__init_features(struct record *rec)
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
+ if (!record__comp_enabled(rec))
+ perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -871,7 +1143,7 @@ record__finish_output(struct record *rec)
static int record__synthesize_workload(struct record *rec, bool tail)
{
int err;
- struct thread_map *thread_map;
+ struct perf_thread_map *thread_map;
if (rec->opts.tail_synthesize != tail)
return 0;
@@ -884,7 +1156,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
process_synthesized_event,
&rec->session->machines.host,
rec->opts.sample_address);
- thread_map__put(thread_map);
+ perf_thread_map__put(thread_map);
return err;
}
@@ -934,7 +1206,7 @@ record__switch_output(struct record *rec, bool at_exit)
rec->switch_output.cur_file = n;
if (rec->switch_output.filenames[n]) {
remove(rec->switch_output.filenames[n]);
- free(rec->switch_output.filenames[n]);
+ zfree(&rec->switch_output.filenames[n]);
}
rec->switch_output.filenames[n] = new_filename;
} else {
@@ -979,23 +1251,14 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);
-int __weak
-perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
- struct perf_tool *tool __maybe_unused,
- perf_event__handler_t process __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- return 0;
-}
-
static const struct perf_event_mmap_page *
-perf_evlist__pick_pc(struct perf_evlist *evlist)
+perf_evlist__pick_pc(struct evlist *evlist)
{
if (evlist) {
- if (evlist->mmap && evlist->mmap[0].base)
- return evlist->mmap[0].base;
- if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
- return evlist->overwrite_mmap[0].base;
+ if (evlist->mmap && evlist->mmap[0].core.base)
+ return evlist->mmap[0].core.base;
+ if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
+ return evlist->overwrite_mmap[0].core.base;
}
return NULL;
}
@@ -1042,7 +1305,7 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
- if (have_tracepoints(&rec->evlist->entries)) {
+ if (have_tracepoints(&rec->evlist->core.entries)) {
/*
* FIXME err <= 0 here actually means that
* there were no tracepoints so its not really
@@ -1066,6 +1329,15 @@ static int record__synthesize(struct record *rec, bool tail)
if (err)
goto out;
+ /* Synthesize id_index before auxtrace_info */
+ if (rec->opts.auxtrace_sample_mode) {
+ err = perf_event__synthesize_id_index(tool,
+ process_synthesized_event,
+ session->evlist, machine);
+ if (err)
+ goto out;
+ }
+
if (rec->opts.full_auxtrace) {
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
session, process_synthesized_event);
@@ -1099,7 +1371,7 @@ static int record__synthesize(struct record *rec, bool tail)
if (err)
goto out;
- err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
+ err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
process_synthesized_event,
NULL);
if (err < 0) {
@@ -1107,7 +1379,7 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
- err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
+ err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
process_synthesized_event, NULL);
if (err < 0) {
pr_err("Couldn't synthesize cpu map.\n");
@@ -1119,7 +1391,7 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize bpf events.\n");
- err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+ err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
process_synthesized_event, opts->sample_address,
1);
out:
@@ -1137,8 +1409,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct perf_data *data = &rec->data;
struct perf_session *session;
bool disabled = false, draining = false;
- struct perf_evlist *sb_evlist = NULL;
+ struct evlist *sb_evlist = NULL;
int fd;
+ float ratio = 0;
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
@@ -1160,14 +1433,28 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
session = perf_session__new(data, false, tool);
- if (session == NULL) {
+ if (IS_ERR(session)) {
pr_err("Perf session creation failed.\n");
- return -1;
+ return PTR_ERR(session);
}
fd = perf_data__fd(data);
rec->session = session;
+ if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
+ pr_err("Compression initialization failed.\n");
+ return -1;
+ }
+
+ session->header.env.comp_type = PERF_COMP_ZSTD;
+ session->header.env.comp_level = rec->opts.comp_level;
+
+ if (rec->opts.kcore &&
+ !record__kcore_readable(&session->machines.host)) {
+ pr_err("ERROR: kcore is not readable.\n");
+ return -1;
+ }
+
record__init_features(rec);
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@@ -1190,13 +1477,22 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* because we synthesize event name through the pipe
* and need the id for that.
*/
- if (data->is_pipe && rec->evlist->nr_entries == 1)
+ if (data->is_pipe && rec->evlist->core.nr_entries == 1)
rec->opts.sample_id = true;
if (record__open(rec) != 0) {
err = -1;
goto out_child;
}
+ session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
+
+ if (rec->opts.kcore) {
+ err = record__kcore_copy(&session->machines.host, data);
+ if (err) {
+ pr_err("ERROR: Failed to copy kcore\n");
+ goto out_child;
+ }
+ }
err = bpf__apply_obj_config();
if (err) {
@@ -1267,7 +1563,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* so don't spoil it by prematurely enabling them.
*/
if (!target__none(&opts->target) && !opts->initial_delay)
- perf_evlist__enable(rec->evlist);
+ evlist__enable(rec->evlist);
/*
* Let the child rip
@@ -1320,7 +1616,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (opts->initial_delay) {
usleep(opts->initial_delay * USEC_PER_MSEC);
- perf_evlist__enable(rec->evlist);
+ evlist__enable(rec->evlist);
}
trigger_ready(&auxtrace_snapshot_trigger);
@@ -1340,7 +1636,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (trigger_is_hit(&switch_output_trigger) || done || draining)
perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
- if (record__mmap_read_all(rec) < 0) {
+ if (record__mmap_read_all(rec, false) < 0) {
trigger_error(&auxtrace_snapshot_trigger);
trigger_error(&switch_output_trigger);
err = -1;
@@ -1350,7 +1646,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (auxtrace_record__snapshot_started) {
auxtrace_record__snapshot_started = 0;
if (!trigger_is_error(&auxtrace_snapshot_trigger))
- record__read_auxtrace_snapshot(rec);
+ record__read_auxtrace_snapshot(rec, false);
if (trigger_is_error(&auxtrace_snapshot_trigger)) {
pr_err("AUX area tracing snapshot failed\n");
err = -1;
@@ -1399,7 +1695,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (hits == rec->samples) {
if (done || draining)
break;
- err = perf_evlist__poll(rec->evlist, -1);
+ err = evlist__poll(rec->evlist, -1);
/*
* Propagate error, only if there's any. Ignore positive
* number of returned events and interrupt error.
@@ -1408,7 +1704,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = 0;
waking++;
- if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
+ if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
draining = true;
}
@@ -1419,13 +1715,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
*/
if (done && !disabled && !target__none(&opts->target)) {
trigger_off(&auxtrace_snapshot_trigger);
- perf_evlist__disable(rec->evlist);
+ evlist__disable(rec->evlist);
disabled = true;
}
}
+
trigger_off(&auxtrace_snapshot_trigger);
trigger_off(&switch_output_trigger);
+ if (opts->auxtrace_snapshot_on_exit)
+ record__auxtrace_snapshot_exit(rec);
+
if (forks && workload_exec_errno) {
char msg[STRERR_BUFSIZE];
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -1441,8 +1741,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
+ if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
+ ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
+ session->header.env.comp_ratio = ratio + 0.5;
+ }
+
if (forks) {
int exit_status;
@@ -1489,12 +1795,19 @@ out_child:
else
samples[0] = '\0';
- fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
+ fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
perf_data__size(data) / 1024.0 / 1024.0,
data->path, postfix, samples);
+ if (ratio) {
+ fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
+ rec->session->bytes_transferred / 1024.0 / 1024.0,
+ ratio);
+ }
+ fprintf(stderr, " ]\n");
}
out_delete_session:
+ zstd_fini(&session->zstd_data);
perf_session__delete(session);
if (!opts->no_bpf_event)
@@ -1703,6 +2016,33 @@ static int record__parse_affinity(const struct option *opt, const char *str, int
return 0;
}
+static int parse_output_max_size(const struct option *opt,
+ const char *str, int unset)
+{
+ unsigned long *s = (unsigned long *)opt->value;
+ static struct parse_tag tags_size[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+ unsigned long val;
+
+ if (unset) {
+ *s = 0;
+ return 0;
+ }
+
+ val = parse_tag_value(str, tags_size);
+ if (val != (unsigned long) -1) {
+ *s = val;
+ return 0;
+ }
+
+ return -1;
+}
+
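
Both --max-size (just above) and --mmap-flush accept a number with an optional B/K/M/G suffix via parse_tag_value(). A small standalone sketch of that kind of parsing follows (a hypothetical parse_size() helper for illustration, not perf's implementation; it ignores trailing characters after the suffix).

/* Illustrative sketch only - not perf code. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned long parse_size(const char *str)
{
	char *end;
	unsigned long val = strtoul(str, &end, 10);

	switch (toupper((unsigned char)*end)) {
	case '\0':
	case 'B': return val;
	case 'K': return val << 10;
	case 'M': return val << 20;
	case 'G': return val << 30;
	default:  return (unsigned long)-1;	/* bad suffix, like parse_tag_value() */
	}
}

int main(void)
{
	printf("%lu\n", parse_size("100M"));	/* 104857600 */
	printf("%lu\n", parse_size("16K"));	/* 16384 */
	return 0;
}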
static int record__parse_mmap_pages(const struct option *opt,
const char *str,
int unset __maybe_unused)
@@ -1748,7 +2088,7 @@ out_free:
static void switch_output_size_warn(struct record *rec)
{
- u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
+ u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
struct switch_output *s = &rec->switch_output;
wakeup_size /= 2;
@@ -1825,6 +2165,31 @@ static const char * const __record_usage[] = {
};
const char * const *record_usage = __record_usage;
+static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine)
+{
+ /*
+ * We already have the kernel maps, put in place via perf_session__create_kernel_maps();
+ * no need to add them twice.
+ */
+ if (!(event->header.misc & PERF_RECORD_MISC_USER))
+ return 0;
+ return perf_event__process_mmap(tool, event, sample, machine);
+}
+
+static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine)
+{
+ /*
+ * We already have the kernel maps, put in place via perf_session__create_kernel_maps();
+ * no need to add them twice.
+ */
+ if (!(event->header.misc & PERF_RECORD_MISC_USER))
+ return 0;
+
+ return perf_event__process_mmap2(tool, event, sample, machine);
+}
+
/*
* XXX Ideally would be local to cmd_record() and passed to a record__new
* because we need to have access to it in record__exit, that is called
@@ -1846,6 +2211,7 @@ static struct record record = {
.uses_mmap = true,
.default_per_cpu = true,
},
+ .mmap_flush = MMAP_FLUSH_DEFAULT,
},
.tool = {
.sample = process_sample_event,
@@ -1853,8 +2219,8 @@ static struct record record = {
.exit = perf_event__process_exit,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
- .mmap = perf_event__process_mmap,
- .mmap2 = perf_event__process_mmap2,
+ .mmap = build_id__process_mmap,
+ .mmap2 = build_id__process_mmap2,
.ordered_events = true,
},
};
@@ -1912,6 +2278,9 @@ static struct option __record_options[] = {
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
"number of mmap data pages and AUX area tracing mmap pages",
record__parse_mmap_pages),
+ OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
+ "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
+ record__mmap_flush_parse),
OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
@@ -1947,6 +2316,7 @@ static struct option __record_options[] = {
parse_cgroups),
OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
"ms to wait before starting measurement after program start"),
+ OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
"user to profile"),
@@ -1965,10 +2335,10 @@ static struct option __record_options[] = {
"use per-thread mmaps"),
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
"sample selected machine registers on interrupt,"
- " use -I ? to list register names", parse_regs),
+ " use '-I?' to list register names", parse_intr_regs),
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
"sample selected machine registers on interrupt,"
- " use -I ? to list register names", parse_regs),
+ " use '--user-regs=?' to list register names", parse_user_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts,
@@ -1976,6 +2346,8 @@ static struct option __record_options[] = {
parse_clockid),
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
"opts", "AUX area tracing Snapshot Mode", ""),
+ OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
+ "opts", "sample AUX area", ""),
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
@@ -1988,6 +2360,10 @@ static struct option __record_options[] = {
OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
+ "collect kernel callchains"),
+ OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
+ "collect user callchains"),
OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
@@ -2016,6 +2392,13 @@ static struct option __record_options[] = {
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
+#ifdef HAVE_ZSTD_SUPPORT
+ OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
+ "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
+ record__parse_comp_level),
+#endif
+ OPT_CALLBACK(0, "max-size", &record.output_max_size,
+ "size", "Limit the maximum size of the output file", parse_output_max_size),
OPT_END()
};
@@ -2053,7 +2436,7 @@ int cmd_record(int argc, const char **argv)
CPU_ZERO(&rec->affinity_mask);
rec->opts.affinity = PERF_AFFINITY_SYS;
- rec->evlist = perf_evlist__new();
+ rec->evlist = evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
@@ -2075,6 +2458,15 @@ int cmd_record(int argc, const char **argv)
"cgroup monitoring only available in system-wide mode");
}
+
+ if (rec->opts.kcore)
+ rec->data.is_dir = true;
+
+ if (rec->opts.comp_level != 0) {
+ pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
+ rec->no_buildid = true;
+ }
+
if (rec->opts.record_switch_events &&
!perf_can_record_switch_events()) {
ui__error("kernel does not support recording context switch events\n");
@@ -2124,16 +2516,6 @@ int cmd_record(int argc, const char **argv)
err = -ENOMEM;
- if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
- pr_warning(
-"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
-"check /proc/sys/kernel/kptr_restrict.\n\n"
-"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
-"file is not found in the buildid cache or in the vmlinux path.\n\n"
-"Samples in kernel modules won't be resolved at all.\n\n"
-"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
-"even with a suitable vmlinux or kallsyms file.\n\n");
-
if (rec->no_buildid_cache || rec->no_buildid) {
disable_buildid_cache();
} else if (rec->switch_output.enabled) {
@@ -2168,7 +2550,7 @@ int cmd_record(int argc, const char **argv)
if (record.opts.overwrite)
record.opts.tail_synthesize = true;
- if (rec->evlist->nr_entries == 0 &&
+ if (rec->evlist->core.nr_entries == 0 &&
__perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out;
@@ -2220,14 +2602,18 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.nr_cblocks > nr_cblocks_max)
rec->opts.nr_cblocks = nr_cblocks_max;
- if (verbose > 0)
- pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+ pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
+ pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
+
+ if (rec->opts.comp_level > comp_level_max)
+ rec->opts.comp_level = comp_level_max;
+ pr_debug("comp level: %d\n", rec->opts.comp_level);
err = __cmd_record(&record, argc, argv);
out:
- perf_evlist__delete(rec->evlist);
+ evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
return err;