Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--   tools/perf/builtin-stat.c   368
1 file changed, 270 insertions(+), 98 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 49ee3c2033ec..0a15253b438c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-stat.c * @@ -37,14 +38,11 @@ * Mike Galbraith <efault@gmx.de> * Paul Mackerras <paulus@samba.org> * Jaswinder Singh Rajput <jaswinder@kernel.org> - * - * Released under the GPL v2. (and only v2, not any later version) */ -#include "perf.h" #include "builtin.h" +#include "perf.h" #include "util/cgroup.h" -#include "util/util.h" #include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/pmu.h" @@ -56,7 +54,6 @@ #include "util/stat.h" #include "util/header.h" #include "util/cpumap.h" -#include "util/thread.h" #include "util/thread_map.h" #include "util/counts.h" #include "util/group.h" @@ -64,10 +61,14 @@ #include "util/tool.h" #include "util/string2.h" #include "util/metricgroup.h" +#include "util/synthetic-events.h" +#include "util/target.h" +#include "util/time-utils.h" #include "util/top.h" #include "asm/bug.h" #include <linux/time64.h> +#include <linux/zalloc.h> #include <api/fs/fs.h> #include <errno.h> #include <signal.h> @@ -82,8 +83,10 @@ #include <unistd.h> #include <sys/time.h> #include <sys/resource.h> +#include <linux/err.h> -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <perf/evlist.h> #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" @@ -131,7 +134,7 @@ static const char *smi_cost_attrs = { "}" }; -static struct perf_evlist *evsel_list; +static struct evlist *evsel_list; static struct target target = { .uid = UINT_MAX, @@ -165,8 +168,8 @@ struct perf_stat { u64 bytes_written; struct perf_tool tool; bool maps_allocated; - struct cpu_map *cpus; - struct thread_map *threads; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; enum aggr_mode aggr_mode; }; @@ -232,10 +235,10 @@ static int write_stat_round_event(u64 tm, u64 type) #define WRITE_STAT_ROUND_EVENT(time, interval) \ write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) -#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) +#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) static int -perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, +perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, struct perf_counts_values *count) { struct perf_sample_id *sid = SID(counter, cpu, thread); @@ -244,13 +247,27 @@ perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, process_synthesized_event, NULL); } +static int read_single_counter(struct evsel *counter, int cpu, + int thread, struct timespec *rs) +{ + if (counter->tool_event == PERF_TOOL_DURATION_TIME) { + u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; + struct perf_counts_values *count = + perf_counts(counter->counts, cpu, thread); + count->ena = count->run = val; + count->val = val; + return 0; + } + return perf_evsel__read_counter(counter, cpu, thread); +} + /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter(struct perf_evsel *counter) +static int read_counter(struct evsel *counter, struct timespec *rs) { - int nthreads = thread_map__nr(evsel_list->threads); + int nthreads = perf_thread_map__nr(evsel_list->core.threads); int ncpus, cpu, thread; if (target__has_cpu(&target) && !target__has_per_thread(&target)) @@ -261,7 +278,7 @@ static int read_counter(struct perf_evsel *counter) if 
(!counter->supported) return -ENOENT; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { @@ -274,15 +291,15 @@ static int read_counter(struct perf_evsel *counter) * The leader's group read loads data into its group members * (via perf_evsel__read_counter) and sets threir count->loaded. */ - if (!count->loaded && - perf_evsel__read_counter(counter, cpu, thread)) { + if (!perf_counts__is_loaded(counter->counts, cpu, thread) && + read_single_counter(counter, cpu, thread, rs)) { counter->counts->scaled = -1; perf_counts(counter->counts, cpu, thread)->ena = 0; perf_counts(counter->counts, cpu, thread)->run = 0; return -1; } - count->loaded = false; + perf_counts__set_loaded(counter->counts, cpu, thread, false); if (STAT_RECORD) { if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { @@ -304,13 +321,13 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void read_counters(void) +static void read_counters(struct timespec *rs) { - struct perf_evsel *counter; + struct evsel *counter; int ret; evlist__for_each_entry(evsel_list, counter) { - ret = read_counter(counter); + ret = read_counter(counter, rs); if (ret) pr_debug("failed to read counter %s\n", counter->name); @@ -323,11 +340,11 @@ static void process_interval(void) { struct timespec ts, rs; - read_counters(); - clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); + read_counters(&rs); + if (STAT_RECORD) { if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) pr_err("failed to write stat round event\n"); @@ -349,7 +366,7 @@ static void enable_counters(void) * - we have initial delay configured */ if (!target__none(&target) || stat_config.initial_delay) - perf_evlist__enable(evsel_list); + evlist__enable(evsel_list); } static void disable_counters(void) @@ -360,7 +377,7 @@ static void disable_counters(void) * from counting before reading their constituent counters. */ if (!target__none(&target)) - perf_evlist__disable(evsel_list); + evlist__disable(evsel_list); } static volatile int workload_exec_errno; @@ -376,13 +393,13 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static bool perf_evsel__should_store_id(struct perf_evsel *counter) +static bool perf_evsel__should_store_id(struct evsel *counter) { - return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; + return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID; } static bool is_target_alive(struct target *_target, - struct thread_map *threads) + struct perf_thread_map *threads) { struct stat st; int i; @@ -410,7 +427,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int timeout = stat_config.timeout; char msg[BUFSIZ]; unsigned long long t0, t1; - struct perf_evsel *counter; + struct evsel *counter; struct timespec ts; size_t l; int status = 0; @@ -465,22 +482,22 @@ try_again: counter->supported = false; if ((counter->leader != counter) || - !(counter->leader->nr_members > 1)) + !(counter->leader->core.nr_members > 1)) continue; } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; } else if (target__has_per_thread(&target) && - evsel_list->threads && - evsel_list->threads->err_thread != -1) { + evsel_list->core.threads && + evsel_list->core.threads->err_thread != -1) { /* * For global --per-thread case, skip current * error thread. 
*/ - if (!thread_map__remove(evsel_list->threads, - evsel_list->threads->err_thread)) { - evsel_list->threads->err_thread = -1; + if (!thread_map__remove(evsel_list->core.threads, + evsel_list->core.threads->err_thread)) { + evsel_list->core.threads->err_thread = -1; goto try_again; } } @@ -525,8 +542,8 @@ try_again: if (err < 0) return err; - err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, - process_synthesized_event, is_pipe); + err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, + process_synthesized_event, is_pipe); if (err < 0) return err; } @@ -566,7 +583,7 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); - if (!is_target_alive(&target, evsel_list->threads)) + if (!is_target_alive(&target, evsel_list->core.threads)) break; if (timeout) break; @@ -593,8 +610,14 @@ try_again: * avoid arbitrary skew, we must read all counters before closing any * group leaders. */ - read_counters(); - perf_evlist__close(evsel_list); + read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); + + /* + * We need to keep evsel_list alive, because it's processed + * later the evsel_list will be closed after. + */ + if (!STAT_RECORD) + evlist__close(evsel_list); return WEXITSTATUS(status); } @@ -763,10 +786,14 @@ static struct option stat_options[] = { "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, "aggregate counts per thread", AGGR_THREAD), + OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode, + "aggregate counts per numa node", AGGR_NODE), OPT_UINTEGER('D', "delay", &stat_config.initial_delay, "ms to wait before starting measurement after program start"), OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, @@ -778,35 +805,41 @@ static struct option stat_options[] = { OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", "monitor specified metrics or metric groups (separated by ,)", parse_metric_groups), + OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel, + "Configure all used events to run in kernel space.", + PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, + "Configure all used events to run in user space.", + PARSE_OPT_EXCLUSIVE), OPT_END() }; static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, - struct cpu_map *map, int cpu) + struct perf_cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); } +static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, + struct perf_cpu_map *map, int cpu) +{ + return cpu_map__get_die(map, cpu, NULL); +} + static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, - struct cpu_map *map, int cpu) + struct perf_cpu_map *map, int cpu) { return cpu_map__get_core(map, cpu, NULL); } -static int cpu_map__get_max(struct cpu_map *map) +static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused, + struct perf_cpu_map *map, int cpu) { - int i, max = -1; - - for (i = 0; i < map->nr; i++) { - if (map->map[i] > max) - max = map->map[i]; - } - - return max; + return cpu_map__get_node(map, cpu, NULL); } static int perf_stat__get_aggr(struct perf_stat_config 
*config, - aggr_get_id_t get_id, struct cpu_map *map, int idx) + aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { int cpu; @@ -822,37 +855,84 @@ static int perf_stat__get_aggr(struct perf_stat_config *config, } static int perf_stat__get_socket_cached(struct perf_stat_config *config, - struct cpu_map *map, int idx) + struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } +static int perf_stat__get_die_cached(struct perf_stat_config *config, + struct perf_cpu_map *map, int idx) +{ + return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); +} + static int perf_stat__get_core_cached(struct perf_stat_config *config, - struct cpu_map *map, int idx) + struct perf_cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } +static int perf_stat__get_node_cached(struct perf_stat_config *config, + struct perf_cpu_map *map, int idx) +{ + return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); +} + +static bool term_percore_set(void) +{ + struct evsel *counter; + + evlist__for_each_entry(evsel_list, counter) { + if (counter->percore) + return true; + } + + return false; +} + static int perf_stat_init_aggr_mode(void) { int nr; switch (stat_config.aggr_mode) { case AGGR_SOCKET: - if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) { + if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) { perror("cannot build socket map"); return -1; } stat_config.aggr_get_id = perf_stat__get_socket_cached; break; + case AGGR_DIE: + if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_cached; + break; case AGGR_CORE: - if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { + if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) { perror("cannot build core map"); return -1; } stat_config.aggr_get_id = perf_stat__get_core_cached; break; + case AGGR_NODE: + if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_node_cached; + break; case AGGR_NONE: + if (term_percore_set()) { + if (cpu_map__build_core_map(evsel_list->core.cpus, + &stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_core_cached; + } + break; case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: @@ -865,20 +945,20 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - nr = cpu_map__get_max(evsel_list->cpus); - stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1); + nr = perf_cpu_map__max(evsel_list->core.cpus); + stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 
0 : -ENOMEM; } static void perf_stat__exit_aggr_mode(void) { - cpu_map__put(stat_config.aggr_map); - cpu_map__put(stat_config.cpus_aggr_map); + perf_cpu_map__put(stat_config.aggr_map); + perf_cpu_map__put(stat_config.cpus_aggr_map); stat_config.aggr_map = NULL; stat_config.cpus_aggr_map = NULL; } -static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) +static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx) { int cpu; @@ -893,7 +973,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, i return cpu; } -static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) +static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; int cpu = perf_env__get_cpu(env, map, idx); @@ -901,69 +981,147 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) return cpu == -1 ? -1 : env->cpu[cpu].socket_id; } -static int perf_env__get_core(struct cpu_map *map, int idx, void *data) +static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) { struct perf_env *env = data; - int core = -1, cpu = perf_env__get_cpu(env, map, idx); + int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { - int socket_id = env->cpu[cpu].socket_id; + /* + * Encode socket in bit range 15:8 + * die_id is relative to socket, + * we need a global id. So we combine + * socket + die id + */ + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + } + + return die_id; +} + +static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +{ + struct perf_env *env = data; + int core = -1, cpu = perf_env__get_cpu(env, map, idx); + if (cpu != -1) { /* - * Encode socket in upper 16 bits - * core_id is relative to socket, and + * Encode socket in bit range 31:24 + * encode die id in bit range 23:16 + * core_id is relative to socket and die, * we need a global id. So we combine - * socket + core id. 
+ * socket + die id + core id */ - core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) + return -1; + + core = (env->cpu[cpu].socket_id << 24) | + (env->cpu[cpu].die_id << 16) | + (env->cpu[cpu].core_id & 0xffff); } return core; } -static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, - struct cpu_map **sockp) +static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +{ + int cpu = perf_env__get_cpu(data, map, idx); + + return perf_env__numa_node(data, cpu); +} + +static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct perf_cpu_map **sockp) { return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } -static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, - struct cpu_map **corep) +static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct perf_cpu_map **diep) +{ + return cpu_map__build_map(cpus, diep, perf_env__get_die, env); +} + +static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct perf_cpu_map **corep) { return cpu_map__build_map(cpus, corep, perf_env__get_core, env); } +static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, + struct perf_cpu_map **nodep) +{ + return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); +} + static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, - struct cpu_map *map, int idx) + struct perf_cpu_map *map, int idx) { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } +static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu_map *map, int idx) +{ + return perf_env__get_die(map, idx, &perf_stat.session->header.env); +} static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, - struct cpu_map *map, int idx) + struct perf_cpu_map *map, int idx) { return perf_env__get_core(map, idx, &perf_stat.session->header.env); } +static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu_map *map, int idx) +{ + return perf_env__get_node(map, idx, &perf_stat.session->header.env); +} + static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { struct perf_env *env = &st->session->header.env; switch (stat_config.aggr_mode) { case AGGR_SOCKET: - if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) { + if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { perror("cannot build socket map"); return -1; } stat_config.aggr_get_id = perf_stat__get_socket_file; break; + case AGGR_DIE: + if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_file; + break; case AGGR_CORE: - if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { + if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { perror("cannot build core map"); return -1; } stat_config.aggr_get_id = perf_stat__get_core_file; break; + case AGGR_NODE: + if (perf_env__build_node_map(env, evsel_list->core.cpus, 
&stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_node_file; + break; case AGGR_NONE: case AGGR_GLOBAL: case AGGR_THREAD: @@ -1149,6 +1307,7 @@ static int add_default_attributes(void) if (stat_config.null_run) return 0; + bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics * support has been added to the json files, all archictures @@ -1206,6 +1365,7 @@ static int add_default_attributes(void) return -1; } if (err) { + parse_events_print_error(&errinfo, smi_cost_attrs); fprintf(stderr, "Cannot set up SMI cost events\n"); return -1; } @@ -1242,8 +1402,8 @@ static int add_default_attributes(void) fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); - free(str); parse_events_print_error(&errinfo, str); + free(str); return -1; } } else { @@ -1253,7 +1413,7 @@ static int add_default_attributes(void) free(str); } - if (!evsel_list->nr_entries) { + if (!evsel_list->core.nr_entries) { if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; @@ -1308,6 +1468,7 @@ static void init_features(struct perf_session *session) for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) perf_header__set_feat(&session->header, feat); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); perf_header__clear_feat(&session->header, HEADER_BUILD_ID); perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); @@ -1331,9 +1492,9 @@ static int __cmd_record(int argc, const char **argv) } session = perf_session__new(data, false, NULL); - if (session == NULL) { - pr_err("Perf session creation failed.\n"); - return -1; + if (IS_ERR(session)) { + pr_err("Perf session creation failed\n"); + return PTR_ERR(session); } init_features(session); @@ -1347,8 +1508,8 @@ static int __cmd_record(int argc, const char **argv) static int process_stat_round_event(struct perf_session *session, union perf_event *event) { - struct stat_round_event *stat_round = &event->stat_round; - struct perf_evsel *counter; + struct perf_record_stat_round *stat_round = &event->stat_round; + struct evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; @@ -1378,7 +1539,7 @@ int process_stat_config_event(struct perf_session *session, perf_event__read_stat_config(&stat_config, &event->stat_config); - if (cpu_map__empty(st->cpus)) { + if (perf_cpu_map__empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); return 0; @@ -1403,7 +1564,7 @@ static int set_maps(struct perf_stat *st) if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) return -EINVAL; - perf_evlist__set_maps(evsel_list, st->cpus, st->threads); + perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); if (perf_evlist__alloc_stats(evsel_list, true)) return -ENOMEM; @@ -1437,7 +1598,7 @@ int process_cpu_map_event(struct perf_session *session, { struct perf_tool *tool = session->tool; struct perf_stat *st = container_of(tool, struct perf_stat, tool); - struct cpu_map *cpus; + struct perf_cpu_map *cpus; if (st->cpus) { pr_warning("Extra cpu map event, ignoring.\n"); @@ -1478,7 +1639,7 @@ static void runtime_stat_delete(struct perf_stat_config *config) for (i = 0; i < config->stats_num; i++) runtime_stat__exit(&config->stats[i]); - free(config->stats); + 
zfree(&config->stats); } static const char * const stat_report_usage[] = { @@ -1506,8 +1667,12 @@ static int __cmd_report(int argc, const char **argv) OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode, + "aggregate counts per numa node", AGGR_NODE), OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, "disable CPU count aggregation", AGGR_NONE), OPT_END() @@ -1528,8 +1693,8 @@ static int __cmd_report(int argc, const char **argv) perf_stat.data.mode = PERF_DATA_MODE_READ; session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); perf_stat.session = session; stat_config.output = stderr; @@ -1560,14 +1725,14 @@ static void setup_system_wide(int forks) if (!forks) target.system_wide = true; else { - struct perf_evsel *counter; + struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->system_wide) + if (!counter->core.system_wide) return; } - if (evsel_list->nr_entries) + if (evsel_list->core.nr_entries) target.system_wide = true; } } @@ -1586,7 +1751,7 @@ int cmd_stat(int argc, const char **argv) setlocale(LC_ALL, ""); - evsel_list = perf_evlist__new(); + evsel_list = evlist__new(); if (evsel_list == NULL) return -ENOMEM; @@ -1773,15 +1938,18 @@ int cmd_stat(int argc, const char **argv) * so we could print it out on output. */ if (stat_config.aggr_mode == AGGR_THREAD) { - thread_map__read_comms(evsel_list->threads); + thread_map__read_comms(evsel_list->core.threads); if (target.system_wide) { if (runtime_stat_new(&stat_config, - thread_map__nr(evsel_list->threads))) { + perf_thread_map__nr(evsel_list->core.threads))) { goto out; } } } + if (stat_config.aggr_mode == AGGR_NODE) + cpu__setup_cpunode_map(); + if (stat_config.times && interval) interval_count = true; else if (stat_config.times && !interval) { @@ -1845,8 +2013,11 @@ int cmd_stat(int argc, const char **argv) fprintf(output, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); + if (run_idx != 0) + perf_evlist__reset_prev_raw_counts(evsel_list); + status = run_perf_stat(argc, argv, run_idx); - if (forever && status != -1) { + if (forever && status != -1 && !interval) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); } @@ -1887,18 +2058,19 @@ int cmd_stat(int argc, const char **argv) perf_session__write_header(perf_stat.session, evsel_list, fd, true); } + evlist__close(evsel_list); perf_session__delete(perf_stat.session); } perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: - free(stat_config.walltime_run); + zfree(&stat_config.walltime_run); if (smi_cost && smi_reset) sysfs__write_int(FREEZE_ON_SMI_PATH, 0); - perf_evlist__delete(evsel_list); + evlist__delete(evsel_list); runtime_stat_delete(&stat_config); |
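
Note on the aggregation changes above: the patch widens the synthetic aggregation id so a die id fits between the socket and core fields (socket in bits 31:24, die in 23:16, core in 15:0 for --per-core; socket in bits 15:8 and die in 7:0 for --per-die), guarded by WARN_ONCE() range checks. The following is a minimal, self-contained sketch of that packing scheme only; the struct and function names are illustrative stand-ins, not the perf API.

#include <stdio.h>

/* Illustrative stand-in for perf's per-CPU topology record. */
struct cpu_topo {
	int socket_id;
	int die_id;
	int core_id;
};

/*
 * Pack socket + die into one id, mirroring the --per-die scheme:
 * socket in bits 15:8, die in bits 7:0. Returns -1 if a field would
 * overflow its range, as the patch does via WARN_ONCE().
 */
static int pack_die_id(const struct cpu_topo *t)
{
	if (t->socket_id >> 8 || t->die_id >> 8)
		return -1;
	return (t->socket_id << 8) | (t->die_id & 0xff);
}

/*
 * Pack socket + die + core, mirroring the --per-core scheme:
 * socket in bits 31:24, die in 23:16, core in 15:0.
 */
static int pack_core_id(const struct cpu_topo *t)
{
	if (t->socket_id >> 8 || t->die_id >> 8 || t->core_id >> 16)
		return -1;
	return (t->socket_id << 24) | (t->die_id << 16) | (t->core_id & 0xffff);
}

int main(void)
{
	struct cpu_topo t = { .socket_id = 1, .die_id = 2, .core_id = 3 };

	printf("die id:  0x%x\n", pack_die_id(&t));   /* 0x102 */
	printf("core id: 0x%x\n", pack_core_id(&t));  /* 0x1020003 */
	return 0;
}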
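The new read_single_counter() path fills the duration_time tool event from the elapsed wall-clock timespec instead of reading a kernel counter, setting val, ena and run to the same nanosecond value so scaling stays at 1.0. Below is a hedged sketch of that conversion under a simplified counts structure; in the real code the result is written into perf_counts(counter->counts, cpu, thread).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Simplified stand-in for struct perf_counts_values. */
struct counts_values {
	uint64_t val;
	uint64_t ena;
	uint64_t run;
};

/*
 * Convert an elapsed timespec to nanoseconds and store it as the value
 * of a duration_time-style software event; enabled and running time are
 * set to the same value so no scaling is applied.
 */
static void fill_duration_time(struct counts_values *count,
			       const struct timespec *rs)
{
	uint64_t ns = rs->tv_sec * 1000000000ULL + rs->tv_nsec;

	count->val = ns;
	count->ena = ns;
	count->run = ns;
}

int main(void)
{
	struct timespec start, end, diff;
	struct counts_values count;

	clock_gettime(CLOCK_MONOTONIC, &start);
	/* ... workload would run here ... */
	clock_gettime(CLOCK_MONOTONIC, &end);

	diff.tv_sec = end.tv_sec - start.tv_sec;
	diff.tv_nsec = end.tv_nsec - start.tv_nsec;
	if (diff.tv_nsec < 0) {
		diff.tv_sec--;
		diff.tv_nsec += 1000000000L;
	}

	fill_duration_time(&count, &diff);
	printf("duration_time: %" PRIu64 " ns\n", count.val);
	return 0;
}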
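The new --per-node mode resolves each CPU to its NUMA node (perf_env__get_node() ultimately relies on perf_env__numa_node() and the cpunode map set up in cmd_stat()). As a rough illustration only, and not how perf itself implements the lookup, a CPU's node can be discovered from sysfs by scanning its cpu directory for a nodeN entry:

#include <dirent.h>
#include <limits.h>
#include <stdio.h>

/*
 * Illustrative only: find the NUMA node of a CPU by scanning
 * /sys/devices/system/cpu/cpuN/ for a "nodeM" directory entry.
 * perf builds this mapping from its own topology tables instead.
 */
static int cpu_to_node(int cpu)
{
	char path[PATH_MAX];
	struct dirent *de;
	DIR *dir;
	int node = -1;

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d", cpu);
	dir = opendir(path);
	if (!dir)
		return -1;

	while ((de = readdir(dir)) != NULL) {
		unsigned int n;

		if (sscanf(de->d_name, "node%u", &n) == 1) {
			node = (int)n;
			break;
		}
	}
	closedir(dir);
	return node;
}

int main(void)
{
	printf("cpu0 is on node %d\n", cpu_to_node(0));
	return 0;
}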