Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--   tools/perf/builtin-stat.c   512
1 file changed, 346 insertions(+), 166 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b55a534b4de0..a098c2ebf4ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -40,8 +40,8 @@
* Jaswinder Singh Rajput <jaswinder@kernel.org>
*/
-#include "perf.h"
#include "builtin.h"
+#include "perf.h"
#include "util/cgroup.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
@@ -54,7 +54,6 @@
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
-#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/group.h"
@@ -62,7 +61,11 @@
#include "util/tool.h"
#include "util/string2.h"
#include "util/metricgroup.h"
+#include "util/synthetic-events.h"
+#include "util/target.h"
+#include "util/time-utils.h"
#include "util/top.h"
+#include "util/affinity.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -81,8 +84,10 @@
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
+#include <linux/err.h>
#include <linux/ctype.h>
+#include <perf/evlist.h>
#define DEFAULT_SEPARATOR " "
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
@@ -130,7 +135,7 @@ static const char *smi_cost_attrs = {
"}"
};
-static struct perf_evlist *evsel_list;
+static struct evlist *evsel_list;
static struct target target = {
.uid = UINT_MAX,
@@ -164,8 +169,8 @@ struct perf_stat {
u64 bytes_written;
struct perf_tool tool;
bool maps_allocated;
- struct cpu_map *cpus;
- struct thread_map *threads;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
enum aggr_mode aggr_mode;
};
@@ -231,10 +236,10 @@ static int write_stat_round_event(u64 tm, u64 type)
#define WRITE_STAT_ROUND_EVENT(time, interval) \
write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
-#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
static int
-perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
+perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
struct perf_counts_values *count)
{
struct perf_sample_id *sid = SID(counter, cpu, thread);
@@ -243,7 +248,7 @@ perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
process_synthesized_event, NULL);
}
-static int read_single_counter(struct perf_evsel *counter, int cpu,
+static int read_single_counter(struct evsel *counter, int cpu,
int thread, struct timespec *rs)
{
if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
@@ -261,57 +266,50 @@ static int read_single_counter(struct perf_evsel *counter, int cpu,
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter(struct perf_evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
{
- int nthreads = thread_map__nr(evsel_list->threads);
- int ncpus, cpu, thread;
-
- if (target__has_cpu(&target) && !target__has_per_thread(&target))
- ncpus = perf_evsel__nr_cpus(counter);
- else
- ncpus = 1;
+ int nthreads = perf_thread_map__nr(evsel_list->core.threads);
+ int thread;
if (!counter->supported)
return -ENOENT;
- if (counter->system_wide)
+ if (counter->core.system_wide)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- struct perf_counts_values *count;
-
- count = perf_counts(counter->counts, cpu, thread);
-
- /*
- * The leader's group read loads data into its group members
- * (via perf_evsel__read_counter) and sets threir count->loaded.
- */
- if (!count->loaded &&
- read_single_counter(counter, cpu, thread, rs)) {
- counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
- return -1;
- }
+ struct perf_counts_values *count;
- count->loaded = false;
+ count = perf_counts(counter->counts, cpu, thread);
- if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
- pr_err("failed to write stat event\n");
- return -1;
- }
- }
+ /*
+ * The leader's group read loads data into its group members
+ * (via perf_evsel__read_counter()) and sets their count->loaded.
+ */
+ if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+ read_single_counter(counter, cpu, thread, rs)) {
+ counter->counts->scaled = -1;
+ perf_counts(counter->counts, cpu, thread)->ena = 0;
+ perf_counts(counter->counts, cpu, thread)->run = 0;
+ return -1;
+ }
- if (verbose > 1) {
- fprintf(stat_config.output,
- "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
- cpu,
- count->val, count->ena, count->run);
+ perf_counts__set_loaded(counter->counts, cpu, thread, false);
+
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
}
}
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
}
return 0;
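
The read path above leans on the kernel's group-read semantics: when events are opened as a group and the leader's read_format includes PERF_FORMAT_GROUP, a single read() on the leader returns every member's count, which is why read_counter_cpu() only reads counters whose loaded flag is not already set and clears it afterwards. What follows is a minimal standalone illustration of such a group read against the raw perf_event_open() syscall; it is a sketch for context, not perf's internal perf_evsel__read_counter() path.

    #define _GNU_SOURCE
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Raw perf_event_open(); there is no glibc wrapper for it. */
    static int perf_open(struct perf_event_attr *attr, int group_fd)
    {
            return syscall(__NR_perf_event_open, attr, 0, -1, group_fd, 0);
    }

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t buf[1 + 2];    /* nr + one value per group member */
            int leader, member;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_TASK_CLOCK;
            attr.read_format = PERF_FORMAT_GROUP;
            leader = perf_open(&attr, -1);

            attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
            member = perf_open(&attr, leader);
            if (leader < 0 || member < 0)
                    return 1;

            /* Burn a little CPU so the counters move. */
            for (volatile long i = 0; i < 10000000; i++)
                    ;

            /* One read on the leader returns every member's count at once. */
            if (read(leader, buf, sizeof(buf)) > 0)
                    printf("%llu events: task-clock=%llu cs=%llu\n",
                           (unsigned long long)buf[0],
                           (unsigned long long)buf[1],
                           (unsigned long long)buf[2]);
            return 0;
    }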
@@ -319,16 +317,38 @@ static int read_counter(struct perf_evsel *counter, struct timespec *rs)
static void read_counters(struct timespec *rs)
{
- struct perf_evsel *counter;
- int ret;
+ struct evsel *counter;
+ struct affinity affinity;
+ int i, ncpus, cpu;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+ if (!target__has_cpu(&target) || target__has_per_thread(&target))
+ ncpus = 1;
+ evlist__for_each_cpu(evsel_list, i, cpu) {
+ if (i >= ncpus)
+ break;
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->err) {
+ counter->err = read_counter_cpu(counter, rs,
+ counter->cpu_iter - 1);
+ }
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evsel_list, counter) {
- ret = read_counter(counter, rs);
- if (ret)
+ if (counter->err)
pr_debug("failed to read counter %s\n", counter->name);
-
- if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+ if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
+ counter->err = 0;
}
}
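
The rewritten read_counters() walks CPUs in the outer loop and pins the measuring thread to each CPU (affinity__set()) before reading every event instance that lives there, so each read is a local access rather than a cross-CPU IPI. A minimal standalone sketch of that pin-then-read pattern, using plain sched_setaffinity() instead of perf's affinity helpers and a hypothetical read_one() callback standing in for read_counter_cpu(), assuming a contiguous online CPU range:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    /* Hypothetical stand-in for read_counter_cpu(): read one CPU's counters. */
    static int read_one(int cpu)
    {
            printf("reading counters on cpu %d\n", cpu);
            return 0;
    }

    static int read_all(int ncpus)
    {
            cpu_set_t saved, set;
            int cpu, ret = 0;

            /* Remember the original affinity so it can be restored, like affinity__cleanup(). */
            if (sched_getaffinity(0, sizeof(saved), &saved))
                    return -1;

            for (cpu = 0; cpu < ncpus; cpu++) {
                    CPU_ZERO(&set);
                    CPU_SET(cpu, &set);
                    /* Pin to the target CPU so the counter read stays local. */
                    if (sched_setaffinity(0, sizeof(set), &set))
                            continue;       /* CPU may be offline; skip it */
                    ret |= read_one(cpu);
            }

            sched_setaffinity(0, sizeof(saved), &saved);
            return ret;
    }

    int main(void)
    {
            return read_all(4);
    }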
@@ -362,7 +382,7 @@ static void enable_counters(void)
* - we have initial delay configured
*/
if (!target__none(&target) || stat_config.initial_delay)
- perf_evlist__enable(evsel_list);
+ evlist__enable(evsel_list);
}
static void disable_counters(void)
@@ -373,7 +393,7 @@ static void disable_counters(void)
* from counting before reading their constituent counters.
*/
if (!target__none(&target))
- perf_evlist__disable(evsel_list);
+ evlist__disable(evsel_list);
}
static volatile int workload_exec_errno;
@@ -389,13 +409,13 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
-static bool perf_evsel__should_store_id(struct perf_evsel *counter)
+static bool perf_evsel__should_store_id(struct evsel *counter)
{
- return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
+ return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
}
static bool is_target_alive(struct target *_target,
- struct thread_map *threads)
+ struct perf_thread_map *threads)
{
struct stat st;
int i;
@@ -416,6 +436,62 @@ static bool is_target_alive(struct target *_target,
return false;
}
+enum counter_recovery {
+ COUNTER_SKIP,
+ COUNTER_RETRY,
+ COUNTER_FATAL,
+};
+
+static enum counter_recovery stat_handle_error(struct evsel *counter)
+{
+ char msg[BUFSIZ];
+ /*
+ * PPC returns ENXIO for HW counters until 2.6.37
+ * (behavior changed with commit b0a873e).
+ */
+ if (errno == EINVAL || errno == ENOSYS ||
+ errno == ENOENT || errno == EOPNOTSUPP ||
+ errno == ENXIO) {
+ if (verbose > 0)
+ ui__warning("%s event is not supported by the kernel.\n",
+ perf_evsel__name(counter));
+ counter->supported = false;
+ /*
+ * errored is a sticky flag that means one of the counter's
+ * cpu event had a problem and needs to be reexamined.
+ */
+ counter->errored = true;
+
+ if ((counter->leader != counter) ||
+ !(counter->leader->core.nr_members > 1))
+ return COUNTER_SKIP;
+ } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (verbose > 0)
+ ui__warning("%s\n", msg);
+ return COUNTER_RETRY;
+ } else if (target__has_per_thread(&target) &&
+ evsel_list->core.threads &&
+ evsel_list->core.threads->err_thread != -1) {
+ /*
+ * For global --per-thread case, skip current
+ * error thread.
+ */
+ if (!thread_map__remove(evsel_list->core.threads,
+ evsel_list->core.threads->err_thread)) {
+ evsel_list->core.threads->err_thread = -1;
+ return COUNTER_RETRY;
+ }
+ }
+
+ perf_evsel__open_strerror(counter, &target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM);
+ return COUNTER_FATAL;
+}
+
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
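
stat_handle_error() factors the open-failure policy out of __run_perf_stat() so that both the first open pass and the later weak-group retry pass dispatch on the same counter_recovery result. At its core that is an errno classification; a simplified standalone model is shown below. It omits the perf_evsel__fallback() attempt and the --per-thread err_thread pruning the real function also performs, and the EMFILE/ENFILE retry case is purely illustrative.

    #include <errno.h>

    enum counter_recovery { COUNTER_SKIP, COUNTER_RETRY, COUNTER_FATAL };

    /* Simplified model: map the errno from a failed event open to an action. */
    static enum counter_recovery classify_open_error(int err)
    {
            switch (err) {
            case EINVAL:
            case ENOSYS:
            case ENOENT:
            case EOPNOTSUPP:
            case ENXIO:
                    /* Event not supported by this kernel/PMU: run without it. */
                    return COUNTER_SKIP;
            case EMFILE:
            case ENFILE:
                    /* Transient fd pressure: worth another attempt. */
                    return COUNTER_RETRY;
            default:
                    /* Anything else (e.g. EACCES) aborts the whole run. */
                    return COUNTER_FATAL;
            }
    }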
@@ -423,12 +499,15 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int timeout = stat_config.timeout;
char msg[BUFSIZ];
unsigned long long t0, t1;
- struct perf_evsel *counter;
+ struct evsel *counter;
struct timespec ts;
size_t l;
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
+ struct affinity affinity;
+ int i, cpu;
+ bool second_pass = false;
if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
@@ -453,61 +532,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
perf_evlist__set_leader(evsel_list);
- evlist__for_each_entry(evsel_list, counter) {
+ if (affinity__setup(&affinity) < 0)
+ return -1;
+
+ evlist__for_each_cpu (evsel_list, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (counter->reset_group || counter->errored)
+ continue;
try_again:
- if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
-
- /* Weak group failed. Reset the group. */
- if ((errno == EINVAL || errno == EBADF) &&
- counter->leader != counter &&
- counter->weak_group) {
- counter = perf_evlist__reset_weak_group(evsel_list, counter);
- goto try_again;
- }
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+ counter->cpu_iter - 1) < 0) {
- /*
- * PPC returns ENXIO for HW counters until 2.6.37
- * (behavior changed with commit b0a873e).
- */
- if (errno == EINVAL || errno == ENOSYS ||
- errno == ENOENT || errno == EOPNOTSUPP ||
- errno == ENXIO) {
- if (verbose > 0)
- ui__warning("%s event is not supported by the kernel.\n",
- perf_evsel__name(counter));
- counter->supported = false;
-
- if ((counter->leader != counter) ||
- !(counter->leader->nr_members > 1))
- continue;
- } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
- if (verbose > 0)
- ui__warning("%s\n", msg);
- goto try_again;
- } else if (target__has_per_thread(&target) &&
- evsel_list->threads &&
- evsel_list->threads->err_thread != -1) {
/*
- * For global --per-thread case, skip current
- * error thread.
+ * Weak group failed. We cannot just undo this here
+ * because earlier CPUs might be in group mode, and the kernel
+ * doesn't support mixing group and non group reads. Defer
+ * it to later.
+ * Don't close here because we're in the wrong affinity.
*/
- if (!thread_map__remove(evsel_list->threads,
- evsel_list->threads->err_thread)) {
- evsel_list->threads->err_thread = -1;
+ if ((errno == EINVAL || errno == EBADF) &&
+ counter->leader != counter &&
+ counter->weak_group) {
+ perf_evlist__reset_weak_group(evsel_list, counter, false);
+ assert(counter->reset_group);
+ second_pass = true;
+ continue;
+ }
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
goto try_again;
+ case COUNTER_SKIP:
+ continue;
+ default:
+ break;
}
+
}
+ counter->supported = true;
+ }
+ }
- perf_evsel__open_strerror(counter, &target,
- errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
+ if (second_pass) {
+ /*
+ * Now redo all the weak group after closing them,
+ * and also close errored counters.
+ */
- if (child_pid != -1)
- kill(child_pid, SIGTERM);
+ evlist__for_each_cpu(evsel_list, i, cpu) {
+ affinity__set(&affinity, cpu);
+ /* First close errored or weak retry */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->reset_group && !counter->errored)
+ continue;
+ if (evsel__cpu_iter_skip_no_inc(counter, cpu))
+ continue;
+ perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
+ }
+ /* Now reopen weak */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->reset_group && !counter->errored)
+ continue;
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->reset_group)
+ continue;
+try_again_reset:
+ pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+ counter->cpu_iter - 1) < 0) {
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
+ goto try_again_reset;
+ case COUNTER_SKIP:
+ continue;
+ default:
+ break;
+ }
+ }
+ counter->supported = true;
+ }
+ }
+ }
+ affinity__cleanup(&affinity);
- return -1;
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->supported) {
+ perf_evsel__free_fd(&counter->core);
+ continue;
}
- counter->supported = true;
l = strlen(counter->unit);
if (l > stat_config.unit_width)
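
Opening is now also CPU-major: the outer evlist__for_each_cpu() pins to each CPU and opens every event instance that belongs there. A weak group that fails can no longer be torn down and reopened on the spot, because sibling instances on earlier CPUs are already open in group mode and the thread is pinned to the wrong CPU; the failure is therefore only recorded (reset_group / errored) and a second pass closes and then reopens those events under the right affinity. A generic sketch of that mark-then-retry structure, where open_once() is a hypothetical per-item opener (not a perf API) and "relaxed" stands in for retrying without grouping:

    #include <unistd.h>

    struct item {
            int fd;
            int needs_retry;
    };

    /* Hypothetical opener; a real one would create an fd and may fail. */
    static int open_once(struct item *it, int relaxed)
    {
            (void)relaxed;
            it->fd = -1;
            return 0;
    }

    static int open_all(struct item *items, int n)
    {
            int i, again = 0;

            /* First pass: open everything, but only mark failures. */
            for (i = 0; i < n; i++) {
                    if (open_once(&items[i], 0) < 0) {
                            items[i].needs_retry = 1;
                            again = 1;
                    }
            }
            if (!again)
                    return 0;

            /* Second pass: drop half-configured fds first, then reopen relaxed. */
            for (i = 0; i < n; i++) {
                    if (!items[i].needs_retry)
                            continue;
                    if (items[i].fd >= 0)
                            close(items[i].fd);
                    if (open_once(&items[i], 1) < 0)
                            return -1;
            }
            return 0;
    }

    int main(void)
    {
            struct item items[2] = { { -1, 0 }, { -1, 0 } };

            return open_all(items, 2);
    }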
@@ -538,8 +660,8 @@ try_again:
if (err < 0)
return err;
- err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
- process_synthesized_event, is_pipe);
+ err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
+ process_synthesized_event, is_pipe);
if (err < 0)
return err;
}
@@ -579,7 +701,7 @@ try_again:
enable_counters();
while (!done) {
nanosleep(&ts, NULL);
- if (!is_target_alive(&target, evsel_list->threads))
+ if (!is_target_alive(&target, evsel_list->core.threads))
break;
if (timeout)
break;
@@ -607,7 +729,13 @@ try_again:
* group leaders.
*/
read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
- perf_evlist__close(evsel_list);
+
+ /*
+ * We need to keep evsel_list alive, because it's processed
+ * later; the evsel_list will be closed afterwards.
+ */
+ if (!STAT_RECORD)
+ evlist__close(evsel_list);
return WEXITSTATUS(status);
}
@@ -782,6 +910,8 @@ static struct option stat_options[] = {
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
"aggregate counts per thread", AGGR_THREAD),
+ OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
+ "aggregate counts per numa node", AGGR_NODE),
OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
"ms to wait before starting measurement after program start"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
@@ -793,41 +923,41 @@ static struct option stat_options[] = {
OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
"monitor specified metrics or metric groups (separated by ,)",
parse_metric_groups),
+ OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
+ "Configure all used events to run in kernel space.",
+ PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
+ "Configure all used events to run in user space.",
+ PARSE_OPT_EXCLUSIVE),
OPT_END()
};
static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
- struct cpu_map *map, int cpu)
+ struct perf_cpu_map *map, int cpu)
{
return cpu_map__get_socket(map, cpu, NULL);
}
static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
- struct cpu_map *map, int cpu)
+ struct perf_cpu_map *map, int cpu)
{
return cpu_map__get_die(map, cpu, NULL);
}
static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
- struct cpu_map *map, int cpu)
+ struct perf_cpu_map *map, int cpu)
{
return cpu_map__get_core(map, cpu, NULL);
}
-static int cpu_map__get_max(struct cpu_map *map)
+static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu_map *map, int cpu)
{
- int i, max = -1;
-
- for (i = 0; i < map->nr; i++) {
- if (map->map[i] > max)
- max = map->map[i];
- }
-
- return max;
+ return cpu_map__get_node(map, cpu, NULL);
}
static int perf_stat__get_aggr(struct perf_stat_config *config,
- aggr_get_id_t get_id, struct cpu_map *map, int idx)
+ aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
{
int cpu;
@@ -843,26 +973,32 @@ static int perf_stat__get_aggr(struct perf_stat_config *config,
}
static int perf_stat__get_socket_cached(struct perf_stat_config *config,
- struct cpu_map *map, int idx)
+ struct perf_cpu_map *map, int idx)
{
return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
}
static int perf_stat__get_die_cached(struct perf_stat_config *config,
- struct cpu_map *map, int idx)
+ struct perf_cpu_map *map, int idx)
{
return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
}
static int perf_stat__get_core_cached(struct perf_stat_config *config,
- struct cpu_map *map, int idx)
+ struct perf_cpu_map *map, int idx)
{
return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
}
+static int perf_stat__get_node_cached(struct perf_stat_config *config,
+ struct perf_cpu_map *map, int idx)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
+}
+
static bool term_percore_set(void)
{
- struct perf_evsel *counter;
+ struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
if (counter->percore)
@@ -878,29 +1014,36 @@ static int perf_stat_init_aggr_mode(void)
switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
- if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) {
+ if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build socket map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_socket_cached;
break;
case AGGR_DIE:
- if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) {
+ if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_cached;
break;
case AGGR_CORE:
- if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
+ if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_cached;
break;
+ case AGGR_NODE:
+ if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ stat_config.aggr_get_id = perf_stat__get_node_cached;
+ break;
case AGGR_NONE:
if (term_percore_set()) {
- if (cpu_map__build_core_map(evsel_list->cpus,
+ if (cpu_map__build_core_map(evsel_list->core.cpus,
&stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
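
The new AGGR_NODE mode reuses the generic cpu_map__build_map() machinery; the only new ingredient is a CPU-to-NUMA-node lookup, which cpu__setup_cpunode_map() fills from sysfs at startup. As a standalone illustration of where that information comes from, assuming the usual sysfs layout, one can probe the node directories directly:

    #include <stdio.h>
    #include <unistd.h>

    /*
     * Illustration only: find the NUMA node a CPU belongs to by probing
     * /sys/devices/system/node/node<N>/cpu<C>. perf builds the equivalent
     * mapping once up front rather than probing on every lookup.
     */
    static int cpu_to_node(int cpu, int max_nodes)
    {
            char path[128];
            int node;

            for (node = 0; node < max_nodes; node++) {
                    snprintf(path, sizeof(path),
                             "/sys/devices/system/node/node%d/cpu%d", node, cpu);
                    if (access(path, F_OK) == 0)
                            return node;
            }
            return -1;      /* not found: ends up in its own bucket */
    }

    int main(void)
    {
            printf("cpu0 is on node %d\n", cpu_to_node(0, 8));
            return 0;
    }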
@@ -920,20 +1063,20 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- nr = cpu_map__get_max(evsel_list->cpus);
- stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1);
+ nr = perf_cpu_map__max(evsel_list->core.cpus);
+ stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
static void perf_stat__exit_aggr_mode(void)
{
- cpu_map__put(stat_config.aggr_map);
- cpu_map__put(stat_config.cpus_aggr_map);
+ perf_cpu_map__put(stat_config.aggr_map);
+ perf_cpu_map__put(stat_config.cpus_aggr_map);
stat_config.aggr_map = NULL;
stat_config.cpus_aggr_map = NULL;
}
-static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
{
int cpu;
@@ -948,7 +1091,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, i
return cpu;
}
-static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
{
struct perf_env *env = data;
int cpu = perf_env__get_cpu(env, map, idx);
@@ -956,7 +1099,7 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}
-static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
+static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
{
struct perf_env *env = data;
int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
@@ -980,7 +1123,7 @@ static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
return die_id;
}
-static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
{
struct perf_env *env = data;
int core = -1, cpu = perf_env__get_cpu(env, map, idx);
@@ -1010,67 +1153,93 @@ static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
return core;
}
-static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
- struct cpu_map **sockp)
+static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
+{
+ int cpu = perf_env__get_cpu(data, map, idx);
+
+ return perf_env__numa_node(data, cpu);
+}
+
+static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
+ struct perf_cpu_map **sockp)
{
return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}
-static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus,
- struct cpu_map **diep)
+static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
+ struct perf_cpu_map **diep)
{
return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
}
-static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
- struct cpu_map **corep)
+static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
+ struct perf_cpu_map **corep)
{
return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}
+static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
+ struct perf_cpu_map **nodep)
+{
+ return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
+}
+
static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
- struct cpu_map *map, int idx)
+ struct perf_cpu_map *map, int idx)
{
return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}
static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
- struct cpu_map *map, int idx)
+ struct perf_cpu_map *map, int idx)
{
return perf_env__get_die(map, idx, &perf_stat.session->header.env);
}
static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
- struct cpu_map *map, int idx)
+ struct perf_cpu_map *map, int idx)
{
return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}
+static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu_map *map, int idx)
+{
+ return perf_env__get_node(map, idx, &perf_stat.session->header.env);
+}
+
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
struct perf_env *env = &st->session->header.env;
switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
- if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
+ if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build socket map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_socket_file;
break;
case AGGR_DIE:
- if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
+ if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_file;
break;
case AGGR_CORE:
- if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
+ if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_file;
break;
+ case AGGR_NODE:
+ if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ stat_config.aggr_get_id = perf_stat__get_node_file;
+ break;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
@@ -1256,6 +1425,7 @@ static int add_default_attributes(void)
if (stat_config.null_run)
return 0;
+ bzero(&errinfo, sizeof(errinfo));
if (transaction_run) {
/* Handle -T as -M transaction. Once platform specific metrics
* support has been added to the json files, all architectures
@@ -1313,6 +1483,7 @@ static int add_default_attributes(void)
return -1;
}
if (err) {
+ parse_events_print_error(&errinfo, smi_cost_attrs);
fprintf(stderr, "Cannot set up SMI cost events\n");
return -1;
}
@@ -1360,7 +1531,7 @@ static int add_default_attributes(void)
free(str);
}
- if (!evsel_list->nr_entries) {
+ if (!evsel_list->core.nr_entries) {
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
@@ -1439,9 +1610,9 @@ static int __cmd_record(int argc, const char **argv)
}
session = perf_session__new(data, false, NULL);
- if (session == NULL) {
- pr_err("Perf session creation failed.\n");
- return -1;
+ if (IS_ERR(session)) {
+ pr_err("Perf session creation failed\n");
+ return PTR_ERR(session);
}
init_features(session);
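
perf_session__new() now reports failures through the <linux/err.h> error-pointer convention rather than returning NULL, which is why its callers switch to IS_ERR()/PTR_ERR() and can propagate the precise errno. For readers unfamiliar with the idiom, a minimal user-space rendition (not perf's headers) looks like this:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /*
     * A failing constructor returns the negative errno encoded in the
     * pointer value instead of NULL, so the caller can recover the cause.
     */
    #define MAX_ERRNO       4095
    static inline void *ERR_PTR(long err)      { return (void *)err; }
    static inline long  PTR_ERR(const void *p) { return (long)p; }
    static inline int   IS_ERR(const void *p)
    {
            return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
    }

    /* Hypothetical constructor standing in for perf_session__new(). */
    static void *session_new(int fail)
    {
            if (fail)
                    return ERR_PTR(-ENOMEM);
            return malloc(64);
    }

    int main(void)
    {
            void *s = session_new(1);

            if (IS_ERR(s)) {
                    fprintf(stderr, "session creation failed: %ld\n", PTR_ERR(s));
                    return 1;
            }
            free(s);
            return 0;
    }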
@@ -1455,8 +1626,8 @@ static int __cmd_record(int argc, const char **argv)
static int process_stat_round_event(struct perf_session *session,
union perf_event *event)
{
- struct stat_round_event *stat_round = &event->stat_round;
- struct perf_evsel *counter;
+ struct perf_record_stat_round *stat_round = &event->stat_round;
+ struct evsel *counter;
struct timespec tsh, *ts = NULL;
const char **argv = session->header.env.cmdline_argv;
int argc = session->header.env.nr_cmdline;
@@ -1486,7 +1657,7 @@ int process_stat_config_event(struct perf_session *session,
perf_event__read_stat_config(&stat_config, &event->stat_config);
- if (cpu_map__empty(st->cpus)) {
+ if (perf_cpu_map__empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
return 0;
@@ -1511,7 +1682,7 @@ static int set_maps(struct perf_stat *st)
if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
return -EINVAL;
- perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+ perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
if (perf_evlist__alloc_stats(evsel_list, true))
return -ENOMEM;
@@ -1545,7 +1716,7 @@ int process_cpu_map_event(struct perf_session *session,
{
struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
- struct cpu_map *cpus;
+ struct perf_cpu_map *cpus;
if (st->cpus) {
pr_warning("Extra cpu map event, ignoring.\n");
@@ -1618,6 +1789,8 @@ static int __cmd_report(int argc, const char **argv)
"aggregate counts per processor die", AGGR_DIE),
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
+ "aggregate counts per numa node", AGGR_NODE),
OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
"disable CPU count aggregation", AGGR_NONE),
OPT_END()
@@ -1638,8 +1811,8 @@ static int __cmd_report(int argc, const char **argv)
perf_stat.data.mode = PERF_DATA_MODE_READ;
session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
- if (session == NULL)
- return -1;
+ if (IS_ERR(session))
+ return PTR_ERR(session);
perf_stat.session = session;
stat_config.output = stderr;
@@ -1670,14 +1843,14 @@ static void setup_system_wide(int forks)
if (!forks)
target.system_wide = true;
else {
- struct perf_evsel *counter;
+ struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
- if (!counter->system_wide)
+ if (!counter->core.system_wide)
return;
}
- if (evsel_list->nr_entries)
+ if (evsel_list->core.nr_entries)
target.system_wide = true;
}
}
@@ -1696,7 +1869,7 @@ int cmd_stat(int argc, const char **argv)
setlocale(LC_ALL, "");
- evsel_list = perf_evlist__new();
+ evsel_list = evlist__new();
if (evsel_list == NULL)
return -ENOMEM;
@@ -1883,15 +2056,18 @@ int cmd_stat(int argc, const char **argv)
* so we could print it out on output.
*/
if (stat_config.aggr_mode == AGGR_THREAD) {
- thread_map__read_comms(evsel_list->threads);
+ thread_map__read_comms(evsel_list->core.threads);
if (target.system_wide) {
if (runtime_stat_new(&stat_config,
- thread_map__nr(evsel_list->threads))) {
+ perf_thread_map__nr(evsel_list->core.threads))) {
goto out;
}
}
}
+ if (stat_config.aggr_mode == AGGR_NODE)
+ cpu__setup_cpunode_map();
+
if (stat_config.times && interval)
interval_count = true;
else if (stat_config.times && !interval) {
@@ -1955,8 +2131,11 @@ int cmd_stat(int argc, const char **argv)
fprintf(output, "[ perf stat: executing run #%d ... ]\n",
run_idx + 1);
+ if (run_idx != 0)
+ perf_evlist__reset_prev_raw_counts(evsel_list);
+
status = run_perf_stat(argc, argv, run_idx);
- if (forever && status != -1) {
+ if (forever && status != -1 && !interval) {
print_counters(NULL, argc, argv);
perf_stat__reset_stats();
}
@@ -1997,6 +2176,7 @@ int cmd_stat(int argc, const char **argv)
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
}
+ evlist__close(evsel_list);
perf_session__delete(perf_stat.session);
}
@@ -2008,7 +2188,7 @@ out:
if (smi_cost && smi_reset)
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
- perf_evlist__delete(evsel_list);
+ evlist__delete(evsel_list);
runtime_stat_delete(&stat_config);