diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 348 |
1 files changed, 251 insertions, 97 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 59af5a8419e2..f5c454855908 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,7 +63,6 @@ #include "util/group.h" #include "util/session.h" #include "util/tool.h" -#include "util/group.h" #include "util/string2.h" #include "util/metricgroup.h" #include "asm/bug.h" @@ -169,6 +168,7 @@ static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; +static bool interval_count; static const char *output_name; static int output_fd; static int print_free_counters_hint; @@ -214,8 +214,13 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, static void perf_stat__reset_stats(void) { + int i; + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); + + for (i = 0; i < stat_config.stats_num; i++) + perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -272,7 +277,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); return perf_evsel__open_per_thread(evsel, evsel_list->threads); @@ -335,7 +340,7 @@ static int read_counter(struct perf_evsel *counter) int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) ncpus = perf_evsel__nr_cpus(counter); else ncpus = 1; @@ -458,19 +463,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static bool has_unit(struct perf_evsel *counter) -{ - return counter->unit && *counter->unit; -} - -static bool has_scale(struct perf_evsel *counter) -{ - return counter->scale != 1; -} - static int perf_stat_synthesize_config(bool is_pipe) { - struct perf_evsel *counter; int err; if (is_pipe) { @@ -482,53 +476,10 @@ static int perf_stat_synthesize_config(bool is_pipe) } } - /* - * Synthesize other events stuff not carried within - * attr event - unit, scale, name - */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->supported) - continue; - - /* - * Synthesize unit and scale only if it's defined. - */ - if (has_unit(counter)) { - err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel unit.\n"); - return err; - } - } - - if (has_scale(counter)) { - err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel scale.\n"); - return err; - } - } - - if (counter->own_cpus) { - err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel scale.\n"); - return err; - } - } - - /* - * Name is needed only for pipe output, - * perf.data carries event names. - */ - if (is_pipe) { - err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel name.\n"); - return err; - } - } - } + err = perf_event__synthesize_extra_attr(NULL, + evsel_list, + process_synthesized_event, + is_pipe); err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, process_synthesized_event, @@ -557,14 +508,13 @@ static int perf_stat_synthesize_config(bool is_pipe) #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -static int __store_counter_ids(struct perf_evsel *counter, - struct cpu_map *cpus, - struct thread_map *threads) +static int __store_counter_ids(struct perf_evsel *counter) { int cpu, thread; - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { + for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(counter->fd); + thread++) { int fd = FD(counter, cpu, thread); if (perf_evlist__id_add_fd(evsel_list, counter, @@ -584,7 +534,7 @@ static int store_counter_ids(struct perf_evsel *counter) if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) return -ENOMEM; - return __store_counter_ids(counter, cpus, threads); + return __store_counter_ids(counter); } static bool perf_evsel__should_store_id(struct perf_evsel *counter) @@ -621,6 +571,8 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; + int times = stat_config.times; + int timeout = stat_config.timeout; char msg[BUFSIZ]; unsigned long long t0, t1; struct perf_evsel *counter; @@ -634,6 +586,9 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; + } else if (timeout) { + ts.tv_sec = timeout / USEC_PER_MSEC; + ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; } else { ts.tv_sec = 1; ts.tv_nsec = 0; @@ -682,7 +637,19 @@ try_again: if (verbose > 0) ui__warning("%s\n", msg); goto try_again; - } + } else if (target__has_per_thread(&target) && + evsel_list->threads && + evsel_list->threads->err_thread != -1) { + /* + * For global --per-thread case, skip current + * error thread. + */ + if (!thread_map__remove(evsel_list->threads, + evsel_list->threads->err_thread)) { + evsel_list->threads->err_thread = -1; + goto try_again; + } + } perf_evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); @@ -746,10 +713,14 @@ try_again: perf_evlist__start_workload(evsel_list); enable_counters(); - if (interval) { + if (interval || timeout) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); + if (timeout) + break; process_interval(); + if (interval_count && !(--times)) + break; } } waitpid(child_pid, &status, 0); @@ -766,8 +737,13 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); - if (interval) + if (timeout) + break; + if (interval) { process_interval(); + if (interval_count && !(--times)) + break; + } } } @@ -967,7 +943,7 @@ static void print_metric_csv(void *ctx, char buf[64], *vals, *ends; if (unit == NULL || fmt == NULL) { - fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + fprintf(out, "%s%s", csv_sep, csv_sep); return; } snprintf(buf, sizeof(buf), fmt, val); @@ -1151,7 +1127,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise) + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) { struct perf_stat_output_ctx out; struct outstate os = { @@ -1244,7 +1221,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), - &out, &metric_events); + &out, &metric_events, st); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1268,7 +1245,33 @@ static void aggr_update_shadow(void) val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id)); + first_shadow_cpu(counter, id), + &rt_stat); + } + } +} + +static void uniquify_event_name(struct perf_evsel *counter) +{ + char *new_name; + char *config; + + if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name, + strlen(counter->pmu_name))) + return; + + config = strchr(counter->name, '/'); + if (config) { + if (asprintf(&new_name, + "%s%s", counter->pmu_name, config) > 0) { + free(counter->name); + counter->name = new_name; + } + } else { + if (asprintf(&new_name, + "%s [%s]", counter->name, counter->pmu_name) > 0) { + free(counter->name); + counter->name = new_name; } } } @@ -1301,7 +1304,9 @@ static bool collect_data(struct perf_evsel *counter, if (counter->merged_stat) return false; cb(counter, data, true); - if (!no_merge && counter->auto_merge_stats) + if (no_merge) + uniquify_event_name(counter); + else if (counter->auto_merge_stats) collect_all_aliases(counter, cb, data); return true; } @@ -1388,7 +1393,8 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 1.0); + printout(id, nr, counter, uval, prefix, run, ena, 1.0, + &rt_stat); if (!metric_only) fputc('\n', output); } @@ -1397,13 +1403,24 @@ static void print_aggr(char *prefix) } } -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +static int cmp_val(const void *a, const void *b) { - FILE *output = stat_config.output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int cpu, thread; + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret) +{ + int cpu, thread, i = 0; double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; for (thread = 0; thread < nthreads; thread++) { u64 ena = 0, run = 0, val = 0; @@ -1414,13 +1431,63 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) run += perf_counts(counter->counts, cpu, thread)->run; } + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (uval == 0.0 && target__has_per_thread(&target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + FILE *output = stat_config.output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { if (prefix) fprintf(output, "%s", prefix); - uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0); + id = buf[thread].id; + if (stat_config.stats) + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &stat_config.stats[id]); + else + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); fputc('\n', output); } + + free(buf); } struct caggr_data { @@ -1455,7 +1522,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -1494,7 +1562,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } @@ -1526,7 +1595,8 @@ static void print_no_aggr_metric(char *prefix) run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); } fputc('\n', stat_config.output); } @@ -1582,7 +1652,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) perf_stat__print_shadow_stats(counter, 0, 0, &out, - &metric_events); + &metric_events, + &rt_stat); } fputc('\n', stat_config.output); } @@ -1873,6 +1944,10 @@ static const struct option stat_options[] = { "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, "print counts at regular interval in ms (>= 10)"), + OPT_INTEGER(0, "interval-count", &stat_config.times, + "print counts for fixed number of times"), + OPT_UINTEGER(0, "timeout", &stat_config.timeout, + "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, @@ -2256,11 +2331,16 @@ static int add_default_attributes(void) return 0; if (transaction_run) { + struct parse_events_error errinfo; + if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) - err = parse_events(evsel_list, transaction_attrs, NULL); + err = parse_events(evsel_list, transaction_attrs, + &errinfo); else - err = parse_events(evsel_list, transaction_limited_attrs, NULL); + err = parse_events(evsel_list, + transaction_limited_attrs, + &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); return -1; @@ -2541,6 +2621,35 @@ int process_cpu_map_event(struct perf_tool *tool, return set_maps(st); } +static int runtime_stat_new(struct perf_stat_config *config, int nthreads) +{ + int i; + + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); + if (!config->stats) + return -1; + + config->stats_num = nthreads; + + for (i = 0; i < nthreads; i++) + runtime_stat__init(&config->stats[i]); + + return 0; +} + +static void runtime_stat_delete(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + runtime_stat__exit(&config->stats[i]); + + free(config->stats); +} + static const char * const stat_report_usage[] = { "perf stat report [<options>]", NULL, @@ -2641,7 +2750,7 @@ int cmd_stat(int argc, const char **argv) int status = -EINVAL, run_idx; const char *mode; FILE *output = stderr; - unsigned int interval; + unsigned int interval, timeout; const char * const stat_subcommands[] = { "record", "report" }; setlocale(LC_ALL, ""); @@ -2672,6 +2781,7 @@ int cmd_stat(int argc, const char **argv) return __cmd_report(argc, argv); interval = stat_config.interval; + timeout = stat_config.timeout; /* * For record command the -o is already taken care of. @@ -2750,12 +2860,16 @@ int cmd_stat(int argc, const char **argv) run_count = 1; } - if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { - fprintf(stderr, "The --per-thread option is only available " - "when monitoring via -p -t options.\n"); - parse_options_usage(NULL, stat_options, "p", 1); - parse_options_usage(NULL, stat_options, "t", 1); - goto out; + if ((stat_config.aggr_mode == AGGR_THREAD) && + !target__has_task(&target)) { + if (!target.system_wide || target.cpu_list) { + fprintf(stderr, "The --per-thread option is only " + "available when monitoring via -p -t -a " + "options or only --per-thread.\n"); + parse_options_usage(NULL, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); + goto out; + } } /* @@ -2779,6 +2893,9 @@ int cmd_stat(int argc, const char **argv) target__validate(&target); + if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) + target.per_thread = true; + if (perf_evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); @@ -2796,8 +2913,15 @@ int cmd_stat(int argc, const char **argv) * Initialize thread_map with comm names, * so we could print it out on output. */ - if (stat_config.aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) { thread_map__read_comms(evsel_list->threads); + if (target.system_wide) { + if (runtime_stat_new(&stat_config, + thread_map__nr(evsel_list->threads))) { + goto out; + } + } + } if (interval && interval < 100) { if (interval < 10) { @@ -2810,6 +2934,33 @@ int cmd_stat(int argc, const char **argv) "Please proceed with caution.\n"); } + if (stat_config.times && interval) + interval_count = true; + else if (stat_config.times && !interval) { + pr_err("interval-count option should be used together with " + "interval-print.\n"); + parse_options_usage(stat_usage, stat_options, "interval-count", 0); + parse_options_usage(stat_usage, stat_options, "I", 1); + goto out; + } + + if (timeout && timeout < 100) { + if (timeout < 10) { + pr_err("timeout must be >= 10ms.\n"); + parse_options_usage(stat_usage, stat_options, "timeout", 0); + goto out; + } else + pr_warning("timeout < 100ms. " + "The overhead percentage could be high in some cases. " + "Please proceed with caution.\n"); + } + if (timeout && interval) { + pr_err("timeout option is not supported with interval-print.\n"); + parse_options_usage(stat_usage, stat_options, "timeout", 0); + parse_options_usage(stat_usage, stat_options, "I", 1); + goto out; + } + if (perf_evlist__alloc_stats(evsel_list, interval)) goto out; @@ -2887,5 +3038,8 @@ out: sysfs__write_int(FREEZE_ON_SMI_PATH, 0); perf_evlist__delete(evsel_list); + + runtime_stat_delete(&stat_config); + return status; } |