Diffstat (limited to 'tools/perf/util')
216 files changed, 6770 insertions, 1739 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 94518c1bf8b6..ea0a452550b0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -13,6 +13,7 @@ libperf-y += find_bit.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o +libperf-y += mmap.o libperf-y += memswap.o libperf-y += parse-events.o libperf-y += perf_regs.o @@ -34,6 +35,7 @@ libperf-y += dso.o libperf-y += symbol.o libperf-y += symbol_fprintf.o libperf-y += color.o +libperf-y += metricgroup.o libperf-y += header.o libperf-y += callchain.o libperf-y += values.o @@ -42,7 +44,7 @@ libperf-y += machine.o libperf-y += map.o libperf-y += pstack.o libperf-y += session.o -libperf-$(CONFIG_AUDIT) += syscalltbl.o +libperf-$(CONFIG_TRACE) += syscalltbl.o libperf-y += ordered-events.o libperf-y += namespaces.o libperf-y += comm.o @@ -78,11 +80,20 @@ libperf-y += data.o libperf-y += tsc.o libperf-y += cloexec.o libperf-y += call-path.o +libperf-y += rwsem.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o +libperf-$(CONFIG_AUXTRACE) += arm-spe.o +libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o + +ifdef CONFIG_LIBOPENCSD +libperf-$(CONFIG_AUXTRACE) += cs-etm.o +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +endif + libperf-y += parse-branch-options.o libperf-y += dump-insn.o libperf-y += parse-regs-options.o diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 39f17507578d..3802cee5e188 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 if [ $# -eq 1 ] ; then OUTPUT=$1 diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4397a8b6e6cd..28b233c3dcbe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@ #include <pthread.h> #include <linux/bitops.h> #include <linux/kernel.h> -#include <sys/utsname.h> #include "sane_ctype.h" @@ -49,10 +48,9 @@ struct arch { void *priv; unsigned int model; unsigned int family; - int (*init)(struct arch *arch); + int (*init)(struct arch *arch, char *cpuid); bool (*ins_is_fused)(struct arch *arch, const char *ins1, const char *ins2); - int (*cpuid_parse)(struct arch *arch, char *cpuid); struct { char comment_char; char skip_functions_char; @@ -132,10 +130,10 @@ static struct arch architectures[] = { }, { .name = "x86", + .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), .ins_is_fused = x86__ins_is_fused, - .cpuid_parse = x86__cpuid_parse, .objdump = { .comment_char = '#', }, @@ -166,7 +164,7 @@ static void ins__delete(struct ins_operands *ops) static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw); } int ins__scnprintf(struct ins *ins, char *bf, size_t size, @@ -231,12 +229,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { if (ops->target.name) - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr); + return 
scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } static struct ins_ops call_ops = { @@ -300,7 +298,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, c++; } - return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + return scnprintf(bf, size, "%-6s %.*s%" PRIx64, ins->name, c ? c - ops->raw : 0, ops->raw, ops->target.offset); } @@ -323,6 +321,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; *addrp = strtoull(comment, &endptr, 16); + if (endptr == comment) + return 0; name = strchr(endptr, '<'); if (name == NULL) return -1; @@ -373,7 +373,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, if (ops->locked.ins.ops == NULL) return ins__raw_scnprintf(ins, bf, size, ops); - printed = scnprintf(bf, size, "%-6.6s ", ins->name); + printed = scnprintf(bf, size, "%-6s ", ins->name); return printed + ins__scnprintf(&ops->locked.ins, bf + printed, size - printed, ops->locked.ops); } @@ -436,8 +436,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m return 0; comment = ltrim(comment); - comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; @@ -449,7 +449,7 @@ out_free_source: static int mov__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s,%s", ins->name, + return scnprintf(bf, size, "%-6s %s,%s", ins->name, ops->source.name ?: ops->source.raw, ops->target.name ?: ops->target.raw); } @@ -481,7 +481,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops return 0; comment = ltrim(comment); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; } @@ -489,7 +489,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops static int dec__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name ?: ops->target.raw); } @@ -501,7 +501,7 @@ static struct ins_ops dec_ops = { static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, struct ins_operands *ops __maybe_unused) { - return scnprintf(bf, size, "%-6.6s", "nop"); + return scnprintf(bf, size, "%-6s", "nop"); } static struct ins_ops nop_ops = { @@ -606,9 +606,19 @@ static struct arch *arch__find(const char *name) int symbol__alloc_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); + size_t size = symbol__size(sym); size_t sizeof_sym_hist; + /* + * Add buffer of one element for zero length symbol. + * When sample is taken from first instruction of + * zero length symbol, perf still resolves it and + * shows symbol name in perf report and allows to + * annotate it. 
+ */ + if (size == 0) + size = 1; + /* Check for overflow when calculating sizeof_sym_hist */ if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry)) return -1; @@ -869,32 +879,99 @@ out_free_name: return -1; } -static struct disasm_line *disasm_line__new(s64 offset, char *line, - size_t privsize, int line_nr, - struct arch *arch, - struct map *map) +struct annotate_args { + size_t privsize; + struct arch *arch; + struct map *map; + struct perf_evsel *evsel; + s64 offset; + char *line; + int line_nr; +}; + +static void annotation_line__delete(struct annotation_line *al) +{ + void *ptr = (void *) al - al->privsize; + + free_srcline(al->path); + zfree(&al->line); + free(ptr); +} + +/* + * Allocating the annotation line data with following + * structure: + * + * -------------------------------------- + * private space | struct annotation_line + * -------------------------------------- + * + * Size of the private space is stored in 'struct annotation_line'. + * + */ +static struct annotation_line * +annotation_line__new(struct annotate_args *args, size_t privsize) { - struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); + struct annotation_line *al; + struct perf_evsel *evsel = args->evsel; + size_t size = privsize + sizeof(*al); + int nr = 1; - if (dl != NULL) { - dl->offset = offset; - dl->line = strdup(line); - dl->line_nr = line_nr; - if (dl->line == NULL) + if (perf_evsel__is_group_event(evsel)) + nr = evsel->nr_members; + + size += sizeof(al->samples[0]) * nr; + + al = zalloc(size); + if (al) { + al = (void *) al + privsize; + al->privsize = privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + al->samples_nr = nr; + } + + return al; +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * ------------------------------------------------------------ + * privsize space | struct disasm_line | struct annotation_line + * ------------------------------------------------------------ + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. 
+ * + */ +static struct disasm_line *disasm_line__new(struct annotate_args *args) +{ + struct disasm_line *dl = NULL; + struct annotation_line *al; + size_t privsize = args->privsize + offsetof(struct disasm_line, al); + + al = annotation_line__new(args, privsize); + if (al != NULL) { + dl = disasm_line(al); + + if (dl->al.line == NULL) goto out_delete; - if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) + if (args->offset != -1) { + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, arch, map); + disasm_line__init_ins(dl, args->arch, args->map); } } return dl; out_free_line: - zfree(&dl->line); + zfree(&dl->al.line); out_delete: free(dl); return NULL; @@ -902,30 +979,30 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); free((void *)dl->ins.name); dl->ins.name = NULL; - free(dl); + annotation_line__delete(&dl->al); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { if (raw || !dl->ins.ops) - return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); + return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw); return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -static void disasm__add(struct list_head *head, struct disasm_line *line) +static void annotation_line__add(struct annotation_line *al, struct list_head *head) { - list_add_tail(&line->node, head); + list_add_tail(&al->node, head); } -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head) { list_for_each_entry_continue(pos, head, node) if (pos->offset >= 0) @@ -934,50 +1011,6 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa return NULL; } -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample) -{ - struct source_line *src_line = notes->src->lines; - double percent = 0.0; - - sample->nr_samples = sample->period = 0; - - if (src_line) { - size_t sizeof_src_line = sizeof(*src_line) + - sizeof(src_line->samples) * (src_line->nr_pcnt - 1); - - while (offset < end) { - src_line = (void *)notes->src->lines + - (sizeof_src_line * offset); - - if (*path == NULL) - *path = src_line->path; - - percent += src_line->samples[evidx].percent; - sample->nr_samples += src_line->samples[evidx].nr; - offset++; - } - } else { - struct sym_hist *h = annotation__histogram(notes, evidx); - unsigned int hits = 0; - u64 period = 0; - - while (offset < end) { - hits += h->addr[offset].nr_samples; - period += h->addr[offset].period; - ++offset; - } - - if (h->nr_samples) { - sample->period = period; - sample->nr_samples = hits; - percent = 100.0 * hits / h->nr_samples; - } - } - - return percent; -} - static const char *annotate__address_color(struct block_range *br) { double cov = block_range__coverage(br); @@ -1060,50 +1093,39 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } } +static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width) +{ + s64 offset = dl->al.offset; + const u64 addr = start + offset; + struct block_range *br; + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr); + 
color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); + annotate__branch_printf(br, addr); + return 0; +} -static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct disasm_line *queue) +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *queue, int addr_fmt_width) { + struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; static const char *prev_color; - if (dl->offset != -1) { - const char *path = NULL; - double percent, max_percent = 0.0; - double *ppercents = &percent; - struct sym_hist_entry sample; - struct sym_hist_entry *psamples = &sample; + if (al->offset != -1) { + double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->offset; - const u64 addr = start + offset; - struct disasm_line *next; - struct block_range *br; - - next = disasm__get_next_ip_line(¬es->src->source, dl); - - if (perf_evsel__is_group_event(evsel)) { - nr_percent = evsel->nr_members; - ppercents = calloc(nr_percent, sizeof(double)); - psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); - if (ppercents == NULL || psamples == NULL) { - return -1; - } - } - for (i = 0; i < nr_percent; i++) { - percent = disasm__calc_percent(notes, - notes->src->lines ? i : evsel->idx + i, - offset, - next ? next->offset : (s64) len, - &path, &sample); - - ppercents[i] = percent; - psamples[i] = sample; - if (percent > max_percent) - max_percent = percent; + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample = &al->samples[i]; + + if (sample->percent > max_percent) + max_percent = sample->percent; } if (max_percent < min_pcnt) @@ -1114,10 +1136,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (queue != NULL) { list_for_each_entry_from(queue, ¬es->src->source, node) { - if (queue == dl) + if (queue == al) break; - disasm_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + annotation_line__print(queue, sym, start, evsel, len, + 0, 0, 1, NULL, addr_fmt_width); } } @@ -1128,44 +1150,34 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. 
Don't print it * twice for close colored addr with the same filename:line */ - if (path) { - if (!prev_line || strcmp(prev_line, path) + if (al->path) { + if (!prev_line || strcmp(prev_line, al->path) || color != prev_color) { - color_fprintf(stdout, color, " %s", path); - prev_line = path; + color_fprintf(stdout, color, " %s", al->path); + prev_line = al->path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - percent = ppercents[i]; - sample = psamples[i]; - color = get_percent_color(percent); + struct annotation_data *sample = &al->samples[i]; + + color = get_percent_color(sample->percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, - sample.period); + sample->he.period); else if (symbol_conf.show_nr_samples) color_fprintf(stdout, color, " %7" PRIu64, - sample.nr_samples); + sample->he.nr_samples); else - color_fprintf(stdout, color, " %7.2f", percent); + color_fprintf(stdout, color, " %7.2f", sample->percent); } - printf(" : "); + printf(" : "); - br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); - annotate__branch_printf(br, addr); + disasm_line__print(dl, start, addr_fmt_width); printf("\n"); - - if (ppercents != &percent) - free(ppercents); - - if (psamples != &sample) - free(psamples); - } else if (max_lines && printed >= max_lines) return 1; else { @@ -1177,10 +1189,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->line) + if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->line); + printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); } return 0; @@ -1206,11 +1218,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. 
*/ -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, - FILE *file, size_t privsize, +static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, + struct annotate_args *args, int *line_nr) { + struct map *map = args->map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1254,7 +1266,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); + args->offset = offset; + args->line = parsed_line; + args->line_nr = *line_nr; + + dl = disasm_line__new(args); free(line); (*line_nr)++; @@ -1279,7 +1295,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, dl->ops.target.name = strdup(target.sym->name); } - disasm__add(¬es->src->source, dl); + annotation_line__add(&dl->al, ¬es->src->source); return 0; } @@ -1296,19 +1312,19 @@ static void delete_last_nop(struct symbol *sym) struct disasm_line *dl; while (!list_empty(list)) { - dl = list_entry(list->prev, struct disasm_line, node); + dl = list_entry(list->prev, struct disasm_line, al.node); if (dl->ins.ops) { if (dl->ins.ops != &nop_ops) return; } else { - if (!strstr(dl->line, " nop ") && - !strstr(dl->line, " nopl ") && - !strstr(dl->line, " nopw ")) + if (!strstr(dl->al.line, " nop ") && + !strstr(dl->al.line, " nopl ") && + !strstr(dl->al.line, " nopw ")) return; } - list_del(&dl->node); + list_del(&dl->al.node); disasm_line__free(dl); } } @@ -1403,25 +1419,11 @@ fallback: return 0; } -static const char *annotate__norm_arch(const char *arch_name) -{ - struct utsname uts; - - if (!arch_name) { /* Assume we are annotating locally. */ - if (uname(&uts) < 0) - return NULL; - arch_name = uts.machine; - } - return normalize_arch((char *)arch_name); -} - -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid) +static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct map *map = args->map; struct dso *dso = map->dso; char command[PATH_MAX * 2]; - struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1435,28 +1437,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, if (err) return err; - arch_name = annotate__norm_arch(arch_name); - if (!arch_name) - return -1; - - arch = arch__find(arch_name); - if (arch == NULL) - return -ENOTSUP; - - if (parch) - *parch = arch; - - if (arch->init) { - err = arch->init(arch); - if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); - return err; - } - } - - if (arch->cpuid_parse && cpuid) - arch->cpuid_parse(arch, cpuid); - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1540,8 +1520,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. 
*/ - if (symbol__parse_objdump_line(sym, map, arch, file, privsize, - &lineno) < 0) + if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) break; nline++; } @@ -1574,21 +1553,110 @@ out_close_stdout: goto out_remove_tmp; } -static void insert_source_line(struct rb_root *root, struct source_line *src_line) +static void calc_percent(struct sym_hist *hist, + struct annotation_data *sample, + s64 offset, s64 end) +{ + unsigned int hits = 0; + u64 period = 0; + + while (offset < end) { + hits += hist->addr[offset].nr_samples; + period += hist->addr[offset].period; + ++offset; + } + + if (hist->nr_samples) { + sample->he.period = period; + sample->he.nr_samples = hits; + sample->percent = 100.0 * hits / hist->nr_samples; + } +} + +static void annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) +{ + struct annotation_line *al, *next; + + list_for_each_entry(al, ¬es->src->source, node) { + s64 end; + int i; + + if (al->offset == -1) + continue; + + next = annotation_line__next(al, ¬es->src->source); + end = next ? next->offset : len; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + struct sym_hist *hist; + + hist = annotation__histogram(notes, evsel->idx + i); + sample = &al->samples[i]; + + calc_percent(hist, sample, al->offset, end); + } + } +} + +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) { - struct source_line *iter; + struct annotation *notes = symbol__annotation(sym); + + annotation__calc_percent(notes, evsel, symbol__size(sym)); +} + +int symbol__annotate(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, size_t privsize, + struct arch **parch) +{ + struct annotate_args args = { + .privsize = privsize, + .map = map, + .evsel = evsel, + }; + struct perf_env *env = perf_evsel__env(evsel); + const char *arch_name = perf_env__arch(env); + struct arch *arch; + int err; + + if (!arch_name) + return -1; + + args.arch = arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + + if (parch) + *parch = arch; + + if (arch->init) { + err = arch->init(arch, env ? 
env->cpuid : NULL); + if (err) { + pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + + return symbol__disassemble(sym, &args); +} + +static void insert_source_line(struct rb_root *root, struct annotation_line *al) +{ + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; int i, ret; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - ret = strcmp(iter->path, src_line->path); + ret = strcmp(iter->path, al->path); if (ret == 0) { - for (i = 0; i < src_line->nr_pcnt; i++) - iter->samples[i].percent_sum += src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + iter->samples[i].percent_sum += al->samples[i].percent; return; } @@ -1598,18 +1666,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin p = &(*p)->rb_right; } - for (i = 0; i < src_line->nr_pcnt; i++) - src_line->samples[i].percent_sum = src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + al->samples[i].percent_sum = al->samples[i].percent; - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } -static int cmp_source_line(struct source_line *a, struct source_line *b) +static int cmp_source_line(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent_sum == b->samples[i].percent_sum) continue; return a->samples[i].percent_sum > b->samples[i].percent_sum; @@ -1618,135 +1686,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b) return 0; } -static void __resort_source_line(struct rb_root *root, struct source_line *src_line) +static void __resort_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - if (cmp_source_line(src_line, iter)) + if (cmp_source_line(al, iter)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; node = rb_first(src_root); while (node) { struct rb_node *next; - src_line = rb_entry(node, struct source_line, node); + al = rb_entry(node, struct annotation_line, rb_node); next = rb_next(node); rb_erase(node, src_root); - __resort_source_line(dest_root, src_line); + __resort_source_line(dest_root, al); node = next; } } -static void symbol__free_source_line(struct symbol *sym, int len) -{ - struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src->lines; - size_t sizeof_src_line; - int i; - - sizeof_src_line = sizeof(*src_line) + - (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); - - for (i = 0; i < len; i++) { - free_srcline(src_line->path); - src_line = (void *)src_line + sizeof_src_line; - } - - zfree(¬es->src->lines); -} - -/* Get the filename:line for the colored 
entries */ -static int symbol__get_source_line(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct rb_root *root, int len) -{ - u64 start; - int i, k; - int evidx = evsel->idx; - struct source_line *src_line; - struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); - struct rb_root tmp_root = RB_ROOT; - int nr_pcnt = 1; - u64 nr_samples = h->nr_samples; - size_t sizeof_src_line = sizeof(struct source_line); - - if (perf_evsel__is_group_event(evsel)) { - for (i = 1; i < evsel->nr_members; i++) { - h = annotation__histogram(notes, evidx + i); - nr_samples += h->nr_samples; - } - nr_pcnt = evsel->nr_members; - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); - } - - if (!nr_samples) - return 0; - - src_line = notes->src->lines = calloc(len, sizeof_src_line); - if (!notes->src->lines) - return -1; - - start = map__rip_2objdump(map, sym->start); - - for (i = 0; i < len; i++) { - u64 offset; - double percent_max = 0.0; - - src_line->nr_pcnt = nr_pcnt; - - for (k = 0; k < nr_pcnt; k++) { - double percent = 0.0; - - h = annotation__histogram(notes, evidx + k); - nr_samples = h->addr[i].nr_samples; - if (h->nr_samples) - percent = 100.0 * nr_samples / h->nr_samples; - - if (percent > percent_max) - percent_max = percent; - src_line->samples[k].percent = percent; - src_line->samples[k].nr = nr_samples; - } - - if (percent_max <= 0.5) - goto next; - - offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, - false, true); - insert_source_line(&tmp_root, src_line); - - next: - src_line = (void *)src_line + sizeof_src_line; - } - - resort_source_line(root, &tmp_root); - return 0; -} - static void print_summary(struct rb_root *root, const char *filename) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; printf("\nSorted summary for file %s\n", filename); @@ -1764,9 +1744,9 @@ static void print_summary(struct rb_root *root, const char *filename) char *path; int i; - src_line = rb_entry(node, struct source_line, node); - for (i = 0; i < src_line->nr_pcnt; i++) { - percent = src_line->samples[i].percent_sum; + al = rb_entry(node, struct annotation_line, rb_node); + for (i = 0; i < al->samples_nr; i++) { + percent = al->samples[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); @@ -1774,7 +1754,7 @@ static void print_summary(struct rb_root *root, const char *filename) percent_max = percent; } - path = src_line->path; + path = al->path; color = get_percent_color(percent_max); color_fprintf(stdout, color, " %s\n", path); @@ -1795,6 +1775,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } +static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) +{ + char bf[32]; + struct annotation_line *line; + + list_for_each_entry_reverse(line, lines, node) { + if (line->offset != -1) + return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset); + } + + return 0; +} + int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context) @@ -1805,9 +1798,9 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); - struct disasm_line 
*pos, *queue = NULL; + struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0; + int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; u64 len; int width = symbol_conf.show_total_period ? 12 : 8; @@ -1838,15 +1831,21 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); + addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + list_for_each_entry(pos, ¬es->src->source, node) { + int err; + if (context && queue == NULL) { queue = pos; queue_len = 0; } - switch (disasm_line__print(pos, sym, start, evsel, len, - min_pcnt, printed, max_lines, - queue)) { + err = annotation_line__print(pos, sym, start, evsel, len, + min_pcnt, printed, max_lines, + queue, addr_fmt_width); + + switch (err) { case 0: ++printed; if (context) { @@ -1901,13 +1900,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) } } -void disasm__purge(struct list_head *head) +void annotated_source__purge(struct annotated_source *as) { - struct disasm_line *pos, *n; + struct annotation_line *al, *n; - list_for_each_entry_safe(pos, n, head, node) { - list_del(&pos->node); - disasm_line__free(pos); + list_for_each_entry_safe(al, n, &as->source, node) { + list_del(&al->node); + disasm_line__free(disasm_line(al)); } } @@ -1915,10 +1914,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) { size_t printed; - if (dl->offset == -1) - return fprintf(fp, "%s\n", dl->line); + if (dl->al.offset == -1) + return fprintf(fp, "%s\n", dl->al.line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", @@ -1933,38 +1932,73 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) struct disasm_line *pos; size_t printed = 0; - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, head, al.node) printed += disasm_line__fprintf(pos, fp); return printed; } +static void annotation__calc_lines(struct annotation *notes, struct map *map, + struct rb_root *root, u64 start) +{ + struct annotation_line *al; + struct rb_root tmp_root = RB_ROOT; + + list_for_each_entry(al, ¬es->src->source, node) { + double percent_max = 0.0; + int i; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + + sample = &al->samples[i]; + + if (sample->percent > percent_max) + percent_max = sample->percent; + } + + if (percent_max <= 0.5) + continue; + + al->path = get_srcline(map->dso, start + al->offset, NULL, + false, true, start + al->offset); + insert_source_line(&tmp_root, al); + } + + resort_source_line(root, &tmp_root); +} + +static void symbol__calc_lines(struct symbol *sym, struct map *map, + struct rb_root *root) +{ + struct annotation *notes = symbol__annotation(sym); + u64 start = map__rip_2objdump(map, sym->start); + + annotation__calc_lines(notes, map, root, start); +} + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) return -1; - len = symbol__size(sym); + symbol__calc_percent(sym, evsel); if (print_lines) { srcline_full_filename = 
full_paths; - symbol__get_source_line(sym, map, evsel, &source_line, len); + symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } symbol__annotate_printf(sym, map, evsel, full_paths, min_pcnt, max_lines, 0); - if (print_lines) - symbol__free_source_line(sym, len); - disasm__purge(&symbol__annotation(sym)->src->source); + annotated_source__purge(symbol__annotation(sym)->src); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 9ce575c25fd9..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_ANNOTATE_H #define __PERF_ANNOTATE_H @@ -58,33 +59,55 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct sym_hist_entry { + u64 nr_samples; + u64 period; +}; + +struct annotation_data { + double percent; + double percent_sum; + struct sym_hist_entry he; +}; + +struct annotation_line { + struct list_head node; + struct rb_node rb_node; + s64 offset; + char *line; + int line_nr; + float ipc; + u64 cycles; + size_t privsize; + char *path; + int samples_nr; + struct annotation_data samples[0]; +}; + struct disasm_line { - struct list_head node; - s64 offset; - char *line; - struct ins ins; - int line_nr; - float ipc; - u64 cycles; - struct ins_operands ops; + struct ins ins; + struct ins_operands ops; + + /* This needs to be at the end. */ + struct annotation_line al; }; +static inline struct disasm_line *disasm_line(struct annotation_line *al) +{ + return al ? container_of(al, struct disasm_line, al) : NULL; +} + static inline bool disasm_line__has_offset(const struct disasm_line *dl) { return dl->ops.target.offset_avail; } -struct sym_hist_entry { - u64 nr_samples; - u64 period; -}; - void disasm_line__free(struct disasm_line *dl); -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample); +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; @@ -103,19 +126,6 @@ struct cyc_hist { u16 reset; }; -struct source_line_samples { - double percent; - double percent_sum; - u64 nr; -}; - -struct source_line { - struct rb_node node; - char *path; - int nr_pcnt; - struct source_line_samples samples[1]; -}; - /** struct annotated_source - symbols with hits have this attached as in sannotation * * @histogram: Array of addr hit histograms per event being monitored @@ -131,7 +141,6 @@ struct source_line { */ struct annotated_source { struct list_head source; - struct source_line *lines; int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; @@ -168,9 +177,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid); +int symbol__annotate(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, size_t privsize, + struct arch **parch); 
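An editorial aside (not part of the commit): the header change above replaces symbol__disassemble() with a two-step API, so a caller now disassembles first and computes per-event percentages separately. A minimal sketch of how a caller is expected to drive the reworked interface, modelled on the symbol__tty_annotate() changes in annotate.c above; the helper name annotate_and_print() is hypothetical and error handling is trimmed:

static int annotate_and_print(struct symbol *sym, struct map *map,
			      struct perf_evsel *evsel)
{
	/* Disassemble the symbol and build the annotation_line list. */
	if (symbol__annotate(sym, map, evsel, 0, NULL) < 0)
		return -1;

	/* Fill in per-event percentages from the sym_hist histograms. */
	symbol__calc_percent(sym, evsel);

	/* Print the annotated listing, then drop the per-line data. */
	symbol__annotate_printf(sym, map, evsel, false, 0, 0, 0);
	annotated_source__purge(symbol__annotation(sym)->src);

	return 0;
}

The split reflects the design choice visible in the diff: percentages now live in the samples[] array of struct annotation_line (one slot per group member), so they are computed once by symbol__calc_percent() instead of being recalculated in every output pass.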
enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, @@ -197,7 +206,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int min_pcnt, int max_lines, int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void disasm__purge(struct list_head *head); +void annotated_source__purge(struct annotated_source *as); bool ui__has_annotation(void); diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-pkt-decoder.c new file mode 100644 index 000000000000..b94001b756c7 --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "arm-spe-pkt-decoder.h" + +#define BIT(n) (1ULL << (n)) + +#define NS_FLAG BIT(63) +#define EL_FLAG (BIT(62) | BIT(61)) + +#define SPE_HEADER0_PAD 0x0 +#define SPE_HEADER0_END 0x1 +#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */ +#define SPE_HEADER0_ADDRESS_MASK 0x38 +#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */ +#define SPE_HEADER0_COUNTER_MASK 0x38 +#define SPE_HEADER0_TIMESTAMP 0x71 +#define SPE_HEADER0_TIMESTAMP 0x71 +#define SPE_HEADER0_EVENTS 0x2 +#define SPE_HEADER0_EVENTS_MASK 0xf +#define SPE_HEADER0_SOURCE 0x3 +#define SPE_HEADER0_SOURCE_MASK 0xf +#define SPE_HEADER0_CONTEXT 0x24 +#define SPE_HEADER0_CONTEXT_MASK 0x3c +#define SPE_HEADER0_OP_TYPE 0x8 +#define SPE_HEADER0_OP_TYPE_MASK 0x3c +#define SPE_HEADER1_ALIGNMENT 0x0 +#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */ +#define SPE_HEADER1_ADDRESS_MASK 0xf8 +#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */ +#define SPE_HEADER1_COUNTER_MASK 0xf8 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define memcpy_le64(d, s, n) do { \ + memcpy((d), (s), (n)); \ + *(d) = le64_to_cpu(*(d)); \ +} while (0) +#else +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define memcpy_le64 memcpy +#endif + +static const char * const arm_spe_packet_name[] = { + [ARM_SPE_PAD] = "PAD", + [ARM_SPE_END] = "END", + [ARM_SPE_TIMESTAMP] = "TS", + [ARM_SPE_ADDRESS] = "ADDR", + [ARM_SPE_COUNTER] = "LAT", + [ARM_SPE_CONTEXT] = "CONTEXT", + [ARM_SPE_OP_TYPE] = "OP-TYPE", + [ARM_SPE_EVENTS] = "EVENTS", + [ARM_SPE_DATA_SOURCE] = "DATA-SOURCE", +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type type) +{ + return arm_spe_packet_name[type]; +} + +/* return ARM SPE payload size from its encoding, + * which is in bits 5:4 of the byte. 
+ * 00 : byte + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) + */ +static int payloadlen(unsigned char byte) +{ + return 1 << ((byte & 0x30) >> 4); +} + +static int arm_spe_get_payload(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + size_t payload_len = payloadlen(buf[0]); + + if (len < 1 + payload_len) + return ARM_SPE_NEED_MORE_BYTES; + + buf++; + + switch (payload_len) { + case 1: packet->payload = *(uint8_t *)buf; break; + case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break; + case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; + case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; + default: return ARM_SPE_BAD_PACKET; + } + + return 1 + payload_len; +} + +static int arm_spe_get_pad(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_PAD; + return 1; +} + +static int arm_spe_get_alignment(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int alignment = 1 << ((buf[0] & 0xf) + 1); + + if (len < alignment) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_PAD; + return alignment - (((uintptr_t)buf) & (alignment - 1)); +} + +static int arm_spe_get_end(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_END; + return 1; +} + +static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_TIMESTAMP; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_events(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret = arm_spe_get_payload(buf, len, packet); + + packet->type = ARM_SPE_EVENTS; + + /* we use index to identify Events with a less number of + * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, + * LLC-REFILL, and REMOTE-ACCESS events are identified iff + * index > 1. 
+ */ + packet->index = ret - 1; + + return ret; +} + +static int arm_spe_get_data_source(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_DATA_SOURCE; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_context(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_CONTEXT; + packet->index = buf[0] & 0x3; + + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_op_type(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_OP_TYPE; + packet->index = buf[0] & 0x3; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_counter(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + if (len < 2) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_COUNTER; + if (ext_hdr) + packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + else + packet->index = buf[0] & 0x7; + + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); + + return 1 + ext_hdr + 2; +} + +static int arm_spe_get_addr(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + if (len < 8) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_ADDRESS; + if (ext_hdr) + packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + else + packet->index = buf[0] & 0x7; + + memcpy_le64(&packet->payload, buf + 1, 8); + + return 1 + ext_hdr + 8; +} + +static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int byte; + + memset(packet, 0, sizeof(struct arm_spe_pkt)); + + if (!len) + return ARM_SPE_NEED_MORE_BYTES; + + byte = buf[0]; + if (byte == SPE_HEADER0_PAD) + return arm_spe_get_pad(packet); + else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */ + return arm_spe_get_end(packet); + else if (byte & 0xc0 /* 0y11xxxxxx */) { + if (byte & 0x80) { + if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS) + return arm_spe_get_addr(buf, len, 0, packet); + if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER) + return arm_spe_get_counter(buf, len, 0, packet); + } else + if (byte == SPE_HEADER0_TIMESTAMP) + return arm_spe_get_timestamp(buf, len, packet); + else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS) + return arm_spe_get_events(buf, len, packet); + else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE) + return arm_spe_get_data_source(buf, len, packet); + else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT) + return arm_spe_get_context(buf, len, packet); + else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE) + return arm_spe_get_op_type(buf, len, packet); + } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) { + /* 16-bit header */ + byte = buf[1]; + if (byte == SPE_HEADER1_ALIGNMENT) + return arm_spe_get_alignment(buf, len, packet); + else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS) + return arm_spe_get_addr(buf, len, 1, packet); + else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER) + return arm_spe_get_counter(buf, len, 1, packet); + } + + return ARM_SPE_BAD_PACKET; +} + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret; + + ret = arm_spe_do_get_packet(buf, len, packet); + /* put multiple consecutive PADs on the same line, up to + * the fixed-width output format of 16 bytes per line. 
+ */ + if (ret > 0 && packet->type == ARM_SPE_PAD) { + while (ret < 16 && len > (size_t)ret && !buf[ret]) + ret += 1; + } + return ret; +} + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, + size_t buf_len) +{ + int ret, ns, el, idx = packet->index; + unsigned long long payload = packet->payload; + const char *name = arm_spe_pkt_name(packet->type); + + switch (packet->type) { + case ARM_SPE_BAD: + case ARM_SPE_PAD: + case ARM_SPE_END: + return snprintf(buf, buf_len, "%s", name); + case ARM_SPE_EVENTS: { + size_t blen = buf_len; + + ret = 0; + ret = snprintf(buf, buf_len, "EV"); + buf += ret; + blen -= ret; + if (payload & 0x1) { + ret = snprintf(buf, buf_len, " EXCEPTION-GEN"); + buf += ret; + blen -= ret; + } + if (payload & 0x2) { + ret = snprintf(buf, buf_len, " RETIRED"); + buf += ret; + blen -= ret; + } + if (payload & 0x4) { + ret = snprintf(buf, buf_len, " L1D-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x8) { + ret = snprintf(buf, buf_len, " L1D-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x10) { + ret = snprintf(buf, buf_len, " TLB-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x20) { + ret = snprintf(buf, buf_len, " TLB-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x40) { + ret = snprintf(buf, buf_len, " NOT-TAKEN"); + buf += ret; + blen -= ret; + } + if (payload & 0x80) { + ret = snprintf(buf, buf_len, " MISPRED"); + buf += ret; + blen -= ret; + } + if (idx > 1) { + if (payload & 0x100) { + ret = snprintf(buf, buf_len, " LLC-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x200) { + ret = snprintf(buf, buf_len, " LLC-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x400) { + ret = snprintf(buf, buf_len, " REMOTE-ACCESS"); + buf += ret; + blen -= ret; + } + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case ARM_SPE_OP_TYPE: + switch (idx) { + case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ? + "COND-SELECT" : "INSN-OTHER"); + case 1: { + size_t blen = buf_len; + + if (payload & 0x1) + ret = snprintf(buf, buf_len, "ST"); + else + ret = snprintf(buf, buf_len, "LD"); + buf += ret; + blen -= ret; + if (payload & 0x2) { + if (payload & 0x4) { + ret = snprintf(buf, buf_len, " AT"); + buf += ret; + blen -= ret; + } + if (payload & 0x8) { + ret = snprintf(buf, buf_len, " EXCL"); + buf += ret; + blen -= ret; + } + if (payload & 0x10) { + ret = snprintf(buf, buf_len, " AR"); + buf += ret; + blen -= ret; + } + } else if (payload & 0x4) { + ret = snprintf(buf, buf_len, " SIMD-FP"); + buf += ret; + blen -= ret; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case 2: { + size_t blen = buf_len; + + ret = snprintf(buf, buf_len, "B"); + buf += ret; + blen -= ret; + if (payload & 0x1) { + ret = snprintf(buf, buf_len, " COND"); + buf += ret; + blen -= ret; + } + if (payload & 0x2) { + ret = snprintf(buf, buf_len, " IND"); + buf += ret; + blen -= ret; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + default: return 0; + } + case ARM_SPE_DATA_SOURCE: + case ARM_SPE_TIMESTAMP: + return snprintf(buf, buf_len, "%s %lld", name, payload); + case ARM_SPE_ADDRESS: + switch (idx) { + case 0: + case 1: ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d", + (idx == 1) ? 
"TGT" : "PC", payload, el, ns); + case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload); + case 3: ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + return snprintf(buf, buf_len, "PA 0x%llx ns=%d", + payload, ns); + default: return 0; + } + case ARM_SPE_CONTEXT: + return snprintf(buf, buf_len, "%s 0x%lx el%d", name, + (unsigned long)payload, idx + 1); + case ARM_SPE_COUNTER: { + size_t blen = buf_len; + + ret = snprintf(buf, buf_len, "%s %d ", name, + (unsigned short)payload); + buf += ret; + blen -= ret; + switch (idx) { + case 0: ret = snprintf(buf, buf_len, "TOT"); break; + case 1: ret = snprintf(buf, buf_len, "ISSUE"); break; + case 2: ret = snprintf(buf, buf_len, "XLAT"); break; + default: ret = 0; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + default: + break; + } + + return snprintf(buf, buf_len, "%s 0x%llx (%d)", + name, payload, packet->index); +} diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-pkt-decoder.h new file mode 100644 index 000000000000..d786ef65113f --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ +#define INCLUDE__ARM_SPE_PKT_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define ARM_SPE_PKT_DESC_MAX 256 + +#define ARM_SPE_NEED_MORE_BYTES -1 +#define ARM_SPE_BAD_PACKET -2 + +enum arm_spe_pkt_type { + ARM_SPE_BAD, + ARM_SPE_PAD, + ARM_SPE_END, + ARM_SPE_TIMESTAMP, + ARM_SPE_ADDRESS, + ARM_SPE_COUNTER, + ARM_SPE_CONTEXT, + ARM_SPE_OP_TYPE, + ARM_SPE_EVENTS, + ARM_SPE_DATA_SOURCE, +}; + +struct arm_spe_pkt { + enum arm_spe_pkt_type type; + unsigned char index; + uint64_t payload; +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type); + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet); + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len); +#endif diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c new file mode 100644 index 000000000000..6067267cc76c --- /dev/null +++ b/tools/perf/util/arm-spe.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include <endian.h> +#include <errno.h> +#include <byteswap.h> +#include <inttypes.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "debug.h" +#include "auxtrace.h" +#include "arm-spe.h" +#include "arm-spe-pkt-decoder.h" + +struct arm_spe { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + u32 pmu_type; +}; + +struct arm_spe_queue { + struct arm_spe *spe; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; +}; + +static void arm_spe_dump(struct arm_spe *spe __maybe_unused, + unsigned char *buf, size_t len) +{ + struct arm_spe_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[ARM_SPE_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... 
ARM SPE data: size %zu bytes\n", + len); + + while (len) { + ret = arm_spe_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = arm_spe_pkt_desc(&packet, desc, + ARM_SPE_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, + size_t len) +{ + printf(".\n"); + arm_spe_dump(spe, buf, len); +} + +static int arm_spe_process_event(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static int arm_spe_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + int err; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&spe->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + arm_spe_dump_event(spe, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + + return 0; +} + +static int arm_spe_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static void arm_spe_free_queue(void *priv) +{ + struct arm_spe_queue *speq = priv; + + if (!speq) + return; + free(speq); +} + +static void arm_spe_free_events(struct perf_session *session) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + struct auxtrace_queues *queues = &spe->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + arm_spe_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +} + +static void arm_spe_free(struct perf_session *session) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + + auxtrace_heap__free(&spe->heap); + arm_spe_free_events(session); + session->auxtrace = NULL; + free(spe); +} + +static const char * const arm_spe_info_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +}; + +static void arm_spe_print_info(u64 *arr) +{ + if (!dump_trace) + return; + + fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); +} + +int arm_spe_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; + struct arm_spe *spe; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + spe = zalloc(sizeof(struct arm_spe)); + if (!spe) + return -ENOMEM; + + err = auxtrace_queues__init(&spe->queues); + if (err) + goto err_free; + 
+ spe->session = session; + spe->machine = &session->machines.host; /* No kvm support */ + spe->auxtrace_type = auxtrace_info->type; + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + + spe->auxtrace.process_event = arm_spe_process_event; + spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; + spe->auxtrace.flush_events = arm_spe_flush; + spe->auxtrace.free_events = arm_spe_free_events; + spe->auxtrace.free = arm_spe_free; + session->auxtrace = &spe->auxtrace; + + arm_spe_print_info(&auxtrace_info->priv[0]); + + return 0; + +err_free: + free(spe); + return err; +} diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h new file mode 100644 index 000000000000..98d3235781c3 --- /dev/null +++ b/tools/perf/util/arm-spe.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__PERF_ARM_SPE_H__ +#define INCLUDE__PERF_ARM_SPE_H__ + +#define ARM_SPE_PMU_NAME "arm_spe_" + +enum { + ARM_SPE_PMU_TYPE, + ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) + +union perf_event; +struct perf_session; +struct perf_pmu; + +struct auxtrace_record *arm_spe_recording_init(int *err, + struct perf_pmu *arm_spe_pmu); + +int arm_spe_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu); +#endif diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5547457566a7..9faf3b5367db 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@ #include <sys/param.h> #include <stdlib.h> #include <stdio.h> -#include <string.h> -#include <limits.h> -#include <errno.h> #include <linux/list.h> #include "../perf.h" @@ -55,8 +52,10 @@ #include "debug.h" #include <subcmd/parse-options.h> +#include "cs-etm.h" #include "intel-pt.h" #include "intel-bts.h" +#include "arm-spe.h" #include "sane_ctype.h" #include "symbol/kallsyms.h" @@ -208,7 +207,7 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues, static void *auxtrace_copy_data(u64 size, struct perf_session *session) { - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); void *p; ssize_t ret; @@ -305,7 +304,7 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, if (session->one_mmap) { buffer->data = buffer->data_offset - session->one_mmap_offset + session->one_mmap_addr; - } else if (perf_data_file__is_pipe(session->file)) { + } else if (perf_data__is_pipe(session->data)) { buffer->data = auxtrace_copy_data(buffer->size, session); if (!buffer->data) return -ENOMEM; @@ -913,7 +912,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, return intel_pt_process_auxtrace_info(event, session); case PERF_AUXTRACE_INTEL_BTS: return intel_bts_process_auxtrace_info(event, session); + case PERF_AUXTRACE_ARM_SPE: + return arm_spe_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: + return cs_etm__process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 33b5e6cdf38c..453c148d2158 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -43,6 +43,7 @@ enum auxtrace_type { PERF_AUXTRACE_INTEL_PT, PERF_AUXTRACE_INTEL_BTS, PERF_AUXTRACE_CS_ETM, + 
PERF_AUXTRACE_ARM_SPE, }; enum itrace_period_type { @@ -378,7 +379,7 @@ struct addr_filters { static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm) { struct perf_event_mmap_page *pc = mm->userpg; - u64 head = ACCESS_ONCE(pc->aux_head); + u64 head = READ_ONCE(pc->aux_head); /* Ensure all reads are done after we read the head */ rmb(); @@ -389,7 +390,7 @@ static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm) { struct perf_event_mmap_page *pc = mm->userpg; #if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - u64 head = ACCESS_ONCE(pc->aux_head); + u64 head = READ_ONCE(pc->aux_head); #else u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0); #endif diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index 7b3e1d75d803..f1451c987eec 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "block-range.h" #include "annotate.h" diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h index a8c841381131..a5ba719d69fb 100644 --- a/tools/perf/util/block-range.h +++ b/tools/perf/util/block-range.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_BLOCK_RANGE_H #define __PERF_BLOCK_RANGE_H diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 4a1264c66101..af7ad814b2c3 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bpf-loader.c * @@ -93,7 +94,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz); perf_clang__cleanup(); if (err) { - pr_warning("bpf: builtin compilation failed: %d, try external compiler\n", err); + pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err); err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); if (err) return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); @@ -1532,7 +1533,7 @@ int bpf__apply_obj_config(void) (strcmp("__bpf_stdout__", \ bpf_map__name(pos)) == 0)) -int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +int bpf__setup_stdout(struct perf_evlist *evlist) { struct bpf_map_priv *tmpl_priv = NULL; struct bpf_object *obj, *tmp; diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 48863867878b..5d3aefd6fae7 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015, Huawei Inc. diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 827f9140f3b8..29347756b0af 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bpf-prologue.c * diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h index ba564838375f..c50c7358009f 100644 --- a/tools/perf/util/bpf-prologue.h +++ b/tools/perf/util/bpf-prologue.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015, He Kuang <hekuang@huawei.com> * Copyright (C) 2015, Huawei Inc. 
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index c1a06fcd7e70..7f8553630c4d 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * build-id.c * diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index c94b0dcbfd74..f0c565164a97 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_BUILD_ID_H_ #define PERF_BUILD_ID_H_ 1 diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h index ccafcf72b37a..e513366f2ee0 100644 --- a/tools/perf/util/c++/clang-c.h +++ b/tools/perf/util/c++/clang-c.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_UTIL_CLANG_C_H #define PERF_UTIL_CLANG_C_H diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp index 9b11e8c82798..a4014d786676 100644 --- a/tools/perf/util/c++/clang-test.cpp +++ b/tools/perf/util/c++/clang-test.cpp @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "clang.h" #include "clang-c.h" #include "llvm/IR/Function.h" diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index 1e974152cac2..1bfc946e37dc 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * llvm C frontend for perf. Support dynamically compile C file * diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h index dd8b0427550d..6ce33e22f23c 100644 --- a/tools/perf/util/c++/clang.h +++ b/tools/perf/util/c++/clang.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_UTIL_CLANG_H #define PERF_UTIL_CLANG_H diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 0175765c05b9..9f2e36ef5072 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_CACHE_H #define __PERF_CACHE_H diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 510b513e0f01..32ef7bdca1cf 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com> * @@ -36,6 +37,15 @@ struct callchain_param callchain_param = { CALLCHAIN_PARAM_DEFAULT }; +/* + * Are there any events usind DWARF callchains? + * + * I.e. 
+ * + * -e cycles/call-graph=dwarf/ + */ +bool dwarf_callchain_users; + struct callchain_param callchain_param_default = { CALLCHAIN_PARAM_DEFAULT }; @@ -65,8 +75,6 @@ static int parse_callchain_mode(const char *value) callchain_param.mode = CHAIN_FOLDED; return 0; } - - pr_err("Invalid callchain mode: %s\n", value); return -1; } @@ -82,8 +90,6 @@ static int parse_callchain_order(const char *value) callchain_param.order_set = true; return 0; } - - pr_err("Invalid callchain order: %s\n", value); return -1; } @@ -105,8 +111,6 @@ static int parse_callchain_sort_key(const char *value) callchain_param.branch_callstack = 1; return 0; } - - pr_err("Invalid callchain sort key: %s\n", value); return -1; } @@ -124,8 +128,6 @@ static int parse_callchain_value(const char *value) callchain_param.value = CCVAL_COUNT; return 0; } - - pr_err("Invalid callchain config key: %s\n", value); return -1; } @@ -272,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) ret = 0; param->record_mode = CALLCHAIN_DWARF; param->dump_size = default_stack_dump_size; + dwarf_callchain_users = true; tok = strtok_r(NULL, ",", &saveptr); if (tok) { @@ -319,12 +322,27 @@ int perf_callchain_config(const char *var, const char *value) return ret; } - if (!strcmp(var, "print-type")) - return parse_callchain_mode(value); - if (!strcmp(var, "order")) - return parse_callchain_order(value); - if (!strcmp(var, "sort-key")) - return parse_callchain_sort_key(value); + if (!strcmp(var, "print-type")){ + int ret; + ret = parse_callchain_mode(value); + if (ret == -1) + pr_err("Invalid callchain mode: %s\n", value); + return ret; + } + if (!strcmp(var, "order")){ + int ret; + ret = parse_callchain_order(value); + if (ret == -1) + pr_err("Invalid callchain order: %s\n", value); + return ret; + } + if (!strcmp(var, "sort-key")){ + int ret; + ret = parse_callchain_sort_key(value); + if (ret == -1) + pr_err("Invalid callchain sort key: %s\n", value); + return ret; + } if (!strcmp(var, "threshold")) { callchain_param.min_percent = strtod(value, &endptr); if (value == endptr) { @@ -559,6 +577,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; call->ms.map = map__get(cursor_node->map); + call->srcline = cursor_node->srcline; if (cursor_node->branch) { call->branch_count = 1; @@ -637,99 +656,120 @@ enum match_result { MATCH_GT, }; -static enum match_result match_chain_srcline(struct callchain_cursor_node *node, - struct callchain_list *cnode) +static enum match_result match_chain_strings(const char *left, + const char *right) { - char *left = NULL; - char *right = NULL; enum match_result ret = MATCH_EQ; int cmp; - if (cnode->ms.map) - left = get_srcline(cnode->ms.map->dso, - map__rip_2objdump(cnode->ms.map, cnode->ip), - cnode->ms.sym, true, false); - if (node->map) - right = get_srcline(node->map->dso, - map__rip_2objdump(node->map, node->ip), - node->sym, true, false); - if (left && right) cmp = strcmp(left, right); else if (!left && right) cmp = 1; else if (left && !right) cmp = -1; - else if (cnode->ip == node->ip) - cmp = 0; else - cmp = (cnode->ip < node->ip) ? -1 : 1; + return MATCH_ERROR; if (cmp != 0) ret = cmp < 0 ? MATCH_LT : MATCH_GT; - free_srcline(left); - free_srcline(right); return ret; } +/* + * We need to always use relative addresses because we're aggregating + * callchains from multiple threads, i.e. 
different address spaces, so + * comparing absolute addresses make no sense as a symbol in a DSO may end up + * in a different address when used in a different binary or even the same + * binary but with some sort of address randomization technique, thus we need + * to compare just relative addresses. -acme + */ +static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip, + struct map *right_map, u64 right_ip) +{ + struct dso *left_dso = left_map ? left_map->dso : NULL; + struct dso *right_dso = right_map ? right_map->dso : NULL; + + if (left_dso != right_dso) + return left_dso < right_dso ? MATCH_LT : MATCH_GT; + + if (left_ip != right_ip) + return left_ip < right_ip ? MATCH_LT : MATCH_GT; + + return MATCH_EQ; +} + static enum match_result match_chain(struct callchain_cursor_node *node, struct callchain_list *cnode) { - struct symbol *sym = node->sym; - u64 left, right; - - if (callchain_param.key == CCKEY_SRCLINE) { - enum match_result match = match_chain_srcline(node, cnode); + enum match_result match = MATCH_ERROR; + switch (callchain_param.key) { + case CCKEY_SRCLINE: + match = match_chain_strings(cnode->srcline, node->srcline); if (match != MATCH_ERROR) - return match; + break; + /* otherwise fall-back to symbol-based comparison below */ + __fallthrough; + case CCKEY_FUNCTION: + if (node->sym && cnode->ms.sym) { + /* + * Compare inlined frames based on their symbol name + * because different inlined frames will have the same + * symbol start. Otherwise do a faster comparison based + * on the symbol start address. + */ + if (cnode->ms.sym->inlined || node->sym->inlined) { + match = match_chain_strings(cnode->ms.sym->name, + node->sym->name); + if (match != MATCH_ERROR) + break; + } else { + match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start, + node->map, node->sym->start); + break; + } + } + /* otherwise fall-back to IP-based comparison below */ + __fallthrough; + case CCKEY_ADDRESS: + default: + match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->map, node->ip); + break; } - if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { - left = cnode->ms.sym->start; - right = sym->start; - } else { - left = cnode->ip; - right = node->ip; - } + if (match == MATCH_EQ && node->branch) { + cnode->branch_count++; - if (left == right) { - if (node->branch) { - cnode->branch_count++; + if (node->branch_from) { + /* + * It's "to" of a branch + */ + cnode->brtype_stat.branch_to = true; - if (node->branch_from) { - /* - * It's "to" of a branch - */ - cnode->brtype_stat.branch_to = true; - - if (node->branch_flags.predicted) - cnode->predicted_count++; + if (node->branch_flags.predicted) + cnode->predicted_count++; - if (node->branch_flags.abort) - cnode->abort_count++; + if (node->branch_flags.abort) + cnode->abort_count++; - branch_type_count(&cnode->brtype_stat, - &node->branch_flags, - node->branch_from, - node->ip); - } else { - /* - * It's "from" of a branch - */ - cnode->brtype_stat.branch_to = false; - cnode->cycles_count += - node->branch_flags.cycles; - cnode->iter_count += node->nr_loop_iter; - cnode->iter_cycles += node->iter_cycles; - } + branch_type_count(&cnode->brtype_stat, + &node->branch_flags, + node->branch_from, + node->ip); + } else { + /* + * It's "from" of a branch + */ + cnode->brtype_stat.branch_to = false; + cnode->cycles_count += node->branch_flags.cycles; + cnode->iter_count += node->nr_loop_iter; + cnode->iter_cycles += node->iter_cycles; } - - return MATCH_EQ; } - return left > right ? 
MATCH_GT : MATCH_LT; + return match; } /* @@ -958,7 +998,7 @@ merge_chain_branch(struct callchain_cursor *cursor, list_for_each_entry_safe(list, next_list, &src->val, list) { callchain_cursor_append(cursor, list->ip, list->ms.map, list->ms.sym, - false, NULL, 0, 0, 0); + false, NULL, 0, 0, 0, list->srcline); list_del(&list->list); map__zput(list->ms.map); free(list); @@ -998,7 +1038,8 @@ int callchain_merge(struct callchain_cursor *cursor, int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, u64 iter_cycles, u64 branch_from) + int nr_loop_iter, u64 iter_cycles, u64 branch_from, + const char *srcline) { struct callchain_cursor_node *node = *cursor->last; @@ -1017,6 +1058,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; + node->srcline = srcline; if (flags) memcpy(&node->branch_flags, flags, @@ -1059,10 +1101,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * { al->map = node->map; al->sym = node->sym; - if (node->map) - al->addr = node->map->map_ip(node->map, node->ip); - else - al->addr = node->ip; + al->srcline = node->srcline; + al->addr = node->ip; if (al->sym == NULL) { if (hide_unresolved) @@ -1104,16 +1144,15 @@ char *callchain_list__sym_name(struct callchain_list *cl, int printed; if (cl->ms.sym) { - if (show_srcline && cl->ms.map && !cl->srcline) - cl->srcline = get_srcline(cl->ms.map->dso, - map__rip_2objdump(cl->ms.map, - cl->ip), - cl->ms.sym, false, show_addr); - if (cl->srcline) - printed = scnprintf(bf, bfsize, "%s %s", - cl->ms.sym->name, cl->srcline); + const char *inlined = cl->ms.sym->inlined ? 
" (inlined)" : ""; + + if (show_srcline && cl->srcline) + printed = scnprintf(bf, bfsize, "%s %s%s", + cl->ms.sym->name, cl->srcline, + inlined); else - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + printed = scnprintf(bf, bfsize, "%s%s", + cl->ms.sym->name, inlined); } else printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); @@ -1521,7 +1560,7 @@ int callchain_cursor__copy(struct callchain_cursor *dst, node->branch, &node->branch_flags, node->nr_loop_iter, node->iter_cycles, - node->branch_from); + node->branch_from, node->srcline); if (rc) break; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 1ed6fc61d0a5..154560b1eb65 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_CALLCHAIN_H #define __PERF_CALLCHAIN_H @@ -88,6 +89,8 @@ enum chain_value { CCVAL_COUNT, }; +extern bool dwarf_callchain_users; + struct callchain_param { bool enabled; enum perf_call_graph_mode record_mode; @@ -121,7 +124,7 @@ struct callchain_list { u64 iter_count; u64 iter_cycles; struct branch_type_stat brtype_stat; - char *srcline; + const char *srcline; struct list_head list; }; @@ -135,6 +138,7 @@ struct callchain_cursor_node { u64 ip; struct map *map; struct symbol *sym; + const char *srcline; bool branch; struct branch_flags branch_flags; u64 branch_from; @@ -201,7 +205,8 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, u64 iter_cycles, u64 branch_from); + int nr_loop_iter, u64 iter_cycles, u64 branch_from, + const char *srcline); /* Close a cursor writing session. 
Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 0e77bc9e5f3c..984f69144f87 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "util.h" #include "../perf.h" #include <subcmd/parse-options.h> @@ -5,6 +6,9 @@ #include "cgroup.h" #include "evlist.h" #include <linux/stringify.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> int nr_cgroups; diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index d91966b97cbd..afafc87e9201 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_H__ #define __CGROUP_H__ diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 4b4f00df58a8..ca0fff6272be 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <sched.h> #include "util.h" diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 94a5a7d829d5..78216b1015c4 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_CLOEXEC_H #define __PERF_CLOEXEC_H diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index dbbf89b050a5..39e628b8938e 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include "cache.h" #include "config.h" diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 52122bcc3170..22777b1812ee 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 7bc981b6bf29..7798a2cc8a86 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "comm.h" #include "util.h" #include <errno.h> @@ -5,6 +6,7 @@ #include <stdio.h> #include <string.h> #include <linux/refcount.h> +#include "rwsem.h" struct comm_str { char *str; @@ -14,6 +16,7 @@ struct comm_str { /* Should perhaps be moved to struct machine */ static struct rb_root comm_str_root; +static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,}; static struct comm_str *comm_str__get(struct comm_str *cs) { @@ -25,7 +28,9 @@ static struct comm_str *comm_str__get(struct comm_str *cs) static void comm_str__put(struct comm_str *cs) { if (cs && refcount_dec_and_test(&cs->refcnt)) { + down_write(&comm_str_lock); rb_erase(&cs->rb_node, &comm_str_root); + up_write(&comm_str_lock); zfree(&cs->str); free(cs); } @@ -50,7 +55,8 @@ static struct comm_str *comm_str__alloc(const char *str) return cs; } -static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) +static +struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -81,6 +87,17 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) return new; } +static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) +{ + struct comm_str *cs; + + down_write(&comm_str_lock); + cs = __comm_str__findnew(str, root); + up_write(&comm_str_lock); + + 
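comm_str__findnew() is now split into an unlocked __comm_str__findnew() helper plus a wrapper that takes comm_str_lock for writing, so the rb-tree insertion here and the rb_erase() in comm_str__put() are serialized. A minimal sketch of that locked-wrapper/unlocked-helper split, using a plain pthread rwlock and a toy string array in place of the rb-tree; all names below are illustrative only.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define NR_SLOTS 64

/* Toy intern table standing in for the comm_str rb-tree. */
static const char *interned[NR_SLOTS];
static pthread_rwlock_t intern_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Unlocked worker: callers must hold intern_lock for writing. */
static const char *__intern_findnew(const char *str)
{
	int i;

	for (i = 0; i < NR_SLOTS; i++) {
		if (!interned[i]) {
			interned[i] = strdup(str);
			return interned[i];
		}
		if (!strcmp(interned[i], str))
			return interned[i];
	}
	return NULL;	/* table full */
}

/* Locked wrapper, mirroring the findnew()/__findnew() split above. */
static const char *intern_findnew(const char *str)
{
	const char *ret;

	pthread_rwlock_wrlock(&intern_lock);
	ret = __intern_findnew(str);
	pthread_rwlock_unlock(&intern_lock);

	return ret;
}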
return cs; +} + struct comm *comm__new(const char *str, u64 timestamp, bool exec) { struct comm *comm = zalloc(sizeof(*comm)); diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h index 71c9c39340d4..3e5c438fe85e 100644 --- a/tools/perf/util/comm.h +++ b/tools/perf/util/comm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_COMM_H #define __PERF_COMM_H diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 67fd1bb7c2b7..ecca688a25fb 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_COMPRESS_H #define PERF_COMPRESS_H diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index bc75596f9e79..84eb9393c7db 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * config.c * @@ -700,10 +701,7 @@ struct perf_config_set *perf_config_set__new(void) if (set) { INIT_LIST_HEAD(&set->sections); - if (perf_config_set__init(set) < 0) { - perf_config_set__delete(set); - set = NULL; - } + perf_config_set__init(set); } return set; diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index b6bb11f3f165..baf82bf227ac 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_CONFIG_H #define __PERF_CONFIG_H diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index c4af82ab7808..03032b410c29 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <stdlib.h> #include "evsel.h" diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index cb45a6aecf9d..0d1050ccc586 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_COUNTS_H #define __PERF_COUNTS_H diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 37b3bb79ee08..1ccbd3342069 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "util.h" #include <api/fs/fs.h> #include "../perf.h" diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 6b8bff87481d..ed8999d1a640 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build new file mode 100644 index 000000000000..bc22c39c727f --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c new file mode 100644 index 000000000000..1fb01849f1c7 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -0,0 +1,513 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. 
+ * + * Author: Tor Jeremiassen <tor@ti.com> + * Author: Mathieu Poirier <mathieu.poirier@linaro.org> + */ + +#include <linux/err.h> +#include <linux/list.h> +#include <stdlib.h> +#include <opencsd/c_api/opencsd_c_api.h> +#include <opencsd/etmv4/trc_pkt_types_etmv4.h> +#include <opencsd/ocsd_if_types.h> + +#include "cs-etm.h" +#include "cs-etm-decoder.h" +#include "intlist.h" +#include "util.h" + +#define MAX_BUFFER 1024 + +/* use raw logging */ +#ifdef CS_DEBUG_RAW +#define CS_LOG_RAW_FRAMES +#ifdef CS_RAW_PACKED +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \ + OCSD_DFRMTR_PACKED_RAW_OUT) +#else +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT) +#endif +#endif + +struct cs_etm_decoder { + void *data; + void (*packet_printer)(const char *msg); + bool trace_on; + dcd_tree_handle_t dcd_tree; + cs_etm_mem_cb_type mem_access; + ocsd_datapath_resp_t prev_return; + u32 packet_count; + u32 head; + u32 tail; + struct cs_etm_packet packet_buffer[MAX_BUFFER]; +}; + +static u32 +cs_etm_decoder__mem_access(const void *context, + const ocsd_vaddr_t address, + const ocsd_mem_space_acc_t mem_space __maybe_unused, + const u32 req_size, + u8 *buffer) +{ + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + return decoder->mem_access(decoder->data, + address, + req_size, + buffer); +} + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, + u64 start, u64 end, + cs_etm_mem_cb_type cb_func) +{ + decoder->mem_access = cb_func; + + if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, + OCSD_MEM_SPACE_ANY, + cs_etm_decoder__mem_access, decoder)) + return -1; + + return 0; +} + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) +{ + ocsd_datapath_resp_t dp_ret; + + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, + 0, 0, NULL, NULL); + if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) + return -1; + + return 0; +} + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet) +{ + if (!decoder || !packet) + return -EINVAL; + + /* Nothing to do, might as well just return */ + if (decoder->packet_count == 0) + return 0; + + *packet = decoder->packet_buffer[decoder->head]; + + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + + decoder->packet_count--; + + return 1; +} + +static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, + ocsd_etmv4_cfg *config) +{ + config->reg_configr = params->etmv4.reg_configr; + config->reg_traceidr = params->etmv4.reg_traceidr; + config->reg_idr0 = params->etmv4.reg_idr0; + config->reg_idr1 = params->etmv4.reg_idr1; + config->reg_idr2 = params->etmv4.reg_idr2; + config->reg_idr8 = params->etmv4.reg_idr8; + config->reg_idr9 = 0; + config->reg_idr10 = 0; + config->reg_idr11 = 0; + config->reg_idr12 = 0; + config->reg_idr13 = 0; + config->arch_ver = ARCH_V8; + config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__print_str_cb(const void *p_context, + const char *msg, + const int str_len) +{ + if (p_context && str_len) + ((struct cs_etm_decoder *)p_context)->packet_printer(msg); +} + +static int +cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params, + struct cs_etm_decoder *decoder) +{ + int ret = 0; + + if (d_params->packet_printer == NULL) + return -1; + + decoder->packet_printer = d_params->packet_printer; + + /* + * Set up a library default logger to process any printers + * (packet/raw frame) we add later. 
+ */ + ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + if (ret != 0) + return -1; + + /* no stdout / err / file output */ + ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + if (ret != 0) + return -1; + + /* + * Set the string CB for the default logger, passes strings to + * perf print logger. + */ + ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, + (void *)decoder, + cs_etm_decoder__print_str_cb); + if (ret != 0) + ret = -1; + + return 0; +} + +#ifdef CS_LOG_RAW_FRAMES +static void +cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params, + struct cs_etm_decoder *decoder) +{ + /* Only log these during a --dump operation */ + if (d_params->operation == CS_ETM_OPERATION_PRINT) { + /* set up a library default logger to process the + * raw frame printer we add later + */ + ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + + /* no stdout / err / file output */ + ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + + /* set the string CB for the default logger, + * passes strings to perf print logger. + */ + ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, + (void *)decoder, + cs_etm_decoder__print_str_cb); + + /* use the built in library printer for the raw frames */ + ocsd_dt_set_raw_frame_printer(decoder->dcd_tree, + CS_RAW_DEBUG_FLAGS); + } +} +#else +static void +cs_etm_decoder__init_raw_frame_logging( + struct cs_etm_decoder_params *d_params __maybe_unused, + struct cs_etm_decoder *decoder __maybe_unused) +{ +} +#endif + +static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, + const char *decoder_name, + void *trace_config) +{ + u8 csid; + + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; +} + +static int +cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + const char *decoder_name; + ocsd_etmv4_cfg trace_config_etmv4; + void *trace_config; + + switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV4i: + cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); + decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + trace_config = &trace_config_etmv4; + break; + default: + return -1; + } + + return cs_etm_decoder__create_packet_printer(decoder, + decoder_name, + trace_config); +} + +static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) +{ + int i; + + decoder->head = 0; + decoder->tail = 0; + decoder->packet_count = 0; + for (i = 0; i < MAX_BUFFER; i++) { + decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; + } +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const u8 trace_chan_id, + enum cs_etm_sample_type sample_type) +{ + u32 et = 0; + struct int_node *inode = NULL; + + if (decoder->packet_count >= MAX_BUFFER - 1) + return OCSD_RESP_FATAL_SYS_ERR; + + /* Search the RB tree for the cpu associated with this traceID */ + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return OCSD_RESP_FATAL_SYS_ERR; + + et = decoder->tail; + decoder->packet_buffer[et].sample_type = sample_type; + decoder->packet_buffer[et].start_addr = elem->st_addr; + 
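packet_buffer above is managed as a power-of-two ring: head and tail wrap with "& (MAX_BUFFER - 1)", and once only one slot of headroom remains the decoder answers OCSD_RESP_WAIT so the data path pauses until perf has drained the queued packets. A stand-alone sketch of that ring discipline; RING_SIZE, ring_push() and ring_pop() are illustrative names, not part of the decoder API.

#include <stdbool.h>

#define RING_SIZE 1024	/* must be a power of two for the & (RING_SIZE - 1) wrap */

struct ring {
	unsigned int head, tail, count;
	int slot[RING_SIZE];
};

/* Refuses the push once only one slot of headroom is left: caller should drain. */
static bool ring_push(struct ring *r, int v)
{
	if (r->count >= RING_SIZE - 1)
		return false;

	r->slot[r->tail] = v;
	r->tail = (r->tail + 1) & (RING_SIZE - 1);	/* wrap without a modulo */
	r->count++;
	return true;
}

static bool ring_pop(struct ring *r, int *v)
{
	if (!r->count)
		return false;

	*v = r->slot[r->head];
	r->head = (r->head + 1) & (RING_SIZE - 1);
	r->count--;
	return true;
}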
decoder->packet_buffer[et].end_addr = elem->en_addr; + decoder->packet_buffer[et].exc = false; + decoder->packet_buffer[et].exc_ret = false; + decoder->packet_buffer[et].cpu = *((int *)inode->priv); + + /* Wrap around if need be */ + et = (et + 1) & (MAX_BUFFER - 1); + + decoder->tail = et; + decoder->packet_count++; + + if (decoder->packet_count == MAX_BUFFER - 1) + return OCSD_RESP_WAIT; + + return OCSD_RESP_CONT; +} + +static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( + const void *context, + const ocsd_trc_index_t indx __maybe_unused, + const u8 trace_chan_id __maybe_unused, + const ocsd_generic_trace_elem *elem) +{ + ocsd_datapath_resp_t resp = OCSD_RESP_CONT; + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + switch (elem->elem_type) { + case OCSD_GEN_TRC_ELEM_UNKNOWN: + break; + case OCSD_GEN_TRC_ELEM_NO_SYNC: + decoder->trace_on = false; + break; + case OCSD_GEN_TRC_ELEM_TRACE_ON: + decoder->trace_on = true; + break; + case OCSD_GEN_TRC_ELEM_INSTR_RANGE: + resp = cs_etm_decoder__buffer_packet(decoder, elem, + trace_chan_id, + CS_ETM_RANGE); + break; + case OCSD_GEN_TRC_ELEM_EXCEPTION: + decoder->packet_buffer[decoder->tail].exc = true; + break; + case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: + decoder->packet_buffer[decoder->tail].exc_ret = true; + break; + case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + case OCSD_GEN_TRC_ELEM_EO_TRACE: + case OCSD_GEN_TRC_ELEM_ADDR_NACC: + case OCSD_GEN_TRC_ELEM_TIMESTAMP: + case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: + case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: + case OCSD_GEN_TRC_ELEM_EVENT: + case OCSD_GEN_TRC_ELEM_SWTRACE: + case OCSD_GEN_TRC_ELEM_CUSTOM: + default: + break; + } + + return resp; +} + +static int cs_etm_decoder__create_etm_packet_decoder( + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + const char *decoder_name; + ocsd_etmv4_cfg trace_config_etmv4; + void *trace_config; + u8 csid; + + switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV4i: + cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); + decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + trace_config = &trace_config_etmv4; + break; + default: + return -1; + } + + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; + + return 0; +} + +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + if (d_params->operation == CS_ETM_OPERATION_PRINT) + return cs_etm_decoder__create_etm_packet_printer(t_params, + decoder); + else if (d_params->operation == CS_ETM_OPERATION_DECODE) + return cs_etm_decoder__create_etm_packet_decoder(t_params, + decoder); + + return -1; +} + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params t_params[]) +{ + struct cs_etm_decoder *decoder; + ocsd_dcd_tree_src_t format; + u32 flags; + int i, ret; + + if ((!t_params) || (!d_params)) + return NULL; + + decoder = zalloc(sizeof(*decoder)); + + if (!decoder) + return NULL; + + decoder->data = d_params->data; + decoder->prev_return = OCSD_RESP_CONT; + cs_etm_decoder__clear_buffer(decoder); + format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : + OCSD_TRC_SRC_SINGLE); + flags = 0; + flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0); + flags |= (d_params->hsyncs ? 
OCSD_DFRMTR_HAS_HSYNCS : 0); + flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0); + + /* + * Drivers may add barrier frames when used with perf, set up to + * handle this. Barriers const of FSYNC packet repeated 4 times. + */ + flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC; + + /* Create decode tree for the data source */ + decoder->dcd_tree = ocsd_create_dcd_tree(format, flags); + + if (decoder->dcd_tree == 0) + goto err_free_decoder; + + /* init library print logging support */ + ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); + if (ret != 0) + goto err_free_decoder_tree; + + /* init raw frame logging if required */ + cs_etm_decoder__init_raw_frame_logging(d_params, decoder); + + for (i = 0; i < num_cpu; i++) { + ret = cs_etm_decoder__create_etm_decoder(d_params, + &t_params[i], + decoder); + if (ret != 0) + goto err_free_decoder_tree; + } + + return decoder; + +err_free_decoder_tree: + ocsd_destroy_dcd_tree(decoder->dcd_tree); +err_free_decoder: + free(decoder); + return NULL; +} + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + u64 indx, const u8 *buf, + size_t len, size_t *consumed) +{ + int ret = 0; + ocsd_datapath_resp_t cur = OCSD_RESP_CONT; + ocsd_datapath_resp_t prev_return = decoder->prev_return; + size_t processed = 0; + u32 count; + + while (processed < len) { + if (OCSD_DATA_RESP_IS_WAIT(prev_return)) { + cur = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_FLUSH, + 0, + 0, + NULL, + NULL); + } else if (OCSD_DATA_RESP_IS_CONT(prev_return)) { + cur = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_DATA, + indx + processed, + len - processed, + &buf[processed], + &count); + processed += count; + } else { + ret = -EINVAL; + break; + } + + /* + * Return to the input code if the packet buffer is full. + * Flushing will get done once the packet buffer has been + * processed. + */ + if (OCSD_DATA_RESP_IS_WAIT(cur)) + break; + + prev_return = cur; + } + + decoder->prev_return = cur; + *consumed = processed; + + return ret; +} + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder) +{ + if (!decoder) + return; + + ocsd_destroy_dcd_tree(decoder->dcd_tree); + decoder->dcd_tree = NULL; + free(decoder); +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h new file mode 100644 index 000000000000..3d2e6205d186 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. 
+ * + * Author: Tor Jeremiassen <tor@ti.com> + * Author: Mathieu Poirier <mathieu.poirier@linaro.org> + */ + +#ifndef INCLUDE__CS_ETM_DECODER_H__ +#define INCLUDE__CS_ETM_DECODER_H__ + +#include <linux/types.h> +#include <stdio.h> + +struct cs_etm_decoder; + +struct cs_etm_buffer { + const unsigned char *buf; + size_t len; + u64 offset; + u64 ref_timestamp; +}; + +enum cs_etm_sample_type { + CS_ETM_RANGE = 1 << 0, +}; + +struct cs_etm_packet { + enum cs_etm_sample_type sample_type; + u64 start_addr; + u64 end_addr; + u8 exc; + u8 exc_ret; + int cpu; +}; + +struct cs_etm_queue; + +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, + size_t, u8 *); + +struct cs_etmv4_trace_params { + u32 reg_idr0; + u32 reg_idr1; + u32 reg_idr2; + u32 reg_idr8; + u32 reg_configr; + u32 reg_traceidr; +}; + +struct cs_etm_trace_params { + int protocol; + union { + struct cs_etmv4_trace_params etmv4; + }; +}; + +struct cs_etm_decoder_params { + int operation; + void (*packet_printer)(const char *msg); + cs_etm_mem_cb_type mem_acc_cb; + u8 formatted; + u8 fsyncs; + u8 hsyncs; + u8 frame_aligned; + void *data; +}; + +/* + * The following enums are indexed starting with 1 to align with the + * open source coresight trace decoder library. + */ +enum { + CS_ETM_PROTO_ETMV3 = 1, + CS_ETM_PROTO_ETMV4i, + CS_ETM_PROTO_ETMV4d, +}; + +enum { + CS_ETM_OPERATION_PRINT = 1, + CS_ETM_OPERATION_DECODE, +}; + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + u64 indx, const u8 *buf, + size_t len, size_t *consumed); + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, + struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params t_params[]); + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder); + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, + u64 start, u64 end, + cs_etm_mem_cb_type cb_func); + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet); + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); + +#endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c new file mode 100644 index 000000000000..b9f0a53dfa65 --- /dev/null +++ b/tools/perf/util/cs-etm.c @@ -0,0 +1,1023 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. 
+ * + * Author: Tor Jeremiassen <tor@ti.com> + * Author: Mathieu Poirier <mathieu.poirier@linaro.org> + */ + +#include <linux/bitops.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/types.h> + +#include <stdlib.h> + +#include "auxtrace.h" +#include "color.h" +#include "cs-etm.h" +#include "cs-etm-decoder/cs-etm-decoder.h" +#include "debug.h" +#include "evlist.h" +#include "intlist.h" +#include "machine.h" +#include "map.h" +#include "perf.h" +#include "thread.h" +#include "thread_map.h" +#include "thread-stack.h" +#include "util.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct cs_etm_auxtrace { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + struct itrace_synth_opts synth_opts; + struct perf_session *session; + struct machine *machine; + struct thread *unknown_thread; + + u8 timeless_decoding; + u8 snapshot_mode; + u8 data_queued; + u8 sample_branches; + + int num_cpu; + u32 auxtrace_type; + u64 branches_sample_type; + u64 branches_id; + u64 **metadata; + u64 kernel_start; + unsigned int pmu_type; +}; + +struct cs_etm_queue { + struct cs_etm_auxtrace *etm; + struct thread *thread; + struct cs_etm_decoder *decoder; + struct auxtrace_buffer *buffer; + const struct cs_etm_state *state; + union perf_event *event_buf; + unsigned int queue_nr; + pid_t pid, tid; + int cpu; + u64 time; + u64 timestamp; + u64 offset; +}; + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, u64 time_); + +static void cs_etm__packet_dump(const char *pkt_string) +{ + const char *color = PERF_COLOR_BLUE; + int len = strlen(pkt_string); + + if (len && (pkt_string[len-1] == '\n')) + color_fprintf(stdout, color, " %s", pkt_string); + else + color_fprintf(stdout, color, " %s\n", pkt_string); + + fflush(stdout); +} + +static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, + struct auxtrace_buffer *buffer) +{ + int i, ret; + const char *color = PERF_COLOR_BLUE; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_decoder *decoder; + size_t buffer_used = 0; + + fprintf(stdout, "\n"); + color_fprintf(stdout, color, + ". ... 
CoreSight ETM Trace data: size %zu bytes\n", + buffer->size); + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + for (i = 0; i < etm->num_cpu; i++) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = + etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].etmv4.reg_traceidr = + etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } + + /* Set decoder parameters to simply print the trace packets */ + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_PRINT; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + + decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + + zfree(&t_params); + + if (!decoder) + return; + do { + size_t consumed; + + ret = cs_etm_decoder__process_data_block( + decoder, buffer->offset, + &((u8 *)buffer->data)[buffer_used], + buffer->size - buffer_used, &consumed); + if (ret) + break; + + buffer_used += consumed; + } while (buffer_used < buffer->size); + + cs_etm_decoder__free(decoder); +} + +static int cs_etm__flush_events(struct perf_session *session, + struct perf_tool *tool) +{ + int ret; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + if (!etm->timeless_decoding) + return -EINVAL; + + ret = cs_etm__update_queues(etm); + + if (ret < 0) + return ret; + + return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); +} + +static void cs_etm__free_queue(void *priv) +{ + struct cs_etm_queue *etmq = priv; + + free(etmq); +} + +static void cs_etm__free_events(struct perf_session *session) +{ + unsigned int i; + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + struct auxtrace_queues *queues = &aux->queues; + + for (i = 0; i < queues->nr_queues; i++) { + cs_etm__free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + + auxtrace_queues__free(queues); +} + +static void cs_etm__free(struct perf_session *session) +{ + int i; + struct int_node *inode, *tmp; + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + cs_etm__free_events(session); + session->auxtrace = NULL; + + /* First remove all traceID/CPU# nodes for the RB tree */ + intlist__for_each_entry_safe(inode, tmp, traceid_list) + intlist__remove(traceid_list, inode); + /* Then the RB tree itself */ + intlist__delete(traceid_list); + + for (i = 0; i < aux->num_cpu; i++) + zfree(&aux->metadata[i]); + + zfree(&aux->metadata); + zfree(&aux); +} + +static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, + size_t size, u8 *buffer) +{ + u8 cpumode; + u64 offset; + int len; + struct thread *thread; + struct machine *machine; + struct addr_location al; + + if (!etmq) + return -1; + + machine = etmq->etm->machine; + if (address >= etmq->etm->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = etmq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = etmq->etm->unknown_thread; + } + 
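Just below, the map found for the address converts the decoder's virtual address into an offset inside the backing DSO (al.map->map_ip()) before dso__data_read_offset() reads the instruction bytes out of the file. A simplified model of that translation for a plain user-space mapping, roughly address minus the map start plus the map's file offset; struct toy_map and toy_map_ip() are illustrative stand-ins, not perf's struct map.

#include <stdint.h>

/* Toy stand-in for struct map: one contiguous VA range backed at pgoff. */
struct toy_map {
	uint64_t start, end;	/* virtual address range [start, end) */
	uint64_t pgoff;		/* file offset backing 'start' */
};

/*
 * Translate a virtual address into an offset within the mapped object so the
 * instruction bytes can be read from the file, as the map_ip() step below
 * does for the real maps. Returns (uint64_t)-1 if the address is not covered.
 */
static uint64_t toy_map_ip(const struct toy_map *map, uint64_t addr)
{
	if (addr < map->start || addr >= map->end)
		return (uint64_t)-1;

	return addr - map->start + map->pgoff;
}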
+ thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al); + + if (!al.map || !al.map->dso) + return 0; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) + return 0; + + offset = al.map->map_ip(al.map, address); + + map__load(al.map); + + len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); + + if (len <= 0) + return 0; + + return len; +} + +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + unsigned int queue_nr) +{ + int i; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_queue *etmq; + + etmq = zalloc(sizeof(*etmq)); + if (!etmq) + return NULL; + + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!etmq->event_buf) + goto out_free; + + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->pid = -1; + etmq->tid = -1; + etmq->cpu = -1; + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + + if (!t_params) + goto out_free; + + for (i = 0; i < etm->num_cpu; i++) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = + etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].etmv4.reg_traceidr = + etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } + + /* Set decoder parameters to simply print the trace packets */ + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_DECODE; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + d_params.data = etmq; + + etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + + zfree(&t_params); + + if (!etmq->decoder) + goto out_free; + + /* + * Register a function to handle all memory accesses required by + * the trace decoder library. 
+ */ + if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, + 0x0L, ((u64) -1L), + cs_etm__mem_access)) + goto out_free_decoder; + + etmq->offset = 0; + + return etmq; + +out_free_decoder: + cs_etm_decoder__free(etmq->decoder); +out_free: + zfree(&etmq->event_buf); + free(etmq); + + return NULL; +} + +static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct cs_etm_queue *etmq = queue->priv; + + if (list_empty(&queue->head) || etmq) + return 0; + + etmq = cs_etm__alloc_queue(etm, queue_nr); + + if (!etmq) + return -ENOMEM; + + queue->priv = etmq; + + if (queue->cpu != -1) + etmq->cpu = queue->cpu; + + etmq->tid = queue->tid; + + return 0; +} + +static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) +{ + unsigned int i; + int ret; + + for (i = 0; i < etm->queues.nr_queues; i++) { + ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); + if (ret) + return ret; + } + + return 0; +} + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) +{ + if (etm->queues.new_data) { + etm->queues.new_data = false; + return cs_etm__setup_queues(etm); + } + + return 0; +} + +static int +cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +{ + struct auxtrace_buffer *aux_buffer = etmq->buffer; + struct auxtrace_buffer *old_buffer = aux_buffer; + struct auxtrace_queue *queue; + + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + + aux_buffer = auxtrace_buffer__next(queue, aux_buffer); + + /* If no more data, drop the previous auxtrace_buffer and return */ + if (!aux_buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + buff->len = 0; + return 0; + } + + etmq->buffer = aux_buffer; + + /* If the aux_buffer doesn't have data associated, try to load it */ + if (!aux_buffer->data) { + /* get the file desc associated with the perf data file */ + int fd = perf_data__fd(etmq->etm->session->data); + + aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); + if (!aux_buffer->data) + return -ENOMEM; + } + + /* If valid, drop the previous buffer */ + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + buff->offset = aux_buffer->offset; + buff->len = aux_buffer->size; + buff->buf = aux_buffer->data; + + buff->ref_timestamp = aux_buffer->reference; + + return buff->len; +} + +static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue) +{ + struct cs_etm_queue *etmq = queue->priv; + + /* CPU-wide tracing isn't supported yet */ + if (queue->tid == -1) + return; + + if ((!etmq->thread) && (etmq->tid != -1)) + etmq->thread = machine__find_thread(etm->machine, -1, + etmq->tid); + + if (etmq->thread) { + etmq->pid = etmq->thread->pid_; + if (queue->cpu == -1) + etmq->cpu = etmq->thread->cpu; + } +} + +/* + * The cs etm packet encodes an instruction range between a branch target + * and the next taken branch. Generate sample accordingly. 
+ */ +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, + struct cs_etm_packet *packet) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + struct perf_sample sample = {.ip = 0,}; + union perf_event *event = etmq->event_buf; + u64 start_addr = packet->start_addr; + u64 end_addr = packet->end_addr; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = start_addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.addr = end_addr; + sample.id = etmq->etm->branches_id; + sample.stream_id = etmq->etm->branches_id; + sample.period = 1; + sample.cpu = packet->cpu; + sample.flags = 0; + sample.cpumode = PERF_RECORD_MISC_USER; + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + return ret; +} + +struct cs_etm_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int cs_etm__event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct cs_etm_synth *cs_etm_synth = + container_of(tool, struct cs_etm_synth, dummy_tool); + + return perf_session__deliver_synth_event(cs_etm_synth->session, + event, NULL); +} + +static int cs_etm__synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct cs_etm_synth cs_etm_synth; + + memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); + cs_etm_synth.session = session; + + return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, + &id, cs_etm__event_synth); +} + +static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == etm->pmu_type) { + found = true; + break; + } + } + + if (!found) { + pr_debug("No selected events with CoreSight Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (etm->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + /* create new id val to be a fixed offset from evsel id */ + id = evsel->id[0] + 1000000000; + + if (!id) + id = 1; + + if (etm->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_branches = true; + etm->branches_sample_type = attr.sample_type; + etm->branches_id = id; + } + + return 0; +} + +static int cs_etm__sample(struct cs_etm_queue *etmq) +{ + int ret; + struct cs_etm_packet packet; 
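cs_etm__sample() drains the decoder's packet queue and, for each CS_ETM_RANGE packet, cs_etm__synth_branch_sample() above records the start of the executed range as the sample ip and its end as the sample address, one synthesized sample per packet. A compact restatement of that mapping with toy types; toy_range and toy_sample are illustrative, not the perf structures.

#include <stdint.h>

/* Simplified view of the fields cs_etm__synth_branch_sample() fills in. */
struct toy_range  { uint64_t start_addr, end_addr; int cpu; };
struct toy_sample { uint64_t ip, addr, period; int cpu; };

/*
 * An instruction-range packet covers the instructions executed from one
 * branch target up to the next taken branch, so the range start becomes the
 * sample ip and the range end becomes the sample address.
 */
static struct toy_sample range_to_branch_sample(const struct toy_range *pkt)
{
	struct toy_sample s = {
		.ip	= pkt->start_addr,
		.addr	= pkt->end_addr,
		.period	= 1,
		.cpu	= pkt->cpu,
	};

	return s;
}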
+ + while (1) { + ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); + if (ret <= 0) + return ret; + + /* + * If the packet contains an instruction range, generate an + * instruction sequence event. + */ + if (packet.sample_type & CS_ETM_RANGE) + cs_etm__synth_branch_sample(etmq, &packet); + } + + return 0; +} + +static int cs_etm__run_decoder(struct cs_etm_queue *etmq) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_buffer buffer; + size_t buffer_used, processed; + int err = 0; + + if (!etm->kernel_start) + etm->kernel_start = machine__kernel_start(etm->machine); + + /* Go through each buffer in the queue and decode them one by one */ +more: + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) + return err; + /* + * We cannot assume consecutive blocks in the data file are contiguous, + * reset the decoder to force re-sync. + */ + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + + /* + * Nothing to do with an error condition, let's hope the next + * chunk will be better. + */ + err = cs_etm__sample(etmq); + } while (buffer.len > buffer_used); + +goto more; + + return err; +} + +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, u64 time_) +{ + unsigned int i; + struct auxtrace_queues *queues = &etm->queues; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &etm->queues.queue_array[i]; + struct cs_etm_queue *etmq = queue->priv; + + if (etmq && ((tid == -1) || (etmq->tid == tid))) { + etmq->time = time_; + cs_etm__set_pid_tid_cpu(etm, queue); + cs_etm__run_decoder(etmq); + } + } + + return 0; +} + +static int cs_etm__process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + int err = 0; + u64 timestamp; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("CoreSight ETM Trace requires ordered events\n"); + return -EINVAL; + } + + if (!etm->timeless_decoding) + return -EINVAL; + + if (sample->time && (sample->time != (u64) -1)) + timestamp = sample->time; + else + timestamp = 0; + + if (timestamp || etm->timeless_decoding) { + err = cs_etm__update_queues(etm); + if (err) + return err; + } + + if (event->header.type == PERF_RECORD_EXIT) + return cs_etm__process_timeless_queues(etm, + event->fork.tid, + sample->time); + + return 0; +} + +static int cs_etm__process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + if (!etm->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + bool is_pipe = perf_data__is_pipe(session->data); + int err; + + if (is_pipe) + data_offset = 0; + else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&etm->queues, session, + event, data_offset, &buffer); + if (err) + return err; + + if 
(dump_trace) + if (auxtrace_buffer__get_data(buffer, fd)) { + cs_etm__dump_event(etm, buffer); + auxtrace_buffer__put_data(buffer); + } + } + + return 0; +} + +static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) +{ + struct perf_evsel *evsel; + struct perf_evlist *evlist = etm->session->evlist; + bool timeless_decoding = true; + + /* + * Circle through the list of event and complain if we find one + * with the time bit set. + */ + evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) + timeless_decoding = false; + } + + return timeless_decoding; +} + +static const char * const cs_etm_global_header_fmts[] = { + [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", + [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", +}; + +static const char * const cs_etm_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_ETMCR] = " ETMCR %llx\n", + [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", + [CS_ETM_ETMCCER] = " ETMCCER %llx\n", + [CS_ETM_ETMIDR] = " ETMIDR %llx\n", +}; + +static const char * const cs_etmv4_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", + [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", +}; + +static void cs_etm__print_auxtrace_info(u64 *val, int num) +{ + int i, j, cpu = 0; + + for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { + if (val[i] == __perf_cs_etmv3_magic) + for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + else if (val[i] == __perf_cs_etmv4_magic) + for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + else + /* failure.. return */ + return; + } +} + +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + struct cs_etm_auxtrace *etm = NULL; + struct int_node *inode; + unsigned int pmu_type; + int event_header_size = sizeof(struct perf_event_header); + int info_header_size; + int total_size = auxtrace_info->header.size; + int priv_size = 0; + int num_cpu; + int err = 0, idx = -1; + int i, j, k; + u64 *ptr, *hdr = NULL; + u64 **metadata = NULL; + + /* + * sizeof(auxtrace_info_event::type) + + * sizeof(auxtrace_info_event::reserved) == 8 + */ + info_header_size = 8; + + if (total_size < (event_header_size + info_header_size)) + return -EINVAL; + + priv_size = total_size - event_header_size - info_header_size; + + /* First the global part */ + ptr = (u64 *) auxtrace_info->priv; + + /* Look for version '0' of the header */ + if (ptr[0] != 0) + return -EINVAL; + + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + if (!hdr) + return -ENOMEM; + + /* Extract header information - see cs-etm.h for format */ + for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + hdr[i] = ptr[i]; + num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; + pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & + 0xffffffff); + + /* + * Create an RB tree for traceID-CPU# tuple. 
Since the conversion has + * to be made for each packet that gets decoded, optimizing access in + * anything other than a sequential array is worth doing. + */ + traceid_list = intlist__new(NULL); + if (!traceid_list) { + err = -ENOMEM; + goto err_free_hdr; + } + + metadata = zalloc(sizeof(*metadata) * num_cpu); + if (!metadata) { + err = -ENOMEM; + goto err_free_traceid_list; + } + + /* + * The metadata is stored in the auxtrace_info section and encodes + * the configuration of the ARM embedded trace macrocell which is + * required by the trace decoder to properly decode the trace due + * to its highly compressed nature. + */ + for (j = 0; j < num_cpu; j++) { + if (ptr[i] == __perf_cs_etmv3_magic) { + metadata[j] = zalloc(sizeof(*metadata[j]) * + CS_ETM_PRIV_MAX); + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; + } + for (k = 0; k < CS_ETM_PRIV_MAX; k++) + metadata[j][k] = ptr[i + k]; + + /* The traceID is our handle */ + idx = metadata[j][CS_ETM_ETMTRACEIDR]; + i += CS_ETM_PRIV_MAX; + } else if (ptr[i] == __perf_cs_etmv4_magic) { + metadata[j] = zalloc(sizeof(*metadata[j]) * + CS_ETMV4_PRIV_MAX); + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; + } + for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) + metadata[j][k] = ptr[i + k]; + + /* The traceID is our handle */ + idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; + i += CS_ETMV4_PRIV_MAX; + } + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, idx); + + /* Something went wrong, no need to continue */ + if (!inode) { + err = PTR_ERR(inode); + goto err_free_metadata; + } + + /* + * The node for that CPU should not be taken. + * Back out if that's the case. + */ + if (inode->priv) { + err = -EINVAL; + goto err_free_metadata; + } + /* All good, associate the traceID with the CPU# */ + inode->priv = &metadata[j][CS_ETM_CPU]; + } + + /* + * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * CS_ETMV4_PRIV_MAX mark how many double words are in the + * global metadata, and each cpu's metadata respectively. + * The following tests if the correct number of double words was + * present in the auxtrace info section. 
+ */ + if (i * 8 != priv_size) { + err = -EINVAL; + goto err_free_metadata; + } + + etm = zalloc(sizeof(*etm)); + + if (!etm) { + err = -ENOMEM; + goto err_free_metadata; + } + + err = auxtrace_queues__init(&etm->queues); + if (err) + goto err_free_etm; + + etm->session = session; + etm->machine = &session->machines.host; + + etm->num_cpu = num_cpu; + etm->pmu_type = pmu_type; + etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); + etm->metadata = metadata; + etm->auxtrace_type = auxtrace_info->type; + etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); + + etm->auxtrace.process_event = cs_etm__process_event; + etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; + etm->auxtrace.flush_events = cs_etm__flush_events; + etm->auxtrace.free_events = cs_etm__free_events; + etm->auxtrace.free = cs_etm__free; + session->auxtrace = &etm->auxtrace; + + if (dump_trace) { + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); + return 0; + } + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts); + etm->synth_opts.callchain = false; + } + + err = cs_etm__synth_events(etm, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&etm->queues, session); + if (err) + goto err_free_queues; + + etm->data_queued = etm->queues.populated; + + return 0; + +err_free_queues: + auxtrace_queues__free(&etm->queues); + session->auxtrace = NULL; +err_free_etm: + zfree(&etm); +err_free_metadata: + /* No need to check @metadata[j], free(NULL) is supported */ + for (j = 0; j < num_cpu; j++) + free(metadata[j]); + zfree(&metadata); +err_free_traceid_list: + intlist__delete(traceid_list); +err_free_hdr: + zfree(&hdr); + + return -EINVAL; +} diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 3cc6bc3263fe..5864d5dca616 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -18,6 +18,9 @@ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ #define INCLUDE__UTIL_PERF_CS_ETM_H__ +#include "util/event.h" +#include "util/session.h" + /* Versionning header in case things need tro change in the future. That way * decoding of old snapshot is still possible. */ @@ -61,6 +64,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -71,4 +77,16 @@ static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#ifdef HAVE_CSTRACE_SUPPORT +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session); +#else +static inline int +cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + #endif diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 4b261c2ec0f1..ee4c1e8ed54b 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Sane locale-independent, ASCII ctype. 
* diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 2346cecb8ea2..5744c12641a5 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1577,10 +1577,10 @@ int bt_convert__perf2ctf(const char *input, const char *path, struct perf_data_convert_opts *opts) { struct perf_session *session; - struct perf_data_file file = { - .path = input, - .mode = PERF_DATA_MODE_READ, - .force = opts->force, + struct perf_data data = { + .file.path = input, + .mode = PERF_DATA_MODE_READ, + .force = opts->force, }; struct convert c = { .tool = { @@ -1619,7 +1619,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, err = -1; /* perf.data session */ - session = perf_session__new(&file, 0, &c.tool); + session = perf_session__new(&data, 0, &c.tool); if (!session) goto free_writer; @@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, fprintf(stderr, "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", - file.path, path); + data.file.path, path); fprintf(stderr, "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h index 9a3b587f76c1..821674d63c4e 100644 --- a/tools/perf/util/data-convert-bt.h +++ b/tools/perf/util/data-convert-bt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __DATA_CONVERT_BT_H #define __DATA_CONVERT_BT_H #include "data-convert.h" diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h index 5314962fe95b..af90b6076c06 100644 --- a/tools/perf/util/data-convert.h +++ b/tools/perf/util/data-convert.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __DATA_CONVERT_H #define __DATA_CONVERT_H diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 263f5a906ba5..d8cfc19ddb10 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -1,8 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/kernel.h> #include <sys/types.h> #include <sys/stat.h> #include <errno.h> +#include <fcntl.h> #include <unistd.h> #include <string.h> @@ -10,66 +12,56 @@ #include "util.h" #include "debug.h" -#ifndef O_CLOEXEC -#ifdef __sparc__ -#define O_CLOEXEC 0x400000 -#elif defined(__alpha__) || defined(__hppa__) -#define O_CLOEXEC 010000000 -#else -#define O_CLOEXEC 02000000 -#endif -#endif - -static bool check_pipe(struct perf_data_file *file) +static bool check_pipe(struct perf_data *data) { struct stat st; bool is_pipe = false; - int fd = perf_data_file__is_read(file) ? + int fd = perf_data__is_read(data) ? 
STDIN_FILENO : STDOUT_FILENO; - if (!file->path) { + if (!data->file.path) { if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) is_pipe = true; } else { - if (!strcmp(file->path, "-")) + if (!strcmp(data->file.path, "-")) is_pipe = true; } if (is_pipe) - file->fd = fd; + data->file.fd = fd; - return file->is_pipe = is_pipe; + return data->is_pipe = is_pipe; } -static int check_backup(struct perf_data_file *file) +static int check_backup(struct perf_data *data) { struct stat st; - if (!stat(file->path, &st) && st.st_size) { + if (!stat(data->file.path, &st) && st.st_size) { /* TODO check errors properly */ char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", - file->path); + data->file.path); unlink(oldname); - rename(file->path, oldname); + rename(data->file.path, oldname); } return 0; } -static int open_file_read(struct perf_data_file *file) +static int open_file_read(struct perf_data *data) { struct stat st; int fd; char sbuf[STRERR_BUFSIZE]; - fd = open(file->path, O_RDONLY); + fd = open(data->file.path, O_RDONLY); if (fd < 0) { int err = errno; - pr_err("failed to open %s: %s", file->path, + pr_err("failed to open %s: %s", data->file.path, str_error_r(err, sbuf, sizeof(sbuf))); - if (err == ENOENT && !strcmp(file->path, "perf.data")) + if (err == ENOENT && !strcmp(data->file.path, "perf.data")) pr_err(" (try 'perf record' first)"); pr_err("\n"); return -err; @@ -78,19 +70,19 @@ static int open_file_read(struct perf_data_file *file) if (fstat(fd, &st) < 0) goto out_close; - if (!file->force && st.st_uid && (st.st_uid != geteuid())) { + if (!data->force && st.st_uid && (st.st_uid != geteuid())) { pr_err("File %s not owned by current user or root (use -f to override)\n", - file->path); + data->file.path); goto out_close; } if (!st.st_size) { - pr_info("zero-sized file (%s), nothing to do!\n", - file->path); + pr_info("zero-sized data (%s), nothing to do!\n", + data->file.path); goto out_close; } - file->size = st.st_size; + data->size = st.st_size; return fd; out_close: @@ -98,49 +90,49 @@ static int open_file_read(struct perf_data_file *file) return -1; } -static int open_file_write(struct perf_data_file *file) +static int open_file_write(struct perf_data *data) { int fd; char sbuf[STRERR_BUFSIZE]; - if (check_backup(file)) + if (check_backup(data)) return -1; - fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, + fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, S_IRUSR|S_IWUSR); if (fd < 0) - pr_err("failed to open %s : %s\n", file->path, + pr_err("failed to open %s : %s\n", data->file.path, str_error_r(errno, sbuf, sizeof(sbuf))); return fd; } -static int open_file(struct perf_data_file *file) +static int open_file(struct perf_data *data) { int fd; - fd = perf_data_file__is_read(file) ? - open_file_read(file) : open_file_write(file); + fd = perf_data__is_read(data) ? + open_file_read(data) : open_file_write(data); - file->fd = fd; + data->file.fd = fd; return fd < 0 ? 
-1 : 0; } -int perf_data_file__open(struct perf_data_file *file) +int perf_data__open(struct perf_data *data) { - if (check_pipe(file)) + if (check_pipe(data)) return 0; - if (!file->path) - file->path = "perf.data"; + if (!data->file.path) + data->file.path = "perf.data"; - return open_file(file); + return open_file(data); } -void perf_data_file__close(struct perf_data_file *file) +void perf_data__close(struct perf_data *data) { - close(file->fd); + close(data->file.fd); } ssize_t perf_data_file__write(struct perf_data_file *file, @@ -149,42 +141,48 @@ ssize_t perf_data_file__write(struct perf_data_file *file, return writen(file->fd, buf, size); } -int perf_data_file__switch(struct perf_data_file *file, +ssize_t perf_data__write(struct perf_data *data, + void *buf, size_t size) +{ + return perf_data_file__write(&data->file, buf, size); +} + +int perf_data__switch(struct perf_data *data, const char *postfix, size_t pos, bool at_exit) { char *new_filepath; int ret; - if (check_pipe(file)) + if (check_pipe(data)) return -EINVAL; - if (perf_data_file__is_read(file)) + if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0) + if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(file->path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", file->path, new_filepath); + if (rename(data->file.path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath); if (!at_exit) { - close(file->fd); - ret = perf_data_file__open(file); + close(data->file.fd); + ret = perf_data__open(data); if (ret < 0) goto out; - if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) { + if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) { ret = -errno; pr_debug("Failed to lseek to %zu: %s", pos, strerror(errno)); goto out; } } - ret = file->fd; + ret = data->file.fd; out: free(new_filepath); return ret; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index ae510ce16cb1..4828f7feea89 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_DATA_H #define __PERF_DATA_H @@ -9,51 +10,57 @@ enum perf_data_mode { }; struct perf_data_file { - const char *path; - int fd; + const char *path; + int fd; +}; + +struct perf_data { + struct perf_data_file file; bool is_pipe; bool force; unsigned long size; enum perf_data_mode mode; }; -static inline bool perf_data_file__is_read(struct perf_data_file *file) +static inline bool perf_data__is_read(struct perf_data *data) { - return file->mode == PERF_DATA_MODE_READ; + return data->mode == PERF_DATA_MODE_READ; } -static inline bool perf_data_file__is_write(struct perf_data_file *file) +static inline bool perf_data__is_write(struct perf_data *data) { - return file->mode == PERF_DATA_MODE_WRITE; + return data->mode == PERF_DATA_MODE_WRITE; } -static inline int perf_data_file__is_pipe(struct perf_data_file *file) +static inline int perf_data__is_pipe(struct perf_data *data) { - return file->is_pipe; + return data->is_pipe; } -static inline int perf_data_file__fd(struct perf_data_file *file) +static inline int perf_data__fd(struct perf_data *data) { - return file->fd; + return data->file.fd; } -static inline unsigned long perf_data_file__size(struct perf_data_file *file) +static inline unsigned long perf_data__size(struct perf_data *data) { - return file->size; + return data->size; } -int 
perf_data_file__open(struct perf_data_file *file); -void perf_data_file__close(struct perf_data_file *file); +int perf_data__open(struct perf_data *data); +void perf_data__close(struct perf_data *data); +ssize_t perf_data__write(struct perf_data *data, + void *buf, size_t size); ssize_t perf_data_file__write(struct perf_data_file *file, void *buf, size_t size); /* * If at_exit is set, only rename current perf.data to - * perf.data.<postfix>, continue write on original file. + * perf.data.<postfix>, continue write on original data. * Set at_exit when flushing the last output. * * Return value is fd of new output. */ -int perf_data_file__switch(struct perf_data_file *file, +int perf_data__switch(struct perf_data *data, const char *postfix, size_t pos, bool at_exit); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index a5b3777ffee6..f3a71db83947 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* For general debugging purposes */ #include "../perf.h" @@ -111,50 +112,53 @@ int dump_printf(const char *fmt, ...) return ret; } -static void trace_event_printer(enum binary_printer_ops op, - unsigned int val, void *extra) +static int trace_event_printer(enum binary_printer_ops op, + unsigned int val, void *extra, FILE *fp) { const char *color = PERF_COLOR_BLUE; union perf_event *event = (union perf_event *)extra; unsigned char ch = (unsigned char)val; + int printed = 0; switch (op) { case BINARY_PRINT_DATA_BEGIN: - printf("."); - color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", - event->header.size); + printed += fprintf(fp, "."); + printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n", + event->header.size); break; case BINARY_PRINT_LINE_BEGIN: - printf("."); + printed += fprintf(fp, "."); break; case BINARY_PRINT_ADDR: - color_fprintf(stdout, color, " %04x: ", val); + printed += color_fprintf(fp, color, " %04x: ", val); break; case BINARY_PRINT_NUM_DATA: - color_fprintf(stdout, color, " %02x", val); + printed += color_fprintf(fp, color, " %02x", val); break; case BINARY_PRINT_NUM_PAD: - color_fprintf(stdout, color, " "); + printed += color_fprintf(fp, color, " "); break; case BINARY_PRINT_SEP: - color_fprintf(stdout, color, " "); + printed += color_fprintf(fp, color, " "); break; case BINARY_PRINT_CHAR_DATA: - color_fprintf(stdout, color, "%c", + printed += color_fprintf(fp, color, "%c", isprint(ch) ? 
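The perf_data_file -> perf_data conversion above (data.c/data.h, and the data-convert-bt.c caller) boils down to embedding the file description in struct perf_data and going through the perf_data__*() accessors. A minimal read-side sketch under those declarations, with error handling trimmed:

	struct perf_data data = {
		.file.path = "perf.data",	/* perf_data__open() defaults this when NULL */
		.mode	   = PERF_DATA_MODE_READ,
	};

	if (perf_data__open(&data) < 0)
		return -1;

	if (!perf_data__is_pipe(&data))
		pr_debug("size %lu, fd %d\n", perf_data__size(&data), perf_data__fd(&data));

	perf_data__close(&data);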
ch : '.'); break; case BINARY_PRINT_CHAR_PAD: - color_fprintf(stdout, color, " "); + printed += color_fprintf(fp, color, " "); break; case BINARY_PRINT_LINE_END: - color_fprintf(stdout, color, "\n"); + printed += color_fprintf(fp, color, "\n"); break; case BINARY_PRINT_DATA_END: - printf("\n"); + printed += fprintf(fp, "\n"); break; default: break; } + + return printed; } void trace_event(union perf_event *event) diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index c818bdb1c1ab..77445dfc5c7d 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* For debugging general purposes */ #ifndef __PERF_DEBUG_H #define __PERF_DEBUG_H diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index cb66d334f532..e4c486756053 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <sys/types.h> #include <stdio.h> #include <string.h> diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h index a981c1f968fe..f936c8eabe5d 100644 --- a/tools/perf/util/demangle-java.h +++ b/tools/perf/util/demangle-java.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_DEMANGLE_JAVA #define __PERF_DEMANGLE_JAVA 1 /* diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c index f9dafa888c06..423afbbd386b 100644 --- a/tools/perf/util/demangle-rust.c +++ b/tools/perf/util/demangle-rust.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <string.h> #include "util.h" #include "debug.h" diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h index 7b41ead7e0dd..2fca618b1aa5 100644 --- a/tools/perf/util/demangle-rust.h +++ b/tools/perf/util/demangle-rust.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_DEMANGLE_RUST #define __PERF_DEMANGLE_RUST 1 diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index b9e087fb8247..36ef45b2e89d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <asm/bug.h> #include <linux/kernel.h> #include <sys/time.h> @@ -6,9 +7,11 @@ #include <sys/stat.h> #include <unistd.h> #include <errno.h> +#include <fcntl.h> #include "compress.h" #include "path.h" #include "symbol.h" +#include "srcline.h" #include "dso.h" #include "machine.h" #include "auxtrace.h" @@ -443,7 +446,7 @@ static int do_open(char *name) char sbuf[STRERR_BUFSIZE]; do { - fd = open(name, O_RDONLY); + fd = open(name, O_RDONLY|O_CLOEXEC); if (fd >= 0) return fd; @@ -1200,6 +1203,8 @@ struct dso *dso__new(const char *name) for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; dso->data.cache = RB_ROOT; + dso->inlined_nodes = RB_ROOT; + dso->srclines = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1231,6 +1236,10 @@ void dso__delete(struct dso *dso) if (!RB_EMPTY_NODE(&dso->rb_node)) pr_err("DSO %s is still in rbtree when being deleted!\n", dso->long_name); + + /* free inlines first, as they reference symbols */ + inlines__tree_delete(&dso->inlined_nodes); + srcline__tree_delete(&dso->srclines); for (i = 0; i < MAP__NR_TYPES; ++i) symbols__delete(&dso->symbols[i]); @@ -1365,9 +1374,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso) void dsos__add(struct dsos *dsos, struct dso *dso) { - pthread_rwlock_wrlock(&dsos->lock); + 
down_write(&dsos->lock); __dsos__add(dsos, dso); - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) @@ -1386,9 +1395,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { struct dso *dso; - pthread_rwlock_rdlock(&dsos->lock); + down_read(&dsos->lock); dso = __dsos__find(dsos, name, cmp_short); - pthread_rwlock_unlock(&dsos->lock); + up_read(&dsos->lock); return dso; } @@ -1415,9 +1424,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name) struct dso *dsos__findnew(struct dsos *dsos, const char *name) { struct dso *dso; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); dso = dso__get(__dsos__findnew(dsos, name)); - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); return dso; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index f886141678eb..c229dbe0277a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_DSO #define __PERF_DSO @@ -6,7 +7,7 @@ #include <linux/rbtree.h> #include <sys/types.h> #include <stdbool.h> -#include <pthread.h> +#include "rwsem.h" #include <linux/types.h> #include <linux/bitops.h> #include "map.h" @@ -129,7 +130,7 @@ struct dso_cache { struct dsos { struct list_head head; struct rb_root root; /* rbtree root sorted by long name */ - pthread_rwlock_t lock; + struct rw_semaphore lock; }; struct auxtrace_cache; @@ -141,6 +142,8 @@ struct dso { struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root inlined_nodes; + struct rb_root srclines; struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c index ffbdb19f05d0..10988d3de7ce 100644 --- a/tools/perf/util/dump-insn.c +++ b/tools/perf/util/dump-insn.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include "dump-insn.h" diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h index 90fb115981cf..0e06280a8860 100644 --- a/tools/perf/util/dump-insn.h +++ b/tools/perf/util/dump-insn.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_DUMP_INSN_H #define __PERF_DUMP_INSN_H 1 diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index c708395b3cb6..db55eddce8cd 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. * diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 9e21538c42ae..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,7 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" +#include "sane_ctype.h" #include "util.h" #include <errno.h> +#include <sys/utsname.h> struct perf_env perf_env; @@ -92,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache) free(cache->map); free(cache->size); } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. 
+ */ +static const char *normalize_arch(char *arch) +{ + if (!strcmp(arch, "x86_64")) + return "x86"; + if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') + return "x86"; + if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) + return "sparc"; + if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + return "arm64"; + if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) + return "arm"; + if (!strncmp(arch, "s390", 4)) + return "s390"; + if (!strncmp(arch, "parisc", 6)) + return "parisc"; + if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) + return "powerpc"; + if (!strncmp(arch, "mips", 4)) + return "mips"; + if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) + return "sh"; + + return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ + struct utsname uts; + char *arch_name; + + if (!env) { /* Assume local operation */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } else + arch_name = env->arch; + + return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index b164dfd2dcbf..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_ENV_H #define __PERF_ENV_H @@ -64,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1c905ba3641b..44e603c27944 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,5 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 #include <dirent.h> #include <errno.h> +#include <fcntl.h> #include <inttypes.h> #include <linux/kernel.h> #include <linux/types.h> @@ -677,21 +679,21 @@ out: return err; } -int perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data, - unsigned int proc_map_timeout) +static int __perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + unsigned int proc_map_timeout, + struct dirent **dirent, + int start, + int num) { - DIR *proc; - char proc_path[PATH_MAX]; - struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; int err = -1; - - if (machine__is_default_guest(machine)) - return 0; + char *end; + pid_t pid; + int i; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); if (comm_event == NULL) @@ -711,31 +713,25 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (namespaces_event == NULL) goto out_free_fork; - snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - proc = opendir(proc_path); - - if (proc == NULL) - goto out_free_namespaces; - - while ((dirent = readdir(proc)) != NULL) { - char *end; - pid_t pid = strtol(dirent->d_name, &end, 10); + for (i = start; i < start + num; i++) { + if (!isdigit(dirent[i]->d_name[0])) + continue; - if (*end) /* only interested in proper numerical dirents */ + pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); + /* only interested in proper numerical dirents */ + if (*end) continue; /* - * We may race with exiting thread, so don't stop just because - * one thread couldn't be synthesized. 
- */ + * We may race with exiting thread, so don't stop just because + * one thread couldn't be synthesized. + */ __event__synthesize_thread(comm_event, mmap_event, fork_event, namespaces_event, pid, 1, process, tool, machine, mmap_data, proc_map_timeout); } - err = 0; - closedir(proc); -out_free_namespaces: + free(namespaces_event); out_free_fork: free(fork_event); @@ -747,6 +743,118 @@ out: return err; } +struct synthesize_threads_arg { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + bool mmap_data; + unsigned int proc_map_timeout; + struct dirent **dirent; + int num; + int start; +}; + +static void *synthesize_threads_worker(void *arg) +{ + struct synthesize_threads_arg *args = arg; + + __perf_event__synthesize_threads(args->tool, args->process, + args->machine, args->mmap_data, + args->proc_map_timeout, args->dirent, + args->start, args->num); + return NULL; +} + +int perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) +{ + struct synthesize_threads_arg *args = NULL; + pthread_t *synthesize_threads = NULL; + char proc_path[PATH_MAX]; + struct dirent **dirent; + int num_per_thread; + int m, n, i, j; + int thread_nr; + int base = 0; + int err = -1; + + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); + n = scandir(proc_path, &dirent, 0, alphasort); + if (n < 0) + return err; + + if (nr_threads_synthesize == UINT_MAX) + thread_nr = sysconf(_SC_NPROCESSORS_ONLN); + else + thread_nr = nr_threads_synthesize; + + if (thread_nr <= 1) { + err = __perf_event__synthesize_threads(tool, process, + machine, mmap_data, + proc_map_timeout, + dirent, base, n); + goto free_dirent; + } + if (thread_nr > n) + thread_nr = n; + + synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + if (synthesize_threads == NULL) + goto free_dirent; + + args = calloc(sizeof(*args), thread_nr); + if (args == NULL) + goto free_threads; + + num_per_thread = n / thread_nr; + m = n % thread_nr; + for (i = 0; i < thread_nr; i++) { + args[i].tool = tool; + args[i].process = process; + args[i].machine = machine; + args[i].mmap_data = mmap_data; + args[i].proc_map_timeout = proc_map_timeout; + args[i].dirent = dirent; + } + for (i = 0; i < m; i++) { + args[i].num = num_per_thread + 1; + args[i].start = i * args[i].num; + } + if (i != 0) + base = args[i-1].start + args[i-1].num; + for (j = i; j < thread_nr; j++) { + args[j].num = num_per_thread; + args[j].start = base + (j - i) * args[i].num; + } + + for (i = 0; i < thread_nr; i++) { + if (pthread_create(&synthesize_threads[i], NULL, + synthesize_threads_worker, &args[i])) + goto out_join; + } + err = 0; +out_join: + for (i = 0; i < thread_nr; i++) + pthread_join(synthesize_threads[i], NULL); + free(args); +free_threads: + free(synthesize_threads); +free_dirent: + for (i = 0; i < n; i++) + free(dirent[i]); + free(dirent); + + return err; +} + struct process_symbol_args { const char *name; u64 start; @@ -1327,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) event->context_switch.next_prev_tid); } +static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1359,6 +1472,9 @@ size_t 
perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_SWITCH_CPU_WIDE: ret += perf_event__fprintf_switch(event, fp); break; + case PERF_RECORD_LOST: + ret += perf_event__fprintf_lost(event, fp); + break; default: ret += fprintf(fp, "\n"); } @@ -1497,6 +1613,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->sym = NULL; al->cpu = sample->cpu; al->socket = -1; + al->srcline = NULL; if (al->cpu >= 0) { struct perf_env *env = machine->env; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index ee7bcc898d35..0f794744919c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_RECORD_H #define __PERF_RECORD_H @@ -204,6 +205,7 @@ struct perf_sample { u32 flags; u16 insn_len; u8 cpumode; + u16 misc; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; @@ -680,7 +682,8 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool, int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, - unsigned int proc_map_timeout); + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); @@ -772,8 +775,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, - const struct perf_sample *sample, - bool swapped); + const struct perf_sample *sample); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6a0d7ffbeba0..ac35cd214feb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@ #include "parse-events.h" #include <subcmd/parse-options.h> +#include <fcntl.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -33,9 +34,6 @@ #include <linux/log2.h> #include <linux/err.h> -static void perf_mmap__munmap(struct perf_mmap *map); -static void perf_mmap__put(struct perf_mmap *map); - #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) @@ -128,7 +126,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); fdarray__exit(&evlist->pollfd); } @@ -260,7 +258,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist) .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct perf_evsel *evsel = perf_evsel__new(&attr); + struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries); if (evsel == NULL) return -ENOMEM; @@ -678,11 +676,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) { int i; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->backward_mmap[i].fd; + int fd = evlist->overwrite_mmap[i].fd; int err; if (fd < 0) @@ -704,129 +702,6 @@ static int perf_evlist__resume(struct perf_evlist *evlist) return perf_evlist__set_paused(evlist, false); } -/* When check_messup is true, 'end' must points to a good entry */ -static union perf_event * -perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, - u64 end, u64 *prev) 
-{ - unsigned char *data = md->base + page_size; - union perf_event *event = NULL; - int diff = end - start; - - if (check_messup) { - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the 'end', we got messed up. - * - * In either case, truncate and restart at 'end'. - */ - if (diff > md->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * 'end' points to a known good entry, start there. - */ - start = end; - diff = 0; - } - } - - if (diff >= (int)sizeof(event->header)) { - size_t size; - - event = (union perf_event *)&data[start & md->mask]; - size = event->header.size; - - if (size < sizeof(event->header) || diff < (int)size) { - event = NULL; - goto broken_event; - } - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((start & md->mask) + size != ((start + size) & md->mask)) { - unsigned int offset = start; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = md->event_copy; - - do { - cpy = min(md->mask + 1 - (offset & md->mask), len); - memcpy(dst, &data[offset & md->mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = (union perf_event *) md->event_copy; - } - - start += size; - } - -broken_event: - if (prev) - *prev = start; - - return event; -} - -union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) -{ - u64 head; - u64 old = md->prev; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - - return perf_mmap__read(md, check_messup, old, head, &md->prev); -} - -union perf_event * -perf_mmap__read_backward(struct perf_mmap *md) -{ - u64 head, end; - u64 start = md->prev; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - if (!head) - return NULL; - - /* - * 'head' pointer starts from 0. Kernel minus sizeof(record) form - * it each time when kernel writes to it, so in fact 'head' is - * negative. 'end' pointer is made manually by adding the size of - * the ring buffer to 'head' pointer, means the validate data can - * read is the whole ring buffer. If 'end' is positive, the ring - * buffer has not fully filled, so we must adjust 'end' to 0. - * - * However, since both 'head' and 'end' is unsigned, we can't - * simply compare 'end' against 0. Here we compare '-head' and - * the size of the ring buffer, where -head is the number of bytes - * kernel write to the ring buffer. - */ - if (-head < (u64)(md->mask + 1)) - end = 0; - else - end = head + md->mask + 1; - - return perf_mmap__read(md, false, start, end, &md->prev); -} - union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; @@ -837,7 +712,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). 
*/ - return perf_mmap__read_forward(md, evlist->overwrite); + return perf_mmap__read_forward(md); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) @@ -857,94 +732,14 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) return perf_evlist__mmap_read_forward(evlist, idx); } -void perf_mmap__read_catchup(struct perf_mmap *md) -{ - u64 head; - - if (!refcount_read(&md->refcnt)) - return; - - head = perf_mmap__read_head(md); - md->prev = head; -} - void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) { perf_mmap__read_catchup(&evlist->mmap[idx]); } -static bool perf_mmap__empty(struct perf_mmap *md) -{ - return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; -} - -static void perf_mmap__get(struct perf_mmap *map) -{ - refcount_inc(&map->refcnt); -} - -static void perf_mmap__put(struct perf_mmap *md) -{ - BUG_ON(md->base && refcount_read(&md->refcnt) == 0); - - if (refcount_dec_and_test(&md->refcnt)) - perf_mmap__munmap(md); -} - -void perf_mmap__consume(struct perf_mmap *md, bool overwrite) -{ - if (!overwrite) { - u64 old = md->prev; - - perf_mmap__write_tail(md, old); - } - - if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) - perf_mmap__put(md); -} - void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { - perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); -} - -int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, - struct auxtrace_mmap_params *mp __maybe_unused, - void *userpg __maybe_unused, - int fd __maybe_unused) -{ - return 0; -} - -void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) -{ -} - -void __weak auxtrace_mmap_params__init( - struct auxtrace_mmap_params *mp __maybe_unused, - off_t auxtrace_offset __maybe_unused, - unsigned int auxtrace_pages __maybe_unused, - bool auxtrace_overwrite __maybe_unused) -{ -} - -void __weak auxtrace_mmap_params__set_idx( - struct auxtrace_mmap_params *mp __maybe_unused, - struct perf_evlist *evlist __maybe_unused, - int idx __maybe_unused, - bool per_cpu __maybe_unused) -{ -} - -static void perf_mmap__munmap(struct perf_mmap *map) -{ - if (map->base != NULL) { - munmap(map->base, perf_mmap__mmap_len(map)); - map->base = NULL; - map->fd = -1; - refcount_set(&map->refcnt, 0); - } - auxtrace_mmap__munmap(&map->auxtrace_mmap); + perf_mmap__consume(&evlist->mmap[idx], false); } static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) @@ -955,16 +750,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) for (i = 0; i < evlist->nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); - if (evlist->backward_mmap) + if (evlist->overwrite_mmap) for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->backward_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i]); } void perf_evlist__munmap(struct perf_evlist *evlist) { perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); } static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) @@ -995,48 +790,6 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) return map; } -struct mmap_params { - int prot; - int mask; - struct auxtrace_mmap_params auxtrace_mp; -}; - -static int perf_mmap__mmap(struct perf_mmap *map, - struct mmap_params *mp, int fd) -{ - /* - * The last one will be done at perf_evlist__mmap_consume(), so that we - * make sure we don't prevent tools from consuming every 
last event in - * the ring buffer. - * - * I.e. we can get the POLLHUP meaning that the fd doesn't exist - * anymore, but the last events for it are still in the ring buffer, - * waiting to be consumed. - * - * Tools can chose to ignore this at their own discretion, but the - * evlist layer can't just drop it when filtering events in - * perf_evlist__filter_pollfd(). - */ - refcount_set(&map->refcnt, 2); - map->prev = 0; - map->mask = mp->mask; - map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, - MAP_SHARED, fd, 0); - if (map->base == MAP_FAILED) { - pr_debug2("failed to mmap perf event ring buffer, error %d\n", - errno); - map->base = NULL; - return -1; - } - map->fd = fd; - - if (auxtrace_mmap__mmap(&map->auxtrace_mmap, - &mp->auxtrace_mp, map->base, fd)) - return -1; - - return 0; -} - static bool perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, struct perf_evsel *evsel) @@ -1048,7 +801,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, - int thread, int *_output, int *_output_backward) + int thread, int *_output, int *_output_overwrite) { struct perf_evsel *evsel; int revent; @@ -1060,18 +813,20 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, int fd; int cpu; + mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { - output = _output_backward; - maps = evlist->backward_mmap; + output = _output_overwrite; + maps = evlist->overwrite_mmap; if (!maps) { maps = perf_evlist__alloc_mmap(evlist); if (!maps) return -1; - evlist->backward_mmap = maps; + evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } + mp->prot &= ~PROT_WRITE; } if (evsel->system_wide && thread) @@ -1132,14 +887,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_backward)) + thread, &output, &output_overwrite)) goto out_unmap; } } @@ -1160,13 +915,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_backward)) + &output, &output_overwrite)) goto out_unmap; } @@ -1300,15 +1055,18 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - struct mmap_params mp = { - .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), - }; + /* + * Delay setting mp.prot: set it before calling perf_mmap__mmap. 
+ * Its value is decided by evsel's write_backward. + * So &mp should not be passed through const pointer. + */ + struct mmap_params mp; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist); @@ -1318,7 +1076,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; @@ -1339,10 +1096,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, &mp); } -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1350,7 +1106,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) struct cpu_map *cpus; struct thread_map *threads; - threads = thread_map__new_str(target->pid, target->tid, target->uid); + threads = thread_map__new_str(target->pid, target->tid, target->uid, + target->per_thread); if (!threads) return -1; @@ -1830,6 +1587,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even return perf_evsel__parse_sample(evsel, event, sample); } +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp) +{ + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (!evsel) + return -EFAULT; + return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); +} + size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) { struct perf_evsel *evsel; @@ -1987,13 +1755,13 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, RESUME, } action = NONE; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return; switch (old_state) { case BKW_MMAP_NOTREADY: { if (state != BKW_MMAP_RUNNING) - goto state_err;; + goto state_err; break; } case BKW_MMAP_RUNNING: { @@ -2034,3 +1802,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, state_err: return; } + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->attr.exclude_kernel) + return false; + } + + return true; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bf2c4936e35f..75f8e0ad5d76 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 @@ -10,8 +11,8 @@ #include "../perf.h" #include "event.h" #include "evsel.h" +#include "mmap.h" #include "util.h" -#include "auxtrace.h" #include <signal.h> #include <unistd.h> @@ -23,62 +24,12 @@ struct record_opts; #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) -/** - * struct perf_mmap - perf's ring buffer mmap details - * - * @refcnt - e.g. 
code using PERF_EVENT_IOC_SET_OUTPUT to share this - */ -struct perf_mmap { - void *base; - int mask; - int fd; - refcount_t refcnt; - u64 prev; - struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); -}; - -static inline size_t -perf_mmap__mmap_len(struct perf_mmap *map) -{ - return map->mask + 1 + page_size; -} - -/* - * State machine of bkw_mmap_state: - * - * .________________(forbid)_____________. - * | V - * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY - * ^ ^ | ^ | - * | |__(forbid)____/ |___(forbid)___/| - * | | - * \_________________(3)_______________/ - * - * NOTREADY : Backward ring buffers are not ready - * RUNNING : Backward ring buffers are recording - * DATA_PENDING : We are required to collect data from backward ring buffers - * EMPTY : We have collected data from backward ring buffers. - * - * (0): Setup backward ring buffer - * (1): Pause ring buffers for reading - * (2): Read from ring buffers - * (3): Resume ring buffers for recording - */ -enum bkw_mmap_state { - BKW_MMAP_NOTREADY, - BKW_MMAP_RUNNING, - BKW_MMAP_DATA_PENDING, - BKW_MMAP_EMPTY, -}; - struct perf_evlist { struct list_head entries; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_entries; int nr_groups; int nr_mmaps; - bool overwrite; bool enabled; bool has_user_cpus; size_t mmap_len; @@ -92,12 +43,14 @@ struct perf_evlist { } workload; struct fdarray pollfd; struct perf_mmap *mmap; - struct perf_mmap *backward_mmap; + struct perf_mmap *overwrite_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + u64 first_sample_time; + u64 last_sample_time; }; struct perf_evsel_str_handler { @@ -176,12 +129,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); -union perf_event *perf_mmap__read_backward(struct perf_mmap *map); - -void perf_mmap__read_catchup(struct perf_mmap *md); -void perf_mmap__consume(struct perf_mmap *md, bool overwrite); - union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, @@ -222,10 +169,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite); -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite); +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); size_t perf_evlist__mmap_size(unsigned long pages); @@ -258,6 +204,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample); +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp); + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); @@ -285,25 +235,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int 
perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); -static inline u64 perf_mmap__read_head(struct perf_mmap *mm) -{ - struct perf_event_mmap_page *pc = mm->base; - u64 head = ACCESS_ONCE(pc->data_head); - rmb(); - return head; -} - -static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) -{ - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - mb(); - pc->data_tail = tail; -} - bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str); void perf_evlist__to_front(struct perf_evlist *evlist, struct perf_evsel *move_evsel); @@ -384,4 +315,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event); + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4bb89373eb52..66fa45198a11 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "memswap.h" #include "util/parse-branch-options.h" #include "sane_ctype.h" @@ -271,12 +272,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } +static bool perf_event_can_profile_kernel(void) +{ + return geteuid() == 0 || perf_event_paranoid() == -1; +} + struct perf_evsel *perf_evsel__new_cycles(bool precise) { struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, - .exclude_kernel = geteuid() != 0, + .exclude_kernel = !perf_event_can_profile_kernel(), }; struct perf_evsel *evsel; @@ -645,9 +651,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) return ret; } -void perf_evsel__config_callchain(struct perf_evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +static void __perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; @@ -678,7 +684,7 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); - attr->sample_regs_user = PERF_REGS_MASK; + attr->sample_regs_user |= PERF_REGS_MASK; attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { @@ -693,6 +699,14 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, } } +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) +{ + if (param->enabled) + return __perf_evsel__config_callchain(evsel, opts, param); +} + static void perf_evsel__reset_callgraph(struct perf_evsel *evsel, struct callchain_param *param) @@ -712,28 +726,32 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel, } static void apply_config_terms(struct perf_evsel *evsel, - struct record_opts *opts) + struct record_opts *opts, bool track) { struct perf_evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; struct perf_event_attr *attr = &evsel->attr; - struct callchain_param param; + /* callgraph default */ + struct callchain_param param = { + 
.record_mode = callchain_param.record_mode, + }; u32 dump_size = 0; int max_stack = 0; const char *callgraph_buf = NULL; - /* callgraph default */ - param.record_mode = callchain_param.record_mode; - list_for_each_entry(term, config_terms, list) { switch (term->type) { case PERF_EVSEL__CONFIG_TERM_PERIOD: - attr->sample_period = term->val.period; - attr->freq = 0; + if (!(term->weak && opts->user_interval != ULLONG_MAX)) { + attr->sample_period = term->val.period; + attr->freq = 0; + } break; case PERF_EVSEL__CONFIG_TERM_FREQ: - attr->sample_freq = term->val.freq; - attr->freq = 1; + if (!(term->weak && opts->user_freq != UINT_MAX)) { + attr->sample_freq = term->val.freq; + attr->freq = 1; + } break; case PERF_EVSEL__CONFIG_TERM_TIME: if (term->val.time) @@ -770,6 +788,8 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + break; default: break; } @@ -777,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel, /* User explicitly set per-event callgraph, clear the old setting and reset. */ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + bool sample_address = false; + if (max_stack) { param.max_stack = max_stack; if (callgraph_buf == NULL) @@ -796,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel, evsel->name); return; } + if (param.record_mode == CALLCHAIN_DWARF) + sample_address = true; } } if (dump_size > 0) { @@ -808,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel, perf_evsel__reset_callgraph(evsel, &callchain_param); /* set perf-event callgraph */ - if (param.enabled) + if (param.enabled) { + if (sample_address) { + perf_evsel__set_sample_bit(evsel, ADDR); + perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel->attr.mmap_data = track; + } perf_evsel__config_callchain(evsel, opts, ¶m); + } } } @@ -931,6 +961,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, perf_evsel__set_sample_bit(evsel, REGS_INTR); } + if (opts->sample_user_regs) { + attr->sample_regs_user |= opts->sample_user_regs; + perf_evsel__set_sample_bit(evsel, REGS_USER); + } + if (target__has_cpu(&opts->target) || opts->sample_cpu) perf_evsel__set_sample_bit(evsel, CPU); @@ -1035,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * Apply event specific term settings, * it overloads any global configuration. 
*/ - apply_config_terms(evsel, opts); + apply_config_terms(evsel, opts, track); evsel->ignore_missing_thread = opts->ignore_missing_thread; } @@ -1361,7 +1396,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader, static int perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread) { - struct perf_stat_evsel *ps = leader->priv; + struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->attr.read_format; int size = perf_evsel__read_size(leader); u64 *data = ps->group_data; @@ -1560,6 +1595,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -1582,10 +1618,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static void perf_evsel__remove_fd(struct perf_evsel *pos, + int nr_cpus, int nr_threads, + int thread_idx) +{ + for (int cpu = 0; cpu < nr_cpus; cpu++) + for (int thread = thread_idx; thread < nr_threads - 1; thread++) + FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, + int nr_cpus, int cpu_idx, + int nr_threads, int thread_idx) +{ + struct perf_evsel *pos; + + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + return -EINVAL; + + evlist__for_each_entry(evsel->evlist, pos) { + nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + + perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + + /* + * Since fds for next evsel has not been created, + * there is no need to iterate whole event list. + */ + if (pos == evsel) + break; + } + return 0; +} + static bool ignore_missing_thread(struct perf_evsel *evsel, + int nr_cpus, int cpu, struct thread_map *threads, int thread, int err) { + pid_t ignore_pid = thread_map__pid(threads, thread); + if (!evsel->ignore_missing_thread) return false; @@ -1601,11 +1673,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, if (threads->nr == 1) return false; + /* + * We should remove fd for missing_thread first + * because thread_map__remove() will decrease threads->nr. + */ + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + return false; + if (thread_map__remove(threads, thread)) return false; pr_warning("WARNING: Ignored open failure for pid %d\n", - thread_map__pid(threads, thread)); + ignore_pid); return true; } @@ -1710,7 +1789,7 @@ retry_open: if (fd < 0) { err = -errno; - if (ignore_missing_thread(evsel, threads, thread, err)) { + if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* * We just removed 1 thread, so take a step * back on thread index and lower the upper @@ -1946,6 +2025,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +static int +perf_event__check_size(union perf_event *event, unsigned int sample_size) +{ + /* + * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes + * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to + * check the format does not go past the end of the event. 
+ */ + if (sample_size + sizeof(event->header) > event->header.size) + return -EFAULT; + + return 0; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1967,6 +2060,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->misc = event->header.misc; + data->id = -1ULL; + data->data_src = PERF_MEM_DATA_SRC_NONE; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -1976,15 +2072,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array = event->sample.array; - /* - * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes - * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to - * check the format does not go past the end of the event. - */ - if (evsel->sample_size + sizeof(event->header) > event->header.size) + if (perf_event__check_size(event, evsel->sample_size)) return -EFAULT; - data->id = -1ULL; if (type & PERF_SAMPLE_IDENTIFIER) { data->id = *array; array++; @@ -2014,7 +2104,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->addr = 0; if (type & PERF_SAMPLE_ADDR) { data->addr = *array; array++; @@ -2106,14 +2195,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_RAW) { OVERFLOW_CHECK_u64(array); u.val64 = *array; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* undo swap of u64, then swap on individual u32s */ + + /* + * Undo swap of u64, then swap on individual u32s, + * get the size of the raw area and undo all of the + * swap. The pevent interface handles endianity by + * itself. + */ + if (swapped) { u.val64 = bswap_64(u.val64); u.val32[0] = bswap_32(u.val32[0]); u.val32[1] = bswap_32(u.val32[1]); } data->raw_size = u.val32[0]; + + /* + * The raw data is aligned on 64bits including the + * u32 size, so it's safe to use mem_bswap_64. 
+ */ + if (swapped) + mem_bswap_64((void *) array, data->raw_size); + array = (void *)array + sizeof(u32); OVERFLOW_CHECK(array, data->raw_size, max_size); @@ -2178,14 +2280,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->data_src = PERF_MEM_DATA_SRC_NONE; if (type & PERF_SAMPLE_DATA_SRC) { OVERFLOW_CHECK_u64(array); data->data_src = *array; array++; } - data->transaction = 0; if (type & PERF_SAMPLE_TRANSACTION) { OVERFLOW_CHECK_u64(array); data->transaction = *array; @@ -2218,6 +2318,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, return 0; } +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp) +{ + u64 type = evsel->attr.sample_type; + const u64 *array; + + if (!(type & PERF_SAMPLE_TIME)) + return -1; + + if (event->header.type != PERF_RECORD_SAMPLE) { + struct perf_sample data = { + .time = -1ULL, + }; + + if (!evsel->attr.sample_id_all) + return -1; + if (perf_evsel__parse_id_sample(evsel, event, &data)) + return -1; + + *timestamp = data.time; + return 0; + } + + array = event->sample.array; + + if (perf_event__check_size(event, evsel->sample_size)) + return -EFAULT; + + if (type & PERF_SAMPLE_IDENTIFIER) + array++; + + if (type & PERF_SAMPLE_IP) + array++; + + if (type & PERF_SAMPLE_TID) + array++; + + if (type & PERF_SAMPLE_TIME) + *timestamp = *array; + + return 0; +} + size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) { @@ -2328,8 +2472,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, - const struct perf_sample *sample, - bool swapped) + const struct perf_sample *sample) { u64 *array; size_t sz; @@ -2354,15 +2497,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_TID) { u.val32[0] = sample->pid; u.val32[1] = sample->tid; - if (swapped) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val32[1] = bswap_32(u.val32[1]); - u.val64 = bswap_64(u.val64); - } - *array = u.val64; array++; } @@ -2389,13 +2523,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_CPU) { u.val32[0] = sample->cpu; - if (swapped) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val64 = bswap_64(u.val64); - } + u.val32[1] = 0; *array = u.val64; array++; } @@ -2442,15 +2570,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_RAW) { u.val32[0] = sample->raw_size; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val32[1] = bswap_32(u.val32[1]); - u.val64 = bswap_64(u.val64); - } *array = u.val64; array = (void *)array + sizeof(u32); @@ -2729,8 +2848,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, break; case EOPNOTSUPP: if (evsel->attr.sample_period != 0) - return scnprintf(msg, size, "%s", - "PMU Hardware doesn't support sampling/overflow-interrupts."); + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support sampling/overflow-interrupts. 
Try 'perf stat'", + perf_evsel__name(evsel)); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); @@ -2767,16 +2887,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, perf_evsel__name(evsel)); } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->arch; - return NULL; -} - -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->cpuid; + if (evsel && evsel->evlist) + return evsel->evlist->env; return NULL; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dd2c4b5112a5..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_EVSEL_H #define __PERF_EVSEL_H 1 @@ -37,7 +38,7 @@ struct cgroup_sel; * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. */ -enum { +enum term_type { PERF_EVSEL__CONFIG_TERM_PERIOD, PERF_EVSEL__CONFIG_TERM_FREQ, PERF_EVSEL__CONFIG_TERM_TIME, @@ -48,12 +49,11 @@ enum { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_MAX, }; struct perf_evsel_config_term { struct list_head list; - int type; + enum term_type type; union { u64 period; u64 freq; @@ -66,8 +66,11 @@ struct perf_evsel_config_term { bool overwrite; char *branch; } val; + bool weak; }; +struct perf_stat_evsel; + /** struct perf_evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. 
@@ -101,6 +104,7 @@ struct perf_evsel { const char *unit; struct event_format *tp_format; off_t id_offset; + struct perf_stat_evsel *stats; void *priv; u64 db_id; struct cgroup_sel *cgrp; @@ -137,6 +141,7 @@ struct perf_evsel { const char * metric_name; struct perf_evsel **metric_events; bool collect_stat; + bool weak_group; }; union u64_swap { @@ -333,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp); + static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { return list_entry(evsel->node.next, struct perf_evsel, node); @@ -437,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 583f3a602506..06dfb027879d 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <inttypes.h> #include <stdio.h> #include <stdbool.h> @@ -157,7 +158,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, } } - if (print_dso) { + if (print_dso && (!node->sym || !node->sym->inlined)) { printed += fprintf(fp, " ("); printed += map__fprintf_dsoname(node->map, fp); printed += fprintf(fp, ")"); @@ -166,41 +167,12 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_srcline) printed += map__fprintf_srcline(node->map, addr, "\n ", fp); + if (node->sym && node->sym->inlined) + printed += fprintf(fp, " (inlined)"); + if (!print_oneline) printed += fprintf(fp, "\n"); - if (symbol_conf.inline_name && node->map) { - struct inline_node *inode; - - addr = map__rip_2objdump(node->map, node->ip), - inode = dso__parse_addr_inlines(node->map->dso, addr); - - if (inode) { - struct inline_list *ilist; - - list_for_each_entry(ilist, &inode->val, list) { - if (print_arrow) - printed += fprintf(fp, " <-"); - - /* IP is same, just skip it */ - if (print_ip) - printed += fprintf(fp, "%c%16s", - s, ""); - if (print_sym) - printed += fprintf(fp, " %s", - ilist->funcname); - if (print_srcline) - printed += fprintf(fp, "\n %s:%d", - ilist->filename, - ilist->line_nr); - if (!print_oneline) - printed += fprintf(fp, "\n"); - } - - inline_node__delete(inode); - } - } - if (symbol_conf.bt_stop_list && node->sym && strlist__has_entry(symbol_conf.bt_stop_list, diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 400ef9eab00a..046160831f90 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 diff --git a/tools/perf/util/find-vdso-map.c b/tools/perf/util/find-vdso-map.c index 95ef1cffc056..d7823e3508fc 100644 --- a/tools/perf/util/find-vdso-map.c +++ b/tools/perf/util/find-vdso-map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 static int find_vdso_map(void **start, void **end) { FILE *maps; diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 
2424bd9862a3..de322d51c7fe 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __GENELF_H__ #define __GENELF_H__ diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 0ac2037c970c..ff17920a5ebc 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 echo "/* Automatically generated by $0 */ struct cmdname_help @@ -37,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#ifdef HAVE_LIBAUDIT_SUPPORT" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h index 116debe7a995..f36c7e31780a 100644 --- a/tools/perf/util/group.h +++ b/tools/perf/util/group.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef GROUP_H #define GROUP_H 1 diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 605bbd5404fb..a326e0d8b5b6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <inttypes.h> #include "util.h" @@ -14,9 +15,8 @@ #include <linux/bitops.h> #include <linux/stringify.h> #include <sys/stat.h> -#include <sys/types.h> #include <sys/utsname.h> -#include <unistd.h> +#include <linux/time64.h> #include "evlist.h" #include "evsel.h" @@ -36,6 +36,7 @@ #include <api/fs/fs.h> #include "asm/bug.h" #include "tool.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -1181,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused, return 0; } +static int write_sample_time(struct feat_fd *ff, + struct perf_evlist *evlist) +{ + int ret; + + ret = do_write(ff, &evlist->first_sample_time, + sizeof(evlist->first_sample_time)); + if (ret < 0) + return ret; + + return do_write(ff, &evlist->last_sample_time, + sizeof(evlist->last_sample_time)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1506,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) } } +static void print_sample_time(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + char time_buf[32]; + double d; + + session = container_of(ff->ph, struct perf_session, header); + + timestamp__scnprintf_usec(session->evlist->first_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of first sample : %s\n", time_buf); + + timestamp__scnprintf_usec(session->evlist->last_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of last sample : %s\n", time_buf); + + d = (double)(session->evlist->last_sample_time - + session->evlist->first_sample_time) / NSEC_PER_MSEC; + + fprintf(fp, "# sample duration : %10.3f ms\n", d); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -1762,7 +1799,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) session = container_of(ff->ph, struct perf_session, header); - if (session->file->is_pipe) { + if (session->data->is_pipe) { /* Save events for reading later by print_event_desc, * since they can't be read again in pipe mode. 
*/ ff->events = events; @@ -1771,7 +1808,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) for (evsel = events; evsel->attr.size; evsel++) perf_evlist__set_event_name(session->evlist, evsel); - if (!session->file->is_pipe) + if (!session->data->is_pipe) free_event_desc(events); return 0; @@ -2147,6 +2184,27 @@ out_free_caches: return -1; } +static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_session *session; + u64 first_sample_time, last_sample_time; + int ret; + + session = container_of(ff->ph, struct perf_session, header); + + ret = do_read_u64(ff, &first_sample_time); + if (ret) + return -1; + + ret = do_read_u64(ff, &last_sample_time); + if (ret) + return -1; + + session->evlist->first_sample_time = first_sample_time; + session->evlist->last_sample_time = last_sample_time; + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2204,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), + FEAT_OPR(SAMPLE_TIME, sample_time, false), }; struct header_print_data { @@ -2248,7 +2307,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) { struct header_print_data hd; struct perf_header *header = &session->header; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); struct stat st; int ret, bit; @@ -2264,7 +2323,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) perf_header__process_sections(header, fd, &hd, perf_file_section__fprintf_info); - if (session->file->is_pipe) + if (session->data->is_pipe) return 0; fprintf(fp, "# missing features: "); @@ -2757,7 +2816,7 @@ static int perf_header__read_pipe(struct perf_session *session) struct perf_pipe_file_header f_header; if (perf_file_header__read_pipe(&f_header, header, - perf_data_file__fd(session->file), + perf_data__fd(session->data), session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; @@ -2860,13 +2919,13 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, int perf_session__read_header(struct perf_session *session) { - struct perf_data_file *file = session->file; + struct perf_data *data = session->data; struct perf_header *header = &session->header; struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; - int fd = perf_data_file__fd(file); + int fd = perf_data__fd(data); session->evlist = perf_evlist__new(); if (session->evlist == NULL) @@ -2874,7 +2933,7 @@ int perf_session__read_header(struct perf_session *session) session->evlist->env = &header->env; session->machines.host.env = &header->env; - if (perf_data_file__is_pipe(file)) + if (perf_data__is_pipe(data)) return perf_header__read_pipe(session); if (perf_file_header__read(&f_header, header, fd) < 0) @@ -2889,7 +2948,7 @@ int perf_session__read_header(struct perf_session *session) if (f_header.data.size == 0) { pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" "Was the 'perf record' command properly terminated?\n", - file->path); + data->file.path); } nr_attrs = f_header.attrs.size / f_header.attr_size; @@ -3257,6 +3316,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, return err; } +static bool has_unit(struct perf_evsel *counter) +{ + return counter->unit && 
*counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ + return counter->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe) +{ + struct perf_evsel *counter; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(counter)) { + err = perf_event__synthesize_event_update_unit(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(counter)) { + err = perf_event__synthesize_event_update_scale(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel counter.\n"); + return err; + } + } + + if (counter->own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_evlist **pevlist) @@ -3397,7 +3524,7 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, struct perf_session *session) { ssize_t size_read, padding, size = event->tracing_data.size; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); off_t offset = lseek(fd, 0, SEEK_CUR); char buf[BUFSIZ]; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f7a16ee527b8..f28aaaa3a440 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H @@ -8,6 +9,7 @@ #include <linux/types.h> #include "event.h" #include "env.h" +#include "pmu.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -33,6 +35,7 @@ enum { HEADER_AUXTRACE, HEADER_STAT, HEADER_CACHE, + HEADER_SAMPLE_TIME, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -106,6 +109,11 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe); + int perf_event__process_feature(struct perf_tool *tool, union perf_event *event, struct perf_session *session); @@ -165,5 +173,5 @@ int write_padded(struct feat_fd *fd, const void *bf, */ int get_cpuid(char *buffer, size_t sz); -char *get_cpuid_str(void); +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 15b95300d7f3..4f07a5ba5030 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "cache.h" #include "config.h" #include <poll.h> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e60d8d8ea4c2..b6140950301e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,3 +1,4 @@ +// 
SPDX-License-Identifier: GPL-2.0 #include "util.h" #include "build-id.h" #include "hist.h" @@ -596,6 +597,7 @@ __hists__add_entry(struct hists *hists, .map = al->map, .sym = al->sym, }, + .srcline = al->srcline ? strdup(al->srcline) : NULL, .socket = al->socket, .cpu = al->cpu, .cpumode = al->cpumode, @@ -950,6 +952,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .map = al->map, .sym = al->sym, }, + .srcline = al->srcline ? strdup(al->srcline) : NULL, .parent = iter->parent, .raw_data = sample->raw_data, .raw_size = sample->raw_size, @@ -1141,11 +1144,6 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->mem_info); } - if (he->inline_node) { - inline_node__delete(he->inline_node); - he->inline_node = NULL; - } - zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e60dda26a920..f6630cb95eff 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_HIST_H #define __PERF_HIST_H diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h index ed538942523d..3aff4cf44944 100644 --- a/tools/perf/util/include/asm/asm-offsets.h +++ b/tools/perf/util/include/asm/asm-offsets.h @@ -1 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* stub */ diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h index acffd5e4d1d4..2270481c7e0a 100644 --- a/tools/perf/util/include/asm/cpufeature.h +++ b/tools/perf/util/include/asm/cpufeature.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_CPUFEATURE_H #define PERF_CPUFEATURE_H diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index afe38199e922..e9876be63dbf 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_DWARF2_H #define PERF_DWARF2_H diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h index d0f72b8fcc35..6a6f4b990547 100644 --- a/tools/perf/util/include/asm/uaccess.h +++ b/tools/perf/util/include/asm/uaccess.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_ASM_UACCESS_H_ #define _PERF_ASM_UACCESS_H_ diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 43bfd8da7919..7d99a084e82d 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 06387cffe125..f01d48a8d707 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_LINUX_LINKAGE_H_ #define PERF_LINUX_LINKAGE_H_ diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 218ee2bac9a5..72db2744876d 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -67,7 +67,6 @@ struct intel_bts { u64 branches_sample_type; u64 branches_id; size_t branches_event_size; - bool synth_needs_swap; unsigned long num_events; }; @@ -303,8 +302,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, event.sample.header.size = bts->branches_event_size; ret = 
perf_event__synthesize_sample(&event, bts->branches_sample_type, - 0, &sample, - bts->synth_needs_swap); + 0, &sample); if (ret) return ret; } @@ -500,7 +498,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) } if (!buffer->data) { - int fd = perf_data_file__fd(btsq->bts->session->file); + int fd = perf_data__fd(btsq->bts->session->data); buffer->data = auxtrace_buffer__get_data(buffer, fd); if (!buffer->data) { @@ -664,10 +662,10 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, if (!bts->data_queued) { struct auxtrace_buffer *buffer; off_t data_offset; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); int err; - if (perf_data_file__is_pipe(session->file)) { + if (perf_data__is_pipe(session->data)) { data_offset = 0; } else { data_offset = lseek(fd, 0, SEEK_CUR); @@ -841,8 +839,6 @@ static int intel_bts_synth_events(struct intel_bts *bts, __perf_evsel__sample_size(attr.sample_type); } - bts->synth_needs_swap = evsel->needs_swap; - return 0; } diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 10e0814bb8d2..1b704fbea9de 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c @(diff -I 2>&1 | grep -q 'option requires an argument' && \ - test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ - diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ - diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ - diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ - || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \ + ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \ + ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \ + (echo "Warning: Intel PT: x86 
instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk index 54e961659514..ddd5c4c21129 100644 --- a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk @@ -1,4 +1,5 @@ #!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 # gen-insn-attr-x86.awk: Instruction attribute table generator # Written by Masami Hiramatsu <mhiramat@redhat.com> # diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h index 125ecd2a300d..52dc8d911173 100644 --- a/tools/perf/util/intel-pt-decoder/inat.h +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -97,6 +97,16 @@ #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) +/* Identifiers for segment registers */ +#define INAT_SEG_REG_IGNORE 0 +#define INAT_SEG_REG_DEFAULT 1 +#define INAT_SEG_REG_CS 2 +#define INAT_SEG_REG_SS 3 +#define INAT_SEG_REG_DS 4 +#define INAT_SEG_REG_ES 5 +#define INAT_SEG_REG_FS 6 +#define INAT_SEG_REG_GS 7 + /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); extern int inat_get_last_prefix_id(insn_byte_t last_pfx); diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index 12e377184ee4..e0b85930dd77 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -ff: +ff: UD0 EndTable Table: 3-byte opcode 1 (0x0f 0x38) @@ -717,7 +717,7 @@ AVXcode: 2 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) -81: INVPID Gy,Mdq (66) +81: INVVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 88: vexpandps/d Vpd,Wpd (66),(ev) @@ -896,7 +896,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb @@ -970,6 +970,15 @@ GrpTable: Grp9 EndTable GrpTable: Grp10 +# all are UD1 +0: UD1 +1: UD1 +2: UD1 +3: UD1 +4: UD1 +5: UD1 +6: UD1 +7: UD1 EndTable # Grp11A and Grp11B are expressed as Grp11 in Intel SDM diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 
b58f9fd1e2ee..3773d9c54f45 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -104,8 +104,6 @@ struct intel_pt { u64 pwrx_id; u64 cbr_id; - bool synth_needs_swap; - u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -271,7 +269,7 @@ next: ptq->buffer = buffer; if (!buffer->data) { - int fd = perf_data_file__fd(ptq->pt->session->file); + int fd = perf_data__fd(ptq->pt->session->data); buffer->data = auxtrace_buffer__get_data(buffer, fd); if (!buffer->data) @@ -1101,11 +1099,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, } static int intel_pt_inject_event(union perf_event *event, - struct perf_sample *sample, u64 type, - bool swapped) + struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); - return perf_event__synthesize_sample(event, type, 0, sample, swapped); + return perf_event__synthesize_sample(event, type, 0, sample); } static inline int intel_pt_opt_inject(struct intel_pt *pt, @@ -1115,7 +1112,7 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt, if (!pt->synth_opts.inject) return 0; - return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap); + return intel_pt_inject_event(event, sample, type); } static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, @@ -2084,10 +2081,10 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session, if (!pt->data_queued) { struct auxtrace_buffer *buffer; off_t data_offset; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); int err; - if (perf_data_file__is_pipe(session->file)) { + if (perf_data__is_pipe(session->data)) { data_offset = 0; } else { data_offset = lseek(fd, 0, SEEK_CUR); @@ -2329,8 +2326,6 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } - pt->synth_needs_swap = evsel->needs_swap; - return 0; } diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 020b9ca1b47e..85bab8735fa9 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_INTLIST_H #define __PERF_INTLIST_H diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index 3f42ee4d2a0b..6817ffc2a059 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -1,9 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __JIT_H__ #define __JIT_H__ #include <data.h> -int jit_process(struct perf_session *session, struct perf_data_file *output, +int jit_process(struct perf_session *session, struct perf_data *output, struct machine *machine, char *filename, pid_t pid, u64 *nbytes); int jit_inject_record(const char *filename); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9084930e1757..a1863000e972 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <sys/sysmacros.h> #include <sys/types.h> #include <errno.h> @@ -29,7 +30,7 @@ #include "sane_ctype.h" struct jit_buf_desc { - struct perf_data_file *output; + struct perf_data *output; struct perf_session *session; struct machine *machine; union jr_entry *entry; @@ -60,8 +61,8 @@ struct debug_line_info { struct jit_tool { struct perf_tool tool; - struct perf_data_file output; - struct perf_data_file input; + struct perf_data output; + struct perf_data input; u64 bytes_written; }; @@ -356,7 +357,7 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) { ssize_t size; - size = perf_data_file__write(jd->output, event, 
event->header.size); + size = perf_data__write(jd->output, event, event->header.size); if (size < 0) return -1; @@ -751,7 +752,7 @@ jit_detect(char *mmap_name, pid_t pid) int jit_process(struct perf_session *session, - struct perf_data_file *output, + struct perf_data *output, struct machine *machine, char *filename, pid_t pid, diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index d01e73592f6e..7b1f06567521 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_KVM_STAT_H #define __PERF_KVM_STAT_H diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c index f616e4f65b67..a217ecf0359d 100644 --- a/tools/perf/util/levenshtein.c +++ b/tools/perf/util/levenshtein.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "levenshtein.h" #include <errno.h> #include <stdlib.h> diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h index b0fcb6d8a881..34ca173c557d 100644 --- a/tools/perf/util/levenshtein.h +++ b/tools/perf/util/levenshtein.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_LEVENSHTEIN_H #define __PERF_LEVENSHTEIN_H diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 6559bc586643..66756e6be111 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file setups defines to compile arch specific binary from the * generic one. diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index 957ffff72428..c5e568188e19 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file setups defines to compile arch specific binary from the * generic one. diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 209b0c82eff4..4952b429caa7 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015, Huawei Inc. diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index c87a2a92a88f..d3ad8deb5db4 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015, Huawei Inc. 
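As a quick illustration of the fast path taken by the new perf_evsel__parse_sample_timestamp() helper added to evsel.c above: the PERF_SAMPLE_* bits that precede PERF_SAMPLE_TIME in a PERF_RECORD_SAMPLE record (IDENTIFIER, IP, TID) each occupy one u64 slot, so the timestamp can be pulled out without decoding the rest of the sample. The following is a minimal standalone sketch; the SAMPLE_* macros, the helper name and the fake record are stand-ins for illustration only, not the kernel ABI definitions or the actual perf code.

/* sketch: fetch only the timestamp from a flattened sample array */
#include <stdint.h>
#include <stdio.h>

#define SAMPLE_IDENTIFIER (1U << 0)   /* stand-in for PERF_SAMPLE_IDENTIFIER */
#define SAMPLE_IP         (1U << 1)   /* stand-in for PERF_SAMPLE_IP */
#define SAMPLE_TID        (1U << 2)   /* stand-in for PERF_SAMPLE_TID */
#define SAMPLE_TIME       (1U << 3)   /* stand-in for PERF_SAMPLE_TIME */

static int parse_sample_timestamp(uint64_t sample_type, const uint64_t *array,
				  uint64_t *timestamp)
{
	if (!(sample_type & SAMPLE_TIME))
		return -1;
	if (sample_type & SAMPLE_IDENTIFIER)
		array++;		/* skip the sample id */
	if (sample_type & SAMPLE_IP)
		array++;		/* skip the instruction pointer */
	if (sample_type & SAMPLE_TID)
		array++;		/* skip the packed pid/tid word */
	*timestamp = *array;
	return 0;
}

int main(void)
{
	/* fake record layout: id, ip, pid/tid, time */
	uint64_t array[] = { 7, 0xffffffff81000000ULL, 1234, 123456789ULL };
	uint64_t ts;

	if (!parse_sample_timestamp(SAMPLE_IDENTIFIER | SAMPLE_IP |
				    SAMPLE_TID | SAMPLE_TIME, array, &ts))
		printf("timestamp: %llu\n", (unsigned long long)ts);
	return 0;
}

In the real helper the same walk is preceded by the perf_event__check_size() test factored out above, so the array access cannot run past event->header.size.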
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 4ca7c5c6cdcd..07498eaddc08 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <lzma.h> #include <stdio.h> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df709363ef69..b05a67464c03 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <dirent.h> #include <errno.h> #include <inttypes.h> @@ -30,7 +31,21 @@ static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); dsos->root = RB_ROOT; - pthread_rwlock_init(&dsos->lock, NULL); + init_rwsem(&dsos->lock); +} + +static void machine__threads_init(struct machine *machine) +{ + int i; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + threads->entries = RB_ROOT; + init_rwsem(&threads->lock); + threads->nr = 0; + INIT_LIST_HEAD(&threads->dead); + threads->last_match = NULL; + } } int machine__init(struct machine *machine, const char *root_dir, pid_t pid) @@ -40,11 +55,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine->threads = RB_ROOT; - pthread_rwlock_init(&machine->threads_lock, NULL); - machine->nr_threads = 0; - INIT_LIST_HEAD(&machine->dead_threads); - machine->last_match = NULL; + machine__threads_init(machine); machine->vdso_info = NULL; machine->env = NULL; @@ -120,7 +131,7 @@ static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); @@ -129,39 +140,52 @@ static void dsos__purge(struct dsos *dsos) dso__put(pos); } - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } static void dsos__exit(struct dsos *dsos) { dsos__purge(dsos); - pthread_rwlock_destroy(&dsos->lock); + exit_rwsem(&dsos->lock); } void machine__delete_threads(struct machine *machine) { struct rb_node *nd; + int i; - pthread_rwlock_wrlock(&machine->threads_lock); - nd = rb_first(&machine->threads); - while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + down_write(&threads->lock); + nd = rb_first(&threads->entries); + while (nd) { + struct thread *t = rb_entry(nd, struct thread, rb_node); - nd = rb_next(nd); - __machine__remove_thread(machine, t, false); + nd = rb_next(nd); + __machine__remove_thread(machine, t, false); + } + up_write(&threads->lock); } - pthread_rwlock_unlock(&machine->threads_lock); } void machine__exit(struct machine *machine) { + int i; + + if (machine == NULL) + return; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); - pthread_rwlock_destroy(&machine->threads_lock); + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + exit_rwsem(&threads->lock); + } } void machine__delete(struct machine *machine) @@ -379,10 +403,11 @@ out_err: * lookup/new thread inserted. 
*/ static struct thread *____machine__findnew_thread(struct machine *machine, + struct threads *threads, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &machine->threads.rb_node; + struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -391,14 +416,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * so most of the time we dont have to look up * the full rbtree: */ - th = machine->last_match; + th = threads->last_match; if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - machine->last_match = NULL; + threads->last_match = NULL; } while (*p != NULL) { @@ -406,7 +431,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + threads->last_match = th; machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -423,7 +448,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &machine->threads); + rb_insert_color(&th->rb_node, &threads->entries); /* * We have to initialize map_groups separately @@ -434,7 +459,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &machine->threads); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -443,8 +468,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * It is now in the rbtree, get a ref */ thread__get(th); - machine->last_match = th; - ++machine->nr_threads; + threads->last_match = th; + ++threads->nr; } return th; @@ -452,27 +477,30 @@ static struct thread *____machine__findnew_thread(struct machine *machine, struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - return ____machine__findnew_thread(machine, pid, tid, true); + return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true); } struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&machine->threads_lock); + down_write(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&machine->threads_lock); + up_write(&threads->lock); return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&machine->threads_lock); - th = ____machine__findnew_thread(machine, pid, tid, false); - pthread_rwlock_unlock(&machine->threads_lock); + + down_read(&threads->lock); + th = ____machine__findnew_thread(machine, threads, pid, tid, false); + up_read(&threads->lock); return th; } @@ -564,7 +592,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, { struct dso *dso; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); dso = __dsos__find(&machine->dsos, m->name, true); if (!dso) { @@ -578,7 +606,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__get(dso); out_unlock: - 
pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } @@ -719,21 +747,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret; struct rb_node *nd; + size_t ret; + int i; - pthread_rwlock_rdlock(&machine->threads_lock); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; - ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + down_read(&threads->lock); - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); + ret = fprintf(fp, "Threads: %u\n", threads->nr); - ret += thread__fprintf(pos, fp); - } + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); - pthread_rwlock_unlock(&machine->threads_lock); + ret += thread__fprintf(pos, fp); + } + up_read(&threads->lock); + } return ret; } @@ -1292,7 +1324,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *kernel = NULL; struct dso *dso; - pthread_rwlock_rdlock(&machine->dsos.lock); + down_read(&machine->dsos.lock); list_for_each_entry(dso, &machine->dsos.head, node) { @@ -1322,7 +1354,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, break; } - pthread_rwlock_unlock(&machine->dsos.lock); + up_read(&machine->dsos.lock); if (kernel == NULL) kernel = machine__findnew_dso(machine, kmmap_prefix); @@ -1479,23 +1511,25 @@ out_problem: static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - if (machine->last_match == th) - machine->last_match = NULL; + struct threads *threads = machine__threads(machine, th->tid); + + if (threads->last_match == th) + threads->last_match = NULL; BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&machine->threads_lock); - rb_erase_init(&th->rb_node, &machine->threads); + down_write(&threads->lock); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); - --machine->nr_threads; + --threads->nr; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor * will be called and we will remove it from the dead_threads list. 
*/ - list_add_tail(&th->node, &machine->dead_threads); + list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&machine->threads_lock); + up_write(&threads->lock); thread__put(th); } @@ -1679,6 +1713,26 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, return mi; } +static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) +{ + char *srcline = NULL; + + if (!map || callchain_param.key == CCKEY_FUNCTION) + return srcline; + + srcline = srcline__tree_find(&map->dso->srclines, ip); + if (!srcline) { + bool show_sym = false; + bool show_addr = callchain_param.key == CCKEY_ADDRESS; + + srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), + sym, show_sym, show_addr, ip); + srcline__tree_insert(&map->dso->srclines, ip, srcline); + } + + return srcline; +} + struct iterations { int nr_loop_iter; u64 cycles; @@ -1698,6 +1752,7 @@ static int add_callchain_ip(struct thread *thread, struct addr_location al; int nr_loop_iter = 0; u64 iter_cycles = 0; + const char *srcline = NULL; al.filtered = 0; al.sym = NULL; @@ -1753,9 +1808,10 @@ static int add_callchain_ip(struct thread *thread, iter_cycles = iter->cycles; } + srcline = callchain_srcline(al.map, al.sym, al.addr); return callchain_cursor_append(cursor, al.addr, al.map, al.sym, branch, flags, nr_loop_iter, - iter_cycles, branch_from); + iter_cycles, branch_from, srcline); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -2068,15 +2124,54 @@ check_calls: return 0; } +static int append_inlines(struct callchain_cursor *cursor, + struct map *map, struct symbol *sym, u64 ip) +{ + struct inline_node *inline_node; + struct inline_list *ilist; + u64 addr; + int ret = 1; + + if (!symbol_conf.inline_name || !map || !sym) + return ret; + + addr = map__rip_2objdump(map, ip); + + inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr); + if (!inline_node) { + inline_node = dso__parse_addr_inlines(map->dso, addr, sym); + if (!inline_node) + return ret; + inlines__tree_insert(&map->dso->inlined_nodes, inline_node); + } + + list_for_each_entry(ilist, &inline_node->val, list) { + ret = callchain_cursor_append(cursor, ip, map, + ilist->symbol, false, + NULL, 0, 0, 0, ilist->srcline); + + if (ret != 0) + return ret; + } + + return ret; +} + static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; + const char *srcline = NULL; if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; + + if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0) + return 0; + + srcline = callchain_srcline(entry->map, entry->sym, entry->ip); return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, - false, NULL, 0, 0, 0); + false, NULL, 0, 0, 0, srcline); } static int thread__resolve_callchain_unwind(struct thread *thread, @@ -2109,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread, { int ret = 0; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); if (callchain_param.order == ORDER_CALLEE) { ret = thread__resolve_callchain_sample(thread, cursor, @@ -2140,21 +2235,26 @@ int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv) { + struct threads *threads; struct rb_node *nd; struct thread *thread; int rc = 0; + int i; - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - thread = rb_entry(nd, struct thread, rb_node); - rc = fn(thread, priv); - if (rc != 0) - return rc; - } + for (i = 0; i < 
THREADS__TABLE_SIZE; i++) { + threads = &machine->threads[i]; + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + thread = rb_entry(nd, struct thread, rb_node); + rc = fn(thread, priv); + if (rc != 0) + return rc; + } - list_for_each_entry(thread, &machine->dead_threads, node) { - rc = fn(thread, priv); - if (rc != 0) - return rc; + list_for_each_entry(thread, &threads->dead, node) { + rc = fn(thread, priv); + if (rc != 0) + return rc; + } } return rc; } @@ -2183,12 +2283,16 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout) + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) { if (target__has_task(target)) return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout); + return perf_event__synthesize_threads(tool, process, + machine, data_mmap, + proc_map_timeout, + nr_threads_synthesize); /* command specified */ return 0; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 3cdb1340f917..5ce860b64c74 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_MACHINE_H #define __PERF_MACHINE_H @@ -6,6 +7,7 @@ #include "map.h" #include "dso.h" #include "event.h" +#include "rwsem.h" struct addr_location; struct branch_stack; @@ -23,6 +25,17 @@ extern const char *ref_reloc_sym_names[]; struct vdso_info; +#define THREADS__TABLE_BITS 8 +#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) + +struct threads { + struct rb_root entries; + struct rw_semaphore lock; + unsigned int nr; + struct list_head dead; + struct thread *last_match; +}; + struct machine { struct rb_node rb_node; pid_t pid; @@ -30,11 +43,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; - struct rb_root threads; - pthread_rwlock_t threads_lock; - unsigned int nr_threads; - struct list_head dead_threads; - struct thread *last_match; + struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; struct dsos dsos; @@ -48,6 +57,12 @@ struct machine { }; }; +static inline struct threads *machine__threads(struct machine *machine, pid_t tid) +{ + /* Cast it to handle tid == -1 */ + return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; +} + static inline struct map *__machine__kernel_map(struct machine *machine, enum map_type type) { @@ -243,15 +258,18 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout); + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize); static inline int machine__synthesize_threads(struct machine *machine, struct target *target, struct thread_map *threads, bool data_mmap, - unsigned int proc_map_timeout) + unsigned int proc_map_timeout, + unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, perf_event__process, data_mmap, - proc_map_timeout); + proc_map_timeout, + nr_threads_synthesize); } pid_t machine__get_current_tid(struct 
machine *machine, int cpu); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index bdaa0a4edc17..8fe57031e1a8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "symbol.h" #include <errno.h> #include <inttypes.h> @@ -418,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, map__rip_2objdump(map, addr), NULL, - true, true); + true, true, addr); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); @@ -488,7 +489,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; - pthread_rwlock_init(&maps->lock, NULL); + init_rwsem(&maps->lock); } void map_groups__init(struct map_groups *mg, struct machine *machine) @@ -517,9 +518,9 @@ static void __maps__purge(struct maps *maps) static void maps__exit(struct maps *maps) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__purge(maps); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } void map_groups__exit(struct map_groups *mg) @@ -586,7 +587,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct symbol *sym; struct rb_node *nd; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); @@ -602,7 +603,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, sym = NULL; out: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return sym; } @@ -638,7 +639,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) size_t printed = 0; struct rb_node *nd; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); @@ -650,7 +651,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) } } - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return printed; } @@ -682,7 +683,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp struct rb_node *next; int err = 0; - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); root = &maps->entries; next = rb_first(root); @@ -750,7 +751,7 @@ put_map: err = 0; out: - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); return err; } @@ -771,7 +772,7 @@ int map_groups__clone(struct thread *thread, struct map *map; struct maps *maps = &parent->maps[type]; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { struct map *new = map__clone(map); @@ -788,7 +789,7 @@ int map_groups__clone(struct thread *thread, err = 0; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return err; } @@ -815,9 +816,9 @@ static void __maps__insert(struct maps *maps, struct map *map) void maps__insert(struct maps *maps, struct map *map) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__insert(maps, map); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } static void __maps__remove(struct maps *maps, struct map *map) @@ -828,9 +829,9 @@ static void __maps__remove(struct maps *maps, struct map *map) void maps__remove(struct maps *maps, struct map *map) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__remove(maps, 
map); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } struct map *maps__find(struct maps *maps, u64 ip) @@ -838,7 +839,7 @@ struct map *maps__find(struct maps *maps, u64 ip) struct rb_node **p, *parent = NULL; struct map *m; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); p = &maps->entries.rb_node; while (*p != NULL) { @@ -854,7 +855,7 @@ struct map *maps__find(struct maps *maps, u64 ip) m = NULL; out: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return m; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 73aacf7a7dc4..edeb7291c8e1 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_MAP_H #define __PERF_MAP_H @@ -9,6 +10,7 @@ #include <stdio.h> #include <stdbool.h> #include <linux/types.h> +#include "rwsem.h" enum map_type { MAP__FUNCTION = 0, @@ -61,7 +63,7 @@ struct kmap { struct maps { struct rb_root entries; - pthread_rwlock_t lock; + struct rw_semaphore lock; }; struct map_groups { diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 28afe5fa84d6..93f74d8d3cdd 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stddef.h> #include <stdlib.h> #include <string.h> diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 40f72ee4f42a..a889ec2fa9f5 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_MEM_EVENTS_H #define __PERF_MEM_EVENTS_H diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c index 55f7faa8d9ec..c1317e4983bc 100644 --- a/tools/perf/util/memswap.c +++ b/tools/perf/util/memswap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <byteswap.h> #include "memswap.h" #include <linux/types.h> diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h index 7d1b1c34bb57..1e29ff903ca9 100644 --- a/tools/perf/util/memswap.h +++ b/tools/perf/util/memswap.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_MEMSWAP_H_ #define PERF_MEMSWAP_H_ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c new file mode 100644 index 000000000000..1ddc3d1d0147 --- /dev/null +++ b/tools/perf/util/metricgroup.c @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +/* Manage metrics and groups of metrics from JSON files */ + +#include "metricgroup.h" +#include "evlist.h" +#include "strbuf.h" +#include "pmu.h" +#include "expr.h" +#include "rblist.h" +#include <string.h> +#include <stdbool.h> +#include <errno.h> +#include "pmu-events/pmu-events.h" +#include "strlist.h" +#include <assert.h> +#include <ctype.h> + +struct metric_event *metricgroup__lookup(struct rblist *metric_events, + struct perf_evsel *evsel, + bool create) +{ + struct rb_node *nd; + struct metric_event me = { + .evsel = evsel + }; + + if (!metric_events) + return NULL; + + nd = rblist__find(metric_events, &me); + if (nd) + return container_of(nd, struct metric_event, nd); + if (create) { + rblist__add_node(metric_events, &me); + nd = rblist__find(metric_events, &me); + if (nd) + return container_of(nd, struct metric_event, nd); + } + return NULL; +} + +static int metric_event_cmp(struct rb_node *rb_node, const void *entry) +{ + struct metric_event *a = container_of(rb_node, + struct metric_event, + nd); + const struct metric_event *b = entry; + + if (a->evsel == b->evsel) + return 0; + if ((char *)a->evsel < (char *)b->evsel) + return -1; + return +1; +} + +static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused, + const void *entry) +{ + struct metric_event *me = malloc(sizeof(struct metric_event)); + + if (!me) + return NULL; + memcpy(me, entry, sizeof(struct metric_event)); + me->evsel = ((struct metric_event *)entry)->evsel; + INIT_LIST_HEAD(&me->head); + return &me->nd; +} + +static void metricgroup__rblist_init(struct rblist *metric_events) +{ + rblist__init(metric_events); + metric_events->node_cmp = metric_event_cmp; + metric_events->node_new = metric_event_new; +} + +struct egroup { + struct list_head nd; + int idnum; + const char **ids; + const char *metric_name; + const char *metric_expr; +}; + +static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist, + const char **ids, + int idnum, + struct perf_evsel **metric_events) +{ + struct perf_evsel *ev, *start = NULL; + int ind = 0; + + evlist__for_each_entry (perf_evlist, ev) { + if (!strcmp(ev->name, ids[ind])) { + metric_events[ind] = ev; + if (ind == 0) + start = ev; + if (++ind == idnum) { + metric_events[ind] = NULL; + return start; + } + } else { + ind = 0; + start = NULL; + } + } + /* + * This can happen when an alias expands to multiple + * events, like for uncore events. + * We don't support this case for now. 
+ */ + return NULL; +} + +static int metricgroup__setup_events(struct list_head *groups, + struct perf_evlist *perf_evlist, + struct rblist *metric_events_list) +{ + struct metric_event *me; + struct metric_expr *expr; + int i = 0; + int ret = 0; + struct egroup *eg; + struct perf_evsel *evsel; + + list_for_each_entry (eg, groups, nd) { + struct perf_evsel **metric_events; + + metric_events = calloc(sizeof(void *), eg->idnum + 1); + if (!metric_events) { + ret = -ENOMEM; + break; + } + evsel = find_evsel(perf_evlist, eg->ids, eg->idnum, + metric_events); + if (!evsel) { + pr_debug("Cannot resolve %s: %s\n", + eg->metric_name, eg->metric_expr); + continue; + } + for (i = 0; i < eg->idnum; i++) + metric_events[i]->collect_stat = true; + me = metricgroup__lookup(metric_events_list, evsel, true); + if (!me) { + ret = -ENOMEM; + break; + } + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + break; + } + expr->metric_expr = eg->metric_expr; + expr->metric_name = eg->metric_name; + expr->metric_events = metric_events; + list_add(&expr->nd, &me->head); + } + return ret; +} + +static bool match_metric(const char *n, const char *list) +{ + int len; + char *m; + + if (!list) + return false; + if (!strcmp(list, "all")) + return true; + if (!n) + return !strcasecmp(list, "No_group"); + len = strlen(list); + m = strcasestr(n, list); + if (!m) + return false; + if ((m == n || m[-1] == ';' || m[-1] == ' ') && + (m[len] == 0 || m[len] == ';')) + return true; + return false; +} + +struct mep { + struct rb_node nd; + const char *name; + struct strlist *metrics; +}; + +static int mep_cmp(struct rb_node *rb_node, const void *entry) +{ + struct mep *a = container_of(rb_node, struct mep, nd); + struct mep *b = (struct mep *)entry; + + return strcmp(a->name, b->name); +} + +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, + const void *entry) +{ + struct mep *me = malloc(sizeof(struct mep)); + + if (!me) + return NULL; + memcpy(me, entry, sizeof(struct mep)); + me->name = strdup(me->name); + if (!me->name) + goto out_me; + me->metrics = strlist__new(NULL, NULL); + if (!me->metrics) + goto out_name; + return &me->nd; +out_name: + free((char *)me->name); +out_me: + free(me); + return NULL; +} + +static struct mep *mep_lookup(struct rblist *groups, const char *name) +{ + struct rb_node *nd; + struct mep me = { + .name = name + }; + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; +} + +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) +{ + struct mep *me = container_of(nd, struct mep, nd); + + strlist__delete(me->metrics); + free((void *)me->name); + free(me); +} + +static void metricgroup__print_strlist(struct strlist *metrics, bool raw) +{ + struct str_node *sn; + int n = 0; + + strlist__for_each_entry (sn, metrics) { + if (raw) + printf("%s%s", n > 0 ? 
" " : "", sn->s); + else + printf(" %s\n", sn->s); + n++; + } + if (raw) + putchar('\n'); +} + +void metricgroup__print(bool metrics, bool metricgroups, char *filter, + bool raw) +{ + struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_event *pe; + int i; + struct rblist groups; + struct rb_node *node, *next; + struct strlist *metriclist = NULL; + + if (!map) + return; + + if (!metricgroups) { + metriclist = strlist__new(NULL, NULL); + if (!metriclist) + return; + } + + rblist__init(&groups); + groups.node_new = mep_new; + groups.node_cmp = mep_cmp; + groups.node_delete = mep_delete; + for (i = 0; ; i++) { + const char *g; + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + g = pe->metric_group; + if (!g && pe->metric_name) { + if (pe->name) + continue; + g = "No_group"; + } + if (g) { + char *omg; + char *mg = strdup(g); + + if (!mg) + return; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + char *s; + + if (*g == 0) + g = "No_group"; + while (isspace(*g)) + g++; + if (filter && !strstr(g, filter)) + continue; + if (raw) + s = (char *)pe->metric_name; + else { + if (asprintf(&s, "%s\n\t[%s]", + pe->metric_name, pe->desc) < 0) + return; + } + + if (!s) + continue; + + if (!metricgroups) { + strlist__add(metriclist, s); + } else { + me = mep_lookup(&groups, g); + if (!me) + continue; + strlist__add(me->metrics, s); + } + } + free(omg); + } + } + + if (metricgroups && !raw) + printf("\nMetric Groups:\n\n"); + else if (metrics && !raw) + printf("\nMetrics:\n\n"); + + for (node = rb_first(&groups.entries); node; node = next) { + struct mep *me = container_of(node, struct mep, nd); + + if (metricgroups) + printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); + if (metrics) + metricgroup__print_strlist(me->metrics, raw); + next = rb_next(node); + rblist__remove_node(&groups, node); + } + if (!metricgroups) + metricgroup__print_strlist(metriclist, raw); + strlist__delete(metriclist); +} + +static int metricgroup__add_metric(const char *metric, struct strbuf *events, + struct list_head *group_list) +{ + struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_event *pe; + int ret = -EINVAL; + int i, j; + + if (!map) + return 0; + + for (i = 0; ; i++) { + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + if (match_metric(pe->metric_group, metric) || + match_metric(pe->metric_name, metric)) { + const char **ids; + int idnum; + struct egroup *eg; + + pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + + if (expr__find_other(pe->metric_expr, + NULL, &ids, &idnum) < 0) + continue; + if (events->len > 0) + strbuf_addf(events, ","); + for (j = 0; j < idnum; j++) { + pr_debug("found event %s\n", ids[j]); + strbuf_addf(events, "%s%s", + j == 0 ? 
"{" : ",", + ids[j]); + } + strbuf_addf(events, "}:W"); + + eg = malloc(sizeof(struct egroup)); + if (!eg) { + ret = -ENOMEM; + break; + } + eg->ids = ids; + eg->idnum = idnum; + eg->metric_name = pe->metric_name; + eg->metric_expr = pe->metric_expr; + list_add_tail(&eg->nd, group_list); + ret = 0; + } + } + return ret; +} + +static int metricgroup__add_metric_list(const char *list, struct strbuf *events, + struct list_head *group_list) +{ + char *llist, *nlist, *p; + int ret = -EINVAL; + + nlist = strdup(list); + if (!nlist) + return -ENOMEM; + llist = nlist; + + strbuf_init(events, 100); + strbuf_addf(events, "%s", ""); + + while ((p = strsep(&llist, ",")) != NULL) { + ret = metricgroup__add_metric(p, events, group_list); + if (ret == -EINVAL) { + fprintf(stderr, "Cannot find metric or group `%s'\n", + p); + break; + } + } + free(nlist); + return ret; +} + +static void metricgroup__free_egroups(struct list_head *group_list) +{ + struct egroup *eg, *egtmp; + int i; + + list_for_each_entry_safe (eg, egtmp, group_list, nd) { + for (i = 0; i < eg->idnum; i++) + free((char *)eg->ids[i]); + free(eg->ids); + free(eg); + } +} + +int metricgroup__parse_groups(const struct option *opt, + const char *str, + struct rblist *metric_events) +{ + struct parse_events_error parse_error; + struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value; + struct strbuf extra_events; + LIST_HEAD(group_list); + int ret; + + if (metric_events->nr_entries == 0) + metricgroup__rblist_init(metric_events); + ret = metricgroup__add_metric_list(str, &extra_events, &group_list); + if (ret) + return ret; + pr_debug("adding %s\n", extra_events.buf); + memset(&parse_error, 0, sizeof(struct parse_events_error)); + ret = parse_events(perf_evlist, extra_events.buf, &parse_error); + if (ret) { + parse_events_print_error(&parse_error, extra_events.buf); + goto out; + } + strbuf_release(&extra_events); + ret = metricgroup__setup_events(&group_list, perf_evlist, + metric_events); +out: + metricgroup__free_egroups(&group_list); + return ret; +} diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h new file mode 100644 index 000000000000..06854e125ee7 --- /dev/null +++ b/tools/perf/util/metricgroup.h @@ -0,0 +1,31 @@ +#ifndef METRICGROUP_H +#define METRICGROUP_H 1 + +#include "linux/list.h" +#include "rblist.h" +#include <subcmd/parse-options.h> +#include "evlist.h" +#include "strbuf.h" + +struct metric_event { + struct rb_node nd; + struct perf_evsel *evsel; + struct list_head head; /* list of metric_expr */ +}; + +struct metric_expr { + struct list_head nd; + const char *metric_expr; + const char *metric_name; + struct perf_evsel **metric_events; +}; + +struct metric_event *metricgroup__lookup(struct rblist *metric_events, + struct perf_evsel *evsel, + bool create); +int metricgroup__parse_groups(const struct option *opt, + const char *str, + struct rblist *metric_events); + +void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +#endif diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c new file mode 100644 index 000000000000..05076e683938 --- /dev/null +++ b/tools/perf/util/mmap.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further + * copyright notes. + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include <sys/mman.h> +#include <inttypes.h> +#include <asm/bug.h> +#include "debug.h" +#include "event.h" +#include "mmap.h" +#include "util.h" /* page_size */ + +size_t perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} + +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event *perf_mmap__read(struct perf_mmap *map, + u64 start, u64 end, u64 *prev) +{ + unsigned char *data = map->base + page_size; + union perf_event *event = NULL; + int diff = end - start; + + if (diff >= (int)sizeof(event->header)) { + size_t size; + + event = (union perf_event *)&data[start & map->mask]; + size = event->header.size; + + if (size < sizeof(event->header) || diff < (int)size) { + event = NULL; + goto broken_event; + } + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((start & map->mask) + size != ((start + size) & map->mask)) { + unsigned int offset = start; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = map->event_copy; + + do { + cpy = min(map->mask + 1 - (offset & map->mask), len); + memcpy(dst, &data[offset & map->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = (union perf_event *)map->event_copy; + } + + start += size; + } + +broken_event: + if (prev) + *prev = start; + + return event; +} + +union perf_event *perf_mmap__read_forward(struct perf_mmap *map) +{ + u64 head; + u64 old = map->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + head = perf_mmap__read_head(map); + + return perf_mmap__read(map, old, head, &map->prev); +} + +union perf_event *perf_mmap__read_backward(struct perf_mmap *map) +{ + u64 head, end; + u64 start = map->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + head = perf_mmap__read_head(map); + if (!head) + return NULL; + + /* + * 'head' pointer starts from 0. Kernel minus sizeof(record) form + * it each time when kernel writes to it, so in fact 'head' is + * negative. 'end' pointer is made manually by adding the size of + * the ring buffer to 'head' pointer, means the validate data can + * read is the whole ring buffer. If 'end' is positive, the ring + * buffer has not fully filled, so we must adjust 'end' to 0. + * + * However, since both 'head' and 'end' is unsigned, we can't + * simply compare 'end' against 0. Here we compare '-head' and + * the size of the ring buffer, where -head is the number of bytes + * kernel write to the ring buffer. 
+ */ + if (-head < (u64)(map->mask + 1)) + end = 0; + else + end = head + map->mask + 1; + + return perf_mmap__read(map, start, end, &map->prev); +} + +void perf_mmap__read_catchup(struct perf_mmap *map) +{ + u64 head; + + if (!refcount_read(&map->refcnt)) + return; + + head = perf_mmap__read_head(map); + map->prev = head; +} + +static bool perf_mmap__empty(struct perf_mmap *map) +{ + return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base; +} + +void perf_mmap__get(struct perf_mmap *map) +{ + refcount_inc(&map->refcnt); +} + +void perf_mmap__put(struct perf_mmap *map) +{ + BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + + if (refcount_dec_and_test(&map->refcnt)) + perf_mmap__munmap(map); +} + +void perf_mmap__consume(struct perf_mmap *map, bool overwrite) +{ + if (!overwrite) { + u64 old = map->prev; + + perf_mmap__write_tail(map, old); + } + + if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + perf_mmap__put(map); +} + +int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, + struct auxtrace_mmap_params *mp __maybe_unused, + void *userpg __maybe_unused, + int fd __maybe_unused) +{ + return 0; +} + +void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) +{ +} + +void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused, + off_t auxtrace_offset __maybe_unused, + unsigned int auxtrace_pages __maybe_unused, + bool auxtrace_overwrite __maybe_unused) +{ +} + +void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int idx __maybe_unused, + bool per_cpu __maybe_unused) +{ +} + +void perf_mmap__munmap(struct perf_mmap *map) +{ + if (map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + refcount_set(&map->refcnt, 0); + } + auxtrace_mmap__munmap(&map->auxtrace_mmap); +} + +int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) +{ + /* + * The last one will be done at perf_evlist__mmap_consume(), so that we + * make sure we don't prevent tools from consuming every last event in + * the ring buffer. + * + * I.e. we can get the POLLHUP meaning that the fd doesn't exist + * anymore, but the last events for it are still in the ring buffer, + * waiting to be consumed. + * + * Tools can chose to ignore this at their own discretion, but the + * evlist layer can't just drop it when filtering events in + * perf_evlist__filter_pollfd(). 
+ */ + refcount_set(&map->refcnt, 2); + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { + pr_debug2("failed to mmap perf event ring buffer, error %d\n", + errno); + map->base = NULL; + return -1; + } + map->fd = fd; + + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, + &mp->auxtrace_mp, map->base, fd)) + return -1; + + return 0; +} + +static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = head; + int size = mask + 1; + + pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pheader = (struct perf_event_header *)(buf + (head & mask)); + *start = head; + while (true) { + if (evt_head - head >= (unsigned int)size) { + pr_debug("Finished reading overwrite ring buffer: rewind\n"); + if (evt_head - head > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finished reading overwrite ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +int perf_mmap__push(struct perf_mmap *md, bool overwrite, + void *to, int push(void *to, void *buf, size_t size)) +{ + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; + u64 end = head, start = old; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int rc = 0; + + start = overwrite ? head : old; + end = overwrite ? old : head; + + if (start == end) + return 0; + + size = end - start; + if (size > (unsigned long)(md->mask) + 1) { + if (!overwrite) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_mmap__consume(md, overwrite); + return 0; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) + return -1; + } + + if ((start & md->mask) + size != (end & md->mask)) { + buf = &data[start & md->mask]; + size = md->mask + 1 - (start & md->mask); + start += size; + + if (push(to, buf, size) < 0) { + rc = -1; + goto out; + } + } + + buf = &data[start & md->mask]; + size = end - start; + start += size; + + if (push(to, buf, size) < 0) { + rc = -1; + goto out; + } + + md->prev = head; + perf_mmap__consume(md, overwrite); +out: + return rc; +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h new file mode 100644 index 000000000000..e43d7b55a55f --- /dev/null +++ b/tools/perf/util/mmap.h @@ -0,0 +1,97 @@ +#ifndef __PERF_MMAP_H +#define __PERF_MMAP_H 1 + +#include <linux/compiler.h> +#include <linux/refcount.h> +#include <linux/types.h> +#include <asm/barrier.h> +#include <stdbool.h> +#include "auxtrace.h" +#include "event.h" + +/** + * struct perf_mmap - perf's ring buffer mmap details + * + * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this + */ +struct perf_mmap { + void *base; + int mask; + int fd; + refcount_t refcnt; + u64 prev; + struct auxtrace_mmap auxtrace_mmap; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); +}; + +/* + * State machine of bkw_mmap_state: + * + * .________________(forbid)_____________. 
+ * | V + * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY + * ^ ^ | ^ | + * | |__(forbid)____/ |___(forbid)___/| + * | | + * \_________________(3)_______________/ + * + * NOTREADY : Backward ring buffers are not ready + * RUNNING : Backward ring buffers are recording + * DATA_PENDING : We are required to collect data from backward ring buffers + * EMPTY : We have collected data from backward ring buffers. + * + * (0): Setup backward ring buffer + * (1): Pause ring buffers for reading + * (2): Read from ring buffers + * (3): Resume ring buffers for recording + */ +enum bkw_mmap_state { + BKW_MMAP_NOTREADY, + BKW_MMAP_RUNNING, + BKW_MMAP_DATA_PENDING, + BKW_MMAP_EMPTY, +}; + +struct mmap_params { + int prot, mask; + struct auxtrace_mmap_params auxtrace_mp; +}; + +int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd); +void perf_mmap__munmap(struct perf_mmap *map); + +void perf_mmap__get(struct perf_mmap *map); +void perf_mmap__put(struct perf_mmap *map); + +void perf_mmap__consume(struct perf_mmap *map, bool overwrite); + +void perf_mmap__read_catchup(struct perf_mmap *md); + +static inline u64 perf_mmap__read_head(struct perf_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->base; + u64 head = READ_ONCE(pc->data_head); + rmb(); + return head; +} + +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +{ + struct perf_event_mmap_page *pc = md->base; + + /* + * ensure all reads are done before we write the tail out. + */ + mb(); + pc->data_tail = tail; +} + +union perf_event *perf_mmap__read_forward(struct perf_mmap *map); +union perf_event *perf_mmap__read_backward(struct perf_mmap *map); + +int perf_mmap__push(struct perf_mmap *md, bool backward, + void *to, int push(void *to, void *buf, size_t size)); + +size_t perf_mmap__mmap_len(struct perf_mmap *map); + +#endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index a58e91197729..5be021701f34 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -11,6 +11,7 @@ #include "event.h" #include <sys/types.h> #include <sys/stat.h> +#include <fcntl.h> #include <limits.h> #include <sched.h> #include <stdlib.h> diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 05d82601c9a6..760558dcfd18 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -9,9 +9,10 @@ #ifndef __PERF_NAMESPACES_H #define __PERF_NAMESPACES_H -#include "../perf.h" -#include <linux/list.h> +#include <sys/types.h> +#include <linux/perf_event.h> #include <linux/refcount.h> +#include <linux/types.h> struct namespaces_event; diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 4de398cfb577..bad9e0296e9a 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <inttypes.h> #include <linux/list.h> @@ -156,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - u64 timestamp = sample->time; struct ordered_event *oevent; if (!timestamp || timestamp == ~0ULL) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index e11468a9a6e4..8c7a2948593e 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -1,3 +1,4 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ORDERED_EVENTS_H #define __ORDERED_EVENTS_H @@ -44,7 +45,7 @@ struct ordered_events { }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index e71fb5f31e84..bd779d9f4d1e 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "perf.h" #include "util/util.h" #include "util/debug.h" diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h index 6086fd90eb23..11d1722733f2 100644 --- a/tools/perf/util/parse-branch-options.h +++ b/tools/perf/util/parse-branch-options.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_PARSE_BRANCH_OPTIONS_H #define _PERF_PARSE_BRANCH_OPTIONS_H 1 #include <stdint.h> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6257fb4f08c..34589c427e52 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,8 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/hw_breakpoint.h> #include <linux/err.h> #include <dirent.h> #include <errno.h> #include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> #include <sys/param.h> #include "term.h" #include "../perf.h" @@ -28,6 +32,7 @@ #include "probe-file.h" #include "asm/bug.h" #include "util/parse-branch-options.h" +#include "metricgroup.h" #define MAX_NAME_LEN 100 @@ -309,10 +314,11 @@ static char *get_config_name(struct list_head *head_terms) static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct cpu_map *cpus, + char *name, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats) { struct perf_evsel *evsel; + struct cpu_map *cpus = pmu ? pmu->cpus : NULL; event_attr_init(attr); @@ -323,7 +329,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->cpus = cpu_map__get(cpus); evsel->own_cpus = cpu_map__get(cpus); - evsel->system_wide = !!cpus; + evsel->system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) @@ -1113,6 +1119,7 @@ do { \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ __t->val.__name = __val; \ + __t->weak = term->weak; \ list_add_tail(&__t->list, head_terms); \ } while (0) @@ -1218,11 +1225,17 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; + struct parse_events_error *err = parse_state->error; LIST_HEAD(config_terms); pmu = perf_pmu__find(name); - if (!pmu) + if (!pmu) { + if (asprintf(&err->str, + "Cannot find PMU `%s'. 
Missing kernel support?", + name) < 0) + err->str = NULL; return -EINVAL; + } if (pmu->default_config) { memcpy(&attr, pmu->default_config, @@ -1233,7 +1246,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); return evsel ? 0 : -ENOMEM; } @@ -1254,7 +1267,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; evsel = __add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), pmu->cpus, + get_config_name(head_config), pmu, &config_terms, auto_merge_stats); if (evsel) { evsel->unit = info.unit; @@ -1366,6 +1379,7 @@ struct event_modifier { int exclude_GH; int sample_read; int pinned; + int weak; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -1384,6 +1398,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; + int weak = 0; memset(mod, 0, sizeof(*mod)); @@ -1421,6 +1436,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, sample_read = 1; } else if (*str == 'D') { pinned = 1; + } else if (*str == 'W') { + weak = 1; } else break; @@ -1451,6 +1468,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; mod->pinned = pinned; + mod->weak = weak; return 0; } @@ -1464,7 +1482,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1)) return -1; while (*p) { @@ -1504,6 +1522,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; evsel->precise_max = mod.precise_max; + evsel->weak_group = mod.weak; if (perf_evsel__is_group_leader(evsel)) evsel->attr.pinned = mod.pinned; @@ -1726,8 +1745,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? 
MAX_WIDTH : ws.ws_col; } -static void parse_events_print_error(struct parse_events_error *err, - const char *event) +void parse_events_print_error(struct parse_events_error *err, + const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -1782,8 +1801,6 @@ static void parse_events_print_error(struct parse_events_error *err, zfree(&err->str); zfree(&err->help); } - - fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } #undef MAX_WIDTH @@ -1795,8 +1812,10 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err = { .idx = 0, }; int ret = parse_events(evlist, str, &err); - if (ret) + if (ret) { parse_events_print_error(&err, str); + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); + } return ret; } @@ -2374,6 +2393,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_tracepoint_events(NULL, NULL, name_only); print_sdt_events(NULL, NULL, name_only); + + metricgroup__print(true, true, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events_term *term) @@ -2393,6 +2414,7 @@ static int new_term(struct parse_events_term **_term, *term = *temp; INIT_LIST_HEAD(&term->list); + term->weak = false; switch (term->type_val) { case PARSE_EVENTS__TERM_TYPE_NUM: diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 635135125111..88108cd11b4c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_PARSE_EVENTS_H #define __PERF_PARSE_EVENTS_H /* @@ -100,6 +101,9 @@ struct parse_events_term { /* error string indexes for within parsed string */ int err_term; int err_val; + + /* Coming from implicit alias */ + bool weak; }; struct parse_events_error { @@ -202,6 +206,9 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +void parse_events_print_error(struct parse_events_error *err, + const char *event); + #ifdef HAVE_LIBELF_SUPPORT /* * If the probe point starts with '%', diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c42edeac451f..655ecff636a8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -5,9 +5,13 @@ %option stack %option bison-locations %option yylineno +%option reject %{ #include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> #include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -53,9 +57,8 @@ static int str(yyscan_t scanner, int token) return token; } -static bool isbpf(yyscan_t scanner) +static bool isbpf_suffix(char *text) { - char *text = parse_events_get_text(scanner); int len = strlen(text); if (len < 2) @@ -68,6 +71,17 @@ static bool isbpf(yyscan_t scanner) return false; } +static bool isbpf(yyscan_t scanner) +{ + char *text = parse_events_get_text(scanner); + struct stat st; + + if (!isbpf_suffix(text)) + return false; + + return stat(text, &st) == 0; +} + /* * This function is called when the parser gets two kind of input: * @@ -141,6 +155,10 @@ do { \ yycolumn += yyleng; \ } while (0); +#define USER_REJECT \ + yycolumn -= yyleng; \ + REJECT + %} %x mem @@ -161,7 +179,7 @@ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
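The parse-events changes above add a 'W' event modifier that marks a group as weak: if the kernel refuses to open the whole group, the members may be retried as independent counters (this is also the "}:W" suffix that metricgroup__add_metric() appends to the groups it builds from a metric's event list). A minimal sketch of that fallback idea, assuming a caller-built perf_event_attr array; the helper below is illustrative only and is not part of this diff or of perf's own open-counter path:

        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/perf_event.h>

        /* Open attrs[0..n-1] as one group led by fds[0]; on failure, a weak
         * (':W') group degrades to n ungrouped counters instead of erroring out. */
        static int open_weak_group(struct perf_event_attr *attrs, int *fds, int n)
        {
                int i;

                for (i = 0; i < n; i++) {
                        fds[i] = syscall(SYS_perf_event_open, &attrs[i], 0, -1,
                                         i ? fds[0] : -1, 0);
                        if (fds[i] < 0)
                                goto fallback;
                }
                return 0;
        fallback:
                while (--i >= 0)
                        close(fds[i]);
                for (i = 0; i < n; i++) {
                        fds[i] = syscall(SYS_perf_event_open, &attrs[i], 0, -1, -1, 0);
                        if (fds[i] < 0)
                                return -1;
                }
                return 0;
        }

Losing the group only costs scheduling atomicity of the counters, which is why it is an acceptable degradation for metric groups but stays opt-in via the modifier.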
/* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpPGHSDI]+ +modifier_event [ukhpPGHSDIW]+ modifier_bp [rwx]{1,3} %% @@ -288,6 +306,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* @@ -322,8 +341,8 @@ r{num_raw_hex} { return raw(yyscanner); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } -{bpf_object} { if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_OBJECT); } -{bpf_source} { if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_SOURCE); } +{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } +{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } "/" { BEGIN(config); return '/'; } - { return '-'; } diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index 646ecf736aad..e6599e290f46 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "perf.h" #include "util/util.h" #include "util/debug.h" diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h index 7d762b188007..cdefb1acf6be 100644 --- a/tools/perf/util/parse-regs-options.h +++ b/tools/perf/util/parse-regs-options.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_PARSE_REGS_OPTIONS_H #define _PERF_PARSE_REGS_OPTIONS_H 1 struct option; diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 50ec3bc87a60..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * I'm tired of doing "vsnprintf()" etc just to open a * file, so here's a "return static buffer with printf" @@ -17,6 +18,7 @@ #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> +#include <dirent.h> #include <unistd.h> static char bad_path[] = "/bad-path/"; @@ -76,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 9a276a58e3c2..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -1,9 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/perf-hooks.c 
b/tools/perf/util/perf-hooks.c index d55092964da2..4f3aa8d99ef4 100644 --- a/tools/perf/util/perf-hooks.c +++ b/tools/perf/util/perf-hooks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * perf_hooks.c * diff --git a/tools/perf/util/perf-hooks.h b/tools/perf/util/perf-hooks.h index 838d5797bc1e..27fbec62d4fe 100644 --- a/tools/perf/util/perf-hooks.h +++ b/tools/perf/util/perf-hooks.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_UTIL_PERF_HOOKS_H #define PERF_UTIL_PERF_HOOKS_H diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index b2ae039eff85..2acfcc527cac 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include "perf_regs.h" #include "event.h" diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 32b37d19dcc3..c9319f8d17a6 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_REGS_H #define __PERF_REGS_H diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ac16a9db1fb5..57e38fdf0b34 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,7 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/list.h> #include <linux/compiler.h> #include <sys/types.h> #include <errno.h> +#include <fcntl.h> #include <sys/stat.h> #include <unistd.h> #include <stdio.h> @@ -10,6 +12,7 @@ #include <dirent.h> #include <api/fs/fs.h> #include <locale.h> +#include <regex.h> #include "util.h" #include "pmu.h" #include "parse-events.h" @@ -403,6 +406,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, parse_events_terms__purge(&list); return ret; } + /* + * Weak terms don't override command line options, + * which we don't want for implicit terms in aliases. + */ + cloned->weak = true; list_add_tail(&cloned->list, &list); } list_splice(&list, terms); @@ -470,17 +478,36 @@ static void pmu_read_sysfs(void) closedir(dir); } +static struct cpu_map *__pmu_cpumask(const char *path) +{ + FILE *file; + struct cpu_map *cpus; + + file = fopen(path, "r"); + if (!file) + return NULL; + + cpus = cpu_map__read(file); + fclose(file); + return cpus; +} + +/* + * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) + * may have a "cpus" file. 
+ */ +#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" +#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" + static struct cpu_map *pmu_cpumask(const char *name) { - struct stat st; char path[PATH_MAX]; - FILE *file; struct cpu_map *cpus; const char *sysfs = sysfs__mountpoint(); const char *templates[] = { - "%s/bus/event_source/devices/%s/cpumask", - "%s/bus/event_source/devices/%s/cpus", - NULL + CPUS_TEMPLATE_UNCORE, + CPUS_TEMPLATE_CPU, + NULL }; const char **template; @@ -489,20 +516,53 @@ static struct cpu_map *pmu_cpumask(const char *name) for (template = templates; *template; template++) { snprintf(path, PATH_MAX, *template, sysfs, name); - if (stat(path, &st) == 0) - break; + cpus = __pmu_cpumask(path); + if (cpus) + return cpus; } - if (!*template) - return NULL; + return NULL; +} - file = fopen(path, "r"); - if (!file) - return NULL; +static bool pmu_is_uncore(const char *name) +{ + char path[PATH_MAX]; + struct cpu_map *cpus; + const char *sysfs = sysfs__mountpoint(); - cpus = cpu_map__read(file); - fclose(file); - return cpus; + snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); + cpus = __pmu_cpumask(path); + cpu_map__put(cpus); + + return !!cpus; +} + +/* + * PMU CORE devices have different name other than cpu in sysfs on some + * platforms. looking for possible sysfs files to identify as core device. + */ +static int is_pmu_core(const char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + /* Look for cpu sysfs (x86 and others) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); + if ((stat(path, &st) == 0) && + (strncmp(name, "cpu", strlen("cpu")) == 0)) + return 1; + + /* Look for cpu sysfs (specific to arm) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", + sysfs, name); + if (stat(path, &st) == 0) + return 1; + + return 0; } /* @@ -511,21 +571,13 @@ static struct cpu_map *pmu_cpumask(const char *name) * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(void) +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { return NULL; } -/* - * From the pmu_events_map, find the table of PMU events that corresponds - * to the current running CPU. Then, add all PMU events from that table - * as aliases. - */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { - int i; - struct pmu_events_map *map; - struct pmu_event *pe; char *cpuid; static bool printed; @@ -533,39 +585,97 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) if (cpuid) cpuid = strdup(cpuid); if (!cpuid) - cpuid = get_cpuid_str(); + cpuid = get_cpuid_str(pmu); if (!cpuid) - return; + return NULL; if (!printed) { pr_debug("Using CPUID %s\n", cpuid); printed = true; } + return cpuid; +} + +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) +{ + struct pmu_events_map *map; + char *cpuid = perf_pmu__getcpuid(pmu); + int i; + + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. 
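The perf_pmu__find_map() hunk that follows now treats each mapfile cpuid field as an extended regular expression and accepts an entry only when the match covers the entire cpuid string of the running machine, so one mapfile row can describe a family of models. A condensed, self-contained sketch of that whole-string test; the function name here is an assumption for illustration, the real check is done inline in the loop below:

        #include <regex.h>
        #include <string.h>
        #include <stdbool.h>

        /* Return true only if 'pattern' matches all of 'cpuid', mirroring the
         * length check applied after regexec() in perf_pmu__find_map(). */
        static bool cpuid_matches(const char *pattern, const char *cpuid)
        {
                regex_t re;
                regmatch_t m[1];
                bool hit = false;

                if (regcomp(&re, pattern, REG_EXTENDED) != 0)
                        return false;
                if (!regexec(&re, cpuid, 1, m, 0))
                        hit = (m[0].rm_eo - m[0].rm_so) == (regoff_t)strlen(cpuid);
                regfree(&re);
                return hit;
        }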
+ */ + if (!cpuid) + return NULL; i = 0; - while (1) { + for (;;) { + regex_t re; + regmatch_t pmatch[1]; + int match; + map = &pmu_events_map[i++]; - if (!map->table) - goto out; + if (!map->table) { + map = NULL; + break; + } - if (!strcmp(map->cpuid, cpuid)) + if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", map->cpuid); break; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + break; + } } + free(cpuid); + return map; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. + */ +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + const char *name = pmu->name; + + map = perf_pmu__find_map(pmu); + if (!map) + return; /* * Found a matching PMU events table. Create aliases */ i = 0; while (1) { - const char *pname; pe = &map->table[i++]; - if (!pe->name) + if (!pe->name) { + if (pe->metric_group || pe->metric_name) + continue; break; + } - pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) - continue; + if (!is_pmu_core(name)) { + /* check for uncore devices */ + if (pe->pmu == NULL) + continue; + if (strncmp(pe->pmu, name, strlen(pe->pmu))) + continue; + } /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, @@ -575,9 +685,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) (char *)pe->metric_expr, (char *)pe->metric_name); } - -out: - free(cpuid); } struct perf_event_attr * __weak @@ -610,19 +717,20 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; - pmu_add_cpu_aliases(&aliases, name); pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; pmu->cpus = pmu_cpumask(name); + pmu->name = strdup(name); + pmu->type = type; + pmu->is_uncore = pmu_is_uncore(name); + pmu_add_cpu_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); - pmu->name = strdup(name); - pmu->type = type; list_add_tail(&pmu->list, &pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 389e9729331f..76fecec7b3f9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PMU_H #define __PMU_H @@ -22,6 +23,7 @@ struct perf_pmu { char *name; __u32 type; bool selectable; + bool is_uncore; struct perf_event_attr *default_config; struct cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ @@ -90,4 +92,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); + #endif /* __PMU_H */ diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index e908177b9976..23e367063446 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -1,41 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 #include "print_binary.h" #include <linux/log2.h> #include "sane_ctype.h" -void print_binary(unsigned char 
*data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra) +int binary__fprintf(unsigned char *data, size_t len, + size_t bytes_per_line, binary__fprintf_t printer, + void *extra, FILE *fp) { size_t i, j, mask; + int printed = 0; if (!printer) - return; + return 0; bytes_per_line = roundup_pow_of_two(bytes_per_line); mask = bytes_per_line - 1; - printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp); for (i = 0; i < len; i++) { if ((i & mask) == 0) { - printer(BINARY_PRINT_LINE_BEGIN, -1, extra); - printer(BINARY_PRINT_ADDR, i, extra); + printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp); + printed += printer(BINARY_PRINT_ADDR, i, extra, fp); } - printer(BINARY_PRINT_NUM_DATA, data[i], extra); + printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp); if (((i & mask) == mask) || i == len - 1) { for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_NUM_PAD, -1, extra); + printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp); - printer(BINARY_PRINT_SEP, i, extra); + printer(BINARY_PRINT_SEP, i, extra, fp); for (j = i & ~mask; j <= i; j++) - printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp); for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_CHAR_PAD, i, extra); - printer(BINARY_PRINT_LINE_END, -1, extra); + printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp); + printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp); } } - printer(BINARY_PRINT_DATA_END, -1, extra); + printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp); + return printed; } int is_printable_array(char *p, unsigned int len) diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h index da0427263d2d..2a1554afc957 100644 --- a/tools/perf/util/print_binary.h +++ b/tools/perf/util/print_binary.h @@ -1,7 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_PRINT_BINARY_H #define PERF_PRINT_BINARY_H #include <stddef.h> +#include <stdio.h> enum binary_printer_ops { BINARY_PRINT_DATA_BEGIN, @@ -16,12 +18,19 @@ enum binary_printer_ops { BINARY_PRINT_DATA_END, }; -typedef void (*print_binary_t)(enum binary_printer_ops op, - unsigned int val, void *extra); +typedef int (*binary__fprintf_t)(enum binary_printer_ops op, + unsigned int val, void *extra, FILE *fp); -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra); +int binary__fprintf(unsigned char *data, size_t len, + size_t bytes_per_line, binary__fprintf_t printer, + void *extra, FILE *fp); + +static inline void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, binary__fprintf_t printer, + void *extra) +{ + binary__fprintf(data, len, bytes_per_line, printer, extra, stdout); +} int is_printable_array(char *p, unsigned int len); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strchr(*arg, ':'); + ptr = strpbrk_esc(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup(*arg); + pev->group = strdup_esc(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - if (!pev->sdt && !is_c_func_name(*arg)) { + + pev->event = strdup_esc(*arg); + if (pev->event == NULL) + 
return -ENOMEM; + + if (!pev->sdt && !is_c_func_name(pev->event)) { + zfree(&pev->event); ng_name: + zfree(&pev->group); semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", *arg); return -EINVAL; } - pev->event = strdup(*arg); - if (pev->event == NULL) - return -ENOMEM; - return 0; } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk(arg, ";=@+%"); + ptr = strpbrk_esc(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup(ptr + 1); + pev->target = strdup_esc(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * Otherwise, we consider arg to be a function specification. */ - if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + if (!strpbrk_esc(arg, "+@%")) { + ptr = strpbrk_esc(arg, ";:"); /* This is a file spec if it includes a '.' before ; or : */ - if (memchr(arg, '.', ptr - arg)) + if (ptr && memchr(arg, '.', ptr - arg)) file_spec = true; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup(arg); + tmp = strdup_esc(arg); if (tmp == NULL) return -ENOMEM; } @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = strdup(arg); /* let leave escapes */ if (pp->lazy_line == NULL) return -ENOMEM; break; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup(arg); + pp->file = strdup_esc(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2573,7 +2577,8 @@ int show_perf_probe_events(struct strfilter *filter) } static int get_new_event_name(char *buf, size_t len, const char *base, - struct strlist *namelist, bool allow_suffix) + struct strlist *namelist, bool ret_event, + bool allow_suffix) { int i, ret; char *p, *nbase; @@ -2584,13 +2589,13 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* Cut off the dot suffixes (e.g. .const, .isra)*/ - p = strchr(nbase, '.'); + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); if (p && p != nbase) *p = '\0'; /* Try no suffix number */ - ret = e_snprintf(buf, len, "%s", nbase); + ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? 
"__return" : ""); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); goto out; @@ -2625,6 +2630,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } @@ -2681,8 +2694,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, group = PERFPROBE_GROUP; /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + ret = get_new_event_name(buf, 64, event, namelist, + tev->point.retprobe, allow_suffix); if (ret < 0) return ret; @@ -2792,16 +2805,40 @@ static int find_probe_functions(struct map *map, char *name, int found = 0; struct symbol *sym; struct rb_node *tmp; + const char *norm, *ver; + char *buf = NULL; + bool cut_version = true; if (map__load(map) < 0) return 0; + /* If user gives a version, don't cut off the version from symbols */ + if (strchr(name, '@')) + cut_version = false; + map__for_each_symbol(map, sym, tmp) { - if (strglobmatch(sym->name, name)) { + norm = arch__normalize_symbol_name(sym->name); + if (!norm) + continue; + + if (cut_version) { + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } + } + + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) syms[found - 1] = sym; } + if (buf) + zfree(&buf); } return found; @@ -2847,7 +2884,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * same name but different addresses, this lists all the symbols. */ num_matched_functions = find_probe_functions(map, pp->function, syms); - if (num_matched_functions == 0) { + if (num_matched_functions <= 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, pev->target ? 
: "kernel"); ret = -ENOENT; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 078681d12168..45b14f020558 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PROBE_EVENT_H #define _PROBE_EVENT_H diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index cdf8d83a484c..4ae1123c6794 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -15,6 +15,7 @@ * */ #include <errno.h> +#include <fcntl.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/uio.h> diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 2ca4163abafe..63f29b1d22c1 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PROBE_FILE_H #define __PROBE_FILE_H diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 27f061551012..16252980ff00 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PROBE_FINDER_H #define _PROBE_FINDER_H diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index b234a6e3d0d4..797fe1ae2d2e 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Simple pointer stack * diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h index ded7f2e36624..8729b8be061d 100644 --- a/tools/perf/util/pstack.h +++ b/tools/perf/util/pstack.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_PSTACK_ #define _PERF_PSTACK_ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index e66dc495809a..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,8 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/memswap.c +util/mmap.c util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index c129e99114ae..b1e999bd21ef 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <Python.h> #include <structmember.h> #include <inttypes.h> @@ -863,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { + if (perf_evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index 1ba8920151d8..22eaa201aa27 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <stdlib.h> #include "strbuf.h" diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 055ca45bed99..274bf26d3511 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_QUOTE_H #define __PERF_QUOTE_H diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index 808cc45611fe..a920f702a74d 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_RESORT_RB_H_ #define _PERF_RESORT_RB_H_ /* @@ -143,7 +144,8 @@ struct __name##_sorted *__name = __name##_sorted__new 
__ilist->rblist.nr_entries) /* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ - DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ + DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ + __machine->threads[hash_bucket].nr) #endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0dfe27d99458..0efc3258c648 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist) return; } +void rblist__exit(struct rblist *rblist) +{ + struct rb_node *pos, *next = rb_first(&rblist->entries); + + while (next) { + pos = next; + next = rb_next(pos); + rblist__remove_node(rblist, pos); + } +} + void rblist__delete(struct rblist *rblist) { if (rblist != NULL) { - struct rb_node *pos, *next = rb_first(&rblist->entries); - - while (next) { - pos = next; - next = rb_next(pos); - rblist__remove_node(rblist, pos); - } + rblist__exit(rblist); free(rblist); } } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index ff9913b994c2..76df15c27f5f 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_RBLIST_H #define __PERF_RBLIST_H @@ -28,6 +29,7 @@ struct rblist { }; void rblist__init(struct rblist *rblist); +void rblist__exit(struct rblist *rblist); void rblist__delete(struct rblist *rblist); int rblist__add_node(struct rblist *rblist, const void *new_entry); void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index d91bdf5a1aa4..1e97937b03a9 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "evlist.h" #include "evsel.h" #include "cpumap.h" diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c new file mode 100644 index 000000000000..5e52e7baa7b6 --- /dev/null +++ b/tools/perf/util/rwsem.c @@ -0,0 +1,32 @@ +#include "util.h" +#include "rwsem.h" + +int init_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_init(&sem->lock, NULL); +} + +int exit_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_destroy(&sem->lock); +} + +int down_read(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock); +} + +int up_read(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock); +} + +int down_write(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock); +} + +int up_write(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 
0 : pthread_rwlock_unlock(&sem->lock); +} diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h new file mode 100644 index 000000000000..94565ad4d494 --- /dev/null +++ b/tools/perf/util/rwsem.h @@ -0,0 +1,19 @@ +#ifndef _PERF_RWSEM_H +#define _PERF_RWSEM_H + +#include <pthread.h> + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +int init_rwsem(struct rw_semaphore *sem); +int exit_rwsem(struct rw_semaphore *sem); + +int down_read(struct rw_semaphore *sem); +int up_read(struct rw_semaphore *sem); + +int down_write(struct rw_semaphore *sem); +int up_write(struct rw_semaphore *sem); + +#endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/sane_ctype.h b/tools/perf/util/sane_ctype.h index 4308c22c22ad..c2b42ff9ff32 100644 --- a/tools/perf/util/sane_ctype.h +++ b/tools/perf/util/sane_ctype.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_SANE_CTYPE_H #define _PERF_SANE_CTYPE_H diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..ea070883c593 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@ #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" -#include "../machine.h" #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" @@ -500,6 +499,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->time)); pydict_set_item_string_decref(dict_sample, "period", PyLong_FromUnsignedLongLong(sample->period)); + pydict_set_item_string_decref(dict_sample, "phys_addr", + PyLong_FromUnsignedLongLong(sample->phys_addr)); set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7ebd9fe8e40..c71ced7db152 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <inttypes.h> #include <linux/kernel.h> @@ -26,20 +27,19 @@ static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); static int perf_session__open(struct perf_session *session) { - struct perf_data_file *file = session->file; + struct perf_data *data = session->data; if (perf_session__read_header(session) < 0) { pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } - if (perf_data_file__is_pipe(file)) + if (perf_data__is_pipe(data)) return 0; if (perf_header__has_feat(&session->header, HEADER_STAT)) @@ -106,21 +106,14 @@ static void perf_session__set_comm_exec(struct perf_session *session) static int ordered_events__deliver_event(struct ordered_events *oe, struct ordered_event *event) { - struct perf_sample sample; struct perf_session *session = container_of(oe, struct perf_session, ordered_events); - int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - - if (ret) { - pr_err("Can't parse sample, err = %d\n", ret); - return ret; - } - return perf_session__deliver_event(session, event->event, &sample, + return perf_session__deliver_event(session, event->event, session->tool, event->file_offset); } -struct perf_session *perf_session__new(struct perf_data_file *file, +struct perf_session *perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool) 
{ struct perf_session *session = zalloc(sizeof(*session)); @@ -134,13 +127,13 @@ struct perf_session *perf_session__new(struct perf_data_file *file, machines__init(&session->machines); ordered_events__init(&session->ordered_events, ordered_events__deliver_event); - if (file) { - if (perf_data_file__open(file)) + if (data) { + if (perf_data__open(data)) goto out_delete; - session->file = file; + session->data = data; - if (perf_data_file__is_read(file)) { + if (perf_data__is_read(data)) { if (perf_session__open(session) < 0) goto out_close; @@ -148,7 +141,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, * set session attributes that are present in perf.data * but not in pipe-mode. */ - if (!file->is_pipe) { + if (!data->is_pipe) { perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } @@ -157,7 +150,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, session->machines.host.env = &perf_env; } - if (!file || perf_data_file__is_write(file)) { + if (!data || perf_data__is_write(data)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). @@ -170,7 +163,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is * processed, so perf_evlist__sample_id_all is not meaningful here. */ - if ((!file || !file->is_pipe) && tool && tool->ordering_requires_timestamps && + if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps && tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; @@ -179,7 +172,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return session; out_close: - perf_data_file__close(file); + perf_data__close(data); out_delete: perf_session__delete(session); out: @@ -201,8 +194,8 @@ void perf_session__delete(struct perf_session *session) perf_session__delete_threads(session); perf_env__exit(&session->header.env); machines__exit(&session->machines); - if (session->file) - perf_data_file__close(session->file); + if (session->data) + perf_data__close(session->data); free(session); } @@ -290,8 +283,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, __maybe_unused) { dump_printf(": unhandled!\n"); - if (perf_data_file__is_pipe(session->file)) - skipn(perf_data_file__fd(session->file), event->auxtrace.size); + if (perf_data__is_pipe(session->data)) + skipn(perf_data__fd(session->data), event->auxtrace.size); return event->auxtrace.size; } @@ -374,6 +367,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->mmap2 = process_event_stub; if (tool->comm == NULL) tool->comm = process_event_stub; + if (tool->namespaces == NULL) + tool->namespaces = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) @@ -870,9 +865,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, } int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - return ordered_events__queue(&s->ordered_events, event, sample, file_offset); + return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1325,20 +1320,26 @@ static int 
machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_sample sample; int ret; - ret = auxtrace__process_event(session, event, sample, tool); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) return ret; if (ret > 0) return 0; return machines__deliver_event(&session->machines, session->evlist, - event, sample, tool, file_offset); + event, &sample, tool, file_offset); } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1347,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data_file__fd(session->file); + struct perf_sample sample = { .time = 0, }; + int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { @@ -1447,10 +1449,10 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, goto out_parse_sample; } - if (perf_data_file__is_pipe(session->file)) + if (perf_data__is_pipe(session->data)) return -1; - fd = perf_data_file__fd(session->file); + fd = perf_data__fd(session->data); hdr_sz = sizeof(struct perf_event_header); if (buf_sz < hdr_sz) @@ -1492,7 +1494,6 @@ static s64 perf_session__process_event(struct perf_session *session, { struct perf_evlist *evlist = session->evlist; struct perf_tool *tool = session->tool; - struct perf_sample sample; int ret; if (session->header.needs_swap) @@ -1506,21 +1507,19 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, file_offset); - /* - * For all kernel events we get the sample data - */ - ret = perf_evlist__parse_sample(evlist, event, &sample); - if (ret) - return ret; - if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, &sample, file_offset); + u64 timestamp = -1ULL; + + ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); + if (ret && ret != -1) + return ret; + + ret = perf_session__queue_event(session, event, timestamp, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return perf_session__deliver_event(session, event, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) @@ -1685,7 +1684,7 @@ static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1774,7 +1773,8 @@ done: err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); auxtrace__free_events(session); return err; @@ -1826,7 +1826,7 @@ static
int __perf_session__process_events(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data_file__fd(session->file); + int fd = perf_data__fd(session->data); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; @@ -1847,7 +1847,7 @@ static int __perf_session__process_events(struct perf_session *session, if (data_offset + data_size < file_size) file_size = data_offset + data_size; - ui_progress__init(&prog, file_size, "Processing events..."); + ui_progress__init_size(&prog, file_size, "Processing events..."); mmap_size = MMAP_SIZE; if (mmap_size > file_size) { @@ -1930,7 +1930,8 @@ out: err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable. @@ -1943,13 +1944,13 @@ out_err: int perf_session__process_events(struct perf_session *session) { - u64 size = perf_data_file__size(session->file); + u64 size = perf_data__size(session->data); int err; if (perf_session__register_idle_thread(session) < 0) return -ENOMEM; - if (!perf_data_file__is_pipe(session->file)) + if (!perf_data__is_pipe(session->data)) err = __perf_session__process_events(session, session->header.data_offset, session->header.data_size, size); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 47b5e7dbcb18..da40b4b380ca 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_SESSION_H #define __PERF_SESSION_H @@ -32,13 +33,13 @@ struct perf_session { void *one_mmap_addr; u64 one_mmap_offset; struct ordered_events ordered_events; - struct perf_data_file *file; + struct perf_data *data; struct perf_tool *tool; }; struct perf_tool; -struct perf_session *perf_session__new(struct perf_data_file *file, +struct perf_session *perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool); void perf_session__delete(struct perf_session *session); @@ -52,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); @@ -113,7 +114,7 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, extern volatile int session_done; -#define session_done() ACCESS_ONCE(session_done) +#define session_done() READ_ONCE(session_done) int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index eb3ab902a1c0..2da4d0456a03 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <inttypes.h> #include <regex.h> @@ -225,6 +226,9 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) if (sym_l == sym_r) return 0; + if (sym_l->inlined || sym_r->inlined) + return strcmp(sym_l->name, sym_r->name); + if (sym_l->start != sym_r->start) return (int64_t)(sym_r->start - sym_l->start); @@ -283,6 +287,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol 
*sym, ret += repsep_snprintf(bf + ret, size - ret, "%.*s", width - ret, sym->name); + if (sym->inlined) + ret += repsep_snprintf(bf + ret, size - ret, + " (inlined)"); } } else { size_t len = BITS_PER_LONG / 4; @@ -329,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true); + he->ms.sym, true, true, he->ip); } static int64_t @@ -373,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->from.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->from.al_addr); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -384,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->from.al_addr), right->branch_info->from.sym, - true, true); + true, true, + right->branch_info->from.al_addr); } return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -416,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->to.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->to.al_addr); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -427,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->to.al_addr), right->branch_info->to.sym, - true, true); + true, true, + right->branch_info->to.al_addr); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -458,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true, true); + e->ms.sym, false, true, true, e->ip); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); @@ -2876,10 +2887,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) tok; tok = strtok_r(NULL, ", ", &tmp)) { ret = output_field_add(list, tok); if (ret == -EINVAL) { - pr_err("Invalid --fields key: `%s'", tok); + ui__error("Invalid --fields key: `%s'", tok); break; } else if (ret == -ESRCH) { - pr_err("Unknown --fields key: `%s'", tok); + ui__error("Unknown --fields key: `%s'", tok); break; } } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f36dc4980a6c..f5901c10a563 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_SORT_H #define __PERF_SORT_H #include "../builtin.h" @@ -129,7 +130,6 @@ struct hist_entry { }; char *srcline; char *srcfile; - struct inline_node *inline_node; struct symbol *parent; struct branch_info *branch_info; struct hists *hists; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index ed8e8d2de942..3c21fd059b64 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <inttypes.h> #include <stdio.h> #include <stdlib.h> @@ -10,7 +11,7 @@ #include "util/debug.h" #include "util/callchain.h" #include "srcline.h" - +#include "string2.h" #include "symbol.h" bool srcline_full_filename; @@ -33,28 +34,17 @@ static const char *dso__name(struct dso *dso) return dso_name; } -static int inline_list__append(char *filename, char 
*funcname, int line_nr, - struct inline_node *node, struct dso *dso) +static int inline_list__append(struct symbol *symbol, char *srcline, + struct inline_node *node) { struct inline_list *ilist; - char *demangled; ilist = zalloc(sizeof(*ilist)); if (ilist == NULL) return -1; - ilist->filename = filename; - ilist->line_nr = line_nr; - - if (dso != NULL) { - demangled = dso__demangle_sym(dso, 0, funcname); - if (demangled == NULL) { - ilist->funcname = funcname; - } else { - ilist->funcname = demangled; - free(funcname); - } - } + ilist->symbol = symbol; + ilist->srcline = srcline; if (callchain_param.order == ORDER_CALLEE) list_add_tail(&ilist->list, &node->val); @@ -64,6 +54,65 @@ static int inline_list__append(char *filename, char *funcname, int line_nr, return 0; } +/* basename version that takes a const input string */ +static const char *gnu_basename(const char *path) +{ + const char *base = strrchr(path, '/'); + + return base ? base + 1 : path; +} + +static char *srcline_from_fileline(const char *file, unsigned int line) +{ + char *srcline; + + if (!file) + return NULL; + + if (!srcline_full_filename) + file = gnu_basename(file); + + if (asprintf(&srcline, "%s:%u", file, line) < 0) + return NULL; + + return srcline; +} + +static struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname) +{ + struct symbol *inline_sym; + char *demangled = NULL; + + if (dso) { + demangled = dso__demangle_sym(dso, 0, funcname); + if (demangled) + funcname = demangled; + } + + if (base_sym && strcmp(funcname, base_sym->name) == 0) { + /* reuse the real, existing symbol */ + inline_sym = base_sym; + /* ensure that we don't alias an inlined symbol, which could + * lead to double frees in inline_node__delete + */ + assert(!base_sym->inlined); + } else { + /* create a fake symbol for the inline frame */ + inline_sym = symbol__new(base_sym ? base_sym->start : 0, + base_sym ? base_sym->end : 0, + base_sym ? base_sym->binding : 0, + funcname); + if (inline_sym) + inline_sym->inlined = 1; + } + + free(demangled); + + return inline_sym; +} + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -207,18 +256,23 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 static int inline_list__append_dso_a2l(struct dso *dso, - struct inline_node *node) + struct inline_node *node, + struct symbol *sym) { struct a2l_data *a2l = dso->a2l; - char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL; - char *filename = a2l->filename ? 
strdup(a2l->filename) : NULL; + struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); + char *srcline = NULL; - return inline_list__append(filename, funcname, a2l->line, node, dso); + if (a2l->filename) + srcline = srcline_from_fileline(a2l->filename, a2l->line); + + return inline_list__append(inline_sym, srcline, node); } static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line, struct dso *dso, - bool unwind_inlines, struct inline_node *node) + bool unwind_inlines, struct inline_node *node, + struct symbol *sym) { int ret = 0; struct a2l_data *a2l = dso->a2l; @@ -244,7 +298,7 @@ static int addr2line(const char *dso_name, u64 addr, if (unwind_inlines) { int cnt = 0; - if (node && inline_list__append_dso_a2l(dso, node)) + if (node && inline_list__append_dso_a2l(dso, node, sym)) return 0; while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, @@ -255,7 +309,7 @@ static int addr2line(const char *dso_name, u64 addr, a2l->filename = NULL; if (node != NULL) { - if (inline_list__append_dso_a2l(dso, node)) + if (inline_list__append_dso_a2l(dso, node, sym)) return 0; // found at least one inline frame ret = 1; @@ -287,7 +341,7 @@ void dso__free_a2l(struct dso *dso) } static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso) + struct dso *dso, struct symbol *sym) { struct inline_node *node; @@ -300,17 +354,8 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node)) - goto out_free_inline_node; - - if (list_empty(&node->val)) - goto out_free_inline_node; - + addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); return node; - -out_free_inline_node: - inline_node__delete(node); - return NULL; } #else /* HAVE_LIBBFD_SUPPORT */ @@ -340,7 +385,8 @@ static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused, bool unwind_inlines __maybe_unused, - struct inline_node *node __maybe_unused) + struct inline_node *node __maybe_unused, + struct symbol *sym __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; @@ -380,16 +426,18 @@ void dso__free_a2l(struct dso *dso __maybe_unused) } static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso __maybe_unused) + struct dso *dso __maybe_unused, + struct symbol *sym) { FILE *fp; char cmd[PATH_MAX]; struct inline_node *node; char *filename = NULL; - size_t len; + char *funcname = NULL; + size_t filelen, funclen; unsigned int line_nr = 0; - scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i %016"PRIx64, + scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64, dso_name, addr); fp = popen(cmd, "r"); @@ -407,26 +455,34 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - while (getline(&filename, &len, fp) != -1) { - if (filename_split(filename, &line_nr) != 1) { - free(filename); + /* addr2line -f generates two lines for each inlined functions */ + while (getline(&funcname, &funclen, fp) != -1) { + char *srcline; + struct symbol *inline_sym; + + rtrim(funcname); + + if (getline(&filename, &filelen, fp) == -1) goto out; - } - if (inline_list__append(filename, NULL, line_nr, node, - NULL) != 0) + if (filename_split(filename, &line_nr) != 1) goto out; - filename = NULL; + srcline = srcline_from_fileline(filename, line_nr); + inline_sym = new_inline_sym(dso, sym, funcname); + + if 
(inline_list__append(inline_sym, srcline, node) != 0) { + free(srcline); + if (inline_sym && inline_sym->inlined) + symbol__delete(inline_sym); + goto out; + } } out: pclose(fp); - - if (list_empty(&node->val)) { - inline_node__delete(node); - return NULL; - } + free(filename); + free(funcname); return node; } @@ -440,7 +496,8 @@ out: #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip) { char *file = NULL; unsigned line = 0; @@ -454,19 +511,18 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (dso_name == NULL) goto out; - if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines, NULL)) + if (!addr2line(dso_name, addr, &file, &line, dso, + unwind_inlines, NULL, sym)) goto out; - if (asprintf(&srcline, "%s:%u", - srcline_full_filename ? file : basename(file), - line) < 0) { - free(file); + srcline = srcline_from_fileline(file, line); + free(file); + + if (!srcline) goto out; - } dso->a2l_fails = 0; - free(file); return srcline; out: @@ -481,7 +537,7 @@ out: if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", - addr - sym->start) < 0) + ip - sym->start) < 0) return SRCLINE_UNKNOWN; } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; @@ -495,12 +551,79 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr) + bool show_sym, bool show_addr, u64 ip) +{ + return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip); +} + +struct srcline_node { + u64 addr; + char *srcline; + struct rb_node rb_node; +}; + +void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *parent = NULL; + struct srcline_node *i, *node; + + node = zalloc(sizeof(struct srcline_node)); + if (!node) { + perror("not enough memory for the srcline node"); + return; + } + + node->addr = addr; + node->srcline = srcline; + + while (*p != NULL) { + parent = *p; + i = rb_entry(parent, struct srcline_node, rb_node); + if (addr < i->addr) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, tree); +} + +char *srcline__tree_find(struct rb_root *tree, u64 addr) { - return __get_srcline(dso, addr, sym, show_sym, show_addr, false); + struct rb_node *n = tree->rb_node; + + while (n) { + struct srcline_node *i = rb_entry(n, struct srcline_node, + rb_node); + + if (addr < i->addr) + n = n->rb_left; + else if (addr > i->addr) + n = n->rb_right; + else + return i->srcline; + } + + return NULL; } -struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) +void srcline__tree_delete(struct rb_root *tree) +{ + struct srcline_node *pos; + struct rb_node *next = rb_first(tree); + + while (next) { + pos = rb_entry(next, struct srcline_node, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, tree); + free_srcline(pos->srcline); + zfree(&pos); + } +} + +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, + struct symbol *sym) { const char *dso_name; @@ -508,7 +631,7 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) if (dso_name == NULL) return NULL; - return addr2inlines(dso_name, addr, dso); + return addr2inlines(dso_name, addr, dso, sym); } void inline_node__delete(struct 
inline_node *node) @@ -517,10 +640,63 @@ void inline_node__delete(struct inline_node *node) list_for_each_entry_safe(ilist, tmp, &node->val, list) { list_del_init(&ilist->list); - zfree(&ilist->filename); - zfree(&ilist->funcname); + free_srcline(ilist->srcline); + /* only the inlined symbols are owned by the list */ + if (ilist->symbol && ilist->symbol->inlined) + symbol__delete(ilist->symbol); free(ilist); } free(node); } + +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *parent = NULL; + const u64 addr = inlines->addr; + struct inline_node *i; + + while (*p != NULL) { + parent = *p; + i = rb_entry(parent, struct inline_node, rb_node); + if (addr < i->addr) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&inlines->rb_node, parent, p); + rb_insert_color(&inlines->rb_node, tree); +} + +struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) +{ + struct rb_node *n = tree->rb_node; + + while (n) { + struct inline_node *i = rb_entry(n, struct inline_node, + rb_node); + + if (addr < i->addr) + n = n->rb_left; + else if (addr > i->addr) + n = n->rb_right; + else + return i; + } + + return NULL; +} + +void inlines__tree_delete(struct rb_root *tree) +{ + struct inline_node *pos; + struct rb_node *next = rb_first(tree); + + while (next) { + pos = rb_entry(next, struct inline_node, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, tree); + inline_node__delete(pos); + } +} diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 7b52ba88676e..b2bb5502fd62 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -1,7 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_SRCLINE_H #define PERF_SRCLINE_H #include <linux/list.h> +#include <linux/rbtree.h> #include <linux/types.h> struct dso; @@ -9,26 +11,44 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr); + bool show_sym, bool show_addr, u64 ip); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines); + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip); void free_srcline(char *srcline); +/* insert the srcline into the DSO, which will take ownership */ +void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); +/* find previously inserted srcline */ +char *srcline__tree_find(struct rb_root *tree, u64 addr); +/* delete all srclines within the tree */ +void srcline__tree_delete(struct rb_root *tree); + #define SRCLINE_UNKNOWN ((char *) "??:0") struct inline_list { - char *filename; - char *funcname; - unsigned int line_nr; + struct symbol *symbol; + char *srcline; struct list_head list; }; struct inline_node { u64 addr; struct list_head val; + struct rb_node rb_node; }; -struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); +/* parse inlined frames for the given address */ +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, + struct symbol *sym); +/* free resources associated to the inline node list */ void inline_node__delete(struct inline_node *node); +/* insert the inline node list into the DSO, which will take ownership */ +void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines); +/* find previously inserted inline node list */ +struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr); +/* delete all 
nodes within the tree of inline_node s */ +void inlines__tree_delete(struct rb_root *tree); + #endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a04cf56d3517..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include "evsel.h" #include "stat.h" @@ -6,17 +7,7 @@ #include "rblist.h" #include "evlist.h" #include "expr.h" - -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX +#include "metricgroup.h" /* * AGGR_GLOBAL: Use CPU 0 @@ -25,37 +16,18 @@ enum { * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values; static bool have_frontend_stalled; +struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; - int cpu; + enum stat_type type; int ctx; + int cpu; + struct runtime_stat *stat; struct stats stats; }; @@ -66,10 +38,32 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->ctx != b->ctx) - return a->ctx - b->ctx; if (a->cpu != b->cpu) return a->cpu - b->cpu; + + /* + * Previously the rbtree was used to link generic metrics. + * The keys were evsel/cpu. Now the rbtree is extended to support + * per-thread shadow stats. For shadow stats case, the keys + * are cpu/type/ctx/stat (evsel is NULL). For generic metrics + * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 
+ */ + if (a->type != b->type) + return a->type - b->type; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + + if (a->evsel == NULL && b->evsel == NULL) { + if (a->stat == b->stat) + return 0; + + if ((char *)a->stat < (char *)b->stat) + return -1; + + return 1; + } + if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) @@ -88,35 +82,66 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, return &nd->rb_node; } +static void saved_value_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct saved_value *v; + + BUG_ON(!rb_node); + v = container_of(rb_node, struct saved_value, rb_node); + free(v); +} + static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, - int cpu, int ctx, - bool create) + int cpu, + bool create, + enum stat_type type, + int ctx, + struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { .cpu = cpu, - .ctx = ctx, .evsel = evsel, + .type = type, + .ctx = ctx, + .stat = st, }; - nd = rblist__find(&runtime_saved_values, &dm); + + rblist = &st->value_list; + + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); if (create) { - rblist__add_node(&runtime_saved_values, &dm); - nd = rblist__find(&runtime_saved_values, &dm); + rblist__add_node(rblist, &dm); + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); } return NULL; } +void runtime_stat__init(struct runtime_stat *st) +{ + struct rblist *rblist = &st->value_list; + + rblist__init(rblist); + rblist->node_cmp = saved_value_cmp; + rblist->node_new = saved_value_new; + rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ + rblist__exit(&st->value_list); +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); - rblist__init(&runtime_saved_values); - runtime_saved_values.node_cmp = saved_value_cmp; - runtime_saved_values.node_new = saved_value_new; - /* No delete for now */ + runtime_stat__init(&rt_stat); } static int evsel_context(struct perf_evsel *evsel) @@ -137,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *pos, *next; - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - 
memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); - memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); - memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); - memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); - memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); - memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); - memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - - next = rb_first(&runtime_saved_values.entries); + rblist = &st->value_list; + next = rb_first(&rblist->entries); while (next) { pos = next; next = rb_next(pos); @@ -176,64 +178,95 @@ void perf_stat__reset_shadow_stats(void) } } +void perf_stat__reset_shadow_stats(void) +{ + reset_stat(&rt_stat); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ + reset_stat(st); +} + +static void update_runtime_stat(struct runtime_stat *st, + enum stat_type type, + int ctx, int cpu, u64 count) +{ + struct saved_value *v = saved_value_lookup(NULL, cpu, true, + type, ctx, st); + + if (v) + update_stats(&v->stats, count); +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, * instruction rates, etc: */ -void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, - int cpu) +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, + int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); + count *= counter->scale; + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count[0]); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); + update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); + update_runtime_stat(st, 
STAT_TOPDOWN_RECOVERY_BUBBLES, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); + update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, ctx, - true); - update_stats(&v->stats, count[0]); + struct saved_value *v = saved_value_lookup(counter, cpu, true, + STAT_NONE, 0, st); + update_stats(&v->stats, count); } } @@ -353,15 +386,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) } } +static double runtime_stat_avg(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return v->stats.n; +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -377,13 +435,14 @@ static void print_stalled_cycles_frontend(int cpu, static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel, double avg, - struct 
perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -396,13 +455,14 @@ static void print_stalled_cycles_backend(int cpu, static void print_branch_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_branches_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -415,13 +475,15 @@ static void print_branch_misses(int cpu, static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -434,13 +496,15 @@ static void print_l1_dcache_misses(int cpu, static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -452,13 +516,14 @@ static void print_l1_icache_misses(int cpu, static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -470,13 +535,14 @@ static void print_dtlb_cache_misses(int cpu, static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -488,13 +554,14 @@ static void print_itlb_cache_misses(int cpu, static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -552,68 +619,72 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) { - return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); } -static 
double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) { double bad_spec = 0; double total_slots; double total; - total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - - avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + - avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); - total_slots = td_total_slots(ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + + total_slots = td_total_slots(ctx, cpu, st); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu); - double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu); - double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) { - double sum = (td_fe_bound(ctx, cpu) + - td_bad_spec(ctx, cpu) + - td_retiring(ctx, cpu)); + double sum = (td_fe_bound(ctx, cpu, st) + + td_bad_spec(ctx, cpu, st) + + td_retiring(ctx, cpu, st)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); } static void print_smi_cost(int cpu, struct perf_evsel *evsel, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double smi_num, aperf, cycles, cost = 0.0; int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); - aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); - cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); + aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); + cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if ((cycles == 0) || (aperf == 0)) return; @@ -627,18 +698,75 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel, out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); } +static void generic_metric(const char *metric_expr, + struct perf_evsel **metric_events, + char *name, + const char *metric_name, + double avg, + int cpu, + struct perf_stat_output_ctx *out, + struct runtime_stat *st) +{ + print_metric_t print_metric = out->print_metric; + struct parse_ctx pctx; + double ratio; + int i; + void *ctxp = out->ctx; + + expr__ctx_init(&pctx); + expr__add_id(&pctx, name, avg); + for (i = 0; metric_events[i]; i++) { + struct saved_value *v; + struct stats *stats; + double scale; + + if (!strcmp(metric_events[i]->name, "duration_time")) { + stats = &walltime_nsecs_stats; + scale = 1e-9; + } else { + v = saved_value_lookup(metric_events[i], cpu, false, + STAT_NONE, 0, st); + if (!v) + break; + stats = &v->stats; 
+ scale = 1.0; + } + expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); + } + if (!metric_events[i]) { + const char *p = metric_expr; + + if (expr__parse(&ratio, &pctx, &p) == 0) + print_metric(ctxp, NULL, "%8.1f", + metric_name ? + metric_name : + out->force_header ? name : "", + ratio); + else + print_metric(ctxp, NULL, NULL, + out->force_header ? + (metric_name ? metric_name : name) : "", 0); + } else + print_metric(ctxp, NULL, NULL, "", 0); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct rblist *metric_events, + struct runtime_stat *st) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; int ctx = evsel_context(evsel); + struct metric_event *me; + int num = 1; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) { ratio = avg / total; print_metric(ctxp, NULL, "%7.2f ", @@ -646,8 +774,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else { print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu); + + total = max(total, runtime_stat_avg(st, + STAT_STALLED_CYCLES_BACK, + ctx, cpu)); if (total && avg) { out->new_line(ctxp); @@ -660,8 +793,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_branches_stats[ctx][cpu].n != 0) - print_branch_misses(cpu, evsel, avg, out); + if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) + print_branch_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -669,8 +802,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_dcache_stats[ctx][cpu].n != 0) - print_l1_dcache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) + print_l1_dcache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( @@ -678,8 +812,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_icache_stats[ctx][cpu].n != 0) - print_l1_icache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) + print_l1_icache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( @@ -687,8 +822,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all dTLB 
cache hits", 0); } else if ( @@ -696,8 +832,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_itlb_cache_stats[ctx][cpu].n != 0) - print_itlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) + print_itlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( @@ -705,27 +842,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_ll_cache_stats[ctx][cpu].n != 0) - print_ll_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) + print_ll_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) ratio = avg * 100 / total; - if (runtime_cacherefs_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out); + print_stalled_cycles_frontend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out); + print_stalled_cycles_backend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; @@ -734,7 +872,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles", @@ -743,8 +882,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + if (total2 < avg) total2 = avg; if (total) @@ -753,19 +893,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; - if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(ctxp, NULL, NULL, "cycles / transaction", - 0); + 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = 
avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; @@ -779,28 +921,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu); + double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu); + double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu); + double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu); + double be_bound = td_be_bound(ctx, cpu, st); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -813,44 +955,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu) > 0) + if (td_total_slots(ctx, cpu, st) > 0) print_metric(ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - struct parse_ctx pctx; - int i; - - expr__ctx_init(&pctx); - expr__add_id(&pctx, evsel->name, avg); - for (i = 0; evsel->metric_events[i]; i++) { - struct saved_value *v; - - v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); - if (!v) - break; - expr__add_id(&pctx, evsel->metric_events[i]->name, - avg_stats(&v->stats)); - } - if (!evsel->metric_events[i]) { - const char *p = evsel->metric_expr; - - if (expr__parse(&ratio, &pctx, &p) == 0) - print_metric(ctxp, NULL, "%8.1f", - evsel->metric_name ? - evsel->metric_name : - out->force_header ? 
evsel->name : "", - ratio); - else - print_metric(ctxp, NULL, NULL, "", 0); - } else - print_metric(ctxp, NULL, NULL, "", 0); - } else if (runtime_nsecs_stats[cpu].n != 0) { + generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, + evsel->metric_name, avg, cpu, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) ratio = 1000.0 * avg / total; @@ -861,8 +978,22 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out); + print_smi_cost(cpu, evsel, out, st); } else { - print_metric(ctxp, NULL, NULL, NULL, 0); + num = 0; + } + + if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { + struct metric_expr *mexp; + + list_for_each_entry (mexp, &me->head, nd) { + if (num++ > 0) + out->new_line(ctxp); + generic_metric(mexp->metric_expr, mexp->metric_events, + evsel->name, mexp->metric_name, + avg, cpu, out, st); + } } + if (num == 0) + print_metric(ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 35e9848734d6..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <inttypes.h> #include <math.h> @@ -69,7 +70,7 @@ double rel_stddev_stats(double stddev, double avg) bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id) { - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; return ps->id == id; } @@ -93,7 +94,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { void perf_stat_evsel_id_init(struct perf_evsel *evsel) { - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; int i; /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ @@ -109,7 +110,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; for (i = 0; i < 3; i++) init_stats(&ps->res_stats[i]); @@ -119,8 +120,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { - evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); - if (evsel->priv == NULL) + evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); + if (evsel->stats == NULL) return -ENOMEM; perf_evsel__reset_stat_priv(evsel); return 0; @@ -128,11 +129,11 @@ static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { - struct perf_stat_evsel *ps = evsel->priv; + struct perf_stat_evsel *ps = evsel->stats; if (ps) free(ps->group_data); - zfree(&evsel->priv); + zfree(&evsel->stats); } static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, @@ -277,7 +278,16 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->values, cpu); + perf_stat__update_shadow_stats(evsel, count->val, cpu, + &rt_stat); + if 
(config->aggr_mode == AGGR_THREAD) { + if (config->stats) + perf_stat__update_shadow_stats(evsel, + count->val, 0, &config->stats[thread]); + else + perf_stat__update_shadow_stats(evsel, + count->val, 0, &rt_stat); + } break; case AGGR_GLOBAL: aggr->val += count->val; @@ -318,9 +328,8 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat_evsel *ps = counter->priv; + struct perf_stat_evsel *ps = counter->stats; u64 *count = counter->counts->aggr.values; - u64 val; int i, ret; aggr->val = aggr->ena = aggr->run = 0; @@ -360,8 +369,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - val = counter->scale * *count; - perf_stat__update_shadow_stats(counter, &val, 0); + perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eacaf958e19d..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -1,9 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_STATS_H #define __PERF_STATS_H #include <linux/types.h> #include <stdio.h> #include "xyarray.h" +#include "rblist.h" struct stats { @@ -42,11 +44,54 @@ enum aggr_mode { AGGR_UNSET, }; +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { + STAT_NONE = 0, + STAT_NSECS, + STAT_CYCLES, + STAT_STALLED_CYCLES_FRONT, + STAT_STALLED_CYCLES_BACK, + STAT_BRANCHES, + STAT_CACHEREFS, + STAT_L1_DCACHE, + STAT_L1_ICACHE, + STAT_LL_CACHE, + STAT_ITLB_CACHE, + STAT_DTLB_CACHE, + STAT_CYCLES_IN_TX, + STAT_TRANSACTION, + STAT_ELISION, + STAT_TOPDOWN_TOTAL_SLOTS, + STAT_TOPDOWN_SLOTS_ISSUED, + STAT_TOPDOWN_SLOTS_RETIRED, + STAT_TOPDOWN_FETCH_BUBBLES, + STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_SMI_NUM, + STAT_APERF, + STAT_MAX +}; + +struct runtime_stat { + struct rblist value_list; +}; + struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; FILE *output; unsigned int interval; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); @@ -66,6 +111,15 @@ static inline void init_stats(struct stats *stats) struct perf_evsel; struct perf_evlist; +struct perf_aggr_thread_value { + struct perf_evsel *counter; + int id; + double uval; + u64 val; + u64 run; + u64 ena; +}; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); @@ -74,16 +128,20 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); -void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, - int cpu); +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, + int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -93,7 +151,9 @@ struct 
perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, - struct perf_stat_output_ctx *out); + struct perf_stat_output_ctx *out, + struct rblist *metric_events, + struct runtime_stat *st); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index aafe908b82b5..3d1cf5bf7f18 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "debug.h" #include "util.h" #include <linux/kernel.h> diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 802d743378af..ea94d8628980 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_STRBUF_H #define __PERF_STRBUF_H diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 4dc0af669a30..7f3253d44afd 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "util.h" #include "string2.h" #include "strfilter.h" diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h index cff5eda88728..e0c25a40f796 100644 --- a/tools/perf/util/strfilter.h +++ b/tools/perf/util/strfilter.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_STRFILTER_H #define __PERF_STRFILTER_H /* General purpose glob matching filter */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index cca53b693a48..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "string2.h" #include <linux/kernel.h> #include <linux/string.h> @@ -395,3 +396,49 @@ out_err_overflow: free(expr); return NULL; } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ + char *ptr; + + do { + ptr = strpbrk(str, stopset); + if (ptr == str || + (ptr == str + 1 && *(ptr - 1) != '\\')) + break; + str = ptr + 1; + } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + + return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ + char *s, *d, *p, *ret = strdup(str); + + if (!ret) + return NULL; + + d = strchr(ret, '\\'); + if (!d) + return ret; + + s = d + 1; + do { + if (*s == '\0') { + *d = '\0'; + break; + } + p = strchr(s + 1, '\\'); + if (p) { + memmove(d, s, p - s); + d += p - s; + s = p + 1; + } else + memmove(d, s, strlen(s) + 1); + } while (p); + + return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 2f619681bd6a..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_STRING_H #define PERF_STRING_H @@ -38,5 +39,7 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str); #endif /* PERF_STRING_H */ diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 19207e50fce5..d58f1e08b170 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_STRLIST_H #define __PERF_STRLIST_H diff --git 
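A small usage sketch for the escape-aware string helpers added to string.c above; it assumes the tools/perf build environment (string2.h) and uses made-up probe-style input:

#include <stdio.h>
#include <stdlib.h>
#include "string2.h"	/* strpbrk_esc(), strdup_esc() */

int main(void)
{
	char buf[]  = "func\\+0x10+8";		/* first '+' is escaped       */
	char *sep   = strpbrk_esc(buf, "+");	/* skips the escaped '+'      */
	char *copy  = strdup_esc("func\\+0x10");/* drops single backslashes   */

	printf("unescaped separator: %s\n", sep);	/* "+8"        */
	printf("copy without escapes: %s\n", copy);	/* "func+0x10" */
	free(copy);
	return 0;
}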
a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index 946fdf2db97c..e55338d5c3bd 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_SVGHELPER_H #define __PERF_SVGHELPER_H diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5c39f420111e..2de770511e70 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <fcntl.h> #include <stdio.h> #include <errno.h> @@ -810,12 +811,6 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } -void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr, - struct map *map __maybe_unused) -{ - sym->st_value -= shdr->sh_addr - shdr->sh_offset; -} - int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule) { @@ -996,7 +991,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, /* Adjust symbol to map to file offset */ if (adjust_kernel_syms) - arch__adjust_sym_map_offset(&sym, &shdr, map); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; if (strcmp(section_name, (curr_dso->short_name + diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 1a5aa35b0100..ff48d0d49584 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "symbol.h" #include "util.h" diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5909ee4c7ade..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <dirent.h> #include <errno.h> #include <stdlib.h> @@ -45,6 +46,7 @@ struct symbol_conf symbol_conf = { .show_hist_headers = true, .symfs = "", .event_group = true, + .inline_name = true, }; static enum dso_binary_type binary_type_symtab[] = { @@ -92,6 +94,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +const char * __weak arch__normalize_symbol_name(const char *name) +{ + return name; +} + int __weak arch__compare_symbol_names(const char *namea, const char *nameb) { return strcmp(namea, nameb); @@ -226,7 +233,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) struct maps *maps = &mg->maps[type]; struct map *next, *curr; - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); curr = maps__first(maps); if (curr == NULL) @@ -246,7 +253,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) curr->end = ~0ULL; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) @@ -1671,7 +1678,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg, struct maps *maps = &mg->maps[type]; struct map *map; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { if (map->dso && strcmp(map->dso->short_name, name) == 0) @@ -1681,7 +1688,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg, map = NULL; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return map; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2bd6a1f01a1c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ 
b/tools/perf/util/symbol.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_SYMBOL #define __PERF_SYMBOL 1 @@ -59,6 +60,7 @@ struct symbol { u8 binding; u8 idle:1; u8 ignore:1; + u8 inlined:1; u8 arch_sym; char name[0]; }; @@ -208,6 +210,7 @@ struct addr_location { struct thread *thread; struct map *map; struct symbol *sym; + const char *srcline; u64 addr; char level; u8 filtered; @@ -344,11 +347,9 @@ int setup_intlist(struct intlist **list, const char *list_str, #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); -void arch__adjust_sym_map_offset(GElf_Sym *sym, - GElf_Shdr *shdr __maybe_unused, - struct map *map __maybe_unused); #endif +const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 63694e174e5c..6dd2cb88ccbe 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <elf.h> #include <inttypes.h> #include <stdio.h> diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 19e5db90394c..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -15,9 +15,9 @@ #include "syscalltbl.h" #include <stdlib.h> +#include <linux/compiler.h> #ifdef HAVE_SYSCALL_TABLE -#include <linux/compiler.h> #include <string.h> #include "string2.h" #include "util.h" @@ -26,6 +26,10 @@ #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64; #endif struct syscall { diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index e9fb8786da7c..c8e7e9ce0f01 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_SYSCALLTBL_H #define __PERF_SYSCALLTBL_H diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 7381b1ca4041..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_TARGET_H #define _PERF_TARGET_H @@ -63,6 +64,11 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__has_per_thread(struct target *target) +{ + return target->system_wide && target->per_thread; +} + static inline bool target__uses_dummy_map(struct target *target) { bool use_dummy = false; @@ -72,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target) else if (target__has_task(target) || (!target__has_cpu(target) && !target->uses_mmap)) use_dummy = true; + else if (target__has_per_thread(target)) + use_dummy = true; return use_dummy; } diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c index 8f254a74d97d..e7aa82c06c76 100644 --- a/tools/perf/util/term.c +++ b/tools/perf/util/term.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "term.h" #include <stdlib.h> #include <termios.h> diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h index 2c06a61846a1..607b170a9017 100644 --- a/tools/perf/util/term.h +++ b/tools/perf/util/term.h @@ -1,3 +1,4 @@ 
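The pthread_rwlock_* calls dropped from symbol.c, thread.c and vdso.c in this series are replaced by perf's own rwsem wrapper (down_read/up_read around lookups such as thread__comm_str(), down_write/up_write around list mutation). A plain-pthread sketch of the same reader/writer discipline, runnable outside the perf tree:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_rwlock_t comm_lock = PTHREAD_RWLOCK_INITIALIZER;
static char comm[32] = "perf";

static void set_comm(const char *str)		/* writer: exclusive */
{
	pthread_rwlock_wrlock(&comm_lock);
	strncpy(comm, str, sizeof(comm) - 1);
	pthread_rwlock_unlock(&comm_lock);
}

static void print_comm(void)			/* reader: shared */
{
	pthread_rwlock_rdlock(&comm_lock);
	printf("comm: %s\n", comm);
	pthread_rwlock_unlock(&comm_lock);
}

int main(void)
{
	print_comm();
	set_comm("sched");
	print_comm();
	return 0;
}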
+/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_TERM_H #define __PERF_TERM_H diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index aee9a42102ba..68b65b10579b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "../perf.h" #include <errno.h> #include <stdlib.h> @@ -45,6 +46,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->cpu = -1; INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); + init_rwsem(&thread->namespaces_lock); + init_rwsem(&thread->comm_lock); comm_str = malloc(32); if (!comm_str) @@ -83,18 +86,26 @@ void thread__delete(struct thread *thread) map_groups__put(thread->mg); thread->mg = NULL; } + down_write(&thread->namespaces_lock); list_for_each_entry_safe(namespaces, tmp_namespaces, &thread->namespaces_list, list) { list_del(&namespaces->list); namespaces__free(namespaces); } + up_write(&thread->namespaces_lock); + + down_write(&thread->comm_lock); list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { list_del(&comm->list); comm__free(comm); } + up_write(&thread->comm_lock); + unwind__finish_access(thread); nsinfo__zput(thread->nsinfo); + exit_rwsem(&thread->namespaces_lock); + exit_rwsem(&thread->comm_lock); free(thread); } @@ -125,8 +136,8 @@ struct namespaces *thread__namespaces(const struct thread *thread) return list_first_entry(&thread->namespaces_list, struct namespaces, list); } -int thread__set_namespaces(struct thread *thread, u64 timestamp, - struct namespaces_event *event) +static int __thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) { struct namespaces *new, *curr = thread__namespaces(thread); @@ -149,6 +160,17 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp, return 0; } +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) +{ + int ret; + + down_write(&thread->namespaces_lock); + ret = __thread__set_namespaces(thread, timestamp, event); + up_write(&thread->namespaces_lock); + return ret; +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) @@ -170,8 +192,8 @@ struct comm *thread__exec_comm(const struct thread *thread) return last; } -int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, - bool exec) +static int ____thread__set_comm(struct thread *thread, const char *str, + u64 timestamp, bool exec) { struct comm *new, *curr = thread__comm(thread); @@ -195,6 +217,17 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, return 0; } +int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, + bool exec) +{ + int ret; + + down_write(&thread->comm_lock); + ret = ____thread__set_comm(thread, str, timestamp, exec); + up_write(&thread->comm_lock); + return ret; +} + int thread__set_comm_from_proc(struct thread *thread) { char path[64]; @@ -212,7 +245,7 @@ int thread__set_comm_from_proc(struct thread *thread) return err; } -const char *thread__comm_str(const struct thread *thread) +static const char *__thread__comm_str(const struct thread *thread) { const struct comm *comm = thread__comm(thread); @@ -222,6 +255,17 @@ const char *thread__comm_str(const struct thread *thread) return comm__str(comm); } +const char *thread__comm_str(const struct thread *thread) +{ + const char *str; + + down_read((struct rw_semaphore *)&thread->comm_lock); + str = __thread__comm_str(thread); + up_read((struct 
rw_semaphore *)&thread->comm_lock); + + return str; +} + /* CHECKME: it should probably better return the max comm len from its comm list */ int thread__comm_len(struct thread *thread) { @@ -264,7 +308,7 @@ static int __thread__prepare_access(struct thread *thread) struct maps *maps = &thread->mg->maps[i]; struct map *map; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { err = unwind__prepare_access(thread, map, &initialized); @@ -272,7 +316,7 @@ static int __thread__prepare_access(struct thread *thread) break; } - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); } return err; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index cb1a5dd5c2b9..40cfa36c022a 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_THREAD_H #define __PERF_THREAD_H @@ -9,6 +10,7 @@ #include "symbol.h" #include <strlist.h> #include <intlist.h> +#include "rwsem.h" struct thread_stack; struct unwind_libunwind_ops; @@ -29,7 +31,9 @@ struct thread { int comm_len; bool dead; /* if set thread has exited */ struct list_head namespaces_list; + struct rw_semaphore namespaces_lock; struct list_head comm_list; + struct rw_semaphore comm_lock; u64 db_id; void *priv; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 63ead7b06324..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <dirent.h> #include <errno.h> #include <limits.h> @@ -91,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new_by_uid(uid_t uid) +static struct thread_map *__thread_map__new_all_cpus(uid_t uid) { DIR *proc; int max_threads = 32, items, i; @@ -112,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; - struct stat st; pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ @@ -120,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - if (stat(path, &st) != 0) - continue; + if (uid != UINT_MAX) { + struct stat st; - if (st.st_uid != uid) - continue; + if (stat(path, &st) != 0 || st.st_uid != uid) + continue; + } snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); @@ -177,6 +178,16 @@ out_free_closedir: goto out_closedir; } +struct thread_map *thread_map__new_all_cpus(void) +{ + return __thread_map__new_all_cpus(UINT_MAX); +} + +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + return __thread_map__new_all_cpus(uid); +} + struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) @@ -312,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid) + uid_t uid, bool per_thread) { if (pid) return thread_map__new_by_pid_str(pid); @@ -320,6 +331,9 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); + if (per_thread) + return thread_map__new_all_cpus(); + return thread_map__new_by_tid_str(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index bd34d7a0b9fa..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ 
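A usage sketch for the new per-thread path in thread_map__new_str(); it assumes compilation inside the tools/perf tree and only exercises calls visible in this hunk (thread_map__new_all_cpus() is reached when pid/tid/uid are all unset and per_thread is true):

#include <stdio.h>
#include <limits.h>
#include "thread_map.h"		/* tools/perf/util */

int main(void)
{
	/* pid/tid/uid unset + per_thread: enumerate every task on the system */
	struct thread_map *threads = thread_map__new_str(NULL, NULL, UINT_MAX, true);
	int i;

	if (!threads)
		return 1;
	for (i = 0; i < thread_map__nr(threads); i++)
		printf("tid %d\n", (int)thread_map__pid(threads, i));
	thread_map__put(threads);
	return 0;
}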
b/tools/perf/util/thread_map.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_THREAD_MAP_H #define __PERF_THREAD_MAP_H @@ -22,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void); struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new_all_cpus(void); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); struct thread_map *thread_map__new_event(struct thread_map_event *event); @@ -29,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid); + const char *tid, uid_t uid, bool per_thread); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 5b5d0214debd..6193b46050a5 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdlib.h> #include <string.h> #include <sys/time.h> @@ -5,6 +6,7 @@ #include <time.h> #include <errno.h> #include <inttypes.h> +#include <math.h> #include "perf.h" #include "debug.h" @@ -59,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime, return 0; } -int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +static int split_start_end(char **start, char **end, const char *ostr, char ch) { char *start_str, *end_str; char *d, *str; - int rc = 0; if (ostr == NULL || *ostr == '\0') return 0; @@ -73,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) if (str == NULL) return -ENOMEM; - ptime->start = 0; - ptime->end = 0; - - /* str has the format: <start>,<stop> - * variations: <start>, - * ,<stop> - * , - */ start_str = str; - d = strchr(start_str, ','); + d = strchr(start_str, ch); if (d) { *d = '\0'; ++d; } end_str = d; + *start = start_str; + *end = end_str; + + return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ + char *start_str = NULL, *end_str; + int rc; + + rc = split_start_end(&start_str, &end_str, ostr, ','); + if (rc || !start_str) + return rc; + + ptime->start = 0; + ptime->end = 0; + rc = parse_timestr_sec_nsec(ptime, start_str, end_str); - free(str); + free(start_str); /* make sure end time is after start time if it was given */ if (rc == 0 && ptime->end && ptime->end < ptime->start) @@ -103,6 +114,245 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) return rc; } +static int parse_percent(double *pcnt, char *str) +{ + char *c, *endptr; + double d; + + c = strchr(str, '%'); + if (c) + *c = '\0'; + else + return -1; + + d = strtod(str, &endptr); + if (endptr != str + strlen(str)) + return -1; + + *pcnt = d / 100.0; + return 0; +} + +static int percent_slash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *p, *end_str; + double pcnt, start_pcnt, end_pcnt; + u64 total = end - start; + int i; + + /* + * Example: + * 10%/2: select the second 10% slice and the third 10% slice + */ + + /* We can modify this string since the original one is copied */ + p = strchr(str, '/'); + if (!p) + return -1; + + *p = '\0'; + if (parse_percent(&pcnt, str) < 0) + return -1; + + p++; + i = (int)strtol(p, &end_str, 10); + if (*end_str) + return -1; + + if (pcnt <= 0.0) + return -1; + + 
start_pcnt = pcnt * (i - 1); + end_pcnt = pcnt * i; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +static int percent_dash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *start_str = NULL, *end_str; + double start_pcnt, end_pcnt; + u64 total = end - start; + int ret; + + /* + * Example: 0%-10% + */ + + ret = split_start_end(&start_str, &end_str, str, '-'); + if (ret || !start_str) + return ret; + + if ((parse_percent(&start_pcnt, start_str) != 0) || + (parse_percent(&end_pcnt, end_str) != 0)) { + free(start_str); + return -1; + } + + free(start_str); + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0 || + start_pcnt > end_pcnt) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +typedef int (*time_pecent_split)(char *, struct perf_time_interval *, + u64 start, u64 end); + +static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end, + time_pecent_split func) +{ + char *str, *p1, *p2; + int len, ret, i = 0; + + str = strdup(ostr); + if (str == NULL) + return -ENOMEM; + + len = strlen(str); + p1 = str; + + while (p1 < str + len) { + if (i >= num) { + free(str); + return -1; + } + + p2 = strchr(p1, ','); + if (p2) + *p2 = '\0'; + + ret = (func)(p1, &ptime_buf[i], start, end); + if (ret < 0) { + free(str); + return -1; + } + + pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); + pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); + + i++; + + if (p2) + p1 = p2 + 1; + else + break; + } + + free(str); + return i; +} + +static int one_percent_convert(struct perf_time_interval *ptime_buf, + const char *ostr, u64 start, u64 end, char *c) +{ + char *str; + int len = strlen(ostr), ret; + + /* + * c points to '%'. + * '%' should be the last character + */ + if (ostr + len - 1 != c) + return -1; + + /* + * Construct a string like "xx%/1" + */ + str = malloc(len + 3); + if (str == NULL) + return -ENOMEM; + + memcpy(str, ostr, len); + strcpy(str + len, "/1"); + + ret = percent_slash_split(str, ptime_buf, start, end); + if (ret == 0) + ret = 1; + + free(str); + return ret; +} + +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end) +{ + char *c; + + /* + * ostr example: + * 10%/2,10%/3: select the second 10% slice and the third 10% slice + * 0%-10%,30%-40%: multiple time range + * 50%: just one percent + */ + + memset(ptime_buf, 0, sizeof(*ptime_buf) * num); + + c = strchr(ostr, '/'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_slash_split); + } + + c = strchr(ostr, '-'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_dash_split); + } + + c = strchr(ostr, '%'); + if (c) + return one_percent_convert(ptime_buf, ostr, start, end, c); + + return -1; +} + +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size) +{ + const char *p1, *p2; + int i = 1; + struct perf_time_interval *ptime; + + /* + * At least allocate one time range. 
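For reference, the arithmetic behind the new percent-style time selection above ("A%/n" picks the n-th slice of width A%, "A%-B%" picks the range directly); the timestamps in this sketch are made up:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <math.h>

int main(void)
{
	uint64_t first = 1000000000ULL, last = 2000000000ULL;	/* ns, hypothetical */
	uint64_t total = last - first;
	double pcnt = 0.10;	/* "10%"            */
	int slice = 2;		/* "/2": second one */

	uint64_t start = first + (uint64_t)round(pcnt * (slice - 1) * total);
	uint64_t end   = first + (uint64_t)round(pcnt * slice * total);

	printf("10%%/2 selects [%" PRIu64 ", %" PRIu64 "]\n", start, end);

	/* a sample is kept only if it falls inside one of the parsed ranges */
	uint64_t ts = 1150000000ULL;
	printf("sample at %" PRIu64 ": %s\n", ts,
	       (ts >= start && ts <= end) ? "kept" : "skipped");
	return 0;
}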
+ */ + if (!ostr) + goto alloc; + + p1 = ostr; + while (p1 < ostr + strlen(ostr)) { + p2 = strchr(p1, ','); + if (!p2) + break; + + p1 = p2 + 1; + i++; + } + +alloc: + *size = i; + ptime = calloc(i, sizeof(*ptime)); + return ptime; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ @@ -118,6 +368,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp) +{ + struct perf_time_interval *ptime; + int i; + + if ((timestamp == 0) || (num == 0)) + return false; + + if (num == 1) + return perf_time__skip_sample(&ptime_buf[0], timestamp); + + /* + * start/end of multiple time ranges must be valid. + */ + for (i = 0; i < num; i++) { + ptime = &ptime_buf[i]; + + if (timestamp >= ptime->start && + ((timestamp < ptime->end && i < num - 1) || + (timestamp <= ptime->end && i == num - 1))) { + break; + } + } + + return (i == num) ? true : false; +} + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) { u64 sec = timestamp / NSEC_PER_SEC; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 8656be08513b..70b177d2b98c 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TIME_UTILS_H_ #define _TIME_UTILS_H_ @@ -12,8 +13,16 @@ int parse_nsec_time(const char *str, u64 *ptime); int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end); + +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size); + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp); + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int fetch_current_timestamp(char *buf, size_t sz); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index d549e50db397..183c91453522 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_TOOL_H #define __PERF_TOOL_H @@ -75,6 +76,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + bool no_warn; enum show_feature_header show_feat_hdr; }; diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 9bdfb78a9a35..9892323cdd7c 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_TOP_H #define __PERF_TOP_H 1 @@ -37,6 +38,7 @@ struct perf_top { int sym_pcnt_filter; const char *sym_filter; float min_percent; + unsigned int nr_threads_synthesize; }; #define CONSOLE_CLEAR "[H[2J" diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index e7d60d05596d..d7f2113462fb 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -28,7 +28,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/wait.h> -#include <pthread.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a9a677f7576..40b425949aa3 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ 
-27,7 +27,6 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/mman.h> -#include <pthread.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index c330780674fc..16a776371d03 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include <unistd.h> diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 1fbc044f9eb0..dcbdb53dc702 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_UTIL_TRACE_EVENT_H #define _PERF_UTIL_TRACE_EVENT_H diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h index e97d7016d771..370138e7e35c 100644 --- a/tools/perf/util/trigger.h +++ b/tools/perf/util/trigger.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __TRIGGER_H_ #define __TRIGGER_H_ 1 diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 1b741646eed0..bfa782421cbd 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/types.h> diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index d5b11e2b85e0..e0c3af34ac8d 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_TSC_H #define __PERF_TSC_H diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index 4767ec2c5ef6..a46762aec4c9 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "units.h" #include <inttypes.h> #include <limits.h> diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h index f02c87317150..99263b6a23f7 100644 --- a/tools/perf/util/units.h +++ b/tools/perf/util/units.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef PERF_UNIT_H #define PERF_UNIT_H diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7755a5e0fe5e..1e9c974faf67 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <elfutils/libdw.h> #include <elfutils/libdwfl.h> diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 4a2b269a7b3b..0cbd2650e280 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_UNWIND_LIBDW_H #define __PERF_UNWIND_LIBDW_H diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 672c2ada9357..af873044d33a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. 
* @@ -630,9 +631,8 @@ static unw_accessors_t accessors = { static int _unwind__prepare_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return 0; - thread->addr_space = unw_create_addr_space(&accessors, 0); if (!thread->addr_space) { pr_err("unwind: Can't create unwind address space.\n"); @@ -645,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread) static void _unwind__flush_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return; - unw_flush_cache(thread->addr_space, 0, 0); } static void _unwind__finish_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return; - unw_destroy_addr_space(thread->addr_space); } diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 8aef572d0889..b029a5e9ae49 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -1,8 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 #include "unwind.h" #include "thread.h" #include "session.h" #include "debug.h" -#include "arch/common.h" +#include "env.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -38,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map, if (dso_type == DSO__TYPE_UNKNOWN) return 0; - arch = normalize_arch(thread->mg->machine->env->arch); + arch = perf_env__arch(thread->mg->machine->env); if (!strcmp(arch, "x86")) { if (dso_type != DSO__TYPE_64BIT) diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index bfbdcc6198c9..8a44a1569a21 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __UNWIND_H #define __UNWIND_H diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 6cc9d9888ce0..070d25ceea6a 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * usage.c * diff --git a/tools/perf/util/util-cxx.h b/tools/perf/util/util-cxx.h index 0e0e019c9f34..80a99e458d4e 100644 --- a/tools/perf/util/util-cxx.h +++ b/tools/perf/util/util-cxx.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Support C++ source use utilities defined in util.h */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 4c360daa4e24..443892dabedb 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "../perf.h" #include "util.h" #include "debug.h" @@ -6,6 +7,7 @@ #include <sys/stat.h> #include <sys/utsname.h> #include <dirent.h> +#include <fcntl.h> #include <inttypes.h> #include <signal.h> #include <stdio.h> @@ -22,6 +24,19 @@ /* * XXX We need to find a better place for these things... 
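A toy copy loop (not the perf implementation) showing the offset bookkeeping fixed in util.c's copyfile_offset() just below: after a partial write of ret bytes, both the input and the output offset must advance by ret:

#include <stdio.h>
#include <string.h>

/* toy "write at offset" that copies at most 8 bytes per call */
static long pwrite_some(char *dst, const char *src, long n, long off_out)
{
	long ret = n > 8 ? 8 : n;

	memcpy(dst + off_out, src, ret);
	return ret;
}

int main(void)
{
	const char src[] = "0123456789abcdefghij";
	char dst[sizeof(src)] = { 0 };
	long size = sizeof(src) - 1, off_in = 0, off_out = 0;

	while (size > 0) {
		long ret = pwrite_some(dst, src + off_in, size, off_out);

		size    -= ret;
		off_in  += ret;
		off_out += ret;		/* the bug was decrementing this offset */
	}
	printf("%s\n", dst);		/* "0123456789abcdefghij" */
	return 0;
}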
*/ + +bool perf_singlethreaded = true; + +void perf_set_singlethreaded(void) +{ + perf_singlethreaded = true; +} + +void perf_set_multithreaded(void) +{ + perf_singlethreaded = false; +} + unsigned int page_size; int cacheline_size; @@ -174,7 +189,7 @@ out: return err; } -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) { void *ptr; loff_t pgoff; @@ -195,7 +210,7 @@ int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) size -= ret; off_in += ret; - off_out -= ret; + off_out += ret; } munmap(ptr, off_in + size); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b136c271125f..9496365da3d7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H @@ -5,7 +6,6 @@ /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 -#include <fcntl.h> #include <stdbool.h> #include <stddef.h> #include <stdlib.h> @@ -35,7 +35,6 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); -int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); @@ -64,4 +63,19 @@ int sched_getcpu(void); int setns(int fd, int nstype); #endif +extern bool perf_singlethreaded; + +void perf_set_singlethreaded(void); +void perf_set_multithreaded(void); + +#ifndef O_CLOEXEC +#ifdef __sparc__ +#define O_CLOEXEC 0x400000 +#elif defined(__alpha__) || defined(__hppa__) +#define O_CLOEXEC 010000000 +#else +#define O_CLOEXEC 02000000 +#endif +#endif + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 8a32bb0095e5..4b7a303e4ba8 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <inttypes.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h index 808ff9c73bf5..8c41f22f42cf 100644 --- a/tools/perf/util/values.h +++ b/tools/perf/util/values.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_VALUES_H #define __PERF_VALUES_H diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index d3c39eec89a8..0acb1ec0e2f0 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <unistd.h> #include <stdio.h> @@ -319,7 +320,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, struct vdso_info *vdso_info; struct dso *dso = NULL; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); if (!machine->vdso_info) machine->vdso_info = vdso_info__new(); @@ -347,7 +348,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, out_unlock: dso__get(dso); - pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h index cdc4fabfc212..bc74ace603ae 100644 --- a/tools/perf/util/vdso.h +++ b/tools/perf/util/vdso.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PERF_VDSO__ #define __PERF_VDSO__ diff --git 
a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c index c8f415d9877b..dc95154f5646 100644 --- a/tools/perf/util/xyarray.c +++ b/tools/perf/util/xyarray.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "xyarray.h" #include "util.h" #include <stdlib.h> diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h index 4ba726c90870..7ffe562e7ae7 100644 --- a/tools/perf/util/xyarray.h +++ b/tools/perf/util/xyarray.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_XYARRAY_H_ #define _PERF_XYARRAY_H_ 1 @@ -23,12 +24,12 @@ static inline void *xyarray__entry(struct xyarray *xy, int x, int y) static inline int xyarray__max_y(struct xyarray *xy) { - return xy->max_x; + return xy->max_y; } static inline int xyarray__max_x(struct xyarray *xy) { - return xy->max_y; + return xy->max_x; } #endif /* _PERF_XYARRAY_H_ */ diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 1329d843eb7b..a725b958cf31 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> #include <stdio.h> #include <unistd.h> #include <sys/stat.h>
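Finally, a tiny illustration (not perf code) of why the xyarray accessors above had to be swapped: each max accessor must report the dimension it is named after, otherwise iteration bounds are crossed:

#include <stdio.h>

struct xy { int max_x, max_y; int data[4][2]; };	/* e.g. 4 cpus x 2 threads */

static int xy_max_x(struct xy *a) { return a->max_x; }	/* fixed: was returning max_y */
static int xy_max_y(struct xy *a) { return a->max_y; }	/* fixed: was returning max_x */

int main(void)
{
	struct xy a = { .max_x = 4, .max_y = 2 };
	int x, y, n = 0;

	for (x = 0; x < xy_max_x(&a); x++)
		for (y = 0; y < xy_max_y(&a); y++)
			a.data[x][y] = n++;

	printf("filled %d entries (expect %d)\n", n, a.max_x * a.max_y);
	return 0;
}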