diff options
Diffstat (limited to 'tools/perf/util')
96 files changed, 3854 insertions, 874 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7efe15b9618d..af72be7f5b3b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ libperf-y += evlist.o libperf-y += evsel.o libperf-y += evsel_fprintf.o libperf-y += find_bit.o +libperf-y += get_current_dir_name.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o @@ -73,8 +74,10 @@ libperf-y += vdso.o libperf-y += counts.o libperf-y += stat.o libperf-y += stat-shadow.o +libperf-y += stat-display.o libperf-y += record.o libperf-y += srcline.o +libperf-y += srccode.o libperf-y += data.o libperf-y += tsc.o libperf-y += cloexec.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 28cd6a17491b..ac9805e0bc76 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -134,14 +134,20 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i return 0; } +#include "arch/arc/annotate/instructions.c" #include "arch/arm/annotate/instructions.c" #include "arch/arm64/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" +#include "arch/sparc/annotate/instructions.c" static struct arch architectures[] = { { + .name = "arc", + .init = arc__annotate_init, + }, + { .name = "arm", .init = arm__annotate_init, }, @@ -170,6 +176,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "sparc", + .init = sparc__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) @@ -992,6 +1005,7 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; + unsigned int cover_insn = 0; u64 offset; n_insn = annotation__count_insn(notes, start, end); @@ -1005,21 +1019,34 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 for (offset = start; offset <= end; offset++) { struct annotation_line *al = notes->offsets[offset]; - if (al) + if (al && al->ipc == 0.0) { al->ipc = ipc; + cover_insn++; + } + } + + if (cover_insn) { + notes->hit_cycles += ch->cycles; + notes->hit_insn += n_insn * ch->num; + notes->cover_insn += cover_insn; } } } void annotation__compute_ipc(struct annotation *notes, size_t size) { - u64 offset; + s64 offset; if (!notes->src || !notes->src->cycles_hist) return; + notes->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->hit_cycles = 0; + notes->hit_insn = 0; + notes->cover_insn = 0; + pthread_mutex_lock(¬es->lock); - for (offset = 0; offset < size; ++offset) { + for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; ch = ¬es->src->cycles_hist[offset]; @@ -1750,7 +1777,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) while (!feof(file)) { /* * The source code line number (lineno) needs to be kept in - * accross calls to symbol__parse_objdump_line(), so that it + * across calls to symbol__parse_objdump_line(), so that it * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ @@ -2555,6 +2582,22 @@ call_like: disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset); } +static void ipc_coverage_string(char *bf, int size, struct annotation *notes) +{ + double ipc = 0.0, coverage = 0.0; + + if (notes->hit_cycles) + ipc = notes->hit_insn / ((double)notes->hit_cycles); + + if (notes->total_insn) { + coverage = notes->cover_insn * 100.0 / + ((double)notes->total_insn); + } + + scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", + ipc, coverage); +} + static void __annotation_line__write(struct annotation_line *al, struct annotation *notes, bool first_line, bool current_entry, bool change_color, int width, void *obj, unsigned int percent_type, @@ -2650,6 +2693,11 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati ANNOTATION__MINMAX_CYCLES_WIDTH - 1, "Cycle(min/max)"); } + + if (show_title && !*al->line) { + ipc_coverage_string(bf, sizeof(bf), notes); + obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf); + } } obj__printf(obj, " "); @@ -2755,6 +2803,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev notes->nr_events = nr_pcnt; annotation__update_column_widths(notes); + sym->annotate2 = true; return 0; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 5399ba2321bb..fb6463730ba4 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -64,6 +64,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); #define ANNOTATION__IPC_WIDTH 6 #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 +#define ANNOTATION__AVG_IPC_WIDTH 36 struct annotation_options { bool hide_src_code, @@ -262,6 +263,10 @@ struct annotation { pthread_mutex_t lock; u64 max_coverage; u64 start; + u64 hit_cycles; + u64 hit_insn; + unsigned int total_insn; + unsigned int cover_insn; struct annotation_options *options; struct annotation_line **offsets; int nr_events; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index db1511359c5e..f69961c4a4f3 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -906,9 +906,8 @@ out_free: return err; } -int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_auxtrace_info(struct perf_session *session, + union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; @@ -932,9 +931,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, } } -s64 perf_event__process_auxtrace(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session) +s64 perf_event__process_auxtrace(struct perf_session *session, + union perf_event *event) { s64 err; @@ -950,7 +948,7 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE) return -EINVAL; - err = session->auxtrace->process_auxtrace_event(session, event, tool); + err = session->auxtrace->process_auxtrace_event(session, event, session->tool); if (err < 0) return err; @@ -964,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample) { - synth_opts->instructions = true; synth_opts->branches = true; synth_opts->transactions = true; synth_opts->ptwrites = true; synth_opts->pwr_events = true; synth_opts->errors = true; - synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + if (no_sample) { + synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + synth_opts->period = 1; + synth_opts->calls = true; + } else { + synth_opts->instructions = true; + synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; + synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + } synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; synth_opts->initial_skip = 0; @@ -1001,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } if (!str) { - itrace_synth_opts__set_default(synth_opts); + itrace_synth_opts__set_default(synth_opts, false); return 0; } @@ -1185,9 +1190,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats) } } -int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_auxtrace_error(struct perf_session *session, + union perf_event *event) { if (auxtrace__dont_decode(session)) return 0; @@ -1196,11 +1200,12 @@ int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused, return 0; } -static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, +static int __auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) { + struct auxtrace_mmap *mm = &map->auxtrace_mmap; u64 head, old = mm->prev, offset, ref; unsigned char *data = mm->base; size_t size, head_off, old_off, len1, len2, padding; @@ -1287,7 +1292,7 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, ev.auxtrace.tid = mm->tid; ev.auxtrace.cpu = mm->cpu; - if (fn(tool, &ev, data1, len1, data2, len2)) + if (fn(tool, map, &ev, data1, len1, data2, len2)) return -1; mm->prev = head; @@ -1306,18 +1311,18 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, return 1; } -int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn) { - return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0); + return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); } -int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm, +int auxtrace_mmap__read_snapshot(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) { - return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size); + return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size); } /** @@ -1978,17 +1983,14 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso) { - struct symbol *first_sym = dso__first_symbol(dso); - struct symbol *last_sym = dso__last_symbol(dso); - - if (!first_sym || !last_sym) { - pr_err("Failed to determine filter for %s\nNo symbols found.\n", + if (dso__data_file_size(dso, NULL)) { + pr_err("Failed to determine filter for %s\nCannot determine file size.\n", filt->filename); return -EINVAL; } - filt->addr = first_sym->start; - filt->size = last_sym->end - first_sym->start; + filt->addr = 0; + filt->size = dso->data.file_size; return 0; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 71fc3bd74299..8e50f96d4b23 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> +#include <asm/bitsperlong.h> #include "../perf.h" #include "event.h" @@ -33,6 +34,7 @@ union perf_event; struct perf_session; struct perf_evlist; struct perf_tool; +struct perf_mmap; struct option; struct record_opts; struct auxtrace_info_event; @@ -56,6 +58,7 @@ enum itrace_period_type { /** * struct itrace_synth_opts - AUX area tracing synthesis options. * @set: indicates whether or not options have been set + * @default_no_sample: Default to no sampling. * @inject: indicates the event (not just the sample) must be fully synthesized * because 'perf inject' will write it out * @instructions: whether to synthesize 'instructions' events @@ -80,6 +83,7 @@ enum itrace_period_type { */ struct itrace_synth_opts { bool set; + bool default_no_sample; bool inject; bool instructions; bool branches; @@ -434,13 +438,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, bool per_cpu); typedef int (*process_auxtrace_t)(struct perf_tool *tool, + struct perf_mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2); -int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn); -int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm, +int auxtrace_mmap__read_snapshot(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); @@ -517,18 +522,16 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process); -int perf_event__process_auxtrace_info(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); -s64 perf_event__process_auxtrace(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); -int perf_event__process_auxtrace_error(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_auxtrace_info(struct perf_session *session, + union perf_event *event); +s64 perf_event__process_auxtrace(struct perf_session *session, + union perf_event *event); +int perf_event__process_auxtrace_error(struct perf_session *session, + union perf_event *event); int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset); -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts); +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample); size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp); void perf_session__auxtrace_error_inc(struct perf_session *session, @@ -577,6 +580,23 @@ static inline void auxtrace__free(struct perf_session *session) return session->auxtrace->free(session); } +#define ITRACE_HELP \ +" i: synthesize instructions events\n" \ +" b: synthesize branches events\n" \ +" c: synthesize branches events (calls only)\n" \ +" r: synthesize branches events (returns only)\n" \ +" x: synthesize transactions events\n" \ +" w: synthesize ptwrite events\n" \ +" p: synthesize power events\n" \ +" e: synthesize error events\n" \ +" d: create a debug log\n" \ +" g[len]: synthesize a call chain (use with i or x)\n" \ +" l[len]: synthesize last branch entries (use with i or x)\n" \ +" sNUMBER: skip initial number of events\n" \ +" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ +" concatenate multiple options. Default is ibxwpe or cewp\n" + + #else static inline struct auxtrace_record * @@ -717,6 +737,8 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct perf_evlist *evlist, int idx, bool per_cpu); +#define ITRACE_HELP "" + #endif #endif diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 47aac41349a2..2f3eb6d293ee 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -99,7 +99,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) if (err) return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); } else - pr_debug("bpf: successfull builtin compilation\n"); + pr_debug("bpf: successful builtin compilation\n"); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) @@ -1603,7 +1603,7 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha op = bpf_map__add_newop(map, NULL); if (IS_ERR(op)) - return ERR_PTR(PTR_ERR(op)); + return ERR_CAST(op); op->op_type = BPF_MAP_OP_SET_EVSEL; op->v.evsel = evsel; } @@ -1615,7 +1615,7 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha int bpf__setup_stdout(struct perf_evlist *evlist) { struct perf_evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__"); - return IS_ERR(evsel) ? PTR_ERR(evsel) : 0; + return PTR_ERR_OR_ZERO(evsel); } #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 5ac157056cdf..1ea8f898f1a1 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -14,6 +14,7 @@ #include "util.h" #include "cache.h" #include <subcmd/exec-cmd.h> +#include "util/event.h" /* proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ #include "config.h" @@ -419,6 +420,9 @@ static int perf_buildid_config(const char *var, const char *value) static int perf_default_core_config(const char *var __maybe_unused, const char *value __maybe_unused) { + if (!strcmp(var, "core.proc-map-timeout")) + proc_map_timeout = strtoul(value, NULL, 10); + /* Add other config variables here. */ return 0; } @@ -811,14 +815,14 @@ int config_error_nonbool(const char *var) void set_buildid_dir(const char *dir) { if (dir) - scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir); + scnprintf(buildid_dir, MAXPATHLEN, "%s", dir); /* default to $HOME/.debug */ if (buildid_dir[0] == '\0') { char *home = getenv("HOME"); if (home) { - snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s", + snprintf(buildid_dir, MAXPATHLEN, "%s/%s", home, DEBUG_CACHE_DIR); } else { strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 938def6d0bb9..8c155575c6c5 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -36,7 +36,6 @@ struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); - bool trace_on; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; @@ -116,6 +115,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, return 1; } +static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, + ocsd_etmv3_cfg *config) +{ + config->reg_idr = params->etmv3.reg_idr; + config->reg_ctrl = params->etmv3.reg_ctrl; + config->reg_ccer = params->etmv3.reg_ccer; + config->reg_trc_id = params->etmv3.reg_trc_id; + config->arch_ver = ARCH_V7; + config->core_prof = profile_CortexA; + + return 0; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { @@ -237,10 +249,19 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, struct cs_etm_decoder *decoder) { const char *decoder_name; + ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; void *trace_config; switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV3: + case CS_ETM_PROTO_PTM: + cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); + decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? + OCSD_BUILTIN_DCD_ETMV3 : + OCSD_BUILTIN_DCD_PTM; + trace_config = &config_etmv3; + break; case CS_ETM_PROTO_ETMV4i: cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); decoder_name = OCSD_BUILTIN_DCD_ETMV4I; @@ -263,11 +284,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->tail = 0; decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) { + decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; + decoder->packet_buffer[i].instr_count = 0; decoder->packet_buffer[i].last_instr_taken_branch = false; - decoder->packet_buffer[i].exc = false; - decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].last_instr_size = 0; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -294,11 +316,13 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_count++; decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].exc = false; - decoder->packet_buffer[et].exc_ret = false; + decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; decoder->packet_buffer[et].cpu = *((int *)inode->priv); decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; + decoder->packet_buffer[et].instr_count = 0; + decoder->packet_buffer[et].last_instr_taken_branch = false; + decoder->packet_buffer[et].last_instr_size = 0; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -321,8 +345,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, packet = &decoder->packet_buffer[decoder->tail]; + switch (elem->isa) { + case ocsd_isa_aarch64: + packet->isa = CS_ETM_ISA_A64; + break; + case ocsd_isa_arm: + packet->isa = CS_ETM_ISA_A32; + break; + case ocsd_isa_thumb2: + packet->isa = CS_ETM_ISA_T32; + break; + case ocsd_isa_tee: + case ocsd_isa_jazelle: + case ocsd_isa_custom: + case ocsd_isa_unknown: + default: + packet->isa = CS_ETM_ISA_UNKNOWN; + } + packet->start_addr = elem->st_addr; packet->end_addr = elem->en_addr; + packet->instr_count = elem->num_instr_range; + switch (elem->last_i_type) { case OCSD_INSTR_BR: case OCSD_INSTR_BR_INDIRECT: @@ -336,15 +380,33 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; } + packet->last_instr_size = elem->last_instr_sz; + return ret; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder, - const uint8_t trace_chan_id) +cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) { return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, - CS_ETM_TRACE_ON); + CS_ETM_DISCONTINUITY); +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_EXCEPTION); +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_EXCEPTION_RET); } static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( @@ -359,26 +421,25 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( switch (elem->elem_type) { case OCSD_GEN_TRC_ELEM_UNKNOWN: break; + case OCSD_GEN_TRC_ELEM_EO_TRACE: case OCSD_GEN_TRC_ELEM_NO_SYNC: - decoder->trace_on = false; - break; case OCSD_GEN_TRC_ELEM_TRACE_ON: - resp = cs_etm_decoder__buffer_trace_on(decoder, - trace_chan_id); - decoder->trace_on = true; + resp = cs_etm_decoder__buffer_discontinuity(decoder, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: resp = cs_etm_decoder__buffer_range(decoder, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: - decoder->packet_buffer[decoder->tail].exc = true; + resp = cs_etm_decoder__buffer_exception(decoder, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: - decoder->packet_buffer[decoder->tail].exc_ret = true; + resp = cs_etm_decoder__buffer_exception_ret(decoder, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: - case OCSD_GEN_TRC_ELEM_EO_TRACE: case OCSD_GEN_TRC_ELEM_ADDR_NACC: case OCSD_GEN_TRC_ELEM_TIMESTAMP: case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: @@ -398,11 +459,20 @@ static int cs_etm_decoder__create_etm_packet_decoder( struct cs_etm_decoder *decoder) { const char *decoder_name; + ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; void *trace_config; u8 csid; switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV3: + case CS_ETM_PROTO_PTM: + cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); + decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? + OCSD_BUILTIN_DCD_ETMV3 : + OCSD_BUILTIN_DCD_PTM; + trace_config = &config_etmv3; + break; case CS_ETM_PROTO_ETMV4i: cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); decoder_name = OCSD_BUILTIN_DCD_ETMV4I; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 612b5755f742..a6407d41598f 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -23,18 +23,28 @@ struct cs_etm_buffer { }; enum cs_etm_sample_type { - CS_ETM_EMPTY = 0, - CS_ETM_RANGE = 1 << 0, - CS_ETM_TRACE_ON = 1 << 1, + CS_ETM_EMPTY, + CS_ETM_RANGE, + CS_ETM_DISCONTINUITY, + CS_ETM_EXCEPTION, + CS_ETM_EXCEPTION_RET, +}; + +enum cs_etm_isa { + CS_ETM_ISA_UNKNOWN, + CS_ETM_ISA_A64, + CS_ETM_ISA_A32, + CS_ETM_ISA_T32, }; struct cs_etm_packet { enum cs_etm_sample_type sample_type; + enum cs_etm_isa isa; u64 start_addr; u64 end_addr; + u32 instr_count; u8 last_instr_taken_branch; - u8 exc; - u8 exc_ret; + u8 last_instr_size; int cpu; }; @@ -43,6 +53,13 @@ struct cs_etm_queue; typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, size_t, u8 *); +struct cs_etmv3_trace_params { + u32 reg_ctrl; + u32 reg_trc_id; + u32 reg_ccer; + u32 reg_idr; +}; + struct cs_etmv4_trace_params { u32 reg_idr0; u32 reg_idr1; @@ -55,6 +72,7 @@ struct cs_etmv4_trace_params { struct cs_etm_trace_params { int protocol; union { + struct cs_etmv3_trace_params etmv3; struct cs_etmv4_trace_params etmv4; }; }; @@ -78,6 +96,7 @@ enum { CS_ETM_PROTO_ETMV3 = 1, CS_ETM_PROTO_ETMV4i, CS_ETM_PROTO_ETMV4d, + CS_ETM_PROTO_PTM, }; enum { diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2ae640257fdb..27a374ddf661 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -31,14 +31,6 @@ #define MAX_TIMESTAMP (~0ULL) -/* - * A64 instructions are always 4 bytes - * - * Only A64 is supported, so can use this constant for converting between - * addresses and instruction counts, calculting offsets etc - */ -#define A64_INSTR_SIZE 4 - struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -91,6 +83,19 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid, u64 time_); +/* PTMs ETMIDR [11:8] set to b0011 */ +#define ETMIDR_PTM_VERSION 0x00000300 + +static u32 cs_etm__get_v7_protocol_version(u32 etmidr) +{ + etmidr &= ETMIDR_PTM_VERSION; + + if (etmidr == ETMIDR_PTM_VERSION) + return CS_ETM_PROTO_PTM; + + return CS_ETM_PROTO_ETMV3; +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -122,15 +127,31 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); for (i = 0; i < etm->num_cpu; i++) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = + if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; + + t_params[i].protocol = + cs_etm__get_v7_protocol_version(etmidr); + t_params[i].etmv3.reg_ctrl = + etm->metadata[i][CS_ETM_ETMCR]; + t_params[i].etmv3.reg_trc_id = + etm->metadata[i][CS_ETM_ETMTRACEIDR]; + } else if (etm->metadata[i][CS_ETM_MAGIC] == + __perf_cs_etmv4_magic) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = + etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = + etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = + etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = + etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = + t_params[i].etmv4.reg_traceidr = etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } } /* Set decoder parameters to simply print the trace packets */ @@ -244,6 +265,27 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) +{ + struct machine *machine; + + machine = etmq->etm->machine; + + if (address >= etmq->etm->kernel_start) { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_KERNEL; + else + return PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_USER; + else if (perf_guest) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_HYPERVISOR; + } +} + static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, size_t size, u8 *buffer) { @@ -258,10 +300,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return -1; machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = cs_etm__cpu_mode(etmq, address); thread = etmq->thread; if (!thread) { @@ -342,15 +381,31 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free; for (i = 0; i < etm->num_cpu; i++) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = + if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; + + t_params[i].protocol = + cs_etm__get_v7_protocol_version(etmidr); + t_params[i].etmv3.reg_ctrl = + etm->metadata[i][CS_ETM_ETMCR]; + t_params[i].etmv3.reg_trc_id = + etm->metadata[i][CS_ETM_ETMTRACEIDR]; + } else if (etm->metadata[i][CS_ETM_MAGIC] == + __perf_cs_etmv4_magic) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = + etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = + etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = + etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = + etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = + t_params[i].etmv4.reg_traceidr = etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } } /* Set decoder parameters to simply print the trace packets */ @@ -492,53 +547,54 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) etmq->last_branch_rb->nr = 0; } -static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) -{ - /* Returns 0 for the CS_ETM_TRACE_ON packet */ - if (packet->sample_type == CS_ETM_TRACE_ON) - return 0; +static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, + u64 addr) { + u8 instrBytes[2]; + cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes); /* - * The packet records the execution range with an exclusive end address - * - * A64 instructions are constant size, so the last executed - * instruction is A64_INSTR_SIZE before the end address - * Will need to do instruction level decode for T32 instructions as - * they can be variable size (not yet supported). + * T32 instruction size is indicated by bits[15:11] of the first + * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 + * denote a 32-bit instruction. */ - return packet->end_addr - A64_INSTR_SIZE; + return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; } static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) { - /* Returns 0 for the CS_ETM_TRACE_ON packet */ - if (packet->sample_type == CS_ETM_TRACE_ON) + /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ + if (packet->sample_type == CS_ETM_DISCONTINUITY) return 0; return packet->start_addr; } -static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +static inline +u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) { - /* - * Only A64 instructions are currently supported, so can get - * instruction count by dividing. - * Will need to do instruction level decode for T32 instructions as - * they can be variable size (not yet supported). - */ - return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE; + /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ + if (packet->sample_type == CS_ETM_DISCONTINUITY) + return 0; + + return packet->end_addr - packet->last_instr_size; } -static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, +static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, + const struct cs_etm_packet *packet, u64 offset) { - /* - * Only A64 instructions are currently supported, so can get - * instruction address by muliplying. - * Will need to do instruction level decode for T32 instructions as - * they can be variable size (not yet supported). - */ - return packet->start_addr + offset * A64_INSTR_SIZE; + if (packet->isa == CS_ETM_ISA_T32) { + u64 addr = packet->start_addr; + + while (offset > 0) { + addr += cs_etm__t32_instr_size(etmq, addr); + offset--; + } + return addr; + } + + /* Assume a 4 byte instruction size (A32/A64) */ + return packet->start_addr + offset * 4; } static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) @@ -653,7 +709,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); sample.ip = addr; @@ -665,7 +721,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.insn_len = 1; - sample.cpumode = event->header.misc; + sample.cpumode = event->sample.header.misc; if (etm->synth_opts.last_branch) { cs_etm__copy_last_branch_rb(etmq); @@ -706,12 +762,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) u64 nr; struct branch_entry entries; } dummy_bs; + u64 ip; + + ip = cs_etm__last_executed_instr(etmq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); + sample.ip = ip; sample.pid = etmq->pid; sample.tid = etmq->tid; sample.addr = cs_etm__first_executed_instr(etmq->packet); @@ -720,7 +779,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) sample.period = 1; sample.cpu = etmq->packet->cpu; sample.flags = 0; - sample.cpumode = PERF_RECORD_MISC_USER; + sample.cpumode = event->sample.header.misc; /* * perf report cannot handle events without a branch stack @@ -867,9 +926,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int ret; - u64 instrs_executed; + u64 instrs_executed = etmq->packet->instr_count; - instrs_executed = cs_etm__instr_count(etmq->packet); etmq->period_instructions += instrs_executed; /* @@ -899,7 +957,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * executed, but PC has not advanced to next instruction) */ u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq->packet, offset); + u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); ret = cs_etm__synth_instruction_sample( etmq, addr, etm->instructions_sample_period); @@ -914,7 +972,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) bool generate_sample = false; /* Generate sample for tracing on packet */ - if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON) + if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) generate_sample = true; /* Generate sample for branch taken packet */ @@ -942,6 +1000,25 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) return 0; } +static int cs_etm__exception(struct cs_etm_queue *etmq) +{ + /* + * When the exception packet is inserted, whether the last instruction + * in previous range packet is taken branch or not, we need to force + * to set 'prev_packet->last_instr_taken_branch' to true. This ensures + * to generate branch sample for the instruction range before the + * exception is trapped to kernel or before the exception returning. + * + * The exception packet includes the dummy address values, so don't + * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful + * for generating instruction and branch samples. + */ + if (etmq->prev_packet->sample_type == CS_ETM_RANGE) + etmq->prev_packet->last_instr_taken_branch = true; + + return 0; +} + static int cs_etm__flush(struct cs_etm_queue *etmq) { int err = 0; @@ -984,7 +1061,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq) } swap_packet: - if (etmq->etm->synth_opts.last_branch) { + if (etm->sample_branches || etm->synth_opts.last_branch) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. @@ -997,6 +1074,39 @@ swap_packet: return err; } +static int cs_etm__end_block(struct cs_etm_queue *etmq) +{ + int err; + + /* + * It has no new packet coming and 'etmq->packet' contains the stale + * packet which was set at the previous time with packets swapping; + * so skip to generate branch sample to avoid stale packet. + * + * For this case only flush branch stack and generate a last branch + * event for the branches left in the circular buffer at the end of + * the trace. + */ + if (etmq->etm->synth_opts.last_branch && + etmq->prev_packet->sample_type == CS_ETM_RANGE) { + /* + * Use the address of the end of the last reported execution + * range. + */ + u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + + err = cs_etm__synth_instruction_sample( + etmq, addr, + etmq->period_instructions); + if (err) + return err; + + etmq->period_instructions = 0; + } + + return 0; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -1057,7 +1167,16 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) */ cs_etm__sample(etmq); break; - case CS_ETM_TRACE_ON: + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(etmq); + break; + case CS_ETM_DISCONTINUITY: /* * Discontinuity in trace, flush * previous branch stack @@ -1079,7 +1198,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) if (err == 0) /* Flush any remaining branch stack entries */ - err = cs_etm__flush(etmq); + err = cs_etm__end_block(etmq); } return err; @@ -1432,7 +1551,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { etm->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&etm->synth_opts); + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); etm->synth_opts.callchain = false; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index abd38abf1d91..2a36fab76994 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -182,20 +182,20 @@ err_put_field: } static struct bt_ctf_field_type* -get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) +get_tracepoint_field_type(struct ctf_writer *cw, struct tep_format_field *field) { unsigned long flags = field->flags; - if (flags & FIELD_IS_STRING) + if (flags & TEP_FIELD_IS_STRING) return cw->data.string; - if (!(flags & FIELD_IS_SIGNED)) { + if (!(flags & TEP_FIELD_IS_SIGNED)) { /* unsigned long are mostly pointers */ - if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER) + if (flags & TEP_FIELD_IS_LONG || flags & TEP_FIELD_IS_POINTER) return cw->data.u64_hex; } - if (flags & FIELD_IS_SIGNED) { + if (flags & TEP_FIELD_IS_SIGNED) { if (field->size == 8) return cw->data.s64; else @@ -287,7 +287,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, struct perf_sample *sample, - struct format_field *fmtf) + struct tep_format_field *fmtf) { struct bt_ctf_field_type *type; struct bt_ctf_field *array_field; @@ -304,10 +304,10 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, name = fmtf->alias; offset = fmtf->offset; len = fmtf->size; - if (flags & FIELD_IS_STRING) - flags &= ~FIELD_IS_ARRAY; + if (flags & TEP_FIELD_IS_STRING) + flags &= ~TEP_FIELD_IS_ARRAY; - if (flags & FIELD_IS_DYNAMIC) { + if (flags & TEP_FIELD_IS_DYNAMIC) { unsigned long long tmp_val; tmp_val = tep_read_number(fmtf->event->pevent, @@ -317,7 +317,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset &= 0xffff; } - if (flags & FIELD_IS_ARRAY) { + if (flags & TEP_FIELD_IS_ARRAY) { type = bt_ctf_event_class_get_field_by_name( event_class, name); @@ -338,7 +338,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, type = get_tracepoint_field_type(cw, fmtf); for (i = 0; i < n_items; i++) { - if (flags & FIELD_IS_ARRAY) + if (flags & TEP_FIELD_IS_ARRAY) field = bt_ctf_field_array_get_field(array_field, i); else field = bt_ctf_field_create(type); @@ -348,7 +348,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, return -1; } - if (flags & FIELD_IS_STRING) + if (flags & TEP_FIELD_IS_STRING) ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; @@ -357,7 +357,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, fmtf->event->pevent, data + offset + i * len, len); - if (!(flags & FIELD_IS_SIGNED)) + if (!(flags & TEP_FIELD_IS_SIGNED)) ret = bt_ctf_field_unsigned_integer_set_value( field, value_int); else @@ -369,7 +369,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, pr_err("failed to set file value %s\n", name); goto err_put_field; } - if (!(flags & FIELD_IS_ARRAY)) { + if (!(flags & TEP_FIELD_IS_ARRAY)) { ret = bt_ctf_event_set_payload(event, name, field); if (ret) { pr_err("failed to set payload %s\n", name); @@ -378,7 +378,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } bt_ctf_field_put(field); } - if (flags & FIELD_IS_ARRAY) { + if (flags & TEP_FIELD_IS_ARRAY) { ret = bt_ctf_event_set_payload(event, name, array_field); if (ret) { pr_err("Failed add payload array %s\n", name); @@ -396,10 +396,10 @@ err_put_field: static int add_tracepoint_fields_values(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, - struct format_field *fields, + struct tep_format_field *fields, struct perf_sample *sample) { - struct format_field *field; + struct tep_format_field *field; int ret; for (field = fields; field; field = field->next) { @@ -417,8 +417,8 @@ static int add_tracepoint_values(struct ctf_writer *cw, struct perf_evsel *evsel, struct perf_sample *sample) { - struct format_field *common_fields = evsel->tp_format->format.common_fields; - struct format_field *fields = evsel->tp_format->format.fields; + struct tep_format_field *common_fields = evsel->tp_format->format.common_fields; + struct tep_format_field *fields = evsel->tp_format->format.fields; int ret; ret = add_tracepoint_fields_values(cw, event_class, event, @@ -970,7 +970,7 @@ out: static int event_class_add_field(struct bt_ctf_event_class *event_class, struct bt_ctf_field_type *type, - struct format_field *field) + struct tep_format_field *field) { struct bt_ctf_field_type *t = NULL; char *name; @@ -1009,10 +1009,10 @@ static int event_class_add_field(struct bt_ctf_event_class *event_class, } static int add_tracepoint_fields_types(struct ctf_writer *cw, - struct format_field *fields, + struct tep_format_field *fields, struct bt_ctf_event_class *event_class) { - struct format_field *field; + struct tep_format_field *field; int ret; for (field = fields; field; field = field->next) { @@ -1030,15 +1030,15 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw, * type and don't care that it is an array. What we don't * support is an array of strings. */ - if (flags & FIELD_IS_STRING) - flags &= ~FIELD_IS_ARRAY; + if (flags & TEP_FIELD_IS_STRING) + flags &= ~TEP_FIELD_IS_ARRAY; - if (flags & FIELD_IS_ARRAY) + if (flags & TEP_FIELD_IS_ARRAY) type = bt_ctf_field_type_array_create(type, field->arraylen); ret = event_class_add_field(event_class, type, field); - if (flags & FIELD_IS_ARRAY) + if (flags & TEP_FIELD_IS_ARRAY) bt_ctf_field_type_put(type); if (ret) { @@ -1055,8 +1055,8 @@ static int add_tracepoint_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *class) { - struct format_field *common_fields = evsel->tp_format->format.common_fields; - struct format_field *fields = evsel->tp_format->format.fields; + struct tep_format_field *common_fields = evsel->tp_format->format.common_fields; + struct tep_format_field *fields = evsel->tp_format->format.fields; int ret; ret = add_tracepoint_fields_types(cw, common_fields, class); @@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, { struct perf_session *session; struct perf_data data = { - .file.path = input, + .file = { .path = input, .fd = -1 }, .mode = PERF_DATA_MODE_READ, .force = opts->force, }; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 7123746edcf4..69fbb0a72d0c 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -463,6 +463,28 @@ int db_export__branch_types(struct db_export *dbe) if (err) break; } + + /* Add trace begin / end variants */ + for (i = 0; branch_types[i].name ; i++) { + const char *name = branch_types[i].name; + u32 type = branch_types[i].branch_type; + char buf[64]; + + if (type == PERF_IP_FLAG_BRANCH || + (type & (PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END))) + continue; + + snprintf(buf, sizeof(buf), "trace begin / %s", name); + err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_BEGIN, buf); + if (err) + break; + + snprintf(buf, sizeof(buf), "%s / trace end", name); + err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_END, buf); + if (err) + break; + } + return err; } diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index bbed90e5d9bb..62c8cf622607 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -295,7 +295,7 @@ static int decompress_kmodule(struct dso *dso, const char *name, unlink(tmpbuf); if (pathname && (fd >= 0)) - strncpy(pathname, tmpbuf, len); + strlcpy(pathname, tmpbuf, len); return fd; } @@ -894,7 +894,7 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine, return r; } -static int data_file_size(struct dso *dso, struct machine *machine) +int dso__data_file_size(struct dso *dso, struct machine *machine) { int ret = 0; struct stat st; @@ -943,7 +943,7 @@ out: */ off_t dso__data_size(struct dso *dso, struct machine *machine) { - if (data_file_size(dso, machine)) + if (dso__data_file_size(dso, machine)) return -1; /* For now just estimate dso data size is close to file size */ @@ -953,7 +953,7 @@ off_t dso__data_size(struct dso *dso, struct machine *machine) static ssize_t data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size) { - if (data_file_size(dso, machine)) + if (dso__data_file_size(dso, machine)) return -1; /* Check the offset sanity. */ diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c5380500bed4..8c8a7abe809d 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -322,6 +322,7 @@ int dso__data_get_fd(struct dso *dso, struct machine *machine); void dso__data_put_fd(struct dso *dso); void dso__data_close(struct dso *dso); +int dso__data_file_size(struct dso *dso, struct machine *machine); off_t dso__data_size(struct dso *dso, struct machine *machine); ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size); diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c index 10988d3de7ce..2bd8585db93c 100644 --- a/tools/perf/util/dump-insn.c +++ b/tools/perf/util/dump-insn.c @@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused, *lenp = 0; return "?"; } + +__weak +int arch_is_branch(const unsigned char *buf __maybe_unused, + size_t len __maybe_unused, + int x86_64 __maybe_unused) +{ + return 0; +} diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h index 0e06280a8860..650125061530 100644 --- a/tools/perf/util/dump-insn.h +++ b/tools/perf/util/dump-insn.h @@ -20,4 +20,6 @@ struct perf_insn { const char *dump_insn(struct perf_insn *x, u64 ip, u8 *inbuf, int inlen, int *lenp); +int arch_is_branch(const unsigned char *buf, size_t len, int x86_64); + #endif diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 59f38c7693f8..4c23779e271a 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -166,7 +166,7 @@ const char *perf_env__arch(struct perf_env *env) struct utsname uts; char *arch_name; - if (!env) { /* Assume local operation */ + if (!env || !env->arch) { /* Assume local operation */ if (uname(&uts) < 0) return NULL; arch_name = uts.machine; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1f3ccc368530..d01b8355f4ca 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -63,6 +63,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; + u64 clockid_res_ns; }; extern struct perf_env perf_env; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bc646185f8d9..937a5a4f71cc 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -25,6 +25,8 @@ #include "asm/bug.h" #include "stat.h" +#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 + static const char *perf_event__names[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", @@ -72,6 +74,8 @@ static const char *perf_ns__names[] = { [CGROUP_NS_INDEX] = "cgroup", }; +unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; + const char *perf_event__name(unsigned int id) { if (id >= ARRAY_SIZE(perf_event__names)) @@ -308,6 +312,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); @@ -322,8 +327,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, - bool mmap_data, - unsigned int proc_map_timeout) + bool mmap_data) { char filename[PATH_MAX]; FILE *fp; @@ -520,8 +524,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, perf_event__handler_t process, struct perf_tool *tool, struct machine *machine, - bool mmap_data, - unsigned int proc_map_timeout) + bool mmap_data) { char filename[PATH_MAX]; DIR *tasks; @@ -547,8 +550,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, */ if (pid == tgid && perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data, - proc_map_timeout)) + process, machine, mmap_data)) return -1; return 0; @@ -597,7 +599,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (_pid == pid) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data, proc_map_timeout); + process, machine, mmap_data); if (rc) break; } @@ -611,8 +613,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, struct machine *machine, - bool mmap_data, - unsigned int proc_map_timeout) + bool mmap_data) { union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; @@ -642,7 +643,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, thread_map__pid(threads, thread), 0, process, tool, machine, - mmap_data, proc_map_timeout)) { + mmap_data)) { err = -1; break; } @@ -668,7 +669,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, - mmap_data, proc_map_timeout)) { + mmap_data)) { err = -1; break; } @@ -689,7 +690,6 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, - unsigned int proc_map_timeout, struct dirent **dirent, int start, int num) @@ -733,8 +733,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool, */ __event__synthesize_thread(comm_event, mmap_event, fork_event, namespaces_event, pid, 1, process, - tool, machine, mmap_data, - proc_map_timeout); + tool, machine, mmap_data); } err = 0; @@ -754,7 +753,6 @@ struct synthesize_threads_arg { perf_event__handler_t process; struct machine *machine; bool mmap_data; - unsigned int proc_map_timeout; struct dirent **dirent; int num; int start; @@ -766,7 +764,7 @@ static void *synthesize_threads_worker(void *arg) __perf_event__synthesize_threads(args->tool, args->process, args->machine, args->mmap_data, - args->proc_map_timeout, args->dirent, + args->dirent, args->start, args->num); return NULL; } @@ -775,7 +773,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, - unsigned int proc_map_timeout, unsigned int nr_threads_synthesize) { struct synthesize_threads_arg *args = NULL; @@ -805,7 +802,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr <= 1) { err = __perf_event__synthesize_threads(tool, process, machine, mmap_data, - proc_map_timeout, dirent, base, n); goto free_dirent; } @@ -827,7 +823,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool, args[i].process = process; args[i].machine = machine; args[i].mmap_data = mmap_data; - args[i].proc_map_timeout = proc_map_timeout; args[i].dirent = dirent; } for (i = 0; i < m; i++) { @@ -1576,6 +1571,24 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return al->map; } +/* + * For branch stacks or branch samples, the sample cpumode might not be correct + * because it applies only to the sample 'ip' and not necessary to 'addr' or + * branch stack addresses. If possible, use a fallback to deal with those cases. + */ +struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + struct map *map = thread__find_map(thread, cpumode, addr, al); + struct machine *machine = thread->mg->machine; + u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr); + + if (map || addr_cpumode == cpumode) + return map; + + return thread__find_map(thread, addr_cpumode, addr, al); +} + struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -1585,6 +1598,15 @@ struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, return al->sym; } +struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al) +{ + al->sym = NULL; + if (thread__find_map_fb(thread, cpumode, addr, al)) + al->sym = map__find_symbol(al->map, al->addr); + return al->sym; +} + /* * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() @@ -1678,7 +1700,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void thread__resolve(struct thread *thread, struct addr_location *al, struct perf_sample *sample) { - thread__find_map(thread, sample->cpumode, sample->addr, al); + thread__find_map_fb(thread, sample->cpumode, sample->addr, al); al->cpu = sample->cpu; al->sym = NULL; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index bfa60bcafbde..eb95f3384958 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -669,8 +669,7 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool, int perf_event__synthesize_thread_map(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, - struct machine *machine, bool mmap_data, - unsigned int proc_map_timeout); + struct machine *machine, bool mmap_data); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, @@ -682,7 +681,6 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool, int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, - unsigned int proc_map_timeout, unsigned int nr_threads_synthesize); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, @@ -797,8 +795,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, - bool mmap_data, - unsigned int proc_map_timeout); + bool mmap_data); int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, @@ -829,5 +826,6 @@ int perf_event_paranoid(void); extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; +extern unsigned int proc_map_timeout; #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index be440df29615..8c902276d4b4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -34,6 +34,10 @@ #include <linux/log2.h> #include <linux/err.h> +#ifdef LACKS_SIGQUEUE_PROTOTYPE +int sigqueue(pid_t pid, int sig, const union sigval value); +#endif + #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) @@ -358,7 +362,7 @@ void perf_evlist__disable(struct perf_evlist *evlist) struct perf_evsel *pos; evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->fd) + if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd) continue; perf_evsel__disable(pos); } @@ -1018,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite) + bool auxtrace_overwrite, int nr_cblocks) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1028,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. */ - struct mmap_params mp; + struct mmap_params mp = { .nr_cblocks = nr_cblocks }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -1060,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false, 0); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1176,7 +1180,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e return err; } -int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) +int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter) { struct perf_evsel *evsel; int err = 0; @@ -1193,7 +1197,7 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } -int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) +int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) { char *filter; int ret = -1; @@ -1214,15 +1218,15 @@ int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t } } - ret = perf_evlist__set_filter(evlist, filter); + ret = perf_evlist__set_tp_filter(evlist, filter); out_free: free(filter); return ret; } -int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid) { - return perf_evlist__set_filter_pids(evlist, 1, &pid); + return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); } bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) @@ -1810,3 +1814,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist) leader->forced_leader = true; } } + +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, + struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dc66436add98..868294491194 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -98,9 +98,9 @@ void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, #define perf_evlist__reset_sample_bit(evlist, bit) \ __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) -int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); -int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); -int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); +int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter); +int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid); +int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); @@ -162,7 +162,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite); + bool auxtrace_overwrite, int nr_cblocks); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); @@ -312,4 +312,7 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); void perf_evlist__force_leader(struct perf_evlist *evlist); +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, + struct perf_evsel *evsel); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e596ae358c4d..dbc0466db368 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->max_events = ULONG_MAX; evsel->evlist = NULL; evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->node); @@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_MAX_STACK: max_stack = term->val.max_stack; break; + case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: + evsel->max_events = term->val.max_events; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -952,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_freq = 0; attr->sample_period = 0; attr->write_backward = 0; - attr->sample_id_all = 0; } if (opts->no_samples) @@ -1089,7 +1092,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } - if (evsel->own_cpus) + if (evsel->own_cpus || evsel->unit) evsel->attr.read_format |= PERF_FORMAT_ID; /* @@ -1203,16 +1206,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter) int perf_evsel__enable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_ENABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0); + + if (!err) + evsel->disabled = false; + + return err; } int perf_evsel__disable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_DISABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0); + /* + * We mark it disabled here so that tools that disable a event can + * ignore events after they disable it. I.e. the ring buffer may have + * already a few more events queued up before the kernel got the stop + * request. + */ + if (!err) + evsel->disabled = true; + + return err; } int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -2685,7 +2699,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, return 0; } -struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name) +struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); } @@ -2693,7 +2707,7 @@ struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *nam void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { - struct format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = perf_evsel__field(evsel, name); int offset; if (!field) @@ -2701,7 +2715,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, offset = field->offset; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; } @@ -2709,7 +2723,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, return sample->raw_data + offset; } -u64 format_field__intval(struct format_field *field, struct perf_sample *sample, +u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap) { u64 value; @@ -2751,7 +2765,7 @@ u64 format_field__intval(struct format_field *field, struct perf_sample *sample, u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { - struct format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = perf_evsel__field(evsel, name); if (!field) return 0; @@ -2943,3 +2957,32 @@ struct perf_env *perf_evsel__env(struct perf_evsel *evsel) return evsel->evlist->env; return NULL; } + +static int store_evsel_ids(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + int cpu, thread; + + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(evsel->fd); + thread++) { + int fd = FD(evsel, cpu, thread); + + if (perf_evlist__id_add_fd(evlist, evsel, + cpu, thread, fd) < 0) + return -1; + } + } + + return 0; +} + +int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + struct cpu_map *cpus = evsel->cpus; + struct thread_map *threads = evsel->threads; + + if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr)) + return -ENOMEM; + + return store_evsel_ids(evsel, evlist); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 163c960614d3..82a289ce8b0c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -46,6 +46,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, @@ -65,6 +66,7 @@ struct perf_evsel_config_term { bool inherit; bool overwrite; char *branch; + unsigned long max_events; } val; bool weak; }; @@ -99,10 +101,12 @@ struct perf_evsel { struct perf_counts *prev_raw_counts; int idx; u32 ids; + unsigned long max_events; + unsigned long nr_events_printed; char *name; double scale; const char *unit; - struct event_format *tp_format; + struct tep_event *tp_format; off_t id_offset; struct perf_stat_evsel *stats; void *priv; @@ -119,6 +123,7 @@ struct perf_evsel { bool snapshot; bool supported; bool needs_swap; + bool disabled; bool no_aux_samples; bool immediate; bool system_wide; @@ -211,7 +216,7 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char * struct perf_evsel *perf_evsel__new_cycles(bool precise); -struct event_format *event_format__new(const char *sys, const char *name); +struct tep_event *event_format__new(const char *sys, const char *name); void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); @@ -296,11 +301,11 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel, return perf_evsel__rawptr(evsel, sample, name); } -struct format_field; +struct tep_format_field; -u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap); +u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap); -struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); +struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ @@ -481,4 +486,5 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, struct perf_env *perf_evsel__env(struct perf_evsel *evsel); +int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 06dfb027879d..95ea147f9e18 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -73,7 +73,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, } if (details->trace_fields) { - struct format_field *field; + struct tep_format_field *field; if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { printed += comma_fprintf(fp, &first, " (not a tracepoint)"); @@ -173,6 +173,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); + /* Add srccode here too? */ if (symbol_conf.bt_stop_list && node->sym && strlist__has_entry(symbol_conf.bt_stop_list, diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index de322d51c7fe..b72440bf9a79 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__powerpc__) #define GEN_ELF_ARCH EM_PPC #define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__sparc__) && defined(__arch64__) +#define GEN_ELF_ARCH EM_SPARCV9 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__sparc__) +#define GEN_ELF_ARCH EM_SPARC +#define GEN_ELF_CLASS ELFCLASS32 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c new file mode 100644 index 000000000000..267aa609a582 --- /dev/null +++ b/tools/perf/util/get_current_dir_name.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +// +#ifndef HAVE_GET_CURRENT_DIR_NAME +#include "util.h" +#include <unistd.h> +#include <stdlib.h> +#include <stdlib.h> + +/* Android's 'bionic' library, for one, doesn't have this */ + +char *get_current_dir_name(void) +{ + char pwd[PATH_MAX]; + + return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd); +} +#endif // HAVE_GET_CURRENT_DIR_NAME diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3cadc252dd89..dec6d218c31c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -988,6 +988,45 @@ static int write_group_desc(struct feat_fd *ff, } /* + * Return the CPU id as a raw string. + * + * Each architecture should provide a more precise id string that + * can be use to match the architecture's "mapfile". + */ +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return NULL; +} + +/* Return zero when the cpuid from the mapfile.csv matches the + * cpuid string generated on this platform. + * Otherwise return non-zero. + */ +int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; + + if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", mapcpuid); + return 1; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + return 0; + } + return 1; +} + +/* * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(SRCARCH)/util/header.c */ @@ -1034,6 +1073,13 @@ static int write_auxtrace(struct feat_fd *ff, return err; } +static int write_clockid(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + return do_write(ff, &ff->ph->env.clockid_res_ns, + sizeof(ff->ph->env.clockid_res_ns)); +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1508,6 +1554,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) fprintf(fp, "# Core ID and Socket ID information is not available\n"); } +static void print_clockid(struct feat_fd *ff, FILE *fp) +{ + fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", + ff->ph->env.clockid_res_ns * 1000); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2531,6 +2583,15 @@ out: return ret; } +static int process_clockid(struct feat_fd *ff, + void *data __maybe_unused) +{ + if (do_read_u64(ff, &ff->ph->env.clockid_res_ns)) + return -1; + + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2590,6 +2651,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), + FEAT_OPR(CLOCKID, clockid, false) }; struct header_print_data { @@ -2636,6 +2698,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) struct perf_header *header = &session->header; int fd = perf_data__fd(session->data); struct stat st; + time_t stctime; int ret, bit; hd.fp = fp; @@ -2645,7 +2708,8 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) if (ret == -1) return -1; - fprintf(fp, "# captured on : %s", ctime(&st.st_ctime)); + stctime = st.st_ctime; + fprintf(fp, "# captured on : %s", ctime(&stctime)); fprintf(fp, "# header version : %u\n", header->version); fprintf(fp, "# data offset : %" PRIu64 "\n", header->data_offset); @@ -2736,7 +2800,7 @@ static int perf_header__adds_write(struct perf_header *header, lseek(fd, sec_start, SEEK_SET); /* * may write more than needed due to dropped feature, but - * this is okay, reader will skip the mising entries + * this is okay, reader will skip the missing entries */ err = do_write(&ff, feat_sec, sec_size); if (err < 0) @@ -3206,7 +3270,7 @@ static int read_attr(int fd, struct perf_header *ph, static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, struct tep_handle *pevent) { - struct event_format *event; + struct tep_event *event; char bf[128]; /* already prepared */ @@ -3448,10 +3512,10 @@ int perf_event__synthesize_features(struct perf_tool *tool, return ret; } -int perf_event__process_feature(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +int perf_event__process_feature(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct feat_fd ff = { .fd = 0 }; struct feature_event *fe = (struct feature_event *)event; int type = fe->header.type; @@ -3521,7 +3585,7 @@ perf_event__synthesize_event_update_unit(struct perf_tool *tool, if (ev == NULL) return -ENOMEM; - strncpy(ev->data, evsel->unit, size); + strlcpy(ev->data, evsel->unit, size + 1); err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); return err; @@ -3560,7 +3624,7 @@ perf_event__synthesize_event_update_name(struct perf_tool *tool, if (ev == NULL) return -ENOMEM; - strncpy(ev->data, evsel->name, len); + strlcpy(ev->data, evsel->name, len + 1); err = process(tool, (union perf_event*) ev, NULL, NULL); free(ev); return err; @@ -3637,13 +3701,13 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) } int perf_event__synthesize_attrs(struct perf_tool *tool, - struct perf_session *session, - perf_event__handler_t process) + struct perf_evlist *evlist, + perf_event__handler_t process) { struct perf_evsel *evsel; int err = 0; - evlist__for_each_entry(session->evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, evsel->id, process); if (err) { @@ -3856,9 +3920,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, return aligned_size; } -int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_tracing_data(struct perf_session *session, + union perf_event *event) { ssize_t size_read, padding, size = event->tracing_data.size; int fd = perf_data__fd(session->data); @@ -3924,9 +3987,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, return err; } -int perf_event__process_build_id(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_build_id(struct perf_session *session, + union perf_event *event) { __event_process_build_id(&event->build_id, event->build_id.filename, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 6d7fe44aadc0..0d553ddca0a3 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -38,6 +38,7 @@ enum { HEADER_CACHE, HEADER_SAMPLE_TIME, HEADER_MEM_TOPOLOGY, + HEADER_CLOCKID, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -116,15 +117,14 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, perf_event__handler_t process, bool is_pipe); -int perf_event__process_feature(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_feature(struct perf_session *session, + union perf_event *event); int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_attrs(struct perf_tool *tool, - struct perf_session *session, + struct perf_evlist *evlist, perf_event__handler_t process); int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct perf_evsel *evsel, @@ -148,17 +148,15 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct perf_evlist *evlist, perf_event__handler_t process); -int perf_event__process_tracing_data(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_tracing_data(struct perf_session *session, + union perf_event *event); int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); -int perf_event__process_build_id(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_build_id(struct perf_session *session, + union perf_event *event); bool is_perf_magic(u64 magic); #define NAME_ALIGN 64 diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 828cb9794c76..8aad8330e392 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1160,7 +1160,7 @@ void hist_entry__delete(struct hist_entry *he) /* * If this is not the last column, then we need to pad it according to the - * pre-calculated max lenght for this column, otherwise don't bother adding + * pre-calculated max length for this column, otherwise don't bother adding * spaces because that would break viewing this with, for instance, 'less', * that would show tons of trailing spaces when a long C++ demangled method * names is sampled. diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3badd7f1e1b8..664b5eda8d51 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -62,6 +62,7 @@ enum hist_column { HISTC_TRACE, HISTC_SYM_SIZE, HISTC_DSO_SIZE, + HISTC_SYMBOL_IPC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h index 6a6f4b990547..548100315710 100644 --- a/tools/perf/util/include/asm/uaccess.h +++ b/tools/perf/util/include/asm/uaccess.h @@ -10,6 +10,6 @@ #define get_user __get_user -#define access_ok(type, addr, size) 1 +#define access_ok(addr, size) 1 #endif diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 7f0c83b6332b..ee6ca65f81f4 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, return 0; } +static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip) +{ + return machine__kernel_ip(bts->machine, ip) ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, struct branch *branch) { @@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, bts->num_events++ <= bts->synth_opts.initial_skip) return 0; - event.sample.header.type = PERF_RECORD_SAMPLE; - event.sample.header.misc = PERF_RECORD_MISC_USER; - event.sample.header.size = sizeof(struct perf_event_header); - - sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = le64_to_cpu(branch->from); + sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; sample.tid = btsq->tid; sample.addr = le64_to_cpu(branch->to); @@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, sample.insn_len = btsq->intel_pt_insn.length; memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + if (bts->synth_opts.inject) { event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, @@ -444,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq, continue; intel_bts_get_branch_type(btsq, branch); if (btsq->bts->synth_opts.thread_stack) - thread_stack__event(thread, btsq->sample_flags, + thread_stack__event(thread, btsq->cpu, btsq->sample_flags, le64_to_cpu(branch->from), le64_to_cpu(branch->to), btsq->intel_pt_insn.length, @@ -516,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) !btsq->bts->synth_opts.thread_stack && thread && (!old_buffer || btsq->bts->sampling_mode || (btsq->bts->snapshot_mode && !buffer->consecutive))) - thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1); + thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1); err = intel_bts_process_buffer(btsq, buffer, thread); @@ -910,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { bts->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&bts->synth_opts); + itrace_synth_opts__set_default(&bts->synth_opts, + session->itrace_synth_opts->default_no_sample); if (session->itrace_synth_opts) bts->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index d404bed7003a..4503f3ca45ab 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1165,7 +1165,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; } if (err == INTEL_PT_RETURN) @@ -1179,9 +1179,13 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - if (decoder->packet.count != 0) + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; decoder->ip = decoder->last_ip; + } + decoder->state.type |= INTEL_PT_TRACE_END; } else { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = decoder->ip; @@ -1208,7 +1212,8 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->ip = to_ip; decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.to_ip = to_ip; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; } intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", @@ -1469,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) decoder->have_calc_cyc_to_tsc = false; intel_pt_calc_cyc_to_tsc(decoder, true); } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) @@ -1509,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } /* Walk PSB+ packets when already in sync. */ @@ -1640,14 +1649,15 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGD: decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - if (decoder->packet.count != 0) { + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { intel_pt_set_ip(decoder); - intel_pt_log("Omitting PGD ip " x64_fmt "\n", - decoder->ip); + decoder->state.to_ip = decoder->ip; } decoder->pge = false; decoder->continuous_period = false; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; case INTEL_PT_TIP_PGE: @@ -1661,6 +1671,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) intel_pt_set_ip(decoder); decoder->state.to_ip = decoder->ip; } + decoder->state.type |= INTEL_PT_TRACE_BEGIN; return 0; case INTEL_PT_TIP: @@ -1739,6 +1750,7 @@ next: intel_pt_set_ip(decoder); decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; return 0; } @@ -2077,9 +2089,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - break; + if (!decoder->ip) + break; + if (decoder->packet.type == INTEL_PT_TIP_PGE) + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + if (decoder->packet.type == INTEL_PT_TIP_PGD) + decoder->state.type |= INTEL_PT_TRACE_END; + return 0; case INTEL_PT_FUP: if (intel_pt_have_ip(decoder)) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 51c18d67f4ca..ed088d4726ba 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -37,6 +37,8 @@ enum intel_pt_sample_type { INTEL_PT_EX_STOP = 1 << 6, INTEL_PT_PWR_EXIT = 1 << 7, INTEL_PT_CBR_CHG = 1 << 8, + INTEL_PT_TRACE_BEGIN = 1 << 9, + INTEL_PT_TRACE_END = 1 << 10, }; enum intel_pt_period_type { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 54818828023b..1c0e289f01e6 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } +int arch_is_branch(const unsigned char *buf, size_t len, int x86_64) +{ + struct intel_pt_insn in; + if (intel_pt_get_insn(buf, len, x86_64, &in) < 0) + return -1; + return in.branch != INTEL_PT_BR_NO_BRANCH; +} + const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, u8 *inbuf, int inlen, int *lenp) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index e02bc7b166a0..5e64da270f97 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -31,6 +31,11 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; bool intel_pt_enable_logging; +void *intel_pt_log_fp(void) +{ + return f; +} + void intel_pt_log_enable(void) { intel_pt_enable_logging = true; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index 45b64f93f358..cc084937f701 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -22,6 +22,7 @@ struct intel_pt_pkt; +void *intel_pt_log_fp(void); void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index aec68908d604..2e72373ec6df 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, intel_pt_dump(pt, buf, len); } +static void intel_pt_log_event(union perf_event *event) +{ + FILE *f = intel_pt_log_fp(); + + if (!intel_pt_enable_logging || !f) + return; + + perf_event__fprintf(event, f); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -407,6 +417,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +{ + return ip >= pt->kernel_start ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -429,10 +446,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) goto out_no_cache; - if (*ip >= ptq->pt->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = intel_pt_cpumode(ptq->pt, *ip); thread = ptq->thread; if (!thread) { @@ -759,7 +773,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->synth_opts.callchain) { size_t sz = sizeof(struct ip_callchain); - sz += pt->synth_opts.callchain_sz * sizeof(u64); + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); ptq->chain = zalloc(sz); if (!ptq->chain) goto out_free; @@ -908,6 +923,11 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->insn_len = ptq->state->insn_len; memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); } + + if (ptq->state->type & INTEL_PT_TRACE_BEGIN) + ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->type & INTEL_PT_TRACE_END) + ptq->flags |= PERF_IP_FLAG_TRACE_END; } static int intel_pt_setup_queue(struct intel_pt *pt, @@ -1053,15 +1073,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); - if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); - sample->cpumode = PERF_RECORD_MISC_USER; sample->ip = ptq->state->from_ip; + sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->pid = ptq->pid; sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; @@ -1070,6 +1086,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->flags = ptq->flags; sample->insn_len = ptq->insn_len; memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = sample->cpumode; + event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event, @@ -1154,8 +1174,9 @@ static void intel_pt_prep_sample(struct intel_pt *pt, intel_pt_prep_b_sample(pt, ptq, event, sample); if (pt->synth_opts.callchain) { - thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample->ip); + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, + pt->synth_opts.callchain_sz + 1, + sample->ip, pt->kernel_start); sample->callchain = ptq->chain; } @@ -1505,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return 0; if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) - thread_stack__event(ptq->thread, ptq->flags, state->from_ip, + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip, state->to_ip, ptq->insn_len, state->trace_nr); else - thread_stack__set_trace_nr(ptq->thread, state->trace_nr); + thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); if (pt->sample_branches) { err = intel_pt_synth_branch_sample(ptq); @@ -1999,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", - perf_event__name(event->header.type), event->header.type, - sample->cpu, sample->time, timestamp); + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); return err; } @@ -2554,7 +2575,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&pt->synth_opts); + itrace_synth_opts__set_default(&pt->synth_opts, + session->itrace_synth_opts->default_no_sample); if (use_browser != -1) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index a1863000e972..bf249552a9b0 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -38,7 +38,7 @@ struct jit_buf_desc { uint64_t sample_type; size_t bufsize; FILE *in; - bool needs_bswap; /* handles cross-endianess */ + bool needs_bswap; /* handles cross-endianness */ bool use_arch_timestamp; void *debug_data; void *unwinding_data; diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 19262f98cd4e..5b0b60f00275 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -19,7 +19,7 @@ #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \ + "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ "-Wno-unused-value -Wno-pointer-sign " \ "-working-directory $WORKING_DIR " \ "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 111ae858cbcb..6fcb3bce0442 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -137,7 +137,7 @@ struct machine *machine__new_kallsyms(void) struct machine *machine = machine__new_host(); /* * FIXME: - * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely + * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. */ @@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + bool do_maps_clone = true; int err = 0; if (dump_trace) @@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + /* + * When synthesizing FORK events, we are trying to create thread + * objects for the already running tasks on the machine. + * + * Normally, for a kernel FORK event, we want to clone the parent's + * maps because that is what the kernel just did. + * + * But when synthesizing, this should not be done. If we do, we end up + * with overlapping maps as we process the sythesized MMAP2 events that + * get delivered shortly thereafter. + * + * Use the FORK event misc flags in an internal way to signal this + * situation, so we can elide the map clone when appropriate. + */ + if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC) + do_maps_clone = false; if (thread == NULL || parent == NULL || - thread__fork(thread, parent, sample->time) < 0) { + thread__fork(thread, parent, sample->time, do_maps_clone) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); err = -1; } @@ -2140,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread, return 0; } +static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, + struct callchain_cursor *cursor, + struct symbol **parent, + struct addr_location *root_al, + u8 *cpumode, int ent) +{ + int err = 0; + + while (--ent >= 0) { + u64 ip = chain->ips[ent]; + + if (ip >= PERF_CONTEXT_MAX) { + err = add_callchain_ip(thread, cursor, parent, + root_al, cpumode, ip, + false, NULL, NULL, 0); + break; + } + } + return err; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct perf_evsel *evsel, @@ -2246,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: + if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, root_al, + &cpumode, chain->nr - first_call); + if (err) + return (err < 0) ? err : 0; + } for (i = first_call, nr_entries = 0; i < chain_nr && nr_entries < max_stack; i++) { u64 ip; @@ -2260,9 +2304,15 @@ check_calls: continue; #endif ip = chain->ips[j]; - if (ip < PERF_CONTEXT_MAX) ++nr_entries; + else if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, + root_al, &cpumode, j); + if (err) + return (err < 0) ? err : 0; + continue; + } err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, @@ -2443,15 +2493,13 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout, unsigned int nr_threads_synthesize) { if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); + return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); else if (target__has_cpu(target)) return perf_event__synthesize_threads(tool, process, machine, data_mmap, - proc_map_timeout, nr_threads_synthesize); /* command specified */ return 0; @@ -2542,6 +2590,33 @@ int machine__get_kernel_start(struct machine *machine) return err; } +u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr) +{ + u8 addr_cpumode = cpumode; + bool kernel_ip; + + if (!machine->single_address_space) + goto out; + + kernel_ip = machine__kernel_ip(machine, addr); + switch (cpumode) { + case PERF_RECORD_MISC_KERNEL: + case PERF_RECORD_MISC_USER: + addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; + break; + case PERF_RECORD_MISC_GUEST_KERNEL: + case PERF_RECORD_MISC_GUEST_USER: + addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL : + PERF_RECORD_MISC_GUEST_USER; + break; + default: + break; + } +out: + return addr_cpumode; +} + struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { return dsos__findnew(&machine->dsos, filename); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d856b85862e2..a5d1da60f751 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -42,6 +42,7 @@ struct machine { u16 id_hdr_size; bool comm_exec; bool kptr_restrict_warned; + bool single_address_space; char *root_dir; char *mmap_name; struct threads threads[THREADS__TABLE_SIZE]; @@ -99,6 +100,8 @@ static inline bool machine__kernel_ip(struct machine *machine, u64 ip) return ip >= kernel_start; } +u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr); + struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid); struct comm *machine__thread_exec_comm(struct machine *machine, @@ -247,17 +250,14 @@ int machines__for_each_thread(struct machines *machines, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, perf_event__handler_t process, bool data_mmap, - unsigned int proc_map_timeout, unsigned int nr_threads_synthesize); static inline int machine__synthesize_threads(struct machine *machine, struct target *target, struct thread_map *threads, bool data_mmap, - unsigned int proc_map_timeout, unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, perf_event__process, data_mmap, - proc_map_timeout, nr_threads_synthesize); } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6a6929f208b4..6751301a755c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -19,8 +19,10 @@ #include "srcline.h" #include "namespaces.h" #include "unwind.h" +#include "srccode.h" static void __maps__insert(struct maps *maps, struct map *map); +static void __maps__insert_name(struct maps *maps, struct map *map); static inline int is_anon_memory(const char *filename, u32 flags) { @@ -320,12 +322,11 @@ int map__load(struct map *map) build_id__sprintf(map->dso->build_id, sizeof(map->dso->build_id), sbuild_id); - pr_warning("%s with build id %s not found", - name, sbuild_id); + pr_debug("%s with build id %s not found", name, sbuild_id); } else - pr_warning("Failed to open %s", name); + pr_debug("Failed to open %s", name); - pr_warning(", continuing without symbols\n"); + pr_debug(", continuing without symbols\n"); return -1; } else if (nr == 0) { #ifdef HAVE_LIBELF_SUPPORT @@ -334,12 +335,11 @@ int map__load(struct map *map) if (len > sizeof(DSO__DELETED) && strcmp(name + real_len + 1, DSO__DELETED) == 0) { - pr_warning("%.*s was updated (is prelink enabled?). " + pr_debug("%.*s was updated (is prelink enabled?). " "Restart the long running apps that use it!\n", (int)real_len, name); } else { - pr_warning("no symbols found in %s, maybe install " - "a debug package?\n", name); + pr_debug("no symbols found in %s, maybe install a debug package?\n", name); } #endif return -1; @@ -422,6 +422,54 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, return ret; } +int map__fprintf_srccode(struct map *map, u64 addr, + FILE *fp, + struct srccode_state *state) +{ + char *srcfile; + int ret = 0; + unsigned line; + int len; + char *srccode; + + if (!map || !map->dso) + return 0; + srcfile = get_srcline_split(map->dso, + map__rip_2objdump(map, addr), + &line); + if (!srcfile) + return 0; + + /* Avoid redundant printing */ + if (state && + state->srcfile && + !strcmp(state->srcfile, srcfile) && + state->line == line) { + free(srcfile); + return 0; + } + + srccode = find_sourceline(srcfile, line, &len); + if (!srccode) + goto out_free_line; + + ret = fprintf(fp, "|%-8d %.*s", line, len, srccode); + state->srcfile = srcfile; + state->line = line; + return ret; + +out_free_line: + free(srcfile); + return ret; +} + + +void srccode_state_free(struct srccode_state *state) +{ + zfree(&state->srcfile); + state->line = 0; +} + /** * map__rip_2objdump - convert symbol start address to objdump address. * @map: memory map @@ -498,6 +546,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; + maps->names = RB_ROOT; init_rwsem(&maps->lock); } @@ -666,6 +715,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) static void __map_groups__insert(struct map_groups *mg, struct map *map) { __maps__insert(&mg->maps, map); + __maps__insert_name(&mg->maps, map); map->groups = mg; } @@ -712,8 +762,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp if (verbose >= 2) { if (use_browser) { - pr_warning("overlapping maps in %s " - "(disable tui for more info)\n", + pr_debug("overlapping maps in %s (disable tui for more info)\n", map->dso->name); } else { fputs("overlapping maps:\n", fp); @@ -827,10 +876,34 @@ static void __maps__insert(struct maps *maps, struct map *map) map__get(map); } +static void __maps__insert_name(struct maps *maps, struct map *map) +{ + struct rb_node **p = &maps->names.rb_node; + struct rb_node *parent = NULL; + struct map *m; + int rc; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node_name); + rc = strcmp(m->dso->short_name, map->dso->short_name); + if (rc < 0) + p = &(*p)->rb_left; + else if (rc > 0) + p = &(*p)->rb_right; + else + return; + } + rb_link_node(&map->rb_node_name, parent, p); + rb_insert_color(&map->rb_node_name, &maps->names); + map__get(map); +} + void maps__insert(struct maps *maps, struct map *map) { down_write(&maps->lock); __maps__insert(maps, map); + __maps__insert_name(maps, map); up_write(&maps->lock); } @@ -849,19 +922,18 @@ void maps__remove(struct maps *maps, struct map *map) struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node **p, *parent = NULL; + struct rb_node *p; struct map *m; down_read(&maps->lock); - p = &maps->entries.rb_node; - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map, rb_node); + p = maps->entries.rb_node; + while (p != NULL) { + m = rb_entry(p, struct map, rb_node); if (ip < m->start) - p = &(*p)->rb_left; + p = p->rb_left; else if (ip >= m->end) - p = &(*p)->rb_right; + p = p->rb_right; else goto out; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e0f327b51e66..09282aa45c80 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -25,6 +25,7 @@ struct map { struct rb_node rb_node; struct list_head node; }; + struct rb_node rb_node_name; u64 start; u64 end; bool erange_warned; @@ -57,6 +58,7 @@ struct kmap { struct maps { struct rb_root entries; + struct rb_root names; struct rw_semaphore lock; }; @@ -172,6 +174,22 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); +struct srccode_state { + char *srcfile; + unsigned line; +}; + +static inline void srccode_state_init(struct srccode_state *state) +{ + state->srcfile = NULL; + state->line = 0; +} + +void srccode_state_free(struct srccode_state *state); + +int map__fprintf_srccode(struct map *map, u64 addr, + FILE *fp, struct srccode_state *state); + int map__load(struct map *map); struct symbol *map__find_symbol(struct map *map, u64 addr); struct symbol *map__find_symbol_by_name(struct map *map, const char *name); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 215f69f41672..8fc39311a30d 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -153,8 +153,158 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb { } +#ifdef HAVE_AIO_SUPPORT +static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) +{ + int delta_max, i, prio; + + map->aio.nr_cblocks = mp->nr_cblocks; + if (map->aio.nr_cblocks) { + map->aio.aiocb = calloc(map->aio.nr_cblocks, sizeof(struct aiocb *)); + if (!map->aio.aiocb) { + pr_debug2("failed to allocate aiocb for data buffer, error %m\n"); + return -1; + } + map->aio.cblocks = calloc(map->aio.nr_cblocks, sizeof(struct aiocb)); + if (!map->aio.cblocks) { + pr_debug2("failed to allocate cblocks for data buffer, error %m\n"); + return -1; + } + map->aio.data = calloc(map->aio.nr_cblocks, sizeof(void *)); + if (!map->aio.data) { + pr_debug2("failed to allocate data buffer, error %m\n"); + return -1; + } + delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); + for (i = 0; i < map->aio.nr_cblocks; ++i) { + map->aio.data[i] = malloc(perf_mmap__mmap_len(map)); + if (!map->aio.data[i]) { + pr_debug2("failed to allocate data buffer area, error %m"); + return -1; + } + /* + * Use cblock.aio_fildes value different from -1 + * to denote started aio write operation on the + * cblock so it requires explicit record__aio_sync() + * call prior the cblock may be reused again. + */ + map->aio.cblocks[i].aio_fildes = -1; + /* + * Allocate cblocks with priority delta to have + * faster aio write system calls because queued requests + * are kept in separate per-prio queues and adding + * a new request will iterate thru shorter per-prio + * list. Blocks with numbers higher than + * _SC_AIO_PRIO_DELTA_MAX go with priority 0. + */ + prio = delta_max - i; + map->aio.cblocks[i].aio_reqprio = prio >= 0 ? prio : 0; + } + } + + return 0; +} + +static void perf_mmap__aio_munmap(struct perf_mmap *map) +{ + int i; + + for (i = 0; i < map->aio.nr_cblocks; ++i) + zfree(&map->aio.data[i]); + if (map->aio.data) + zfree(&map->aio.data); + zfree(&map->aio.cblocks); + zfree(&map->aio.aiocb); +} + +int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, + int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off), + off_t *off) +{ + u64 head = perf_mmap__read_head(md); + unsigned char *data = md->base + page_size; + unsigned long size, size0 = 0; + void *buf; + int rc = 0; + + rc = perf_mmap__read_init(md); + if (rc < 0) + return (rc == -EAGAIN) ? 0 : -1; + + /* + * md->base data is copied into md->data[idx] buffer to + * release space in the kernel buffer as fast as possible, + * thru perf_mmap__consume() below. + * + * That lets the kernel to proceed with storing more + * profiling data into the kernel buffer earlier than other + * per-cpu kernel buffers are handled. + * + * Coping can be done in two steps in case the chunk of + * profiling data crosses the upper bound of the kernel buffer. + * In this case we first move part of data from md->start + * till the upper bound and then the reminder from the + * beginning of the kernel buffer till the end of + * the data chunk. + */ + + size = md->end - md->start; + + if ((md->start & md->mask) + size != (md->end & md->mask)) { + buf = &data[md->start & md->mask]; + size = md->mask + 1 - (md->start & md->mask); + md->start += size; + memcpy(md->aio.data[idx], buf, size); + size0 = size; + } + + buf = &data[md->start & md->mask]; + size = md->end - md->start; + md->start += size; + memcpy(md->aio.data[idx] + size0, buf, size); + + /* + * Increment md->refcount to guard md->data[idx] buffer + * from premature deallocation because md object can be + * released earlier than aio write request started + * on mmap->data[idx] is complete. + * + * perf_mmap__put() is done at record__aio_complete() + * after started request completion. + */ + perf_mmap__get(md); + + md->prev = head; + perf_mmap__consume(md); + + rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off); + if (!rc) { + *off += size0 + size; + } else { + /* + * Decrement md->refcount back if aio write + * operation failed to start. + */ + perf_mmap__put(md); + } + + return rc; +} +#else +static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, + struct mmap_params *mp __maybe_unused) +{ + return 0; +} + +static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) +{ +} +#endif + void perf_mmap__munmap(struct perf_mmap *map) { + perf_mmap__aio_munmap(map); if (map->base != NULL) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; @@ -197,7 +347,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c &mp->auxtrace_mp, map->base, fd)) return -1; - return 0; + return perf_mmap__aio_mmap(map, mp); } static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) @@ -281,7 +431,7 @@ int perf_mmap__read_init(struct perf_mmap *map) } int perf_mmap__push(struct perf_mmap *md, void *to, - int push(void *to, void *buf, size_t size)) + int push(struct perf_mmap *map, void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); unsigned char *data = md->base + page_size; @@ -300,7 +450,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to, size = md->mask + 1 - (md->start & md->mask); md->start += size; - if (push(to, buf, size) < 0) { + if (push(md, to, buf, size) < 0) { rc = -1; goto out; } @@ -310,7 +460,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to, size = md->end - md->start; md->start += size; - if (push(to, buf, size) < 0) { + if (push(md, to, buf, size) < 0) { rc = -1; goto out; } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 05a6d47c7956..aeb6942fdb00 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -4,11 +4,15 @@ #include <linux/compiler.h> #include <linux/refcount.h> #include <linux/types.h> -#include <asm/barrier.h> +#include <linux/ring_buffer.h> #include <stdbool.h> +#ifdef HAVE_AIO_SUPPORT +#include <aio.h> +#endif #include "auxtrace.h" #include "event.h" +struct aiocb; /** * struct perf_mmap - perf's ring buffer mmap details * @@ -26,6 +30,14 @@ struct perf_mmap { bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); +#ifdef HAVE_AIO_SUPPORT + struct { + void **data; + struct aiocb *cblocks; + struct aiocb **aiocb; + int nr_cblocks; + } aio; +#endif }; /* @@ -57,7 +69,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask; + int prot, mask, nr_cblocks; struct auxtrace_mmap_params auxtrace_mp; }; @@ -71,21 +83,12 @@ void perf_mmap__consume(struct perf_mmap *map); static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { - struct perf_event_mmap_page *pc = mm->base; - u64 head = READ_ONCE(pc->data_head); - rmb(); - return head; + return ring_buffer_read_head(mm->base); } static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - mb(); - pc->data_tail = tail; + ring_buffer_write_tail(md->base, tail); } union perf_event *perf_mmap__read_forward(struct perf_mmap *map); @@ -93,7 +96,19 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_event(struct perf_mmap *map); int perf_mmap__push(struct perf_mmap *md, void *to, - int push(void *to, void *buf, size_t size)); + int push(struct perf_mmap *map, void *to, void *buf, size_t size)); +#ifdef HAVE_AIO_SUPPORT +int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, + int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off), + off_t *off); +#else +static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused, + int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused, + off_t *off __maybe_unused) +{ + return 0; +} +#endif size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index cf8bd123cf73..aed170bd4384 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <asm/bug.h> struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -186,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, char curpath[PATH_MAX]; int oldns = -1; int newns = -1; + char *oldcwd = NULL; if (nc == NULL) return; @@ -199,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) return; + oldcwd = get_current_dir_name(); + if (!oldcwd) + return; + oldns = open(curpath, O_RDONLY); if (oldns < 0) - return; + goto errout; newns = open(nsi->mntns_path, O_RDONLY); if (newns < 0) @@ -210,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (setns(newns, CLONE_NEWNS) < 0) goto errout; + nc->oldcwd = oldcwd; nc->oldns = oldns; nc->newns = newns; return; errout: + free(oldcwd); if (oldns > -1) close(oldns); if (newns > -1) @@ -223,11 +231,16 @@ errout: void nsinfo__mountns_exit(struct nscookie *nc) { - if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd) return; setns(nc->oldns, CLONE_NEWNS); + if (nc->oldcwd) { + WARN_ON_ONCE(chdir(nc->oldcwd)); + zfree(&nc->oldcwd); + } + if (nc->oldns > -1) { close(nc->oldns); nc->oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index cae1a9a39722..d5f46c09ea31 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -38,6 +38,7 @@ struct nsinfo { struct nscookie { int oldns; int newns; + char *oldcwd; }; int nsinfo__init(struct nsinfo *nsi); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index bad9e0296e9a..897589507d97 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -80,14 +80,20 @@ static union perf_event *dup_event(struct ordered_events *oe, return oe->copy_on_queue ? __dup_event(oe, event) : event; } -static void free_dup_event(struct ordered_events *oe, union perf_event *event) +static void __free_dup_event(struct ordered_events *oe, union perf_event *event) { - if (event && oe->copy_on_queue) { + if (event) { oe->cur_alloc_size -= event->header.size; free(event); } } +static void free_dup_event(struct ordered_events *oe, union perf_event *event) +{ + if (oe->copy_on_queue) + __free_dup_event(oe, event); +} + #define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct ordered_event)) static struct ordered_event *alloc_event(struct ordered_events *oe, union perf_event *event) @@ -95,21 +101,49 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, struct list_head *cache = &oe->cache; struct ordered_event *new = NULL; union perf_event *new_event; + size_t size; new_event = dup_event(oe, event); if (!new_event) return NULL; + /* + * We maintain the following scheme of buffers for ordered + * event allocation: + * + * to_free list -> buffer1 (64K) + * buffer2 (64K) + * ... + * + * Each buffer keeps an array of ordered events objects: + * buffer -> event[0] + * event[1] + * ... + * + * Each allocated ordered event is linked to one of + * following lists: + * - time ordered list 'events' + * - list of currently removed events 'cache' + * + * Allocation of the ordered event uses the following order + * to get the memory: + * - use recently removed object from 'cache' list + * - use available object in current allocation buffer + * - allocate new buffer if the current buffer is full + * + * Removal of ordered event object moves it from events to + * the cache list. + */ + size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new); + if (!list_empty(cache)) { new = list_entry(cache->next, struct ordered_event, list); list_del(&new->list); } else if (oe->buffer) { - new = oe->buffer + oe->buffer_idx; + new = &oe->buffer->event[oe->buffer_idx]; if (++oe->buffer_idx == MAX_SAMPLE_BUFFER) oe->buffer = NULL; - } else if (oe->cur_alloc_size < oe->max_alloc_size) { - size_t size = MAX_SAMPLE_BUFFER * sizeof(*new); - + } else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) { oe->buffer = malloc(size); if (!oe->buffer) { free_dup_event(oe, new_event); @@ -122,11 +156,11 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, oe->cur_alloc_size += size; list_add(&oe->buffer->list, &oe->to_free); - /* First entry is abused to maintain the to_free list. */ - oe->buffer_idx = 2; - new = oe->buffer + 1; + oe->buffer_idx = 1; + new = &oe->buffer->event[0]; } else { pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size); + return NULL; } new->event = new_event; @@ -185,13 +219,12 @@ int ordered_events__queue(struct ordered_events *oe, union perf_event *event, return 0; } -static int __ordered_events__flush(struct ordered_events *oe) +static int do_flush(struct ordered_events *oe, bool show_progress) { struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; u64 limit = oe->next_flush; u64 last_ts = oe->last ? oe->last->timestamp : 0ULL; - bool show_progress = limit == ULLONG_MAX; struct ui_progress prog; int ret; @@ -229,7 +262,8 @@ static int __ordered_events__flush(struct ordered_events *oe) return 0; } -int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) +static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, + u64 timestamp) { static const char * const str[] = { "NONE", @@ -238,12 +272,16 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) "HALF ", }; int err; + bool show_progress = false; if (oe->nr_events == 0) return 0; switch (how) { case OE_FLUSH__FINAL: + show_progress = true; + __fallthrough; + case OE_FLUSH__TOP: oe->next_flush = ULLONG_MAX; break; @@ -264,6 +302,11 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) break; } + case OE_FLUSH__TIME: + oe->next_flush = timestamp; + show_progress = false; + break; + case OE_FLUSH__ROUND: case OE_FLUSH__NONE: default: @@ -274,7 +317,7 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(oe); + err = do_flush(oe, show_progress); if (!err) { if (how == OE_FLUSH__ROUND) @@ -290,7 +333,29 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) return err; } -void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver) +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) +{ + return __ordered_events__flush(oe, how, 0); +} + +int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp) +{ + return __ordered_events__flush(oe, OE_FLUSH__TIME, timestamp); +} + +u64 ordered_events__first_time(struct ordered_events *oe) +{ + struct ordered_event *event; + + if (list_empty(&oe->events)) + return 0; + + event = list_first_entry(&oe->events, struct ordered_event, list); + return event->timestamp; +} + +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver, + void *data) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); @@ -298,17 +363,41 @@ void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t d oe->max_alloc_size = (u64) -1; oe->cur_alloc_size = 0; oe->deliver = deliver; + oe->data = data; +} + +static void +ordered_events_buffer__free(struct ordered_events_buffer *buffer, + unsigned int max, struct ordered_events *oe) +{ + if (oe->copy_on_queue) { + unsigned int i; + + for (i = 0; i < max; i++) + __free_dup_event(oe, buffer->event[i].event); + } + + free(buffer); } void ordered_events__free(struct ordered_events *oe) { - while (!list_empty(&oe->to_free)) { - struct ordered_event *event; + struct ordered_events_buffer *buffer, *tmp; - event = list_entry(oe->to_free.next, struct ordered_event, list); - list_del(&event->list); - free_dup_event(oe, event->event); - free(event); + if (list_empty(&oe->to_free)) + return; + + /* + * Current buffer might not have all the events allocated + * yet, we need to free only allocated ones ... + */ + list_del(&oe->buffer->list); + ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + + /* ... and continue with the rest */ + list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) { + list_del(&buffer->list); + ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe); } } @@ -318,5 +407,5 @@ void ordered_events__reinit(struct ordered_events *oe) ordered_events__free(oe); memset(oe, '\0', sizeof(*oe)); - ordered_events__init(oe, old_deliver); + ordered_events__init(oe, old_deliver, oe->data); } diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 8c7a2948593e..0920fb0ec6cc 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -18,6 +18,8 @@ enum oe_flush { OE_FLUSH__FINAL, OE_FLUSH__ROUND, OE_FLUSH__HALF, + OE_FLUSH__TOP, + OE_FLUSH__TIME, }; struct ordered_events; @@ -25,32 +27,41 @@ struct ordered_events; typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, struct ordered_event *event); +struct ordered_events_buffer { + struct list_head list; + struct ordered_event event[0]; +}; + struct ordered_events { - u64 last_flush; - u64 next_flush; - u64 max_timestamp; - u64 max_alloc_size; - u64 cur_alloc_size; - struct list_head events; - struct list_head cache; - struct list_head to_free; - struct ordered_event *buffer; - struct ordered_event *last; - ordered_events__deliver_t deliver; - int buffer_idx; - unsigned int nr_events; - enum oe_flush last_flush_type; - u32 nr_unordered_events; - bool copy_on_queue; + u64 last_flush; + u64 next_flush; + u64 max_timestamp; + u64 max_alloc_size; + u64 cur_alloc_size; + struct list_head events; + struct list_head cache; + struct list_head to_free; + struct ordered_events_buffer *buffer; + struct ordered_event *last; + ordered_events__deliver_t deliver; + int buffer_idx; + unsigned int nr_events; + enum oe_flush last_flush_type; + u32 nr_unordered_events; + bool copy_on_queue; + void *data; }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, u64 timestamp, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); -void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); +int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp); +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver, + void *data); void ordered_events__free(struct ordered_events *oe); void ordered_events__reinit(struct ordered_events *oe); +u64 ordered_events__first_time(struct ordered_events *oe); static inline void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f8cd3e7c9186..920e1e6551dd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr", [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", @@ -1037,6 +1038,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: return config_term_common(attr, term, err); @@ -1162,6 +1167,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); break; @@ -2454,7 +2462,7 @@ restart: if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else - strncpy(name, syms->symbol, MAX_NAME_LEN); + strlcpy(name, syms->symbol, MAX_NAME_LEN); evt_list[evt_i] = strdup(name); if (evt_list[evt_i] == NULL) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 4473dac27aee..5ed035cbcbb7 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ enum { PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, PARSE_EVENTS__TERM_TYPE_MAX_STACK, + PARSE_EVENTS__TERM_TYPE_MAX_EVENTS, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, PARSE_EVENTS__TERM_TYPE_DRV_CFG, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5f761f3ed0f3..7805c71aaae2 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -269,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } +nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7799788f662f..11a234740632 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -145,7 +145,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * int fd, ret = -1; char path[PATH_MAX]; - snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -175,7 +175,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n ssize_t sret; int fd; - snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -205,7 +205,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) char path[PATH_MAX]; int fd; - snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -223,7 +223,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, char path[PATH_MAX]; int fd; - snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); + scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -655,45 +655,6 @@ static int is_arm_pmu_core(const char *name) return 0; } -/* - * Return the CPU id as a raw string. - * - * Each architecture should provide a more precise id string that - * can be use to match the architecture's "mapfile". - */ -char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) -{ - return NULL; -} - -/* Return zero when the cpuid from the mapfile.csv matches the - * cpuid string generated on this platform. - * Otherwise return non-zero. - */ -int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) -{ - regex_t re; - regmatch_t pmatch[1]; - int match; - - if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { - /* Warn unable to generate match particular string. */ - pr_info("Invalid regular expression %s\n", mapcpuid); - return 1; - } - - match = !regexec(&re, cpuid, 1, pmatch, 0); - regfree(&re); - if (match) { - size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); - - /* Verify the entire string matched. */ - if (match_len == strlen(cpuid)) - return 0; - } - return 1; -} - static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; @@ -773,7 +734,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) + if (strcmp(pname, name)) continue; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f119eb628dbb..18a59fba97ff 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -692,7 +692,7 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, return ret; for (i = 0; i < ntevs && ret >= 0; i++) { - /* point.address is the addres of point.symbol + point.offset */ + /* point.address is the address of point.symbol + point.offset */ tevs[i].point.address -= stext; tevs[i].point.module = strdup(exec); if (!tevs[i].point.module) { @@ -1819,6 +1819,12 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) tp->offset = strtoul(fmt2_str, NULL, 10); } + if (tev->uprobes) { + fmt2_str = strchr(p, '('); + if (fmt2_str) + tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0); + } + tev->nargs = argc - 2; tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); if (tev->args == NULL) { @@ -2012,6 +2018,22 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg, return err; } +static int +synthesize_uprobe_trace_def(struct probe_trace_event *tev, struct strbuf *buf) +{ + struct probe_trace_point *tp = &tev->point; + int err; + + err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address); + + if (err >= 0 && tp->ref_ctr_offset) { + if (!uprobe_ref_ctr_is_supported()) + return -1; + err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset); + } + return err >= 0 ? 0 : -1; +} + char *synthesize_probe_trace_command(struct probe_trace_event *tev) { struct probe_trace_point *tp = &tev->point; @@ -2041,15 +2063,17 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) } /* Use the tp->address for uprobes */ - if (tev->uprobes) - err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address); - else if (!strncmp(tp->symbol, "0x", 2)) + if (tev->uprobes) { + err = synthesize_uprobe_trace_def(tev, &buf); + } else if (!strncmp(tp->symbol, "0x", 2)) { /* Absolute address. See try_to_find_absolute_address() */ err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "", tp->module ? ":" : "", tp->address); - else + } else { err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", tp->symbol, tp->offset); + } + if (err) goto error; @@ -2633,6 +2657,13 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev) { int i; char *buf = synthesize_probe_trace_command(tev); + struct probe_trace_point *tp = &tev->point; + + if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) { + pr_warning("A semaphore is associated with %s:%s and " + "seems your kernel doesn't support it.\n", + tev->group, tev->event); + } /* Old uprobe event doesn't support memory dereference */ if (!tev->uprobes || tev->nargs == 0 || !buf) @@ -3031,7 +3062,7 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev, /* * Give it a '0x' leading symbol name. * In __add_probe_trace_events, a NULL symbol is interpreted as - * invalud. + * invalid. */ if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) goto errout; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 45b14f020558..15a98c3a2a2f 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -27,6 +27,7 @@ struct probe_trace_point { char *symbol; /* Base symbol */ char *module; /* Module name */ unsigned long offset; /* Offset from symbol */ + unsigned long ref_ctr_offset; /* SDT reference counter offset */ unsigned long address; /* Actual address of the trace point */ bool retprobe; /* Return probe flag */ }; diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index b76088fadf3d..0b1195cad0e5 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -424,7 +424,7 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target, if (target && build_id_cache__cached(target)) { /* This is a cached buildid */ - strncpy(sbuildid, target, SBUILD_ID_SIZE); + strlcpy(sbuildid, target, SBUILD_ID_SIZE); dir_name = build_id_cache__linkname(sbuildid, NULL, 0); goto found; } @@ -696,8 +696,16 @@ out_err: #ifdef HAVE_GELF_GETNOTE_SUPPORT static unsigned long long sdt_note__get_addr(struct sdt_note *note) { - return note->bit32 ? (unsigned long long)note->addr.a32[0] - : (unsigned long long)note->addr.a64[0]; + return note->bit32 ? + (unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] : + (unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC]; +} + +static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note) +{ + return note->bit32 ? + (unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] : + (unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR]; } static const char * const type_to_suffix[] = { @@ -775,14 +783,21 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, { struct strbuf buf; char *ret = NULL, **args; - int i, args_count; + int i, args_count, err; + unsigned long long ref_ctr_offset; if (strbuf_init(&buf, 32) < 0) return NULL; - if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx", - sdtgrp, note->name, pathname, - sdt_note__get_addr(note)) < 0) + err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx", + sdtgrp, note->name, pathname, + sdt_note__get_addr(note)); + + ref_ctr_offset = sdt_note__get_ref_ctr_offset(note); + if (ref_ctr_offset && err >= 0) + err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset); + + if (err < 0) goto error; if (!note->args) @@ -998,6 +1013,7 @@ int probe_cache__show_all_caches(struct strfilter *filter) enum ftrace_readme { FTRACE_README_PROBE_TYPE_X = 0, FTRACE_README_KRETPROBE_OFFSET, + FTRACE_README_UPROBE_REF_CTR, FTRACE_README_END, }; @@ -1009,6 +1025,7 @@ static struct { [idx] = {.pattern = pat, .avail = false} DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), + DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"), }; static bool scan_ftrace_readme(enum ftrace_readme type) @@ -1064,3 +1081,8 @@ bool kretprobe_offset_is_supported(void) { return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET); } + +bool uprobe_ref_ctr_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR); +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 63f29b1d22c1..2a249182f2a6 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -69,6 +69,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); bool kretprobe_offset_is_supported(void); +bool uprobe_ref_ctr_is_supported(void); #else /* ! HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused) { diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ce501ba14b08..dda0ac978b1e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -340,7 +340,7 @@ static bool is_tracepoint(struct pyrf_event *pevent) } static PyObject* -tracepoint_field(struct pyrf_event *pe, struct format_field *field) +tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) { struct tep_handle *pevent = field->event->pevent; void *data = pe->sample.raw_data; @@ -348,28 +348,28 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) unsigned long long val; unsigned int offset, len; - if (field->flags & FIELD_IS_ARRAY) { + if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; len = field->size; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { val = tep_read_number(pevent, data + offset, len); offset = val; len = offset >> 16; offset &= 0xffff; } - if (field->flags & FIELD_IS_STRING && + if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { ret = _PyUnicode_FromString((char *)data + offset); } else { ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); - field->flags &= ~FIELD_IS_STRING; + field->flags &= ~TEP_FIELD_IS_STRING; } } else { val = tep_read_number(pevent, data + field->offset, field->size); - if (field->flags & FIELD_IS_POINTER) + if (field->flags & TEP_FIELD_IS_POINTER) ret = PyLong_FromUnsignedLong((unsigned long) val); - else if (field->flags & FIELD_IS_SIGNED) + else if (field->flags & TEP_FIELD_IS_SIGNED) ret = PyLong_FromLong((long) val); else ret = PyLong_FromUnsignedLong((unsigned long) val); @@ -383,10 +383,10 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct perf_evsel *evsel = pevent->evsel; - struct format_field *field; + struct tep_format_field *field; if (!evsel->tp_format) { - struct event_format *tp_format; + struct tep_event *tp_format; tp_format = trace_event__tp_format_id(evsel->attr.config); if (!tp_format) @@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, file = PyFile_FromFile(fp, "perf", "r", NULL); #else - file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1); + file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, + NULL, NULL, NULL, 0); #endif if (file == NULL) goto free_list; @@ -1240,7 +1241,7 @@ static struct { static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { - struct event_format *tp_format; + struct tep_event *tp_format; static char *kwlist[] = { "sys", "name", NULL }; char *sys = NULL; char *name = NULL; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index d2c78ffd9fee..68b2570304ec 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -147,6 +147,9 @@ #include <linux/bitops.h> #include <linux/log2.h> +#include <sys/stat.h> +#include <sys/types.h> + #include "cpumap.h" #include "color.h" #include "evsel.h" @@ -159,6 +162,7 @@ #include "auxtrace.h" #include "s390-cpumsf.h" #include "s390-cpumsf-kernel.h" +#include "config.h" struct s390_cpumsf { struct auxtrace auxtrace; @@ -170,6 +174,8 @@ struct s390_cpumsf { u32 pmu_type; u16 machine_type; bool data_queued; + bool use_logfile; + char *logdir; }; struct s390_cpumsf_queue { @@ -177,6 +183,7 @@ struct s390_cpumsf_queue { unsigned int queue_nr; struct auxtrace_buffer *buffer; int cpu; + FILE *logfile; }; /* Display s390 CPU measurement facility basic-sampling data entry */ @@ -499,7 +506,7 @@ static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) aux_ts = get_trailer_time(buf); if (!aux_ts) { pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", - sfq->buffer->data_offset); + (s64)sfq->buffer->data_offset); aux_ts = ~0ULL; goto out; } @@ -595,6 +602,12 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, buffer->use_size = buffer->size; buffer->use_data = buffer->data; } + if (sfq->logfile) { /* Write into log file */ + size_t rc = fwrite(buffer->data, buffer->size, 1, + sfq->logfile); + if (rc != 1) + pr_err("Failed to write auxiliary data\n"); + } } else buffer = sfq->buffer; @@ -606,6 +619,13 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, return -ENOMEM; buffer->use_size = buffer->size; buffer->use_data = buffer->data; + + if (sfq->logfile) { /* Write into log file */ + size_t rc = fwrite(buffer->data, buffer->size, 1, + sfq->logfile); + if (rc != 1) + pr_err("Failed to write auxiliary data\n"); + } } pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, @@ -640,6 +660,23 @@ s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) sfq->sf = sf; sfq->queue_nr = queue_nr; sfq->cpu = -1; + if (sf->use_logfile) { + char *name; + int rc; + + rc = (sf->logdir) + ? asprintf(&name, "%s/aux.smp.%02x", + sf->logdir, queue_nr) + : asprintf(&name, "aux.smp.%02x", queue_nr); + if (rc > 0) + sfq->logfile = fopen(name, "w"); + if (sfq->logfile == NULL) { + pr_err("Failed to open auxiliary log file %s," + "continue...\n", name); + sf->use_logfile = false; + } + free(name); + } return sfq; } @@ -850,8 +887,16 @@ static void s390_cpumsf_free_queues(struct perf_session *session) struct auxtrace_queues *queues = &sf->queues; unsigned int i; - for (i = 0; i < queues->nr_queues; i++) + for (i = 0; i < queues->nr_queues; i++) { + struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) + queues->queue_array[i].priv; + + if (sfq != NULL && sfq->logfile) { + fclose(sfq->logfile); + sfq->logfile = NULL; + } zfree(&queues->queue_array[i].priv); + } auxtrace_queues__free(queues); } @@ -864,6 +909,7 @@ static void s390_cpumsf_free(struct perf_session *session) auxtrace_heap__free(&sf->heap); s390_cpumsf_free_queues(session); session->auxtrace = NULL; + free(sf->logdir); free(sf); } @@ -877,17 +923,55 @@ static int s390_cpumsf_get_type(const char *cpuid) /* Check itrace options set on perf report command. * Return true, if none are set or all options specified can be - * handled on s390. + * handled on s390 (currently only option 'd' for logging. * Return false otherwise. */ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) { + bool ison = false; + if (!itops || !itops->set) return true; - pr_err("No --itrace options supported\n"); + ison = itops->inject || itops->instructions || itops->branches || + itops->transactions || itops->ptwrites || + itops->pwr_events || itops->errors || + itops->dont_decode || itops->calls || itops->returns || + itops->callchain || itops->thread_stack || + itops->last_branch; + if (!ison) + return true; + pr_err("Unsupported --itrace options specified\n"); return false; } +/* Check for AUXTRACE dump directory if it is needed. + * On failure print an error message but continue. + * Return 0 on wrong keyword in config file and 1 otherwise. + */ +static int s390_cpumsf__config(const char *var, const char *value, void *cb) +{ + struct s390_cpumsf *sf = cb; + struct stat stbuf; + int rc; + + if (strcmp(var, "auxtrace.dumpdir")) + return 0; + sf->logdir = strdup(value); + if (sf->logdir == NULL) { + pr_err("Failed to find auxtrace log directory %s," + " continue with current directory...\n", value); + return 1; + } + rc = stat(sf->logdir, &stbuf); + if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { + pr_err("Missing auxtrace log directory %s," + " continue with current directory...\n", value); + free(sf->logdir); + sf->logdir = NULL; + } + return 1; +} + int s390_cpumsf_process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -906,6 +990,9 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, err = -EINVAL; goto err_free; } + sf->use_logfile = session->itrace_synth_opts->log; + if (sf->use_logfile) + perf_config(s390_cpumsf__config, sf); err = auxtrace_queues__init(&sf->queues); if (err) @@ -940,6 +1027,7 @@ err_free_queues: auxtrace_queues__free(&sf->queues); session->auxtrace = NULL; err_free: + free(sf->logdir); free(sf); return err; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 45484f0f7292..b93f36b887b5 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -99,7 +99,7 @@ static void define_symbolic_value(const char *ev_name, LEAVE; } -static void define_symbolic_values(struct print_flag_sym *field, +static void define_symbolic_values(struct tep_print_flag_sym *field, const char *ev_name, const char *field_name) { @@ -157,7 +157,7 @@ static void define_flag_value(const char *ev_name, LEAVE; } -static void define_flag_values(struct print_flag_sym *field, +static void define_flag_values(struct tep_print_flag_sym *field, const char *ev_name, const char *field_name) { @@ -189,62 +189,62 @@ static void define_flag_field(const char *ev_name, LEAVE; } -static void define_event_symbols(struct event_format *event, +static void define_event_symbols(struct tep_event *event, const char *ev_name, - struct print_arg *args) + struct tep_print_arg *args) { if (args == NULL) return; switch (args->type) { - case PRINT_NULL: + case TEP_PRINT_NULL: break; - case PRINT_ATOM: + case TEP_PRINT_ATOM: define_flag_value(ev_name, cur_field_name, "0", args->atom.atom); zero_flag_atom = 0; break; - case PRINT_FIELD: + case TEP_PRINT_FIELD: free(cur_field_name); cur_field_name = strdup(args->field.name); break; - case PRINT_FLAGS: + case TEP_PRINT_FLAGS: define_event_symbols(event, ev_name, args->flags.field); define_flag_field(ev_name, cur_field_name, args->flags.delim); define_flag_values(args->flags.flags, ev_name, cur_field_name); break; - case PRINT_SYMBOL: + case TEP_PRINT_SYMBOL: define_event_symbols(event, ev_name, args->symbol.field); define_symbolic_field(ev_name, cur_field_name); define_symbolic_values(args->symbol.symbols, ev_name, cur_field_name); break; - case PRINT_HEX: - case PRINT_HEX_STR: + case TEP_PRINT_HEX: + case TEP_PRINT_HEX_STR: define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; - case PRINT_INT_ARRAY: + case TEP_PRINT_INT_ARRAY: define_event_symbols(event, ev_name, args->int_array.field); define_event_symbols(event, ev_name, args->int_array.count); define_event_symbols(event, ev_name, args->int_array.el_size); break; - case PRINT_BSTRING: - case PRINT_DYNAMIC_ARRAY: - case PRINT_DYNAMIC_ARRAY_LEN: - case PRINT_STRING: - case PRINT_BITMASK: + case TEP_PRINT_BSTRING: + case TEP_PRINT_DYNAMIC_ARRAY: + case TEP_PRINT_DYNAMIC_ARRAY_LEN: + case TEP_PRINT_STRING: + case TEP_PRINT_BITMASK: break; - case PRINT_TYPE: + case TEP_PRINT_TYPE: define_event_symbols(event, ev_name, args->typecast.item); break; - case PRINT_OP: + case TEP_PRINT_OP: if (strcmp(args->op.op, ":") == 0) zero_flag_atom = 1; define_event_symbols(event, ev_name, args->op.left); define_event_symbols(event, ev_name, args->op.right); break; - case PRINT_FUNC: + case TEP_PRINT_FUNC: default: pr_err("Unsupported print arg type\n"); /* we should warn... */ @@ -338,8 +338,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, struct addr_location *al) { struct thread *thread = al->thread; - struct event_format *event = evsel->tp_format; - struct format_field *field; + struct tep_event *event = evsel->tp_format; + struct tep_format_field *field; static char handler[256]; unsigned long long val; unsigned long s, ns; @@ -388,9 +388,9 @@ static void perl_process_tracepoint(struct perf_sample *sample, /* common fields other than pid can be accessed via xsub fns */ for (field = event->format.fields; field; field = field->next) { - if (field->flags & FIELD_IS_STRING) { + if (field->flags & TEP_FIELD_IS_STRING) { int offset; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; } else @@ -399,7 +399,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, } else { /* FIELD_IS_NUMERIC */ val = read_size(event, data + field->offset, field->size); - if (field->flags & FIELD_IS_SIGNED) { + if (field->flags & TEP_FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); } else { XPUSHs(sv_2mortal(newSVuv(val))); @@ -537,8 +537,8 @@ static int perl_stop_script(void) static int perl_generate_script(struct tep_handle *pevent, const char *outfile) { - struct event_format *event = NULL; - struct format_field *f; + struct tep_event *event = NULL; + struct tep_format_field *f; char fname[PATH_MAX]; int not_first, count; FILE *ofp; @@ -646,11 +646,11 @@ sub print_backtrace\n\ count++; fprintf(ofp, "%s=", f->name); - if (f->flags & FIELD_IS_STRING || - f->flags & FIELD_IS_FLAG || - f->flags & FIELD_IS_SYMBOLIC) + if (f->flags & TEP_FIELD_IS_STRING || + f->flags & TEP_FIELD_IS_FLAG || + f->flags & TEP_FIELD_IS_SYMBOLIC) fprintf(ofp, "%%s"); - else if (f->flags & FIELD_IS_SIGNED) + else if (f->flags & TEP_FIELD_IS_SIGNED) fprintf(ofp, "%%d"); else fprintf(ofp, "%%u"); @@ -668,7 +668,7 @@ sub print_backtrace\n\ if (++count % 5 == 0) fprintf(ofp, "\n\t "); - if (f->flags & FIELD_IS_FLAG) { + if (f->flags & TEP_FIELD_IS_FLAG) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t "); count = 4; @@ -678,7 +678,7 @@ sub print_backtrace\n\ event->name); fprintf(ofp, "\"%s\", $%s)", f->name, f->name); - } else if (f->flags & FIELD_IS_SYMBOLIC) { + } else if (f->flags & TEP_FIELD_IS_SYMBOLIC) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t "); count = 4; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index dfc6093f118c..87ef16a1b17e 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -193,7 +193,7 @@ static void try_call_object(const char *handler_name, PyObject *args) call_object(handler, args, handler_name); } -static void define_value(enum print_arg_type field_type, +static void define_value(enum tep_print_arg_type field_type, const char *ev_name, const char *field_name, const char *field_value, @@ -204,7 +204,7 @@ static void define_value(enum print_arg_type field_type, unsigned long long value; unsigned n = 0; - if (field_type == PRINT_SYMBOL) + if (field_type == TEP_PRINT_SYMBOL) handler_name = "define_symbolic_value"; t = PyTuple_New(4); @@ -223,8 +223,8 @@ static void define_value(enum print_arg_type field_type, Py_DECREF(t); } -static void define_values(enum print_arg_type field_type, - struct print_flag_sym *field, +static void define_values(enum tep_print_arg_type field_type, + struct tep_print_flag_sym *field, const char *ev_name, const char *field_name) { @@ -235,7 +235,7 @@ static void define_values(enum print_arg_type field_type, define_values(field_type, field->next, ev_name, field_name); } -static void define_field(enum print_arg_type field_type, +static void define_field(enum tep_print_arg_type field_type, const char *ev_name, const char *field_name, const char *delim) @@ -244,10 +244,10 @@ static void define_field(enum print_arg_type field_type, PyObject *t; unsigned n = 0; - if (field_type == PRINT_SYMBOL) + if (field_type == TEP_PRINT_SYMBOL) handler_name = "define_symbolic_field"; - if (field_type == PRINT_FLAGS) + if (field_type == TEP_PRINT_FLAGS) t = PyTuple_New(3); else t = PyTuple_New(2); @@ -256,7 +256,7 @@ static void define_field(enum print_arg_type field_type, PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); - if (field_type == PRINT_FLAGS) + if (field_type == TEP_PRINT_FLAGS) PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim)); try_call_object(handler_name, t); @@ -264,54 +264,54 @@ static void define_field(enum print_arg_type field_type, Py_DECREF(t); } -static void define_event_symbols(struct event_format *event, +static void define_event_symbols(struct tep_event *event, const char *ev_name, - struct print_arg *args) + struct tep_print_arg *args) { if (args == NULL) return; switch (args->type) { - case PRINT_NULL: + case TEP_PRINT_NULL: break; - case PRINT_ATOM: - define_value(PRINT_FLAGS, ev_name, cur_field_name, "0", + case TEP_PRINT_ATOM: + define_value(TEP_PRINT_FLAGS, ev_name, cur_field_name, "0", args->atom.atom); zero_flag_atom = 0; break; - case PRINT_FIELD: + case TEP_PRINT_FIELD: free(cur_field_name); cur_field_name = strdup(args->field.name); break; - case PRINT_FLAGS: + case TEP_PRINT_FLAGS: define_event_symbols(event, ev_name, args->flags.field); - define_field(PRINT_FLAGS, ev_name, cur_field_name, + define_field(TEP_PRINT_FLAGS, ev_name, cur_field_name, args->flags.delim); - define_values(PRINT_FLAGS, args->flags.flags, ev_name, + define_values(TEP_PRINT_FLAGS, args->flags.flags, ev_name, cur_field_name); break; - case PRINT_SYMBOL: + case TEP_PRINT_SYMBOL: define_event_symbols(event, ev_name, args->symbol.field); - define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL); - define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name, + define_field(TEP_PRINT_SYMBOL, ev_name, cur_field_name, NULL); + define_values(TEP_PRINT_SYMBOL, args->symbol.symbols, ev_name, cur_field_name); break; - case PRINT_HEX: - case PRINT_HEX_STR: + case TEP_PRINT_HEX: + case TEP_PRINT_HEX_STR: define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; - case PRINT_INT_ARRAY: + case TEP_PRINT_INT_ARRAY: define_event_symbols(event, ev_name, args->int_array.field); define_event_symbols(event, ev_name, args->int_array.count); define_event_symbols(event, ev_name, args->int_array.el_size); break; - case PRINT_STRING: + case TEP_PRINT_STRING: break; - case PRINT_TYPE: + case TEP_PRINT_TYPE: define_event_symbols(event, ev_name, args->typecast.item); break; - case PRINT_OP: + case TEP_PRINT_OP: if (strcmp(args->op.op, ":") == 0) zero_flag_atom = 1; define_event_symbols(event, ev_name, args->op.left); @@ -319,11 +319,11 @@ static void define_event_symbols(struct event_format *event, break; default: /* gcc warns for these? */ - case PRINT_BSTRING: - case PRINT_DYNAMIC_ARRAY: - case PRINT_DYNAMIC_ARRAY_LEN: - case PRINT_FUNC: - case PRINT_BITMASK: + case TEP_PRINT_BSTRING: + case TEP_PRINT_DYNAMIC_ARRAY: + case TEP_PRINT_DYNAMIC_ARRAY_LEN: + case TEP_PRINT_FUNC: + case TEP_PRINT_BITMASK: /* we should warn... */ return; } @@ -332,10 +332,10 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static PyObject *get_field_numeric_entry(struct event_format *event, - struct format_field *field, void *data) +static PyObject *get_field_numeric_entry(struct tep_event *event, + struct tep_format_field *field, void *data) { - bool is_array = field->flags & FIELD_IS_ARRAY; + bool is_array = field->flags & TEP_FIELD_IS_ARRAY; PyObject *obj = NULL, *list = NULL; unsigned long long val; unsigned int item_size, n_items, i; @@ -353,7 +353,7 @@ static PyObject *get_field_numeric_entry(struct event_format *event, val = read_size(event, data + field->offset + i * item_size, item_size); - if (field->flags & FIELD_IS_SIGNED) { + if (field->flags & TEP_FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) obj = _PyLong_FromLong(val); @@ -494,14 +494,14 @@ static PyObject *python_process_brstack(struct perf_sample *sample, pydict_set_item_string_decref(pyelem, "cycles", PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); - thread__find_map(thread, sample->cpumode, - br->entries[i].from, &al); + thread__find_map_fb(thread, sample->cpumode, + br->entries[i].from, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "from_dsoname", _PyUnicode_FromString(dsoname)); - thread__find_map(thread, sample->cpumode, - br->entries[i].to, &al); + thread__find_map_fb(thread, sample->cpumode, + br->entries[i].to, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); @@ -576,14 +576,14 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, if (!pyelem) Py_FatalError("couldn't create Python dictionary"); - thread__find_symbol(thread, sample->cpumode, - br->entries[i].from, &al); + thread__find_symbol_fb(thread, sample->cpumode, + br->entries[i].from, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "from", _PyUnicode_FromString(bf)); - thread__find_symbol(thread, sample->cpumode, - br->entries[i].to, &al); + thread__find_symbol_fb(thread, sample->cpumode, + br->entries[i].to, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "to", _PyUnicode_FromString(bf)); @@ -790,11 +790,11 @@ static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { - struct event_format *event = evsel->tp_format; + struct tep_event *event = evsel->tp_format; PyObject *handler, *context, *t, *obj = NULL, *callchain; PyObject *dict = NULL, *all_entries_dict = NULL; static char handler_name[256]; - struct format_field *field; + struct tep_format_field *field; unsigned long s, ns; unsigned n = 0; int pid; @@ -867,22 +867,22 @@ static void python_process_tracepoint(struct perf_sample *sample, unsigned int offset, len; unsigned long long val; - if (field->flags & FIELD_IS_ARRAY) { + if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; len = field->size; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { val = tep_read_number(scripting_context->pevent, data + offset, len); offset = val; len = offset >> 16; offset &= 0xffff; } - if (field->flags & FIELD_IS_STRING && + if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { obj = _PyUnicode_FromString((char *) data + offset); } else { obj = PyByteArray_FromStringAndSize((const char *) data + offset, len); - field->flags &= ~FIELD_IS_STRING; + field->flags &= ~TEP_FIELD_IS_STRING; } } else { /* FIELD_IS_NUMERIC */ obj = get_field_numeric_entry(event, field, data); @@ -1590,8 +1590,8 @@ static int python_stop_script(void) static int python_generate_script(struct tep_handle *pevent, const char *outfile) { - struct event_format *event = NULL; - struct format_field *f; + struct tep_event *event = NULL; + struct tep_format_field *f; char fname[PATH_MAX]; int not_first, count; FILE *ofp; @@ -1686,12 +1686,12 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile count++; fprintf(ofp, "%s=", f->name); - if (f->flags & FIELD_IS_STRING || - f->flags & FIELD_IS_FLAG || - f->flags & FIELD_IS_ARRAY || - f->flags & FIELD_IS_SYMBOLIC) + if (f->flags & TEP_FIELD_IS_STRING || + f->flags & TEP_FIELD_IS_FLAG || + f->flags & TEP_FIELD_IS_ARRAY || + f->flags & TEP_FIELD_IS_SYMBOLIC) fprintf(ofp, "%%s"); - else if (f->flags & FIELD_IS_SIGNED) + else if (f->flags & TEP_FIELD_IS_SIGNED) fprintf(ofp, "%%d"); else fprintf(ofp, "%%u"); @@ -1709,7 +1709,7 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile if (++count % 5 == 0) fprintf(ofp, "\n\t\t"); - if (f->flags & FIELD_IS_FLAG) { + if (f->flags & TEP_FIELD_IS_FLAG) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t\t"); count = 4; @@ -1719,7 +1719,7 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile event->name); fprintf(ofp, "\"%s\", %s)", f->name, f->name); - } else if (f->flags & FIELD_IS_SYMBOLIC) { + } else if (f->flags & TEP_FIELD_IS_SYMBOLIC) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t\t"); count = 4; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8b9369303561..5456c84c7dd1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -24,6 +24,7 @@ #include "thread.h" #include "thread-stack.h" #include "stat.h" +#include "arch/common.h" static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, @@ -125,7 +126,8 @@ struct perf_session *perf_session__new(struct perf_data *data, session->tool = tool; INIT_LIST_HEAD(&session->auxtrace_index); machines__init(&session->machines); - ordered_events__init(&session->ordered_events, ordered_events__deliver_event); + ordered_events__init(&session->ordered_events, + ordered_events__deliver_event, NULL); if (data) { if (perf_data__open(data)) @@ -150,6 +152,9 @@ struct perf_session *perf_session__new(struct perf_data *data, session->machines.host.env = &perf_env; } + session->machines.host.single_address_space = + perf_env__single_address_space(session->machines.host.env); + if (!data || perf_data__is_write(data)) { /* * In O_RDONLY mode this will be performed when reading the @@ -199,12 +204,10 @@ void perf_session__delete(struct perf_session *session) free(session); } -static int process_event_synth_tracing_data_stub(struct perf_tool *tool +static int process_event_synth_tracing_data_stub(struct perf_session *session __maybe_unused, union perf_event *event - __maybe_unused, - struct perf_session *session - __maybe_unused) + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -277,10 +280,8 @@ static int skipn(int fd, off_t n) return 0; } -static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session - __maybe_unused) +static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused, + union perf_event *event) { dump_printf(": unhandled!\n"); if (perf_data__is_pipe(session->data)) @@ -288,9 +289,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, return event->auxtrace.size; } -static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +static int process_event_op2_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -298,9 +298,8 @@ static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, static -int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_thread_map_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { if (dump_trace) perf_event__fprintf_thread_map(event, stdout); @@ -310,9 +309,8 @@ int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused, } static -int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_cpu_map_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { if (dump_trace) perf_event__fprintf_cpu_map(event, stdout); @@ -322,9 +320,8 @@ int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused, } static -int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_stat_config_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { if (dump_trace) perf_event__fprintf_stat_config(event, stdout); @@ -333,10 +330,8 @@ int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_stat_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) +static int process_stat_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) { if (dump_trace) perf_event__fprintf_stat(event, stdout); @@ -345,10 +340,8 @@ static int process_stat_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_stat_round_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) +static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) { if (dump_trace) perf_event__fprintf_stat_round(event, stdout); @@ -1374,37 +1367,37 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_HEADER_TRACING_DATA: /* setup for reading amidst mmap */ lseek(fd, file_offset, SEEK_SET); - return tool->tracing_data(tool, event, session); + return tool->tracing_data(session, event); case PERF_RECORD_HEADER_BUILD_ID: - return tool->build_id(tool, event, session); + return tool->build_id(session, event); case PERF_RECORD_FINISHED_ROUND: return tool->finished_round(tool, event, oe); case PERF_RECORD_ID_INDEX: - return tool->id_index(tool, event, session); + return tool->id_index(session, event); case PERF_RECORD_AUXTRACE_INFO: - return tool->auxtrace_info(tool, event, session); + return tool->auxtrace_info(session, event); case PERF_RECORD_AUXTRACE: /* setup for reading amidst mmap */ lseek(fd, file_offset + event->header.size, SEEK_SET); - return tool->auxtrace(tool, event, session); + return tool->auxtrace(session, event); case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); - return tool->auxtrace_error(tool, event, session); + return tool->auxtrace_error(session, event); case PERF_RECORD_THREAD_MAP: - return tool->thread_map(tool, event, session); + return tool->thread_map(session, event); case PERF_RECORD_CPU_MAP: - return tool->cpu_map(tool, event, session); + return tool->cpu_map(session, event); case PERF_RECORD_STAT_CONFIG: - return tool->stat_config(tool, event, session); + return tool->stat_config(session, event); case PERF_RECORD_STAT: - return tool->stat(tool, event, session); + return tool->stat(session, event); case PERF_RECORD_STAT_ROUND: - return tool->stat_round(tool, event, session); + return tool->stat_round(session, event); case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; - return tool->time_conv(tool, event, session); + return tool->time_conv(session, event); case PERF_RECORD_HEADER_FEATURE: - return tool->feature(tool, event, session); + return tool->feature(session, event); default: return -EINVAL; } @@ -1534,6 +1527,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) return machine__findnew_thread(&session->machines.host, -1, pid); } +/* + * Threads are identified by pid and tid, and the idle task has pid == tid == 0. + * So here a single thread is created for that, but actually there is a separate + * idle task per cpu, so there should be one 'struct thread' per cpu, but there + * is only 1. That causes problems for some tools, requiring workarounds. For + * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu(). + */ int perf_session__register_idle_thread(struct perf_session *session) { struct thread *thread; @@ -2133,9 +2133,8 @@ out: return err; } -int perf_event__process_id_index(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_id_index(struct perf_session *session, + union perf_event *event) { struct perf_evlist *evlist = session->evlist; struct id_index_event *ie = &event->id_index; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index da40b4b380ca..d96eccd7d27f 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -120,9 +120,8 @@ int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample); -int perf_event__process_id_index(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_id_index(struct perf_session *session, + union perf_event *event); int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1942f6dd24f6..63f758c655d5 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -5,16 +5,18 @@ from subprocess import Popen, PIPE from re import sub def clang_has_option(option): - return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if "unknown argument" in o] == [ ] + return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] cc = getenv("CC") if cc == "clang": - from _sysconfigdata import build_time_vars - build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"]) - if not clang_has_option("-mcet"): - build_time_vars["CFLAGS"] = sub("-mcet", "", build_time_vars["CFLAGS"]) - if not clang_has_option("-fcf-protection"): - build_time_vars["CFLAGS"] = sub("-fcf-protection", "", build_time_vars["CFLAGS"]) + from distutils.sysconfig import get_config_vars + vars = get_config_vars() + for var in ('CFLAGS', 'OPT'): + vars[var] = sub("-specs=[^ ]+", "", vars[var]) + if not clang_has_option("-mcet"): + vars[var] = sub("-mcet", "", vars[var]) + if not clang_has_option("-fcf-protection"): + vars[var] = sub("-fcf-protection", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index b284276ec963..6c1a83768eb0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -13,6 +13,7 @@ #include "strlist.h" #include <traceevent/event-parse.h> #include "mem-events.h" +#include "annotate.h" #include <linux/kernel.h> regex_t parent_regex; @@ -36,7 +37,7 @@ enum sort_mode sort__mode = SORT_MODE__NORMAL; * -t, --field-separator * * option, that uses a special separator character and don't pad with spaces, - * replacing all occurances of this separator in symbol names (and other + * replacing all occurrences of this separator in symbol names (and other * output) with a '.' character, that thus it's the only non valid separator. */ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) @@ -422,6 +423,64 @@ struct sort_entry sort_srcline_to = { .se_width_idx = HISTC_SRCLINE_TO, }; +static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + + struct symbol *sym = he->ms.sym; + struct map *map = he->ms.map; + struct perf_evsel *evsel = hists_to_evsel(he->hists); + struct annotation *notes; + double ipc = 0.0, coverage = 0.0; + char tmp[64]; + + if (!sym) + return repsep_snprintf(bf, size, "%-*s", width, "-"); + + if (!sym->annotate2 && symbol__annotate2(sym, map, evsel, + &annotation__default_options, NULL) < 0) { + return 0; + } + + notes = symbol__annotation(sym); + + if (notes->hit_cycles) + ipc = notes->hit_insn / ((double)notes->hit_cycles); + + if (notes->total_insn) { + coverage = notes->cover_insn * 100.0 / + ((double)notes->total_insn); + } + + snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); + return repsep_snprintf(bf, size, "%-*s", width, tmp); +} + +struct sort_entry sort_sym_ipc = { + .se_header = "IPC [IPC Coverage]", + .se_cmp = sort__sym_cmp, + .se_snprintf = hist_entry__sym_ipc_snprintf, + .se_width_idx = HISTC_SYMBOL_IPC, +}; + +static int hist_entry__sym_ipc_null_snprintf(struct hist_entry *he + __maybe_unused, + char *bf, size_t size, + unsigned int width) +{ + char tmp[64]; + + snprintf(tmp, sizeof(tmp), "%-5s %2s", "-", "-"); + return repsep_snprintf(bf, size, "%-*s", width, tmp); +} + +struct sort_entry sort_sym_ipc_null = { + .se_header = "IPC [IPC Coverage]", + .se_cmp = sort__sym_cmp, + .se_snprintf = hist_entry__sym_ipc_null_snprintf, + .se_width_idx = HISTC_SYMBOL_IPC, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -1574,6 +1633,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), + DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), }; #undef DIM @@ -1591,6 +1651,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_CYCLES, "cycles", sort_cycles), DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from), DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to), + DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), }; #undef DIM @@ -1884,7 +1945,7 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, struct hpp_dynamic_entry { struct perf_hpp_fmt hpp; struct perf_evsel *evsel; - struct format_field *field; + struct tep_format_field *field; unsigned dynamic_len; bool raw_trace; }; @@ -1899,7 +1960,7 @@ static int hde_width(struct hpp_dynamic_entry *hde) if (namelen > len) len = namelen; - if (!(hde->field->flags & FIELD_IS_STRING)) { + if (!(hde->field->flags & TEP_FIELD_IS_STRING)) { /* length for print hex numbers */ fieldlen = hde->field->size * 2 + 2; } @@ -1915,7 +1976,7 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde, struct hist_entry *he) { char *str, *pos; - struct format_field *field = hde->field; + struct tep_format_field *field = hde->field; size_t namelen; bool last = false; @@ -2000,7 +2061,7 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hpp_dynamic_entry *hde; size_t len = fmt->user_len; char *str, *pos; - struct format_field *field; + struct tep_format_field *field; size_t namelen; bool last = false; int ret; @@ -2060,7 +2121,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b) { struct hpp_dynamic_entry *hde; - struct format_field *field; + struct tep_format_field *field; unsigned offset, size; hde = container_of(fmt, struct hpp_dynamic_entry, hpp); @@ -2071,7 +2132,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, } field = hde->field; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { unsigned long long dyn; tep_read_number_field(field, a->raw_data, &dyn); @@ -2117,7 +2178,7 @@ static void hde_free(struct perf_hpp_fmt *fmt) } static struct hpp_dynamic_entry * -__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field, +__alloc_dynamic_entry(struct perf_evsel *evsel, struct tep_format_field *field, int level) { struct hpp_dynamic_entry *hde; @@ -2252,7 +2313,7 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam } static int __dynamic_dimension__add(struct perf_evsel *evsel, - struct format_field *field, + struct tep_format_field *field, bool raw_trace, int level) { struct hpp_dynamic_entry *hde; @@ -2270,7 +2331,7 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel, static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level) { int ret; - struct format_field *field; + struct tep_format_field *field; field = evsel->tp_format->format.fields; while (field) { @@ -2305,7 +2366,7 @@ static int add_all_matching_fields(struct perf_evlist *evlist, { int ret = -ESRCH; struct perf_evsel *evsel; - struct format_field *field; + struct tep_format_field *field; evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type != PERF_TYPE_TRACEPOINT) @@ -2327,7 +2388,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok, { char *str, *event_name, *field_name, *opt_name; struct perf_evsel *evsel; - struct format_field *field; + struct tep_format_field *field; bool raw_trace = symbol_conf.raw_trace; int ret = 0; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index a97cf8e6be86..130fe37fe2df 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -229,6 +229,7 @@ enum sort_type { SORT_SYM_SIZE, SORT_DSO_SIZE, SORT_CGROUP_ID, + SORT_SYM_IPC_NULL, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -242,6 +243,7 @@ enum sort_type { SORT_CYCLES, SORT_SRCLINE_FROM, SORT_SRCLINE_TO, + SORT_SYM_IPC, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c new file mode 100644 index 000000000000..fcc8630f6dff --- /dev/null +++ b/tools/perf/util/srccode.c @@ -0,0 +1,186 @@ +/* + * Manage printing of source lines + * Copyright (c) 2017, Intel Corporation. + * Author: Andi Kleen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include "linux/list.h" +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include "srccode.h" +#include "debug.h" +#include "util.h" + +#define MAXSRCCACHE (32*1024*1024) +#define MAXSRCFILES 64 +#define SRC_HTAB_SZ 64 + +struct srcfile { + struct hlist_node hash_nd; + struct list_head nd; + char *fn; + char **lines; + char *map; + unsigned numlines; + size_t maplen; +}; + +static struct hlist_head srcfile_htab[SRC_HTAB_SZ]; +static LIST_HEAD(srcfile_list); +static long map_total_sz; +static int num_srcfiles; + +static unsigned shash(unsigned char *s) +{ + unsigned h = 0; + while (*s) + h = 65599 * h + *s++; + return h ^ (h >> 16); +} + +static int countlines(char *map, int maplen) +{ + int numl; + char *end = map + maplen; + char *p = map; + + if (maplen == 0) + return 0; + numl = 0; + while (p < end && (p = memchr(p, '\n', end - p)) != NULL) { + numl++; + p++; + } + if (p < end) + numl++; + return numl; +} + +static void fill_lines(char **lines, int maxline, char *map, int maplen) +{ + int l; + char *end = map + maplen; + char *p = map; + + if (maplen == 0 || maxline == 0) + return; + l = 0; + lines[l++] = map; + while (p < end && (p = memchr(p, '\n', end - p)) != NULL) { + if (l >= maxline) + return; + lines[l++] = ++p; + } + if (p < end) + lines[l] = p; +} + +static void free_srcfile(struct srcfile *sf) +{ + list_del(&sf->nd); + hlist_del(&sf->hash_nd); + map_total_sz -= sf->maplen; + munmap(sf->map, sf->maplen); + free(sf->lines); + free(sf->fn); + free(sf); + num_srcfiles--; +} + +static struct srcfile *find_srcfile(char *fn) +{ + struct stat st; + struct srcfile *h; + int fd; + unsigned long sz; + unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ; + + hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) { + if (!strcmp(fn, h->fn)) { + /* Move to front */ + list_del(&h->nd); + list_add(&h->nd, &srcfile_list); + return h; + } + } + + /* Only prune if there is more than one entry */ + while ((num_srcfiles > MAXSRCFILES || map_total_sz > MAXSRCCACHE) && + srcfile_list.next != &srcfile_list) { + assert(!list_empty(&srcfile_list)); + h = list_entry(srcfile_list.prev, struct srcfile, nd); + free_srcfile(h); + } + + fd = open(fn, O_RDONLY); + if (fd < 0 || fstat(fd, &st) < 0) { + pr_debug("cannot open source file %s\n", fn); + return NULL; + } + + h = malloc(sizeof(struct srcfile)); + if (!h) + return NULL; + + h->fn = strdup(fn); + if (!h->fn) + goto out_h; + + h->maplen = st.st_size; + sz = (h->maplen + page_size - 1) & ~(page_size - 1); + h->map = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); + close(fd); + if (h->map == (char *)-1) { + pr_debug("cannot mmap source file %s\n", fn); + goto out_fn; + } + h->numlines = countlines(h->map, h->maplen); + h->lines = calloc(h->numlines, sizeof(char *)); + if (!h->lines) + goto out_map; + fill_lines(h->lines, h->numlines, h->map, h->maplen); + list_add(&h->nd, &srcfile_list); + hlist_add_head(&h->hash_nd, &srcfile_htab[hval]); + map_total_sz += h->maplen; + num_srcfiles++; + return h; + +out_map: + munmap(h->map, sz); +out_fn: + free(h->fn); +out_h: + free(h); + return NULL; +} + +/* Result is not 0 terminated */ +char *find_sourceline(char *fn, unsigned line, int *lenp) +{ + char *l, *p; + struct srcfile *sf = find_srcfile(fn); + if (!sf) + return NULL; + line--; + if (line >= sf->numlines) + return NULL; + l = sf->lines[line]; + if (!l) + return NULL; + p = memchr(l, '\n', sf->map + sf->maplen - l); + *lenp = p - l; + return l; +} diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h new file mode 100644 index 000000000000..e500a746d5f1 --- /dev/null +++ b/tools/perf/util/srccode.h @@ -0,0 +1,7 @@ +#ifndef SRCCODE_H +#define SRCCODE_H 1 + +/* Result is not 0 terminated */ +char *find_sourceline(char *fn, unsigned line, int *lenp); + +#endif diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index e767c4a9d4d2..dc86597d0cc4 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -548,6 +548,34 @@ out: return srcline; } +/* Returns filename and fills in line number in line */ +char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line) +{ + char *file = NULL; + const char *dso_name; + + if (!dso->has_srcline) + goto out; + + dso_name = dso__name(dso); + if (dso_name == NULL) + goto out; + + if (!addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL)) + goto out; + + dso->a2l_fails = 0; + return file; + +out: + if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) { + dso->has_srcline = 0; + dso__free_a2l(dso); + } + + return NULL; +} + void free_srcline(char *srcline) { if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0) diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index b2bb5502fd62..5762212dc342 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -16,6 +16,7 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool show_addr, bool unwind_inlines, u64 ip); void free_srcline(char *srcline); +char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); /* insert the srcline into the DSO, which will take ownership */ void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c new file mode 100644 index 000000000000..665ee374fc01 --- /dev/null +++ b/tools/perf/util/stat-display.c @@ -0,0 +1,1172 @@ +#include <stdio.h> +#include <inttypes.h> +#include <linux/time64.h> +#include <math.h> +#include "evlist.h" +#include "evsel.h" +#include "stat.h" +#include "top.h" +#include "thread_map.h" +#include "cpumap.h" +#include "string2.h" +#include "sane_ctype.h" +#include "cgroup.h" +#include <math.h> +#include <api/fs/fs.h> + +#define CNTR_NOT_SUPPORTED "<not supported>" +#define CNTR_NOT_COUNTED "<not counted>" + +static bool is_duration_time(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "duration_time"); +} + +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena) +{ + if (config->csv_output) { + fprintf(config->output, "%s%" PRIu64 "%s%.2f", + config->csv_sep, + run, + config->csv_sep, + ena ? 100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); + } +} + +static void print_noise_pct(struct perf_stat_config *config, + double total, double avg) +{ + double pct = rel_stddev_stats(total, avg); + + if (config->csv_output) + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); + else if (pct) + fprintf(config->output, " ( +-%6.2f%% )", pct); +} + +static void print_noise(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg) +{ + struct perf_stat_evsel *ps; + + if (config->run_count == 1) + return; + + ps = evsel->stats; + print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); +} + +static void print_cgroup(struct perf_stat_config *config, struct perf_evsel *evsel) +{ + if (nr_cgroups) { + const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name : ""; + fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); + } +} + + +static void aggr_printout(struct perf_stat_config *config, + struct perf_evsel *evsel, int id, int nr) +{ + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(config->output, "S%d-C%*d%s%*d%s", + cpu_map__id_to_socket(id), + config->csv_output ? 0 : -8, + cpu_map__id_to_cpu(id), + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; + case AGGR_SOCKET: + fprintf(config->output, "S%*d%s%*d%s", + config->csv_output ? 0 : -5, + id, + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; + case AGGR_NONE: + fprintf(config->output, "CPU%*d%s", + config->csv_output ? 0 : -4, + perf_evsel__cpus(evsel)->map[id], config->csv_sep); + break; + case AGGR_THREAD: + fprintf(config->output, "%*s-%*d%s", + config->csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + config->csv_output ? 0 : -8, + thread_map__pid(evsel->threads, id), + config->csv_sep); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + default: + break; + } +} + +struct outstate { + FILE *fh; + bool newline; + const char *prefix; + int nfields; + int id, nr; + struct perf_evsel *evsel; +}; + +#define METRIC_LEN 35 + +static void new_line_std(struct perf_stat_config *config __maybe_unused, + void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct perf_stat_config *config, + struct outstate *os) +{ + fputc('\n', os->fh); + fputs(os->prefix, os->fh); + aggr_printout(config, os->evsel, os->id, os->nr); + if (config->aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(config, os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + +static void new_line_csv(struct perf_stat_config *config, void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); + aggr_printout(config, os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(config->csv_sep, os->fh); +} + +static void print_metric_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s", config->csv_sep, config->csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + ends = vals = ltrim(buf); + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); +} + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[1024], str[1024]; + unsigned mlen = config->metric_only_len; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (mlen < strlen(unit)) + mlen = strlen(unit) + 1; + + if (color) + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; + + color_snprintf(str, sizeof(str), color ?: "", fmt, val); + fprintf(out, "%*s ", mlen, str); +} + +static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + ends = vals = ltrim(buf); + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, config->csv_sep); +} + +static void new_line_metric(struct perf_stat_config *config __maybe_unused, + void *ctx __maybe_unused) +{ +} + +static void print_metric_header(struct perf_stat_config *config, + void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (config->csv_output) + fprintf(os->fh, "%s%s", unit, config->csv_sep); + else + fprintf(os->fh, "%*s ", config->metric_only_len, unit); +} + +static int first_shadow_cpu(struct perf_stat_config *config, + struct perf_evsel *evsel, int id) +{ + struct perf_evlist *evlist = evsel->evlist; + int i; + + if (!config->aggr_get_id) + return 0; + + if (config->aggr_mode == AGGR_NONE) + return id; + + if (config->aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (config->aggr_get_id(config, evlist->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + +static void abs_printout(struct perf_stat_config *config, + int id, int nr, struct perf_evsel *evsel, double avg) +{ + FILE *output = config->output; + double sc = evsel->scale; + const char *fmt; + + if (config->csv_output) { + fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + } else { + if (config->big_num) + fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; + } + + aggr_printout(config, evsel, id, nr); + + fprintf(output, fmt, avg, config->csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + config->csv_output ? 0 : config->unit_width, + evsel->unit, config->csv_sep); + + fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); + + print_cgroup(config, evsel); +} + +static bool is_mixed_hw_group(struct perf_evsel *counter) +{ + struct perf_evlist *evlist = counter->evlist; + u32 pmu_type = counter->attr.type; + struct perf_evsel *pos; + + if (counter->nr_members < 2) + return false; + + evlist__for_each_entry(evlist, pos) { + /* software events can be part of any hardware group */ + if (pos->attr.type == PERF_TYPE_SOFTWARE) + continue; + if (pmu_type == PERF_TYPE_SOFTWARE) { + pmu_type = pos->attr.type; + continue; + } + if (pmu_type != pos->attr.type) + return true; + } + + return false; +} + +static void printout(struct perf_stat_config *config, int id, int nr, + struct perf_evsel *counter, double uval, + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) +{ + struct perf_stat_output_ctx out; + struct outstate os = { + .fh = config->output, + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, + }; + print_metric_t pm = print_metric_std; + new_line_t nl; + + if (config->metric_only) { + nl = new_line_metric; + if (config->csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; + + if (config->csv_output && !config->metric_only) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[config->aggr_mode]; + if (counter->cgrp) + os.nfields++; + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (config->metric_only) { + pm(config, &os, NULL, "", "", 0); + return; + } + aggr_printout(config, counter, id, nr); + + fprintf(config->output, "%*s%s", + config->csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + config->csv_sep); + + if (counter->supported) { + config->print_free_counters_hint = 1; + if (is_mixed_hw_group(counter)) + config->print_mixed_hw_group_error = 1; + } + + fprintf(config->output, "%-*s%s", + config->csv_output ? 0 : config->unit_width, + counter->unit, config->csv_sep); + + fprintf(config->output, "%*s", + config->csv_output ? 0 : -25, + perf_evsel__name(counter)); + + print_cgroup(config, counter); + + if (!config->csv_output) + pm(config, &os, NULL, NULL, "", 0); + print_noise(config, counter, noise); + print_running(config, run, ena); + if (config->csv_output) + pm(config, &os, NULL, NULL, "", 0); + return; + } + + if (!config->metric_only) + abs_printout(config, id, nr, counter, uval); + + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + out.force_header = false; + + if (config->csv_output && !config->metric_only) { + print_noise(config, counter, noise); + print_running(config, run, ena); + } + + perf_stat__print_shadow_stats(config, counter, uval, + first_shadow_cpu(config, counter, id), + &out, &config->metric_events, st); + if (!config->csv_output && !config->metric_only) { + print_noise(config, counter, noise); + print_running(config, run, ena); + } +} + +static void aggr_update_shadow(struct perf_stat_config *config, + struct perf_evlist *evlist) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; + evlist__for_each_entry(evlist, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = config->aggr_get_id(config, evlist->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + perf_stat__update_shadow_stats(counter, val, + first_shadow_cpu(config, counter, id), + &rt_stat); + } + } +} + +static void uniquify_event_name(struct perf_evsel *counter) +{ + char *new_name; + char *config; + + if (counter->uniquified_name || + !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, + strlen(counter->pmu_name))) + return; + + config = strchr(counter->name, '/'); + if (config) { + if (asprintf(&new_name, + "%s%s", counter->pmu_name, config) > 0) { + free(counter->name); + counter->name = new_name; + } + } else { + if (asprintf(&new_name, + "%s [%s]", counter->name, counter->pmu_name) > 0) { + free(counter->name); + counter->name = new_name; + } + } + + counter->uniquified_name = true; +} + +static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evlist *evlist = counter->evlist; + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evlist->entries), node); + list_for_each_entry_continue (alias, &evlist->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) + break; + alias->merged_stat = true; + cb(config, alias, data, false); + } +} + +static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; + cb(config, counter, data, true); + if (config->no_merge) + uniquify_event_name(counter); + else if (counter->auto_merge_stats) + collect_all_aliases(config, counter, cb, data); + return true; +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_stat_config *config, + struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, s2; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct perf_counts_values *counts; + + s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu); + if (s2 != ad->id) + continue; + if (first) + ad->nr++; + counts = perf_counts(counter->counts, cpu, 0); + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (counts->ena == 0 || counts->run == 0 || + counter->counts->scaled == -1) { + ad->ena = 0; + ad->run = 0; + break; + } + ad->val += counts->val; + ad->ena += counts->ena; + ad->run += counts->run; + } +} + +static void print_aggr(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + struct perf_evsel *counter; + int s, id, nr; + double uval; + u64 ena, run, val; + bool first; + + if (!(config->aggr_map || config->aggr_get_id)) + return; + + aggr_update_shadow(config, evlist); + + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. + */ + for (s = 0; s < config->aggr_map->nr; s++) { + struct aggr_data ad; + if (prefix && metric_only) + fprintf(output, "%s", prefix); + + ad.id = id = config->aggr_map->map[s]; + first = true; + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(config, counter, aggr_cb, &ad)) + continue; + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; + if (first && metric_only) { + first = false; + aggr_printout(config, counter, id, nr); + } + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, id, nr, counter, uval, prefix, + run, ena, 1.0, &rt_stat); + if (!metric_only) + fputc('\n', output); + } + if (metric_only) + fputc('\n', output); + } +} + +static int cmp_val(const void *a, const void *b) +{ + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret, + struct target *_target) +{ + int cpu, thread, i = 0; + double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (uval == 0.0 && target__has_per_thread(_target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_stat_config *config, + struct target *_target, + struct perf_evsel *counter, char *prefix) +{ + FILE *output = config->output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { + if (prefix) + fprintf(output, "%s", prefix); + + id = buf[thread].id; + if (config->stats) + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &config->stats[id]); + else + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); + fputc('\n', output); + } + + free(buf); +} + +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->stats; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + +/* + * Print out the results of a single counter: + * aggregated counts in system-wide mode + */ +static void print_counter_aggr(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + double uval; + struct caggr_data cd = { .avg = 0.0 }; + + if (!collect_data(config, counter, counter_aggr_cb, &cd)) + return; + + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = cd.avg * counter->scale; + printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); + if (!metric_only) + fprintf(output, "\n"); +} + +static void counter_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + +/* + * Print out the results of a single counter: + * does not use aggregated count in system-wide + */ +static void print_counter(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + FILE *output = config->output; + u64 ena, run, val; + double uval; + int cpu; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct aggr_data ad = { .cpu = cpu }; + + if (!collect_data(config, counter, counter_cb, &ad)) + return; + val = ad.val; + ena = ad.ena; + run = ad.run; + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); + + fputc('\n', output); + } +} + +static void print_no_aggr_metric(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; + + nrcpus = evlist->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; + + if (prefix) + fputs(prefix, config->output); + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + if (first) { + aggr_printout(config, counter, cpu, 0); + first = false; + } + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); + } + fputc('\n', config->output); + } +} + +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; + +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_SOCKET] = "socket,cpus", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_GLOBAL] = "" +}; + +static void print_metric_headers(struct perf_stat_config *config, + struct perf_evlist *evlist, + const char *prefix, bool no_indent) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = config->output + }; + + if (prefix) + fprintf(config->output, "%s", prefix); + + if (!config->csv_output && !no_indent) + fprintf(config->output, "%*s", + aggr_header_lens[config->aggr_mode], ""); + if (config->csv_output) { + if (config->interval) + fputs("time,", config->output); + fputs(aggr_header_csv[config->aggr_mode], config->output); + } + + /* Print metrics headers only */ + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + out.force_header = true; + os.evsel = counter; + perf_stat__print_shadow_stats(config, counter, 0, + 0, + &out, + &config->metric_events, + &rt_stat); + } + fputc('\n', config->output); +} + +static void print_interval(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix, struct timespec *ts) +{ + bool metric_only = config->metric_only; + unsigned int unit_width = config->unit_width; + FILE *output = config->output; + static int num_print_interval; + + if (config->interval_clear) + puts(CONSOLE_CLEAR); + + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + + if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { + switch (config->aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU "); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + case AGGR_UNSET: + break; + } + } + + if ((num_print_interval == 0 || config->interval_clear) && metric_only) + print_metric_headers(config, evlist, " ", true); + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(struct perf_stat_config *config, + struct target *_target, + int argc, const char **argv) +{ + FILE *output = config->output; + int i; + + fflush(stdout); + + if (!config->csv_output) { + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (_target->system_wide) + fprintf(output, "\'system wide"); + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { + fprintf(output, "\'%s", argv ? argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) + fprintf(output, " %s", argv[i]); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); + else + fprintf(output, "thread id \'%s", _target->tid); + + fprintf(output, "\'"); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); + fprintf(output, ":\n\n"); + } +} + +static int get_precision(double num) +{ + if (num > 1) + return 0; + + return lround(ceil(-log10(num))); +} + +static void print_table(struct perf_stat_config *config, + FILE *output, int precision, double avg) +{ + char tmp[64]; + int idx, indent = 0; + + scnprintf(tmp, 64, " %17.*f", precision, avg); + while (tmp[indent] == ' ') + indent++; + + fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); + + for (idx = 0; idx < config->run_count; idx++) { + double run = (double) config->walltime_run[idx] / NSEC_PER_SEC; + int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); + + fprintf(output, " %17.*f (%+.*f) ", + precision, run, precision, run - avg); + + for (h = 0; h < n; h++) + fprintf(output, "#"); + + fprintf(output, "\n"); + } + + fprintf(output, "\n%*s# Final result:\n", indent, ""); +} + +static double timeval2double(struct timeval *t) +{ + return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; +} + +static void print_footer(struct perf_stat_config *config) +{ + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; + FILE *output = config->output; + int n; + + if (!config->null_run) + fprintf(output, "\n"); + + if (config->run_count == 1) { + fprintf(output, " %17.9f seconds time elapsed", avg); + + if (config->ru_display) { + double ru_utime = timeval2double(&config->ru_data.ru_utime); + double ru_stime = timeval2double(&config->ru_data.ru_stime); + + fprintf(output, "\n\n"); + fprintf(output, " %17.9f seconds user\n", ru_utime); + fprintf(output, " %17.9f seconds sys\n", ru_stime); + } + } else { + double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; + /* + * Display at most 2 more significant + * digits than the stddev inaccuracy. + */ + int precision = get_precision(sd) + 2; + + if (config->walltime_run_table) + print_table(config, output, precision, avg); + + fprintf(output, " %17.*f +- %.*f seconds time elapsed", + precision, avg, precision, sd); + + print_noise_pct(config, sd, avg); + } + fprintf(output, "\n\n"); + + if (config->print_free_counters_hint && + sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && + n > 0) + fprintf(output, +"Some events weren't counted. Try disabling the NMI watchdog:\n" +" echo 0 > /proc/sys/kernel/nmi_watchdog\n" +" perf stat ...\n" +" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + + if (config->print_mixed_hw_group_error) + fprintf(output, + "The events in group usually have to be from " + "the same PMU. Try reorganizing the group.\n"); +} + +void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, + struct target *_target, + struct timespec *ts, + int argc, const char **argv) +{ + bool metric_only = config->metric_only; + int interval = config->interval; + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(config, evlist, prefix = buf, ts); + else + print_header(config, _target, argc, argv); + + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0 && !interval) + print_metric_headers(config, evlist, prefix, false); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (config->aggr_mode == AGGR_GLOBAL && prefix) + fprintf(config->output, "%s", prefix); + } + + switch (config->aggr_mode) { + case AGGR_CORE: + case AGGR_SOCKET: + print_aggr(config, evlist, prefix); + break; + case AGGR_THREAD: + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_aggr_thread(config, _target, counter, prefix); + } + break; + case AGGR_GLOBAL: + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_counter_aggr(config, counter, prefix); + } + if (metric_only) + fputc('\n', config->output); + break; + case AGGR_NONE: + if (metric_only) + print_no_aggr_metric(config, evlist, prefix); + else { + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_counter(config, counter, prefix); + } + } + break; + case AGGR_UNSET: + default: + break; + } + + if (!interval && !config->csv_output) + print_footer(config); + + fflush(config->output); +} diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 99990f5f2512..3c22c58b3e90 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -209,12 +209,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); + u64 count_ns = count; count *= counter->scale; - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || - perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count); + if (perf_evsel__is_clock(counter)) + update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) @@ -410,7 +410,8 @@ static double runtime_stat_n(struct runtime_stat *st, return v->stats.n; } -static void print_stalled_cycles_frontend(int cpu, +static void print_stalled_cycles_frontend(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st) @@ -427,13 +428,14 @@ static void print_stalled_cycles_frontend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); if (ratio) - out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", ratio); else - out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); + out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(int cpu, +static void print_stalled_cycles_backend(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st) @@ -449,10 +451,11 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(int cpu, +static void print_branch_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -469,10 +472,11 @@ static void print_branch_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(int cpu, +static void print_l1_dcache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -490,10 +494,11 @@ static void print_l1_dcache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(int cpu, +static void print_l1_icache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -510,10 +515,11 @@ static void print_l1_icache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(int cpu, +static void print_dtlb_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -529,10 +535,11 @@ static void print_dtlb_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(int cpu, +static void print_itlb_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -548,10 +555,11 @@ static void print_itlb_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(int cpu, +static void print_ll_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -567,7 +575,7 @@ static void print_ll_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } /* @@ -674,7 +682,8 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) return sanitize_val(1.0 - sum); } -static void print_smi_cost(int cpu, struct perf_evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -694,11 +703,12 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel, if (cost > 10) color = PERF_COLOR_RED; - out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); - out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); + out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); + out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); } -static void generic_metric(const char *metric_expr, +static void generic_metric(struct perf_stat_config *config, + const char *metric_expr, struct perf_evsel **metric_events, char *name, const char *metric_name, @@ -737,20 +747,21 @@ static void generic_metric(const char *metric_expr, const char *p = metric_expr; if (expr__parse(&ratio, &pctx, &p) == 0) - print_metric(ctxp, NULL, "%8.1f", + print_metric(config, ctxp, NULL, "%8.1f", metric_name ? metric_name : out->force_header ? name : "", ratio); else - print_metric(ctxp, NULL, NULL, + print_metric(config, ctxp, NULL, NULL, out->force_header ? (metric_name ? metric_name : name) : "", 0); } else - print_metric(ctxp, NULL, NULL, "", 0); + print_metric(config, ctxp, NULL, NULL, "", 0); } -void perf_stat__print_shadow_stats(struct perf_evsel *evsel, +void perf_stat__print_shadow_stats(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, @@ -769,10 +780,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (total) { ratio = avg / total; - print_metric(ctxp, NULL, "%7.2f ", + print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", ratio); } else { - print_metric(ctxp, NULL, NULL, "insn per cycle", 0); + print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, @@ -783,20 +794,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ctx, cpu)); if (total && avg) { - out->new_line(ctxp); + out->new_line(config, ctxp); ratio = total / avg; - print_metric(ctxp, NULL, "%7.2f ", + print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); } else if (have_frontend_stalled) { - print_metric(ctxp, NULL, NULL, + print_metric(config, ctxp, NULL, NULL, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(cpu, evsel, avg, out, st); + print_branch_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all branches", 0); + print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | @@ -804,9 +815,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(cpu, evsel, avg, out, st); + print_l1_dcache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | @@ -814,9 +825,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(cpu, evsel, avg, out, st); + print_l1_icache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | @@ -824,9 +835,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out, st); + print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | @@ -834,9 +845,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(cpu, evsel, avg, out, st); + print_itlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | @@ -844,9 +855,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(cpu, evsel, avg, out, st); + print_ll_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); @@ -854,32 +865,32 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = avg * 100 / total; if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) - print_metric(ctxp, NULL, "%8.3f %%", + print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else - print_metric(ctxp, NULL, NULL, "of all cache refs", 0); + print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; - print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); + print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); } else { - print_metric(ctxp, NULL, NULL, "Ghz", 0); + print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) - print_metric(ctxp, NULL, + print_metric(config, ctxp, NULL, "%7.2f%%", "transactional cycles", 100.0 * (avg / total)); else - print_metric(ctxp, NULL, NULL, "transactional cycles", + print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); @@ -888,10 +899,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (total2 < avg) total2 = avg; if (total) - print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", + print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); else - print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); @@ -900,10 +911,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = total / avg; if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) - print_metric(ctxp, NULL, "%8.0f", + print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else - print_metric(ctxp, NULL, NULL, "cycles / transaction", + print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, @@ -912,33 +923,33 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (avg) ratio = total / avg; - print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); + print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); } else if (perf_evsel__is_clock(evsel)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", avg / (ratio * evsel->scale)); else - print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); + print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; - print_metric(ctxp, color, "%8.1f%%", "frontend bound", + print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; - print_metric(ctxp, color, "%8.1f%%", "retiring", + print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; - print_metric(ctxp, color, "%8.1f%%", "bad speculation", + print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { double be_bound = td_be_bound(ctx, cpu, st); @@ -955,12 +966,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; if (td_total_slots(ctx, cpu, st) > 0) - print_metric(ctxp, color, "%8.1f%%", name, + print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else - print_metric(ctxp, NULL, NULL, name, 0); + print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, + generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, evsel->metric_name, avg, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; @@ -975,9 +986,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, unit = 'K'; } snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); + print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out, st); + print_smi_cost(config, cpu, evsel, out, st); } else { num = 0; } @@ -987,12 +998,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, list_for_each_entry (mexp, &me->head, nd) { if (num++ > 0) - out->new_line(ctxp); - generic_metric(mexp->metric_expr, mexp->metric_events, + out->new_line(config, ctxp); + generic_metric(config, mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, avg, cpu, out, st); } } if (num == 0) - print_metric(ctxp, NULL, NULL, NULL, 0); + print_metric(config, ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index a0061e0b0fad..4d40515307b8 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -374,9 +374,8 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; } -int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_stat_event(struct perf_session *session, + union perf_event *event) { struct perf_counts_values count; struct stat_event *st = &event->stat; @@ -435,3 +434,98 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) return ret; } + +int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config, + struct target *target) +{ + struct perf_event_attr *attr = &evsel->attr; + struct perf_evsel *leader = evsel->leader; + + if (config->scale) { + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + /* + * The event is part of non trivial group, let's enable + * the group read (for leader) and ID retrieval for all + * members. + */ + if (leader->nr_members > 1) + attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; + + attr->inherit = !config->no_inherit; + + /* + * Some events get initialized with sample_(period/type) set, + * like tracepoints. Clear it up for counting. + */ + attr->sample_period = 0; + + if (config->identifier) + attr->sample_type = PERF_SAMPLE_IDENTIFIER; + + /* + * Disabling all counters initially, they will be enabled + * either manually by us or by kernel via enable_on_exec + * set later. + */ + if (perf_evsel__is_group_leader(evsel)) { + attr->disabled = 1; + + /* + * In case of initial_delay we enable tracee + * events manually. + */ + if (target__none(target) && !config->initial_delay) + attr->enable_on_exec = 1; + } + + if (target__has_cpu(target) && !target__has_per_thread(target)) + return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); + + return perf_evsel__open_per_thread(evsel, evsel->threads); +} + +int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, + struct perf_evlist *evlist, + perf_event__handler_t process, + bool attrs) +{ + int err; + + if (attrs) { + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + } + + err = perf_event__synthesize_extra_attr(tool, evlist, process, + attrs); + + err = perf_event__synthesize_thread_map2(tool, evlist->threads, + process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(tool, evlist->cpus, + process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_stat_config(tool, config, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize config.\n"); + return err; + } + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 36efb986f7fc..2f9c9159a364 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -4,8 +4,14 @@ #include <linux/types.h> #include <stdio.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> #include "xyarray.h" #include "rblist.h" +#include "perf.h" +#include "event.h" struct stats { double n, mean, M2; @@ -84,15 +90,42 @@ struct runtime_stat { struct rblist value_list; }; +typedef int (*aggr_get_id_t)(struct perf_stat_config *config, + struct cpu_map *m, int cpu); + struct perf_stat_config { - enum aggr_mode aggr_mode; - bool scale; - FILE *output; - unsigned int interval; - unsigned int timeout; - int times; - struct runtime_stat *stats; - int stats_num; + enum aggr_mode aggr_mode; + bool scale; + bool no_inherit; + bool identifier; + bool csv_output; + bool interval_clear; + bool metric_only; + bool null_run; + bool ru_display; + bool big_num; + bool no_merge; + bool walltime_run_table; + FILE *output; + unsigned int interval; + unsigned int timeout; + unsigned int initial_delay; + unsigned int unit_width; + unsigned int metric_only_len; + int times; + int run_count; + int print_free_counters_hint; + int print_mixed_hw_group_error; + struct runtime_stat *stats; + int stats_num; + const char *csv_sep; + struct stats *walltime_nsecs_stats; + struct rusage ru_data; + struct cpu_map *aggr_map; + aggr_get_id_t aggr_get_id; + struct cpu_map *cpus_aggr_map; + u64 *walltime_run; + struct rblist metric_events; }; void update_stats(struct stats *stats, u64 val); @@ -130,9 +163,10 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; -typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, +typedef void (*print_metric_t)(struct perf_stat_config *config, + void *ctx, const char *color, const char *unit, const char *fmt, double val); -typedef void (*new_line_t )(void *ctx); +typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); void runtime_stat__init(struct runtime_stat *st); void runtime_stat__exit(struct runtime_stat *st); @@ -148,7 +182,8 @@ struct perf_stat_output_ctx { bool force_header; }; -void perf_stat__print_shadow_stats(struct perf_evsel *evsel, +void perf_stat__print_shadow_stats(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, @@ -164,11 +199,25 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_tool; union perf_event; struct perf_session; -int perf_event__process_stat_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_stat_event(struct perf_session *session, + union perf_event *event); size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); + +int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config, + struct target *target); +int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, + struct perf_evlist *evlist, + perf_event__handler_t process, + bool attrs); +void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, + struct target *_target, + struct timespec *ts, + int argc, const char **argv); #endif diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 3d1cf5bf7f18..9005fbe0780e 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -98,19 +98,25 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - if (len < 0) + if (len < 0) { + va_end(ap_saved); return len; + } if (len > strbuf_avail(sb)) { ret = strbuf_grow(sb, len); - if (ret) + if (ret) { + va_end(ap_saved); return ret; + } len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); va_end(ap_saved); if (len > strbuf_avail(sb)) { pr_debug("this should not happen, your vsnprintf is broken"); + va_end(ap_saved); return -EINVAL; } } + va_end(ap_saved); return strbuf_setlen(sb, sb->len + len); } diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 1cbada2dc6be..f735ee038713 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -334,7 +334,7 @@ static char *cpu_model(void) if (file) { while (fgets(buf, 255, file)) { if (strstr(buf, "model name")) { - strncpy(cpu_m, &buf[13], 255); + strlcpy(cpu_m, &buf[13], 255); break; } } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 29770ea61768..66a84d5846c8 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) plt_entry_size = 16; break; - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + case EM_SPARC: + plt_header_size = 48; + plt_entry_size = 12; + break; + + case EM_SPARCV9: + plt_header_size = 128; + plt_entry_size = 32; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ plt_header_size = shdr_plt.sh_entsize; plt_entry_size = shdr_plt.sh_entsize; break; @@ -1947,6 +1957,34 @@ void kcore_extract__delete(struct kcore_extract *kce) } #ifdef HAVE_GELF_GETNOTE_SUPPORT + +static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off) +{ + if (!base_off) + return; + + if (tmp->bit32) + tmp->addr.a32[SDT_NOTE_IDX_LOC] = + tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off - + tmp->addr.a32[SDT_NOTE_IDX_BASE]; + else + tmp->addr.a64[SDT_NOTE_IDX_LOC] = + tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off - + tmp->addr.a64[SDT_NOTE_IDX_BASE]; +} + +static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr, + GElf_Addr base_off) +{ + if (!base_off) + return; + + if (tmp->bit32 && tmp->addr.a32[SDT_NOTE_IDX_REFCTR]) + tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off); + else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR]) + tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off); +} + /** * populate_sdt_note : Parse raw data and identify SDT note * @elf: elf of the opened file @@ -1964,7 +2002,6 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, const char *provider, *name, *args; struct sdt_note *tmp = NULL; GElf_Ehdr ehdr; - GElf_Addr base_off = 0; GElf_Shdr shdr; int ret = -EINVAL; @@ -2060,17 +2097,12 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, * base address in the description of the SDT note. If its different, * then accordingly, adjust the note location. */ - if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) { - base_off = shdr.sh_offset; - if (base_off) { - if (tmp->bit32) - tmp->addr.a32[0] = tmp->addr.a32[0] + base_off - - tmp->addr.a32[1]; - else - tmp->addr.a64[0] = tmp->addr.a64[0] + base_off - - tmp->addr.a64[1]; - } - } + if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) + sdt_adjust_loc(tmp, shdr.sh_offset); + + /* Adjust reference counter offset */ + if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL)) + sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset); list_add_tail(&tmp->note_list, sdt_notes); return 0; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d188b7588152..01f2c7385e38 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1537,17 +1537,6 @@ int dso__load(struct dso *dso, struct map *map) dso->adjust_symbols = 0; if (perfmap) { - struct stat st; - - if (lstat(map_path, &st) < 0) - goto out; - - if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) { - pr_warning("File %s not owned by current user or root, " - "ignoring it (use -f to override).\n", map_path); - goto out; - } - ret = dso__load_perf_map(map_path, dso); dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : DSO_BINARY_TYPE__NOT_FOUND; @@ -1680,11 +1669,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { struct maps *maps = &mg->maps; struct map *map; + struct rb_node *node; down_read(&maps->lock); - for (map = maps__first(maps); map; map = map__next(map)) { - if (map->dso && strcmp(map->dso->short_name, name) == 0) + for (node = maps->names.rb_node; node; ) { + int rc; + + map = rb_entry(node, struct map, rb_node_name); + + rc = strcmp(map->dso->short_name, name); + if (rc < 0) + node = node->rb_left; + else if (rc > 0) + node = node->rb_right; + else + goto out_unlock; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f25fae4b5743..14d9d438e7e2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -63,6 +63,7 @@ struct symbol { u8 ignore:1; u8 inlined:1; u8 arch_sym; + bool annotate2; char name[0]; }; @@ -123,7 +124,8 @@ struct symbol_conf { const char *vmlinux_name, *kallsyms_name, *source_prefix, - *field_sep; + *field_sep, + *graph_function; const char *default_guest_vmlinux_name, *default_guest_kallsyms, *default_guest_modules; @@ -379,12 +381,19 @@ int get_sdt_note_list(struct list_head *head, const char *target); int cleanup_sdt_note_list(struct list_head *sdt_notes); int sdt_notes__get_count(struct list_head *start); +#define SDT_PROBES_SCN ".probes" #define SDT_BASE_SCN ".stapsdt.base" #define SDT_NOTE_SCN ".note.stapsdt" #define SDT_NOTE_TYPE 3 #define SDT_NOTE_NAME "stapsdt" #define NR_ADDR 3 +enum { + SDT_NOTE_IDX_LOC = 0, + SDT_NOTE_IDX_BASE, + SDT_NOTE_IDX_REFCTR, +}; + struct mem_info *mem_info__new(void); struct mem_info *mem_info__get(struct mem_info *mi); void mem_info__put(struct mem_info *mi); diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index dd17d6a38d3a..d52f27f373ce 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -15,6 +15,7 @@ #include <linux/rbtree.h> #include <linux/list.h> +#include <linux/log2.h> #include <errno.h> #include "thread.h" #include "event.h" @@ -36,6 +37,7 @@ * @branch_count: the branch count when the entry was created * @cp: call path * @no_call: a 'call' was not seen + * @trace_end: a 'call' but trace ended */ struct thread_stack_entry { u64 ret_addr; @@ -44,6 +46,7 @@ struct thread_stack_entry { u64 branch_count; struct call_path *cp; bool no_call; + bool trace_end; }; /** @@ -58,6 +61,7 @@ struct thread_stack_entry { * @last_time: last timestamp * @crp: call/return processor * @comm: current comm + * @arr_sz: size of array if this is the first element of an array */ struct thread_stack { struct thread_stack_entry *stack; @@ -69,8 +73,19 @@ struct thread_stack { u64 last_time; struct call_return_processor *crp; struct comm *comm; + unsigned int arr_sz; }; +/* + * Assume pid == tid == 0 identifies the idle task as defined by + * perf_session__register_idle_thread(). The idle task is really 1 task per cpu, + * and therefore requires a stack for each cpu. + */ +static inline bool thread_stack__per_cpu(struct thread *thread) +{ + return !(thread->tid || thread->pid_); +} + static int thread_stack__grow(struct thread_stack *ts) { struct thread_stack_entry *new_stack; @@ -89,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts) return 0; } -static struct thread_stack *thread_stack__new(struct thread *thread, - struct call_return_processor *crp) +static int thread_stack__init(struct thread_stack *ts, struct thread *thread, + struct call_return_processor *crp) { - struct thread_stack *ts; - - ts = zalloc(sizeof(struct thread_stack)); - if (!ts) - return NULL; + int err; - if (thread_stack__grow(ts)) { - free(ts); - return NULL; - } + err = thread_stack__grow(ts); + if (err) + return err; if (thread->mg && thread->mg->machine) ts->kernel_start = machine__kernel_start(thread->mg->machine); @@ -109,10 +119,74 @@ static struct thread_stack *thread_stack__new(struct thread *thread, ts->kernel_start = 1ULL << 63; ts->crp = crp; + return 0; +} + +static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, + struct call_return_processor *crp) +{ + struct thread_stack *ts = thread->ts, *new_ts; + unsigned int old_sz = ts ? ts->arr_sz : 0; + unsigned int new_sz = 1; + + if (thread_stack__per_cpu(thread) && cpu > 0) + new_sz = roundup_pow_of_two(cpu + 1); + + if (!ts || new_sz > old_sz) { + new_ts = calloc(new_sz, sizeof(*ts)); + if (!new_ts) + return NULL; + if (ts) + memcpy(new_ts, ts, old_sz * sizeof(*ts)); + new_ts->arr_sz = new_sz; + zfree(&thread->ts); + thread->ts = new_ts; + ts = new_ts; + } + + if (thread_stack__per_cpu(thread) && cpu > 0 && + (unsigned int)cpu < ts->arr_sz) + ts += cpu; + + if (!ts->stack && + thread_stack__init(ts, thread, crp)) + return NULL; + return ts; } -static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) +static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) +{ + struct thread_stack *ts = thread->ts; + + if (cpu < 0) + cpu = 0; + + if (!ts || (unsigned int)cpu >= ts->arr_sz) + return NULL; + + ts += cpu; + + if (!ts->stack) + return NULL; + + return ts; +} + +static inline struct thread_stack *thread__stack(struct thread *thread, + int cpu) +{ + if (!thread) + return NULL; + + if (thread_stack__per_cpu(thread)) + return thread__cpu_stack(thread, cpu); + + return thread->ts; +} + +static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, + bool trace_end) { int err = 0; @@ -124,6 +198,7 @@ static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) } } + ts->stack[ts->cnt].trace_end = trace_end; ts->stack[ts->cnt++].ret_addr = ret_addr; return err; @@ -150,6 +225,18 @@ static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) } } +static void thread_stack__pop_trace_end(struct thread_stack *ts) +{ + size_t i; + + for (i = ts->cnt; i; ) { + if (ts->stack[--i].trace_end) + ts->cnt = i; + else + return; + } +} + static bool thread_stack__in_kernel(struct thread_stack *ts) { if (!ts->cnt) @@ -210,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) int thread_stack__flush(struct thread *thread) { - if (thread->ts) - return __thread_stack__flush(thread, thread->ts); + struct thread_stack *ts = thread->ts; + unsigned int pos; + int err = 0; - return 0; + if (ts) { + for (pos = 0; pos < ts->arr_sz; pos++) { + int ret = __thread_stack__flush(thread, ts + pos); + + if (ret) + err = ret; + } + } + + return err; } -int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, +int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr) { + struct thread_stack *ts = thread__stack(thread, cpu); + if (!thread) return -EINVAL; - if (!thread->ts) { - thread->ts = thread_stack__new(thread, NULL); - if (!thread->ts) { + if (!ts) { + ts = thread_stack__new(thread, cpu, NULL); + if (!ts) { pr_warning("Out of memory: no thread stack\n"); return -ENOMEM; } - thread->ts->trace_nr = trace_nr; + ts->trace_nr = trace_nr; } /* @@ -236,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, * the stack might be completely invalid. Better to report nothing than * to report something misleading, so flush the stack. */ - if (trace_nr != thread->ts->trace_nr) { - if (thread->ts->trace_nr) - __thread_stack__flush(thread, thread->ts); - thread->ts->trace_nr = trace_nr; + if (trace_nr != ts->trace_nr) { + if (ts->trace_nr) + __thread_stack__flush(thread, ts); + ts->trace_nr = trace_nr; } /* Stop here if thread_stack__process() is in use */ - if (thread->ts->crp) + if (ts->crp) return 0; if (flags & PERF_IP_FLAG_CALL) { @@ -254,51 +353,108 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, ret_addr = from_ip + insn_len; if (ret_addr == to_ip) return 0; /* Zero-length calls are excluded */ - return thread_stack__push(thread->ts, ret_addr); - } else if (flags & PERF_IP_FLAG_RETURN) { - if (!from_ip) - return 0; - thread_stack__pop(thread->ts, to_ip); + return thread_stack__push(ts, ret_addr, + flags & PERF_IP_FLAG_TRACE_END); + } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { + /* + * If the caller did not change the trace number (which would + * have flushed the stack) then try to make sense of the stack. + * Possibly, tracing began after returning to the current + * address, so try to pop that. Also, do not expect a call made + * when the trace ended, to return, so pop that. + */ + thread_stack__pop(ts, to_ip); + thread_stack__pop_trace_end(ts); + } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { + thread_stack__pop(ts, to_ip); } return 0; } -void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr) +void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr) { - if (!thread || !thread->ts) + struct thread_stack *ts = thread__stack(thread, cpu); + + if (!ts) return; - if (trace_nr != thread->ts->trace_nr) { - if (thread->ts->trace_nr) - __thread_stack__flush(thread, thread->ts); - thread->ts->trace_nr = trace_nr; + if (trace_nr != ts->trace_nr) { + if (ts->trace_nr) + __thread_stack__flush(thread, ts); + ts->trace_nr = trace_nr; } } +static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) +{ + __thread_stack__flush(thread, ts); + zfree(&ts->stack); +} + +static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) +{ + unsigned int arr_sz = ts->arr_sz; + + __thread_stack__free(thread, ts); + memset(ts, 0, sizeof(*ts)); + ts->arr_sz = arr_sz; +} + void thread_stack__free(struct thread *thread) { - if (thread->ts) { - __thread_stack__flush(thread, thread->ts); - zfree(&thread->ts->stack); + struct thread_stack *ts = thread->ts; + unsigned int pos; + + if (ts) { + for (pos = 0; pos < ts->arr_sz; pos++) + __thread_stack__free(thread, ts + pos); zfree(&thread->ts); } } -void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip) +static inline u64 callchain_context(u64 ip, u64 kernel_start) { - size_t i; + return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; +} - if (!thread || !thread->ts) - chain->nr = 1; - else - chain->nr = min(sz, thread->ts->cnt + 1); +void thread_stack__sample(struct thread *thread, int cpu, + struct ip_callchain *chain, + size_t sz, u64 ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + u64 context = callchain_context(ip, kernel_start); + u64 last_context; + size_t i, j; + + if (sz < 2) { + chain->nr = 0; + return; + } + + chain->ips[0] = context; + chain->ips[1] = ip; + + if (!ts) { + chain->nr = 2; + return; + } - chain->ips[0] = ip; + last_context = context; - for (i = 1; i < chain->nr; i++) - chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; + for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (i >= sz - 1) + break; + chain->ips[i++] = context; + last_context = context; + } + chain->ips[i] = ip; + } + + chain->nr = i; } struct call_return_processor * @@ -332,7 +488,7 @@ void call_return_processor__free(struct call_return_processor *crp) static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, u64 timestamp, u64 ref, struct call_path *cp, - bool no_call) + bool no_call, bool trace_end) { struct thread_stack_entry *tse; int err; @@ -350,6 +506,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->branch_count = ts->branch_count; tse->cp = cp; tse->no_call = no_call; + tse->trace_end = trace_end; return 0; } @@ -397,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, return 1; } -static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts, +static int thread_stack__bottom(struct thread_stack *ts, struct perf_sample *sample, struct addr_location *from_al, struct addr_location *to_al, u64 ref) @@ -422,8 +579,8 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts, if (!cp) return -ENOMEM; - return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp, - true); + return thread_stack__push_cp(ts, ip, sample->time, ref, cp, + true, false); } static int thread_stack__no_call_return(struct thread *thread, @@ -455,7 +612,7 @@ static int thread_stack__no_call_return(struct thread *thread, if (!cp) return -ENOMEM; return thread_stack__push_cp(ts, 0, sample->time, ref, - cp, true); + cp, true, false); } } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { /* Return to userspace, so pop all kernel addresses */ @@ -480,7 +637,7 @@ static int thread_stack__no_call_return(struct thread *thread, return -ENOMEM; err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, - true); + true, false); if (err) return err; @@ -500,7 +657,7 @@ static int thread_stack__trace_begin(struct thread *thread, /* Pop trace end */ tse = &ts->stack[ts->cnt - 1]; - if (tse->cp->sym == NULL && tse->cp->ip == 0) { + if (tse->trace_end) { err = thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); if (err) @@ -529,7 +686,7 @@ static int thread_stack__trace_end(struct thread_stack *ts, ret_addr = sample->ip + sample->insn_len; return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, - false); + false, true); } int thread_stack__process(struct thread *thread, struct comm *comm, @@ -538,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm, struct addr_location *to_al, u64 ref, struct call_return_processor *crp) { - struct thread_stack *ts = thread->ts; + struct thread_stack *ts = thread__stack(thread, sample->cpu); int err = 0; - if (ts) { - if (!ts->crp) { - /* Supersede thread_stack__event() */ - thread_stack__free(thread); - thread->ts = thread_stack__new(thread, crp); - if (!thread->ts) - return -ENOMEM; - ts = thread->ts; - ts->comm = comm; - } - } else { - thread->ts = thread_stack__new(thread, crp); - if (!thread->ts) + if (ts && !ts->crp) { + /* Supersede thread_stack__event() */ + thread_stack__reset(thread, ts); + ts = NULL; + } + + if (!ts) { + ts = thread_stack__new(thread, sample->cpu, crp); + if (!ts) return -ENOMEM; - ts = thread->ts; ts->comm = comm; } @@ -569,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, /* If the stack is empty, put the current symbol on the stack */ if (!ts->cnt) { - err = thread_stack__bottom(thread, ts, sample, from_al, to_al, - ref); + err = thread_stack__bottom(ts, sample, from_al, to_al, ref); if (err) return err; } @@ -579,6 +730,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->last_time = sample->time; if (sample->flags & PERF_IP_FLAG_CALL) { + bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; struct call_path_root *cpr = ts->crp->cpr; struct call_path *cp; u64 ret_addr; @@ -596,7 +748,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, if (!cp) return -ENOMEM; err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, - cp, false); + cp, false, trace_end); } else if (sample->flags & PERF_IP_FLAG_RETURN) { if (!sample->ip || !sample->addr) return 0; @@ -618,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm, return err; } -size_t thread_stack__depth(struct thread *thread) +size_t thread_stack__depth(struct thread *thread, int cpu) { - if (!thread->ts) + struct thread_stack *ts = thread__stack(thread, cpu); + + if (!ts) return 0; - return thread->ts->cnt; + return ts->cnt; } diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index b7e41c4ebfdd..1f626f4a1c40 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -80,14 +80,14 @@ struct call_return_processor { void *data; }; -int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, +int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr); -void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); -void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip); +void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr); +void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain, + size_t sz, u64 ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); -size_t thread_stack__depth(struct thread *thread); +size_t thread_stack__depth(struct thread *thread, int cpu); struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, void *data), diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2048d393ece6..c83372329f89 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -64,6 +64,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) RB_CLEAR_NODE(&thread->rb_node); /* Thread holds first ref to nsdata. */ thread->nsinfo = nsinfo__new(pid); + srccode_state_init(&thread->srccode_state); } return thread; @@ -103,6 +104,7 @@ void thread__delete(struct thread *thread) unwind__finish_access(thread); nsinfo__zput(thread->nsinfo); + srccode_state_free(&thread->srccode_state); exit_rwsem(&thread->namespaces_lock); exit_rwsem(&thread->comm_lock); @@ -330,7 +332,8 @@ static int thread__prepare_access(struct thread *thread) } static int thread__clone_map_groups(struct thread *thread, - struct thread *parent) + struct thread *parent, + bool do_maps_clone) { /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) @@ -341,15 +344,11 @@ static int thread__clone_map_groups(struct thread *thread, thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } - /* But this one is new process, copy maps. */ - if (map_groups__clone(thread, parent->mg) < 0) - return -ENOMEM; - - return 0; + return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0; } -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -362,7 +361,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent); + return thread__clone_map_groups(thread, parent, do_maps_clone); } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 07606aa6998d..712dd48cc0ca 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -8,6 +8,7 @@ #include <unistd.h> #include <sys/types.h> #include "symbol.h" +#include "map.h" #include <strlist.h> #include <intlist.h> #include "rwsem.h" @@ -38,10 +39,13 @@ struct thread { void *priv; struct thread_stack *ts; struct nsinfo *nsinfo; + struct srccode_state srccode_state; #ifdef HAVE_LIBUNWIND_SUPPORT void *addr_space; struct unwind_libunwind_ops *unwind_libunwind_ops; #endif + bool filter; + int filter_entry_depth; }; struct machine; @@ -87,16 +91,20 @@ struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); +struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al); struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); +struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, struct addr_location *al); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 183c91453522..56e4ca54020a 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -26,15 +26,12 @@ typedef int (*event_attr_op)(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); +typedef int (*event_op2)(struct perf_session *session, union perf_event *event); +typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); -typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); - enum show_feature_header { SHOW_FEAT_NO_HEADER = 0, SHOW_FEAT_HEADER, diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 8e517def925b..4c8da8c4435f 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -46,8 +46,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) samples_per_sec; ret = SNPRINTF(bf, size, " PerfTop:%8.0f irqs/sec kernel:%4.1f%%" - " exact: %4.1f%% [", samples_per_sec, - ksamples_percent, esamples_percent); + " exact: %4.1f%% lost: %" PRIu64 "/%" PRIu64 " drop: %" PRIu64 "/%" PRIu64 " [", + samples_per_sec, ksamples_percent, esamples_percent, + top->lost, top->lost_total, top->drop, top->drop_total); } else { float us_samples_per_sec = top->us_samples / top->delay_secs; float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs; @@ -106,6 +107,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) top->evlist->cpus->nr > 1 ? "s" : ""); } + perf_top__reset_sample_counters(top); return ret; } @@ -113,5 +115,5 @@ void perf_top__reset_sample_counters(struct perf_top *top) { top->samples = top->us_samples = top->kernel_samples = top->exact_samples = top->guest_kernel_samples = - top->guest_us_samples = 0; + top->guest_us_samples = top->lost = top->drop = 0; } diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 9add1f72ce95..19f95eaf75c8 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -22,7 +22,7 @@ struct perf_top { * Symbols will be added here in perf_event__process_sample and will * get out after decayed. */ - u64 samples; + u64 samples, lost, lost_total, drop, drop_total; u64 kernel_samples, us_samples; u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; @@ -40,6 +40,14 @@ struct perf_top { const char *sym_filter; float min_percent; unsigned int nr_threads_synthesize; + + struct { + struct ordered_events *in; + struct ordered_events data[2]; + bool rotate; + pthread_mutex_t mutex; + pthread_cond_t cond; + } qe; }; #define CONSOLE_CLEAR "[H[2J" diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 7b0ca7cbb7de..8ad8e755127b 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -531,12 +531,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, "/tmp/perf-XXXXXX"); if (!mkstemp(tdata->temp_file)) { pr_debug("Can't make temp file"); + free(tdata); return NULL; } temp_fd = open(tdata->temp_file, O_RDWR); if (temp_fd < 0) { pr_debug("Can't read '%s'", tdata->temp_file); + free(tdata); return NULL; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index e76214f8d596..ad74be1f0e42 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -33,14 +33,15 @@ static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) { struct tep_handle *pevent = context->pevent; - struct event_format *event; - struct format_field *field; + struct tep_event *event; + struct tep_format_field *field; if (!*size) { - if (!pevent->events) + + event = tep_get_first_event(pevent); + if (!event) return 0; - event = pevent->events[0]; field = tep_find_common_field(event, type); if (!field) return 0; @@ -94,9 +95,9 @@ int common_pc(struct scripting_context *context) } unsigned long long -raw_field_value(struct event_format *event, const char *name, void *data) +raw_field_value(struct tep_event *event, const char *name, void *data) { - struct format_field *field; + struct tep_format_field *field; unsigned long long val; field = tep_find_any_field(event, name); @@ -108,12 +109,12 @@ raw_field_value(struct event_format *event, const char *name, void *data) return val; } -unsigned long long read_size(struct event_format *event, void *ptr, int size) +unsigned long long read_size(struct tep_event *event, void *ptr, int size) { return tep_read_number(event->pevent, ptr, size); } -void event_format__fprintf(struct event_format *event, +void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp) { struct tep_record record; @@ -130,7 +131,7 @@ void event_format__fprintf(struct event_format *event, trace_seq_destroy(&s); } -void event_format__print(struct event_format *event, +void event_format__print(struct tep_event *event, int cpu, void *data, int size) { return event_format__fprintf(event, cpu, data, size, stdout); @@ -158,6 +159,7 @@ void parse_ftrace_printk(struct tep_handle *pevent, printk = strdup(fmt+1); line = strtok_r(NULL, "\n", &next); tep_register_print_string(pevent, printk, addr); + free(printk); } } @@ -188,29 +190,33 @@ int parse_event_file(struct tep_handle *pevent, return tep_parse_event(pevent, buf, size, sys); } -struct event_format *trace_find_next_event(struct tep_handle *pevent, - struct event_format *event) +struct tep_event *trace_find_next_event(struct tep_handle *pevent, + struct tep_event *event) { static int idx; + int events_count; + struct tep_event *all_events; - if (!pevent || !pevent->events) + all_events = tep_get_first_event(pevent); + events_count = tep_get_events_count(pevent); + if (!pevent || !all_events || events_count < 1) return NULL; if (!event) { idx = 0; - return pevent->events[0]; + return all_events; } - if (idx < pevent->nr_events && event == pevent->events[idx]) { + if (idx < events_count && event == (all_events + idx)) { idx++; - if (idx == pevent->nr_events) + if (idx == events_count) return NULL; - return pevent->events[idx]; + return (all_events + idx); } - for (idx = 1; idx < pevent->nr_events; idx++) { - if (event == pevent->events[idx - 1]) - return pevent->events[idx]; + for (idx = 1; idx < events_count; idx++) { + if (event == (all_events + (idx - 1))) + return (all_events + idx); } return NULL; } diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 3dfc1db6b25b..efe2f58cff4e 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -102,7 +102,7 @@ static unsigned int read4(struct tep_handle *pevent) if (do_read(&data, 4) < 0) return 0; - return __data2host4(pevent, data); + return tep_read_number(pevent, &data, 4); } static unsigned long long read8(struct tep_handle *pevent) @@ -111,7 +111,7 @@ static unsigned long long read8(struct tep_handle *pevent) if (do_read(&data, 8) < 0) return 0; - return __data2host8(pevent, data); + return tep_read_number(pevent, &data, 8); } static char *read_string(void) @@ -241,7 +241,7 @@ static int read_header_files(struct tep_handle *pevent) * The commit field in the page is of type long, * use that instead, since it represents the kernel. */ - tep_set_long_size(pevent, pevent->header_page_size_size); + tep_set_long_size(pevent, tep_get_header_page_size(pevent)); } free(header_page); @@ -297,10 +297,8 @@ static int read_event_file(struct tep_handle *pevent, char *sys, } ret = do_read(buf, size); - if (ret < 0) { - free(buf); + if (ret < 0) goto out; - } ret = parse_event_file(pevent, buf, size, sys); if (ret < 0) @@ -349,9 +347,12 @@ static int read_event_files(struct tep_handle *pevent) for (x=0; x < count; x++) { size = read8(pevent); ret = read_event_file(pevent, sys, size); - if (ret) + if (ret) { + free(sys); return ret; + } } + free(sys); } return 0; } diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 58bb72f266f3..cbe0dd758e3a 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -72,12 +72,12 @@ void trace_event__cleanup(struct trace_event *t) /* * Returns pointer with encoded error via <linux/err.h> interface. */ -static struct event_format* +static struct tep_event* tp_format(const char *sys, const char *name) { char *tp_dir = get_events_file(sys); struct tep_handle *pevent = tevent.pevent; - struct event_format *event = NULL; + struct tep_event *event = NULL; char path[PATH_MAX]; size_t size; char *data; @@ -102,7 +102,7 @@ tp_format(const char *sys, const char *name) /* * Returns pointer with encoded error via <linux/err.h> interface. */ -struct event_format* +struct tep_event* trace_event__tp_format(const char *sys, const char *name) { if (!tevent_initialized && trace_event__init2()) @@ -111,7 +111,7 @@ trace_event__tp_format(const char *sys, const char *name) return tp_format(sys, name); } -struct event_format *trace_event__tp_format_id(int id) +struct tep_event *trace_event__tp_format_id(int id) { if (!tevent_initialized && trace_event__init2()) return ERR_PTR(-ENOMEM); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 40204ec3a7a2..d9b0a942090a 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -3,6 +3,7 @@ #define _PERF_UTIL_TRACE_EVENT_H #include <traceevent/event-parse.h> +#include <traceevent/trace-seq.h> #include "parse-events.h" struct machine; @@ -10,28 +11,28 @@ struct perf_sample; union perf_event; struct perf_tool; struct thread; -struct plugin_list; +struct tep_plugin_list; struct trace_event { struct tep_handle *pevent; - struct plugin_list *plugin_list; + struct tep_plugin_list *plugin_list; }; int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); int trace_event__register_resolver(struct machine *machine, tep_func_resolver_t *func); -struct event_format* +struct tep_event* trace_event__tp_format(const char *sys, const char *name); -struct event_format *trace_event__tp_format_id(int id); +struct tep_event *trace_event__tp_format_id(int id); int bigendian(void); -void event_format__fprintf(struct event_format *event, +void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct event_format *event, +void event_format__print(struct tep_event *event, int cpu, void *data, int size); int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); @@ -39,7 +40,7 @@ int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); unsigned long long -raw_field_value(struct event_format *event, const char *name, void *data); +raw_field_value(struct tep_event *event, const char *name, void *data); void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size); void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size); @@ -47,9 +48,9 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); -struct event_format *trace_find_next_event(struct tep_handle *pevent, - struct event_format *event); -unsigned long long read_size(struct event_format *event, void *ptr, int size); +struct tep_event *trace_find_next_event(struct tep_handle *pevent, + struct tep_event *event); +unsigned long long read_size(struct tep_event *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); int read_tracing_data(int fd, struct list_head *pattrs); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 6f318b15950e..5eff9bfc5758 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip, Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start) + if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index eac5b858a371..093352e93d50 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -221,7 +221,7 @@ out: return err; } -static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) { void *ptr; loff_t pgoff; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index dc58254a2b69..ece040b799f6 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -6,6 +6,7 @@ /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 +#include <fcntl.h> #include <stdbool.h> #include <stddef.h> #include <stdlib.h> @@ -35,6 +36,7 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); @@ -57,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); +#ifndef HAVE_GET_CURRENT_DIR_NAME +char *get_current_dir_name(void); +#endif + #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif |